From 0b260ece5721f31d756e55abdf3f5b71757c90a9 Mon Sep 17 00:00:00 2001 From: glifocat Date: Fri, 6 Mar 2026 17:47:12 +0100 Subject: [PATCH] feat(skills): add pdf-reader skill (#772) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks @glifocat! Clean skill package — good docs, solid tests, nice intent files. Pushed a small fix for path traversal on the PDF filename before merging. --- .claude/skills/add-pdf-reader/SKILL.md | 100 ++ .../add/container/skills/pdf-reader/SKILL.md | 94 ++ .../container/skills/pdf-reader/pdf-reader | 203 ++++ .claude/skills/add-pdf-reader/manifest.yaml | 17 + .../modify/container/Dockerfile | 74 ++ .../modify/container/Dockerfile.intent.md | 23 + .../modify/src/channels/whatsapp.test.ts | 1069 +++++++++++++++++ .../src/channels/whatsapp.test.ts.intent.md | 22 + .../modify/src/channels/whatsapp.ts | 429 +++++++ .../modify/src/channels/whatsapp.ts.intent.md | 29 + .../add-pdf-reader/tests/pdf-reader.test.ts | 171 +++ vitest.skills.config.ts | 7 + 12 files changed, 2238 insertions(+) create mode 100644 .claude/skills/add-pdf-reader/SKILL.md create mode 100644 .claude/skills/add-pdf-reader/add/container/skills/pdf-reader/SKILL.md create mode 100755 .claude/skills/add-pdf-reader/add/container/skills/pdf-reader/pdf-reader create mode 100644 .claude/skills/add-pdf-reader/manifest.yaml create mode 100644 .claude/skills/add-pdf-reader/modify/container/Dockerfile create mode 100644 .claude/skills/add-pdf-reader/modify/container/Dockerfile.intent.md create mode 100644 .claude/skills/add-pdf-reader/modify/src/channels/whatsapp.test.ts create mode 100644 .claude/skills/add-pdf-reader/modify/src/channels/whatsapp.test.ts.intent.md create mode 100644 .claude/skills/add-pdf-reader/modify/src/channels/whatsapp.ts create mode 100644 .claude/skills/add-pdf-reader/modify/src/channels/whatsapp.ts.intent.md create mode 100644 .claude/skills/add-pdf-reader/tests/pdf-reader.test.ts create mode 100644 
vitest.skills.config.ts diff --git a/.claude/skills/add-pdf-reader/SKILL.md b/.claude/skills/add-pdf-reader/SKILL.md new file mode 100644 index 0000000..a394125 --- /dev/null +++ b/.claude/skills/add-pdf-reader/SKILL.md @@ -0,0 +1,100 @@ +--- +name: add-pdf-reader +description: Add PDF reading to NanoClaw agents. Extracts text from PDFs via pdftotext CLI. Handles WhatsApp attachments, URLs, and local files. +--- + +# Add PDF Reader + +Adds PDF reading capability to all container agents using poppler-utils (pdftotext/pdfinfo). PDFs sent as WhatsApp attachments are auto-downloaded to the group workspace. + +## Phase 1: Pre-flight + +### Check if already applied + +Read `.nanoclaw/state.yaml`. If `add-pdf-reader` is in `applied_skills`, skip to Phase 3 (Verify). + +## Phase 2: Apply Code Changes + +### Initialize skills system (if needed) + +If `.nanoclaw/` directory doesn't exist: + +```bash +npx tsx scripts/apply-skill.ts --init +``` + +### Apply the skill + +```bash +npx tsx scripts/apply-skill.ts .claude/skills/add-pdf-reader +``` + +This deterministically: +- Adds `container/skills/pdf-reader/SKILL.md` (agent-facing documentation) +- Adds `container/skills/pdf-reader/pdf-reader` (CLI script) +- Three-way merges `poppler-utils` + COPY into `container/Dockerfile` +- Three-way merges PDF attachment download into `src/channels/whatsapp.ts` +- Three-way merges PDF tests into `src/channels/whatsapp.test.ts` +- Records application in `.nanoclaw/state.yaml` + +If merge conflicts occur, read the intent files: +- `modify/container/Dockerfile.intent.md` +- `modify/src/channels/whatsapp.ts.intent.md` +- `modify/src/channels/whatsapp.test.ts.intent.md` + +### Validate + +```bash +npm test +npm run build +``` + +### Rebuild container + +```bash +./container/build.sh +``` + +### Restart service + +```bash +launchctl kickstart -k gui/$(id -u)/com.nanoclaw # macOS +# Linux: systemctl --user restart nanoclaw +``` + +## Phase 3: Verify + +### Test PDF extraction + +Send a PDF file 
in any registered WhatsApp chat. The agent should: +1. Download the PDF to `attachments/` +2. Respond acknowledging the PDF +3. Be able to extract text when asked + +### Test URL fetching + +Ask the agent to read a PDF from a URL. It should use `pdf-reader fetch <url>`. + +### Check logs if needed + +```bash +tail -f logs/nanoclaw.log | grep -i pdf +``` + +Look for: +- `Downloaded PDF attachment` — successful download +- `Failed to download PDF attachment` — media download issue + +## Troubleshooting + +### Agent says pdf-reader command not found + +Container needs rebuilding. Run `./container/build.sh` and restart the service. + +### PDF text extraction is empty + +The PDF may be scanned (image-based). pdftotext only handles text-based PDFs. Consider using the agent-browser to open the PDF visually instead. + +### WhatsApp PDF not detected + +Verify the message has `documentMessage` with `mimetype: application/pdf`. Some file-sharing apps send PDFs as generic files without the correct mimetype. diff --git a/.claude/skills/add-pdf-reader/add/container/skills/pdf-reader/SKILL.md b/.claude/skills/add-pdf-reader/add/container/skills/pdf-reader/SKILL.md new file mode 100644 index 0000000..01fe2ca --- /dev/null +++ b/.claude/skills/add-pdf-reader/add/container/skills/pdf-reader/SKILL.md @@ -0,0 +1,94 @@ +--- +name: pdf-reader +description: Read and extract text from PDF files — documents, reports, contracts, spreadsheets. Use whenever you need to read PDF content, not just when explicitly asked. Handles local files, URLs, and WhatsApp attachments. 
+allowed-tools: Bash(pdf-reader:*) +--- + +# PDF Reader + +## Quick start + +```bash +pdf-reader extract report.pdf # Extract all text +pdf-reader extract report.pdf --layout # Preserve tables/columns +pdf-reader fetch https://example.com/doc.pdf # Download and extract +pdf-reader info report.pdf # Show metadata + size +pdf-reader list # List all PDFs in directory tree +``` + +## Commands + +### extract — Extract text from PDF + +```bash +pdf-reader extract <file> # Full text to stdout +pdf-reader extract <file> --layout # Preserve layout (tables, columns) +pdf-reader extract <file> --pages 1-5 # Pages 1 through 5 +pdf-reader extract <file> --pages 3-3 # Single page (page 3) +pdf-reader extract <file> --layout --pages 2-10 # Layout + page range +``` + +Options: +- `--layout` — Maintains spatial positioning. Essential for tables, spreadsheets, multi-column docs. +- `--pages N-M` — Extract only pages N through M (1-based, inclusive). + +### fetch — Download and extract PDF from URL + +```bash +pdf-reader fetch <url> # Download, verify, extract with layout +pdf-reader fetch <url> report.pdf # Also save a local copy +``` + +Downloads the PDF, verifies it has a valid `%PDF` header, then extracts text with layout preservation. Temporary files are cleaned up automatically. + +### info — PDF metadata and file size + +```bash +pdf-reader info <file> +``` + +Shows title, author, page count, page size, PDF version, and file size on disk. + +### list — Find all PDFs in directory tree + +```bash +pdf-reader list +``` + +Recursively lists all `.pdf` files with page count and file size. + +## WhatsApp PDF attachments + +When a user sends a PDF on WhatsApp, it is automatically saved to the `attachments/` directory. 
The message will include a path hint like: + +> [PDF: attachments/document.pdf] + +To read the attached PDF: + +```bash +pdf-reader extract attachments/document.pdf --layout +``` + +## Example workflows + +### Read a contract and summarize key terms + +```bash +pdf-reader info attachments/contract.pdf +pdf-reader extract attachments/contract.pdf --layout +``` + +### Extract specific pages from a long report + +```bash +pdf-reader info report.pdf # Check total pages +pdf-reader extract report.pdf --pages 1-3 # Executive summary +pdf-reader extract report.pdf --pages 15-20 # Financial tables +``` + +### Fetch and analyze a public document + +```bash +pdf-reader fetch https://example.com/annual-report.pdf report.pdf +pdf-reader info report.pdf +``` diff --git a/.claude/skills/add-pdf-reader/add/container/skills/pdf-reader/pdf-reader b/.claude/skills/add-pdf-reader/add/container/skills/pdf-reader/pdf-reader new file mode 100755 index 0000000..be413c2 --- /dev/null +++ b/.claude/skills/add-pdf-reader/add/container/skills/pdf-reader/pdf-reader @@ -0,0 +1,203 @@ +#!/bin/bash +set -euo pipefail + +# pdf-reader — CLI wrapper around poppler-utils (pdftotext, pdfinfo) +# Provides extract, fetch, info, list commands for PDF processing. + +VERSION="1.0.0" + +usage() { + cat <<'USAGE' +pdf-reader — Extract text and metadata from PDF files + +Usage: + pdf-reader extract <file> [--layout] [--pages N-M] + pdf-reader fetch <url> [filename] + pdf-reader info <file> + pdf-reader list + pdf-reader help + +Commands: + extract Extract text from a PDF file to stdout + fetch Download a PDF from a URL and extract text + info Show PDF metadata and file size + list List all PDFs in current directory tree + help Show this help message + +Extract options: + --layout Preserve original layout (tables, columns) + --pages Page range to extract (e.g. 
1-5, 3-3 for single page) +USAGE +} + +cmd_extract() { + local file="" + local layout=false + local first_page="" + local last_page="" + + # Parse arguments + while [[ $# -gt 0 ]]; do + case "$1" in + --layout) + layout=true + shift + ;; + --pages) + if [[ -z "${2:-}" ]]; then + echo "Error: --pages requires a range argument (e.g. 1-5)" >&2 + exit 1 + fi + local range="$2" + first_page="${range%-*}" + last_page="${range#*-}" + shift 2 + ;; + -*) + echo "Error: Unknown option: $1" >&2 + exit 1 + ;; + *) + if [[ -z "$file" ]]; then + file="$1" + else + echo "Error: Unexpected argument: $1" >&2 + exit 1 + fi + shift + ;; + esac + done + + if [[ -z "$file" ]]; then + echo "Error: No file specified" >&2 + echo "Usage: pdf-reader extract <file> [--layout] [--pages N-M]" >&2 + exit 1 + fi + + if [[ ! -f "$file" ]]; then + echo "Error: File not found: $file" >&2 + exit 1 + fi + + # Build pdftotext arguments + local args=() + if [[ "$layout" == true ]]; then + args+=(-layout) + fi + if [[ -n "$first_page" ]]; then + args+=(-f "$first_page") + fi + if [[ -n "$last_page" ]]; then + args+=(-l "$last_page") + fi + + pdftotext ${args[@]+"${args[@]}"} "$file" - +} + +cmd_fetch() { + local url="${1:-}" + local filename="${2:-}" + + if [[ -z "$url" ]]; then + echo "Error: No URL specified" >&2 + echo "Usage: pdf-reader fetch <url> [filename]" >&2 + exit 1 + fi + + # Create temporary file; the trap bakes in the path now because this local is unset (set -u error) once the function has returned and EXIT fires + local tmpfile + tmpfile="$(mktemp /tmp/pdf-reader-XXXXXX.pdf)" + trap "rm -f -- '$tmpfile'" EXIT + + # Download (-f: treat HTTP errors as failures instead of saving the error page) + echo "Downloading: $url" >&2 + if ! 
curl -fsL -o "$tmpfile" "$url"; then + echo "Error: Failed to download: $url" >&2 + exit 1 + fi + + # Verify PDF header + local header + header="$(head -c 4 "$tmpfile")" + if [[ "$header" != "%PDF" ]]; then + echo "Error: Downloaded file is not a valid PDF (header: $header)" >&2 + exit 1 + fi + + # Save with name if requested + if [[ -n "$filename" ]]; then + cp "$tmpfile" "$filename" + echo "Saved to: $filename" >&2 + fi + + # Extract with layout + pdftotext -layout "$tmpfile" - +} + +cmd_info() { + local file="${1:-}" + + if [[ -z "$file" ]]; then + echo "Error: No file specified" >&2 + echo "Usage: pdf-reader info <file>" >&2 + exit 1 + fi + + if [[ ! -f "$file" ]]; then + echo "Error: File not found: $file" >&2 + exit 1 + fi + + pdfinfo "$file" + echo "" + echo "File size: $(du -h "$file" | cut -f1)" +} + +cmd_list() { + local found=false pdf page_line + + # Use globbing to find PDFs (globstar makes **/ match recursively) + shopt -s nullglob globstar + + # Use associative array to deduplicate (*.pdf overlaps with **/*.pdf) + declare -A seen + for pdf in *.pdf **/*.pdf; do + [[ -n "${seen[$pdf]:-}" ]] && continue + seen["$pdf"]=1 + found=true + + local pages="?" + local size + size="$(du -h "$pdf" | cut -f1)" + + # Try to get page count from pdfinfo + if page_line="$(pdfinfo "$pdf" 2>/dev/null | grep '^Pages:')"; then + pages="$(echo "$page_line" | awk '{print $2}')" + fi + + printf "%-60s %5s pages %8s\n" "$pdf" "$pages" "$size" + done + + if [[ "$found" == false ]]; then + echo "No PDF files found in current directory tree." >&2 + fi +} + +# Main dispatch +command="${1:-help}" +shift || true + +case "$command" in + extract) cmd_extract "$@" ;; + fetch) cmd_fetch "$@" ;; + info) cmd_info "$@" ;; + list) cmd_list ;; + help|--help|-h) usage ;; + version|--version|-v) echo "pdf-reader $VERSION" ;; + *) + echo "Error: Unknown command: $command" >&2 + echo "Run 'pdf-reader help' for usage." 
>&2 + exit 1 + ;; +esac diff --git a/.claude/skills/add-pdf-reader/manifest.yaml b/.claude/skills/add-pdf-reader/manifest.yaml new file mode 100644 index 0000000..83bf114 --- /dev/null +++ b/.claude/skills/add-pdf-reader/manifest.yaml @@ -0,0 +1,17 @@ +skill: add-pdf-reader +version: 1.1.0 +description: "Add PDF reading capability to container agents via pdftotext CLI" +core_version: 1.2.8 +adds: + - container/skills/pdf-reader/SKILL.md + - container/skills/pdf-reader/pdf-reader +modifies: + - container/Dockerfile + - src/channels/whatsapp.ts + - src/channels/whatsapp.test.ts +structured: + npm_dependencies: {} + env_additions: [] +conflicts: [] +depends: [] +test: "npx vitest run --config vitest.skills.config.ts .claude/skills/add-pdf-reader/tests/pdf-reader.test.ts" diff --git a/.claude/skills/add-pdf-reader/modify/container/Dockerfile b/.claude/skills/add-pdf-reader/modify/container/Dockerfile new file mode 100644 index 0000000..0654503 --- /dev/null +++ b/.claude/skills/add-pdf-reader/modify/container/Dockerfile @@ -0,0 +1,74 @@ +# NanoClaw Agent Container +# Runs Claude Agent SDK in isolated Linux VM with browser automation + +FROM node:22-slim + +# Install system dependencies for Chromium and PDF tools +RUN apt-get update && apt-get install -y \ + chromium \ + fonts-liberation \ + fonts-noto-cjk \ + fonts-noto-color-emoji \ + libgbm1 \ + libnss3 \ + libatk-bridge2.0-0 \ + libgtk-3-0 \ + libx11-xcb1 \ + libxcomposite1 \ + libxdamage1 \ + libxrandr2 \ + libasound2 \ + libpangocairo-1.0-0 \ + libcups2 \ + libdrm2 \ + libxshmfence1 \ + curl \ + git \ + poppler-utils \ + && rm -rf /var/lib/apt/lists/* + +# Set Chromium path for agent-browser +ENV AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium +ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium + +# Install agent-browser and claude-code globally +RUN npm install -g agent-browser @anthropic-ai/claude-code + +# Create app directory +WORKDIR /app + +# Copy package files first for better caching +COPY 
agent-runner/package*.json ./ + +# Install dependencies +RUN npm install + +# Copy source code +COPY agent-runner/ ./ + +# Build TypeScript +RUN npm run build + +# Install pdf-reader CLI +COPY skills/pdf-reader/pdf-reader /usr/local/bin/pdf-reader +RUN chmod +x /usr/local/bin/pdf-reader + +# Create workspace directories +RUN mkdir -p /workspace/group /workspace/global /workspace/extra /workspace/ipc/messages /workspace/ipc/tasks /workspace/ipc/input + +# Create entrypoint script +# Secrets are passed via stdin JSON — temp file is deleted immediately after Node reads it +# Follow-up messages arrive via IPC files in /workspace/ipc/input/ +RUN printf '#!/bin/bash\nset -e\ncd /app && npx tsc --outDir /tmp/dist 2>&1 >&2\nln -s /app/node_modules /tmp/dist/node_modules\nchmod -R a-w /tmp/dist\ncat > /tmp/input.json\nnode /tmp/dist/index.js < /tmp/input.json\n' > /app/entrypoint.sh && chmod +x /app/entrypoint.sh + +# Set ownership to node user (non-root) for writable directories +RUN chown -R node:node /workspace && chmod 777 /home/node + +# Switch to non-root user (required for --dangerously-skip-permissions) +USER node + +# Set working directory to group workspace +WORKDIR /workspace/group + +# Entry point reads JSON from stdin, outputs JSON to stdout +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/.claude/skills/add-pdf-reader/modify/container/Dockerfile.intent.md b/.claude/skills/add-pdf-reader/modify/container/Dockerfile.intent.md new file mode 100644 index 0000000..c20958d --- /dev/null +++ b/.claude/skills/add-pdf-reader/modify/container/Dockerfile.intent.md @@ -0,0 +1,23 @@ +# Intent: container/Dockerfile modifications + +## What changed +Added PDF reading capability via poppler-utils and a custom pdf-reader CLI script. 
+ +## Key sections + +### apt-get install (system dependencies block) +- Added: `poppler-utils` to the package list (provides pdftotext, pdfinfo, pdftohtml) +- Changed: Comment updated to mention PDF tools + +### After the TypeScript build (`RUN npm run build`), before workspace directory creation +- Added: `COPY skills/pdf-reader/pdf-reader /usr/local/bin/pdf-reader` to copy CLI script +- Added: `RUN chmod +x /usr/local/bin/pdf-reader` to make it executable + +## Invariants (must-keep) +- All Chromium dependencies unchanged +- agent-browser and claude-code npm global installs unchanged +- WORKDIR, COPY agent-runner, npm install, npm run build sequence unchanged +- Workspace directory creation unchanged +- Entrypoint script unchanged +- User switching (node user) unchanged +- ENTRYPOINT unchanged diff --git a/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.test.ts b/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.test.ts new file mode 100644 index 0000000..3e68b85 --- /dev/null +++ b/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.test.ts @@ -0,0 +1,1069 @@ +import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest'; +import { EventEmitter } from 'events'; + +// --- Mocks --- + +// Mock config +vi.mock('../config.js', () => ({ + STORE_DIR: '/tmp/nanoclaw-test-store', + ASSISTANT_NAME: 'Andy', + ASSISTANT_HAS_OWN_NUMBER: false, + GROUPS_DIR: '/tmp/test-groups', +})); + +// Mock logger +vi.mock('../logger.js', () => ({ + logger: { + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }, +})); + +// Mock db +vi.mock('../db.js', () => ({ + getLastGroupSync: vi.fn(() => null), + setLastGroupSync: vi.fn(), + updateChatName: vi.fn(), +})); + +// Mock fs +vi.mock('fs', async () => { + const actual = await vi.importActual('fs'); + return { + ...actual, + default: { + ...actual, + existsSync: vi.fn(() => true), + mkdirSync: vi.fn(), + writeFileSync: vi.fn(), + }, + }; +}); + +// Mock child_process (used for 
osascript notification) +vi.mock('child_process', () => ({ + 
exec: vi.fn(), +})); + +// Build a fake WASocket that's an EventEmitter with the methods we need +function createFakeSocket() { + const ev = new EventEmitter(); + const sock = { + ev: { + on: (event: string, handler: (...args: unknown[]) => void) => { + ev.on(event, handler); + }, + }, + user: { + id: '1234567890:1@s.whatsapp.net', + lid: '9876543210:1@lid', + }, + sendMessage: vi.fn().mockResolvedValue(undefined), + sendPresenceUpdate: vi.fn().mockResolvedValue(undefined), + groupFetchAllParticipating: vi.fn().mockResolvedValue({}), + updateMediaMessage: vi.fn(), + end: vi.fn(), + // Expose the event emitter for triggering events in tests + _ev: ev, + }; + return sock; +} + +let fakeSocket: ReturnType<typeof createFakeSocket>; + +// Mock Baileys +vi.mock('@whiskeysockets/baileys', () => { + return { + default: vi.fn(() => fakeSocket), + Browsers: { macOS: vi.fn(() => ['macOS', 'Chrome', '']) }, + DisconnectReason: { + loggedOut: 401, + badSession: 500, + connectionClosed: 428, + connectionLost: 408, + connectionReplaced: 440, + timedOut: 408, + restartRequired: 515, + }, + downloadMediaMessage: vi + .fn() + .mockResolvedValue(Buffer.from('pdf-data')), + fetchLatestWaWebVersion: vi + .fn() + .mockResolvedValue({ version: [2, 3000, 0] }), + normalizeMessageContent: vi.fn((content: unknown) => content), + makeCacheableSignalKeyStore: vi.fn((keys: unknown) => keys), + useMultiFileAuthState: vi.fn().mockResolvedValue({ + state: { + creds: {}, + keys: {}, + }, + saveCreds: vi.fn(), + }), + }; +}); + +import { WhatsAppChannel, WhatsAppChannelOpts } from './whatsapp.js'; +import { getLastGroupSync, updateChatName, setLastGroupSync } from '../db.js'; +import { downloadMediaMessage } from '@whiskeysockets/baileys'; + +// --- Test helpers --- + +function createTestOpts( + overrides?: Partial<WhatsAppChannelOpts>, +): WhatsAppChannelOpts { + return { + onMessage: vi.fn(), + onChatMetadata: vi.fn(), + registeredGroups: vi.fn(() => ({ + 'registered@g.us': { + name: 'Test Group', + 
folder: 'test-group', + trigger: '@Andy', 
+ added_at: '2024-01-01T00:00:00.000Z', + }, + })), + ...overrides, + }; +} + +function triggerConnection(state: string, extra?: Record<string, unknown>) { + fakeSocket._ev.emit('connection.update', { connection: state, ...extra }); +} + +function triggerDisconnect(statusCode: number) { + fakeSocket._ev.emit('connection.update', { + connection: 'close', + lastDisconnect: { + error: { output: { statusCode } }, + }, + }); +} + +async function triggerMessages(messages: unknown[]) { + fakeSocket._ev.emit('messages.upsert', { messages }); + // Flush microtasks so the async messages.upsert handler completes + await new Promise((r) => setTimeout(r, 0)); +} + +// --- Tests --- + +describe('WhatsAppChannel', () => { + beforeEach(() => { + fakeSocket = createFakeSocket(); + vi.mocked(getLastGroupSync).mockReturnValue(null); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + /** + * Helper: start connect, flush microtasks so event handlers are registered, + * then trigger the connection open event. Returns the resolved promise. 
+ */ + async function connectChannel(channel: WhatsAppChannel): Promise<void> { + const p = channel.connect(); + // Flush microtasks so connectInternal completes its await and registers handlers + await new Promise((r) => setTimeout(r, 0)); + triggerConnection('open'); + return p; + } + + // --- Version fetch --- + + describe('version fetch', () => { + it('connects with fetched version', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + await connectChannel(channel); + + const { fetchLatestWaWebVersion } = + await import('@whiskeysockets/baileys'); + expect(fetchLatestWaWebVersion).toHaveBeenCalledWith({}); + }); + + it('falls back gracefully when version fetch fails', async () => { + const { fetchLatestWaWebVersion } = + await import('@whiskeysockets/baileys'); + vi.mocked(fetchLatestWaWebVersion).mockRejectedValueOnce( + new Error('network error'), + ); + + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + await connectChannel(channel); + + // Should still connect successfully despite fetch failure + expect(channel.isConnected()).toBe(true); + }); + }); + + // --- Connection lifecycle --- + + describe('connection lifecycle', () => { + it('resolves connect() when connection opens', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + expect(channel.isConnected()).toBe(true); + }); + + it('sets up LID to phone mapping on open', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + // The channel should have mapped the LID from sock.user + // We can verify by sending a message from a LID JID + // and checking the translated JID in the callback + }); + + it('flushes outgoing queue on reconnect', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + // Disconnect + (channel 
as any).connected = false; + + // Queue a message while disconnected + await channel.sendMessage('test@g.us', 'Queued message'); + expect(fakeSocket.sendMessage).not.toHaveBeenCalled(); + + // Reconnect + (channel as any).connected = true; + await (channel as any).flushOutgoingQueue(); + + // Group messages get prefixed when flushed + expect(fakeSocket.sendMessage).toHaveBeenCalledWith('test@g.us', { + text: 'Andy: Queued message', + }); + }); + + it('disconnects cleanly', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await channel.disconnect(); + expect(channel.isConnected()).toBe(false); + expect(fakeSocket.end).toHaveBeenCalled(); + }); + }); + + // --- QR code and auth --- + + describe('authentication', () => { + it('exits process when QR code is emitted (no auth state)', async () => { + vi.useFakeTimers(); + const mockExit = vi + .spyOn(process, 'exit') + .mockImplementation(() => undefined as never); + + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + // Start connect but don't await (it won't resolve - process exits) + channel.connect().catch(() => {}); + + // Flush microtasks so connectInternal registers handlers + await vi.advanceTimersByTimeAsync(0); + + // Emit QR code event + fakeSocket._ev.emit('connection.update', { qr: 'some-qr-data' }); + + // Advance timer past the 1000ms setTimeout before exit + await vi.advanceTimersByTimeAsync(1500); + + expect(mockExit).toHaveBeenCalledWith(1); + mockExit.mockRestore(); + vi.useRealTimers(); + }); + }); + + // --- Reconnection behavior --- + + describe('reconnection', () => { + it('reconnects on non-loggedOut disconnect', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + expect(channel.isConnected()).toBe(true); + + // Disconnect with a non-loggedOut reason (e.g., connectionClosed = 428) + triggerDisconnect(428); + + 
expect(channel.isConnected()).toBe(false); + // The channel should attempt to reconnect (calls connectInternal again) + }); + + it('exits on loggedOut disconnect', async () => { + const mockExit = vi + .spyOn(process, 'exit') + .mockImplementation(() => undefined as never); + + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + // Disconnect with loggedOut reason (401) + triggerDisconnect(401); + + expect(channel.isConnected()).toBe(false); + expect(mockExit).toHaveBeenCalledWith(0); + mockExit.mockRestore(); + }); + + it('retries reconnection after 5s on failure', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + // Disconnect with stream error 515 + triggerDisconnect(515); + + // The channel sets a 5s retry — just verify it doesn't crash + await new Promise((r) => setTimeout(r, 100)); + }); + }); + + // --- Message handling --- + + describe('message handling', () => { + it('delivers message for registered group', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-1', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { conversation: 'Hello Andy' }, + pushName: 'Alice', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onChatMetadata).toHaveBeenCalledWith( + 'registered@g.us', + expect.any(String), + undefined, + 'whatsapp', + true, + ); + expect(opts.onMessage).toHaveBeenCalledWith( + 'registered@g.us', + expect.objectContaining({ + id: 'msg-1', + content: 'Hello Andy', + sender_name: 'Alice', + is_from_me: false, + }), + ); + }); + + it('only emits metadata for unregistered groups', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + 
await triggerMessages([ + { + key: { + id: 'msg-2', + remoteJid: 'unregistered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { conversation: 'Hello' }, + pushName: 'Bob', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onChatMetadata).toHaveBeenCalledWith( + 'unregistered@g.us', + expect.any(String), + undefined, + 'whatsapp', + true, + ); + expect(opts.onMessage).not.toHaveBeenCalled(); + }); + + it('ignores status@broadcast messages', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-3', + remoteJid: 'status@broadcast', + fromMe: false, + }, + message: { conversation: 'Status update' }, + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onChatMetadata).not.toHaveBeenCalled(); + expect(opts.onMessage).not.toHaveBeenCalled(); + }); + + it('ignores messages with no content', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-4', + remoteJid: 'registered@g.us', + fromMe: false, + }, + message: null, + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onMessage).not.toHaveBeenCalled(); + }); + + it('extracts text from extendedTextMessage', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-5', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { + extendedTextMessage: { text: 'A reply message' }, + }, + pushName: 'Charlie', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onMessage).toHaveBeenCalledWith( + 'registered@g.us', + expect.objectContaining({ content: 'A reply message' }), + ); + 
}); + + it('extracts caption from imageMessage', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-6', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { + imageMessage: { + caption: 'Check this photo', + mimetype: 'image/jpeg', + }, + }, + pushName: 'Diana', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onMessage).toHaveBeenCalledWith( + 'registered@g.us', + expect.objectContaining({ content: 'Check this photo' }), + ); + }); + + it('extracts caption from videoMessage', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-7', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { + videoMessage: { caption: 'Watch this', mimetype: 'video/mp4' }, + }, + pushName: 'Eve', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onMessage).toHaveBeenCalledWith( + 'registered@g.us', + expect.objectContaining({ content: 'Watch this' }), + ); + }); + + it('handles message with no extractable text (e.g. 
voice note without caption)', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-8', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { + audioMessage: { mimetype: 'audio/ogg; codecs=opus', ptt: true }, + }, + pushName: 'Frank', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + // Skipped — no text content to process + expect(opts.onMessage).not.toHaveBeenCalled(); + }); + + it('uses sender JID when pushName is absent', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-9', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { conversation: 'No push name' }, + // pushName is undefined + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onMessage).toHaveBeenCalledWith( + 'registered@g.us', + expect.objectContaining({ sender_name: '5551234' }), + ); + }); + + it('downloads and injects PDF attachment path', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-pdf', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { + documentMessage: { + mimetype: 'application/pdf', + fileName: 'report.pdf', + }, + }, + pushName: 'Alice', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(downloadMediaMessage).toHaveBeenCalled(); + + const fs = await import('fs'); + expect(fs.default.writeFileSync).toHaveBeenCalled(); + + expect(opts.onMessage).toHaveBeenCalledWith( + 'registered@g.us', + expect.objectContaining({ + content: expect.stringContaining('[PDF: attachments/report.pdf'), + 
}), + ); + }); + + it('preserves document caption alongside PDF info', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-pdf-caption', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { + documentMessage: { + mimetype: 'application/pdf', + fileName: 'report.pdf', + caption: 'Here is the monthly report', + }, + }, + pushName: 'Alice', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onMessage).toHaveBeenCalledWith( + 'registered@g.us', + expect.objectContaining({ + content: expect.stringContaining('Here is the monthly report'), + }), + ); + + expect(opts.onMessage).toHaveBeenCalledWith( + 'registered@g.us', + expect.objectContaining({ + content: expect.stringContaining('[PDF: attachments/report.pdf'), + }), + ); + }); + + it('handles PDF download failure gracefully', async () => { + vi.mocked(downloadMediaMessage).mockRejectedValueOnce( + new Error('Download failed'), + ); + + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-pdf-fail', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { + documentMessage: { + mimetype: 'application/pdf', + fileName: 'report.pdf', + }, + }, + pushName: 'Bob', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + // Message skipped since content remains empty after failed download + expect(opts.onMessage).not.toHaveBeenCalled(); + }); + }); + + // --- LID ↔ JID translation --- + + describe('LID to JID translation', () => { + it('translates known LID to phone JID', async () => { + const opts = createTestOpts({ + registeredGroups: vi.fn(() => ({ + '1234567890@s.whatsapp.net': { + name: 'Self Chat', + folder: 'self-chat', + trigger: '@Andy', + 
added_at: '2024-01-01T00:00:00.000Z', + }, + })), + }); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + // The socket has lid '9876543210:1@lid' → phone '1234567890@s.whatsapp.net' + // Send a message from the LID + await triggerMessages([ + { + key: { + id: 'msg-lid', + remoteJid: '9876543210@lid', + fromMe: false, + }, + message: { conversation: 'From LID' }, + pushName: 'Self', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + // Should be translated to phone JID + expect(opts.onChatMetadata).toHaveBeenCalledWith( + '1234567890@s.whatsapp.net', + expect.any(String), + undefined, + 'whatsapp', + false, + ); + }); + + it('passes through non-LID JIDs unchanged', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-normal', + remoteJid: 'registered@g.us', + participant: '5551234@s.whatsapp.net', + fromMe: false, + }, + message: { conversation: 'Normal JID' }, + pushName: 'Grace', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + expect(opts.onChatMetadata).toHaveBeenCalledWith( + 'registered@g.us', + expect.any(String), + undefined, + 'whatsapp', + true, + ); + }); + + it('passes through unknown LID JIDs unchanged', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await triggerMessages([ + { + key: { + id: 'msg-unknown-lid', + remoteJid: '0000000000@lid', + fromMe: false, + }, + message: { conversation: 'Unknown LID' }, + pushName: 'Unknown', + messageTimestamp: Math.floor(Date.now() / 1000), + }, + ]); + + // Unknown LID passes through unchanged + expect(opts.onChatMetadata).toHaveBeenCalledWith( + '0000000000@lid', + expect.any(String), + undefined, + 'whatsapp', + false, + ); + }); + }); + + // --- Outgoing message queue --- + + describe('outgoing message queue', () => { + it('sends message 
directly when connected', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await channel.sendMessage('test@g.us', 'Hello'); + // Group messages get prefixed with assistant name + expect(fakeSocket.sendMessage).toHaveBeenCalledWith('test@g.us', { + text: 'Andy: Hello', + }); + }); + + it('prefixes direct chat messages on shared number', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await channel.sendMessage('123@s.whatsapp.net', 'Hello'); + // Shared number: DMs also get prefixed (needed for self-chat distinction) + expect(fakeSocket.sendMessage).toHaveBeenCalledWith( + '123@s.whatsapp.net', + { text: 'Andy: Hello' }, + ); + }); + + it('queues message when disconnected', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + // Don't connect — channel starts disconnected + await channel.sendMessage('test@g.us', 'Queued'); + expect(fakeSocket.sendMessage).not.toHaveBeenCalled(); + }); + + it('queues message on send failure', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + // Make sendMessage fail + fakeSocket.sendMessage.mockRejectedValueOnce(new Error('Network error')); + + await channel.sendMessage('test@g.us', 'Will fail'); + + // Should not throw, message queued for retry + // The queue should have the message + }); + + it('flushes multiple queued messages in order', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + // Queue messages while disconnected + await channel.sendMessage('test@g.us', 'First'); + await channel.sendMessage('test@g.us', 'Second'); + await channel.sendMessage('test@g.us', 'Third'); + + // Connect — flush happens automatically on open + await connectChannel(channel); + + // Give the async flush time to complete 
+ await new Promise((r) => setTimeout(r, 50)); + + expect(fakeSocket.sendMessage).toHaveBeenCalledTimes(3); + // Group messages get prefixed + expect(fakeSocket.sendMessage).toHaveBeenNthCalledWith(1, 'test@g.us', { + text: 'Andy: First', + }); + expect(fakeSocket.sendMessage).toHaveBeenNthCalledWith(2, 'test@g.us', { + text: 'Andy: Second', + }); + expect(fakeSocket.sendMessage).toHaveBeenNthCalledWith(3, 'test@g.us', { + text: 'Andy: Third', + }); + }); + }); + + // --- Group metadata sync --- + + describe('group metadata sync', () => { + it('syncs group metadata on first connection', async () => { + fakeSocket.groupFetchAllParticipating.mockResolvedValue({ + 'group1@g.us': { subject: 'Group One' }, + 'group2@g.us': { subject: 'Group Two' }, + }); + + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + // Wait for async sync to complete + await new Promise((r) => setTimeout(r, 50)); + + expect(fakeSocket.groupFetchAllParticipating).toHaveBeenCalled(); + expect(updateChatName).toHaveBeenCalledWith('group1@g.us', 'Group One'); + expect(updateChatName).toHaveBeenCalledWith('group2@g.us', 'Group Two'); + expect(setLastGroupSync).toHaveBeenCalled(); + }); + + it('skips sync when synced recently', async () => { + // Last sync was 1 hour ago (within 24h threshold) + vi.mocked(getLastGroupSync).mockReturnValue( + new Date(Date.now() - 60 * 60 * 1000).toISOString(), + ); + + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await new Promise((r) => setTimeout(r, 50)); + + expect(fakeSocket.groupFetchAllParticipating).not.toHaveBeenCalled(); + }); + + it('forces sync regardless of cache', async () => { + vi.mocked(getLastGroupSync).mockReturnValue( + new Date(Date.now() - 60 * 60 * 1000).toISOString(), + ); + + fakeSocket.groupFetchAllParticipating.mockResolvedValue({ + 'group@g.us': { subject: 'Forced Group' }, + }); + + const opts = 
createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await channel.syncGroupMetadata(true); + + expect(fakeSocket.groupFetchAllParticipating).toHaveBeenCalled(); + expect(updateChatName).toHaveBeenCalledWith('group@g.us', 'Forced Group'); + }); + + it('handles group sync failure gracefully', async () => { + fakeSocket.groupFetchAllParticipating.mockRejectedValue( + new Error('Network timeout'), + ); + + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + // Should not throw + await expect(channel.syncGroupMetadata(true)).resolves.toBeUndefined(); + }); + + it('skips groups with no subject', async () => { + fakeSocket.groupFetchAllParticipating.mockResolvedValue({ + 'group1@g.us': { subject: 'Has Subject' }, + 'group2@g.us': { subject: '' }, + 'group3@g.us': {}, + }); + + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + // Clear any calls from the automatic sync on connect + vi.mocked(updateChatName).mockClear(); + + await channel.syncGroupMetadata(true); + + expect(updateChatName).toHaveBeenCalledTimes(1); + expect(updateChatName).toHaveBeenCalledWith('group1@g.us', 'Has Subject'); + }); + }); + + // --- JID ownership --- + + describe('ownsJid', () => { + it('owns @g.us JIDs (WhatsApp groups)', () => { + const channel = new WhatsAppChannel(createTestOpts()); + expect(channel.ownsJid('12345@g.us')).toBe(true); + }); + + it('owns @s.whatsapp.net JIDs (WhatsApp DMs)', () => { + const channel = new WhatsAppChannel(createTestOpts()); + expect(channel.ownsJid('12345@s.whatsapp.net')).toBe(true); + }); + + it('does not own Telegram JIDs', () => { + const channel = new WhatsAppChannel(createTestOpts()); + expect(channel.ownsJid('tg:12345')).toBe(false); + }); + + it('does not own unknown JID formats', () => { + const channel = new WhatsAppChannel(createTestOpts()); + 
expect(channel.ownsJid('random-string')).toBe(false); + }); + }); + + // --- Typing indicator --- + + describe('setTyping', () => { + it('sends composing presence when typing', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await channel.setTyping('test@g.us', true); + expect(fakeSocket.sendPresenceUpdate).toHaveBeenCalledWith( + 'composing', + 'test@g.us', + ); + }); + + it('sends paused presence when stopping', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + await channel.setTyping('test@g.us', false); + expect(fakeSocket.sendPresenceUpdate).toHaveBeenCalledWith( + 'paused', + 'test@g.us', + ); + }); + + it('handles typing indicator failure gracefully', async () => { + const opts = createTestOpts(); + const channel = new WhatsAppChannel(opts); + + await connectChannel(channel); + + fakeSocket.sendPresenceUpdate.mockRejectedValueOnce(new Error('Failed')); + + // Should not throw + await expect( + channel.setTyping('test@g.us', true), + ).resolves.toBeUndefined(); + }); + }); + + // --- Channel properties --- + + describe('channel properties', () => { + it('has name "whatsapp"', () => { + const channel = new WhatsAppChannel(createTestOpts()); + expect(channel.name).toBe('whatsapp'); + }); + + it('does not expose prefixAssistantName (prefix handled internally)', () => { + const channel = new WhatsAppChannel(createTestOpts()); + expect('prefixAssistantName' in channel).toBe(false); + }); + }); +}); diff --git a/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.test.ts.intent.md b/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.test.ts.intent.md new file mode 100644 index 0000000..c7302f6 --- /dev/null +++ b/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.test.ts.intent.md @@ -0,0 +1,22 @@ +# Intent: src/channels/whatsapp.test.ts modifications + +## What changed +Added 
mocks for downloadMediaMessage and normalizeMessageContent, and test cases for PDF attachment handling. + +## Key sections + +### Mocks (top of file) +- Modified: config mock to export `GROUPS_DIR: '/tmp/test-groups'` +- Modified: `fs` mock to include `writeFileSync` as vi.fn() +- Modified: Baileys mock to export `downloadMediaMessage`, `normalizeMessageContent` +- Modified: fake socket factory to include `updateMediaMessage` + +### Test cases (inside "message handling" describe block) +- "downloads and injects PDF attachment path" — verifies PDF download, save, and content replacement +- "handles PDF download failure gracefully" — verifies error handling (message skipped since content remains empty) + +## Invariants (must-keep) +- All existing test cases unchanged +- All existing mocks unchanged (only additive changes) +- All existing test helpers unchanged +- All describe blocks preserved diff --git a/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.ts b/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.ts new file mode 100644 index 0000000..a5f8138 --- /dev/null +++ b/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.ts @@ -0,0 +1,429 @@ +import { exec } from 'child_process'; +import fs from 'fs'; +import path from 'path'; + +import makeWASocket, { + Browsers, + DisconnectReason, + downloadMediaMessage, + WASocket, + fetchLatestWaWebVersion, + makeCacheableSignalKeyStore, + normalizeMessageContent, + useMultiFileAuthState, +} from '@whiskeysockets/baileys'; + +import { + ASSISTANT_HAS_OWN_NUMBER, + ASSISTANT_NAME, + GROUPS_DIR, + STORE_DIR, +} from '../config.js'; +import { getLastGroupSync, setLastGroupSync, updateChatName } from '../db.js'; +import { logger } from '../logger.js'; +import { + Channel, + OnInboundMessage, + OnChatMetadata, + RegisteredGroup, +} from '../types.js'; +import { registerChannel, ChannelOpts } from './registry.js'; + +const GROUP_SYNC_INTERVAL_MS = 24 * 60 * 60 * 1000; // 24 hours + +export interface 
WhatsAppChannelOpts { + onMessage: OnInboundMessage; + onChatMetadata: OnChatMetadata; + registeredGroups: () => Record; +} + +export class WhatsAppChannel implements Channel { + name = 'whatsapp'; + + private sock!: WASocket; + private connected = false; + private lidToPhoneMap: Record = {}; + private outgoingQueue: Array<{ jid: string; text: string }> = []; + private flushing = false; + private groupSyncTimerStarted = false; + + private opts: WhatsAppChannelOpts; + + constructor(opts: WhatsAppChannelOpts) { + this.opts = opts; + } + + async connect(): Promise { + return new Promise((resolve, reject) => { + this.connectInternal(resolve).catch(reject); + }); + } + + private async connectInternal(onFirstOpen?: () => void): Promise { + const authDir = path.join(STORE_DIR, 'auth'); + fs.mkdirSync(authDir, { recursive: true }); + + const { state, saveCreds } = await useMultiFileAuthState(authDir); + + const { version } = await fetchLatestWaWebVersion({}).catch((err) => { + logger.warn( + { err }, + 'Failed to fetch latest WA Web version, using default', + ); + return { version: undefined }; + }); + this.sock = makeWASocket({ + version, + auth: { + creds: state.creds, + keys: makeCacheableSignalKeyStore(state.keys, logger), + }, + printQRInTerminal: false, + logger, + browser: Browsers.macOS('Chrome'), + }); + + this.sock.ev.on('connection.update', (update) => { + const { connection, lastDisconnect, qr } = update; + + if (qr) { + const msg = + 'WhatsApp authentication required. 
Run /setup in Claude Code.'; + logger.error(msg); + exec( + `osascript -e 'display notification "${msg}" with title "NanoClaw" sound name "Basso"'`, + ); + setTimeout(() => process.exit(1), 1000); + } + + if (connection === 'close') { + this.connected = false; + const reason = ( + lastDisconnect?.error as { output?: { statusCode?: number } } + )?.output?.statusCode; + const shouldReconnect = reason !== DisconnectReason.loggedOut; + logger.info( + { + reason, + shouldReconnect, + queuedMessages: this.outgoingQueue.length, + }, + 'Connection closed', + ); + + if (shouldReconnect) { + logger.info('Reconnecting...'); + this.connectInternal().catch((err) => { + logger.error({ err }, 'Failed to reconnect, retrying in 5s'); + setTimeout(() => { + this.connectInternal().catch((err2) => { + logger.error({ err: err2 }, 'Reconnection retry failed'); + }); + }, 5000); + }); + } else { + logger.info('Logged out. Run /setup to re-authenticate.'); + process.exit(0); + } + } else if (connection === 'open') { + this.connected = true; + logger.info('Connected to WhatsApp'); + + // Announce availability so WhatsApp relays subsequent presence updates (typing indicators) + this.sock.sendPresenceUpdate('available').catch((err) => { + logger.warn({ err }, 'Failed to send presence update'); + }); + + // Build LID to phone mapping from auth state for self-chat translation + if (this.sock.user) { + const phoneUser = this.sock.user.id.split(':')[0]; + const lidUser = this.sock.user.lid?.split(':')[0]; + if (lidUser && phoneUser) { + this.lidToPhoneMap[lidUser] = `${phoneUser}@s.whatsapp.net`; + logger.debug({ lidUser, phoneUser }, 'LID to phone mapping set'); + } + } + + // Flush any messages queued while disconnected + this.flushOutgoingQueue().catch((err) => + logger.error({ err }, 'Failed to flush outgoing queue'), + ); + + // Sync group metadata on startup (respects 24h cache) + this.syncGroupMetadata().catch((err) => + logger.error({ err }, 'Initial group sync failed'), + ); + // Set up 
daily sync timer (only once) + if (!this.groupSyncTimerStarted) { + this.groupSyncTimerStarted = true; + setInterval(() => { + this.syncGroupMetadata().catch((err) => + logger.error({ err }, 'Periodic group sync failed'), + ); + }, GROUP_SYNC_INTERVAL_MS); + } + + // Signal first connection to caller + if (onFirstOpen) { + onFirstOpen(); + onFirstOpen = undefined; + } + } + }); + + this.sock.ev.on('creds.update', saveCreds); + + this.sock.ev.on('messages.upsert', async ({ messages }) => { + for (const msg of messages) { + try { + if (!msg.message) continue; + // Unwrap container types (viewOnceMessageV2, ephemeralMessage, + // editedMessage, etc.) so that conversation, extendedTextMessage, + // imageMessage, etc. are accessible at the top level. + const normalized = normalizeMessageContent(msg.message); + if (!normalized) continue; + const rawJid = msg.key.remoteJid; + if (!rawJid || rawJid === 'status@broadcast') continue; + + // Translate LID JID to phone JID if applicable + const chatJid = await this.translateJid(rawJid); + + const timestamp = new Date( + Number(msg.messageTimestamp) * 1000, + ).toISOString(); + + // Always notify about chat metadata for group discovery + const isGroup = chatJid.endsWith('@g.us'); + this.opts.onChatMetadata( + chatJid, + timestamp, + undefined, + 'whatsapp', + isGroup, + ); + + // Only deliver full message for registered groups + const groups = this.opts.registeredGroups(); + if (groups[chatJid]) { + let content = + normalized.conversation || + normalized.extendedTextMessage?.text || + normalized.imageMessage?.caption || + normalized.videoMessage?.caption || + ''; + + // PDF attachment handling + if (normalized?.documentMessage?.mimetype === 'application/pdf') { + try { + const buffer = await downloadMediaMessage(msg, 'buffer', {}); + const groupDir = path.join(GROUPS_DIR, groups[chatJid].folder); + const attachDir = path.join(groupDir, 'attachments'); + fs.mkdirSync(attachDir, { recursive: true }); + const filename = 
path.basename( + normalized.documentMessage.fileName || + `doc-${Date.now()}.pdf`, + ); + const filePath = path.join(attachDir, filename); + fs.writeFileSync(filePath, buffer as Buffer); + const sizeKB = Math.round((buffer as Buffer).length / 1024); + const pdfRef = `[PDF: attachments/${filename} (${sizeKB}KB)]\nUse: pdf-reader extract attachments/${filename}`; + const caption = normalized.documentMessage.caption || ''; + content = caption ? `${caption}\n\n${pdfRef}` : pdfRef; + logger.info( + { jid: chatJid, filename }, + 'Downloaded PDF attachment', + ); + } catch (err) { + logger.warn( + { err, jid: chatJid }, + 'Failed to download PDF attachment', + ); + } + } + + // Skip protocol messages with no text content (encryption keys, read receipts, etc.) + if (!content) continue; + + const sender = msg.key.participant || msg.key.remoteJid || ''; + const senderName = msg.pushName || sender.split('@')[0]; + + const fromMe = msg.key.fromMe || false; + // Detect bot messages: with own number, fromMe is reliable + // since only the bot sends from that number. + // With shared number, bot messages carry the assistant name prefix + // (even in DMs/self-chat) so we check for that. + const isBotMessage = ASSISTANT_HAS_OWN_NUMBER + ? fromMe + : content.startsWith(`${ASSISTANT_NAME}:`); + + this.opts.onMessage(chatJid, { + id: msg.key.id || '', + chat_jid: chatJid, + sender, + sender_name: senderName, + content, + timestamp, + is_from_me: fromMe, + is_bot_message: isBotMessage, + }); + } + } catch (err) { + logger.error( + { err, remoteJid: msg.key?.remoteJid }, + 'Error processing incoming message', + ); + } + } + }); + } + + async sendMessage(jid: string, text: string): Promise { + // Prefix bot messages with assistant name so users know who's speaking. + // On a shared number, prefix is also needed in DMs (including self-chat) + // to distinguish bot output from user messages. + // Skip only when the assistant has its own dedicated phone number. 
+ const prefixed = ASSISTANT_HAS_OWN_NUMBER + ? text + : `${ASSISTANT_NAME}: ${text}`; + + if (!this.connected) { + this.outgoingQueue.push({ jid, text: prefixed }); + logger.info( + { jid, length: prefixed.length, queueSize: this.outgoingQueue.length }, + 'WA disconnected, message queued', + ); + return; + } + try { + await this.sock.sendMessage(jid, { text: prefixed }); + logger.info({ jid, length: prefixed.length }, 'Message sent'); + } catch (err) { + // If send fails, queue it for retry on reconnect + this.outgoingQueue.push({ jid, text: prefixed }); + logger.warn( + { jid, err, queueSize: this.outgoingQueue.length }, + 'Failed to send, message queued', + ); + } + } + + isConnected(): boolean { + return this.connected; + } + + ownsJid(jid: string): boolean { + return jid.endsWith('@g.us') || jid.endsWith('@s.whatsapp.net'); + } + + async disconnect(): Promise { + this.connected = false; + this.sock?.end(undefined); + } + + async setTyping(jid: string, isTyping: boolean): Promise { + try { + const status = isTyping ? 'composing' : 'paused'; + logger.debug({ jid, status }, 'Sending presence update'); + await this.sock.sendPresenceUpdate(status, jid); + } catch (err) { + logger.debug({ jid, err }, 'Failed to update typing status'); + } + } + + async syncGroups(force: boolean): Promise { + return this.syncGroupMetadata(force); + } + + /** + * Sync group metadata from WhatsApp. + * Fetches all participating groups and stores their names in the database. + * Called on startup, daily, and on-demand via IPC. 
+ */ + async syncGroupMetadata(force = false): Promise { + if (!force) { + const lastSync = getLastGroupSync(); + if (lastSync) { + const lastSyncTime = new Date(lastSync).getTime(); + if (Date.now() - lastSyncTime < GROUP_SYNC_INTERVAL_MS) { + logger.debug({ lastSync }, 'Skipping group sync - synced recently'); + return; + } + } + } + + try { + logger.info('Syncing group metadata from WhatsApp...'); + const groups = await this.sock.groupFetchAllParticipating(); + + let count = 0; + for (const [jid, metadata] of Object.entries(groups)) { + if (metadata.subject) { + updateChatName(jid, metadata.subject); + count++; + } + } + + setLastGroupSync(); + logger.info({ count }, 'Group metadata synced'); + } catch (err) { + logger.error({ err }, 'Failed to sync group metadata'); + } + } + + private async translateJid(jid: string): Promise { + if (!jid.endsWith('@lid')) return jid; + const lidUser = jid.split('@')[0].split(':')[0]; + + // Check local cache first + const cached = this.lidToPhoneMap[lidUser]; + if (cached) { + logger.debug( + { lidJid: jid, phoneJid: cached }, + 'Translated LID to phone JID (cached)', + ); + return cached; + } + + // Query Baileys' signal repository for the mapping + try { + const pn = await this.sock.signalRepository?.lidMapping?.getPNForLID(jid); + if (pn) { + const phoneJid = `${pn.split('@')[0].split(':')[0]}@s.whatsapp.net`; + this.lidToPhoneMap[lidUser] = phoneJid; + logger.info( + { lidJid: jid, phoneJid }, + 'Translated LID to phone JID (signalRepository)', + ); + return phoneJid; + } + } catch (err) { + logger.debug({ err, jid }, 'Failed to resolve LID via signalRepository'); + } + + return jid; + } + + private async flushOutgoingQueue(): Promise { + if (this.flushing || this.outgoingQueue.length === 0) return; + this.flushing = true; + try { + logger.info( + { count: this.outgoingQueue.length }, + 'Flushing outgoing message queue', + ); + while (this.outgoingQueue.length > 0) { + const item = this.outgoingQueue.shift()!; + // Send 
directly — queued items are already prefixed by sendMessage + await this.sock.sendMessage(item.jid, { text: item.text }); + logger.info( + { jid: item.jid, length: item.text.length }, + 'Queued message sent', + ); + } + } finally { + this.flushing = false; + } + } +} + +registerChannel('whatsapp', (opts: ChannelOpts) => new WhatsAppChannel(opts)); diff --git a/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.ts.intent.md b/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.ts.intent.md new file mode 100644 index 0000000..112efa2 --- /dev/null +++ b/.claude/skills/add-pdf-reader/modify/src/channels/whatsapp.ts.intent.md @@ -0,0 +1,29 @@ +# Intent: src/channels/whatsapp.ts modifications + +## What changed +Added PDF attachment download and path injection. When a WhatsApp message contains a PDF document, it is downloaded to the group's attachments/ directory and the message content is replaced with the file path and a usage hint. Also uses `normalizeMessageContent()` from Baileys to unwrap container types before reading fields. 
+ +## Key sections + +### Imports (top of file) +- Added: `downloadMediaMessage` from `@whiskeysockets/baileys` +- Added: `normalizeMessageContent` from `@whiskeysockets/baileys` +- Added: `GROUPS_DIR` from `../config.js` + +### messages.upsert handler (inside connectInternal) +- Added: `normalizeMessageContent(msg.message)` call to unwrap container types +- Changed: `let content` to allow reassignment for PDF messages +- Added: Check for `normalized.documentMessage?.mimetype === 'application/pdf'` +- Added: Download PDF via `downloadMediaMessage`, save to `groups/{folder}/attachments/` +- Added: Replace content with `[PDF: attachments/{filename} ({size}KB)]` and usage hint +- Note: PDF check is placed BEFORE the `if (!content) continue;` guard so PDF-only messages are not skipped + +## Invariants (must-keep) +- All existing message handling (conversation, extendedTextMessage, imageMessage, videoMessage) +- Connection lifecycle (connect, reconnect with a single retry after 5s, disconnect) +- LID translation logic unchanged +- Outgoing message queue unchanged +- Group metadata sync unchanged +- sendMessage prefix logic unchanged +- setTyping, ownsJid, isConnected — all unchanged +- ISO 8601 UTC timestamp format (`toISOString()`, with Z suffix) diff --git a/.claude/skills/add-pdf-reader/tests/pdf-reader.test.ts b/.claude/skills/add-pdf-reader/tests/pdf-reader.test.ts new file mode 100644 index 0000000..2d9e961 --- /dev/null +++ b/.claude/skills/add-pdf-reader/tests/pdf-reader.test.ts @@ -0,0 +1,171 @@ +import { describe, expect, it } from 'vitest'; +import fs from 'fs'; +import path from 'path'; + +describe('pdf-reader skill package', () => { + const skillDir = path.resolve(__dirname, '..'); + + it('has a valid manifest', () => { + const manifestPath = path.join(skillDir, 'manifest.yaml'); + expect(fs.existsSync(manifestPath)).toBe(true); + + const content = fs.readFileSync(manifestPath, 'utf-8'); + expect(content).toContain('skill: add-pdf-reader'); + expect(content).toContain('version: 1.1.0'); 
+ expect(content).toContain('container/Dockerfile'); + }); + + it('has all files declared in adds', () => { + const skillMd = path.join(skillDir, 'add', 'container', 'skills', 'pdf-reader', 'SKILL.md'); + const pdfReaderScript = path.join(skillDir, 'add', 'container', 'skills', 'pdf-reader', 'pdf-reader'); + + expect(fs.existsSync(skillMd)).toBe(true); + expect(fs.existsSync(pdfReaderScript)).toBe(true); + }); + + it('pdf-reader script is a valid Bash script', () => { + const scriptPath = path.join(skillDir, 'add', 'container', 'skills', 'pdf-reader', 'pdf-reader'); + const content = fs.readFileSync(scriptPath, 'utf-8'); + + // Valid shell script + expect(content).toMatch(/^#!/); + + // Core CLI commands + expect(content).toContain('pdftotext'); + expect(content).toContain('pdfinfo'); + expect(content).toContain('extract'); + expect(content).toContain('fetch'); + expect(content).toContain('info'); + expect(content).toContain('list'); + + // Key options + expect(content).toContain('--layout'); + expect(content).toContain('--pages'); + }); + + it('container skill SKILL.md has correct frontmatter', () => { + const skillMdPath = path.join(skillDir, 'add', 'container', 'skills', 'pdf-reader', 'SKILL.md'); + const content = fs.readFileSync(skillMdPath, 'utf-8'); + + expect(content).toContain('name: pdf-reader'); + expect(content).toContain('allowed-tools: Bash(pdf-reader:*)'); + expect(content).toContain('pdf-reader extract'); + expect(content).toContain('pdf-reader fetch'); + expect(content).toContain('pdf-reader info'); + }); + + it('has all files declared in modifies', () => { + const dockerfile = path.join(skillDir, 'modify', 'container', 'Dockerfile'); + const whatsappTs = path.join(skillDir, 'modify', 'src', 'channels', 'whatsapp.ts'); + const whatsappTestTs = path.join(skillDir, 'modify', 'src', 'channels', 'whatsapp.test.ts'); + + expect(fs.existsSync(dockerfile)).toBe(true); + expect(fs.existsSync(whatsappTs)).toBe(true); + 
expect(fs.existsSync(whatsappTestTs)).toBe(true); + }); + + it('has intent files for all modified files', () => { + expect( + fs.existsSync(path.join(skillDir, 'modify', 'container', 'Dockerfile.intent.md')), + ).toBe(true); + expect( + fs.existsSync(path.join(skillDir, 'modify', 'src', 'channels', 'whatsapp.ts.intent.md')), + ).toBe(true); + expect( + fs.existsSync( + path.join(skillDir, 'modify', 'src', 'channels', 'whatsapp.test.ts.intent.md'), + ), + ).toBe(true); + }); + + it('modified Dockerfile includes poppler-utils and pdf-reader', () => { + const content = fs.readFileSync( + path.join(skillDir, 'modify', 'container', 'Dockerfile'), + 'utf-8', + ); + + expect(content).toContain('poppler-utils'); + expect(content).toContain('pdf-reader'); + expect(content).toContain('/usr/local/bin/pdf-reader'); + }); + + it('modified Dockerfile preserves core structure', () => { + const content = fs.readFileSync( + path.join(skillDir, 'modify', 'container', 'Dockerfile'), + 'utf-8', + ); + + expect(content).toContain('FROM node:22-slim'); + expect(content).toContain('chromium'); + expect(content).toContain('agent-browser'); + expect(content).toContain('WORKDIR /app'); + expect(content).toContain('COPY agent-runner/'); + expect(content).toContain('ENTRYPOINT'); + expect(content).toContain('/workspace/group'); + expect(content).toContain('USER node'); + }); + + it('modified whatsapp.ts includes PDF attachment handling', () => { + const content = fs.readFileSync( + path.join(skillDir, 'modify', 'src', 'channels', 'whatsapp.ts'), + 'utf-8', + ); + + expect(content).toContain('documentMessage'); + expect(content).toContain('application/pdf'); + expect(content).toContain('downloadMediaMessage'); + expect(content).toContain('attachments'); + expect(content).toContain('pdf-reader extract'); + }); + + it('modified whatsapp.ts preserves core structure', () => { + const content = fs.readFileSync( + path.join(skillDir, 'modify', 'src', 'channels', 'whatsapp.ts'), + 'utf-8', + ); + + 
// Core class and methods preserved + expect(content).toContain('class WhatsAppChannel'); + expect(content).toContain('implements Channel'); + expect(content).toContain('async connect()'); + expect(content).toContain('async sendMessage('); + expect(content).toContain('isConnected()'); + expect(content).toContain('ownsJid('); + expect(content).toContain('async disconnect()'); + expect(content).toContain('async setTyping('); + + // Core imports preserved + expect(content).toContain('ASSISTANT_NAME'); + expect(content).toContain('STORE_DIR'); + }); + + it('modified whatsapp.test.ts includes PDF attachment tests', () => { + const content = fs.readFileSync( + path.join(skillDir, 'modify', 'src', 'channels', 'whatsapp.test.ts'), + 'utf-8', + ); + + expect(content).toContain('PDF'); + expect(content).toContain('documentMessage'); + expect(content).toContain('application/pdf'); + }); + + it('modified whatsapp.test.ts preserves all existing test sections', () => { + const content = fs.readFileSync( + path.join(skillDir, 'modify', 'src', 'channels', 'whatsapp.test.ts'), + 'utf-8', + ); + + // All existing test describe blocks preserved + expect(content).toContain("describe('connection lifecycle'"); + expect(content).toContain("describe('authentication'"); + expect(content).toContain("describe('reconnection'"); + expect(content).toContain("describe('message handling'"); + expect(content).toContain("describe('LID to JID translation'"); + expect(content).toContain("describe('outgoing message queue'"); + expect(content).toContain("describe('group metadata sync'"); + expect(content).toContain("describe('ownsJid'"); + expect(content).toContain("describe('setTyping'"); + expect(content).toContain("describe('channel properties'"); + }); +}); diff --git a/vitest.skills.config.ts b/vitest.skills.config.ts new file mode 100644 index 0000000..3be7fcd --- /dev/null +++ b/vitest.skills.config.ts @@ -0,0 +1,7 @@ +import { defineConfig } from 'vitest/config'; + +export default 
defineConfig({ + test: { + include: ['.claude/skills/**/tests/*.test.ts'], + }, +});