feat: add voice transcription as nanorepo skill (#326)

Add voice transcription skill package at
.claude/skills/add-voice-transcription/ so it can be applied via the
skills engine. Skill adds src/transcription.ts (OpenAI Whisper), modifies
whatsapp.ts to detect/transcribe voice notes, and includes intent files,
3 test cases, and 8 skill validation tests.

Also fixes skills engine runNpmInstall() to use --legacy-peer-deps,
needed for any skill adding deps with Zod v3 peer requirements.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
gavrielc
2026-02-20 14:18:54 +02:00
committed by GitHub
parent 6b9b3a12c9
commit a4072162b7
9 changed files with 1686 additions and 412 deletions

View File

@@ -0,0 +1,963 @@
import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
import { EventEmitter } from 'events';
// --- Mocks ---
// Mock config
vi.mock('../config.js', () => ({
STORE_DIR: '/tmp/nanoclaw-test-store',
ASSISTANT_NAME: 'Andy',
ASSISTANT_HAS_OWN_NUMBER: false,
}));
// Mock logger
vi.mock('../logger.js', () => ({
logger: {
debug: vi.fn(),
info: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
},
}));
// Mock db
vi.mock('../db.js', () => ({
getLastGroupSync: vi.fn(() => null),
setLastGroupSync: vi.fn(),
updateChatName: vi.fn(),
}));
// Mock transcription
vi.mock('../transcription.js', () => ({
isVoiceMessage: vi.fn((msg: any) => msg.message?.audioMessage?.ptt === true),
transcribeAudioMessage: vi.fn().mockResolvedValue('Hello this is a voice message'),
}));
// Mock fs
vi.mock('fs', async () => {
const actual = await vi.importActual<typeof import('fs')>('fs');
return {
...actual,
default: {
...actual,
existsSync: vi.fn(() => true),
mkdirSync: vi.fn(),
},
};
});
// Mock child_process (used for osascript notification)
vi.mock('child_process', () => ({
exec: vi.fn(),
}));
// Build a fake WASocket that's an EventEmitter with the methods we need
function createFakeSocket() {
const ev = new EventEmitter();
const sock = {
ev: {
on: (event: string, handler: (...args: unknown[]) => void) => {
ev.on(event, handler);
},
},
user: {
id: '1234567890:1@s.whatsapp.net',
lid: '9876543210:1@lid',
},
sendMessage: vi.fn().mockResolvedValue(undefined),
sendPresenceUpdate: vi.fn().mockResolvedValue(undefined),
groupFetchAllParticipating: vi.fn().mockResolvedValue({}),
end: vi.fn(),
// Expose the event emitter for triggering events in tests
_ev: ev,
};
return sock;
}
let fakeSocket: ReturnType<typeof createFakeSocket>;
// Mock Baileys
vi.mock('@whiskeysockets/baileys', () => {
return {
default: vi.fn(() => fakeSocket),
Browsers: { macOS: vi.fn(() => ['macOS', 'Chrome', '']) },
DisconnectReason: {
loggedOut: 401,
badSession: 500,
connectionClosed: 428,
connectionLost: 408,
connectionReplaced: 440,
timedOut: 408,
restartRequired: 515,
},
makeCacheableSignalKeyStore: vi.fn((keys: unknown) => keys),
useMultiFileAuthState: vi.fn().mockResolvedValue({
state: {
creds: {},
keys: {},
},
saveCreds: vi.fn(),
}),
};
});
import { WhatsAppChannel, WhatsAppChannelOpts } from './whatsapp.js';
import { getLastGroupSync, updateChatName, setLastGroupSync } from '../db.js';
import { transcribeAudioMessage } from '../transcription.js';
// --- Test helpers ---
function createTestOpts(overrides?: Partial<WhatsAppChannelOpts>): WhatsAppChannelOpts {
return {
onMessage: vi.fn(),
onChatMetadata: vi.fn(),
registeredGroups: vi.fn(() => ({
'registered@g.us': {
name: 'Test Group',
folder: 'test-group',
trigger: '@Andy',
added_at: '2024-01-01T00:00:00.000Z',
},
})),
...overrides,
};
}
function triggerConnection(state: string, extra?: Record<string, unknown>) {
fakeSocket._ev.emit('connection.update', { connection: state, ...extra });
}
function triggerDisconnect(statusCode: number) {
fakeSocket._ev.emit('connection.update', {
connection: 'close',
lastDisconnect: {
error: { output: { statusCode } },
},
});
}
async function triggerMessages(messages: unknown[]) {
fakeSocket._ev.emit('messages.upsert', { messages });
// Flush microtasks so the async messages.upsert handler completes
await new Promise((r) => setTimeout(r, 0));
}
// --- Tests ---
describe('WhatsAppChannel', () => {
beforeEach(() => {
fakeSocket = createFakeSocket();
vi.mocked(getLastGroupSync).mockReturnValue(null);
});
afterEach(() => {
vi.restoreAllMocks();
});
/**
* Helper: start connect, flush microtasks so event handlers are registered,
* then trigger the connection open event. Returns the resolved promise.
*/
async function connectChannel(channel: WhatsAppChannel): Promise<void> {
const p = channel.connect();
// Flush microtasks so connectInternal completes its await and registers handlers
await new Promise((r) => setTimeout(r, 0));
triggerConnection('open');
return p;
}
// --- Connection lifecycle ---
describe('connection lifecycle', () => {
it('resolves connect() when connection opens', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
expect(channel.isConnected()).toBe(true);
});
it('sets up LID to phone mapping on open', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
// The channel should have mapped the LID from sock.user
// We can verify by sending a message from a LID JID
// and checking the translated JID in the callback
});
it('flushes outgoing queue on reconnect', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
// Disconnect
(channel as any).connected = false;
// Queue a message while disconnected
await channel.sendMessage('test@g.us', 'Queued message');
expect(fakeSocket.sendMessage).not.toHaveBeenCalled();
// Reconnect
(channel as any).connected = true;
await (channel as any).flushOutgoingQueue();
// Group messages get prefixed when flushed
expect(fakeSocket.sendMessage).toHaveBeenCalledWith(
'test@g.us',
{ text: 'Andy: Queued message' },
);
});
it('disconnects cleanly', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await channel.disconnect();
expect(channel.isConnected()).toBe(false);
expect(fakeSocket.end).toHaveBeenCalled();
});
});
// --- QR code and auth ---
describe('authentication', () => {
it('exits process when QR code is emitted (no auth state)', async () => {
vi.useFakeTimers();
const mockExit = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never);
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
// Start connect but don't await (it won't resolve - process exits)
channel.connect().catch(() => {});
// Flush microtasks so connectInternal registers handlers
await vi.advanceTimersByTimeAsync(0);
// Emit QR code event
fakeSocket._ev.emit('connection.update', { qr: 'some-qr-data' });
// Advance timer past the 1000ms setTimeout before exit
await vi.advanceTimersByTimeAsync(1500);
expect(mockExit).toHaveBeenCalledWith(1);
mockExit.mockRestore();
vi.useRealTimers();
});
});
// --- Reconnection behavior ---
describe('reconnection', () => {
it('reconnects on non-loggedOut disconnect', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
expect(channel.isConnected()).toBe(true);
// Disconnect with a non-loggedOut reason (e.g., connectionClosed = 428)
triggerDisconnect(428);
expect(channel.isConnected()).toBe(false);
// The channel should attempt to reconnect (calls connectInternal again)
});
it('exits on loggedOut disconnect', async () => {
const mockExit = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never);
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
// Disconnect with loggedOut reason (401)
triggerDisconnect(401);
expect(channel.isConnected()).toBe(false);
expect(mockExit).toHaveBeenCalledWith(0);
mockExit.mockRestore();
});
it('retries reconnection after 5s on failure', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
// Disconnect with stream error 515
triggerDisconnect(515);
// The channel sets a 5s retry — just verify it doesn't crash
await new Promise((r) => setTimeout(r, 100));
});
});
// --- Message handling ---
describe('message handling', () => {
it('delivers message for registered group', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-1',
remoteJid: 'registered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: { conversation: 'Hello Andy' },
pushName: 'Alice',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onChatMetadata).toHaveBeenCalledWith(
'registered@g.us',
expect.any(String),
undefined,
'whatsapp',
true,
);
expect(opts.onMessage).toHaveBeenCalledWith(
'registered@g.us',
expect.objectContaining({
id: 'msg-1',
content: 'Hello Andy',
sender_name: 'Alice',
is_from_me: false,
}),
);
});
it('only emits metadata for unregistered groups', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-2',
remoteJid: 'unregistered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: { conversation: 'Hello' },
pushName: 'Bob',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onChatMetadata).toHaveBeenCalledWith(
'unregistered@g.us',
expect.any(String),
undefined,
'whatsapp',
true,
);
expect(opts.onMessage).not.toHaveBeenCalled();
});
it('ignores status@broadcast messages', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-3',
remoteJid: 'status@broadcast',
fromMe: false,
},
message: { conversation: 'Status update' },
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onChatMetadata).not.toHaveBeenCalled();
expect(opts.onMessage).not.toHaveBeenCalled();
});
it('ignores messages with no content', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-4',
remoteJid: 'registered@g.us',
fromMe: false,
},
message: null,
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onMessage).not.toHaveBeenCalled();
});
it('extracts text from extendedTextMessage', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-5',
remoteJid: 'registered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: {
extendedTextMessage: { text: 'A reply message' },
},
pushName: 'Charlie',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onMessage).toHaveBeenCalledWith(
'registered@g.us',
expect.objectContaining({ content: 'A reply message' }),
);
});
it('extracts caption from imageMessage', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-6',
remoteJid: 'registered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: {
imageMessage: { caption: 'Check this photo', mimetype: 'image/jpeg' },
},
pushName: 'Diana',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onMessage).toHaveBeenCalledWith(
'registered@g.us',
expect.objectContaining({ content: 'Check this photo' }),
);
});
it('extracts caption from videoMessage', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-7',
remoteJid: 'registered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: {
videoMessage: { caption: 'Watch this', mimetype: 'video/mp4' },
},
pushName: 'Eve',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onMessage).toHaveBeenCalledWith(
'registered@g.us',
expect.objectContaining({ content: 'Watch this' }),
);
});
it('transcribes voice messages', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-8',
remoteJid: 'registered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: {
audioMessage: { mimetype: 'audio/ogg; codecs=opus', ptt: true },
},
pushName: 'Frank',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(transcribeAudioMessage).toHaveBeenCalled();
expect(opts.onMessage).toHaveBeenCalledTimes(1);
expect(opts.onMessage).toHaveBeenCalledWith(
'registered@g.us',
expect.objectContaining({ content: '[Voice: Hello this is a voice message]' }),
);
});
it('falls back when transcription returns null', async () => {
vi.mocked(transcribeAudioMessage).mockResolvedValueOnce(null);
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-8b',
remoteJid: 'registered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: {
audioMessage: { mimetype: 'audio/ogg; codecs=opus', ptt: true },
},
pushName: 'Frank',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onMessage).toHaveBeenCalledTimes(1);
expect(opts.onMessage).toHaveBeenCalledWith(
'registered@g.us',
expect.objectContaining({ content: '[Voice Message - transcription unavailable]' }),
);
});
it('falls back when transcription throws', async () => {
vi.mocked(transcribeAudioMessage).mockRejectedValueOnce(new Error('API error'));
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-8c',
remoteJid: 'registered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: {
audioMessage: { mimetype: 'audio/ogg; codecs=opus', ptt: true },
},
pushName: 'Frank',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onMessage).toHaveBeenCalledTimes(1);
expect(opts.onMessage).toHaveBeenCalledWith(
'registered@g.us',
expect.objectContaining({ content: '[Voice Message - transcription failed]' }),
);
});
it('uses sender JID when pushName is absent', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-9',
remoteJid: 'registered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: { conversation: 'No push name' },
// pushName is undefined
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onMessage).toHaveBeenCalledWith(
'registered@g.us',
expect.objectContaining({ sender_name: '5551234' }),
);
});
});
// --- LID ↔ JID translation ---
describe('LID to JID translation', () => {
it('translates known LID to phone JID', async () => {
const opts = createTestOpts({
registeredGroups: vi.fn(() => ({
'1234567890@s.whatsapp.net': {
name: 'Self Chat',
folder: 'self-chat',
trigger: '@Andy',
added_at: '2024-01-01T00:00:00.000Z',
},
})),
});
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
// The socket has lid '9876543210:1@lid' → phone '1234567890@s.whatsapp.net'
// Send a message from the LID
await triggerMessages([
{
key: {
id: 'msg-lid',
remoteJid: '9876543210@lid',
fromMe: false,
},
message: { conversation: 'From LID' },
pushName: 'Self',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
// Should be translated to phone JID
expect(opts.onChatMetadata).toHaveBeenCalledWith(
'1234567890@s.whatsapp.net',
expect.any(String),
undefined,
'whatsapp',
false,
);
});
it('passes through non-LID JIDs unchanged', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-normal',
remoteJid: 'registered@g.us',
participant: '5551234@s.whatsapp.net',
fromMe: false,
},
message: { conversation: 'Normal JID' },
pushName: 'Grace',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
expect(opts.onChatMetadata).toHaveBeenCalledWith(
'registered@g.us',
expect.any(String),
undefined,
'whatsapp',
true,
);
});
it('passes through unknown LID JIDs unchanged', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await triggerMessages([
{
key: {
id: 'msg-unknown-lid',
remoteJid: '0000000000@lid',
fromMe: false,
},
message: { conversation: 'Unknown LID' },
pushName: 'Unknown',
messageTimestamp: Math.floor(Date.now() / 1000),
},
]);
// Unknown LID passes through unchanged
expect(opts.onChatMetadata).toHaveBeenCalledWith(
'0000000000@lid',
expect.any(String),
undefined,
'whatsapp',
false,
);
});
});
// --- Outgoing message queue ---
describe('outgoing message queue', () => {
it('sends message directly when connected', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await channel.sendMessage('test@g.us', 'Hello');
// Group messages get prefixed with assistant name
expect(fakeSocket.sendMessage).toHaveBeenCalledWith('test@g.us', { text: 'Andy: Hello' });
});
it('prefixes direct chat messages on shared number', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await channel.sendMessage('123@s.whatsapp.net', 'Hello');
// Shared number: DMs also get prefixed (needed for self-chat distinction)
expect(fakeSocket.sendMessage).toHaveBeenCalledWith('123@s.whatsapp.net', { text: 'Andy: Hello' });
});
it('queues message when disconnected', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
// Don't connect — channel starts disconnected
await channel.sendMessage('test@g.us', 'Queued');
expect(fakeSocket.sendMessage).not.toHaveBeenCalled();
});
it('queues message on send failure', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
// Make sendMessage fail
fakeSocket.sendMessage.mockRejectedValueOnce(new Error('Network error'));
await channel.sendMessage('test@g.us', 'Will fail');
// Should not throw, message queued for retry
// The queue should have the message
});
it('flushes multiple queued messages in order', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
// Queue messages while disconnected
await channel.sendMessage('test@g.us', 'First');
await channel.sendMessage('test@g.us', 'Second');
await channel.sendMessage('test@g.us', 'Third');
// Connect — flush happens automatically on open
await connectChannel(channel);
// Give the async flush time to complete
await new Promise((r) => setTimeout(r, 50));
expect(fakeSocket.sendMessage).toHaveBeenCalledTimes(3);
// Group messages get prefixed
expect(fakeSocket.sendMessage).toHaveBeenNthCalledWith(1, 'test@g.us', { text: 'Andy: First' });
expect(fakeSocket.sendMessage).toHaveBeenNthCalledWith(2, 'test@g.us', { text: 'Andy: Second' });
expect(fakeSocket.sendMessage).toHaveBeenNthCalledWith(3, 'test@g.us', { text: 'Andy: Third' });
});
});
// --- Group metadata sync ---
describe('group metadata sync', () => {
it('syncs group metadata on first connection', async () => {
fakeSocket.groupFetchAllParticipating.mockResolvedValue({
'group1@g.us': { subject: 'Group One' },
'group2@g.us': { subject: 'Group Two' },
});
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
// Wait for async sync to complete
await new Promise((r) => setTimeout(r, 50));
expect(fakeSocket.groupFetchAllParticipating).toHaveBeenCalled();
expect(updateChatName).toHaveBeenCalledWith('group1@g.us', 'Group One');
expect(updateChatName).toHaveBeenCalledWith('group2@g.us', 'Group Two');
expect(setLastGroupSync).toHaveBeenCalled();
});
it('skips sync when synced recently', async () => {
// Last sync was 1 hour ago (within 24h threshold)
vi.mocked(getLastGroupSync).mockReturnValue(
new Date(Date.now() - 60 * 60 * 1000).toISOString(),
);
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await new Promise((r) => setTimeout(r, 50));
expect(fakeSocket.groupFetchAllParticipating).not.toHaveBeenCalled();
});
it('forces sync regardless of cache', async () => {
vi.mocked(getLastGroupSync).mockReturnValue(
new Date(Date.now() - 60 * 60 * 1000).toISOString(),
);
fakeSocket.groupFetchAllParticipating.mockResolvedValue({
'group@g.us': { subject: 'Forced Group' },
});
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await channel.syncGroupMetadata(true);
expect(fakeSocket.groupFetchAllParticipating).toHaveBeenCalled();
expect(updateChatName).toHaveBeenCalledWith('group@g.us', 'Forced Group');
});
it('handles group sync failure gracefully', async () => {
fakeSocket.groupFetchAllParticipating.mockRejectedValue(
new Error('Network timeout'),
);
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
// Should not throw
await expect(channel.syncGroupMetadata(true)).resolves.toBeUndefined();
});
it('skips groups with no subject', async () => {
fakeSocket.groupFetchAllParticipating.mockResolvedValue({
'group1@g.us': { subject: 'Has Subject' },
'group2@g.us': { subject: '' },
'group3@g.us': {},
});
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
// Clear any calls from the automatic sync on connect
vi.mocked(updateChatName).mockClear();
await channel.syncGroupMetadata(true);
expect(updateChatName).toHaveBeenCalledTimes(1);
expect(updateChatName).toHaveBeenCalledWith('group1@g.us', 'Has Subject');
});
});
// --- JID ownership ---
describe('ownsJid', () => {
it('owns @g.us JIDs (WhatsApp groups)', () => {
const channel = new WhatsAppChannel(createTestOpts());
expect(channel.ownsJid('12345@g.us')).toBe(true);
});
it('owns @s.whatsapp.net JIDs (WhatsApp DMs)', () => {
const channel = new WhatsAppChannel(createTestOpts());
expect(channel.ownsJid('12345@s.whatsapp.net')).toBe(true);
});
it('does not own Telegram JIDs', () => {
const channel = new WhatsAppChannel(createTestOpts());
expect(channel.ownsJid('tg:12345')).toBe(false);
});
it('does not own unknown JID formats', () => {
const channel = new WhatsAppChannel(createTestOpts());
expect(channel.ownsJid('random-string')).toBe(false);
});
});
// --- Typing indicator ---
describe('setTyping', () => {
it('sends composing presence when typing', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await channel.setTyping('test@g.us', true);
expect(fakeSocket.sendPresenceUpdate).toHaveBeenCalledWith('composing', 'test@g.us');
});
it('sends paused presence when stopping', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
await channel.setTyping('test@g.us', false);
expect(fakeSocket.sendPresenceUpdate).toHaveBeenCalledWith('paused', 'test@g.us');
});
it('handles typing indicator failure gracefully', async () => {
const opts = createTestOpts();
const channel = new WhatsAppChannel(opts);
await connectChannel(channel);
fakeSocket.sendPresenceUpdate.mockRejectedValueOnce(new Error('Failed'));
// Should not throw
await expect(channel.setTyping('test@g.us', true)).resolves.toBeUndefined();
});
});
// --- Channel properties ---
describe('channel properties', () => {
it('has name "whatsapp"', () => {
const channel = new WhatsAppChannel(createTestOpts());
expect(channel.name).toBe('whatsapp');
});
it('does not expose prefixAssistantName (prefix handled internally)', () => {
const channel = new WhatsAppChannel(createTestOpts());
expect('prefixAssistantName' in channel).toBe(false);
});
});
});

View File

@@ -0,0 +1,26 @@
# Intent: src/channels/whatsapp.test.ts modifications
## What changed
Added mock for the transcription module and 3 new test cases for voice message handling.
## Key sections
### Mocks (top of file)
- Added: `vi.mock('../transcription.js', ...)` with `isVoiceMessage` and `transcribeAudioMessage` mocks
- Added: `import { transcribeAudioMessage } from '../transcription.js'` for test assertions
### Test cases (inside "message handling" describe block)
- Changed: "handles message with no extractable text (e.g. voice note without caption)" → "transcribes voice messages"
- Now expects `[Voice: Hello this is a voice message]` instead of empty content
- Added: "falls back when transcription returns null" — expects `[Voice Message - transcription unavailable]`
- Added: "falls back when transcription throws" — expects `[Voice Message - transcription failed]`
## Invariants (must-keep)
- All existing test cases for text, extendedTextMessage, imageMessage, videoMessage unchanged
- All connection lifecycle tests unchanged
- All LID translation tests unchanged
- All outgoing queue tests unchanged
- All group metadata sync tests unchanged
- All ownsJid and setTyping tests unchanged
- All existing mocks (config, logger, db, fs, child_process, baileys) unchanged
- Test helpers (createTestOpts, triggerConnection, triggerDisconnect, triggerMessages, connectChannel) unchanged

View File

@@ -0,0 +1,344 @@
import { exec } from 'child_process';
import fs from 'fs';
import path from 'path';
import makeWASocket, {
Browsers,
DisconnectReason,
WASocket,
makeCacheableSignalKeyStore,
useMultiFileAuthState,
} from '@whiskeysockets/baileys';
import { ASSISTANT_HAS_OWN_NUMBER, ASSISTANT_NAME, STORE_DIR } from '../config.js';
import {
getLastGroupSync,
setLastGroupSync,
updateChatName,
} from '../db.js';
import { logger } from '../logger.js';
import { isVoiceMessage, transcribeAudioMessage } from '../transcription.js';
import { Channel, OnInboundMessage, OnChatMetadata, RegisteredGroup } from '../types.js';
const GROUP_SYNC_INTERVAL_MS = 24 * 60 * 60 * 1000; // 24 hours
export interface WhatsAppChannelOpts {
onMessage: OnInboundMessage;
onChatMetadata: OnChatMetadata;
registeredGroups: () => Record<string, RegisteredGroup>;
}
export class WhatsAppChannel implements Channel {
name = 'whatsapp';
private sock!: WASocket;
private connected = false;
private lidToPhoneMap: Record<string, string> = {};
private outgoingQueue: Array<{ jid: string; text: string }> = [];
private flushing = false;
private groupSyncTimerStarted = false;
private opts: WhatsAppChannelOpts;
constructor(opts: WhatsAppChannelOpts) {
this.opts = opts;
}
async connect(): Promise<void> {
return new Promise<void>((resolve, reject) => {
this.connectInternal(resolve).catch(reject);
});
}
private async connectInternal(onFirstOpen?: () => void): Promise<void> {
const authDir = path.join(STORE_DIR, 'auth');
fs.mkdirSync(authDir, { recursive: true });
const { state, saveCreds } = await useMultiFileAuthState(authDir);
this.sock = makeWASocket({
auth: {
creds: state.creds,
keys: makeCacheableSignalKeyStore(state.keys, logger),
},
printQRInTerminal: false,
logger,
browser: Browsers.macOS('Chrome'),
});
this.sock.ev.on('connection.update', (update) => {
const { connection, lastDisconnect, qr } = update;
if (qr) {
const msg =
'WhatsApp authentication required. Run /setup in Claude Code.';
logger.error(msg);
exec(
`osascript -e 'display notification "${msg}" with title "NanoClaw" sound name "Basso"'`,
);
setTimeout(() => process.exit(1), 1000);
}
if (connection === 'close') {
this.connected = false;
const reason = (lastDisconnect?.error as any)?.output?.statusCode;
const shouldReconnect = reason !== DisconnectReason.loggedOut;
logger.info({ reason, shouldReconnect, queuedMessages: this.outgoingQueue.length }, 'Connection closed');
if (shouldReconnect) {
logger.info('Reconnecting...');
this.connectInternal().catch((err) => {
logger.error({ err }, 'Failed to reconnect, retrying in 5s');
setTimeout(() => {
this.connectInternal().catch((err2) => {
logger.error({ err: err2 }, 'Reconnection retry failed');
});
}, 5000);
});
} else {
logger.info('Logged out. Run /setup to re-authenticate.');
process.exit(0);
}
} else if (connection === 'open') {
this.connected = true;
logger.info('Connected to WhatsApp');
// Announce availability so WhatsApp relays subsequent presence updates (typing indicators)
this.sock.sendPresenceUpdate('available').catch(() => {});
// Build LID to phone mapping from auth state for self-chat translation
if (this.sock.user) {
const phoneUser = this.sock.user.id.split(':')[0];
const lidUser = this.sock.user.lid?.split(':')[0];
if (lidUser && phoneUser) {
this.lidToPhoneMap[lidUser] = `${phoneUser}@s.whatsapp.net`;
logger.debug({ lidUser, phoneUser }, 'LID to phone mapping set');
}
}
// Flush any messages queued while disconnected
this.flushOutgoingQueue().catch((err) =>
logger.error({ err }, 'Failed to flush outgoing queue'),
);
// Sync group metadata on startup (respects 24h cache)
this.syncGroupMetadata().catch((err) =>
logger.error({ err }, 'Initial group sync failed'),
);
// Set up daily sync timer (only once)
if (!this.groupSyncTimerStarted) {
this.groupSyncTimerStarted = true;
setInterval(() => {
this.syncGroupMetadata().catch((err) =>
logger.error({ err }, 'Periodic group sync failed'),
);
}, GROUP_SYNC_INTERVAL_MS);
}
// Signal first connection to caller
if (onFirstOpen) {
onFirstOpen();
onFirstOpen = undefined;
}
}
});
this.sock.ev.on('creds.update', saveCreds);
this.sock.ev.on('messages.upsert', async ({ messages }) => {
for (const msg of messages) {
if (!msg.message) continue;
const rawJid = msg.key.remoteJid;
if (!rawJid || rawJid === 'status@broadcast') continue;
// Translate LID JID to phone JID if applicable
const chatJid = await this.translateJid(rawJid);
const timestamp = new Date(
Number(msg.messageTimestamp) * 1000,
).toISOString();
// Always notify about chat metadata for group discovery
const isGroup = chatJid.endsWith('@g.us');
this.opts.onChatMetadata(chatJid, timestamp, undefined, 'whatsapp', isGroup);
// Only deliver full message for registered groups
const groups = this.opts.registeredGroups();
if (groups[chatJid]) {
const content =
msg.message?.conversation ||
msg.message?.extendedTextMessage?.text ||
msg.message?.imageMessage?.caption ||
msg.message?.videoMessage?.caption ||
'';
const sender = msg.key.participant || msg.key.remoteJid || '';
const senderName = msg.pushName || sender.split('@')[0];
const fromMe = msg.key.fromMe || false;
// Detect bot messages: with own number, fromMe is reliable
// since only the bot sends from that number.
// With shared number, bot messages carry the assistant name prefix
// (even in DMs/self-chat) so we check for that.
const isBotMessage = ASSISTANT_HAS_OWN_NUMBER
? fromMe
: content.startsWith(`${ASSISTANT_NAME}:`);
// Transcribe voice messages before storing
let finalContent = content;
if (isVoiceMessage(msg)) {
try {
const transcript = await transcribeAudioMessage(msg, this.sock);
if (transcript) {
finalContent = `[Voice: ${transcript}]`;
logger.info({ chatJid, length: transcript.length }, 'Transcribed voice message');
} else {
finalContent = '[Voice Message - transcription unavailable]';
}
} catch (err) {
logger.error({ err }, 'Voice transcription error');
finalContent = '[Voice Message - transcription failed]';
}
}
this.opts.onMessage(chatJid, {
id: msg.key.id || '',
chat_jid: chatJid,
sender,
sender_name: senderName,
content: finalContent,
timestamp,
is_from_me: fromMe,
is_bot_message: isBotMessage,
});
}
}
});
}
async sendMessage(jid: string, text: string): Promise<void> {
// Prefix bot messages with assistant name so users know who's speaking.
// On a shared number, prefix is also needed in DMs (including self-chat)
// to distinguish bot output from user messages.
// Skip only when the assistant has its own dedicated phone number.
const prefixed = ASSISTANT_HAS_OWN_NUMBER
? text
: `${ASSISTANT_NAME}: ${text}`;
if (!this.connected) {
this.outgoingQueue.push({ jid, text: prefixed });
logger.info({ jid, length: prefixed.length, queueSize: this.outgoingQueue.length }, 'WA disconnected, message queued');
return;
}
try {
await this.sock.sendMessage(jid, { text: prefixed });
logger.info({ jid, length: prefixed.length }, 'Message sent');
} catch (err) {
// If send fails, queue it for retry on reconnect
this.outgoingQueue.push({ jid, text: prefixed });
logger.warn({ jid, err, queueSize: this.outgoingQueue.length }, 'Failed to send, message queued');
}
}
isConnected(): boolean {
return this.connected;
}
ownsJid(jid: string): boolean {
return jid.endsWith('@g.us') || jid.endsWith('@s.whatsapp.net');
}
async disconnect(): Promise<void> {
this.connected = false;
this.sock?.end(undefined);
}
async setTyping(jid: string, isTyping: boolean): Promise<void> {
try {
const status = isTyping ? 'composing' : 'paused';
logger.debug({ jid, status }, 'Sending presence update');
await this.sock.sendPresenceUpdate(status, jid);
} catch (err) {
logger.debug({ jid, err }, 'Failed to update typing status');
}
}
/**
* Sync group metadata from WhatsApp.
* Fetches all participating groups and stores their names in the database.
* Called on startup, daily, and on-demand via IPC.
*/
async syncGroupMetadata(force = false): Promise<void> {
if (!force) {
const lastSync = getLastGroupSync();
if (lastSync) {
const lastSyncTime = new Date(lastSync).getTime();
if (Date.now() - lastSyncTime < GROUP_SYNC_INTERVAL_MS) {
logger.debug({ lastSync }, 'Skipping group sync - synced recently');
return;
}
}
}
try {
logger.info('Syncing group metadata from WhatsApp...');
const groups = await this.sock.groupFetchAllParticipating();
let count = 0;
for (const [jid, metadata] of Object.entries(groups)) {
if (metadata.subject) {
updateChatName(jid, metadata.subject);
count++;
}
}
setLastGroupSync();
logger.info({ count }, 'Group metadata synced');
} catch (err) {
logger.error({ err }, 'Failed to sync group metadata');
}
}
private async translateJid(jid: string): Promise<string> {
if (!jid.endsWith('@lid')) return jid;
const lidUser = jid.split('@')[0].split(':')[0];
// Check local cache first
const cached = this.lidToPhoneMap[lidUser];
if (cached) {
logger.debug({ lidJid: jid, phoneJid: cached }, 'Translated LID to phone JID (cached)');
return cached;
}
// Query Baileys' signal repository for the mapping
try {
const pn = await this.sock.signalRepository?.lidMapping?.getPNForLID(jid);
if (pn) {
const phoneJid = `${pn.split('@')[0].split(':')[0]}@s.whatsapp.net`;
this.lidToPhoneMap[lidUser] = phoneJid;
logger.info({ lidJid: jid, phoneJid }, 'Translated LID to phone JID (signalRepository)');
return phoneJid;
}
} catch (err) {
logger.debug({ err, jid }, 'Failed to resolve LID via signalRepository');
}
return jid;
}
private async flushOutgoingQueue(): Promise<void> {
if (this.flushing || this.outgoingQueue.length === 0) return;
this.flushing = true;
try {
logger.info({ count: this.outgoingQueue.length }, 'Flushing outgoing message queue');
while (this.outgoingQueue.length > 0) {
const item = this.outgoingQueue.shift()!;
// Send directly — queued items are already prefixed by sendMessage
await this.sock.sendMessage(item.jid, { text: item.text });
logger.info({ jid: item.jid, length: item.text.length }, 'Queued message sent');
}
} finally {
this.flushing = false;
}
}
}

View File

@@ -0,0 +1,27 @@
# Intent: src/channels/whatsapp.ts modifications
## What changed
Added voice message transcription support. When a WhatsApp voice note (PTT audio) arrives, it is downloaded and transcribed via OpenAI Whisper before being stored as message content.
## Key sections
### Imports (top of file)
- Added: `isVoiceMessage`, `transcribeAudioMessage` from `../transcription.js`
### messages.upsert handler (inside connectInternal)
- Added: `let finalContent = content` variable to allow voice transcription to override text content
- Added: `isVoiceMessage(msg)` check after content extraction
- Added: try/catch block calling `transcribeAudioMessage(msg, this.sock)`
- Success: `finalContent = '[Voice: <transcript>]'`
- Null result: `finalContent = '[Voice Message - transcription unavailable]'`
- Error: `finalContent = '[Voice Message - transcription failed]'`
- Changed: `this.opts.onMessage()` call uses `finalContent` instead of `content`
## Invariants (must-keep)
- All existing message handling (conversation, extendedTextMessage, imageMessage, videoMessage) unchanged
- Connection lifecycle (connect, reconnect, disconnect) unchanged
- LID translation logic unchanged
- Outgoing message queue unchanged
- Group metadata sync unchanged
- sendMessage prefix logic unchanged
- setTyping, ownsJid, isConnected — all unchanged