feat: add voice transcription as nanorepo skill (#326)
Add voice transcription skill package at .claude/skills/add-voice-transcription/ so it can be applied via the skills engine. Skill adds src/transcription.ts (OpenAI Whisper), modifies whatsapp.ts to detect/transcribe voice notes, and includes intent files, 3 test cases, and 8 skill validation tests. Also fixes skills engine runNpmInstall() to use --legacy-peer-deps, needed for any skill adding deps with Zod v3 peer requirements. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,98 @@
|
||||
import { downloadMediaMessage } from '@whiskeysockets/baileys';
|
||||
import { WAMessage, WASocket } from '@whiskeysockets/baileys';
|
||||
|
||||
import { readEnvFile } from './env.js';
|
||||
|
||||
/**
 * Runtime settings for voice-note transcription.
 */
interface TranscriptionConfig {
  // OpenAI transcription model identifier (e.g. 'whisper-1').
  model: string;
  // When false, transcription is skipped and fallbackMessage is returned instead.
  enabled: boolean;
  // Text substituted for the transcript when transcription is disabled or fails.
  fallbackMessage: string;
}
|
||||
|
||||
const DEFAULT_CONFIG: TranscriptionConfig = {
|
||||
model: 'whisper-1',
|
||||
enabled: true,
|
||||
fallbackMessage: '[Voice Message - transcription unavailable]',
|
||||
};
|
||||
|
||||
async function transcribeWithOpenAI(
|
||||
audioBuffer: Buffer,
|
||||
config: TranscriptionConfig,
|
||||
): Promise<string | null> {
|
||||
const env = readEnvFile(['OPENAI_API_KEY']);
|
||||
const apiKey = env.OPENAI_API_KEY;
|
||||
|
||||
if (!apiKey) {
|
||||
console.warn('OPENAI_API_KEY not set in .env');
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
const openaiModule = await import('openai');
|
||||
const OpenAI = openaiModule.default;
|
||||
const toFile = openaiModule.toFile;
|
||||
|
||||
const openai = new OpenAI({ apiKey });
|
||||
|
||||
const file = await toFile(audioBuffer, 'voice.ogg', {
|
||||
type: 'audio/ogg',
|
||||
});
|
||||
|
||||
const transcription = await openai.audio.transcriptions.create({
|
||||
file: file,
|
||||
model: config.model,
|
||||
response_format: 'text',
|
||||
});
|
||||
|
||||
// When response_format is 'text', the API returns a plain string
|
||||
return transcription as unknown as string;
|
||||
} catch (err) {
|
||||
console.error('OpenAI transcription failed:', err);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export async function transcribeAudioMessage(
|
||||
msg: WAMessage,
|
||||
sock: WASocket,
|
||||
): Promise<string | null> {
|
||||
const config = DEFAULT_CONFIG;
|
||||
|
||||
if (!config.enabled) {
|
||||
return config.fallbackMessage;
|
||||
}
|
||||
|
||||
try {
|
||||
const buffer = (await downloadMediaMessage(
|
||||
msg,
|
||||
'buffer',
|
||||
{},
|
||||
{
|
||||
logger: console as any,
|
||||
reuploadRequest: sock.updateMediaMessage,
|
||||
},
|
||||
)) as Buffer;
|
||||
|
||||
if (!buffer || buffer.length === 0) {
|
||||
console.error('Failed to download audio message');
|
||||
return config.fallbackMessage;
|
||||
}
|
||||
|
||||
console.log(`Downloaded audio message: ${buffer.length} bytes`);
|
||||
|
||||
const transcript = await transcribeWithOpenAI(buffer, config);
|
||||
|
||||
if (!transcript) {
|
||||
return config.fallbackMessage;
|
||||
}
|
||||
|
||||
return transcript.trim();
|
||||
} catch (err) {
|
||||
console.error('Transcription error:', err);
|
||||
return config.fallbackMessage;
|
||||
}
|
||||
}
|
||||
|
||||
export function isVoiceMessage(msg: WAMessage): boolean {
|
||||
return msg.message?.audioMessage?.ptt === true;
|
||||
}
|
||||
Reference in New Issue
Block a user