Add voice transcription skill package at .claude/skills/add-voice-transcription/ so it can be applied via the skills engine. Skill adds src/transcription.ts (OpenAI Whisper), modifies whatsapp.ts to detect/transcribe voice notes, and includes intent files, 3 test cases, and 8 skill validation tests. Also fixes skills engine runNpmInstall() to use --legacy-peer-deps, needed for any skill adding deps with Zod v3 peer requirements. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
99 lines
2.3 KiB
TypeScript
import { downloadMediaMessage } from '@whiskeysockets/baileys';
|
|
import { WAMessage, WASocket } from '@whiskeysockets/baileys';
|
|
|
|
import { readEnvFile } from './env.js';
|
|
|
|
/**
 * Settings controlling voice-note transcription behavior.
 */
interface TranscriptionConfig {
  /** OpenAI transcription model identifier (e.g. 'whisper-1'). */
  model: string;
  /** When false, transcription is skipped and the fallback message is returned. */
  enabled: boolean;
  /** Text substituted for the transcript when transcription cannot be performed. */
  fallbackMessage: string;
}
|
|
|
|
const DEFAULT_CONFIG: TranscriptionConfig = {
|
|
model: 'whisper-1',
|
|
enabled: true,
|
|
fallbackMessage: '[Voice Message - transcription unavailable]',
|
|
};
|
|
|
|
async function transcribeWithOpenAI(
|
|
audioBuffer: Buffer,
|
|
config: TranscriptionConfig,
|
|
): Promise<string | null> {
|
|
const env = readEnvFile(['OPENAI_API_KEY']);
|
|
const apiKey = env.OPENAI_API_KEY;
|
|
|
|
if (!apiKey) {
|
|
console.warn('OPENAI_API_KEY not set in .env');
|
|
return null;
|
|
}
|
|
|
|
try {
|
|
const openaiModule = await import('openai');
|
|
const OpenAI = openaiModule.default;
|
|
const toFile = openaiModule.toFile;
|
|
|
|
const openai = new OpenAI({ apiKey });
|
|
|
|
const file = await toFile(audioBuffer, 'voice.ogg', {
|
|
type: 'audio/ogg',
|
|
});
|
|
|
|
const transcription = await openai.audio.transcriptions.create({
|
|
file: file,
|
|
model: config.model,
|
|
response_format: 'text',
|
|
});
|
|
|
|
// When response_format is 'text', the API returns a plain string
|
|
return transcription as unknown as string;
|
|
} catch (err) {
|
|
console.error('OpenAI transcription failed:', err);
|
|
return null;
|
|
}
|
|
}
|
|
|
|
export async function transcribeAudioMessage(
|
|
msg: WAMessage,
|
|
sock: WASocket,
|
|
): Promise<string | null> {
|
|
const config = DEFAULT_CONFIG;
|
|
|
|
if (!config.enabled) {
|
|
return config.fallbackMessage;
|
|
}
|
|
|
|
try {
|
|
const buffer = (await downloadMediaMessage(
|
|
msg,
|
|
'buffer',
|
|
{},
|
|
{
|
|
logger: console as any,
|
|
reuploadRequest: sock.updateMediaMessage,
|
|
},
|
|
)) as Buffer;
|
|
|
|
if (!buffer || buffer.length === 0) {
|
|
console.error('Failed to download audio message');
|
|
return config.fallbackMessage;
|
|
}
|
|
|
|
console.log(`Downloaded audio message: ${buffer.length} bytes`);
|
|
|
|
const transcript = await transcribeWithOpenAI(buffer, config);
|
|
|
|
if (!transcript) {
|
|
return config.fallbackMessage;
|
|
}
|
|
|
|
return transcript.trim();
|
|
} catch (err) {
|
|
console.error('Transcription error:', err);
|
|
return config.fallbackMessage;
|
|
}
|
|
}
|
|
|
|
export function isVoiceMessage(msg: WAMessage): boolean {
|
|
return msg.message?.audioMessage?.ptt === true;
|
|
}
|