import { downloadMediaMessage } from '@whiskeysockets/baileys';
import { WAMessage, WASocket } from '@whiskeysockets/baileys';
import { readEnvFile } from './env.js';

/** Settings that control how voice messages are transcribed. */
interface TranscriptionConfig {
  /** Model identifier passed to the OpenAI transcription request. */
  model: string;
  /** When false, no download/transcription is attempted and the fallback is returned. */
  enabled: boolean;
  /** Text returned whenever a usable transcript cannot be produced. */
  fallbackMessage: string;
}

/** Configuration used by `transcribeAudioMessage`. */
const DEFAULT_CONFIG: TranscriptionConfig = {
  model: 'whisper-1',
  enabled: true,
  fallbackMessage: '[Voice Message - transcription unavailable]',
};

/**
 * Sends an audio buffer to the OpenAI transcription endpoint.
 *
 * The API key is read from the env file on every call; the `openai` package is
 * imported lazily inside the `try`, so a failure to load it is reported the
 * same way as a failed request.
 *
 * @param audioBuffer - Raw audio bytes; uploaded as `voice.ogg` with type `audio/ogg`.
 * @param config - Supplies the model name for the request.
 * @returns The transcript text, or `null` when the API key is missing, the
 *   request fails, or the response is not a plain string.
 */
async function transcribeWithOpenAI(
  audioBuffer: Buffer,
  config: TranscriptionConfig,
): Promise<string | null> {
  const env = readEnvFile(['OPENAI_API_KEY']);
  const apiKey = env.OPENAI_API_KEY;

  if (!apiKey) {
    console.warn('OPENAI_API_KEY not set in .env');
    return null;
  }

  try {
    const { default: OpenAI, toFile } = await import('openai');
    const openai = new OpenAI({ apiKey });

    const file = await toFile(audioBuffer, 'voice.ogg', {
      type: 'audio/ogg',
    });

    // Held as `unknown` so the shape is verified at runtime instead of being
    // forced with a double cast.
    const transcription: unknown = await openai.audio.transcriptions.create({
      file,
      model: config.model,
      response_format: 'text',
    });

    // When response_format is 'text', the API returns a plain string.
    if (typeof transcription === 'string') {
      return transcription;
    }

    console.error(
      'OpenAI transcription returned a non-string response:',
      typeof transcription,
    );
    return null;
  } catch (err) {
    console.error('OpenAI transcription failed:', err);
    return null;
  }
}

/**
 * Downloads the audio attached to a WhatsApp message and transcribes it.
 *
 * This function never rejects: every failure path (transcription disabled,
 * empty download, missing or blank transcript, thrown error) resolves to the
 * configured fallback message.
 *
 * @param msg - The message whose media is downloaded.
 * @param sock - Socket whose `updateMediaMessage` is handed to the downloader
 *   as the re-upload callback.
 * @returns The trimmed transcript, or the fallback message.
 */
export async function transcribeAudioMessage(
  msg: WAMessage,
  sock: WASocket,
): Promise<string> {
  const config = DEFAULT_CONFIG;

  if (!config.enabled) {
    return config.fallbackMessage;
  }

  try {
    const buffer = (await downloadMediaMessage(
      msg,
      'buffer',
      {},
      {
        // NOTE(review): `console` is cast to `any` to satisfy the logger
        // parameter — confirm it provides every method the downloader calls.
        logger: console as any,
        reuploadRequest: sock.updateMediaMessage,
      },
    )) as Buffer;

    if (!buffer || buffer.length === 0) {
      console.error('Failed to download audio message');
      return config.fallbackMessage;
    }

    console.log(`Downloaded audio message: ${buffer.length} bytes`);

    const transcript = await transcribeWithOpenAI(buffer, config);

    // Trim before the emptiness check so a whitespace-only transcript is
    // treated the same as an empty or missing one.
    const text = transcript?.trim();
    if (!text) {
      return config.fallbackMessage;
    }

    return text;
  } catch (err) {
    console.error('Transcription error:', err);
    return config.fallbackMessage;
  }
}

/**
 * Reports whether a message carries an audio payload whose `ptt` flag is
 * exactly `true`.
 *
 * @param msg - The message to inspect.
 * @returns `true` only when `msg.message.audioMessage.ptt === true`; `false`
 *   when any part of that path is absent.
 */
export function isVoiceMessage(msg: WAMessage): boolean {
  return msg.message?.audioMessage?.ptt === true;
}