feat(skills): add image vision skill for WhatsApp (#770)

* chore: prepare image-vision skill for template regeneration

- Delete stale modify/*.ts templates (built against 1.1.2)
- Update core_version to 1.2.6
- Strip fork-specific details from intent docs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* feat(skills): regenerate image-vision modify/ templates against upstream

Templates regenerated against upstream 1.2.6:
- src/container-runner.ts: imageAttachments field in ContainerInput
- src/index.ts: parseImageReferences + threading to runAgent
- src/channels/whatsapp.ts: downloadMediaMessage + image handling block
- src/channels/whatsapp.test.ts: image mocks + 4 test cases
- container/agent-runner/src/index.ts: ContentBlock types, pushMultimodal, image loading

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* test: update image-vision tests for upstream templates

- Relax downloadMediaMessage import pattern check (multi-line import)
- Remove check for [Image - processing failed] (not in upstream template)
- Add vitest.skills.config.ts for skill package test runs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: update image-vision core_version to 1.2.8

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
glifocat
2026-03-06 17:52:59 +01:00
committed by GitHub
parent be1991108b
commit af937d6453
15 changed files with 4100 additions and 0 deletions

View File

@@ -0,0 +1,89 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import fs from 'fs';
// Stand-in for sharp: every call yields a fresh chainable pipeline whose
// resize()/jpeg() return the pipeline itself and whose toBuffer() resolves to
// a fixed payload, so no real image decoding takes place in these tests.
vi.mock('sharp', () => {
  const createPipeline = () => {
    const resize = vi.fn().mockReturnThis();
    const jpeg = vi.fn().mockReturnThis();
    const toBuffer = vi
      .fn()
      .mockResolvedValue(Buffer.from('resized-image-data'));
    return { resize, jpeg, toBuffer };
  };
  return { default: vi.fn(createPipeline) };
});
// fs is mocked as well; the suite stubs the individual calls it relies on.
vi.mock('fs');
import { processImage, parseImageReferences, isImageMessage } from './image.js';
// Unit tests for the helpers exported by ./image.js. Both sharp and fs are
// mocked in this file, so nothing is decoded and nothing is written to disk.
describe('image processing', () => {
// Clear recorded mock calls between tests, then re-stub the two fs functions
// that the processImage tests expect to be invoked.
beforeEach(() => {
vi.clearAllMocks();
vi.mocked(fs.mkdirSync).mockReturnValue(undefined);
vi.mocked(fs.writeFileSync).mockReturnValue(undefined);
});
// isImageMessage: true only when message.imageMessage is present.
describe('isImageMessage', () => {
it('returns true for image messages', () => {
const msg = { message: { imageMessage: { mimetype: 'image/jpeg' } } };
expect(isImageMessage(msg as any)).toBe(true);
});
it('returns false for non-image messages', () => {
const msg = { message: { conversation: 'hello' } };
expect(isImageMessage(msg as any)).toBe(false);
});
it('returns false for null message', () => {
const msg = { message: null };
expect(isImageMessage(msg as any)).toBe(false);
});
});
// processImage: checks the shape of the returned marker/path and that the
// attachment directory and file are written via the mocked fs.
describe('processImage', () => {
it('resizes and saves image, returns content string', async () => {
const buffer = Buffer.from('raw-image-data');
const result = await processImage(buffer, '/tmp/groups/test', 'Check this out');
expect(result).not.toBeNull();
// Expected file name shape: img-<digits>-<lowercase alphanumerics>.jpg under attachments/.
expect(result!.content).toMatch(/^\[Image: attachments\/img-\d+-[a-z0-9]+\.jpg\] Check this out$/);
expect(result!.relativePath).toMatch(/^attachments\/img-\d+-[a-z0-9]+\.jpg$/);
expect(fs.mkdirSync).toHaveBeenCalled();
expect(fs.writeFileSync).toHaveBeenCalled();
});
it('returns content without caption when none provided', async () => {
const buffer = Buffer.from('raw-image-data');
const result = await processImage(buffer, '/tmp/groups/test', '');
expect(result).not.toBeNull();
// With an empty caption the content is the marker alone, with no trailing space.
expect(result!.content).toMatch(/^\[Image: attachments\/img-\d+-[a-z0-9]+\.jpg\]$/);
});
it('returns null on empty buffer', async () => {
const result = await processImage(Buffer.alloc(0), '/tmp/groups/test', '');
expect(result).toBeNull();
});
});
// parseImageReferences: extracts one entry per [Image: attachments/...] marker
// and always reports image/jpeg as the media type.
describe('parseImageReferences', () => {
it('extracts image paths from message content', () => {
const messages = [
{ content: '[Image: attachments/img-123.jpg] hello' },
{ content: 'plain text' },
{ content: '[Image: attachments/img-456.jpg]' },
];
const refs = parseImageReferences(messages as any);
expect(refs).toEqual([
{ relativePath: 'attachments/img-123.jpg', mediaType: 'image/jpeg' },
{ relativePath: 'attachments/img-456.jpg', mediaType: 'image/jpeg' },
]);
});
it('returns empty array when no images', () => {
const messages = [{ content: 'just text' }];
expect(parseImageReferences(messages as any)).toEqual([]);
});
});
});

View File

@@ -0,0 +1,63 @@
import fs from 'fs';
import path from 'path';
import sharp from 'sharp';
import type { WAMessage } from '@whiskeysockets/baileys';
// Upper bound passed as both the width and the height to sharp's resize()
// in processImage().
const MAX_DIMENSION = 1024;
// Matches `[Image: attachments/<anything except ']'>]` markers; capture group 1
// is the path beginning with `attachments/`. The regex carries the global
// flag, so it keeps `lastIndex` state between exec() calls and has to be
// rewound before it is reused on a new string.
const IMAGE_REF_PATTERN = /\[Image: (attachments\/[^\]]+)\]/g;
/** Result of processImage() for an image that was stored successfully. */
export interface ProcessedImage {
// Message text: `[Image: <relativePath>]`, followed by a space and the
// caption when a non-empty caption was supplied.
content: string;
// Path of the stored file in the form `attachments/<filename>`.
relativePath: string;
}
/** An image reference extracted from message content by parseImageReferences(). */
export interface ImageAttachment {
// Path captured from an `[Image: ...]` marker; it starts with `attachments/`.
relativePath: string;
// Media type of the referenced file; parseImageReferences() always sets
// 'image/jpeg'.
mediaType: string;
}
/**
 * Tells whether a WhatsApp message carries an image payload.
 *
 * @param msg - Message to inspect; `msg.message` may be null or absent.
 * @returns `true` when `msg.message.imageMessage` is present and truthy.
 */
export function isImageMessage(msg: WAMessage): boolean {
  const imagePayload = msg.message?.imageMessage;
  return Boolean(imagePayload);
}
/**
 * Downscales an image, re-encodes it as JPEG and stores it under
 * `<groupDir>/attachments/`.
 *
 * The image is resized to fit inside MAX_DIMENSION x MAX_DIMENSION without
 * being enlarged, then encoded as JPEG at quality 85. The file is named
 * `img-<Date.now()>-<up to 4 base-36 characters>.jpg`. There is no try/catch
 * here, so failures from sharp or from the fs calls propagate to the caller.
 *
 * @param buffer - Raw image bytes.
 * @param groupDir - Directory that receives the `attachments/` subdirectory.
 * @param caption - Text appended after the image marker; an empty caption
 *   yields the marker alone.
 * @returns The message content and the stored file's relative path, or `null`
 *   when `buffer` is missing or empty.
 */
export async function processImage(
  buffer: Buffer,
  groupDir: string,
  caption: string,
): Promise<ProcessedImage | null> {
  // Nothing to process: bail out before touching sharp or the file system.
  if (!buffer) return null;
  if (buffer.length === 0) return null;

  const pipeline = sharp(buffer).resize(MAX_DIMENSION, MAX_DIMENSION, {
    fit: 'inside',
    withoutEnlargement: true,
  });
  const jpegBytes = await pipeline.jpeg({ quality: 85 }).toBuffer();

  const attachmentsDir = path.join(groupDir, 'attachments');
  fs.mkdirSync(attachmentsDir, { recursive: true });

  // Timestamp plus a short random suffix keeps names distinct within one ms.
  const stamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 6);
  const filename = `img-${stamp}-${suffix}.jpg`;
  fs.writeFileSync(path.join(attachmentsDir, filename), jpegBytes);

  const relativePath = `attachments/${filename}`;
  let content = `[Image: ${relativePath}]`;
  if (caption) {
    content = `${content} ${caption}`;
  }
  return { content, relativePath };
}
/**
 * Collects every `[Image: attachments/...]` reference found in the given
 * messages, in message order and then in order of appearance within a message.
 *
 * Marker text is not guaranteed to come from processImage(): that function
 * appends the caption verbatim after its own marker, and any message content
 * passed here is scanned. A reference whose path contains a `..` segment is
 * therefore dropped, so a crafted marker such as
 * `[Image: attachments/../../secret]` cannot name a file outside
 * `attachments/`.
 *
 * @param messages - Messages whose `content` is scanned for image markers.
 * @returns One attachment per accepted marker; empty when none are found.
 */
export function parseImageReferences(
  messages: Array<{ content: string }>,
): ImageAttachment[] {
  const refs: ImageAttachment[] = [];
  for (const msg of messages) {
    let match: RegExpExecArray | null;
    // The pattern is global and shared, so rewind it before each message.
    IMAGE_REF_PATTERN.lastIndex = 0;
    while ((match = IMAGE_REF_PATTERN.exec(msg.content)) !== null) {
      const relativePath = match[1];
      // Reject parent-directory traversal. Both separators are checked so the
      // guard also holds where `\` is treated as a path separator.
      if (relativePath.split(/[\\/]/).includes('..')) continue;
      // Always JPEG — processImage() normalizes all images to .jpg
      refs.push({ relativePath, mediaType: 'image/jpeg' });
    }
  }
  return refs;
}