fix(db): add LIMIT to unbounded message history queries (#692) (#735)

getNewMessages() and getMessagesSince() loaded every row after a
checkpoint with no cap, so memory use and token costs grew without bound.
Both queries now use an ORDER BY timestamp DESC ... LIMIT subquery to keep
only the most recent N messages (default 200), re-sorted chronologically.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Gabi Simons
2026-03-06 18:34:55 +02:00
committed by GitHub
parent 8c38a8c7ff
commit 74b02c8715
2 changed files with 82 additions and 14 deletions

View File

@@ -391,6 +391,64 @@ describe('task CRUD', () => {
});
});
// --- LIMIT behavior ---
/**
 * Exercises the `limit` argument of getNewMessages() and getMessagesSince().
 *
 * Fixture: ten user messages in one chat, one second apart, all strictly
 * later than the checkpoint handed to the queries. With a limit of 3 the
 * queries are expected to keep the three most recent rows (8, 9, 10) and
 * hand them back oldest-first.
 */
describe('message query LIMIT', () => {
  beforeEach(() => {
    storeChatMetadata('group@g.us', '2024-01-01T00:00:00.000Z');
    for (let i = 1; i <= 10; i++) {
      store({
        id: `lim-${i}`,
        chat_jid: 'group@g.us',
        sender: 'user@s.whatsapp.net',
        sender_name: 'User',
        content: `message ${i}`,
        // Zero-padded seconds keep the ISO strings in the same order as `i`.
        timestamp: `2024-01-01T00:00:${String(i).padStart(2, '0')}.000Z`,
      });
    }
  });

  it('getNewMessages caps to limit and returns most recent in chronological order', () => {
    const { messages, newTimestamp } = getNewMessages(
      ['group@g.us'],
      '2024-01-01T00:00:00.000Z',
      'Andy',
      3,
    );
    expect(messages).toHaveLength(3);
    // Pin every returned row, not just the endpoints, so a wrong middle
    // element or a mis-ordered tail cannot slip through.
    expect(messages.map((m) => m.content)).toEqual([
      'message 8',
      'message 9',
      'message 10',
    ]);
    // Chronological order preserved across the whole result
    expect(messages.map((m) => m.timestamp)).toEqual([
      '2024-01-01T00:00:08.000Z',
      '2024-01-01T00:00:09.000Z',
      '2024-01-01T00:00:10.000Z',
    ]);
    // newTimestamp reflects latest returned row
    expect(newTimestamp).toBe('2024-01-01T00:00:10.000Z');
  });

  it('getMessagesSince caps to limit and returns most recent in chronological order', () => {
    const messages = getMessagesSince(
      'group@g.us',
      '2024-01-01T00:00:00.000Z',
      'Andy',
      3,
    );
    expect(messages).toHaveLength(3);
    expect(messages.map((m) => m.content)).toEqual([
      'message 8',
      'message 9',
      'message 10',
    ]);
    expect(messages.map((m) => m.timestamp)).toEqual([
      '2024-01-01T00:00:08.000Z',
      '2024-01-01T00:00:09.000Z',
      '2024-01-01T00:00:10.000Z',
    ]);
  });

  it('returns all messages when count is under the limit', () => {
    const { messages } = getNewMessages(
      ['group@g.us'],
      '2024-01-01T00:00:00.000Z',
      'Andy',
      50,
    );
    expect(messages).toHaveLength(10);

    // The single-chat query takes the same limit parameter; cover it too.
    const since = getMessagesSince(
      'group@g.us',
      '2024-01-01T00:00:00.000Z',
      'Andy',
      50,
    );
    expect(since).toHaveLength(10);
  });
});
// --- RegisteredGroup isMain round-trip ---
describe('registered group isMain', () => {

View File

@@ -306,24 +306,29 @@ export function getNewMessages(
jids: string[],
lastTimestamp: string,
botPrefix: string,
limit: number = 200,
): { messages: NewMessage[]; newTimestamp: string } {
if (jids.length === 0) return { messages: [], newTimestamp: lastTimestamp };
const placeholders = jids.map(() => '?').join(',');
// Filter bot messages using both the is_bot_message flag AND the content
// prefix as a backstop for messages written before the migration ran.
// Subquery takes the N most recent, outer query re-sorts chronologically.
const sql = `
SELECT id, chat_jid, sender, sender_name, content, timestamp, is_from_me
FROM messages
WHERE timestamp > ? AND chat_jid IN (${placeholders})
AND is_bot_message = 0 AND content NOT LIKE ?
AND content != '' AND content IS NOT NULL
ORDER BY timestamp
SELECT * FROM (
SELECT id, chat_jid, sender, sender_name, content, timestamp, is_from_me
FROM messages
WHERE timestamp > ? AND chat_jid IN (${placeholders})
AND is_bot_message = 0 AND content NOT LIKE ?
AND content != '' AND content IS NOT NULL
ORDER BY timestamp DESC
LIMIT ?
) ORDER BY timestamp
`;
const rows = db
.prepare(sql)
.all(lastTimestamp, ...jids, `${botPrefix}:%`) as NewMessage[];
.all(lastTimestamp, ...jids, `${botPrefix}:%`, limit) as NewMessage[];
let newTimestamp = lastTimestamp;
for (const row of rows) {
@@ -337,20 +342,25 @@ export function getMessagesSince(
chatJid: string,
sinceTimestamp: string,
botPrefix: string,
limit: number = 200,
): NewMessage[] {
// Filter bot messages using both the is_bot_message flag AND the content
// prefix as a backstop for messages written before the migration ran.
// Subquery takes the N most recent, outer query re-sorts chronologically.
const sql = `
SELECT id, chat_jid, sender, sender_name, content, timestamp, is_from_me
FROM messages
WHERE chat_jid = ? AND timestamp > ?
AND is_bot_message = 0 AND content NOT LIKE ?
AND content != '' AND content IS NOT NULL
ORDER BY timestamp
SELECT * FROM (
SELECT id, chat_jid, sender, sender_name, content, timestamp, is_from_me
FROM messages
WHERE chat_jid = ? AND timestamp > ?
AND is_bot_message = 0 AND content NOT LIKE ?
AND content != '' AND content IS NOT NULL
ORDER BY timestamp DESC
LIMIT ?
) ORDER BY timestamp
`;
return db
.prepare(sql)
.all(chatJid, sinceTimestamp, `${botPrefix}:%`) as NewMessage[];
.all(chatJid, sinceTimestamp, `${botPrefix}:%`, limit) as NewMessage[];
}
export function createTask(