fix(db): add LIMIT to unbounded message history queries (#692) (#735)

getNewMessages() and getMessagesSince() loaded every row after a
checkpoint with no cap, so memory use and token costs grew without
bound as messages accumulated.
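Both queries now wrap the original SELECT in a subquery ordered by
timestamp DESC with a LIMIT, so only the N most recent messages are
returned (N is a new `limit` parameter, default 200); the outer query
re-sorts them chronologically.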

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Gabi Simons
2026-03-06 18:34:55 +02:00
committed by GitHub
parent 8c38a8c7ff
commit 74b02c8715
2 changed files with 82 additions and 14 deletions

View File

@@ -306,24 +306,29 @@ export function getNewMessages(
jids: string[],
lastTimestamp: string,
botPrefix: string,
limit: number = 200,
): { messages: NewMessage[]; newTimestamp: string } {
if (jids.length === 0) return { messages: [], newTimestamp: lastTimestamp };
const placeholders = jids.map(() => '?').join(',');
// Filter bot messages using both the is_bot_message flag AND the content
// prefix as a backstop for messages written before the migration ran.
// Subquery takes the N most recent, outer query re-sorts chronologically.
const sql = `
SELECT id, chat_jid, sender, sender_name, content, timestamp, is_from_me
FROM messages
WHERE timestamp > ? AND chat_jid IN (${placeholders})
AND is_bot_message = 0 AND content NOT LIKE ?
AND content != '' AND content IS NOT NULL
ORDER BY timestamp
SELECT * FROM (
SELECT id, chat_jid, sender, sender_name, content, timestamp, is_from_me
FROM messages
WHERE timestamp > ? AND chat_jid IN (${placeholders})
AND is_bot_message = 0 AND content NOT LIKE ?
AND content != '' AND content IS NOT NULL
ORDER BY timestamp DESC
LIMIT ?
) ORDER BY timestamp
`;
const rows = db
.prepare(sql)
.all(lastTimestamp, ...jids, `${botPrefix}:%`) as NewMessage[];
.all(lastTimestamp, ...jids, `${botPrefix}:%`, limit) as NewMessage[];
let newTimestamp = lastTimestamp;
for (const row of rows) {
@@ -337,20 +342,25 @@ export function getMessagesSince(
chatJid: string,
sinceTimestamp: string,
botPrefix: string,
limit: number = 200,
): NewMessage[] {
// Filter bot messages using both the is_bot_message flag AND the content
// prefix as a backstop for messages written before the migration ran.
// Subquery takes the N most recent, outer query re-sorts chronologically.
const sql = `
SELECT id, chat_jid, sender, sender_name, content, timestamp, is_from_me
FROM messages
WHERE chat_jid = ? AND timestamp > ?
AND is_bot_message = 0 AND content NOT LIKE ?
AND content != '' AND content IS NOT NULL
ORDER BY timestamp
SELECT * FROM (
SELECT id, chat_jid, sender, sender_name, content, timestamp, is_from_me
FROM messages
WHERE chat_jid = ? AND timestamp > ?
AND is_bot_message = 0 AND content NOT LIKE ?
AND content != '' AND content IS NOT NULL
ORDER BY timestamp DESC
LIMIT ?
) ORDER BY timestamp
`;
return db
.prepare(sql)
.all(chatJid, sinceTimestamp, `${botPrefix}:%`) as NewMessage[];
.all(chatJid, sinceTimestamp, `${botPrefix}:%`, limit) as NewMessage[];
}
export function createTask(