feat: per-group queue, SQLite state, graceful shutdown
Add per-group container locking with global concurrency limit to prevent concurrent containers for the same group (#89) and cap total containers. Fix message batching bug where lastAgentTimestamp advanced to trigger message instead of latest in batch, causing redundant re-processing. Move router state, sessions, and registered groups from JSON files to SQLite with automatic one-time migration. Add SIGTERM/SIGINT handlers with graceful shutdown (SIGTERM -> grace period -> SIGKILL). Add startup recovery for messages missed during crash. Remove dead code: utils.ts, Session type, isScheduledTask flag, ContainerConfig.env, getTaskRunLogs, GroupQueue.isActive. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
248
src/group-queue.ts
Normal file
248
src/group-queue.ts
Normal file
@@ -0,0 +1,248 @@
|
||||
import { ChildProcess } from 'child_process';
|
||||
|
||||
import { MAX_CONCURRENT_CONTAINERS } from './config.js';
|
||||
import { logger } from './logger.js';
|
||||
|
||||
interface QueuedTask {
|
||||
id: string;
|
||||
groupJid: string;
|
||||
fn: () => Promise<void>;
|
||||
}
|
||||
|
||||
interface GroupState {
|
||||
active: boolean;
|
||||
pendingMessages: boolean;
|
||||
pendingTasks: QueuedTask[];
|
||||
process: ChildProcess | null;
|
||||
}
|
||||
|
||||
export class GroupQueue {
|
||||
private groups = new Map<string, GroupState>();
|
||||
private activeCount = 0;
|
||||
private waitingGroups: string[] = [];
|
||||
private processMessagesFn: ((groupJid: string) => Promise<void>) | null =
|
||||
null;
|
||||
private shuttingDown = false;
|
||||
|
||||
private getGroup(groupJid: string): GroupState {
|
||||
let state = this.groups.get(groupJid);
|
||||
if (!state) {
|
||||
state = {
|
||||
active: false,
|
||||
pendingMessages: false,
|
||||
pendingTasks: [],
|
||||
process: null,
|
||||
};
|
||||
this.groups.set(groupJid, state);
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
setProcessMessagesFn(fn: (groupJid: string) => Promise<void>): void {
|
||||
this.processMessagesFn = fn;
|
||||
}
|
||||
|
||||
enqueueMessageCheck(groupJid: string): void {
|
||||
if (this.shuttingDown) return;
|
||||
|
||||
const state = this.getGroup(groupJid);
|
||||
|
||||
if (state.active) {
|
||||
state.pendingMessages = true;
|
||||
logger.debug({ groupJid }, 'Container active, message queued');
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.activeCount >= MAX_CONCURRENT_CONTAINERS) {
|
||||
state.pendingMessages = true;
|
||||
if (!this.waitingGroups.includes(groupJid)) {
|
||||
this.waitingGroups.push(groupJid);
|
||||
}
|
||||
logger.debug(
|
||||
{ groupJid, activeCount: this.activeCount },
|
||||
'At concurrency limit, message queued',
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
this.runForGroup(groupJid, 'messages');
|
||||
}
|
||||
|
||||
enqueueTask(groupJid: string, taskId: string, fn: () => Promise<void>): void {
|
||||
if (this.shuttingDown) return;
|
||||
|
||||
const state = this.getGroup(groupJid);
|
||||
|
||||
// Prevent double-queuing of the same task
|
||||
if (state.pendingTasks.some((t) => t.id === taskId)) {
|
||||
logger.debug({ groupJid, taskId }, 'Task already queued, skipping');
|
||||
return;
|
||||
}
|
||||
|
||||
if (state.active) {
|
||||
state.pendingTasks.push({ id: taskId, groupJid, fn });
|
||||
logger.debug({ groupJid, taskId }, 'Container active, task queued');
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.activeCount >= MAX_CONCURRENT_CONTAINERS) {
|
||||
state.pendingTasks.push({ id: taskId, groupJid, fn });
|
||||
if (!this.waitingGroups.includes(groupJid)) {
|
||||
this.waitingGroups.push(groupJid);
|
||||
}
|
||||
logger.debug(
|
||||
{ groupJid, taskId, activeCount: this.activeCount },
|
||||
'At concurrency limit, task queued',
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Run immediately
|
||||
this.runTask(groupJid, { id: taskId, groupJid, fn });
|
||||
}
|
||||
|
||||
registerProcess(groupJid: string, proc: ChildProcess): void {
|
||||
const state = this.getGroup(groupJid);
|
||||
state.process = proc;
|
||||
}
|
||||
|
||||
private async runForGroup(
|
||||
groupJid: string,
|
||||
reason: 'messages' | 'drain',
|
||||
): Promise<void> {
|
||||
const state = this.getGroup(groupJid);
|
||||
state.active = true;
|
||||
state.pendingMessages = false;
|
||||
this.activeCount++;
|
||||
|
||||
logger.debug(
|
||||
{ groupJid, reason, activeCount: this.activeCount },
|
||||
'Starting container for group',
|
||||
);
|
||||
|
||||
try {
|
||||
if (this.processMessagesFn) {
|
||||
await this.processMessagesFn(groupJid);
|
||||
}
|
||||
} catch (err) {
|
||||
logger.error({ groupJid, err }, 'Error processing messages for group');
|
||||
} finally {
|
||||
state.active = false;
|
||||
state.process = null;
|
||||
this.activeCount--;
|
||||
this.drainGroup(groupJid);
|
||||
}
|
||||
}
|
||||
|
||||
private async runTask(groupJid: string, task: QueuedTask): Promise<void> {
|
||||
const state = this.getGroup(groupJid);
|
||||
state.active = true;
|
||||
this.activeCount++;
|
||||
|
||||
logger.debug(
|
||||
{ groupJid, taskId: task.id, activeCount: this.activeCount },
|
||||
'Running queued task',
|
||||
);
|
||||
|
||||
try {
|
||||
await task.fn();
|
||||
} catch (err) {
|
||||
logger.error({ groupJid, taskId: task.id, err }, 'Error running task');
|
||||
} finally {
|
||||
state.active = false;
|
||||
state.process = null;
|
||||
this.activeCount--;
|
||||
this.drainGroup(groupJid);
|
||||
}
|
||||
}
|
||||
|
||||
private drainGroup(groupJid: string): void {
|
||||
if (this.shuttingDown) return;
|
||||
|
||||
const state = this.getGroup(groupJid);
|
||||
|
||||
// Tasks first (they won't be re-discovered from SQLite like messages)
|
||||
if (state.pendingTasks.length > 0) {
|
||||
const task = state.pendingTasks.shift()!;
|
||||
this.runTask(groupJid, task);
|
||||
return;
|
||||
}
|
||||
|
||||
// Then pending messages
|
||||
if (state.pendingMessages) {
|
||||
this.runForGroup(groupJid, 'drain');
|
||||
return;
|
||||
}
|
||||
|
||||
// Nothing pending for this group; check if other groups are waiting for a slot
|
||||
this.drainWaiting();
|
||||
}
|
||||
|
||||
private drainWaiting(): void {
|
||||
while (
|
||||
this.waitingGroups.length > 0 &&
|
||||
this.activeCount < MAX_CONCURRENT_CONTAINERS
|
||||
) {
|
||||
const nextJid = this.waitingGroups.shift()!;
|
||||
const state = this.getGroup(nextJid);
|
||||
|
||||
// Prioritize tasks over messages
|
||||
if (state.pendingTasks.length > 0) {
|
||||
const task = state.pendingTasks.shift()!;
|
||||
this.runTask(nextJid, task);
|
||||
} else if (state.pendingMessages) {
|
||||
this.runForGroup(nextJid, 'drain');
|
||||
}
|
||||
// If neither pending, skip this group
|
||||
}
|
||||
}
|
||||
|
||||
async shutdown(gracePeriodMs: number): Promise<void> {
|
||||
this.shuttingDown = true;
|
||||
logger.info(
|
||||
{ activeCount: this.activeCount, gracePeriodMs },
|
||||
'GroupQueue shutting down',
|
||||
);
|
||||
|
||||
// Collect all active processes
|
||||
const activeProcs: Array<{ jid: string; proc: ChildProcess }> = [];
|
||||
for (const [jid, state] of this.groups) {
|
||||
if (state.process && !state.process.killed) {
|
||||
activeProcs.push({ jid, proc: state.process });
|
||||
}
|
||||
}
|
||||
|
||||
if (activeProcs.length === 0) return;
|
||||
|
||||
// Send SIGTERM to all
|
||||
for (const { jid, proc } of activeProcs) {
|
||||
logger.info({ jid, pid: proc.pid }, 'Sending SIGTERM to container');
|
||||
proc.kill('SIGTERM');
|
||||
}
|
||||
|
||||
// Wait for grace period
|
||||
await new Promise<void>((resolve) => {
|
||||
const checkInterval = setInterval(() => {
|
||||
const alive = activeProcs.filter(
|
||||
({ proc }) => !proc.killed && proc.exitCode === null,
|
||||
);
|
||||
if (alive.length === 0) {
|
||||
clearInterval(checkInterval);
|
||||
resolve();
|
||||
}
|
||||
}, 500);
|
||||
|
||||
setTimeout(() => {
|
||||
clearInterval(checkInterval);
|
||||
// SIGKILL survivors
|
||||
for (const { jid, proc } of activeProcs) {
|
||||
if (!proc.killed && proc.exitCode === null) {
|
||||
logger.warn({ jid, pid: proc.pid }, 'Sending SIGKILL to container');
|
||||
proc.kill('SIGKILL');
|
||||
}
|
||||
}
|
||||
resolve();
|
||||
}, gracePeriodMs);
|
||||
});
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user