stock-bot/libs/queue/src/dlq-handler.ts
2025-06-19 07:20:14 -04:00

258 lines
No EOL
6.7 KiB
TypeScript

import { Queue, type Job } from 'bullmq';
import { getLogger } from '@stock-bot/logger';
import type { JobData } from './types';
import { getRedisConnection } from './utils';
const logger = getLogger('dlq-handler');
export interface DLQConfig {
maxRetries?: number;
retryDelay?: number;
alertThreshold?: number;
cleanupAge?: number; // hours
}
export class DeadLetterQueueHandler {
private dlq: Queue;
private config: Required<DLQConfig>;
private failureCount = new Map<string, number>();
constructor(
private mainQueue: Queue,
private connection: any,
config: DLQConfig = {}
) {
this.config = {
maxRetries: config.maxRetries ?? 3,
retryDelay: config.retryDelay ?? 60000, // 1 minute
alertThreshold: config.alertThreshold ?? 100,
cleanupAge: config.cleanupAge ?? 168, // 7 days
};
// Create DLQ with same name but -dlq suffix
const dlqName = `${mainQueue.name}-dlq`;
this.dlq = new Queue(dlqName, { connection: getRedisConnection(connection) });
}
/**
* Process a failed job - either retry or move to DLQ
*/
async handleFailedJob(job: Job, error: Error): Promise<void> {
const jobKey = `${job.name}:${job.id}`;
const currentFailures = (this.failureCount.get(jobKey) || 0) + 1;
this.failureCount.set(jobKey, currentFailures);
logger.warn('Job failed', {
jobId: job.id,
jobName: job.name,
attempt: job.attemptsMade,
maxAttempts: job.opts.attempts,
error: error.message,
failureCount: currentFailures,
});
// Check if job should be moved to DLQ
if (job.attemptsMade >= (job.opts.attempts || this.config.maxRetries)) {
await this.moveToDeadLetterQueue(job, error);
this.failureCount.delete(jobKey);
}
}
/**
* Move job to dead letter queue
*/
private async moveToDeadLetterQueue(job: Job, error: Error): Promise<void> {
try {
const dlqData = {
originalJob: {
id: job.id,
name: job.name,
data: job.data,
opts: job.opts,
attemptsMade: job.attemptsMade,
failedReason: job.failedReason,
processedOn: job.processedOn,
timestamp: job.timestamp,
},
error: {
message: error.message,
stack: error.stack,
name: error.name,
},
movedToDLQAt: new Date().toISOString(),
};
await this.dlq.add('failed-job', dlqData, {
removeOnComplete: false,
removeOnFail: false,
});
logger.error('Job moved to DLQ', {
jobId: job.id,
jobName: job.name,
error: error.message,
});
// Check if we need to alert
await this.checkAlertThreshold();
} catch (dlqError) {
logger.error('Failed to move job to DLQ', {
jobId: job.id,
error: dlqError,
});
}
}
/**
* Retry jobs from DLQ
*/
async retryDLQJobs(limit = 10): Promise<number> {
const jobs = await this.dlq.getCompleted(0, limit);
let retriedCount = 0;
for (const dlqJob of jobs) {
try {
const { originalJob } = dlqJob.data;
// Re-add to main queue with delay
await this.mainQueue.add(
originalJob.name,
originalJob.data,
{
...originalJob.opts,
delay: this.config.retryDelay,
attempts: this.config.maxRetries,
}
);
// Remove from DLQ
await dlqJob.remove();
retriedCount++;
logger.info('Job retried from DLQ', {
originalJobId: originalJob.id,
jobName: originalJob.name,
});
} catch (error) {
logger.error('Failed to retry DLQ job', {
dlqJobId: dlqJob.id,
error,
});
}
}
return retriedCount;
}
/**
* Get DLQ statistics
*/
async getStats(): Promise<{
total: number;
recent: number;
byJobName: Record<string, number>;
oldestJob: Date | null;
}> {
const [completed, failed, waiting] = await Promise.all([
this.dlq.getCompleted(),
this.dlq.getFailed(),
this.dlq.getWaiting(),
]);
const allJobs = [...completed, ...failed, ...waiting];
const byJobName: Record<string, number> = {};
let oldestTimestamp: number | null = null;
for (const job of allJobs) {
const jobName = job.data.originalJob?.name || 'unknown';
byJobName[jobName] = (byJobName[jobName] || 0) + 1;
if (!oldestTimestamp || job.timestamp < oldestTimestamp) {
oldestTimestamp = job.timestamp;
}
}
// Count recent jobs (last 24 hours)
const oneDayAgo = Date.now() - 24 * 60 * 60 * 1000;
const recent = allJobs.filter(job => job.timestamp > oneDayAgo).length;
return {
total: allJobs.length,
recent,
byJobName,
oldestJob: oldestTimestamp ? new Date(oldestTimestamp) : null,
};
}
/**
* Clean up old DLQ entries
*/
async cleanup(): Promise<number> {
const ageInMs = this.config.cleanupAge * 60 * 60 * 1000;
const cutoffTime = Date.now() - ageInMs;
const jobs = await this.dlq.getCompleted();
let removedCount = 0;
for (const job of jobs) {
if (job.timestamp < cutoffTime) {
await job.remove();
removedCount++;
}
}
logger.info('DLQ cleanup completed', {
removedCount,
cleanupAge: `${this.config.cleanupAge} hours`,
});
return removedCount;
}
/**
* Check if alert threshold is exceeded
*/
private async checkAlertThreshold(): Promise<void> {
const stats = await this.getStats();
if (stats.total >= this.config.alertThreshold) {
logger.error('DLQ alert threshold exceeded', {
threshold: this.config.alertThreshold,
currentCount: stats.total,
byJobName: stats.byJobName,
});
// In a real implementation, this would trigger alerts
}
}
/**
* Get failed jobs for inspection
*/
async inspectFailedJobs(limit = 10): Promise<Array<{
id: string;
name: string;
data: any;
error: any;
failedAt: string;
attempts: number;
}>> {
const jobs = await this.dlq.getCompleted(0, limit);
return jobs.map(job => ({
id: job.data.originalJob.id,
name: job.data.originalJob.name,
data: job.data.originalJob.data,
error: job.data.error,
failedAt: job.data.movedToDLQAt,
attempts: job.data.originalJob.attemptsMade,
}));
}
/**
* Shutdown DLQ handler
*/
async shutdown(): Promise<void> {
await this.dlq.close();
this.failureCount.clear();
}
}