import { Queue, type Job } from 'bullmq';
import { getLogger } from '@stock-bot/logger';
import type { JobData } from './types';
import { getRedisConnection } from './utils';

const logger = getLogger('dlq-handler');

/**
 * Tunables for {@link DeadLetterQueueHandler}. Every field is optional; the
 * handler's constructor fills in the defaults noted below.
 */
export interface DLQConfig {
  /** Attempt limit used when a job carries no `attempts` option, and for jobs retried from the DLQ. Default: 3. */
  maxRetries?: number;
  /** Delay in milliseconds applied to jobs re-queued from the DLQ. Default: 60000. */
  retryDelay?: number;
  /** Number of DLQ entries at which an alert is logged. Default: 100. */
  alertThreshold?: number;
  /** Age in hours after which DLQ entries are removed by `cleanup()`. Default: 168. */
  cleanupAge?: number;
}

/**
 * Tracks failures of jobs from a main queue and parks jobs that exhausted
 * their attempts in a companion queue named `<main queue name>-dlq`.
 */
export class DeadLetterQueueHandler {
  private dlq: Queue;
  private config: Required<DLQConfig>;
  /** In-memory failure counter keyed by `<job name>:<job id>`. */
  private failureCount = new Map<string, number>();

  /**
   * @param mainQueue  Queue whose failed jobs are handled; also the target of `retryDLQJobs`.
   * @param connection Connection settings passed through `getRedisConnection` for the DLQ.
   * @param config     Optional overrides for the defaults described on `DLQConfig`.
   */
  constructor(
    private mainQueue: Queue,
    private connection: any,
    config: DLQConfig = {}
  ) {
    this.config = {
      maxRetries: config.maxRetries ?? 3,
      retryDelay: config.retryDelay ?? 60000, // 1 minute
      alertThreshold: config.alertThreshold ?? 100,
      cleanupAge: config.cleanupAge ?? 168, // 7 days
    };

    // Create DLQ with same name but -dlq suffix
    const dlqName = `${mainQueue.name}-dlq`;
    this.dlq = new Queue(dlqName, {
      connection: getRedisConnection(connection),
    });
  }

  /**
   * Process a failed job - either leave it for retry or move it to the DLQ.
   *
   * The job is moved once `attemptsMade` reaches the job's own `attempts`
   * option, or `config.maxRetries` when that option is unset or 0.
   *
   * @param job   The job that failed.
   * @param error The error it failed with.
   */
  async handleFailedJob(job: Job<JobData>, error: Error): Promise<void> {
    const jobKey = `${job.name}:${job.id}`;
    const currentFailures = (this.failureCount.get(jobKey) || 0) + 1;
    this.failureCount.set(jobKey, currentFailures);

    logger.warn('Job failed', {
      jobId: job.id,
      jobName: job.name,
      attempt: job.attemptsMade,
      maxAttempts: job.opts.attempts,
      error: error.message,
      failureCount: currentFailures,
    });

    // Check if job should be moved to DLQ.
    // NOTE(review): the counter entry is only cleared on this path; entries for
    // jobs that never reach the limit stay in the map until shutdown().
    if (job.attemptsMade >= (job.opts.attempts || this.config.maxRetries)) {
      await this.moveToDeadLetterQueue(job, error);
      this.failureCount.delete(jobKey);
    }
  }

  /**
   * Move job to dead letter queue.
   *
   * Best-effort: failures are logged and never rethrown, so a DLQ outage does
   * not propagate into the caller's failure handling.
   */
  private async moveToDeadLetterQueue(job: Job<JobData>, error: Error): Promise<void> {
    try {
      const dlqData = {
        originalJob: {
          id: job.id,
          name: job.name,
          data: job.data,
          opts: job.opts,
          attemptsMade: job.attemptsMade,
          failedReason: job.failedReason,
          processedOn: job.processedOn,
          timestamp: job.timestamp,
        },
        error: {
          message: error.message,
          stack: error.stack,
          name: error.name,
        },
        movedToDLQAt: new Date().toISOString(),
      };

      await this.dlq.add('failed-job', dlqData, {
        removeOnComplete: false,
        removeOnFail: false,
      });

      logger.error('Job moved to DLQ', {
        jobId: job.id,
        jobName: job.name,
        error: error.message,
      });
    } catch (dlqError) {
      logger.error('Failed to move job to DLQ', {
        jobId: job.id,
        error: dlqError,
      });
      return;
    }

    // Check if we need to alert. Kept outside the try above so that a failure
    // while gathering stats is not reported as a failed move.
    try {
      await this.checkAlertThreshold();
    } catch (alertError) {
      logger.error('Failed to check DLQ alert threshold', {
        error: alertError,
      });
    }
  }

  /**
   * Load DLQ entries from the completed, failed and waiting states (in that
   * order) - the same states `getStats` reports on.
   *
   * @param limit Maximum number of entries to return; omit to load everything.
   *              Values <= 0 yield an empty array.
   */
  private async getDLQJobs(limit?: number): Promise<Job[]> {
    if (limit !== undefined && limit <= 0) {
      return [];
    }

    // Range ends are inclusive, and -1 means "through the last entry".
    const end = limit === undefined ? -1 : limit - 1;
    const [completed, failed, waiting] = await Promise.all([
      this.dlq.getCompleted(0, end),
      this.dlq.getFailed(0, end),
      this.dlq.getWaiting(0, end),
    ]);

    // Drop empty slots defensively (presumably possible when a job is removed
    // between listing and loading).
    const jobs = [...completed, ...failed, ...waiting].filter(
      (job): job is Job => job != null
    );

    return limit === undefined ? jobs : jobs.slice(0, limit);
  }

  /**
   * Retry jobs from DLQ by re-adding them to the main queue with
   * `config.retryDelay` and `config.maxRetries`, then removing the DLQ entry.
   *
   * @param limit Maximum number of DLQ entries to retry.
   * @returns The number of jobs successfully re-queued.
   */
  async retryDLQJobs(limit = 10): Promise<number> {
    const jobs = await this.getDLQJobs(limit);
    let retriedCount = 0;

    for (const dlqJob of jobs) {
      try {
        const { originalJob } = dlqJob.data;

        // Re-add to main queue with delay
        await this.mainQueue.add(originalJob.name, originalJob.data, {
          ...originalJob.opts,
          delay: this.config.retryDelay,
          attempts: this.config.maxRetries,
        });

        // Remove from DLQ
        await dlqJob.remove();
        retriedCount++;

        logger.info('Job retried from DLQ', {
          originalJobId: originalJob.id,
          jobName: originalJob.name,
        });
      } catch (error) {
        // One bad entry must not stop the remaining retries.
        logger.error('Failed to retry DLQ job', {
          dlqJobId: dlqJob.id,
          error,
        });
      }
    }

    return retriedCount;
  }

  /**
   * Get DLQ statistics.
   *
   * @returns Total entry count, entries added in the last 24 hours, a count
   *          per original job name, and the timestamp of the oldest entry
   *          (`null` when the DLQ is empty).
   */
  async getStats(): Promise<{
    total: number;
    recent: number;
    byJobName: Record<string, number>;
    oldestJob: Date | null;
  }> {
    const allJobs = await this.getDLQJobs();

    const byJobName: Record<string, number> = {};
    let oldestTimestamp: number | null = null;

    for (const job of allJobs) {
      const jobName = job.data.originalJob?.name || 'unknown';
      byJobName[jobName] = (byJobName[jobName] || 0) + 1;

      if (oldestTimestamp === null || job.timestamp < oldestTimestamp) {
        oldestTimestamp = job.timestamp;
      }
    }

    // Count recent jobs (last 24 hours)
    const oneDayAgo = Date.now() - 24 * 60 * 60 * 1000;
    const recent = allJobs.filter(job => job.timestamp > oneDayAgo).length;

    return {
      total: allJobs.length,
      recent,
      byJobName,
      oldestJob: oldestTimestamp !== null ? new Date(oldestTimestamp) : null,
    };
  }

  /**
   * Clean up old DLQ entries: removes every entry whose timestamp is older
   * than `config.cleanupAge` hours.
   *
   * @returns The number of entries removed.
   */
  async cleanup(): Promise<number> {
    const ageInMs = this.config.cleanupAge * 60 * 60 * 1000;
    const cutoffTime = Date.now() - ageInMs;

    const jobs = await this.getDLQJobs();
    let removedCount = 0;

    for (const job of jobs) {
      if (job.timestamp < cutoffTime) {
        await job.remove();
        removedCount++;
      }
    }

    logger.info('DLQ cleanup completed', {
      removedCount,
      cleanupAge: `${this.config.cleanupAge} hours`,
    });

    return removedCount;
  }

  /**
   * Check if alert threshold is exceeded and log an error when it is.
   */
  private async checkAlertThreshold(): Promise<void> {
    const stats = await this.getStats();

    if (stats.total >= this.config.alertThreshold) {
      logger.error('DLQ alert threshold exceeded', {
        threshold: this.config.alertThreshold,
        currentCount: stats.total,
        byJobName: stats.byJobName,
      });
      // In a real implementation, this would trigger alerts
    }
  }

  /**
   * Get failed jobs for inspection.
   *
   * @param limit Maximum number of DLQ entries to return.
   * @returns A flattened view of each entry: the original job's id, name,
   *          payload and attempt count, plus the recorded error and the time
   *          it was moved to the DLQ.
   */
  async inspectFailedJobs(limit = 10): Promise<
    Array<{
      id: string;
      name: string;
      data: any;
      error: any;
      failedAt: string;
      attempts: number;
    }>
  > {
    const jobs = await this.getDLQJobs(limit);

    return jobs.map(job => ({
      id: job.data.originalJob.id,
      name: job.data.originalJob.name,
      data: job.data.originalJob.data,
      error: job.data.error,
      failedAt: job.data.movedToDLQAt,
      attempts: job.data.originalJob.attemptsMade,
    }));
  }

  /**
   * Shutdown DLQ handler: closes the DLQ connection and drops the in-memory
   * failure counters.
   */
  async shutdown(): Promise<void> {
    await this.dlq.close();
    this.failureCount.clear();
  }
}