import type { Queue, QueueEvents } from 'bullmq';

// import { getLogger } from '@stock-bot/logger';
// const logger = getLogger('queue-metrics');

/**
 * Point-in-time snapshot of a queue produced by {@link QueueMetricsCollector.collect}.
 */
export interface QueueMetrics {
  // Job counts (as reported by the queue itself)
  waiting: number;
  active: number;
  completed: number;
  failed: number;
  delayed: number;
  paused?: number;

  // Processing-time statistics in milliseconds, over the retained samples
  processingTime: {
    avg: number;
    min: number;
    max: number;
    p95: number;
    p99: number;
  };

  // Events observed by the collector during the last metrics interval
  throughput: {
    completedPerMinute: number;
    failedPerMinute: number;
    totalPerMinute: number;
  };

  // Creation time of the first job returned by the waiting list, or null when none
  oldestWaitingJob: Date | null;

  // Health verdict and the human-readable reasons behind it
  isHealthy: boolean;
  healthIssues: string[];
}

/**
 * Collects job counts, processing times, throughput and a health verdict for a
 * single queue, and renders them as a text report or Prometheus exposition text.
 *
 * Counts come from the queue on every `collect()`; processing times and
 * throughput are derived from the `active` / `completed` / `failed` events
 * observed on the supplied `QueueEvents` since this collector was constructed.
 */
export class QueueMetricsCollector {
  /** Most recent processing durations in ms (at most `maxSamples` entries). */
  private processingTimes: number[] = [];
  /** Timestamps (ms since epoch) of recently observed `completed` events. */
  private completedTimestamps: number[] = [];
  /** Timestamps (ms since epoch) of recently observed `failed` events. */
  private failedTimestamps: number[] = [];
  /** Job id -> time its `active` event was observed. */
  private jobStartTimes = new Map<string, number>();
  private readonly maxSamples = 1000;
  private readonly metricsInterval = 60000; // 1 minute

  /**
   * @param queue - Queue whose counts are read on each collection.
   * @param queueEvents - Event source for the same queue; listeners are
   *   attached immediately.
   */
  constructor(
    private readonly queue: Queue,
    private readonly queueEvents: QueueEvents
  ) {
    this.setupEventListeners();
  }

  /**
   * Attach the event listeners that feed the in-memory samples.
   */
  private setupEventListeners(): void {
    this.queueEvents.on('active', ({ jobId }) => {
      this.jobStartTimes.set(jobId, Date.now());
    });

    this.queueEvents.on('completed', ({ jobId }) => {
      const now = Date.now();
      this.completedTimestamps.push(now);
      this.cleanupOldTimestamps();

      // A start time is only known if this collector saw the job go active.
      const startedAt = this.jobStartTimes.get(jobId);
      if (startedAt !== undefined) {
        this.recordProcessingTime(now - startedAt);
        this.jobStartTimes.delete(jobId);
      }
    });

    this.queueEvents.on('failed', ({ jobId }) => {
      this.failedTimestamps.push(Date.now());
      this.cleanupOldTimestamps();
      // Drop the start time as well; otherwise every failed job leaks one
      // map entry for the lifetime of the collector.
      this.jobStartTimes.delete(jobId);
    });
  }

  /**
   * Record one processing duration, keeping only the newest `maxSamples`.
   *
   * @param time - Duration in milliseconds.
   */
  private recordProcessingTime(time: number): void {
    this.processingTimes.push(time);

    if (this.processingTimes.length > this.maxSamples) {
      this.processingTimes = this.processingTimes.slice(-this.maxSamples);
    }
  }

  /**
   * Discard completion/failure timestamps older than the metrics interval.
   */
  private cleanupOldTimestamps(): void {
    const cutoff = Date.now() - this.metricsInterval;
    this.completedTimestamps = this.completedTimestamps.filter(ts => ts > cutoff);
    this.failedTimestamps = this.failedTimestamps.filter(ts => ts > cutoff);
  }

  /**
   * Collect a snapshot of the current metrics.
   *
   * @returns Counts from the queue combined with the event-derived statistics.
   */
  async collect(): Promise<QueueMetrics> {
    // All queue reads are independent, so issue them together.
    const [waiting, active, completed, failed, delayed, isPaused, oldestWaitingJob] =
      await Promise.all([
        this.queue.getWaitingCount(),
        this.queue.getActiveCount(),
        this.queue.getCompletedCount(),
        this.queue.getFailedCount(),
        this.queue.getDelayedCount(),
        this.queue.isPaused(),
        this.getOldestWaitingJob(),
      ]);

    // BullMQ doesn't have getPausedCount; when the queue is paused the
    // waiting jobs are the paused ones.
    const paused = isPaused ? waiting : 0;

    const processingTime = this.calculateProcessingTimeMetrics();
    const throughput = this.calculateThroughput();

    const { isHealthy, healthIssues } = this.checkHealth({
      waiting,
      active,
      processingTime,
      throughput,
    });

    return {
      waiting,
      active,
      completed,
      failed,
      delayed,
      paused,
      processingTime,
      throughput,
      oldestWaitingJob,
      isHealthy,
      healthIssues,
    };
  }

  /**
   * Pick the value at quantile `q` from an ascending-sorted sample list.
   *
   * @param sorted - Samples sorted ascending.
   * @param q - Quantile in the range [0, 1).
   * @returns The sample at index `floor(length * q)`, or 0 when there are no samples.
   */
  private static quantile(sorted: readonly number[], q: number): number {
    return sorted[Math.floor(sorted.length * q)] ?? 0;
  }

  /**
   * Escape a string for use inside a double-quoted Prometheus label value.
   */
  private static escapeLabelValue(value: string): string {
    return value.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n');
  }

  /**
   * Summarise the retained processing-time samples.
   *
   * @returns All-zero statistics when no samples have been recorded yet.
   */
  private calculateProcessingTimeMetrics(): QueueMetrics['processingTime'] {
    if (this.processingTimes.length === 0) {
      return { avg: 0, min: 0, max: 0, p95: 0, p99: 0 };
    }

    // Sort a copy so the insertion order of the samples is preserved.
    const sorted = [...this.processingTimes].sort((a, b) => a - b);
    const sum = sorted.reduce((acc, val) => acc + val, 0);

    return {
      avg: Math.round(sum / sorted.length),
      min: sorted[0] ?? 0,
      max: sorted[sorted.length - 1] ?? 0,
      p95: QueueMetricsCollector.quantile(sorted, 0.95),
      p99: QueueMetricsCollector.quantile(sorted, 0.99),
    };
  }

  /**
   * Count the completion and failure events seen within the last metrics interval.
   */
  private calculateThroughput(): QueueMetrics['throughput'] {
    // Filter again here: cleanup only runs when an event arrives, so the
    // arrays may still hold stale entries on an idle queue.
    const cutoff = Date.now() - this.metricsInterval;
    const completedPerMinute = this.completedTimestamps.filter(ts => ts > cutoff).length;
    const failedPerMinute = this.failedTimestamps.filter(ts => ts > cutoff).length;

    return {
      completedPerMinute,
      failedPerMinute,
      totalPerMinute: completedPerMinute + failedPerMinute,
    };
  }

  /**
   * Look up the creation time of the first job in the waiting list.
   *
   * @returns The job's timestamp as a Date, or null when nothing is waiting.
   */
  private async getOldestWaitingJob(): Promise<Date | null> {
    // The range is inclusive, so (0, 0) fetches exactly one job.
    const [oldest] = await this.queue.getWaiting(0, 0);
    return oldest?.timestamp != null ? new Date(oldest.timestamp) : null;
  }

  /**
   * Evaluate the health rules against a set of metrics.
   *
   * @param metrics - The counts and statistics the rules inspect.
   * @returns `isHealthy` is true only when no rule produced an issue.
   */
  private checkHealth(metrics: {
    waiting: number;
    active: number;
    processingTime: QueueMetrics['processingTime'];
    throughput: QueueMetrics['throughput'];
  }): { isHealthy: boolean; healthIssues: string[] } {
    const issues: string[] = [];

    // Failure rate over the same window for both terms. Comparing the
    // queue's retained failed count against last-minute completions would
    // flag an idle queue with old failures as 100% failing.
    const { failedPerMinute, totalPerMinute } = metrics.throughput;
    if (totalPerMinute > 0) {
      const failureRate = failedPerMinute / totalPerMinute;
      if (failureRate > 0.1) {
        issues.push(`High failure rate: ${(failureRate * 100).toFixed(1)}%`);
      }
    }

    // Check for queue backlog
    if (metrics.waiting > 1000) {
      issues.push(`Large queue backlog: ${metrics.waiting} jobs waiting`);
    }

    // Check for slow processing (threshold: 30 seconds)
    if (metrics.processingTime.avg > 30000) {
      issues.push(
        `Slow average processing time: ${(metrics.processingTime.avg / 1000).toFixed(1)}s`
      );
    }

    // Check for an unusually high number of active jobs
    if (metrics.active > 100) {
      issues.push(`High number of active jobs: ${metrics.active}`);
    }

    return {
      isHealthy: issues.length === 0,
      healthIssues: issues,
    };
  }

  /**
   * Get a human-readable, multi-line metrics report.
   */
  async getReport(): Promise<string> {
    const metrics = await this.collect();
    const { processingTime, throughput } = metrics;
    const seconds = (ms: number): string => (ms / 1000).toFixed(2);

    const lines = [
      'Queue Metrics Report',
      '===================',
      `Status: ${metrics.isHealthy ? '✅ Healthy' : '⚠️ Issues Detected'}`,
      '',
      'Job Counts:',
      `- Waiting: ${metrics.waiting}`,
      `- Active: ${metrics.active}`,
      `- Completed: ${metrics.completed}`,
      `- Failed: ${metrics.failed}`,
      `- Delayed: ${metrics.delayed}`,
      `- Paused: ${metrics.paused ?? 0}`,
      '',
      'Performance:',
      `- Avg Processing Time: ${seconds(processingTime.avg)}s`,
      `- Min/Max: ${seconds(processingTime.min)}s / ${seconds(processingTime.max)}s`,
      `- P95/P99: ${seconds(processingTime.p95)}s / ${seconds(processingTime.p99)}s`,
      '',
      'Throughput:',
      `- Completed/min: ${throughput.completedPerMinute}`,
      `- Failed/min: ${throughput.failedPerMinute}`,
      `- Total/min: ${throughput.totalPerMinute}`,
      '',
      metrics.oldestWaitingJob
        ? `Oldest Waiting Job: ${metrics.oldestWaitingJob.toISOString()}`
        : 'No waiting jobs',
    ];

    if (metrics.healthIssues.length > 0) {
      lines.push('', 'Health Issues:', ...metrics.healthIssues.map(issue => `- ${issue}`));
    }

    return lines.join('\n');
  }

  /**
   * Export metrics in the Prometheus text exposition format.
   *
   * The processing-time summary is computed over the retained samples only
   * (at most `maxSamples`), so its `_sum` and `_count` are windowed values
   * rather than lifetime totals.
   */
  async getPrometheusMetrics(): Promise<string> {
    const metrics = await this.collect();
    const queueName = QueueMetricsCollector.escapeLabelValue(this.queue.name);
    const seconds = (ms: number): string => (ms / 1000).toFixed(3);

    // Median and exact sum come straight from the samples; the rounded
    // average is neither a median nor precise enough to rebuild the sum.
    const sorted = [...this.processingTimes].sort((a, b) => a - b);
    const sumMs = sorted.reduce((acc, val) => acc + val, 0);
    const medianMs = QueueMetricsCollector.quantile(sorted, 0.5);

    return [
      '# HELP queue_jobs_total Total number of jobs by status',
      '# TYPE queue_jobs_total gauge',
      `queue_jobs_total{queue="${queueName}",status="waiting"} ${metrics.waiting}`,
      `queue_jobs_total{queue="${queueName}",status="active"} ${metrics.active}`,
      `queue_jobs_total{queue="${queueName}",status="completed"} ${metrics.completed}`,
      `queue_jobs_total{queue="${queueName}",status="failed"} ${metrics.failed}`,
      `queue_jobs_total{queue="${queueName}",status="delayed"} ${metrics.delayed}`,
      `queue_jobs_total{queue="${queueName}",status="paused"} ${metrics.paused ?? 0}`,
      '',
      '# HELP queue_processing_time_seconds Job processing time in seconds',
      '# TYPE queue_processing_time_seconds summary',
      `queue_processing_time_seconds{queue="${queueName}",quantile="0.5"} ${seconds(medianMs)}`,
      `queue_processing_time_seconds{queue="${queueName}",quantile="0.95"} ${seconds(metrics.processingTime.p95)}`,
      `queue_processing_time_seconds{queue="${queueName}",quantile="0.99"} ${seconds(metrics.processingTime.p99)}`,
      `queue_processing_time_seconds_sum{queue="${queueName}"} ${seconds(sumMs)}`,
      `queue_processing_time_seconds_count{queue="${queueName}"} ${sorted.length}`,
      '',
      '# HELP queue_throughput_per_minute Jobs processed per minute',
      '# TYPE queue_throughput_per_minute gauge',
      `queue_throughput_per_minute{queue="${queueName}",status="completed"} ${metrics.throughput.completedPerMinute}`,
      `queue_throughput_per_minute{queue="${queueName}",status="failed"} ${metrics.throughput.failedPerMinute}`,
      `queue_throughput_per_minute{queue="${queueName}",status="total"} ${metrics.throughput.totalPerMinute}`,
      '',
      '# HELP queue_health Queue health status',
      '# TYPE queue_health gauge',
      `queue_health{queue="${queueName}"} ${metrics.isHealthy ? 1 : 0}`,
    ].join('\n');
  }
}