import { getLogger } from '@stock-bot/logger';
import { createCache, CacheProvider } from '@stock-bot/cache';
import type { QueueService } from '../services/queue.service';

const logger = getLogger('batch-helpers');

// Simple interfaces

/** Options controlling how `processItems` turns items into queue jobs. */
export interface ProcessOptions {
  /** Total time window in ms; divided by the item (or batch) count to space out job delays. */
  totalDelayMs: number;
  /** Items per batch in batch mode. Falls back to 100 when missing or not a positive integer. */
  batchSize?: number;
  /** Job priority. Falsy values fall back to 1 for item jobs and 2 for batch jobs. */
  priority?: number;
  /** When truthy, items are grouped into batch jobs instead of one job per item. */
  useBatching?: boolean;
  /** Passed as the job `attempts` option. Falsy values fall back to 3. */
  retries?: number;
  /** TTL handed to the cache when storing a batch payload. Falsy values fall back to 86400. */
  ttl?: number;
  /** Passed as the job `removeOnComplete` option. Falsy values fall back to 10. */
  removeOnComplete?: number;
  /** Passed as the job `removeOnFail` option. Falsy values fall back to 5. */
  removeOnFail?: number;
  // Job routing information
  provider?: string;
  operation?: string;
}

/** Summary returned by `processItems`. */
export interface BatchResult {
  jobsCreated: number;
  mode: 'direct' | 'batch';
  totalItems: number;
  batchesCreated?: number;
  /** Wall-clock time spent inside `processItems`, in milliseconds. */
  duration: number;
}

// Cache instance for payload storage
let cacheProvider: CacheProvider | null = null;

/** Returns the shared payload cache, creating it on first use. */
function getCache(): CacheProvider {
  if (!cacheProvider) {
    cacheProvider = createCache({
      keyPrefix: 'batch:',
      ttl: 86400, // 24 hours default
      enableMetrics: true
    });
  }
  return cacheProvider;
}

/**
 * Initialize the batch cache before any batch operations.
 * This should be called during application startup.
 *
 * Resolves once `cache.waitForReady(10000)` resolves.
 */
export async function initializeBatchCache(): Promise<void> {
  logger.info('Initializing batch cache...');
  const cache = getCache();
  await cache.waitForReady(10000);
  logger.info('Batch cache initialized successfully');
}

/**
 * Main function - processes items either directly or in batches.
 *
 * @param items - Items to enqueue. An empty array short-circuits with a zeroed result.
 * @param processor - Maps an item (and its index) to a job payload.
 * @param queue - Queue the jobs are added to.
 * @param options - Scheduling, retry and routing options; `useBatching` selects the mode.
 * @returns Counts of what was created plus the elapsed time in milliseconds.
 * @throws Rethrows any error from the underlying mode after logging it.
 */
export async function processItems<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<BatchResult> {
  const startTime = Date.now();

  if (items.length === 0) {
    return { jobsCreated: 0, mode: 'direct', totalItems: 0, duration: 0 };
  }

  logger.info('Starting batch processing', {
    totalItems: items.length,
    mode: options.useBatching ? 'batch' : 'direct',
    batchSize: options.batchSize,
    totalDelayHours: (options.totalDelayMs / 1000 / 60 / 60).toFixed(1)
  });

  try {
    const result = options.useBatching
      ? await processBatched(items, processor, queue, options)
      : await processDirect(items, processor, queue, options);

    const duration = Date.now() - startTime;

    logger.info('Batch processing completed', {
      ...result,
      duration: `${(duration / 1000).toFixed(1)}s`
    });

    return { ...result, duration };
  } catch (error) {
    logger.error('Batch processing failed', error);
    throw error;
  }
}

/**
 * Process items directly - each item becomes a separate job.
 *
 * Job `index` is delayed by `index * floor(totalDelayMs / items.length)` ms.
 * The processor is invoked here, while the jobs are being built, and its
 * return value is used as the job payload as-is.
 */
async function processDirect<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<Omit<BatchResult, 'duration'>> {
  const delayPerItem = Math.floor(options.totalDelayMs / items.length);

  logger.info('Creating direct jobs', {
    totalItems: items.length,
    delayPerItem: `${(delayPerItem / 1000).toFixed(1)}s`
  });

  const jobs = items.map((item, index) => ({
    name: 'process-item',
    data: {
      type: 'process-item',
      provider: options.provider || 'generic',
      operation: options.operation || 'process-item',
      payload: processor(item, index),
      priority: options.priority || 1
    },
    opts: {
      delay: index * delayPerItem,
      priority: options.priority || 1,
      attempts: options.retries || 3,
      removeOnComplete: options.removeOnComplete || 10,
      removeOnFail: options.removeOnFail || 5
    }
  }));

  const createdJobs = await addJobsInChunks(queue, jobs);

  return {
    totalItems: items.length,
    jobsCreated: createdJobs.length,
    mode: 'direct'
  };
}

/**
 * Process items in batches - groups of items are stored and processed together.
 *
 * Each batch is written to the cache together with the serialized processor,
 * and one 'process-batch' job carrying the cache key is queued per batch.
 * Batch `batchIndex` is delayed by `batchIndex * floor(totalDelayMs / batchCount)` ms.
 */
async function processBatched<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<Omit<BatchResult, 'duration'>> {
  // A negative or fractional size would make the slicing loop run forever or
  // emit empty/duplicate batches, so anything but a positive integer uses the default.
  const requestedSize = options.batchSize;
  const batchSize =
    typeof requestedSize === 'number' && Number.isInteger(requestedSize) && requestedSize > 0
      ? requestedSize
      : 100;
  const batches = createBatches(items, batchSize);
  const delayPerBatch = Math.floor(options.totalDelayMs / batches.length);

  logger.info('Creating batch jobs', {
    totalItems: items.length,
    batchSize,
    totalBatches: batches.length,
    delayPerBatch: `${(delayPerBatch / 1000 / 60).toFixed(2)} minutes`
  });

  const batchJobs = await Promise.all(
    batches.map(async (batch, batchIndex) => {
      const payloadKey = await storePayload(batch, processor, options);

      return {
        name: 'process-batch',
        data: {
          type: 'process-batch',
          provider: options.provider || 'generic',
          operation: 'process-batch-items',
          payload: {
            payloadKey,
            batchIndex,
            totalBatches: batches.length,
            itemCount: batch.length
          },
          priority: options.priority || 2
        },
        opts: {
          delay: batchIndex * delayPerBatch,
          priority: options.priority || 2,
          attempts: options.retries || 3,
          removeOnComplete: options.removeOnComplete || 10,
          removeOnFail: options.removeOnFail || 5
        }
      };
    })
  );

  const createdJobs = await addJobsInChunks(queue, batchJobs);

  return {
    totalItems: items.length,
    jobsCreated: createdJobs.length,
    batchesCreated: batches.length,
    mode: 'batch'
  };
}

/**
 * Process a batch job - loads payload from cache and creates individual jobs.
 *
 * @param jobData - Object exposing `payloadKey`, `batchIndex`, `totalBatches` and
 *   `itemCount` at its top level.
 *   NOTE(review): `processBatched` nests these under `data.payload`, so callers
 *   presumably pass that inner object - confirm against the worker.
 * @param queue - Queue the per-item jobs are added to.
 * @returns `{ batchIndex, itemsProcessed, jobsCreated }`.
 * @throws If the payload is missing or malformed, or the stored processor does not
 *   deserialize to a function. Errors are logged and rethrown.
 */
export async function processBatchJob(jobData: any, queue: QueueService): Promise<any> {
  const { payloadKey, batchIndex, totalBatches, itemCount } = jobData;

  logger.debug('Processing batch job', { batchIndex, totalBatches, itemCount });

  try {
    const payload = await loadPayload(payloadKey);

    if (!payload || !payload.items || !payload.processorStr) {
      logger.error('Invalid payload data', { payloadKey, payload });
      throw new Error(`Invalid payload data for key: ${payloadKey}`);
    }

    const { items, processorStr } = payload;
    // Tolerate payloads stored without an options object.
    const options = payload.options || {};

    // Deserialize the processor function.
    // SECURITY NOTE(review): this evaluates source text read back from the cache.
    // Anyone able to write to that cache can run arbitrary code in this process.
    // Serialization via toString() also drops closure state, so the processor
    // must be self-contained.
    const processor = new Function('return ' + processorStr)();
    if (typeof processor !== 'function') {
      throw new Error(`Stored processor is not a function for key: ${payloadKey}`);
    }

    const jobs = items.map((item: any, index: number) => ({
      name: 'process-item',
      data: {
        type: 'process-item',
        provider: options.provider || 'generic',
        operation: options.operation || 'generic',
        payload: processor(item, index),
        priority: options.priority || 1
      },
      opts: {
        delay: index * (options.delayPerItem || 1000),
        priority: options.priority || 1,
        attempts: options.retries || 3,
        // Same retention defaults as direct mode so finished item jobs do not pile up.
        removeOnComplete: options.removeOnComplete || 10,
        removeOnFail: options.removeOnFail || 5
      }
    }));

    const createdJobs = await addJobsInChunks(queue, jobs);

    // addJobsInChunks logs and skips failed chunks, so only drop the payload
    // when every item was enqueued; otherwise it is the last copy of the
    // items that were not.
    if (createdJobs.length >= jobs.length) {
      await cleanupPayload(payloadKey);
    } else {
      logger.warn('Not all batch items were enqueued, keeping payload', {
        payloadKey,
        batchIndex,
        expected: jobs.length,
        created: createdJobs.length
      });
    }

    return {
      batchIndex,
      itemsProcessed: items.length,
      jobsCreated: createdJobs.length
    };
  } catch (error) {
    logger.error('Batch job processing failed', { batchIndex, error });
    throw error;
  }
}

// Helper functions

/**
 * Splits `items` into consecutive slices of at most `batchSize` elements.
 *
 * @throws RangeError if `batchSize` is not a positive integer, because the
 *   slicing loop would otherwise never terminate or would yield empty slices.
 */
function createBatches<T>(items: T[], batchSize: number): T[][] {
  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw new RangeError(`batchSize must be a positive integer, got: ${batchSize}`);
  }

  const batches: T[][] = [];
  for (let i = 0; i < items.length; i += batchSize) {
    batches.push(items.slice(i, i + batchSize));
  }
  return batches;
}

/**
 * Stores one batch (items, the processor's source text and the options needed
 * to rebuild the item jobs) in the cache.
 *
 * @returns The cache key the payload was stored under.
 */
async function storePayload<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  options: ProcessOptions
): Promise<string> {
  const cache = getCache();

  // Key layout: provider:operation:payload_timestamp_random
  // (the cache is created with keyPrefix 'batch:').
  const timestamp = Date.now();
  // slice(2, 11) yields the same nine characters the deprecated substr(2, 9) did.
  const randomId = Math.random().toString(36).slice(2, 11);
  const provider = options.provider || 'generic';
  const operation = options.operation || 'generic';

  const key = `${provider}:${operation}:payload_${timestamp}_${randomId}`;

  const payload = {
    items,
    processorStr: processor.toString(),
    options: {
      delayPerItem: 1000,
      priority: options.priority || 1,
      retries: options.retries || 3,
      // Carried along so batch-mode item jobs get the same retention as direct mode.
      removeOnComplete: options.removeOnComplete || 10,
      removeOnFail: options.removeOnFail || 5,
      // Store routing information for later use
      provider: options.provider || 'generic',
      operation: options.operation || 'generic'
    },
    createdAt: Date.now()
  };

  logger.debug('Storing batch payload', { key, itemCount: items.length });

  await cache.set(key, payload, options.ttl || 86400);

  logger.debug('Stored batch payload successfully', { key, itemCount: items.length });

  return key;
}

/**
 * Reads a stored batch payload from the cache.
 *
 * @throws If the cache returns a falsy value for `key`.
 */
async function loadPayload(key: string): Promise<any> {
  const cache = getCache();

  logger.debug('Loading batch payload', { key });

  const data = await cache.get(key);

  if (!data) {
    logger.error('Payload not found in cache', { key });
    throw new Error(`Payload not found: ${key}`);
  }

  logger.debug('Loaded batch payload successfully', { key });
  return data;
}

/**
 * Deletes a stored payload. Best effort: a failure is logged as a warning
 * and never propagated to the caller.
 */
async function cleanupPayload(key: string): Promise<void> {
  try {
    const cache = getCache();
    await cache.del(key);
    logger.debug('Cleaned up payload', { key });
  } catch (error) {
    logger.warn('Failed to cleanup payload', { key, error });
  }
}
/**
 * Adds jobs to the queue in sequential chunks via `queue.addBulk`.
 *
 * Best effort: a chunk whose `addBulk` call rejects is logged and skipped, so
 * the returned array may hold fewer entries than `jobs`. Callers that need
 * all-or-nothing semantics must compare the lengths themselves.
 *
 * @param queue - Queue to add the jobs to.
 * @param jobs - Job definitions, forwarded to `addBulk` unchanged and in order.
 * @param chunkSize - Maximum jobs per `addBulk` call. Anything that is not a
 *   positive integer falls back to 100.
 * @returns Everything returned by the successful `addBulk` calls, concatenated.
 */
async function addJobsInChunks(
  queue: QueueService,
  jobs: any[],
  chunkSize = 100
): Promise<any[]> {
  // A zero or negative step never advances the loop, and a fractional one yields
  // empty or overlapping slices, so invalid sizes fall back to the default.
  const step = Number.isInteger(chunkSize) && chunkSize > 0 ? chunkSize : 100;
  const allCreatedJobs: any[] = [];

  for (let i = 0; i < jobs.length; i += step) {
    const chunk = jobs.slice(i, i + step);
    try {
      const createdJobs = await queue.addBulk(chunk);
      allCreatedJobs.push(...createdJobs);

      // Small delay between chunks to avoid overwhelming Redis
      if (i + step < jobs.length) {
        await new Promise<void>(resolve => setTimeout(resolve, 100));
      }
    } catch (error) {
      logger.error('Failed to add job chunk', {
        startIndex: i,
        chunkSize: chunk.length,
        error
      });
    }
  }

  return allCreatedJobs;
}