stock-bot/apps/data-service/src/utils/batch-helpers.ts
2025-06-13 13:38:02 -04:00

368 lines
9.6 KiB
TypeScript

import { getLogger } from '@stock-bot/logger';
import { createCache, CacheProvider } from '@stock-bot/cache';
import type { QueueService } from '../services/queue.service';
// Logger obtained from getLogger under the name 'batch-helpers'; shared by every function in this module.
const logger = getLogger('batch-helpers');
// Simple interfaces
/**
 * Options accepted by processItems; they control how items are turned into queue jobs.
 */
export interface ProcessOptions {
// Total delay window in milliseconds; it is divided by the item count (direct mode)
// or the batch count (batch mode) to compute the delay step between consecutive jobs.
totalDelayMs: number;
// Items per batch in batch mode; 100 is used when unset or 0.
batchSize?: number;
// Job priority; falls back to 1 for item jobs and 2 for batch jobs when unset or 0.
priority?: number;
// When truthy, items are grouped into batch jobs; otherwise one job is created per item.
useBatching?: boolean;
// Passed to the queue as the job's `attempts`; 3 is used when unset or 0.
retries?: number;
// Requested TTL for cached batch payloads; 86400 is used when unset or 0.
ttl?: number;
// Passed through as the job's `removeOnComplete` option; 10 is used when unset or 0.
removeOnComplete?: number;
// Passed through as the job's `removeOnFail` option; 5 is used when unset or 0.
removeOnFail?: number;
// Job routing information
// Copied into each job's data; 'generic' is used when unset.
provider?: string;
// Copied into each job's data; the fallback is 'process-item' for direct jobs
// and 'generic' for payloads stored in batch mode.
operation?: string;
}
/**
 * Summary returned by processItems.
 */
export interface BatchResult {
// Number of job entries returned by the queue across all bulk-add chunks.
jobsCreated: number;
// 'batch' when batching was used, 'direct' otherwise (also 'direct' for empty input).
mode: 'direct' | 'batch';
// Number of input items handed to processItems.
totalItems: number;
// Number of batches built; only set in batch mode.
batchesCreated?: number;
// Elapsed wall-clock time in milliseconds (difference of two Date.now() readings); 0 for empty input.
duration: number;
}
// Module-wide cache used to store batch payloads; created on first use
let cacheProvider: CacheProvider | null = null;

/**
 * Returns the shared payload cache, creating it the first time it is requested.
 */
function getCache(): CacheProvider {
  if (cacheProvider) {
    return cacheProvider;
  }

  cacheProvider = createCache({
    keyPrefix: 'batch:',
    ttl: 86400, // 24 hours default
    enableMetrics: true
  });
  return cacheProvider;
}
/**
 * Prepares the batch payload cache so that later batch operations can use it.
 * Intended to be awaited once during application startup.
 *
 * Resolves after the cache's `waitForReady(10000)` call settles
 * (presumably a 10 second timeout — confirm against the cache package).
 */
export async function initializeBatchCache(): Promise<void> {
  logger.info('Initializing batch cache...');

  const readyTimeout = 10000;
  await getCache().waitForReady(readyTimeout);

  logger.info('Batch cache initialized successfully');
}
/**
 * Entry point: turns `items` into queue jobs, either one job per item or
 * grouped into batch jobs, depending on `options.useBatching`.
 *
 * @param items - Items to enqueue; an empty array short-circuits with a zeroed result.
 * @param processor - Maps an item (and its index) to the job payload.
 * @param queue - Queue service the jobs are added to.
 * @param options - Delay window, batching switch and job options.
 * @returns Counts of what was created plus the elapsed time in milliseconds.
 * @throws Re-throws any error raised while creating jobs, after logging it.
 */
export async function processItems<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<BatchResult> {
  const startTime = Date.now();

  // Nothing to enqueue: report an empty direct run
  if (items.length === 0) {
    return { jobsCreated: 0, mode: 'direct', totalItems: 0, duration: 0 };
  }

  logger.info('Starting batch processing', {
    totalItems: items.length,
    mode: options.useBatching ? 'batch' : 'direct',
    batchSize: options.batchSize,
    totalDelayHours: (options.totalDelayMs / 1000 / 60 / 60).toFixed(1)
  });

  try {
    let result: Omit<BatchResult, 'duration'>;
    if (options.useBatching) {
      result = await processBatched(items, processor, queue, options);
    } else {
      result = await processDirect(items, processor, queue, options);
    }

    const duration = Date.now() - startTime;
    const elapsedSeconds = (duration / 1000).toFixed(1);
    logger.info('Batch processing completed', {
      ...result,
      duration: `${elapsedSeconds}s`
    });

    return { ...result, duration };
  } catch (error) {
    logger.error('Batch processing failed', error);
    throw error;
  }
}
/**
 * Direct mode: creates one 'process-item' job per input item.
 * Job delays are spaced evenly, `floor(totalDelayMs / items.length)` apart.
 *
 * @param items - Non-empty list of items (the caller handles the empty case).
 * @param processor - Produces each job's payload from the item and its index.
 * @param queue - Queue service the jobs are added to.
 * @param options - Supplies the delay window and per-job defaults.
 * @returns Item count, number of jobs the queue reported as created, and mode 'direct'.
 */
async function processDirect<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<Omit<BatchResult, 'duration'>> {
  const itemCount = items.length;
  const delayPerItem = Math.floor(options.totalDelayMs / itemCount);

  logger.info('Creating direct jobs', {
    totalItems: itemCount,
    delayPerItem: `${(delayPerItem / 1000).toFixed(1)}s`
  });

  // Resolve the per-job defaults once; they are identical for every item
  const provider = options.provider || 'generic';
  const operation = options.operation || 'process-item';
  const priority = options.priority || 1;
  const attempts = options.retries || 3;
  const removeOnComplete = options.removeOnComplete || 10;
  const removeOnFail = options.removeOnFail || 5;

  const jobs = items.map((item, index) => {
    const payload = processor(item, index);
    return {
      name: 'process-item',
      data: {
        type: 'process-item',
        provider,
        operation,
        payload,
        priority
      },
      opts: {
        delay: index * delayPerItem,
        priority,
        attempts,
        removeOnComplete,
        removeOnFail
      }
    };
  });

  const createdJobs = await addJobsInChunks(queue, jobs);

  return {
    totalItems: itemCount,
    jobsCreated: createdJobs.length,
    mode: 'direct'
  };
}
/**
 * Batch mode: splits the items into groups, stores each group (together with the
 * serialized processor) in the cache, and creates one 'process-batch' job per group
 * that carries only the cache key.
 *
 * Batch job delays are spaced `floor(totalDelayMs / batchCount)` apart.
 *
 * @param items - Non-empty list of items (the caller handles the empty case).
 * @param processor - Stored alongside each batch for later use by processBatchJob.
 * @param queue - Queue service the batch jobs are added to.
 * @param options - Supplies batch size, delay window and per-job defaults.
 * @returns Item count, jobs the queue reported as created, batch count, and mode 'batch'.
 */
async function processBatched<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<Omit<BatchResult, 'duration'>> {
  const batchSize = options.batchSize || 100;
  const batches = createBatches(items, batchSize);
  const totalBatches = batches.length;
  const delayPerBatch = Math.floor(options.totalDelayMs / totalBatches);

  logger.info('Creating batch jobs', {
    totalItems: items.length,
    batchSize,
    totalBatches,
    delayPerBatch: `${(delayPerBatch / 1000 / 60).toFixed(2)} minutes`
  });

  // Stores one batch in the cache and describes the job that will later expand it
  const toBatchJob = async (batch: T[], batchIndex: number) => {
    const payloadKey = await storePayload(batch, processor, options);
    const provider = options.provider || 'generic';
    const priority = options.priority || 2;

    return {
      name: 'process-batch',
      data: {
        type: 'process-batch',
        provider,
        operation: 'process-batch-items',
        payload: {
          payloadKey,
          batchIndex,
          totalBatches,
          itemCount: batch.length
        },
        priority
      },
      opts: {
        delay: batchIndex * delayPerBatch,
        priority,
        attempts: options.retries || 3,
        removeOnComplete: options.removeOnComplete || 10,
        removeOnFail: options.removeOnFail || 5
      }
    };
  };

  // All payloads are stored concurrently
  const batchJobs = await Promise.all(
    batches.map((batch, batchIndex) => toBatchJob(batch, batchIndex))
  );

  const createdJobs = await addJobsInChunks(queue, batchJobs);

  return {
    totalItems: items.length,
    jobsCreated: createdJobs.length,
    batchesCreated: totalBatches,
    mode: 'batch'
  };
}
/**
 * Handles a 'process-batch' job: loads the stored batch payload from the cache,
 * rebuilds the processor function, creates one 'process-item' job per stored item,
 * and finally deletes the payload from the cache.
 *
 * @param jobData - Batch job payload; `payloadKey`, `batchIndex`, `totalBatches`
 *   and `itemCount` are read from it.
 * @param queue - Queue service the item jobs are added to.
 * @returns `{ batchIndex, itemsProcessed, jobsCreated }`.
 * @throws If the payload is missing or malformed, or the stored processor does not
 *   evaluate to a function. Errors are logged and re-thrown.
 */
export async function processBatchJob(jobData: any, queue: QueueService): Promise<any> {
  const { payloadKey, batchIndex, totalBatches, itemCount } = jobData;

  logger.debug('Processing batch job', {
    batchIndex,
    totalBatches,
    itemCount
  });

  try {
    const payload = await loadPayload(payloadKey);

    const payloadIsValid =
      Boolean(payload) &&
      Array.isArray(payload.items) &&
      typeof payload.processorStr === 'string' &&
      payload.processorStr.length > 0;
    if (!payloadIsValid) {
      logger.error('Invalid payload data', { payloadKey, payload });
      throw new Error(`Invalid payload data for key: ${payloadKey}`);
    }

    const { items, processorStr } = payload;
    // A payload without stored options must not crash the job; every option has a fallback below
    const options = payload.options || {};

    // SECURITY(review): this evaluates source text read back from the cache. Anyone able
    // to write to the cache can run arbitrary code in this process. Functions that close
    // over outer variables, or that were declared with method shorthand, also do not
    // survive the toString() round trip. Consider replacing this with a registry of named
    // processors.
    // Deserialize the processor function
    const processor = new Function('return ' + processorStr)();
    if (typeof processor !== 'function') {
      throw new Error(`Stored processor is not a function for key: ${payloadKey}`);
    }

    const priority = options.priority || 1;
    const delayPerItem = options.delayPerItem || 1000;

    const jobs = items.map((item: any, index: number) => ({
      name: 'process-item',
      data: {
        type: 'process-item',
        provider: options.provider || 'generic',
        operation: options.operation || 'generic',
        payload: processor(item, index),
        priority
      },
      opts: {
        delay: index * delayPerItem,
        priority,
        attempts: options.retries || 3,
        // Same retention defaults as jobs created in direct mode, so finished
        // item jobs from batch mode do not pile up in the queue
        removeOnComplete: options.removeOnComplete || 10,
        removeOnFail: options.removeOnFail || 5
      }
    }));

    const createdJobs = await addJobsInChunks(queue, jobs);

    // Cleanup payload after processing.
    // NOTE(review): addJobsInChunks logs and skips failed chunks, so the payload is
    // removed even when `createdJobs.length` is lower than `jobs.length`.
    await cleanupPayload(payloadKey);

    return {
      batchIndex,
      itemsProcessed: items.length,
      jobsCreated: createdJobs.length
    };
  } catch (error) {
    logger.error('Batch job processing failed', { batchIndex, error });
    throw error;
  }
}
// Helper functions
/**
 * Splits `items` into consecutive slices of at most `batchSize` elements.
 * The last slice may be shorter. The input array is not modified.
 *
 * @param items - Items to split; an empty array always yields `[]`.
 * @param batchSize - Maximum slice length; fractional values are rounded down.
 * @returns The slices, in input order.
 * @throws RangeError if `items` is non-empty and `batchSize` is NaN or below 1
 *   (the loop could otherwise never advance, or would emit empty slices).
 */
function createBatches<T>(items: T[], batchSize: number): T[][] {
  if (items.length === 0) {
    return [];
  }
  // Written as a negated comparison so that NaN is rejected as well
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be a number >= 1, received ${batchSize}`);
  }

  // Whole-number step keeps every slice (except possibly the last) the same length
  const size = Math.floor(batchSize);
  const batches: T[][] = [];
  for (let start = 0; start < items.length; start += size) {
    batches.push(items.slice(start, start + size));
  }
  return batches;
}
/**
 * Stores one batch (items, the processor's source text and the job options needed
 * later) in the cache and returns the key it was stored under.
 *
 * The key has the form `provider:operation:payload_<timestamp>_<random>`.
 *
 * The cache TTL is the requested `options.ttl` (86400 when unset), raised if necessary
 * so that it covers the whole `options.totalDelayMs` window plus a grace period.
 * Batch jobs are delayed by up to that window, and a payload that expires first
 * makes the job fail with "Payload not found".
 * NOTE(review): this assumes the cache TTL is expressed in seconds, as the
 * "24 hours" comment next to the 86400 default suggests — confirm against the cache package.
 *
 * @param items - Items belonging to this batch.
 * @param processor - Function whose `toString()` output is stored for later re-creation.
 * @param options - Source of routing info, priority, retries, TTL and the delay window.
 * @returns The cache key of the stored payload.
 */
async function storePayload<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  options: ProcessOptions
): Promise<string> {
  const cache = getCache();

  // Create more specific key: batch:provider:operation:payload_timestamp_random
  const timestamp = Date.now();
  const randomId = Math.random().toString(36).slice(2, 11);
  const provider = options.provider || 'generic';
  const operation = options.operation || 'generic';
  const key = `${provider}:${operation}:payload_${timestamp}_${randomId}`;

  const payload = {
    items,
    processorStr: processor.toString(),
    options: {
      delayPerItem: 1000,
      priority: options.priority || 1,
      retries: options.retries || 3,
      // Retention settings, so jobs expanded from this batch can honor them
      removeOnComplete: options.removeOnComplete,
      removeOnFail: options.removeOnFail,
      // Store routing information for later use
      provider,
      operation
    },
    createdAt: timestamp
  };

  // Extra lifetime beyond the scheduled delay, to absorb queue backlog and retries
  const graceSeconds = 3600;
  const requestedTtl = options.ttl || 86400;
  const delayMs = options.totalDelayMs;
  const minimumTtl =
    Number.isFinite(delayMs) && delayMs > 0 ? Math.ceil(delayMs / 1000) + graceSeconds : 0;
  const ttlSeconds = Math.max(requestedTtl, minimumTtl);

  logger.debug('Storing batch payload', {
    key,
    itemCount: items.length
  });

  await cache.set(key, payload, ttlSeconds);

  logger.debug('Stored batch payload successfully', {
    key,
    itemCount: items.length
  });

  return key;
}
/**
 * Fetches a previously stored batch payload from the cache.
 *
 * @param key - Cache key returned when the payload was stored.
 * @returns The cached payload.
 * @throws Error if the cache returns nothing (or a falsy value) for `key`.
 */
async function loadPayload(key: string): Promise<any> {
  const cache = getCache();
  logger.debug('Loading batch payload', { key });

  const data = await cache.get(key);
  if (data) {
    logger.debug('Loaded batch payload successfully', { key });
    return data;
  }

  logger.error('Payload not found in cache', { key });
  throw new Error(`Payload not found: ${key}`);
}
/**
 * Best-effort removal of a stored batch payload.
 * Failures are logged as warnings and never propagated to the caller.
 *
 * @param key - Cache key of the payload to delete.
 */
async function cleanupPayload(key: string): Promise<void> {
  try {
    await getCache().del(key);
    logger.debug('Cleaned up payload', { key });
  } catch (error) {
    // Deliberately swallowed: a leftover payload is not worth failing the job for
    logger.warn('Failed to cleanup payload', { key, error });
  }
}
/**
 * Adds jobs to the queue in consecutive chunks via `queue.addBulk`.
 *
 * This is best-effort: a chunk whose `addBulk` call fails is logged and skipped,
 * and the remaining chunks are still attempted. Callers can detect skipped chunks
 * by comparing the length of the result with `jobs.length`.
 *
 * @param queue - Queue service providing `addBulk`.
 * @param jobs - Job descriptions to add; an empty array resolves to `[]` immediately.
 * @param chunkSize - Maximum jobs per `addBulk` call (default 100); fractional values
 *   are rounded down.
 * @returns Everything returned by the successful `addBulk` calls, in order.
 * @throws RangeError if `jobs` is non-empty and `chunkSize` is NaN or below 1
 *   (the loop could otherwise never advance).
 */
async function addJobsInChunks(queue: QueueService, jobs: any[], chunkSize = 100): Promise<any[]> {
  if (jobs.length === 0) {
    return [];
  }
  // Written as a negated comparison so that NaN is rejected as well
  if (!(chunkSize >= 1)) {
    throw new RangeError(`chunkSize must be a number >= 1, received ${chunkSize}`);
  }

  const step = Math.floor(chunkSize);
  const allCreatedJobs: any[] = [];

  for (let start = 0; start < jobs.length; start += step) {
    const chunk = jobs.slice(start, start + step);
    try {
      const createdJobs = await queue.addBulk(chunk);
      allCreatedJobs.push(...createdJobs);

      // Small delay between chunks to avoid overwhelming Redis
      const hasMoreChunks = start + step < jobs.length;
      if (hasMoreChunks) {
        await new Promise<void>(resolve => setTimeout(resolve, 100));
      }
    } catch (error) {
      // Keep going: one failed chunk must not prevent the others from being queued
      logger.error('Failed to add job chunk', {
        startIndex: start,
        chunkSize: chunk.length,
        error
      });
    }
  }

  return allCreatedJobs;
}