368 lines
9.6 KiB
TypeScript
368 lines
9.6 KiB
TypeScript
import { getLogger } from '@stock-bot/logger';
|
|
import { createCache, CacheProvider } from '@stock-bot/cache';
|
|
import type { QueueService } from '../services/queue.service';
|
|
|
|
// Module-level logger, created once under the name 'batch-helpers' and used by the batch helper functions below.
const logger = getLogger('batch-helpers');
|
|
|
|
// Simple interfaces
|
|
/**
 * Settings accepted by `processItems` that control how items become queue jobs.
 */
export interface ProcessOptions {
  /** Total window, in milliseconds, across which the job delays are evenly spread. */
  totalDelayMs: number;

  /** When truthy, items are grouped into batch jobs; otherwise one job is created per item. */
  useBatching?: boolean;
  /** Number of items per batch in batch mode; a falsy value falls back to 100. */
  batchSize?: number;

  /** Job priority; a falsy value falls back to 1 for per-item jobs and 2 for batch jobs. */
  priority?: number;
  /** Passed to the queue as the job's `attempts`; a falsy value falls back to 3. */
  retries?: number;
  /** TTL handed to the cache when a batch payload is stored; a falsy value falls back to 86400. */
  ttl?: number;
  /** Passed through as the job's `removeOnComplete` option; a falsy value falls back to 10. */
  removeOnComplete?: number;
  /** Passed through as the job's `removeOnFail` option; a falsy value falls back to 5. */
  removeOnFail?: number;

  // Job routing information
  /** Provider name written into the job data; a falsy value falls back to 'generic'. */
  provider?: string;
  /** Operation name written into the job data; the fallback depends on the job kind. */
  operation?: string;
}
|
|
|
|
/**
 * Summary returned by `processItems` once the jobs have been handed to the queue.
 */
export interface BatchResult {
  /** Number of jobs the queue reported as created. */
  jobsCreated: number;
  /** Which strategy was used: one job per item ('direct') or one job per batch ('batch'). */
  mode: 'direct' | 'batch';
  /** Number of input items. */
  totalItems: number;
  /** Number of batches built; only set in 'batch' mode. */
  batchesCreated?: number;
  /** Elapsed wall-clock time of the call in milliseconds (0 for empty input). */
  duration: number;
}
|
|
|
|
// Lazily created cache instance used for batch payload storage
let cacheProvider: CacheProvider | null = null;

/**
 * Returns the shared cache provider, creating it on the first call.
 *
 * The provider is created with the 'batch:' key prefix, a 24 hour default TTL
 * and metrics enabled; later calls return the same instance.
 */
function getCache(): CacheProvider {
  if (cacheProvider) {
    return cacheProvider;
  }

  cacheProvider = createCache({
    keyPrefix: 'batch:',
    ttl: 86400, // 24 hours default
    enableMetrics: true
  });
  return cacheProvider;
}
|
|
|
|
/**
 * Prepare the batch cache so it is ready before any batch operation runs.
 * Intended to be called once during application startup.
 *
 * Resolves after the cache's `waitForReady(10000)` call resolves; any rejection
 * from that call propagates to the caller.
 */
export async function initializeBatchCache(): Promise<void> {
  // NOTE(review): 10000 is presumably a timeout in milliseconds - confirm against CacheProvider.
  const readyTimeout = 10000;

  logger.info('Initializing batch cache...');
  await getCache().waitForReady(readyTimeout);
  logger.info('Batch cache initialized successfully');
}
|
|
|
|
/**
 * Entry point: turns `items` into queue jobs, either one job per item or one
 * job per batch, depending on `options.useBatching`.
 *
 * @param items - Items to enqueue. An empty array short-circuits with a zeroed 'direct' result.
 * @param processor - Maps an item and its index to the payload of its job.
 * @param queue - Queue service the jobs are added to.
 * @param options - Delay, batching, priority, retry and routing settings.
 * @returns Summary of what was created, including the elapsed time in milliseconds.
 * @throws Rethrows any error raised while creating jobs, after logging it.
 */
export async function processItems<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<BatchResult> {
  const startedAt = Date.now();

  // Nothing to enqueue: report an empty direct run without touching the queue.
  if (!items.length) {
    return { jobsCreated: 0, mode: 'direct', totalItems: 0, duration: 0 };
  }

  const totalDelayHours = (options.totalDelayMs / 1000 / 60 / 60).toFixed(1);
  logger.info('Starting batch processing', {
    totalItems: items.length,
    mode: options.useBatching ? 'batch' : 'direct',
    batchSize: options.batchSize,
    totalDelayHours
  });

  try {
    let result: Omit<BatchResult, 'duration'>;
    if (options.useBatching) {
      result = await processBatched(items, processor, queue, options);
    } else {
      result = await processDirect(items, processor, queue, options);
    }

    const duration = Date.now() - startedAt;
    const durationLabel = `${(duration / 1000).toFixed(1)}s`;

    // The log entry carries a human-readable duration; the return value keeps raw milliseconds.
    logger.info('Batch processing completed', { ...result, duration: durationLabel });

    return { ...result, duration };
  } catch (error) {
    logger.error('Batch processing failed', error);
    throw error;
  }
}
|
|
|
|
/**
 * Direct mode: every item becomes its own 'process-item' job.
 *
 * Job delays are staggered: the item at index `i` is delayed by
 * `i * floor(totalDelayMs / items.length)` milliseconds.
 *
 * @param items - Items to enqueue (`processItems` only calls this with a non-empty array).
 * @param processor - Maps an item and its index to the job payload.
 * @param queue - Queue service the jobs are added to.
 * @param options - Delay, priority, retry, retention and routing settings.
 * @returns Item count and created-job count, tagged with `mode: 'direct'`.
 */
async function processDirect<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<Omit<BatchResult, 'duration'>> {
  const spacingMs = Math.floor(options.totalDelayMs / items.length);

  logger.info('Creating direct jobs', {
    totalItems: items.length,
    delayPerItem: `${(spacingMs / 1000).toFixed(1)}s`
  });

  // Builds the queue job description for a single item.
  const toJob = (item: T, position: number) => {
    const data = {
      type: 'process-item',
      provider: options.provider || 'generic',
      operation: options.operation || 'process-item',
      payload: processor(item, position),
      priority: options.priority || 1
    };
    const opts = {
      delay: position * spacingMs,
      priority: options.priority || 1,
      attempts: options.retries || 3,
      removeOnComplete: options.removeOnComplete || 10,
      removeOnFail: options.removeOnFail || 5
    };
    return { name: 'process-item', data, opts };
  };

  const jobs = items.map((item, position) => toJob(item, position));
  const created = await addJobsInChunks(queue, jobs);

  return {
    totalItems: items.length,
    jobsCreated: created.length,
    mode: 'direct'
  };
}
|
|
|
|
/**
 * Batch mode: items are split into groups, each group is stored in the cache,
 * and one 'process-batch' job referencing the stored payload is enqueued per group.
 *
 * Batch delays are staggered: batch `i` is delayed by
 * `i * floor(totalDelayMs / batchCount)` milliseconds.
 *
 * @param items - Items to group (`processItems` only calls this with a non-empty array).
 * @param processor - Item-to-payload mapper; it is stored alongside each batch.
 * @param queue - Queue service the batch jobs are added to.
 * @param options - Batch size, delay, priority, retry, retention and routing settings.
 * @returns Item, job and batch counts, tagged with `mode: 'batch'`.
 */
async function processBatched<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<Omit<BatchResult, 'duration'>> {
  const batchSize = options.batchSize || 100;
  const batches = createBatches(items, batchSize);
  const spacingMs = Math.floor(options.totalDelayMs / batches.length);

  logger.info('Creating batch jobs', {
    totalItems: items.length,
    batchSize,
    totalBatches: batches.length,
    delayPerBatch: `${(spacingMs / 1000 / 60).toFixed(2)} minutes`
  });

  // Stores one batch in the cache and describes the job that will later expand it.
  const toBatchJob = async (batch: T[], batchIndex: number) => {
    const payloadKey = await storePayload(batch, processor, options);

    const data = {
      type: 'process-batch',
      provider: options.provider || 'generic',
      operation: 'process-batch-items',
      payload: {
        payloadKey,
        batchIndex,
        totalBatches: batches.length,
        itemCount: batch.length
      },
      priority: options.priority || 2
    };
    const opts = {
      delay: batchIndex * spacingMs,
      priority: options.priority || 2,
      attempts: options.retries || 3,
      removeOnComplete: options.removeOnComplete || 10,
      removeOnFail: options.removeOnFail || 5
    };
    return { name: 'process-batch', data, opts };
  };

  // All payloads are stored concurrently; the job list keeps batch order.
  const pendingJobs = batches.map((batch, batchIndex) => toBatchJob(batch, batchIndex));
  const batchJobs = await Promise.all(pendingJobs);

  const created = await addJobsInChunks(queue, batchJobs);

  return {
    totalItems: items.length,
    jobsCreated: created.length,
    batchesCreated: batches.length,
    mode: 'batch'
  };
}
|
|
|
|
/**
 * Process a batch job: load the stored payload from the cache, rebuild the
 * processor function from its source text, and enqueue one 'process-item'
 * job per stored item. The payload is removed from the cache afterwards.
 *
 * @param jobData - Batch descriptor carrying `payloadKey`, `batchIndex`,
 *   `totalBatches` and `itemCount`.
 * @param queue - Queue service the per-item jobs are added to.
 * @returns `{ batchIndex, itemsProcessed, jobsCreated }`.
 * @throws Error when the payload cannot be loaded, is malformed (`items` is
 *   not an array or `processorStr` is not a non-empty string), or when the
 *   processor source does not evaluate to a function. Every failure is logged
 *   and rethrown.
 */
export async function processBatchJob(jobData: any, queue: QueueService): Promise<any> {
  const { payloadKey, batchIndex, totalBatches, itemCount } = jobData;

  logger.debug('Processing batch job', {
    batchIndex,
    totalBatches,
    itemCount
  });

  try {
    const payload = await loadPayload(payloadKey);
    const payloadIsValid =
      !!payload &&
      Array.isArray(payload.items) &&
      typeof payload.processorStr === 'string' &&
      payload.processorStr.length > 0;
    if (!payloadIsValid) {
      logger.error('Invalid payload data', { payloadKey, payload });
      throw new Error(`Invalid payload data for key: ${payloadKey}`);
    }

    const { items, processorStr } = payload;
    // A payload stored without `options` uses the per-field defaults below
    // instead of crashing on property access.
    const options = payload.options ?? {};

    // Deserialize the processor function.
    // SECURITY: this evaluates source text read from the cache. Anyone able to
    // write to the cache can run arbitrary code in this process, so the cache
    // must be treated as trusted storage. The rebuilt function is created in
    // the global scope and cannot see variables the original closed over.
    const processor: unknown = new Function('return ' + processorStr)();
    if (typeof processor !== 'function') {
      throw new Error(`Stored processor for key ${payloadKey} did not deserialize to a function`);
    }

    const jobs = items.map((item: any, index: number) => ({
      name: 'process-item',
      data: {
        type: 'process-item',
        provider: options.provider || 'generic',
        operation: options.operation || 'generic',
        payload: processor(item, index),
        priority: options.priority || 1
      },
      opts: {
        // Items inside one batch are spaced by the stored per-item delay (1000 when absent).
        delay: index * (options.delayPerItem || 1000),
        priority: options.priority || 1,
        attempts: options.retries || 3
      }
    }));

    const createdJobs = await addJobsInChunks(queue, jobs);

    // Cleanup payload after successful processing
    await cleanupPayload(payloadKey);

    return {
      batchIndex,
      itemsProcessed: items.length,
      jobsCreated: createdJobs.length
    };
  } catch (error) {
    logger.error('Batch job processing failed', { batchIndex, error });
    throw error;
  }
}
|
|
|
|
// Helper functions
|
|
|
|
/**
 * Split `items` into consecutive groups of at most `batchSize` elements.
 *
 * @param items - Source array; it is not modified.
 * @param batchSize - Maximum group size. Must be a number >= 1; fractional
 *   values are rounded down.
 * @returns The groups in original order; an empty array for empty input.
 * @throws RangeError when `batchSize` is NaN or below 1. Such values would
 *   otherwise never advance the loop or would yield empty batches.
 */
function createBatches<T>(items: T[], batchSize: number): T[][] {
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be a number >= 1, received: ${batchSize}`);
  }

  // Whole-number step so every batch except possibly the last has the same size.
  const step = Math.floor(batchSize);
  const batches: T[][] = [];
  for (let start = 0; start < items.length; start += step) {
    batches.push(items.slice(start, start + step));
  }
  return batches;
}
|
|
|
|
/**
 * Store one batch (its items, the processor's source text and the settings
 * needed to expand it later) in the cache and return the key it was stored under.
 *
 * @param items - Items belonging to this batch.
 * @param processor - Item-to-payload mapper; saved via `toString()`.
 * @param options - Supplies routing, priority, retry count and the cache TTL.
 * @returns The generated cache key.
 */
async function storePayload<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  options: ProcessOptions
): Promise<string> {
  const cache = getCache();

  // Key layout: provider:operation:payload_timestamp_random
  // (the cache is created with the 'batch:' key prefix)
  const timestamp = Date.now();
  const randomSuffix = Math.random().toString(36).slice(2, 11);
  const provider = options.provider || 'generic';
  const operation = options.operation || 'generic';
  const key = [provider, operation, `payload_${timestamp}_${randomSuffix}`].join(':');

  // Settings the batch job needs when it expands this payload into item jobs.
  const storedOptions = {
    delayPerItem: 1000,
    priority: options.priority || 1,
    retries: options.retries || 3,
    // Store routing information for later use
    provider,
    operation
  };

  const payload = {
    items,
    processorStr: processor.toString(),
    options: storedOptions,
    createdAt: Date.now()
  };

  const logFields = () => ({ key, itemCount: items.length });

  logger.debug('Storing batch payload', logFields());
  await cache.set(key, payload, options.ttl || 86400);
  logger.debug('Stored batch payload successfully', logFields());

  return key;
}
|
|
|
|
/**
 * Read a stored batch payload back from the cache.
 *
 * @param key - Cache key the payload was stored under.
 * @returns The cached value.
 * @throws Error (`Payload not found: <key>`) when the cache returns a falsy value.
 */
async function loadPayload(key: string): Promise<any> {
  const cache = getCache();

  logger.debug('Loading batch payload', { key });
  const cached = await cache.get(key);

  if (cached) {
    logger.debug('Loaded batch payload successfully', { key });
    return cached;
  }

  logger.error('Payload not found in cache', { key });
  throw new Error(`Payload not found: ${key}`);
}
|
|
|
|
/**
 * Delete a stored batch payload from the cache.
 *
 * Best-effort: a failure is logged as a warning and never propagated.
 *
 * @param key - Cache key of the payload to remove.
 */
async function cleanupPayload(key: string): Promise<void> {
  try {
    await getCache().del(key);
    logger.debug('Cleaned up payload', { key });
  } catch (error) {
    // Deliberately swallowed: a failed delete is only reported.
    logger.warn('Failed to cleanup payload', { key, error });
  }
}
|
|
|
|
/**
 * Add jobs to the queue in sequential chunks through `queue.addBulk`.
 *
 * Best-effort per chunk: when `addBulk` fails for a chunk, the failure is
 * logged and the remaining chunks are still attempted, so the returned array
 * can be shorter than `jobs`.
 *
 * @param queue - Queue service providing `addBulk`.
 * @param jobs - Job descriptions to add.
 * @param chunkSize - Maximum number of jobs per `addBulk` call (default 100).
 *   Must be a number >= 1; fractional values are rounded down.
 * @returns Every job returned by the successful `addBulk` calls, in order.
 * @throws RangeError when `chunkSize` is NaN or below 1. Such values would
 *   otherwise never advance the loop.
 */
async function addJobsInChunks(queue: QueueService, jobs: any[], chunkSize = 100): Promise<any[]> {
  if (!(chunkSize >= 1)) {
    throw new RangeError(`chunkSize must be a number >= 1, received: ${chunkSize}`);
  }

  const step = Math.floor(chunkSize);
  const allCreatedJobs: any[] = [];

  for (let i = 0; i < jobs.length; i += step) {
    const chunk = jobs.slice(i, i + step);
    try {
      const createdJobs = await queue.addBulk(chunk);
      // Append one by one rather than spreading, so a large chunk cannot
      // exceed the engine's argument-count limit.
      for (const job of createdJobs) {
        allCreatedJobs.push(job);
      }

      // Small delay between chunks to avoid overwhelming Redis
      if (i + step < jobs.length) {
        await new Promise(resolve => setTimeout(resolve, 100));
      }
    } catch (error) {
      logger.error('Failed to add job chunk', {
        startIndex: i,
        chunkSize: chunk.length,
        error
      });
    }
  }

  return allCreatedJobs;
}
|
|
|
|
|