added routes and simplified batch processor
This commit is contained in:
parent
0357908b69
commit
4883daa3e2
12 changed files with 1130 additions and 238 deletions
389
apps/data-service/src/utils/batch-helpers.ts
Normal file
389
apps/data-service/src/utils/batch-helpers.ts
Normal file
|
|
@ -0,0 +1,389 @@
|
|||
import { getLogger } from '@stock-bot/logger';
|
||||
import { createCache, CacheProvider } from '@stock-bot/cache';
|
||||
import type { QueueService } from '../services/queue.service';
|
||||
|
||||
// Logger shared by every helper in this module, obtained via getLogger('batch-helpers').
const logger = getLogger('batch-helpers');
|
||||
|
||||
// Simple interfaces
|
||||
/**
 * Settings accepted by `processItems` and forwarded to the direct/batched strategies.
 */
export interface ProcessOptions {
  /** Total window, in milliseconds, over which the created jobs' delays are spread. */
  totalDelayMs: number;
  /** Items per batch in batch mode; `processBatched` falls back to 100 when unset. */
  batchSize?: number;
  /** Job priority; falls back to 1 for direct jobs and 2 for batch jobs. */
  priority?: number;
  /** When truthy, items are grouped into batch jobs instead of one job per item. */
  useBatching?: boolean;
  /** Forwarded as the job `attempts` option; falls back to 3. */
  retries?: number;
  /** Expiry handed to the cache when a batch payload is stored; falls back to 86400. */
  ttl?: number;
  /** Forwarded as the job `removeOnComplete` option; falls back to 10. */
  removeOnComplete?: number;
  /** Forwarded as the job `removeOnFail` option; falls back to 5. */
  removeOnFail?: number;
}
|
||||
|
||||
/**
 * Summary returned by `processItems` once the jobs have been handed to the queue.
 */
export interface BatchResult {
  /** Number of jobs the queue reported back as created. */
  jobsCreated: number;
  /** Strategy that ran: one job per item ('direct') or grouped jobs ('batch'). */
  mode: 'direct' | 'batch';
  /** Number of input items. */
  totalItems: number;
  /** Number of batches built; only set in 'batch' mode. */
  batchesCreated?: number;
  /** Elapsed time of the enqueue run, in milliseconds. */
  duration: number;
}
|
||||
|
||||
// Lazily created cache that holds batch payloads until the batch job picks them up.
let cacheProvider: CacheProvider | null = null;

/**
 * Returns the module-wide payload cache, creating it on first use.
 */
function getCache(): CacheProvider {
  if (cacheProvider) {
    return cacheProvider;
  }

  cacheProvider = createCache({
    keyPrefix: 'batch:',
    ttl: 86400, // 24 hours default
    enableMetrics: true
  });
  return cacheProvider;
}
|
||||
|
||||
/**
 * Entry point: enqueues `items` either as one job per item or as grouped
 * batch jobs, depending on `options.useBatching`.
 *
 * @param items - Items to enqueue; an empty array returns a zeroed result immediately.
 * @param processor - Maps an item and its index to the job payload.
 * @param queue - Queue service that receives the jobs.
 * @param options - Delay window, batching and per-job settings.
 * @returns Creation counts plus the elapsed time in milliseconds.
 * @throws Rethrows any error raised by the selected strategy after logging it.
 */
export async function processItems<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<BatchResult> {
  const startTime = Date.now();

  // Nothing to enqueue: skip logging and queue access entirely.
  if (items.length === 0) {
    return { jobsCreated: 0, mode: 'direct', totalItems: 0, duration: 0 };
  }

  logger.info('Starting batch processing', {
    totalItems: items.length,
    mode: options.useBatching ? 'batch' : 'direct',
    batchSize: options.batchSize,
    totalDelayHours: (options.totalDelayMs / 1000 / 60 / 60).toFixed(1)
  });

  // Both strategies share one signature, so pick the function and call it once.
  const runStrategy = options.useBatching ? processBatched : processDirect;

  try {
    const outcome = await runStrategy(items, processor, queue, options);
    const duration = Date.now() - startTime;

    logger.info('Batch processing completed', {
      ...outcome,
      duration: `${(duration / 1000).toFixed(1)}s`
    });

    return { ...outcome, duration };
  } catch (error) {
    logger.error('Batch processing failed', error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Direct mode: every item becomes its own `process-item` job.
 *
 * Job delays are staggered evenly: item `i` is delayed by
 * `i * floor(totalDelayMs / items.length)` milliseconds.
 *
 * @param items - Items to enqueue.
 * @param processor - Maps an item and its index to the job payload.
 * @param queue - Queue service that receives the jobs.
 * @param options - Delay window and per-job settings.
 * @returns Item and job counts with `mode: 'direct'`.
 */
async function processDirect<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<Omit<BatchResult, 'duration'>> {
  const delayPerItem = Math.floor(options.totalDelayMs / items.length);

  logger.info('Creating direct jobs', {
    totalItems: items.length,
    delayPerItem: `${(delayPerItem / 1000).toFixed(1)}s`
  });

  // Builds the queue job descriptor for a single item.
  const toJob = (item: T, index: number) => ({
    name: 'process-item',
    data: {
      type: 'process-item',
      service: 'batch-processor',
      provider: 'direct',
      operation: 'process-single-item',
      payload: processor(item, index),
      priority: options.priority || 1
    },
    opts: {
      delay: index * delayPerItem,
      priority: options.priority || 1,
      attempts: options.retries || 3,
      removeOnComplete: options.removeOnComplete || 10,
      removeOnFail: options.removeOnFail || 5
    }
  });

  const enqueued = await addJobsInChunks(
    queue,
    items.map((item, index) => toJob(item, index))
  );

  return {
    totalItems: items.length,
    jobsCreated: enqueued.length,
    mode: 'direct'
  };
}
|
||||
|
||||
/**
 * Batch mode: items are split into groups, each group is stored in the cache,
 * and one `process-batch` job per group is enqueued that points at the stored payload.
 *
 * Batch `b` is delayed by `b * floor(totalDelayMs / batchCount)` milliseconds.
 *
 * @param items - Items to enqueue.
 * @param processor - Maps an item and its index to the job payload; stored with each batch.
 * @param queue - Queue service that receives the batch jobs.
 * @param options - Delay window, batch size and per-job settings.
 * @returns Item, job and batch counts with `mode: 'batch'`.
 */
async function processBatched<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  queue: QueueService,
  options: ProcessOptions
): Promise<Omit<BatchResult, 'duration'>> {
  const batchSize = options.batchSize || 100;
  const batches = createBatches(items, batchSize);
  const delayPerBatch = Math.floor(options.totalDelayMs / batches.length);

  logger.info('Creating batch jobs', {
    totalItems: items.length,
    batchSize,
    totalBatches: batches.length,
    delayPerBatch: `${(delayPerBatch / 1000 / 60).toFixed(2)} minutes`
  });

  // First pass: persist every batch payload concurrently and collect the cache keys.
  const payloadKeys = await Promise.all(
    batches.map(batch => storePayload(batch, processor, options))
  );

  // Second pass: build one job descriptor per batch, referencing its stored payload.
  const batchJobs = batches.map((batch, batchIndex) => ({
    name: 'process-batch',
    data: {
      type: 'process-batch',
      service: 'batch-processor',
      provider: 'batch',
      operation: 'process-batch-items',
      payload: {
        payloadKey: payloadKeys[batchIndex],
        batchIndex,
        totalBatches: batches.length,
        itemCount: batch.length
      },
      priority: options.priority || 2
    },
    opts: {
      delay: batchIndex * delayPerBatch,
      priority: options.priority || 2,
      attempts: options.retries || 3,
      removeOnComplete: options.removeOnComplete || 10,
      removeOnFail: options.removeOnFail || 5
    }
  }));

  const enqueued = await addJobsInChunks(queue, batchJobs);

  return {
    totalItems: items.length,
    jobsCreated: enqueued.length,
    batchesCreated: batches.length,
    mode: 'batch'
  };
}
|
||||
|
||||
/**
 * Expands one batch job into individual `process-item` jobs.
 *
 * Loads the payload stored under `jobData.payloadKey`, rebuilds the processor
 * from its serialized source, enqueues one job per stored item, and finally
 * deletes the payload from the cache.
 *
 * @param jobData - Batch descriptor carrying `payloadKey`, `batchIndex`,
 *   `totalBatches` and `itemCount`.
 * @param queue - Queue service that receives the per-item jobs.
 * @returns `{ batchIndex, itemsProcessed, jobsCreated }`.
 * @throws If `payloadKey` is missing, the payload cannot be loaded, the payload
 *   is malformed, or the stored processor source does not evaluate to a function.
 *   Every failure is logged with the batch index before being rethrown.
 */
export async function processBatchJob(jobData: any, queue: QueueService): Promise<any> {
  const { payloadKey, batchIndex, totalBatches, itemCount } = jobData;

  logger.debug('Processing batch job', {
    batchIndex,
    totalBatches,
    itemCount
  });

  try {
    // Fail fast with a readable message instead of a cache miss on "undefined".
    if (typeof payloadKey !== 'string' || payloadKey.length === 0) {
      throw new Error('Batch job data is missing a payloadKey');
    }

    const payload = await loadPayload(payloadKey);
    if (payload === null || typeof payload !== 'object') {
      throw new Error(`Malformed batch payload: ${payloadKey}`);
    }

    const { items, processorStr } = payload;
    const options = payload.options ?? {};
    if (!Array.isArray(items) || typeof processorStr !== 'string') {
      throw new Error(`Malformed batch payload: ${payloadKey}`);
    }

    // SECURITY: this evaluates source text read back from the cache. Anyone able
    // to write to that cache can execute arbitrary code in this process. Replace
    // with a registry of named processors before relying on this in production.
    // Also note the rebuilt function has no access to the original closure scope.
    const processor = new Function('return ' + processorStr)();
    if (typeof processor !== 'function') {
      throw new Error(`Stored processor for ${payloadKey} is not a function`);
    }

    const priority = options.priority || 1;
    const attempts = options.retries || 3;
    const delayPerItem = options.delayPerItem || 1000;

    // `index` is the position inside this batch, not inside the original item list.
    const jobs = items.map((item: any, index: number) => ({
      name: 'process-item',
      data: {
        type: 'process-item',
        service: 'batch-processor',
        provider: 'batch-item',
        operation: 'process-single-item',
        payload: processor(item, index),
        priority
      },
      opts: {
        delay: index * delayPerItem,
        priority,
        attempts
      }
    }));

    const createdJobs = await addJobsInChunks(queue, jobs);

    // Cleanup payload after successful processing
    await cleanupPayload(payloadKey);

    return {
      batchIndex,
      itemsProcessed: items.length,
      jobsCreated: createdJobs.length
    };
  } catch (error) {
    logger.error('Batch job processing failed', { batchIndex, error });
    throw error;
  }
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
/**
 * Splits `items` into consecutive chunks of at most `batchSize` elements.
 *
 * A fractional `batchSize` is rounded down so every chunk (except possibly the
 * last) has the same length. `Infinity` yields a single chunk.
 *
 * @param items - Source array; it is not modified.
 * @param batchSize - Maximum chunk length; must be at least 1 after rounding down.
 * @returns The chunks in original order; an empty array when `items` is empty.
 * @throws RangeError when `batchSize` is NaN or rounds down to less than 1,
 *   because such a value would never advance the loop.
 */
function createBatches<T>(items: T[], batchSize: number): T[][] {
  const size = Math.floor(batchSize);
  if (Number.isNaN(size) || size < 1) {
    throw new RangeError(`batchSize must be a number >= 1, received ${batchSize}`);
  }

  const batches: T[][] = [];
  for (let start = 0; start < items.length; start += size) {
    batches.push(items.slice(start, start + size));
  }
  return batches;
}
|
||||
|
||||
/**
 * Persists one batch in the cache and returns the key it was stored under.
 *
 * The stored JSON holds the items, the processor's source text
 * (`processor.toString()`, so anything the function closes over is not captured),
 * a reduced option set and a creation timestamp.
 *
 * @param items - Items belonging to this batch.
 * @param processor - Function whose source text is stored alongside the items.
 * @param options - Supplies `priority`, `retries` and the cache `ttl`.
 * @returns The generated cache key (`payload_<timestamp>_<random>`).
 */
async function storePayload<T>(
  items: T[],
  processor: (item: T, index: number) => any,
  options: ProcessOptions
): Promise<string> {
  const cache = getCache();

  // Timestamp plus a short base-36 random suffix keeps keys distinct within the same millisecond.
  const randomSuffix = Math.random().toString(36).slice(2, 11);
  const key = `payload_${Date.now()}_${randomSuffix}`;

  const serialized = JSON.stringify({
    items,
    processorStr: processor.toString(),
    options: {
      delayPerItem: 1000,
      priority: options.priority || 1,
      retries: options.retries || 3
    },
    createdAt: Date.now()
  });

  await cache.set(key, serialized, options.ttl || 86400);

  logger.debug('Stored batch payload', { key, itemCount: items.length });

  return key;
}
|
||||
|
||||
/**
 * Reads a stored batch payload back from the cache and parses it as JSON.
 *
 * @param key - Cache key returned by `storePayload`.
 * @returns The parsed payload object.
 * @throws Error(`Payload not found: <key>`) when the cache returns a falsy value.
 */
async function loadPayload(key: string): Promise<any> {
  const raw = await getCache().get(key);

  if (raw) {
    return JSON.parse(raw as string);
  }

  throw new Error(`Payload not found: ${key}`);
}
|
||||
|
||||
/**
 * Best-effort removal of a stored payload; failures are logged as warnings and never thrown.
 *
 * @param key - Cache key of the payload to delete.
 */
async function cleanupPayload(key: string): Promise<void> {
  try {
    await getCache().del(key);
    logger.debug('Cleaned up payload', { key });
  } catch (error) {
    // Deliberately swallowed: a failed delete must not fail the batch job.
    logger.warn('Failed to cleanup payload', { key, error });
  }
}
|
||||
|
||||
/**
 * Adds jobs to the queue in slices of `chunkSize` via `queue.addBulk`.
 *
 * A slice that fails is logged and skipped, so the returned array can be
 * shorter than `jobs`. After each successful slice except the last, the
 * function waits 100 ms before continuing.
 *
 * @param queue - Queue service exposing `addBulk`.
 * @param jobs - Job descriptors to enqueue.
 * @param chunkSize - Maximum number of jobs per `addBulk` call (default 100).
 * @returns Everything `addBulk` returned for the slices that succeeded.
 */
async function addJobsInChunks(queue: QueueService, jobs: any[], chunkSize = 100): Promise<any[]> {
  const created: any[] = [];
  let offset = 0;

  while (offset < jobs.length) {
    const end = offset + chunkSize;
    const chunk = jobs.slice(offset, end);

    try {
      const result = await queue.addBulk(chunk);
      created.push(...result);

      // Small delay between chunks to avoid overwhelming Redis
      const hasMore = end < jobs.length;
      if (hasMore) {
        await new Promise(resolve => setTimeout(resolve, 100));
      }
    } catch (error) {
      logger.error('Failed to add job chunk', {
        startIndex: offset,
        chunkSize: chunk.length,
        error
      });
    }

    offset = end;
  }

  return created;
}
|
||||
|
||||
// Convenience functions for common use cases
|
||||
|
||||
/**
 * Convenience wrapper around `processItems` for a list of symbols.
 *
 * Each symbol becomes the payload `{ symbol, index, source: 'batch-processing' }`.
 * Defaults: batch size 100, priority 1, batching off.
 *
 * NOTE(review): `options.operation`, `options.service` and `options.provider`
 * are required by the signature but are not read anywhere in this function.
 *
 * @param symbols - Symbols to enqueue.
 * @param queue - Queue service that receives the jobs.
 * @param options - Delay window and optional batching overrides.
 * @returns The `BatchResult` produced by `processItems`.
 */
export async function processSymbols(
  symbols: string[],
  queue: QueueService,
  options: {
    operation: string;
    service: string;
    provider: string;
    totalDelayMs: number;
    useBatching?: boolean;
    batchSize?: number;
    priority?: number;
  }
): Promise<BatchResult> {
  // Kept free of outer variables: in batch mode the processor is stored as source text.
  const toPayload = (symbol: string, index: number) => ({
    symbol,
    index,
    source: 'batch-processing'
  });

  const processOptions: ProcessOptions = {
    totalDelayMs: options.totalDelayMs,
    batchSize: options.batchSize || 100,
    priority: options.priority || 1,
    useBatching: options.useBatching || false
  };

  return processItems(symbols, toPayload, queue, processOptions);
}
|
||||
|
||||
/**
 * Convenience wrapper around `processItems` for a list of proxies.
 *
 * Each proxy becomes the payload `{ proxy, index, source: 'batch-processing' }`.
 * Defaults: batch size 200, priority 2, batching on.
 *
 * @param proxies - Proxy entries to enqueue.
 * @param queue - Queue service that receives the jobs.
 * @param options - Delay window and optional batching overrides. Pass
 *   `useBatching: false` to create one job per proxy instead of batch jobs.
 * @returns The `BatchResult` produced by `processItems`.
 */
export async function processProxies(
  proxies: any[],
  queue: QueueService,
  options: {
    totalDelayMs: number;
    useBatching?: boolean;
    batchSize?: number;
    priority?: number;
  }
): Promise<BatchResult> {
  return processItems(
    proxies,
    // Kept free of outer variables: in batch mode the processor is stored as source text.
    (proxy, index) => ({
      proxy,
      index,
      source: 'batch-processing'
    }),
    queue,
    {
      totalDelayMs: options.totalDelayMs,
      batchSize: options.batchSize || 200,
      priority: options.priority || 2,
      // `|| true` was always true and ignored an explicit `false`;
      // `??` applies the default only when the caller left the flag unset.
      useBatching: options.useBatching ?? true
    }
  );
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue