// stock-bot/apps/data-service/src/providers/proxy.provider.ts
// 2025-06-08 23:30:46 -04:00
//
// 266 lines
// 8.8 KiB
// TypeScript

import { ProxyInfo } from 'libs/http/src/types';
import { ProviderConfig } from '../services/provider-registry.service';
import { getLogger } from '@stock-bot/logger';
// Logger obtained under the name 'proxy-provider'; shared by every operation in this provider.
const logger = getLogger('proxy-provider');
/**
 * Builds a five-field cron expression ("minute hour * * *") from the local
 * wall-clock time at the moment of the call, so a job using it fires once a
 * day at that same time of day.
 *
 * @returns The cron pattern, e.g. `"30 23 * * *"` when called at 23:30 local time.
 */
const getEvery24HourCron = (): string => {
  const startedAt = new Date();
  // Field order is: minute, hour, day-of-month, month, day-of-week.
  const cronFields = [startedAt.getMinutes(), startedAt.getHours(), '*', '*', '*'];
  return cronFields.join(' ');
};
/**
 * Provider definition for the proxy service.
 *
 * Operations:
 * - `fetch-and-check`: drains the queue, fetches the proxy list and schedules
 *   `process-proxy-batch` jobs whose delays are spread over 24 hours.
 * - `process-proxy-batch`: fans one batch out into per-proxy `check-proxy` jobs
 *   whose delays are spread over 15 minutes.
 * - `check-proxy`: validates a single proxy via `checkProxy`.
 *
 * One scheduled job triggers `fetch-and-check` once a day, at the time of day
 * this module was loaded.
 */
export const proxyProvider: ProviderConfig = {
  name: 'proxy-service',
  service: 'proxy',
  operations: {
    /**
     * Fetches the proxy list and enqueues one delayed `process-proxy-batch` job
     * per 200 proxies, with the delays spaced evenly across 24 hours.
     *
     * @param payload - `sources` is accepted but is not read by this operation.
     * @returns Counts of fetched proxies and of batch jobs that were created.
     * @throws Re-throws any error raised while creating batch jobs, after logging it.
     */
    'fetch-and-check': async (payload: { sources?: string[] }) => {
      const { proxyService } = await import('./proxy.tasks');
      const { queueManager } = await import('../services/queue.service');

      // NOTE(review): drainQueue runs before the fetch, so whatever it clears is
      // cleared even when the fetch returns nothing or throws — confirm intended.
      await queueManager.drainQueue();

      const proxies = await proxyService.fetchProxiesFromSources();
      const proxiesCount = proxies.length;
      if (proxiesCount === 0) {
        logger.info('No proxies fetched, skipping job creation');
        return { proxiesFetched: 0, batchJobsCreated: 0 };
      }

      try {
        // Batch size picked by the original author with ~800k proxies in mind.
        const batchSize = 200; // Process 200 proxies per batch job
        const totalBatches = Math.ceil(proxiesCount / batchSize);
        const totalDelayMs = 24 * 60 * 60 * 1000; // 24 hours
        const delayPerBatch = Math.floor(totalDelayMs / totalBatches);
        const avgProxiesPerBatch = Math.floor(proxiesCount / totalBatches);

        logger.info('Creating proxy validation batch jobs', {
          totalProxies: proxiesCount,
          batchSize,
          totalBatches,
          delayPerBatch: `${(delayPerBatch / 1000 / 60).toFixed(2)} minutes`,
          estimatedCompletion: '24 hours'
        });

        // Create the batch jobs in chunks so that at most 50 addJob calls are in flight.
        const batchCreationChunkSize = 50;
        const totalChunks = Math.ceil(totalBatches / batchCreationChunkSize);
        let batchJobsCreated = 0;

        for (let chunkStart = 0; chunkStart < totalBatches; chunkStart += batchCreationChunkSize) {
          const chunkEnd = Math.min(chunkStart + batchCreationChunkSize, totalBatches);

          // Start every addJob of this chunk without awaiting, then settle them together.
          const batchPromises = [];
          for (let i = chunkStart; i < chunkEnd; i++) {
            const startIndex = i * batchSize;
            const endIndex = Math.min(startIndex + batchSize, proxiesCount);
            const batchProxies = proxies.slice(startIndex, endIndex);
            // Batch i becomes due i * delayPerBatch ms from now.
            const delay = i * delayPerBatch;

            batchPromises.push(
              queueManager.addJob(
                {
                  type: 'proxy-batch-validation',
                  service: 'proxy',
                  provider: 'proxy-service',
                  operation: 'process-proxy-batch',
                  payload: {
                    proxies: batchProxies,
                    batchIndex: i,
                    totalBatches,
                    source: 'fetch-and-check'
                  },
                  priority: 3
                },
                {
                  delay,
                  jobId: `proxy-batch-${i}-${Date.now()}`
                }
              )
            );
          }

          // allSettled: one failed addJob must not abort the rest of the chunk.
          const results = await Promise.allSettled(batchPromises);
          const failures = results.filter(
            (r): r is PromiseRejectedResult => r.status === 'rejected'
          );
          const failed = failures.length;
          const successful = results.length - failed;
          batchJobsCreated += successful;

          // Surface why jobs were rejected instead of only counting them.
          if (failed > 0) {
            const reasons = failures.map(f =>
              f.reason instanceof Error ? f.reason.message : String(f.reason)
            );
            logger.error('Some batch jobs could not be created', {
              failed,
              reasons: [...new Set(reasons)]
            });
          }

          // chunkStart/chunkEnd are batch positions (1-based start, inclusive end);
          // totalChunks is the number of chunks.
          logger.info('Batch chunk created', {
            chunkStart: chunkStart + 1,
            chunkEnd,
            totalChunks,
            successful,
            failed,
            totalCreated: batchJobsCreated,
            progress: `${((chunkEnd / totalBatches) * 100).toFixed(1)}%`
          });

          // Small delay between chunks to prevent overwhelming Redis
          if (chunkEnd < totalBatches) {
            await new Promise(resolve => setTimeout(resolve, 100));
          }
        }

        logger.info('All batch jobs creation completed', {
          totalProxies: proxiesCount,
          batchJobsCreated,
          totalBatches,
          avgProxiesPerBatch,
          estimatedDuration: '24 hours'
        });

        return {
          proxiesFetched: proxiesCount,
          batchJobsCreated,
          totalBatches,
          avgProxiesPerBatch
        };
      } catch (error) {
        logger.error('Failed to create batch jobs', {
          proxiesCount,
          error: error instanceof Error ? error.message : String(error)
        });
        throw error;
      }
    },

    /**
     * Turns one batch of proxies into individual `check-proxy` jobs, added in a
     * single `addBulk` call with delays spread evenly over 15 minutes.
     *
     * @param payload - The proxies of this batch plus its position (`batchIndex`
     *   of `totalBatches`) and the originating `source` label.
     * @returns How many validation jobs were created or failed for this batch.
     *   A failing `addBulk` is logged and reported via `jobsFailed`, not thrown.
     */
    'process-proxy-batch': async (payload: {
      proxies: ProxyInfo[],
      batchIndex: number,
      totalBatches: number,
      source: string
    }) => {
      const { queueManager } = await import('../services/queue.service');

      const proxiesInBatch = payload.proxies.length;
      const progress = `${((payload.batchIndex + 1) / payload.totalBatches * 100).toFixed(2)}%`;

      logger.info('Processing proxy batch', {
        batchIndex: payload.batchIndex,
        batchSize: proxiesInBatch,
        totalBatches: payload.totalBatches,
        progress
      });

      // Guard: an empty batch would divide by zero below and issue an empty addBulk.
      if (proxiesInBatch === 0) {
        logger.info('Proxy batch is empty, no validation jobs created', {
          batchIndex: payload.batchIndex
        });
        return {
          batchIndex: payload.batchIndex,
          totalProxies: 0,
          jobsCreated: 0,
          jobsFailed: 0
        };
      }

      const batchDelayMs = 15 * 60 * 1000; // 15 minutes per batch
      const delayPerProxy = Math.floor(batchDelayMs / proxiesInBatch);

      logger.info('Batch timing calculated', {
        batchIndex: payload.batchIndex,
        proxiesInBatch,
        // Derived from batchDelayMs so the log cannot drift from the real window.
        batchDurationMinutes: batchDelayMs / 60 / 1000,
        delayPerProxySeconds: Math.floor(delayPerProxy / 1000),
        delayPerProxyMs: delayPerProxy
      });

      // Use BullMQ's addBulk for better performance
      const jobsToCreate = payload.proxies.map((proxy, i) => ({
        name: 'proxy-validation',
        data: {
          type: 'proxy-validation',
          service: 'proxy',
          provider: 'proxy-service',
          operation: 'check-proxy',
          payload: {
            proxy,
            source: payload.source,
            batchIndex: payload.batchIndex,
            proxyIndexInBatch: i,
            totalBatch: payload.totalBatches
          },
          priority: 2
        },
        opts: {
          // Proxy i becomes due i * delayPerProxy ms after this batch is processed.
          delay: i * delayPerProxy,
          jobId: `proxy-${proxy.host}-${proxy.port}-batch${payload.batchIndex}-${Date.now()}-${i}`,
          removeOnComplete: 3,
          removeOnFail: 5
        }
      }));

      try {
        const jobs = await queueManager.addBulk(jobsToCreate);

        logger.info('Batch processing completed successfully', {
          batchIndex: payload.batchIndex,
          totalProxies: proxiesInBatch,
          jobsCreated: jobs.length,
          batchDelay: '15 minutes',
          progress
        });

        return {
          batchIndex: payload.batchIndex,
          totalProxies: proxiesInBatch,
          jobsCreated: jobs.length,
          jobsFailed: 0
        };
      } catch (error) {
        // Best effort: report the whole batch as failed rather than failing the job.
        logger.error('Failed to create validation jobs for batch', {
          batchIndex: payload.batchIndex,
          batchSize: proxiesInBatch,
          error: error instanceof Error ? error.message : String(error)
        });

        return {
          batchIndex: payload.batchIndex,
          totalProxies: proxiesInBatch,
          jobsCreated: 0,
          jobsFailed: proxiesInBatch
        };
      }
    },

    /**
     * Validates a single proxy with `checkProxy` and returns the outcome
     * together with the batch bookkeeping that was passed in.
     *
     * @param payload - The proxy to check; the remaining fields are optional
     *   batch metadata that is only logged and echoed back in `batchInfo`.
     * @returns `{ result, batchInfo }` where `result` is what `checkProxy` returned.
     */
    'check-proxy': async (payload: {
      proxy: ProxyInfo,
      source?: string,
      batchIndex?: number,
      proxyIndexInBatch?: number,
      totalBatch?: number
    }) => {
      const { checkProxy } = await import('./proxy.tasks');

      const proxyLabel = `${payload.proxy.host}:${payload.proxy.port}`;

      logger.debug('Checking individual proxy', {
        proxy: proxyLabel,
        batchIndex: payload.batchIndex,
        proxyIndex: payload.proxyIndexInBatch,
        source: payload.source
      });

      const result = await checkProxy(payload.proxy);

      logger.debug('Proxy check completed', {
        proxy: proxyLabel,
        isWorking: result.isWorking,
        responseTime: result.responseTime,
        batchIndex: payload.batchIndex
      });

      return {
        result,
        batchInfo: {
          batchIndex: payload.batchIndex,
          proxyIndex: payload.proxyIndexInBatch,
          total: payload.totalBatch,
          source: payload.source
        }
      };
    }
  },
  scheduledJobs: [
    {
      type: 'proxy-maintenance',
      operation: 'fetch-and-check',
      payload: {},
      // Once a day, at the time of day this module was loaded.
      cronPattern: getEvery24HourCron(),
      priority: 5,
      // NOTE(review): presumably also triggers one run right at startup — confirm
      // against the scheduler that consumes this flag.
      immediately: true,
      description: 'Fetch and validate proxy list from sources'
    }
  ]
};