diff --git a/apps/data-service/src/providers/proxy.provider.ts b/apps/data-service/src/providers/proxy.provider.ts index b565ef0..459dc64 100644 --- a/apps/data-service/src/providers/proxy.provider.ts +++ b/apps/data-service/src/providers/proxy.provider.ts @@ -60,7 +60,13 @@ export const proxyProvider: ProviderConfig = { // Process a batch of proxies - uses the fetch-and-check JobNamePrefix process-(proxy)-batch const { queueManager } = await import('../services/queue.service'); const batchProcessor = new BatchProcessor(queueManager); - return await batchProcessor.processBatch(payload); + return await batchProcessor.processBatch( + payload, + (proxy: ProxyInfo) => ({ + proxy, + source: payload.config?.source || 'batch-processing' + }) + ); }, 'check-proxy': async (payload: { @@ -123,6 +129,7 @@ export const proxyProvider: ProviderConfig = { type: 'proxy-maintenance', operation: 'fetch-and-check', payload: {}, + // should remove and just run at the same time so app restarts dont keeping adding same jobs cronPattern: getEvery24HourCron(), priority: 5, immediately: true, diff --git a/apps/data-service/src/providers/proxy.tasks.ts b/apps/data-service/src/providers/proxy.tasks.ts index f890b57..321e20d 100644 --- a/apps/data-service/src/providers/proxy.tasks.ts +++ b/apps/data-service/src/providers/proxy.tasks.ts @@ -13,50 +13,31 @@ const PROXY_CONFIG = { CONCURRENCY_LIMIT: 100, PROXY_SOURCES: [ {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/http',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/officialputuid/KangProxy/refs/heads/KangProxy/http/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/gfpcom/free-proxy-list/refs/heads/main/list/http.txt', protocol: 'http' }, - {url: 'https://raw.githubusercontent.com/dpangestuw/Free-Proxy/refs/heads/main/http_proxies.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/gitrecon1455/fresh-proxy-list/refs/heads/main/proxylist.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/themiralay/Proxy-List-World/refs/heads/master/data.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/refs/heads/master/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/casa-ls/proxy-list/refs/heads/main/http',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/databay-labs/free-proxy-list/refs/heads/master/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/BreakingTechFr/Proxy_Free/refs/heads/main/proxies/http.txt', protocol: 'http' }, - {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/http.txt',protocol: 'http', }, - // {url: 'https://raw.githubusercontent.com/zloi-user/hideip.me/refs/heads/master/http.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/http',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/http.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/http.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/officialputuid/KangProxy/refs/heads/KangProxy/http/http.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/gfpcom/free-proxy-list/refs/heads/main/list/http.txt', protocol: 'http' }, + // {url: 'https://raw.githubusercontent.com/dpangestuw/Free-Proxy/refs/heads/main/http_proxies.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/gitrecon1455/fresh-proxy-list/refs/heads/main/proxylist.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/themiralay/Proxy-List-World/refs/heads/master/data.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/refs/heads/master/http.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/casa-ls/proxy-list/refs/heads/main/http',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/databay-labs/free-proxy-list/refs/heads/master/http.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/BreakingTechFr/Proxy_Free/refs/heads/main/proxies/http.txt', protocol: 'http' }, + // {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/http.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',protocol: 'http', }, + // {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/http.txt',protocol: 'http', }, - {url: 'https://raw.githubusercontent.com/r00tee/Proxy-List/refs/heads/main/Https.txt',protocol: 'https', }, - {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',protocol: 'https', }, - {url: 'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/refs/heads/master/https.txt', protocol: 'https' }, - {url: 'https://raw.githubusercontent.com/databay-labs/free-proxy-list/refs/heads/master/https.txt',protocol: 'https', }, - {url: 'https://raw.githubusercontent.com/officialputuid/KangProxy/refs/heads/KangProxy/https/https.txt',protocol: 'https', }, - {url: 'https://raw.githubusercontent.com/zloi-user/hideip.me/refs/heads/master/https.txt',protocol: 'https', }, - {url: 'https://raw.githubusercontent.com/gfpcom/free-proxy-list/refs/heads/main/list/https.txt',protocol: 'https', }, - // {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',protocol: 'socks4', }, - // {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',protocol: 'socks4', }, - // {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/socks4.txt',protocol: 'socks4', }, - // {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks4',protocol: 'socks4', }, - // {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',protocol: 'socks4', }, - // {url: 'https://raw.githubusercontent.com/BreakingTechFr/Proxy_Free/refs/heads/main/proxies/socks4.txt',protocol: 'socks4', }, - // {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks4.txt',protocol: 'socks4', }, - // {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',protocol: 'socks4', }, - // {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',protocol: 'socks5', }, - // {url: 'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',protocol: 'socks5', }, - // {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks5.txt',protocol: 'socks5', }, - // {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',protocol: 'socks5', }, - // {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/socks5.txt',protocol: 'socks5', }, - // {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks5',protocol: 'socks5', }, - // {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',protocol: 'socks5', }, - // {url: 'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',protocol: 'socks5', }, - // {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',protocol: 'socks5', }, - // {url: 'https://raw.githubusercontent.com/BreakingTechFr/Proxy_Free/refs/heads/main/proxies/socks5.txt',protocol: 'socks5', }, + // {url: 'https://raw.githubusercontent.com/r00tee/Proxy-List/refs/heads/main/Https.txt',protocol: 'https', }, + // {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',protocol: 'https', }, + // {url: 'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/refs/heads/master/https.txt', protocol: 'https' }, + // {url: 'https://raw.githubusercontent.com/databay-labs/free-proxy-list/refs/heads/master/https.txt',protocol: 'https', }, + // {url: 'https://raw.githubusercontent.com/officialputuid/KangProxy/refs/heads/KangProxy/https/https.txt',protocol: 'https', }, + // {url: 'https://raw.githubusercontent.com/zloi-user/hideip.me/refs/heads/master/https.txt',protocol: 'https', }, + // {url: 'https://raw.githubusercontent.com/gfpcom/free-proxy-list/refs/heads/main/list/https.txt',protocol: 'https', }, ] }; diff --git a/apps/data-service/src/utils/batch-processor.ts b/apps/data-service/src/utils/batch-processor.ts index b460122..a34ed07 100644 --- a/apps/data-service/src/utils/batch-processor.ts +++ b/apps/data-service/src/utils/batch-processor.ts @@ -208,11 +208,11 @@ export class BatchProcessor { * Process a batch (called by batch jobs) */ async processBatch(payload: { - items: T[]; - batchIndex: number; - total: number; - config: BatchConfig; - }) { + items: T[]; + batchIndex: number; + total: number; + config: BatchConfig; + }, createJobData?: (item: T, index: number) => any) { const { items, batchIndex, total, config } = payload; logger.info('Processing batch', { @@ -222,10 +222,18 @@ export class BatchProcessor { progress: `${((batchIndex + 1) / total * 100).toFixed(2)}%` }); - const delayPerItem = Math.floor((15 * 60 * 1000) / items.length); // 15 min per batch + const totalBatchDelayMs = config.totalDelayMs / total; + const delayPerItem = Math.floor(totalBatchDelayMs / items.length); const jobs = items.map((item, itemIndex) => { - const userData = config.createJobData(item, itemIndex); + // Use the provided createJobData function or fall back to config + const jobDataFn = createJobData || config.createJobData; + + if (!jobDataFn) { + throw new Error('createJobData function is required'); + } + + const userData = jobDataFn(item, itemIndex); return { name: `${config.jobNamePrefix}-processing`,