stock-bot/apps/stock/data-ingestion/src/handlers/proxy/operations/fetch.operations.ts

104 lines
No EOL
2.9 KiB
TypeScript

/**
* Proxy Fetch Operations - Fetching proxies from sources
*/
import type { ProxyInfo } from '@stock-bot/proxy';
import { OperationContext } from '@stock-bot/di';
import { getLogger } from '@stock-bot/logger';
import { fetch } from '@stock-bot/utils';
import { PROXY_CONFIG } from '../shared/config';
import type { ProxySource } from '../shared/types';
/**
 * Fetches proxies from every entry in `PROXY_CONFIG.PROXY_SOURCES` concurrently,
 * merges the per-source results and drops duplicates.
 *
 * A single logger obtained from `getLogger('proxy-fetch')` is shared by all
 * per-source fetches.
 *
 * @returns The de-duplicated proxies gathered from all configured sources.
 */
export async function fetchProxiesFromSources(): Promise<ProxyInfo[]> {
  // Only `logger` is provided; the cast lets this minimal object stand in
  // for the context type that fetchProxiesFromSource expects.
  const sharedContext = { logger: getLogger('proxy-fetch') } as any;
  sharedContext.logger.info('Starting proxy fetch from sources');

  const perSourceResults = await Promise.all(
    PROXY_CONFIG.PROXY_SOURCES.map(source => fetchProxiesFromSource(source, sharedContext))
  );
  const uniqueProxies: ProxyInfo[] = removeDuplicateProxies(perSourceResults.flat());

  sharedContext.logger.info('Fetched proxies from all sources', { total: uniqueProxies.length });
  return uniqueProxies;
}
/**
 * Downloads one proxy list and parses it into `ProxyInfo` records.
 *
 * The response body is read as text with one entry per line. Each line is
 * trimmed and passed through `cleanProxyUrl`; empty lines and lines starting
 * with `#` are skipped. The remainder is split on `:` — the first two fields
 * are taken as host and port, and any further fields (e.g. `user:pass`) are
 * ignored. Entries whose port is not a number in the range 1-65535 are dropped.
 *
 * Never throws: a non-OK response, or any error raised while fetching or
 * parsing, is logged and yields an empty array.
 *
 * @param source - Source descriptor; its `url`, `id` and `protocol` are read.
 * @param ctx - Context whose `logger` is used. When omitted, one is created
 *   with `OperationContext.create('proxy', 'fetch-source')`.
 * @returns The proxies parsed from the source, or `[]` on failure.
 */
export async function fetchProxiesFromSource(source: ProxySource, ctx?: OperationContext): Promise<ProxyInfo[]> {
  if (!ctx) {
    ctx = OperationContext.create('proxy', 'fetch-source');
  }

  const allProxies: ProxyInfo[] = [];

  try {
    ctx.logger.info(`Fetching proxies from ${source.url}`);

    // NOTE(review): `logger` is not a standard fetch option; presumably the
    // project's fetch wrapper accepts it, hence the cast — confirm.
    const response = await fetch(source.url, {
      signal: AbortSignal.timeout(10000), // abort the request after 10 seconds
      logger: ctx.logger
    } as any);

    if (!response.ok) {
      ctx.logger.warn(`Failed to fetch from ${source.url}: ${response.status}`);
      return [];
    }

    const text = await response.text();

    for (const line of text.split('\n')) {
      const trimmed = cleanProxyUrl(line.trim());
      if (!trimmed || trimmed.startsWith('#')) {
        continue;
      }

      // Parse formats like "host:port" or "host:port:user:pass"
      const [host, rawPort] = trimmed.split(':');
      if (!host || rawPort === undefined) {
        continue;
      }

      // Explicit radix so inputs such as "0x1F" are not read as hexadecimal.
      const port = parseInt(rawPort, 10);

      // The range check also rejects NaN (every comparison with NaN is false),
      // as well as 0, negative values and values above the TCP port maximum.
      if (port >= 1 && port <= 65535) {
        allProxies.push({
          source: source.id,
          protocol: source.protocol as 'http' | 'https',
          host,
          port,
        });
      }
    }

    ctx.logger.info(`Parsed ${allProxies.length} proxies from ${source.url}`);
  } catch (error) {
    ctx.logger.error(`Error fetching proxies from ${source.url}`, error);
    return [];
  }

  return allProxies;
}
// Utility functions
/**
 * Normalizes one raw proxy-list entry before it is split into host and port.
 *
 * - Removes a leading `http://` or `https://` scheme.
 * - Removes zero padding at the very start of the string
 *   (`010.1.2.3` -> `10.1.2.3`).
 * - Removes zero padding directly after any `:` (`:08080` -> `:8080`).
 *
 * Zeros are stripped only when another digit follows, so a value that is
 * exactly `0` — such as the first octet of `0.0.0.0` — is left intact rather
 * than being deleted.
 *
 * @param url - Raw entry, e.g. `http://010.1.2.3:08080`.
 * @returns The cleaned entry, e.g. `10.1.2.3:8080`.
 */
function cleanProxyUrl(url: string): string {
  return url
    .replace(/^https?:\/\//, '')
    // The lookahead keeps the final digit, so "0.x" and "0abc" are untouched.
    .replace(/^0+(?=\d)/, '')
    .replace(/:0+(\d)/g, ':$1');
}
/**
 * Returns a new array containing the first occurrence of each proxy.
 *
 * Two proxies count as duplicates when their `protocol`, `host` and `port`
 * are equal; other fields are not compared. Input order is preserved and
 * the input array is not modified.
 *
 * @param proxies - Proxies to de-duplicate.
 * @returns The proxies with later duplicates removed.
 */
function removeDuplicateProxies(proxies: ProxyInfo[]): ProxyInfo[] {
  const knownEndpoints = new Set<string>();

  return proxies.filter(candidate => {
    const endpoint = `${candidate.protocol}://${candidate.host}:${candidate.port}`;
    if (knownEndpoints.has(endpoint)) {
      return false;
    }
    knownEndpoints.add(endpoint);
    return true;
  });
}