stock-bot/apps/data-service/src/services/proxy.service.ts

537 lines
No EOL
18 KiB
TypeScript

import { Logger } from '@stock-bot/logger';
import createCache, { type CacheProvider } from '@stock-bot/cache';
import { HttpClient, HttpClientConfig, ProxyConfig , RequestConfig } from '@stock-bot/http';
/**
 * A remote proxy list that ProxyService can scrape.
 */
export interface ProxySource {
/** Address the list is downloaded from. */
url: string;
/** Protocol stamped onto every proxy parsed from this source (overrides whatever the parser returned). */
protocol: 'http' | 'https' | 'socks4' | 'socks5';
/** Optional custom parser for the downloaded text; when omitted, the service's built-in "ip:port" line parser is used. */
parser?: (content: string) => ProxyConfig[];
}
/**
 * Aggregate figures returned by ProxyService.getProxyStats().
 */
export interface ProxyStats {
/** Number of proxies the service knows about. */
total: number;
/** Number of those proxies currently recorded as working. */
working: number;
/** Computed by getProxyStats as `total - working`. */
failed: number;
/** Time at which these statistics were computed. */
lastCheck: Date;
/** Average response time, rounded to a whole number by getProxyStats (presumably milliseconds, matching ProxyCheckResult.responseTime — confirm). */
avgResponseTime: number;
}
/**
 * Outcome of a single ProxyService.checkProxy() call.
 */
export interface ProxyCheckResult {
/** The proxy that was checked. */
proxy: ProxyConfig;
/** Whether the check succeeded. */
isWorking: boolean;
/** Milliseconds elapsed between starting the check and its success or failure. */
responseTime: number;
/** Failure description; only set when the check failed. */
error?: string;
/** Time at which the result was produced. */
checkedAt: Date;
}
/**
 * Per-proxy record kept in the cache under the proxy's cache key: the proxy's
 * connection settings plus the result of its most recent check.
 */
export interface ProxyData extends ProxyConfig {
/**
 * When the record was first added.
 * NOTE(review): typed as Date, but whether it is still a Date after a cache
 * round trip depends on the cache provider's serialization — confirm.
 */
addedAt: Date;
/** ISO-8601 timestamp of the most recent check; null presumably means "never checked" — confirm. */
lastChecked: string | null;
/** Result of the most recent check; null presumably means "never checked" — confirm. */
isWorking: boolean | null;
/** Duration of the most recent check in milliseconds; null presumably means "never checked" — confirm. */
responseTime: number | null;
}
/**
 * Scrapes public proxy lists, validates each proxy against a detection
 * endpoint and records the outcome in the cache.
 *
 * Enumeration note: the cache provider offers no way to list keys, so the
 * service keeps an in-process index of every proxy whose status it has
 * recorded. `getAllProxies()`, `getWorkingProxy()`, `getWorkingProxies()`,
 * `getProxyStats()` and `clearProxies()` therefore only see proxies checked by
 * this instance since it was created; the index is not shared between
 * processes and does not survive a restart.
 */
export class ProxyService {
  private readonly logger: Logger;
  private readonly cache: CacheProvider;
  private readonly httpClient: HttpClient;

  private readonly CACHE_PREFIX = 'proxy:';
  private readonly WORKING_PROXIES_KEY = 'proxy:working';
  private readonly PROXY_STATS_KEY = 'proxy:stats';
  private readonly CHECK_TIMEOUT = 10000; // 10 seconds
  /** Lifetime passed to the cache for proxy records and working-proxy markers (24 hours if the provider counts in seconds). */
  private readonly PROXY_TTL = 86400;
  /** Lifetime passed to the cache for the aggregated statistics (5 minutes if the provider counts in seconds). */
  private readonly STATS_TTL = 300;
  /** Number of proxies checked (or cache keys deleted) concurrently. */
  private readonly BATCH_SIZE = 50;

  // NOTE(security): this URL embeds an API key that is committed to source
  // control. Move it to configuration / a secret store and rotate the key.
  private readonly DEFAULT_CHECK_URL = 'https://proxy-detection.stare.gg/?api_key=bd406bf53ddc6abe1d9de5907830a955';
  // A check fails when the detection endpoint's response contains this address.
  // NOTE(review): presumably the real public IP of the machine running the
  // checks; hard-coding it breaks leak detection anywhere else — confirm and
  // move to configuration.
  private readonly DEFAULT_IP_ADDRESS = '99.246.102.205';

  // All default sources are currently disabled, so scrapeProxies() called
  // without arguments scrapes nothing.
  private readonly defaultSources: ProxySource[] = [
    // {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',protocol: 'http', },
    // {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',protocol: 'socks4', },
    // {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',protocol: 'socks5', },
    // {url: 'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',protocol: 'socks5', },
    // {url: 'https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt',protocol: 'http', },
    // {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/http.txt',protocol: 'http', },
    // {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',protocol: 'https', },
    // {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks4.txt',protocol: 'socks4', },
    // {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks5.txt',protocol: 'socks5', },
    // {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',protocol: 'http', },
    // {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',protocol: 'socks4', },
    // {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',protocol: 'socks5', },
    // {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/http.txt',protocol: 'http', },
    // {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/socks4.txt',protocol: 'socks4', },
    // {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/socks5.txt',protocol: 'socks5', },
    // {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/http',protocol: 'http', },
    // {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks4',protocol: 'socks4', },
    // {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks5',protocol: 'socks5', },
    // {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt',protocol: 'http', },
    // {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',protocol: 'socks4', },
    // {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',protocol: 'socks5', },
    // {url: 'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',protocol: 'socks5', },
    // {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/http.txt',protocol: 'http', },
    // {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',protocol: 'socks4', },
    // {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',protocol: 'socks5', },
    // {url: 'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/http.txt',protocol: 'http', },
  ];

  /** Handles of every interval started by this instance; cleared by shutdown(). */
  private readonly timers: Array<ReturnType<typeof setInterval>> = [];
  /** In-process index (cache key -> proxy) of every proxy whose status has been recorded. */
  private readonly knownProxies = new Map<string, ProxyConfig>();
  /** Set by shutdown() so a running background validation stops at the next batch. */
  private isShuttingDown = false;

  constructor() {
    this.logger = new Logger('proxy-service');
    this.cache = createCache('hybrid');
    this.httpClient = new HttpClient({
      timeout: this.CHECK_TIMEOUT,
    });
    this.logger.info('ProxyService initialized');
  }

  /**
   * Start the proxy refresh job: scrape once immediately, then again every
   * `intervalMs` milliseconds until shutdown() is called.
   *
   * @param intervalMs Delay between refreshes in milliseconds (default 30 minutes).
   * @throws Whatever the initial scrapeProxies() call throws; failures of the
   *         periodic runs are logged instead.
   */
  async queueRefreshProxies(intervalMs: number = 30 * 60 * 1000): Promise<void> {
    this.logger.info('Starting proxy refresh job', { intervalMs });
    // Initial refresh
    await this.scrapeProxies();
    // Set up periodic refresh; keep the handle so shutdown() can stop it.
    const timer = setInterval(() => {
      this.scrapeProxies().catch((error: unknown) => {
        this.logger.error('Error in periodic proxy refresh', error);
      });
    }, intervalMs);
    this.timers.push(timer);
  }

  /**
   * Scrape proxies from all sources, de-duplicate them and start validating
   * them in the background (validation is not awaited).
   *
   * @param sources Lists to scrape; defaults to the built-in source list.
   * @returns Number of unique proxies found.
   */
  async scrapeProxies(sources: ProxySource[] = this.defaultSources): Promise<number> {
    this.logger.info('Starting proxy scraping', { sourceCount: sources.length });
    const results = await Promise.allSettled(sources.map(source => this.scrapeFromSource(source)));
    const allProxies: ProxyConfig[] = [];
    results.forEach((result, index) => {
      const url = sources[index]?.url;
      if (result.status === 'fulfilled') {
        // Plain loop instead of push(...spread): a large list would otherwise
        // exceed the engine's argument-count limit.
        for (const proxy of result.value) {
          allProxies.push(proxy);
        }
        this.logger.info('Successfully scraped from source', {
          url,
          count: result.value.length
        });
      } else {
        this.logger.error('Failed to scrape from source', {
          url,
          error: result.reason
        });
      }
    });
    // Remove duplicates
    const uniqueProxies = this.removeDuplicateProxies(allProxies);
    this.logger.info('Proxy scraping completed', {
      total: allProxies.length,
      unique: uniqueProxies.length
    });
    // Start validation of new proxies. Deliberately not awaited; a cache record
    // is created for each proxy when its first check completes.
    void this.validateProxiesInBackground(uniqueProxies).catch((error: unknown) => {
      this.logger.error('Background proxy validation failed', error);
    });
    return uniqueProxies.length;
  }

  /**
   * Scrape proxies from a single source.
   *
   * @returns The parsed proxies, each tagged with the source's protocol.
   * @throws If the download fails or the response body is not a non-empty
   *         string; scrapeProxies() collects these via Promise.allSettled and
   *         logs them once.
   */
  private async scrapeFromSource(source: ProxySource): Promise<ProxyConfig[]> {
    const response = await this.httpClient.get(source.url);
    const body: unknown = response.data;
    if (typeof body !== 'string' || body.length === 0) {
      throw new Error('Invalid response data');
    }
    const proxies = source.parser ? source.parser(body) : this.parseHttpProxyList(body);
    return proxies.map(proxy => ({
      protocol: source.protocol,
      host: proxy.host,
      port: proxy.port,
      username: proxy.username,
      password: proxy.password
    }));
  }

  /**
   * Parse a proxy list in the format "ip:port", one entry per line.
   * Blank lines, lines starting with '#' and malformed entries are skipped.
   */
  private parseHttpProxyList(content: string): ProxyConfig[] {
    const proxies: ProxyConfig[] = [];
    for (const line of content.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed || trimmed.startsWith('#')) continue;
      const [rawHost, rawPort] = trimmed.split(':');
      const host = rawHost?.trim();
      const port = rawPort?.trim();
      if (host && port && this.isValidIP(host) && this.isValidPort(port)) {
        proxies.push({
          protocol: 'http',
          host,
          port: parseInt(port, 10)
        });
      }
    }
    return proxies;
  }

  /**
   * Validate IP address format (dotted-quad IPv4 only).
   */
  private isValidIP(ip: string): boolean {
    const ipRegex = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
    return ipRegex.test(ip);
  }

  /**
   * Validate port number (1-65535). Trailing non-digit text after the number
   * is tolerated, as parseInt ignores it.
   */
  private isValidPort(port: string): boolean {
    const portNum = parseInt(port, 10);
    return !Number.isNaN(portNum) && portNum > 0 && portNum <= 65535;
  }

  /**
   * Remove duplicate proxies based on host:port combination. The protocol is
   * not part of the key, so the first occurrence of a host:port wins.
   */
  private removeDuplicateProxies(proxies: ProxyConfig[]): ProxyConfig[] {
    const seen = new Set<string>();
    return proxies.filter(proxy => {
      const key = `${proxy.host}:${proxy.port}`;
      if (seen.has(key)) {
        return false;
      }
      seen.add(key);
      return true;
    });
  }

  /**
   * Check if a proxy is working: request `checkUrl` through it and require a
   * 2xx status and a response body that does not contain DEFAULT_IP_ADDRESS.
   * The outcome is recorded in the cache either way.
   *
   * @param proxy    Proxy to test.
   * @param checkUrl Endpoint requested through the proxy.
   * @returns The check result; failures are reported in the result, not thrown.
   */
  async checkProxy(proxy: ProxyConfig, checkUrl: string = this.DEFAULT_CHECK_URL): Promise<ProxyCheckResult> {
    const startTime = Date.now();
    const label = proxy.host + ':' + proxy.port;
    let result: ProxyCheckResult;
    try {
      this.logger.debug('Proxy check initiate request', { proxy: label });
      const response = await this.httpClient.get(checkUrl, { proxy: proxy, timeout: this.CHECK_TIMEOUT });
      const responseTime = Date.now() - startTime;
      this.logger.debug('Proxy check response', { proxy: label });
      if (response.status < 200 || response.status >= 300) {
        throw new Error(`HTTP ${response.status}`);
      }
      // The body may arrive as text or as an already-parsed object; search its
      // textual form either way.
      if (this.bodyToText(response.data).includes(this.DEFAULT_IP_ADDRESS)) {
        throw new Error('Proxy response exposed the real IP address');
      }
      result = {
        proxy,
        isWorking: true,
        responseTime,
        checkedAt: new Date()
      };
    } catch (error) {
      result = {
        proxy,
        isWorking: false,
        responseTime: Date.now() - startTime,
        error: error instanceof Error ? error.message : String(error),
        checkedAt: new Date()
      };
    }
    // Update cache with the outcome (updateProxyStatus never throws).
    await this.updateProxyStatus(proxy, result.isWorking, result.responseTime);
    if (result.isWorking) {
      this.logger.debug('Proxy check successful', {
        host: proxy.host,
        port: proxy.port,
        responseTime: result.responseTime
      });
    } else {
      this.logger.debug('Proxy check failed', {
        host: proxy.host,
        port: proxy.port,
        error: result.error
      });
    }
    return result;
  }

  /**
   * Render a response body as text so it can be searched for an IP address.
   */
  private bodyToText(data: unknown): string {
    if (typeof data === 'string') return data;
    if (data === null || data === undefined) return '';
    return JSON.stringify(data) ?? '';
  }

  /**
   * Update proxy status in cache, creating the record on the first check.
   * Also maintains the per-proxy "working" marker key and the in-process
   * index. Cache errors are logged and swallowed so a failing cache cannot
   * abort a validation run.
   */
  private async updateProxyStatus(proxy: ProxyConfig, isWorking: boolean, responseTime: number): Promise<void> {
    try {
      const key = this.getProxyKey(proxy);
      const existingData = await this.cache.get<ProxyData>(key);
      // Nothing else creates proxy records, so create one when none exists yet.
      const base: ProxyConfig & { addedAt: Date } = existingData ?? { ...proxy, addedAt: new Date() };
      const data: ProxyData = {
        ...base,
        isWorking,
        responseTime,
        lastChecked: new Date().toISOString()
      };
      await this.cache.set(key, data, this.PROXY_TTL);
      this.knownProxies.set(key, { ...proxy });
      // Manage working proxies list
      const workingKey = this.getWorkingKey(proxy);
      if (isWorking) {
        await this.cache.set(workingKey, proxy, this.PROXY_TTL);
      } else {
        await this.cache.del(workingKey);
      }
    } catch (error) {
      this.logger.error('Error updating proxy status', error);
    }
  }

  /**
   * Collect up to `limit` indexed proxies whose cached record says they work.
   * Index entries whose cache record has disappeared are dropped.
   */
  private async collectWorking(limit: number): Promise<Array<{ proxy: ProxyConfig; data: ProxyData }>> {
    const working: Array<{ proxy: ProxyConfig; data: ProxyData }> = [];
    for (const proxy of await this.getAllProxies()) {
      if (working.length >= limit) break;
      const key = this.getProxyKey(proxy);
      const data = await this.cache.get<ProxyData>(key);
      if (!data) {
        this.knownProxies.delete(key);
        continue;
      }
      if (data.isWorking) {
        working.push({ proxy, data });
      }
    }
    return working;
  }

  /**
   * Get a working proxy from cache, picked at random.
   *
   * @returns A working proxy, or null when none is available or the lookup fails.
   */
  async getWorkingProxy(): Promise<ProxyConfig | null> {
    try {
      const working = await this.collectWorking(Number.POSITIVE_INFINITY);
      const pick = working[Math.floor(Math.random() * working.length)];
      if (pick) {
        return pick.proxy;
      }
      this.logger.warn('No working proxies available');
      return null;
    } catch (error) {
      this.logger.error('Error getting working proxy', error);
      return null;
    }
  }

  /**
   * Get multiple working proxies.
   *
   * @param count Maximum number of proxies to return.
   * @returns Up to `count` working proxies; an empty array on failure.
   */
  async getWorkingProxies(count: number = 10): Promise<ProxyConfig[]> {
    try {
      const working = await this.collectWorking(count);
      return working.map(entry => entry.proxy);
    } catch (error) {
      this.logger.error('Error getting working proxies', error);
      return [];
    }
  }

  /**
   * Get all proxies whose status this instance has recorded (see the class
   * note on the in-process index).
   */
  async getAllProxies(): Promise<ProxyConfig[]> {
    return Array.from(this.knownProxies.values());
  }

  /**
   * Get proxy statistics and cache them under the stats key.
   *
   * @returns Counts of known/working/failed proxies and the mean response
   *          time (ms, rounded) of the working ones; all zeros on failure.
   */
  async getProxyStats(): Promise<ProxyStats> {
    try {
      // Collect first: it prunes index entries that have disappeared from the cache.
      const working = await this.collectWorking(Number.POSITIVE_INFINITY);
      const total = (await this.getAllProxies()).length;
      const times = working
        .map(entry => entry.data.responseTime)
        .filter((time): time is number => typeof time === 'number');
      const avgResponseTime = times.length > 0
        ? times.reduce((sum, time) => sum + time, 0) / times.length
        : 0;
      const stats: ProxyStats = {
        total,
        working: working.length,
        failed: Math.max(0, total - working.length),
        lastCheck: new Date(),
        avgResponseTime: Math.round(avgResponseTime)
      };
      // Cache stats for 5 minutes
      await this.cache.set(this.PROXY_STATS_KEY, stats, this.STATS_TTL);
      return stats;
    } catch (error) {
      this.logger.error('Error getting proxy stats', error);
      return {
        total: 0,
        working: 0,
        failed: 0,
        lastCheck: new Date(),
        avgResponseTime: 0
      };
    }
  }

  /**
   * Validate proxies in background, BATCH_SIZE at a time with a one-second
   * pause between batches. Stops early once shutdown() has been called.
   */
  private async validateProxiesInBackground(proxies: ProxyConfig[]): Promise<void> {
    this.logger.info('Starting background proxy validation', { count: proxies.length });
    const chunks = this.chunkArray(proxies, this.BATCH_SIZE);
    for (let i = 0; i < chunks.length; i++) {
      if (this.isShuttingDown) {
        this.logger.info('Background proxy validation stopped by shutdown');
        return;
      }
      const chunk = chunks[i] ?? [];
      const outcomes = await Promise.allSettled(chunk.map(proxy => this.checkProxy(proxy)));
      outcomes.forEach((outcome, position) => {
        if (outcome.status === 'rejected') {
          const proxy = chunk[position];
          this.logger.error('Error validating proxy', {
            host: proxy?.host,
            port: proxy?.port,
            error: outcome.reason
          });
        }
      });
      // Small delay between chunks to avoid overwhelming the system
      if (i < chunks.length - 1) {
        await new Promise<void>(resolve => setTimeout(resolve, 1000));
      }
    }
    this.logger.info('Background proxy validation completed');
  }

  /**
   * Start periodic proxy health checks: every `intervalMs` milliseconds,
   * re-check up to 100 proxies currently recorded as working.
   *
   * @param intervalMs Delay between health checks in milliseconds (default 15 minutes).
   */
  async startHealthChecks(intervalMs: number = 15 * 60 * 1000): Promise<void> {
    this.logger.info('Starting periodic proxy health checks', { intervalMs });
    const timer = setInterval(() => {
      this.runHealthCheck().catch((error: unknown) => {
        this.logger.error('Error in health check', error);
      });
    }, intervalMs);
    this.timers.push(timer);
  }

  /**
   * Re-check up to 100 working proxies and log how many still work.
   */
  private async runHealthCheck(): Promise<void> {
    const workingProxies = await this.getWorkingProxies(100); // Check up to 100 working proxies
    const results = await Promise.allSettled(workingProxies.map(proxy => this.checkProxy(proxy)));
    const successCount = results.filter(r =>
      r.status === 'fulfilled' && r.value.isWorking
    ).length;
    this.logger.info('Health check completed', {
      checked: workingProxies.length,
      stillWorking: successCount
    });
  }

  /**
   * Clear proxy data from cache: the stats entry plus the record and
   * "working" marker of every proxy in this instance's index. Records written
   * by other processes are not reachable and are left to expire.
   */
  async clearProxies(): Promise<void> {
    try {
      await this.cache.del(this.PROXY_STATS_KEY);
      const known = Array.from(this.knownProxies.values());
      for (const batch of this.chunkArray(known, this.BATCH_SIZE)) {
        await Promise.all(batch.flatMap(proxy => [
          this.cache.del(this.getProxyKey(proxy)),
          this.cache.del(this.getWorkingKey(proxy))
        ]));
      }
      this.knownProxies.clear();
      this.logger.info('Cleared proxy data from cache', { count: known.length });
    } catch (error) {
      this.logger.error('Error clearing proxy data', error);
    }
  }

  /**
   * Get cache key for a proxy
   */
  private getProxyKey(proxy: ProxyConfig): string {
    return `${this.CACHE_PREFIX}${proxy.host}:${proxy.port}`;
  }

  /**
   * Get the cache key of a proxy's "working" marker.
   */
  private getWorkingKey(proxy: ProxyConfig): string {
    return `${this.WORKING_PROXIES_KEY}:${proxy.host}:${proxy.port}`;
  }

  /**
   * Split array into chunks of at most `size` elements (sizes below 1 are
   * treated as 1 so the loop always advances).
   */
  private chunkArray<T>(array: T[], size: number): T[][] {
    const step = Math.max(1, Math.floor(size));
    const chunks: T[][] = [];
    for (let i = 0; i < array.length; i += step) {
      chunks.push(array.slice(i, i + step));
    }
    return chunks;
  }

  /**
   * Graceful shutdown: stop the refresh and health-check intervals and tell a
   * running background validation to stop at its next batch.
   */
  async shutdown(): Promise<void> {
    this.logger.info('Shutting down ProxyService');
    this.isShuttingDown = true;
    for (const timer of this.timers) {
      clearInterval(timer);
    }
    this.timers.length = 0;
    // The cache and http client will handle their own cleanup
  }
}
// Export singleton instance.
// Importing this module runs the ProxyService constructor once (it creates the
// logger, the 'hybrid' cache and the HTTP client); the constructor starts no
// timers — callers opt in via queueRefreshProxies() / startHealthChecks().
export const proxyService = new ProxyService();