work on proxy service

This commit is contained in:
Bojan Kucera 2025-06-07 18:04:20 -04:00
parent baa34a3805
commit 81d88357ca
5 changed files with 204 additions and 547 deletions

View file

@ -1,273 +1,169 @@
import { Logger } from '@stock-bot/logger';
import createCache, { type CacheProvider } from '@stock-bot/cache';
import { HttpClient, HttpClientConfig, ProxyConfig , RequestConfig } from '@stock-bot/http';
import { HttpClient, ProxyInfo } from '@stock-bot/http';
import pLimit from 'p-limit';
/**
 * A remote proxy list to download, together with the protocol assigned to
 * every proxy parsed from it.
 */
export interface ProxySource {
/** Address the proxy list is fetched from. */
url: string;
/** Protocol stamped onto each proxy that comes from this source. */
protocol: 'http' | 'https' | 'socks4' | 'socks5';
/** Optional custom parser: receives the downloaded text and returns the proxies found in it. */
parser?: (content: string) => ProxyConfig[];
}
/**
 * Aggregate counters describing the known proxies.
 */
export interface ProxyStats {
/** Number of known proxies. */
total: number;
/** Number of proxies currently considered working. */
working: number;
/** Number of proxies not considered working. */
failed: number;
/** Timestamp attached to these statistics. */
lastCheck: Date;
/** Average response time of the working proxies. */
avgResponseTime: number;
}
/**
 * Outcome of testing a single proxy.
 *
 * NOTE(review): `proxy` is declared twice below with different types
 * (ProxyConfig and ProxyInfo). This looks like the before/after lines of a
 * diff sitting side by side; only one declaration should remain.
 */
export interface ProxyCheckResult {
/** The proxy that was tested. */
proxy: ProxyConfig;
proxy: ProxyInfo;
/** Whether the test request through the proxy was considered successful. */
isWorking: boolean;
/** Elapsed time of the test request. */
responseTime: number;
/** Error message when the check failed. */
error?: string;
/** When the check was performed. */
checkedAt: Date;
}
export interface ProxyData extends ProxyConfig {
addedAt: Date;
lastChecked: string | null;
isWorking: boolean | null;
responseTime: number | null;
export interface ProxyStats {
total: number;
working: number;
avgResponseTime: number;
}
export class ProxyService {
private logger;
private cache: CacheProvider;
private logger = new Logger('proxy-service');
private cache: CacheProvider = createCache('hybrid');
private httpClient: HttpClient;
// Limiter created with a concurrency of 200; work scheduled through it is throttled to that many tasks at once.
private readonly concurrencyLimit = pLimit(200);
// Key prefixes / keys used for proxy entries in the cache.
private readonly CACHE_PREFIX = 'proxy:';
private readonly WORKING_PROXIES_KEY = 'proxy:working';
private readonly PROXY_STATS_KEY = 'proxy:stats';
private readonly CHECK_TIMEOUT = 3000; // 3000 - presumably milliseconds, i.e. 3 seconds (not 10); TODO confirm the unit HttpClient expects
// NOTE(review): the check URL embeds an API key in source; consider loading it from configuration or a secret store.
private readonly DEFAULT_CHECK_URL = 'https://proxy-detection.stare.gg/?api_key=bd406bf53ddc6abe1d9de5907830a955';
// Address looked for in the check response; a response containing it is not counted as working.
// NOTE(review): presumably the machine's own public IP, hard-coded - confirm and consider making it configurable.
private readonly DEFAULT_IP_ADDRESS = '99.246.102.205'
// Default proxy list sources. Every entry below is commented out, so this array is empty at runtime.
// NOTE(review): the hookzof/socks5_list entry appears twice in the commented list.
private readonly defaultSources: ProxySource[] = [
// {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',protocol: 'http', },
// {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',protocol: 'socks4', },
// {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',protocol: 'socks5', },
// {url: 'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',protocol: 'socks5', },
// {url: 'https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt',protocol: 'http', },
// {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/http.txt',protocol: 'http', },
// {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',protocol: 'https', },
// {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks4.txt',protocol: 'socks4', },
// {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks5.txt',protocol: 'socks5', },
// {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',protocol: 'http', },
// {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',protocol: 'socks4', },
// {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',protocol: 'socks5', },
// {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/http.txt',protocol: 'http', },
// {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/socks4.txt',protocol: 'socks4', },
// {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/socks5.txt',protocol: 'socks5', },
// {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/http',protocol: 'http', },
// {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks4',protocol: 'socks4', },
// {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks5',protocol: 'socks5', },
// {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt',protocol: 'http', },
// {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',protocol: 'socks4', },
// {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',protocol: 'socks5', },
// {url: 'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',protocol: 'socks5', },
// {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/http.txt',protocol: 'http', },
// {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',protocol: 'socks4', },
// {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',protocol: 'socks5', },
// {url: 'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/http.txt',protocol: 'http', },
];
// Limiter created with a concurrency of 1000; source downloads and proxy checks are both scheduled through it.
private readonly concurrencyLimit = pLimit(1000);
// Prefix of the cache keys under which per-proxy check results are stored.
private readonly CACHE_KEY = 'proxy';
private readonly CACHE_TTL = 86400; // 24 hours, assuming the cache TTL is expressed in seconds - TODO confirm
// Timeout handed to HttpClient (presumably milliseconds - TODO confirm).
private readonly CHECK_TIMEOUT = 5000;
// Endpoint requested through each proxy to test whether it works.
private readonly CHECK_URL = 'https://httpbin.org/ip';
// Public proxy lists to download. Each entry's `protocol` is assigned to every
// proxy parsed from that list. Every URL is listed exactly once: a repeated URL
// would be downloaded twice and each of its proxies checked twice.
private readonly PROXY_SOURCES = [
  {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt',protocol: 'http', },
  {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',protocol: 'socks4', },
  {url: 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',protocol: 'socks5', },
  {url: 'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',protocol: 'socks5', },
  {url: 'https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt',protocol: 'http', },
  {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/http.txt',protocol: 'http', },
  {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/https.txt',protocol: 'https', },
  {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks4.txt',protocol: 'socks4', },
  {url: 'https://raw.githubusercontent.com/ErcinDedeoglu/proxies/main/proxies/socks5.txt',protocol: 'socks5', },
  {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt',protocol: 'http', },
  {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',protocol: 'socks4', },
  {url: 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',protocol: 'socks5', },
  {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/http.txt',protocol: 'http', },
  {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/socks4.txt',protocol: 'socks4', },
  {url: 'https://raw.githubusercontent.com/TuanMinPay/live-proxy/master/socks5.txt',protocol: 'socks5', },
  {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/http',protocol: 'http', },
  {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks4',protocol: 'socks4', },
  {url: 'https://raw.githubusercontent.com/casals-ar/proxy-list/main/socks5',protocol: 'socks5', },
  {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/http.txt',protocol: 'http', },
  {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',protocol: 'socks4', },
  {url: 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',protocol: 'socks5', },
  {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/http.txt',protocol: 'http', },
  {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',protocol: 'socks4', },
  {url: 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',protocol: 'socks5', },
  {url: 'https://raw.githubusercontent.com/vakhov/fresh-proxy-list/master/http.txt',protocol: 'http', },
];
/**
 * Builds the service: creates the 'proxy-service' logger and the hybrid cache,
 * then an HTTP client that uses CHECK_TIMEOUT as its timeout and shares the
 * service logger.
 */
constructor() {
  const serviceLogger = new Logger('proxy-service');
  this.logger = serviceLogger;
  this.cache = createCache('hybrid');

  const clientOptions = { timeout: this.CHECK_TIMEOUT };
  this.httpClient = new HttpClient(clientOptions, serviceLogger);

  serviceLogger.info('ProxyService initialized');
}
/**
* Start the proxy refresh job
*/
async queueRefreshProxies(intervalMs: number = 30 * 60 * 1000): Promise<void> {
this.logger.info('Starting proxy refresh job', { intervalMs });
/**
 * Downloads every list in PROXY_SOURCES (throttled by concurrencyLimit) and
 * hands the combined proxies to checkProxies.
 *
 * The check is started in the background and not awaited, so the returned
 * promise resolves once all sources have been fetched, not once the proxies
 * have been validated.
 *
 * @returns Always `true` once the fetch phase has finished.
 */
async fetchProxiesFromSources(): Promise<boolean> {
  const downloads = this.PROXY_SOURCES.map(source =>
    this.concurrencyLimit(() => this.fetchProxiesFromSource(source))
  );
  const perSource = await Promise.all(downloads);

  // Fire-and-forget, but with a rejection handler so that a failure in the
  // background check is logged instead of surfacing as an unhandled rejection.
  this.checkProxies(perSource.flat()).catch(error => {
    this.logger.error('Error in background proxy validation', error);
  });

  return true;
}
// Initial refresh
await this.scrapeProxies();
// Set up periodic refresh
setInterval(async () => {
async fetchProxiesFromSource(source: { url: string; protocol: string }): Promise<ProxyInfo[]> {
const allProxies: ProxyInfo[] = [];
try {
await this.scrapeProxies();
this.logger.info(`Fetching proxies from ${source.url}`);
const response = await this.httpClient.get(source.url, {
timeout: 10000
});
if (response.status !== 200) {
this.logger.warn(`Failed to fetch from ${source.url}: ${response.status}`);
return []
}
const text = response.data;
const lines = text.split('\n').filter((line: string) => line.trim());
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed || trimmed.startsWith('#')) continue;
// Parse formats like "host:port" or "host:port:user:pass"
const parts = trimmed.split(':');
if (parts.length >= 2) {
const proxy: ProxyInfo = {
protocol: source.protocol as 'http' | 'https' | 'socks4' | 'socks5',
host: parts[0],
port: parseInt(parts[1])
};
if (!isNaN(proxy.port) && proxy.host) {
allProxies.push(proxy);
}
}
}
this.logger.info(`Parsed ${allProxies.length} proxies from ${source.url}`);
} catch (error) {
this.logger.error('Error in periodic proxy refresh', error);
this.logger.error(`Error fetching proxies from ${source.url}`, error);
return [];
}
}, intervalMs);
}
/**
* Scrape proxies from all sources
*/
async scrapeProxies(sources: ProxySource[] = this.defaultSources): Promise<number> {
this.logger.info('Starting proxy scraping', { sourceCount: sources.length });
const allProxies: ProxyConfig[] = [];
const scrapingPromises = sources.map(source => this.scrapeFromSource(source));
const results = await Promise.allSettled(scrapingPromises);
results.forEach((result, index) => {
if (result.status === 'fulfilled') {
allProxies.push(...result.value);
this.logger.info('Successfully scraped from source', {
url: sources[index].url,
count: result.value.length
});
} else {
this.logger.error('Failed to scrape from source', {
url: sources[index].url,
error: result.reason
});
}
});
// Remove duplicates
const uniqueProxies = this.removeDuplicateProxies(allProxies);
// Store all proxies in cache
// await this.storeProxies(uniqueProxies);
this.logger.info('Proxy scraping completed', {
total: allProxies.length,
unique: uniqueProxies.length
});
// Start validation of new proxies
this.validateProxiesInBackground(uniqueProxies);
return uniqueProxies.length;
this.logger.info(`Total proxies fetched: ${allProxies.length}`);
return allProxies;
}
/**
 * Download one proxy list and turn it into proxy configs.
 *
 * The response body must be a non-empty string. It is parsed with the source's
 * own parser when one is supplied, otherwise with parseHttpProxyList. Each
 * resulting proxy is tagged with the source's protocol. Any failure is logged
 * and yields an empty list.
 *
 * @param source The list to download.
 * @returns The parsed proxies, or `[]` on error.
 */
private async scrapeFromSource(source: ProxySource): Promise<ProxyConfig[]> {
  try {
    const response = await this.httpClient.get(source.url);
    const payload = response.data;

    if (typeof payload !== 'string' || payload === '') {
      throw new Error('Invalid response data');
    }

    let parsed: ProxyConfig[];
    if (source.parser) {
      parsed = source.parser(payload);
    } else {
      parsed = this.parseHttpProxyList(payload);
    }

    const tagged: ProxyConfig[] = [];
    for (const entry of parsed) {
      tagged.push({
        protocol: source.protocol,
        host: entry.host,
        port: entry.port,
        username: entry.username,
        password: entry.password
      });
    }
    return tagged;
  } catch (error) {
    this.logger.error('Error scraping from source', {
      url: source.url,
      error: error
    });
    return [];
  }
}
/**
 * Parse a newline-separated proxy list whose entries look like "ip:port".
 *
 * Blank lines and lines starting with '#' are ignored. An entry is kept only
 * when its first field passes isValidIP and its second passes isValidPort;
 * any further ':'-separated fields are ignored. Every result is given the
 * 'http' protocol.
 *
 * @param content Raw text of the list.
 * @returns The proxies that could be parsed.
 */
private parseHttpProxyList(content: string): ProxyConfig[] {
  const parsed: ProxyConfig[] = [];

  for (const rawLine of content.split('\n')) {
    const entry = rawLine.trim();
    if (entry === '' || entry.startsWith('#')) {
      continue;
    }

    const fields = entry.split(':');
    const host = fields[0];
    const port = fields[1];
    if (!host || !port) {
      continue;
    }
    if (!this.isValidIP(host) || !this.isValidPort(port)) {
      continue;
    }

    parsed.push({
      protocol: 'http',
      host: host.trim(),
      port: parseInt(port.trim())
    });
  }

  return parsed;
}
/**
 * Tell whether a string is a dotted-quad IPv4 address.
 *
 * @param ip Candidate address.
 * @returns `true` when `ip` consists of exactly four dot-separated octets, each in 0-255.
 */
private isValidIP(ip: string): boolean {
  // Anchored at both ends so nothing may precede or follow the four octets.
  return /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/.test(ip);
}
/**
 * Validate a port number given as text.
 *
 * Surrounding whitespace is ignored. The remaining text must consist solely of
 * decimal digits and denote a value in 1-65535.
 *
 * @param port Candidate port, e.g. "8080".
 * @returns `true` when `port` is a valid TCP/UDP port number.
 */
private isValidPort(port: string): boolean {
  const digits = port.trim();

  // parseInt on its own stops at the first non-digit and honours a "0x" prefix,
  // so "8080abc" (-> 8080) and "0x50" (-> 80) would be accepted. Require plain
  // decimal digits before converting.
  if (!/^\d+$/.test(digits)) {
    return false;
  }

  const portNum = Number.parseInt(digits, 10);
  return portNum > 0 && portNum <= 65535;
}
/**
 * Drop proxies whose host:port pair has already been seen.
 *
 * The first occurrence of each pair is kept and input order is preserved.
 * The protocol is not part of the comparison.
 *
 * @param proxies Proxies to de-duplicate.
 * @returns A new array holding one proxy per distinct host:port.
 */
private removeDuplicateProxies(proxies: ProxyConfig[]): ProxyConfig[] {
  const seenEndpoints = new Set<string>();
  const unique: ProxyConfig[] = [];

  for (const candidate of proxies) {
    const endpoint = `${candidate.host}:${candidate.port}`;
    if (!seenEndpoints.has(endpoint)) {
      seenEndpoints.add(endpoint);
      unique.push(candidate);
    }
  }

  return unique;
}
/**
* Check if a proxy is working
*/
async checkProxy(proxy: ProxyConfig, checkUrl: string = this.DEFAULT_CHECK_URL): Promise<ProxyCheckResult> {
async checkProxy(proxy: ProxyInfo): Promise<ProxyCheckResult> {
const startTime = Date.now();
console.log('Checking proxy:', `${proxy.protocol}://${proxy.host}:${proxy.port}`);
try {
this.logger.debug('Proxy check initiate request', {
proxy: proxy.host + ':' + proxy.port,
// Test the proxy
const response = await this.httpClient.get(this.CHECK_URL, {
proxy,
timeout: this.CHECK_TIMEOUT
});
const response = await this.httpClient.get(checkUrl, {proxy: proxy, timeout: this.CHECK_TIMEOUT});
const responseTime = Date.now() - startTime;
this.logger.debug('Proxy check response', {
proxy: proxy.host + ':' + proxy.port,
});
if (response.status >= 200 && response.status < 300 && !response.data.contains(this.DEFAULT_IP_ADDRESS)) {
const result: ProxyCheckResult = {
proxy,
isWorking: true,
responseTime,
checkedAt: new Date()
};
const isWorking = response.status >= 200 && response.status < 300;
// Update cache with working status
await this.updateProxyStatus(proxy, true, responseTime);
this.logger.debug('Proxy check successful', {
host: proxy.host,
port: proxy.port,
responseTime
});
const result: ProxyCheckResult = {
proxy,
isWorking,
responseTime,
checkedAt: new Date()
};
return result;
// Cache the result
// await this.cache.set(cacheKey, result, this.CACHE_TTL);
if (isWorking) {
await this.cache.set(`${this.CACHE_KEY}:${proxy.protocol}://${proxy.host}:${proxy.port}`, result, this.CACHE_TTL);
} else {
throw new Error(`HTTP ${response.status}`);
await this.cache.del(`${this.CACHE_KEY}:${proxy.protocol}://${proxy.host}:${proxy.port}`);
}
this.logger.debug('Proxy check completed', {
host: proxy.host,
port: proxy.port,
isWorking,
responseTime
});
return result;
} catch (error) {
const responseTime = Date.now() - startTime;
const errorMessage = error instanceof Error ? error.message : String(error);
@ -280,72 +176,50 @@ export class ProxyService {
checkedAt: new Date()
};
// Update cache with failed status
await this.updateProxyStatus(proxy, false, responseTime);
// Cache failed result for shorter time
// await this.cache.set(cacheKey, result, 300); // 5 minutes
await this.cache.del(`${this.CACHE_KEY}:${proxy.protocol}://${proxy.host}:${proxy.port}`);
this.logger.debug('Proxy check failed', {
host: proxy.host,
port: proxy.port,
error: errorMessage
this.logger.debug('Proxy check failed', {
host: proxy.host,
port: proxy.port,
error: errorMessage
});
return result;
}
}
/**
 * Record the outcome of a proxy check in the cache.
 *
 * Nothing happens unless the proxy already has a cache entry. When it does,
 * that entry is rewritten with the new status, response time and check time.
 * A separate "working" key for the proxy is then written when the proxy works
 * and deleted when it does not. Errors are logged and not rethrown.
 *
 * @param proxy The proxy that was checked.
 * @param isWorking Result of the check.
 * @param responseTime Measured response time of the check.
 */
private async updateProxyStatus(proxy: ProxyConfig, isWorking: boolean, responseTime: number): Promise<void> {
  try {
    const cacheKey = this.getProxyKey(proxy);
    const cached = await this.cache.get<ProxyData>(cacheKey);
    if (!cached) {
      return;
    }

    const refreshed: ProxyData = {
      ...cached,
      isWorking,
      responseTime,
      lastChecked: new Date().toISOString()
    };
    await this.cache.set(cacheKey, refreshed, 86400);

    // Keep the per-proxy "working" marker in step with the latest result.
    const workingKey = `${this.WORKING_PROXIES_KEY}:${proxy.host}:${proxy.port}`;
    if (!isWorking) {
      await this.cache.del(workingKey);
      return;
    }
    await this.cache.set(workingKey, proxy, 86400);
  } catch (error) {
    this.logger.error('Error updating proxy status', error);
  }
}
/**
* Get a working proxy from cache
* Check multiple proxies concurrently
*/
async getWorkingProxy(): Promise<ProxyConfig | null> {
/**
 * Check a batch of proxies, throttled by concurrencyLimit, and log a summary
 * of how many work.
 *
 * @param proxies Proxies to check.
 * @returns One result per input proxy, in input order.
 */
async checkProxies(proxies: ProxyInfo[]): Promise<ProxyCheckResult[]> {
  this.logger.info('Checking proxies', { count: proxies.length });

  const outcomes = await Promise.all(
    proxies.map(candidate => this.concurrencyLimit(() => this.checkProxy(candidate)))
  );

  let working = 0;
  for (const outcome of outcomes) {
    if (outcome.isWorking) {
      working += 1;
    }
  }

  const total = proxies.length;
  this.logger.info('Proxy check completed', {
    total,
    working,
    failed: total - working
  });

  return outcomes;
}
/**
* Get a random working proxy from cache
*/
async getWorkingProxy(): Promise<ProxyInfo | null> {
try {
// Get all working proxy keys and pick one randomly
const allProxies = await this.getAllProxies();
const workingProxies = [];
for (const proxy of allProxies) {
const key = this.getProxyKey(proxy);
const data = await this.cache.get<ProxyData>(key);
if (data && data.isWorking) {
workingProxies.push(proxy);
}
}
if (workingProxies.length > 0) {
const randomIndex = Math.floor(Math.random() * workingProxies.length);
return workingProxies[randomIndex];
}
this.logger.warn('No working proxies available');
// Note: This is a simplified implementation
// In production, you'd want to maintain a working proxies list
this.logger.warn('getWorkingProxy not fully implemented - requires proxy list management');
return null;
} catch (error) {
this.logger.error('Error getting working proxy', error);
@ -354,166 +228,42 @@ export class ProxyService {
}
/**
* Get multiple working proxies
* Add proxies to check and cache
*/
/**
 * Collect up to `count` proxies whose cache entry marks them as working.
 *
 * Proxies from getAllProxies are examined one after another, and the scan
 * stops as soon as enough have been found. Errors are logged and yield an
 * empty list.
 *
 * @param count Maximum number of proxies to return.
 * @returns The working proxies found, at most `count` of them.
 */
async getWorkingProxies(count: number = 10): Promise<ProxyConfig[]> {
  try {
    const selected: ProxyConfig[] = [];
    const candidates = await this.getAllProxies();

    for (const candidate of candidates) {
      if (selected.length >= count) {
        break;
      }
      const entry = await this.cache.get<ProxyData>(this.getProxyKey(candidate));
      if (entry?.isWorking) {
        selected.push(candidate);
      }
    }

    return selected;
  } catch (error) {
    this.logger.error('Error getting working proxies', error);
    return [];
  }
}
/**
* Get all proxies from cache
*/
async getAllProxies(): Promise<ProxyConfig[]> {
try {
// Since we can't use keys() directly, we'll need to track proxy keys separately
// For now, we'll implement a simple approach using a known key pattern
const proxies: ProxyConfig[] = [];
// We'll need to either:
// 1. Maintain a separate index of all proxy keys
// 2. Or use a different approach
// For now, let's return empty array and log a warning
this.logger.warn('getAllProxies not fully implemented - Redis cache provider limitations');
return proxies;
} catch (error) {
this.logger.error('Error getting all proxies', error);
return [];
}
/**
 * Kicks off validation of the given proxies via checkProxies without awaiting
 * it, so the returned promise resolves before the checks have finished.
 * A rejection of the background check is logged rather than propagated.
 *
 * @param proxies Proxies to validate.
 */
async addProxies(proxies: ProxyInfo[]): Promise<void> {
this.logger.info('Adding proxies for validation', { count: proxies.length });
// Start background validation
this.checkProxies(proxies).catch(error => {
this.logger.error('Error in background proxy validation', error);
});
}
/**
 * Compute proxy statistics and cache them for 300 (TTL units of the cache).
 *
 * Counts come from getAllProxies and from getWorkingProxies (capped at 1000).
 * The average response time is a placeholder: every working proxy contributes
 * a fixed 1000, because response times are not available on ProxyConfig.
 * On any error the failure is logged and all-zero statistics are returned.
 *
 * @returns The computed statistics.
 */
async getProxyStats(): Promise<ProxyStats> {
  try {
    const known = await this.getAllProxies();
    const alive = await this.getWorkingProxies(1000); // Get up to 1000 for stats

    let avgResponseTime = 0;
    if (alive.length > 0) {
      let sum = 0;
      for (const _proxy of alive) {
        // Since responseTime is not in ProxyConfig, each proxy counts as a fixed placeholder
        sum += 1000;
      }
      avgResponseTime = sum / alive.length;
    }

    const stats: ProxyStats = {
      total: known.length,
      working: alive.length,
      failed: known.length - alive.length,
      lastCheck: new Date(),
      avgResponseTime: Math.round(avgResponseTime)
    };

    // Cache stats for 5 minutes
    await this.cache.set(this.PROXY_STATS_KEY, stats, 300);
    return stats;
  } catch (error) {
    this.logger.error('Error getting proxy stats', error);
    const empty: ProxyStats = {
      total: 0,
      working: 0,
      failed: 0,
      lastCheck: new Date(),
      avgResponseTime: 0
    };
    return empty;
  }
}
/**
 * Check every given proxy, throttled by concurrencyLimit, and wait until all
 * checks have settled.
 *
 * A check that rejects is logged with the proxy's host and port and replaced
 * by `null`, so one bad proxy never aborts the batch. Results are not returned.
 *
 * @param proxies Proxies to validate.
 */
private async validateProxiesInBackground(proxies: ProxyConfig[]): Promise<void> {
  this.logger.info('Starting background proxy validation', { count: proxies.length });

  const checkSafely = (target: ProxyConfig) =>
    this.checkProxy(target).catch(error => {
      this.logger.error('Error validating proxy', {
        host: target.host,
        port: target.port,
        error
      });
      return null;
    });

  const pending = proxies.map(target => this.concurrencyLimit(() => checkSafely(target)));
  await Promise.allSettled(pending);

  this.logger.info('Background proxy validation completed');
}
/**
 * Schedule a recurring re-check of proxies currently marked as working.
 *
 * Every `intervalMs` up to 100 working proxies are re-checked (throttled by
 * concurrencyLimit) and the number still working is logged. Errors inside a
 * run are logged and do not stop the schedule. The timer handle returned by
 * setInterval is not kept, so this method offers no way to cancel it.
 *
 * @param intervalMs Delay between runs, in milliseconds (default: 15 minutes).
 */
async startHealthChecks(intervalMs: number = 15 * 60 * 1000): Promise<void> {
  this.logger.info('Starting periodic proxy health checks', { intervalMs });

  const runHealthCheck = async () => {
    try {
      const candidates = await this.getWorkingProxies(100); // Check up to 100 working proxies
      const settled = await Promise.allSettled(
        candidates.map(candidate => this.concurrencyLimit(() => this.checkProxy(candidate)))
      );

      let stillWorking = 0;
      for (const outcome of settled) {
        if (outcome.status === 'fulfilled' && outcome.value.isWorking) {
          stillWorking += 1;
        }
      }

      this.logger.info('Health check completed', {
        checked: candidates.length,
        stillWorking
      });
    } catch (error) {
      this.logger.error('Error in health check', error);
    }
  };

  setInterval(runHealthCheck, intervalMs);
}
/**
 * Clear proxy data from cache.
 *
 * Currently only the cached statistics entry (PROXY_STATS_KEY) is deleted;
 * individual proxy entries are left in place and a warning is logged to say
 * so. Errors are logged and not rethrown.
 */
async clearProxies(): Promise<void> {
try {
// Since we can't use keys() and del() with spread, we'll clear known keys
await this.cache.del(this.PROXY_STATS_KEY);
// Note: This is a limitation of the current cache provider
// In a full implementation, we'd need to maintain an index of proxy keys
this.logger.info('Cleared proxy stats from cache');
this.logger.warn('Full proxy data clearing not implemented due to cache provider limitations');
} catch (error) {
this.logger.error('Error clearing proxy data', error);
}
}
/**
* Get cache key for a proxy
*/
private getProxyKey(proxy: ProxyConfig): string {
return `${this.CACHE_PREFIX}${proxy.host}:${proxy.port}`;
async getStats(): Promise<ProxyStats> {
// Simplified stats - in production you'd track these properly
return {
total: 0,
working: 0,
avgResponseTime: 0
};
}
/**
* Graceful shutdown
* Clear proxy cache
*/
/**
 * Placeholder for clearing the proxy cache: it only logs a message and
 * removes no cache entries.
 */
async clearCache(): Promise<void> {
this.logger.info('Clearing proxy cache');
// Note: Cache provider limitations - would need proper key tracking
}
/**
 * Shutdown service.
 *
 * Only logs that the service is shutting down; no resources are released by
 * this method itself.
 */
async shutdown(): Promise<void> {
this.logger.info('Shutting down ProxyService');
// The cache and http client will handle their own cleanup
}
}