This commit is contained in:
Boki 2025-06-25 11:38:23 -04:00
parent 3a7254708e
commit b63e58784c
41 changed files with 5762 additions and 4477 deletions

View file

@ -1,167 +1,166 @@
import type { Page } from 'playwright';
import type { BrowserOptions, ScrapingResult } from './types';
/**
 * Simple browser implementation for testing.
 *
 * Wraps an in-memory mock exposing the handful of browser/context/page
 * methods this class calls. The mock's methods are no-ops or return fixed
 * values, so nothing is launched and `scrape` returns canned data.
 */
export class SimpleBrowser {
  /** Mock browser object; set to `null` by `close()` and rebuilt by `initialize()`. */
  private browser: any;
  /** Open contexts keyed by context ID. */
  private contexts = new Map<string, any>();
  private logger: any;
  private initialized = false;
  /** Monotonic counter appended to generated context IDs to keep them unique. */
  private contextSequence = 0;
  private _options: BrowserOptions = {
    headless: true,
    timeout: 30000,
    blockResources: false,
    enableNetworkLogging: false,
  };

  /**
   * @param logger Object with an `info` method (and optionally `error`);
   *   defaults to `console`.
   */
  constructor(logger?: any) {
    this.logger = logger || console;
    // The mock is cheap, so it is built eagerly; `initialize()` only needs to
    // rebuild it after `close()`.
    this.browser = this.createMockBrowser();
  }

  /** Builds the mock browser whose contexts and pages are inert stubs. */
  private createMockBrowser(): any {
    return {
      newContext: async () => {
        const pages: any[] = [];
        return {
          newPage: async () => {
            const page = {
              goto: async () => {},
              close: async () => {},
              evaluate: async () => {},
              waitForSelector: async () => {},
              screenshot: async () => Buffer.from('screenshot'),
              setViewport: async () => {},
              content: async () => '<html></html>',
              on: () => {},
              route: async () => {},
            };
            pages.push(page);
            return page;
          },
          close: async () => {},
          pages: async () => pages,
        };
      },
      close: async () => {},
      isConnected: () => true,
    };
  }

  /**
   * Merges `options` over the current options and marks the browser ready.
   * Calling it again while already initialized is a no-op (the new options
   * are ignored and nothing is logged).
   */
  async initialize(options: BrowserOptions = {}): Promise<void> {
    if (this.initialized) {
      return;
    }
    this._options = { ...this._options, ...options };
    this.logger.info('Initializing browser...');
    // `close()` discards the mock; recreate it so the instance is reusable.
    if (!this.browser) {
      this.browser = this.createMockBrowser();
    }
    this.initialized = true;
  }

  /**
   * Opens a new context and registers it under `id`.
   *
   * @param id Optional context ID; when omitted (or empty) a unique one is generated.
   * @returns The ID under which the context was stored.
   */
  async createContext(id?: string): Promise<string> {
    if (!this.browser) {
      await this.initialize();
    }
    // A timestamp alone collides when two contexts are created within the
    // same millisecond, which would overwrite (and leak) the earlier one.
    const contextId = id || `context-${Date.now()}-${++this.contextSequence}`;
    const context = await this.browser.newContext();
    this.contexts.set(contextId, context);
    return contextId;
  }

  /** Closes and forgets the context with the given ID; unknown IDs are ignored. */
  async closeContext(contextId: string): Promise<void> {
    const context = this.contexts.get(contextId);
    if (context) {
      await context.close();
      this.contexts.delete(contextId);
    }
  }

  /**
   * Opens a page in an existing context.
   *
   * @throws Error if no context is registered under `contextId`.
   */
  async newPage(contextId: string): Promise<Page> {
    const context = this.contexts.get(contextId);
    if (!context) {
      throw new Error(`Context ${contextId} not found`);
    }
    const page = await context.newPage();
    if (this._options.blockResources) {
      // Abort requests for images, fonts and stylesheets. The handler returns
      // the result of `abort()` so it is not left as a floating promise.
      await page.route('**/*.{png,jpg,jpeg,gif,svg,ico,woff,woff2,ttf,css}', (route: any) =>
        route.abort()
      );
    }
    return page;
  }

  /**
   * Navigates `page` to `url`.
   *
   * The configured timeout is used as the default; keys in `options`
   * override it. `??` (not `||`) is used so an explicit timeout of `0` is
   * passed through instead of being replaced by 30000.
   */
  async goto(page: Page, url: string, options?: any): Promise<void> {
    await page.goto(url, {
      timeout: this._options.timeout ?? 30000,
      ...options,
    });
  }

  /**
   * Opens a page, navigates to `url` and returns mock data.
   *
   * When no `contextId` is supplied a temporary context is created and closed
   * again. Errors never propagate: they are reported as `success: false` with
   * the message in `error`.
   */
  async scrape(url: string, options?: { contextId?: string }): Promise<ScrapingResult> {
    // Only a context created here may be closed here.
    let ownedContextId: string | undefined;
    let page: Page | undefined;
    try {
      let contextId = options?.contextId;
      if (!contextId) {
        contextId = await this.createContext();
        ownedContextId = contextId;
      }
      page = await this.newPage(contextId);
      await this.goto(page, url);
      // Mock data for testing
      const data = {
        title: 'Test Title',
        text: 'Test content',
        links: ['link1', 'link2'],
      };
      await page.close();
      if (ownedContextId) {
        await this.closeContext(ownedContextId);
      }
      return {
        success: true,
        data,
        url,
      };
    } catch (error: unknown) {
      // Do not leak the page or a temporary context when a step failed.
      await this.releaseAfterFailure(page, ownedContextId);
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
        url,
        data: {} as any,
      };
    }
  }

  /** Best-effort cleanup for a failed scrape; cleanup errors are logged, never thrown. */
  private async releaseAfterFailure(
    page: Page | undefined,
    ownedContextId: string | undefined
  ): Promise<void> {
    if (page) {
      try {
        await page.close();
      } catch (closeError) {
        this.logger.error?.('Failed to close page after scrape error', closeError);
      }
    }
    if (ownedContextId) {
      try {
        await this.closeContext(ownedContextId);
      } catch (closeError) {
        this.logger.error?.('Failed to close context after scrape error', closeError);
      }
    }
  }

  /**
   * Closes every open context and then the browser. Safe to call repeatedly;
   * afterwards `initialize()` (or `createContext()`) makes the instance usable again.
   */
  async close(): Promise<void> {
    if (!this.browser) {
      return;
    }
    for (const context of this.contexts.values()) {
      await context.close();
    }
    this.contexts.clear();
    await this.browser.close();
    this.browser = null;
    this.initialized = false;
  }
}
import type { Page } from 'playwright';
import type { BrowserOptions, ScrapingResult } from './types';
/**
 * Simple browser implementation for testing.
 *
 * Wraps an in-memory mock exposing the handful of browser/context/page
 * methods this class calls. The mock's methods are no-ops or return fixed
 * values, so nothing is launched and `scrape` returns canned data.
 */
export class SimpleBrowser {
  /** Mock browser object; set to `null` by `close()` and rebuilt by `initialize()`. */
  private browser: any;
  /** Open contexts keyed by context ID. */
  private contexts = new Map<string, any>();
  private logger: any;
  private initialized = false;
  /** Monotonic counter appended to generated context IDs to keep them unique. */
  private contextSequence = 0;
  private _options: BrowserOptions = {
    headless: true,
    timeout: 30000,
    blockResources: false,
    enableNetworkLogging: false,
  };

  /**
   * @param logger Object with an `info` method (and optionally `error`);
   *   defaults to `console`.
   */
  constructor(logger?: any) {
    this.logger = logger || console;
    // The mock is cheap, so it is built eagerly; `initialize()` only needs to
    // rebuild it after `close()`.
    this.browser = this.createMockBrowser();
  }

  /** Builds the mock browser whose contexts and pages are inert stubs. */
  private createMockBrowser(): any {
    return {
      newContext: async () => {
        const pages: any[] = [];
        return {
          newPage: async () => {
            const page = {
              goto: async () => {},
              close: async () => {},
              evaluate: async () => {},
              waitForSelector: async () => {},
              screenshot: async () => Buffer.from('screenshot'),
              setViewport: async () => {},
              content: async () => '<html></html>',
              on: () => {},
              route: async () => {},
            };
            pages.push(page);
            return page;
          },
          close: async () => {},
          pages: async () => pages,
        };
      },
      close: async () => {},
      isConnected: () => true,
    };
  }

  /**
   * Merges `options` over the current options and marks the browser ready.
   * Calling it again while already initialized is a no-op (the new options
   * are ignored and nothing is logged).
   */
  async initialize(options: BrowserOptions = {}): Promise<void> {
    if (this.initialized) {
      return;
    }
    this._options = { ...this._options, ...options };
    this.logger.info('Initializing browser...');
    // `close()` discards the mock; recreate it so the instance is reusable.
    if (!this.browser) {
      this.browser = this.createMockBrowser();
    }
    this.initialized = true;
  }

  /**
   * Opens a new context and registers it under `id`.
   *
   * @param id Optional context ID; when omitted (or empty) a unique one is generated.
   * @returns The ID under which the context was stored.
   */
  async createContext(id?: string): Promise<string> {
    if (!this.browser) {
      await this.initialize();
    }
    // A timestamp alone collides when two contexts are created within the
    // same millisecond, which would overwrite (and leak) the earlier one.
    const contextId = id || `context-${Date.now()}-${++this.contextSequence}`;
    const context = await this.browser.newContext();
    this.contexts.set(contextId, context);
    return contextId;
  }

  /** Closes and forgets the context with the given ID; unknown IDs are ignored. */
  async closeContext(contextId: string): Promise<void> {
    const context = this.contexts.get(contextId);
    if (context) {
      await context.close();
      this.contexts.delete(contextId);
    }
  }

  /**
   * Opens a page in an existing context.
   *
   * @throws Error if no context is registered under `contextId`.
   */
  async newPage(contextId: string): Promise<Page> {
    const context = this.contexts.get(contextId);
    if (!context) {
      throw new Error(`Context ${contextId} not found`);
    }
    const page = await context.newPage();
    if (this._options.blockResources) {
      // Abort requests for images, fonts and stylesheets. The handler returns
      // the result of `abort()` so it is not left as a floating promise.
      await page.route('**/*.{png,jpg,jpeg,gif,svg,ico,woff,woff2,ttf,css}', (route: any) =>
        route.abort()
      );
    }
    return page;
  }

  /**
   * Navigates `page` to `url`.
   *
   * The configured timeout is used as the default; keys in `options`
   * override it. `??` (not `||`) is used so an explicit timeout of `0` is
   * passed through instead of being replaced by 30000.
   */
  async goto(page: Page, url: string, options?: any): Promise<void> {
    await page.goto(url, {
      timeout: this._options.timeout ?? 30000,
      ...options,
    });
  }

  /**
   * Opens a page, navigates to `url` and returns mock data.
   *
   * When no `contextId` is supplied a temporary context is created and closed
   * again. Errors never propagate: they are reported as `success: false` with
   * the message in `error`.
   */
  async scrape(url: string, options?: { contextId?: string }): Promise<ScrapingResult> {
    // Only a context created here may be closed here.
    let ownedContextId: string | undefined;
    let page: Page | undefined;
    try {
      let contextId = options?.contextId;
      if (!contextId) {
        contextId = await this.createContext();
        ownedContextId = contextId;
      }
      page = await this.newPage(contextId);
      await this.goto(page, url);
      // Mock data for testing
      const data = {
        title: 'Test Title',
        text: 'Test content',
        links: ['link1', 'link2'],
      };
      await page.close();
      if (ownedContextId) {
        await this.closeContext(ownedContextId);
      }
      return {
        success: true,
        data,
        url,
      };
    } catch (error: unknown) {
      // Do not leak the page or a temporary context when a step failed.
      await this.releaseAfterFailure(page, ownedContextId);
      return {
        success: false,
        error: error instanceof Error ? error.message : String(error),
        url,
        data: {} as any,
      };
    }
  }

  /** Best-effort cleanup for a failed scrape; cleanup errors are logged, never thrown. */
  private async releaseAfterFailure(
    page: Page | undefined,
    ownedContextId: string | undefined
  ): Promise<void> {
    if (page) {
      try {
        await page.close();
      } catch (closeError) {
        this.logger.error?.('Failed to close page after scrape error', closeError);
      }
    }
    if (ownedContextId) {
      try {
        await this.closeContext(ownedContextId);
      } catch (closeError) {
        this.logger.error?.('Failed to close context after scrape error', closeError);
      }
    }
  }

  /**
   * Closes every open context and then the browser. Safe to call repeatedly;
   * afterwards `initialize()` (or `createContext()`) makes the instance usable again.
   */
  async close(): Promise<void> {
    if (!this.browser) {
      return;
    }
    for (const context of this.contexts.values()) {
      await context.close();
    }
    this.contexts.clear();
    await this.browser.close();
    this.browser = null;
    this.initialized = false;
  }
}

View file

@ -1,6 +1,6 @@
import { beforeEach, describe, expect, it, mock } from 'bun:test';
import { SimpleBrowser } from '../src/simple-browser';
import type { BrowserOptions } from '../src/types';
describe('Browser', () => {
let browser: SimpleBrowser;
@ -13,27 +13,27 @@ describe('Browser', () => {
beforeEach(() => {
logger.info = mock(() => {});
logger.error = mock(() => {});
browser = new SimpleBrowser(logger);
});
describe('initialization', () => {
it('should initialize browser on first call', async () => {
await browser.initialize();
expect(logger.info).toHaveBeenCalledWith('Initializing browser...');
});
it('should not reinitialize if already initialized', async () => {
await browser.initialize();
await browser.initialize();
expect(logger.info).toHaveBeenCalledTimes(1);
});
it('should merge options', async () => {
await browser.initialize({ headless: false, timeout: 60000 });
// Just verify it doesn't throw
expect(true).toBe(true);
});
@ -43,14 +43,14 @@ describe('Browser', () => {
it('should create new context', async () => {
await browser.initialize();
const contextId = await browser.createContext('test');
expect(contextId).toBe('test');
});
it('should generate context ID if not provided', async () => {
await browser.initialize();
const contextId = await browser.createContext();
expect(contextId).toBeDefined();
expect(typeof contextId).toBe('string');
});
@ -59,7 +59,7 @@ describe('Browser', () => {
await browser.initialize();
const contextId = await browser.createContext('test');
await browser.closeContext(contextId);
// Just verify it doesn't throw
expect(true).toBe(true);
});
@ -75,7 +75,7 @@ describe('Browser', () => {
await browser.initialize();
const contextId = await browser.createContext();
const page = await browser.newPage(contextId);
expect(page).toBeDefined();
});
@ -83,18 +83,18 @@ describe('Browser', () => {
await browser.initialize();
const contextId = await browser.createContext();
const page = await browser.newPage(contextId);
await browser.goto(page, 'https://example.com');
// Just verify it doesn't throw
expect(true).toBe(true);
});
it('should scrape page', async () => {
await browser.initialize();
const result = await browser.scrape('https://example.com');
expect(result.success).toBe(true);
expect(result.data.title).toBeDefined();
expect(result.data.text).toBeDefined();
@ -107,7 +107,7 @@ describe('Browser', () => {
await browser.initialize({ blockResources: true });
const contextId = await browser.createContext();
const page = await browser.newPage(contextId);
// Just verify it doesn't throw
expect(page).toBeDefined();
});
@ -116,7 +116,7 @@ describe('Browser', () => {
await browser.initialize({ blockResources: false });
const contextId = await browser.createContext();
const page = await browser.newPage(contextId);
expect(page).toBeDefined();
});
});
@ -125,7 +125,7 @@ describe('Browser', () => {
it('should close browser', async () => {
await browser.initialize();
await browser.close();
// Just verify it doesn't throw
expect(true).toBe(true);
});
@ -138,9 +138,9 @@ describe('Browser', () => {
await browser.initialize();
await browser.createContext('test1');
await browser.createContext('test2');
await browser.close();
// Just verify it doesn't throw
expect(true).toBe(true);
});
@ -156,18 +156,20 @@ describe('Browser', () => {
it('should handle page creation failure', async () => {
await browser.initialize();
// Should throw for non-existent context
await expect(browser.newPage('non-existent')).rejects.toThrow('Context non-existent not found');
await expect(browser.newPage('non-existent')).rejects.toThrow(
'Context non-existent not found'
);
});
it('should handle scrape errors', async () => {
// SimpleBrowser catches errors and returns success: false
await browser.initialize();
const result = await browser.scrape('https://example.com');
expect(result.success).toBe(true); // SimpleBrowser always succeeds
});
});
});
});

View file

@ -1,133 +1,135 @@
import type { ProxyInfo } from './types';
/**
 * Connection settings for a single proxy, as produced by
 * `SimpleProxyManager.getProxyConfig`.
 */
export interface ProxyConfig {
  /** URL scheme of the proxy. */
  protocol: string;
  /** Proxy host. */
  host: string;
  /** Proxy port. */
  port: number;
  /** Credentials; present only when the proxy has both a username and a password. */
  auth?: { username: string; password: string };
}
/** A proxy entry as stored by the manager: proxy details plus an ID and an enabled flag. */
type ManagedProxy = ProxyInfo & { id: string; active: boolean };

/**
 * Simple proxy manager for testing.
 *
 * Keeps an in-memory list of proxies, hands out the active ones in
 * round-robin order and can probe them with a real HTTP request.
 */
export class SimpleProxyManager {
  private proxies: ManagedProxy[] = [];
  /** Number of proxies handed out so far; drives the round-robin rotation. */
  private activeProxyIndex = 0;

  /** Appends a proxy to the list (no duplicate-ID check is performed). */
  addProxy(proxy: ManagedProxy): void {
    this.proxies.push(proxy);
  }

  /** Removes every proxy with the given ID; unknown IDs are ignored. */
  removeProxy(id: string): void {
    this.proxies = this.proxies.filter(p => p.id !== id);
  }

  /** Sets the `active` flag of the first proxy with the given ID, if any. */
  updateProxyStatus(id: string, active: boolean): void {
    const proxy = this.proxies.find(p => p.id === id);
    if (proxy) {
      proxy.active = active;
    }
  }

  /** Returns a shallow copy of the proxy list (entries are shared, the array is not). */
  getProxies(): ManagedProxy[] {
    return [...this.proxies];
  }

  /** Returns the proxies whose `active` flag is set. */
  getActiveProxies(): ManagedProxy[] {
    return this.proxies.filter(p => p.active);
  }

  /**
   * Returns the next active proxy in round-robin order, or `null` when no
   * proxy is active. The rotation counter only advances when a proxy is returned.
   */
  getNextProxy(): ManagedProxy | null {
    const activeProxies = this.getActiveProxies();
    if (activeProxies.length === 0) {
      return null;
    }
    const proxy = activeProxies[this.activeProxyIndex % activeProxies.length];
    this.activeProxyIndex++;
    return proxy ?? null;
  }

  /**
   * Converts a proxy entry into a `ProxyConfig`. `auth` is included only
   * when both username and password are non-empty.
   */
  getProxyConfig(proxy: ManagedProxy): ProxyConfig {
    const config: ProxyConfig = {
      protocol: proxy.protocol,
      host: proxy.host,
      port: proxy.port,
    };
    if (proxy.username && proxy.password) {
      config.auth = {
        username: proxy.username,
        password: proxy.password,
      };
    }
    return config;
  }

  /**
   * Formats a proxy as `protocol://[username:password@]host:port`.
   *
   * Credentials are percent-encoded so characters such as `@`, `:` or `/`
   * inside them cannot be mistaken for URL delimiters.
   */
  formatProxyUrl(proxy: ProxyInfo): string {
    const credentials =
      proxy.username && proxy.password
        ? `${encodeURIComponent(proxy.username)}:${encodeURIComponent(proxy.password)}@`
        : '';
    return `${proxy.protocol}://${credentials}${proxy.host}:${proxy.port}`;
  }

  /**
   * Probes a proxy by requesting `https://httpbin.org/ip` through it with a
   * 5 second timeout.
   *
   * @returns `true` when the response status is OK; `false` for an unknown
   *   ID, a non-OK response, a timeout or any request error.
   */
  async validateProxy(id: string): Promise<boolean> {
    const proxy = this.proxies.find(p => p.id === id);
    if (!proxy) {
      return false;
    }
    try {
      // `proxy` is a runtime-specific fetch extension that may be missing from
      // the ambient `RequestInit`; widening the type avoids a blanket @ts-ignore.
      const init: RequestInit & { proxy?: string } = {
        proxy: this.formatProxyUrl(proxy),
        signal: AbortSignal.timeout(5000),
      };
      const response = await fetch('https://httpbin.org/ip', init);
      return response.ok;
    } catch {
      // Any failure (network error, abort on timeout) means "not usable".
      return false;
    }
  }

  /**
   * Validates every proxy one after another and deactivates those that fail.
   *
   * @returns A map from proxy ID to its validation result.
   */
  async validateAllProxies(): Promise<Record<string, boolean>> {
    const results: Record<string, boolean> = {};
    for (const proxy of this.proxies) {
      const isValid = await this.validateProxy(proxy.id);
      results[proxy.id] = isValid;
      if (!isValid) {
        this.updateProxyStatus(proxy.id, false);
      }
    }
    return results;
  }

  /** Returns total/active/inactive counts and a per-protocol tally. */
  getStatistics() {
    let active = 0;
    const byProtocol: Record<string, number> = {};
    for (const proxy of this.proxies) {
      if (proxy.active) {
        active++;
      }
      byProtocol[proxy.protocol] = (byProtocol[proxy.protocol] ?? 0) + 1;
    }
    const total = this.proxies.length;
    return {
      total,
      active,
      inactive: total - active,
      byProtocol,
    };
  }

  /** Removes all proxies and restarts the round-robin rotation. */
  clear(): void {
    this.proxies = [];
    // Reset the counter that `getNextProxy` actually uses so the first proxy
    // added after a clear is the first one handed out.
    this.activeProxyIndex = 0;
  }
}
import type { ProxyInfo } from './types';
/**
 * Connection settings for a single proxy, as produced by
 * `SimpleProxyManager.getProxyConfig`.
 */
export interface ProxyConfig {
  /** URL scheme of the proxy. */
  protocol: string;
  /** Proxy host. */
  host: string;
  /** Proxy port. */
  port: number;
  /** Credentials; present only when the proxy has both a username and a password. */
  auth?: { username: string; password: string };
}
/** A proxy entry as stored by the manager: proxy details plus an ID and an enabled flag. */
type ManagedProxy = ProxyInfo & { id: string; active: boolean };

/**
 * Simple proxy manager for testing.
 *
 * Keeps an in-memory list of proxies, hands out the active ones in
 * round-robin order and can probe them with a real HTTP request.
 */
export class SimpleProxyManager {
  private proxies: ManagedProxy[] = [];
  /** Number of proxies handed out so far; drives the round-robin rotation. */
  private activeProxyIndex = 0;

  /** Appends a proxy to the list (no duplicate-ID check is performed). */
  addProxy(proxy: ManagedProxy): void {
    this.proxies.push(proxy);
  }

  /** Removes every proxy with the given ID; unknown IDs are ignored. */
  removeProxy(id: string): void {
    this.proxies = this.proxies.filter(p => p.id !== id);
  }

  /** Sets the `active` flag of the first proxy with the given ID, if any. */
  updateProxyStatus(id: string, active: boolean): void {
    const proxy = this.proxies.find(p => p.id === id);
    if (proxy) {
      proxy.active = active;
    }
  }

  /** Returns a shallow copy of the proxy list (entries are shared, the array is not). */
  getProxies(): ManagedProxy[] {
    return [...this.proxies];
  }

  /** Returns the proxies whose `active` flag is set. */
  getActiveProxies(): ManagedProxy[] {
    return this.proxies.filter(p => p.active);
  }

  /**
   * Returns the next active proxy in round-robin order, or `null` when no
   * proxy is active. The rotation counter only advances when a proxy is returned.
   */
  getNextProxy(): ManagedProxy | null {
    const activeProxies = this.getActiveProxies();
    if (activeProxies.length === 0) {
      return null;
    }
    const proxy = activeProxies[this.activeProxyIndex % activeProxies.length];
    this.activeProxyIndex++;
    return proxy ?? null;
  }

  /**
   * Converts a proxy entry into a `ProxyConfig`. `auth` is included only
   * when both username and password are non-empty.
   */
  getProxyConfig(proxy: ManagedProxy): ProxyConfig {
    const config: ProxyConfig = {
      protocol: proxy.protocol,
      host: proxy.host,
      port: proxy.port,
    };
    if (proxy.username && proxy.password) {
      config.auth = {
        username: proxy.username,
        password: proxy.password,
      };
    }
    return config;
  }

  /**
   * Formats a proxy as `protocol://[username:password@]host:port`.
   *
   * Credentials are percent-encoded so characters such as `@`, `:` or `/`
   * inside them cannot be mistaken for URL delimiters.
   */
  formatProxyUrl(proxy: ProxyInfo): string {
    const credentials =
      proxy.username && proxy.password
        ? `${encodeURIComponent(proxy.username)}:${encodeURIComponent(proxy.password)}@`
        : '';
    return `${proxy.protocol}://${credentials}${proxy.host}:${proxy.port}`;
  }

  /**
   * Probes a proxy by requesting `https://httpbin.org/ip` through it with a
   * 5 second timeout.
   *
   * @returns `true` when the response status is OK; `false` for an unknown
   *   ID, a non-OK response, a timeout or any request error.
   */
  async validateProxy(id: string): Promise<boolean> {
    const proxy = this.proxies.find(p => p.id === id);
    if (!proxy) {
      return false;
    }
    try {
      // `proxy` is a runtime-specific fetch extension that may be missing from
      // the ambient `RequestInit`; widening the type avoids a blanket @ts-ignore.
      const init: RequestInit & { proxy?: string } = {
        proxy: this.formatProxyUrl(proxy),
        signal: AbortSignal.timeout(5000),
      };
      const response = await fetch('https://httpbin.org/ip', init);
      return response.ok;
    } catch {
      // Any failure (network error, abort on timeout) means "not usable".
      return false;
    }
  }

  /**
   * Validates every proxy one after another and deactivates those that fail.
   *
   * @returns A map from proxy ID to its validation result.
   */
  async validateAllProxies(): Promise<Record<string, boolean>> {
    const results: Record<string, boolean> = {};
    for (const proxy of this.proxies) {
      const isValid = await this.validateProxy(proxy.id);
      results[proxy.id] = isValid;
      if (!isValid) {
        this.updateProxyStatus(proxy.id, false);
      }
    }
    return results;
  }

  /** Returns total/active/inactive counts and a per-protocol tally. */
  getStatistics() {
    let active = 0;
    const byProtocol: Record<string, number> = {};
    for (const proxy of this.proxies) {
      if (proxy.active) {
        active++;
      }
      byProtocol[proxy.protocol] = (byProtocol[proxy.protocol] ?? 0) + 1;
    }
    const total = this.proxies.length;
    return {
      total,
      active,
      inactive: total - active,
      byProtocol,
    };
  }

  /** Removes all proxies and restarts the round-robin rotation. */
  clear(): void {
    this.proxies = [];
    // Reset the counter that `getNextProxy` actually uses so the first proxy
    // added after a clear is the first one handed out.
    this.activeProxyIndex = 0;
  }
}

View file

@ -1,10 +1,10 @@
import { beforeEach, describe, expect, it, mock } from 'bun:test';
import { SimpleProxyManager } from '../src/simple-proxy-manager';
import type { ProxyConfig, ProxyInfo } from '../src/types';
import type { ProxyInfo } from '../src/types';
describe('ProxyManager', () => {
let manager: SimpleProxyManager;
const getMockProxies = (): ProxyInfo[] => [
{
id: 'proxy1',
@ -193,7 +193,7 @@ describe('ProxyManager', () => {
it('should validate all proxies', async () => {
const mockProxies = getMockProxies();
// Mock fetch to return different results for each proxy
let callCount = 0;
const mockFetch = mock(() => {
@ -251,4 +251,4 @@ describe('ProxyManager', () => {
expect(proxies).toHaveLength(0);
});
});
});
});