import { spawn, type ChildProcess } from 'child_process';
import { join } from 'path';
import { logger } from '../util/logger.js';
import type { Region } from '../types.js';

// ── Types ───────────────────────────────────────────────────────────────────

/** A single recognised word with its bounding box. */
export interface OcrWord {
  text: string;
  x: number;
  y: number;
  width: number;
  height: number;
}

/** A recognised line of text and the words it is made of. */
export interface OcrLine {
  text: string;
  words: OcrWord[];
}

/** Result of {@link OcrDaemon.ocr}. */
export interface OcrResponse {
  ok: true;
  text: string;
  lines: OcrLine[];
}

export interface GridItem {
  row: number;
  col: number;
  w: number;
  h: number;
}

export interface GridMatch {
  row: number;
  col: number;
  similarity: number;
}

/** Result of {@link OcrDaemon.gridScan}. */
export interface GridScanResult {
  cells: boolean[][];
  items: GridItem[];
  matches?: GridMatch[];
}

/** Result of {@link OcrDaemon.diffOcr} and {@link OcrDaemon.edgeOcr}. */
export interface DiffOcrResponse {
  text: string;
  lines: OcrLine[];
  region?: Region;
}

/** Result of {@link OcrDaemon.detectGrid}. */
export interface DetectGridResult {
  detected: boolean;
  region?: Region;
  cols?: number;
  rows?: number;
  cellWidth?: number;
  cellHeight?: number;
}

/** Result of {@link OcrDaemon.templateMatch} when a match was found. */
export interface TemplateMatchResult {
  found: boolean;
  x: number;
  y: number;
  width: number;
  height: number;
  confidence: number;
}

export type OcrEngine = 'tesseract' | 'easyocr' | 'paddleocr';
export type OcrPreprocess = 'none' | 'bgsub' | 'tophat';

export interface DiffCropParams {
  diffThresh?: number;
  rowThreshDiv?: number;
  colThreshDiv?: number;
  maxGap?: number;
  trimCutoff?: number;
  ocrPad?: number;
}

export interface OcrParams {
  kernelSize?: number;
  upscale?: number;
  useBackgroundSub?: boolean;
  dimPercentile?: number;
  textThresh?: number;
  softThreshold?: boolean;
  usePerLineOcr?: boolean;
  lineGapTolerance?: number;
  linePadY?: number;
  psm?: number;
  mergeGap?: number;
  linkThreshold?: number;
  textThreshold?: number;
  lowText?: number;
  widthThs?: number;
  paragraph?: boolean;
}

export interface DiffOcrParams {
  crop?: DiffCropParams;
  ocr?: OcrParams;
}

export type TooltipMethod = 'diff' | 'edge';

export interface EdgeCropParams {
  cannyLow?: number;
  cannyHigh?: number;
  minLineLength?: number;
  roiSize?: number;
  densityThreshold?: number;
  ocrPad?: number;
}

export interface EdgeOcrParams {
  crop?: EdgeCropParams;
  ocr?: OcrParams;
}

/** One JSON command line written to the daemon's stdin. */
interface DaemonRequest {
  cmd: string;
  region?: Region;
  path?: string;
  cols?: number;
  rows?: number;
  threshold?: number;
  minCellSize?: number;
  maxCellSize?: number;
  targetRow?: number;
  targetCol?: number;
  engine?: string;
  preprocess?: string;
  params?: DiffOcrParams;
  edgeParams?: EdgeOcrParams;
  cursorX?: number;
  cursorY?: number;
}

/** One JSON line read from the daemon's stdout. */
interface DaemonResponse {
  ok: boolean;
  ready?: boolean;
  text?: string;
  lines?: OcrLine[];
  image?: string;
  cells?: boolean[][];
  items?: GridItem[];
  matches?: GridMatch[];
  detected?: boolean;
  region?: Region;
  cols?: number;
  rows?: number;
  cellWidth?: number;
  cellHeight?: number;
  found?: boolean;
  x?: number;
  y?: number;
  width?: number;
  height?: number;
  confidence?: number;
  error?: string;
}

/** A response paired with the process that produced it. */
interface Dispatched {
  response: DaemonResponse;
  proc: ChildProcess;
}

/** A request waiting for its turn; carries its own timeout. */
interface QueuedRequest {
  request: DaemonRequest;
  timeout: number;
  resolve: (result: Dispatched) => void;
  reject: (err: Error) => void;
}

/** The single request currently written to a daemon and awaiting its reply. */
interface InFlight {
  entry: QueuedRequest;
  proc: ChildProcess;
  timer: ReturnType<typeof setTimeout>;
}

/** Bookkeeping for a spawned process that has not yet printed its ready line. */
interface ReadyWaiter {
  proc: ChildProcess;
  resolve: () => void;
  reject: (err: Error) => void;
  timer: ReturnType<typeof setTimeout>;
}

// ── OcrDaemon ───────────────────────────────────────────────────────────────

/** Default daemon executable; a relative path (no base directory is applied here). */
const DEFAULT_EXE = join(
  'tools',
  'OcrDaemon',
  'bin',
  'Release',
  'net8.0-windows10.0.19041.0',
  'OcrDaemon.exe',
);

const REQUEST_TIMEOUT = 5_000;
const CAPTURE_TIMEOUT = 10_000;
const PYTHON_ENGINE_TIMEOUT = 120_000;
const READY_TIMEOUT = 10_000;

/**
 * Timeout for OCR-style commands.
 * Python engines need longer timeout for first model load + download.
 */
function timeoutForEngine(engine: OcrEngine | undefined): number {
  return engine && engine !== 'tesseract' ? PYTHON_ENGINE_TIMEOUT : CAPTURE_TIMEOUT;
}

/**
 * Client for the OcrDaemon child process.
 *
 * Commands are written to the daemon's stdin as one JSON object per line and
 * replies are read from stdout as one JSON object per line. Replies carry no
 * request id, so only one request is in flight at a time; further requests
 * wait in a FIFO queue. A failed request is retried once (see
 * `sendWithRetry`).
 */
export class OcrDaemon {
  private proc: ChildProcess | null = null;
  private readonly exePath: string;
  /** Pending startup, shared by every caller that arrives while it is running. */
  private startup: Promise<void> | null = null;
  private readyWaiter: ReadyWaiter | null = null;
  private inflight: InFlight | null = null;
  private queue: QueuedRequest[] = [];
  private stopped = false;

  /**
   * @param exePath Path of the daemon executable; defaults to `DEFAULT_EXE`.
   */
  constructor(exePath?: string) {
    this.exePath = exePath ?? DEFAULT_EXE;
  }

  // ── Public API ──────────────────────────────────────────────────────────

  /**
   * Sends an `ocr` command.
   *
   * `engine` is only sent when it is not `'tesseract'`, and `preprocess` only
   * when it is not `'none'`.
   *
   * @throws Error when the request fails on both the first attempt and the retry.
   */
  async ocr(region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess): Promise<OcrResponse> {
    const req: DaemonRequest = { cmd: 'ocr' };
    if (region) req.region = region;
    if (engine && engine !== 'tesseract') req.engine = engine;
    if (preprocess && preprocess !== 'none') req.preprocess = preprocess;

    const resp = await this.sendWithRetry(req, timeoutForEngine(engine));
    return {
      ok: true,
      text: resp.text ?? '',
      lines: resp.lines ?? [],
    };
  }

  /**
   * Sends a `capture` command and decodes the base64 `image` field of the reply.
   *
   * @throws Error when the reply carries no `image` field.
   */
  async captureBuffer(region?: Region): Promise<Buffer> {
    const req: DaemonRequest = { cmd: 'capture' };
    if (region) req.region = region;

    const resp = await this.sendWithRetry(req, CAPTURE_TIMEOUT);
    if (typeof resp.image !== 'string') {
      throw new Error('Daemon capture response did not include image data');
    }
    return Buffer.from(resp.image, 'base64');
  }

  /**
   * Sends a `grid` command.
   *
   * `threshold` is omitted when falsy (including 0); `targetRow`/`targetCol`
   * are only sent when they are non-negative.
   */
  async gridScan(
    region: Region,
    cols: number,
    rows: number,
    threshold?: number,
    targetRow?: number,
    targetCol?: number,
  ): Promise<GridScanResult> {
    const req: DaemonRequest = { cmd: 'grid', region, cols, rows };
    if (threshold) req.threshold = threshold;
    if (targetRow != null && targetRow >= 0) req.targetRow = targetRow;
    if (targetCol != null && targetCol >= 0) req.targetCol = targetCol;

    const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
    return {
      cells: resp.cells ?? [],
      items: resp.items ?? [],
      matches: resp.matches ?? undefined,
    };
  }

  /**
   * Sends a `detect-grid` command. `minCellSize`/`maxCellSize` are omitted
   * when falsy.
   */
  async detectGrid(region: Region, minCellSize?: number, maxCellSize?: number): Promise<DetectGridResult> {
    const req: DaemonRequest = { cmd: 'detect-grid', region };
    if (minCellSize) req.minCellSize = minCellSize;
    if (maxCellSize) req.maxCellSize = maxCellSize;

    const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
    return {
      detected: resp.detected ?? false,
      region: resp.region,
      cols: resp.cols,
      rows: resp.rows,
      cellWidth: resp.cellWidth,
      cellHeight: resp.cellHeight,
    };
  }

  /** Sends a `snapshot` command and discards the reply payload. */
  async snapshot(): Promise<void> {
    await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT);
  }

  /**
   * Sends a `diff-ocr` command.
   *
   * Unlike {@link ocr}, `preprocess` is forwarded whenever it is set
   * (including `'none'`). `params` is only sent when it has at least one key.
   */
  async diffOcr(
    savePath?: string,
    region?: Region,
    engine?: OcrEngine,
    preprocess?: OcrPreprocess,
    params?: DiffOcrParams,
  ): Promise<DiffOcrResponse> {
    const req: DaemonRequest = { cmd: 'diff-ocr' };
    if (savePath) req.path = savePath;
    if (region) req.region = region;
    if (engine && engine !== 'tesseract') req.engine = engine;
    if (preprocess) req.preprocess = preprocess;
    if (params && Object.keys(params).length > 0) req.params = params;

    const resp = await this.sendWithRetry(req, timeoutForEngine(engine));
    return {
      text: resp.text ?? '',
      lines: resp.lines ?? [],
      region: resp.region,
    };
  }

  /**
   * Sends an `edge-ocr` command.
   *
   * `preprocess` is forwarded whenever it is set; `edgeParams` only when it
   * has at least one key; `cursorX`/`cursorY` whenever they are not
   * null/undefined (so 0 is sent).
   */
  async edgeOcr(
    savePath?: string,
    region?: Region,
    engine?: OcrEngine,
    preprocess?: OcrPreprocess,
    edgeParams?: EdgeOcrParams,
    cursorX?: number,
    cursorY?: number,
  ): Promise<DiffOcrResponse> {
    const req: DaemonRequest = { cmd: 'edge-ocr' };
    if (savePath) req.path = savePath;
    if (region) req.region = region;
    if (engine && engine !== 'tesseract') req.engine = engine;
    if (preprocess) req.preprocess = preprocess;
    if (edgeParams && Object.keys(edgeParams).length > 0) req.edgeParams = edgeParams;
    if (cursorX != null) req.cursorX = cursorX;
    if (cursorY != null) req.cursorY = cursorY;

    const resp = await this.sendWithRetry(req, timeoutForEngine(engine));
    return {
      text: resp.text ?? '',
      lines: resp.lines ?? [],
      region: resp.region,
    };
  }

  /** Sends a `screenshot` command with the given output path. */
  async saveScreenshot(path: string, region?: Region): Promise<void> {
    const req: DaemonRequest = { cmd: 'screenshot', path };
    if (region) req.region = region;
    await this.sendWithRetry(req, REQUEST_TIMEOUT);
  }

  /**
   * Sends a `match-template` command.
   *
   * @returns `null` when the reply's `found` flag is falsy; otherwise the
   * match fields copied from the reply (they are not validated here).
   */
  async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
    const req: DaemonRequest = { cmd: 'match-template', path: templatePath };
    if (region) req.region = region;

    const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
    if (!resp.found) return null;
    return {
      found: true,
      x: resp.x!,
      y: resp.y!,
      width: resp.width!,
      height: resp.height!,
      confidence: resp.confidence!,
    };
  }

  /** Eagerly spawn the daemon process so it's ready for the first real request. */
  async warmup(): Promise<void> {
    await this.ensureRunning();
  }

  /**
   * Marks the client as stopped (which disables the retry in
   * `sendWithRetry`) and kills the current daemon process, if any.
   */
  async stop(): Promise<void> {
    this.stopped = true;
    if (this.proc) this.killProcess(this.proc);
  }

  // ── Internal ────────────────────────────────────────────────────────────

  /**
   * Resolves once a daemon process is running and has printed its ready line.
   * Concurrent callers share one startup instead of racing each other.
   */
  private async ensureRunning(): Promise<void> {
    if (!this.startup) {
      const current = this.proc;
      // A process killed by a signal keeps exitCode === null, so check both.
      if (current && current.exitCode === null && current.signalCode === null) return;

      this.proc = null;
      this.startup = this.spawnDaemon().finally(() => {
        this.startup = null;
        // Requests queued while the daemon was starting can go out now
        // (or are rejected if startup failed).
        this.drainQueue();
      });
    }
    await this.startup;
  }

  /** Spawns the executable, wires its streams, and waits for the ready line. */
  private spawnDaemon(): Promise<void> {
    logger.info({ exe: this.exePath }, 'Spawning OCR daemon');

    const proc = spawn(this.exePath, [], {
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsHide: true,
    });
    this.proc = proc;

    // Without an 'error' listener a failed spawn would surface as an
    // unhandled 'error' event and take the host process down.
    proc.on('error', (err: Error) => {
      logger.warn({ err }, 'OcrDaemon process error');
      // pid is undefined when the process never started; 'exit' may not follow.
      if (proc.pid === undefined) this.handleProcessGone(proc, err);
    });

    proc.on('exit', (code) => {
      logger.warn({ code }, 'OcrDaemon exited');
      this.handleProcessGone(proc, new Error(`Daemon exited with code ${code}`));
    });

    proc.stdin?.on('error', (err: Error) => {
      // e.g. EPIPE when the daemon died; the exit handler / timeout settles the request.
      logger.warn({ err }, 'OcrDaemon stdin error');
    });

    // Let the stream decode UTF-8 so multi-byte characters split across
    // chunks are not corrupted.
    proc.stderr?.setEncoding('utf8');
    proc.stderr?.on('data', (chunk: string) => {
      logger.warn({ daemon: chunk.trim() }, 'OcrDaemon stderr');
    });

    // Per-process line buffer: output of a replaced process can never be
    // mixed into the stream of its successor.
    let pendingText = '';
    proc.stdout?.setEncoding('utf8');
    proc.stdout?.on('data', (chunk: string) => {
      pendingText += chunk;
      let newlineIdx: number;
      while ((newlineIdx = pendingText.indexOf('\n')) !== -1) {
        const line = pendingText.slice(0, newlineIdx).trim();
        pendingText = pendingText.slice(newlineIdx + 1);
        if (line) this.handleLine(proc, line);
      }
    });

    // Wait for ready signal
    return new Promise<void>((resolve, reject) => {
      const timer = setTimeout(() => {
        if (this.readyWaiter?.proc !== proc) return;
        this.readyWaiter = null;
        // Do not leave a never-ready process installed as the current one.
        this.killProcess(proc);
        reject(new Error('Daemon did not become ready within 10s'));
      }, READY_TIMEOUT);
      this.readyWaiter = { proc, resolve, reject, timer };
    }).then(() => {
      logger.info('OCR daemon ready');
    });
  }

  /** Handles one non-empty stdout line from `proc`. */
  private handleLine(proc: ChildProcess, line: string): void {
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      logger.warn({ line }, 'Failed to parse daemon response');
      return;
    }
    if (typeof parsed !== 'object' || parsed === null) {
      logger.warn({ line }, 'Failed to parse daemon response');
      return;
    }
    const message = parsed as DaemonResponse;

    // Handle ready signal
    const waiter = this.readyWaiter;
    if (message.ready && waiter && waiter.proc === proc) {
      this.readyWaiter = null;
      clearTimeout(waiter.timer);
      waiter.resolve();
      return;
    }

    // Handle normal response — only for the request written to this process.
    const current = this.inflight;
    if (current && current.proc === proc) {
      this.inflight = null;
      clearTimeout(current.timer);
      current.entry.resolve({ response: message, proc });
      this.drainQueue();
    }
  }

  /** Fails whatever was waiting on `proc` after it exited or failed to start. */
  private handleProcessGone(proc: ChildProcess, err: Error): void {
    if (this.proc === proc) this.proc = null;

    const waiter = this.readyWaiter;
    if (waiter && waiter.proc === proc) {
      this.readyWaiter = null;
      clearTimeout(waiter.timer);
      waiter.reject(err);
    }

    const current = this.inflight;
    if (current && current.proc === proc) {
      this.inflight = null;
      clearTimeout(current.timer);
      current.entry.reject(err);
      this.drainQueue();
    }
  }

  /** Ends stdin and kills `proc`; forgets it if it is the current process. */
  private killProcess(proc: ChildProcess): void {
    if (this.proc === proc) this.proc = null;
    proc.stdin?.end();
    proc.kill();
  }

  /** Queues one request after making sure a daemon is running. */
  private async send(request: DaemonRequest, timeout: number): Promise<Dispatched> {
    await this.ensureRunning();
    return new Promise<Dispatched>((resolve, reject) => {
      this.queue.push({ request, timeout, resolve, reject });
      this.drainQueue();
    });
  }

  /**
   * Writes the next queued request to the daemon if nothing is in flight and
   * no startup is pending. Each request is timed with its own timeout.
   */
  private drainQueue(): void {
    while (!this.inflight && !this.startup) {
      const entry = this.queue.shift();
      if (!entry) return;

      const proc = this.proc;
      const stdin = proc?.stdin;
      if (!proc || !stdin || !stdin.writable) {
        // Reject instead of throwing so the queue cannot get stuck.
        entry.reject(new Error('Daemon is not running'));
        continue;
      }

      const timer = setTimeout(() => {
        if (this.inflight?.entry !== entry) return;
        this.inflight = null;
        // Replies carry no request id: a late reply would be credited to the
        // next request, so an unresponsive daemon is killed.
        this.killProcess(proc);
        entry.reject(new Error(`Daemon request timed out after ${entry.timeout}ms`));
        this.drainQueue();
      }, entry.timeout);

      this.inflight = { entry, proc, timer };
      stdin.write(JSON.stringify(entry.request) + '\n');
    }
  }

  /**
   * Sends a request and retries it once on failure.
   *
   * A reply with `ok: false` counts as a failure; on the first attempt the
   * process that produced it is killed before retrying. No retry happens
   * after `stop()` has been called.
   *
   * @throws Error with the daemon's `error` text (or a generic message) when
   * the request ultimately fails.
   */
  private async sendWithRetry(request: DaemonRequest, timeout: number): Promise<DaemonResponse> {
    let rejectedBy: ChildProcess | null = null;
    try {
      const { response, proc } = await this.send(request, timeout);
      if (!response.ok) {
        rejectedBy = proc;
        throw new Error(response.error ?? 'Daemon returned error');
      }
      return response;
    } catch (err) {
      if (this.stopped) throw err;

      // Kill and retry once
      logger.warn({ err, cmd: request.cmd }, 'Daemon request failed, restarting');
      // Only the process that served this request is killed; timeouts and
      // exits have already disposed of theirs.
      if (rejectedBy) this.killProcess(rejectedBy);

      const { response } = await this.send(request, timeout);
      if (!response.ok) throw new Error(response.error ?? 'Daemon returned error on retry');
      return response;
    }
  }
}