import { mkdir } from 'fs/promises';
import { join } from 'path';
import { logger } from '../util/logger.js';
import {
  OcrDaemon,
  type OcrResponse,
  type OcrEngine,
  type OcrPreprocess,
  type DiffOcrParams,
  type DiffCropParams,
  type OcrParams,
  type DiffOcrResponse,
  type TemplateMatchResult,
} from './OcrDaemon.js';
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
import type { Region } from '../types.js';

/**
 * Format the time elapsed since `start` as whole milliseconds, e.g. `"12ms"`.
 *
 * @param start A timestamp previously obtained from `performance.now()`.
 */
function elapsed(start: number): string {
  return `${(performance.now() - start).toFixed(0)}ms`;
}

/** Tunable OCR configuration read by {@link ScreenReader} on every request. */
export interface OcrSettings {
  /** Engine passed to every OCR / diff-OCR request. */
  engine: OcrEngine;
  /** Preprocessing for full-screen and region OCR; `'none'` sends no preprocess argument. */
  screenPreprocess: OcrPreprocess;
  /** Preprocessing for diff-OCR (tooltip reading); `'none'` sends no preprocess argument. */
  tooltipPreprocess: OcrPreprocess;
  /** Crop / OCR parameters forwarded unchanged to the daemon's diff-OCR call. */
  tooltipParams: DiffOcrParams;
  /** Not read inside this class; presumably consumed by callers to decide whether to save debug images. */
  saveDebugImages: boolean;
}

/**
 * Facade over {@link OcrDaemon}: screen capture, OCR text lookup (exact and fuzzy),
 * snapshot/diff OCR, template matching and screenshot saving.
 */
export class ScreenReader {
  /** Minimum bigram similarity (0..1) for a fuzzy match to be accepted. */
  private static readonly FUZZY_THRESHOLD = 0.55;

  private daemon = new OcrDaemon();
  readonly grid = new GridReader(this.daemon);

  settings: OcrSettings = {
    engine: 'easyocr',
    screenPreprocess: 'none',
    tooltipPreprocess: 'tophat',
    tooltipParams: {
      crop: { diffThresh: 10 },
      ocr: { kernelSize: 21 },
    },
    saveDebugImages: true,
  };

  /**
   * Warm up the OCR daemon and, for every engine except `'tesseract'`, run one small
   * OCR request so the engine's model is loaded before the first real request.
   *
   * Both steps are awaited: the returned promise resolves only after the warm-up
   * request has completed (callers that want fire-and-forget should not await it).
   */
  async warmup(): Promise<void> {
    await this.daemon.warmup();

    // A small 100×100 region keeps the work low; the OCR result is discarded,
    // only the side effect of the request (presumably loading the model) matters.
    const { engine } = this.settings;
    if (engine !== 'tesseract') {
      await this.daemon.ocr({ x: 0, y: 0, width: 100, height: 100 }, engine);
    }
  }

  // ── Screenshot capture ──────────────────────────────────────────────

  /** Capture the whole screen via the daemon and log how long it took. */
  async captureScreen(): Promise<Buffer> {
    const t = performance.now();
    const buf = await this.daemon.captureBuffer();
    logger.info({ ms: elapsed(t) }, 'captureScreen');
    return buf;
  }

  /** Capture only `region` via the daemon and log how long it took. */
  async captureRegion(region: Region): Promise<Buffer> {
    const t = performance.now();
    const buf = await this.daemon.captureBuffer(region);
    logger.info({ ms: elapsed(t) }, 'captureRegion');
    return buf;
  }

  // ── OCR helpers ─────────────────────────────────────────────────────

  /**
   * Bigram (Dice) similarity between two strings, 0..1.
   *
   * Repeated bigrams are counted with multiplicity. When either string is shorter
   * than two characters there are no bigrams, so the result is 1 only if the
   * strings are identical and 0 otherwise.
   */
  private static bigramSimilarity(a: string, b: string): number {
    if (a.length < 2 || b.length < 2) return a === b ? 1 : 0;

    // Multiset of the bigrams in `a`.
    const bigramsA = new Map<string, number>();
    for (let i = 0; i < a.length - 1; i++) {
      const bg = a.slice(i, i + 2);
      bigramsA.set(bg, (bigramsA.get(bg) ?? 0) + 1);
    }

    // Each bigram of `b` consumes at most one matching bigram of `a`.
    let matches = 0;
    for (let i = 0; i < b.length - 1; i++) {
      const bg = b.slice(i, i + 2);
      const count = bigramsA.get(bg);
      if (count && count > 0) {
        matches++;
        bigramsA.set(bg, count - 1);
      }
    }

    return (2 * matches) / (a.length - 1 + b.length - 1);
  }

  /**
   * Normalize text for fuzzy comparison: lowercase, then remove every character
   * that is not `a-z` or `0-9` (spaces and punctuation are dropped entirely).
   */
  private static normalize(s: string): string {
    return s.toLowerCase().replace(/[^a-z0-9]/g, '');
  }

  /** Rounded center point of a single box. */
  private static centerOf(box: { x: number; y: number; width: number; height: number }): {
    x: number;
    y: number;
  } {
    return {
      x: Math.round(box.x + box.width / 2),
      y: Math.round(box.y + box.height / 2),
    };
  }

  /**
   * Run OCR with the screen settings (`engine`, `screenPreprocess`).
   *
   * @param region Region to OCR; omitted means the whole screen.
   */
  private async ocrWithScreenSettings(region?: Region): Promise<OcrResponse> {
    const { engine, screenPreprocess } = this.settings;
    const preprocess = screenPreprocess !== 'none' ? screenPreprocess : undefined;
    return this.daemon.ocr(region, engine, preprocess);
  }

  /**
   * Locate `needle` in an OCR result and return the center of the first match.
   *
   * - A needle containing a space is matched against whole lines; the result is the
   *   center of the matching line's bounding box.
   * - Otherwise it is matched against individual words; the result is the word's center.
   *
   * Exact matching is a case-insensitive substring test. With `fuzzy` enabled, a
   * candidate is also accepted when its bigram similarity to the normalized needle
   * reaches {@link ScreenReader.FUZZY_THRESHOLD}.
   *
   * @returns Coordinates in the OCR result's own coordinate space, or `null` if nothing matched.
   */
  private findWordInOcrResult(
    result: OcrResponse,
    needle: string,
    fuzzy: boolean = false,
  ): { x: number; y: number } | null {
    const lower = needle.toLowerCase();
    const needleNorm = ScreenReader.normalize(needle);

    // Multi-word: match against the full line text, return center of the line's bounding box
    if (lower.includes(' ')) {
      for (const line of result.lines) {
        if (line.words.length === 0) continue;

        // Exact match
        if (line.text.toLowerCase().includes(lower)) {
          return this.lineBounds(line);
        }

        // Fuzzy: slide a window over the normalized line text
        if (fuzzy) {
          const lineNorm = ScreenReader.normalize(line.text);
          const windowLen = needleNorm.length;
          // Windows are up to needle length + 2 characters. The loop deliberately runs
          // past the last full window, so windows at the tail shrink down to
          // needle length - 2 characters.
          for (let i = 0; i <= lineNorm.length - windowLen + 2; i++) {
            const candidate = lineNorm.slice(i, i + windowLen + 2);
            const sim = ScreenReader.bigramSimilarity(needleNorm, candidate);
            if (sim >= ScreenReader.FUZZY_THRESHOLD) {
              logger.info({ needle, matched: line.text, similarity: sim.toFixed(2) }, 'Fuzzy nameplate match');
              return this.lineBounds(line);
            }
          }
        }
      }
      return null;
    }

    // Single word: match against individual words
    for (const line of result.lines) {
      for (const word of line.words) {
        // Exact match
        if (word.text.toLowerCase().includes(lower)) {
          return ScreenReader.centerOf(word);
        }

        // Fuzzy match
        if (fuzzy) {
          const wordNorm = ScreenReader.normalize(word.text);
          const sim = ScreenReader.bigramSimilarity(needleNorm, wordNorm);
          if (sim >= ScreenReader.FUZZY_THRESHOLD) {
            logger.info({ needle, matched: word.text, similarity: sim.toFixed(2) }, 'Fuzzy word match');
            return ScreenReader.centerOf(word);
          }
        }
      }
    }

    return null;
  }

  /**
   * Get the center of a line's bounding box, computed from all of its words.
   *
   * Every edge is taken as the min/max over all words, so the result does not depend
   * on word order or on the first word's vertical position.
   *
   * @throws Error if the line has no words (there is no box to take the center of).
   */
  private lineBounds(line: { words: { x: number; y: number; width: number; height: number }[] }): {
    x: number;
    y: number;
  } {
    const { words } = line;
    if (words.length === 0) {
      throw new Error('lineBounds: line has no words');
    }

    const left = Math.min(...words.map(w => w.x));
    const top = Math.min(...words.map(w => w.y));
    const right = Math.max(...words.map(w => w.x + w.width));
    const bottom = Math.max(...words.map(w => w.y + w.height));

    return {
      x: Math.round((left + right) / 2),
      y: Math.round((top + bottom) / 2),
    };
  }

  // ── Full-screen methods ─────────────────────────────────────────────

  /**
   * OCR the whole screen and look for `searchText`.
   *
   * @param fuzzy Also accept approximate (bigram-similarity) matches.
   * @returns Screen coordinates of the match center, or `null` if not found.
   */
  async findTextOnScreen(
    searchText: string,
    fuzzy: boolean = false,
  ): Promise<{ x: number; y: number } | null> {
    const t = performance.now();
    // Read the engine before awaiting so the log reports the engine actually used.
    const { engine } = this.settings;
    const result = await this.ocrWithScreenSettings();
    const pos = this.findWordInOcrResult(result, searchText, fuzzy);

    if (pos) {
      logger.info({ searchText, engine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
    } else {
      logger.info({ searchText, engine, totalMs: elapsed(t) }, 'Text not found on screen');
    }

    return pos;
  }

  /** OCR the whole screen and return the recognized text. */
  async readFullScreen(): Promise<string> {
    const result = await this.ocrWithScreenSettings();
    return result.text;
  }

  // ── Region methods ──────────────────────────────────────────────────

  /**
   * OCR `region` and look for `searchText` (exact, case-insensitive substring match).
   *
   * @returns The match center translated back into screen space by adding the
   *          region's origin, or `null` if not found.
   */
  async findTextInRegion(
    region: Region,
    searchText: string,
  ): Promise<{ x: number; y: number } | null> {
    const t = performance.now();
    const result = await this.ocrWithScreenSettings(region);
    const pos = this.findWordInOcrResult(result, searchText);

    if (pos) {
      // Offset back to screen space
      const screenPos = { x: region.x + pos.x, y: region.y + pos.y };
      logger.info({ searchText, x: screenPos.x, y: screenPos.y, region, totalMs: elapsed(t) }, 'Found text in region');
      return screenPos;
    }

    logger.info({ searchText, region, totalMs: elapsed(t) }, 'Text not found in region');
    return null;
  }

  /** OCR `region` and return the recognized text. */
  async readRegionText(region: Region): Promise<string> {
    const result = await this.ocrWithScreenSettings(region);
    return result.text;
  }

  /** Whether `searchText` is found in `region` (see {@link ScreenReader.findTextInRegion}). */
  async checkForText(region: Region, searchText: string): Promise<boolean> {
    const pos = await this.findTextInRegion(region, searchText);
    return pos !== null;
  }

  // ── Snapshot / Diff-OCR (for tooltip reading) ──────────────────────

  /** Ask the daemon to take a snapshot (the reference for a later {@link ScreenReader.diffOcr}). */
  async snapshot(): Promise<void> {
    await this.daemon.snapshot();
  }

  /**
   * Run the daemon's diff-OCR using the tooltip settings
   * (`engine`, `tooltipPreprocess`, `tooltipParams`).
   *
   * @param savePath Forwarded to the daemon unchanged.
   * @param region   Forwarded to the daemon unchanged.
   */
  async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
    const { engine, tooltipPreprocess, tooltipParams } = this.settings;
    const pp = tooltipPreprocess !== 'none' ? tooltipPreprocess : undefined;
    return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams);
  }

  // ── Template matching ──────────────────────────────────────────────

  /**
   * Match the template image at `templatePath` via the daemon, optionally restricted
   * to `region`, and log the outcome with timing.
   *
   * @returns The daemon's match result, or `null` when there is no match.
   */
  async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
    const t = performance.now();
    const result = await this.daemon.templateMatch(templatePath, region);

    if (result) {
      logger.info(
        { templatePath, x: result.x, y: result.y, confidence: result.confidence.toFixed(3), ms: elapsed(t) },
        'Template match found',
      );
    } else {
      logger.info({ templatePath, ms: elapsed(t) }, 'Template match not found');
    }

    return result;
  }

  // ── Save utilities ──────────────────────────────────────────────────

  /** Save a screenshot to `path`. */
  async saveScreenshot(path: string): Promise<void> {
    await this.daemon.saveScreenshot(path);
    logger.info({ path }, 'Screenshot saved');
  }

  /**
   * Save a timestamped screenshot (`<Date.now()>-screenshot.png`) into `dir`,
   * creating the directory (recursively) if needed.
   *
   * @returns The paths of the files written (currently a single entry).
   */
  async saveDebugScreenshots(dir: string): Promise<string[]> {
    await mkdir(dir, { recursive: true });

    const ts = Date.now();
    const fileName = `${ts}-screenshot.png`;
    const originalPath = join(dir, fileName);
    await this.daemon.saveScreenshot(originalPath);

    // Only the file name is logged; `dir` is logged separately.
    logger.info({ dir, files: [fileName] }, 'Debug screenshot saved');
    return [originalPath];
  }

  /** Save a screenshot of `region` to `path`. */
  async saveRegion(region: Region, path: string): Promise<void> {
    await this.daemon.saveScreenshot(path, region);
    logger.info({ path, region }, 'Region screenshot saved');
  }

  // ── Lifecycle ───────────────────────────────────────────────────────

  /** Stop the OCR daemon. */
  async dispose(): Promise<void> {
    await this.daemon.stop();
  }
}