297 lines
11 KiB
TypeScript
297 lines
11 KiB
TypeScript
import { mkdir } from 'fs/promises';
|
||
import { join } from 'path';
|
||
import { logger } from '../util/logger.js';
|
||
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult, type TooltipMethod, type EdgeOcrParams } from './OcrDaemon.js';
|
||
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
|
||
import type { Region } from '../types.js';
|
||
|
||
/**
 * Format the time that has passed since `start` as whole milliseconds.
 *
 * @param start A timestamp previously taken with `performance.now()`.
 * @returns The rounded duration followed by `ms`, e.g. `"42ms"`.
 */
function elapsed(start: number): string {
  const deltaMs = performance.now() - start;
  return deltaMs.toFixed(0) + 'ms';
}
|
||
|
||
/** Tunable OCR configuration held by `ScreenReader.settings`. */
export interface OcrSettings {
  /** OCR engine forwarded to every daemon OCR / diff-OCR / edge-OCR call. */
  engine: OcrEngine;

  /** Preprocessing for full-screen and region OCR; `'none'` means no preprocess argument is sent. */
  screenPreprocess: OcrPreprocess;

  /** Selects the tooltip reader: `'edge'` uses edge-OCR, any other value uses diff-OCR. */
  tooltipMethod: TooltipMethod;

  /** Preprocessing for tooltip OCR (both methods); `'none'` means no preprocess argument is sent. */
  tooltipPreprocess: OcrPreprocess;

  /** Parameters passed to the daemon's diff-OCR call. */
  tooltipParams: DiffOcrParams;

  /** Parameters passed to the daemon's edge-OCR call. */
  edgeParams: EdgeOcrParams;

  /** NOTE(review): not read by `ScreenReader` itself; presumably consumed by callers to decide whether to dump debug images. */
  saveDebugImages: boolean;
}
|
||
|
||
/**
 * High-level screen reading facade over the OCR daemon.
 *
 * Wraps screenshot capture, full-screen / region OCR text lookup, tooltip
 * diff/edge OCR, template matching and screenshot saving, applying the
 * engine and preprocessing choices stored in {@link ScreenReader.settings}.
 */
export class ScreenReader {
  /** Minimum bigram similarity (0..1) at which a fuzzy candidate counts as a match. */
  private static readonly FUZZY_THRESHOLD = 0.55;

  private readonly daemon = new OcrDaemon();
  // Must be declared after `daemon`: field initializers run in declaration order.
  readonly grid = new GridReader(this.daemon);
  settings: OcrSettings = {
    engine: 'easyocr',
    screenPreprocess: 'none',
    tooltipPreprocess: 'tophat',
    tooltipMethod: 'diff',
    tooltipParams: {
      crop: { diffThresh: 10 },
      ocr: { kernelSize: 21 },
    },
    edgeParams: {
      crop: {},
      ocr: { kernelSize: 21 },
    },
    saveDebugImages: true,
  };

  /**
   * Start the OCR daemon and, for every engine except `'tesseract'`, run one
   * small OCR request so the engine's model is loaded before the first real
   * request arrives.
   *
   * The warm-up request covers a 100×100 region at the screen origin and is
   * awaited, so this promise resolves only once that request has completed.
   */
  async warmup(): Promise<void> {
    await this.daemon.warmup();
    const { engine } = this.settings;
    if (engine !== 'tesseract') {
      // The OCR result is irrelevant; the call exists only to trigger model loading.
      await this.daemon.ocr({ x: 0, y: 0, width: 100, height: 100 }, engine);
    }
  }

  // ── Screenshot capture ──────────────────────────────────────────────

  /** Capture the whole screen and return the image buffer produced by the daemon. */
  async captureScreen(): Promise<Buffer> {
    const t = performance.now();
    const buf = await this.daemon.captureBuffer();
    logger.info({ ms: elapsed(t) }, 'captureScreen');
    return buf;
  }

  /** Capture only `region` and return the image buffer produced by the daemon. */
  async captureRegion(region: Region): Promise<Buffer> {
    const t = performance.now();
    const buf = await this.daemon.captureBuffer(region);
    logger.info({ ms: elapsed(t) }, 'captureRegion');
    return buf;
  }

  // ── OCR helpers ─────────────────────────────────────────────────────

  /**
   * Bigram (Dice) similarity between two strings, in the range 0..1.
   *
   * Bigrams are counted as a multiset, so a repeated bigram in `b` only
   * matches as many times as it occurs in `a`. Strings shorter than two
   * characters have no bigrams and are compared for plain equality instead.
   */
  private static bigramSimilarity(a: string, b: string): number {
    if (a.length < 2 || b.length < 2) return a === b ? 1 : 0;
    const bigramsA = new Map<string, number>();
    for (let i = 0; i < a.length - 1; i++) {
      const bg = a.slice(i, i + 2);
      bigramsA.set(bg, (bigramsA.get(bg) ?? 0) + 1);
    }
    let matches = 0;
    for (let i = 0; i < b.length - 1; i++) {
      const bg = b.slice(i, i + 2);
      const count = bigramsA.get(bg);
      if (count && count > 0) {
        matches++;
        bigramsA.set(bg, count - 1);
      }
    }
    return (2 * matches) / (a.length - 1 + b.length - 1);
  }

  /**
   * Normalize text for fuzzy comparison: lowercase it and remove every
   * character outside `[a-z0-9]`. Whitespace, punctuation and non-ASCII
   * letters are all dropped.
   */
  private static normalize(s: string): string {
    return s.toLowerCase().replace(/[^a-z0-9]/g, '');
  }

  /** Rounded center point of a single bounding box. */
  private static centerOf(box: { x: number; y: number; width: number; height: number }): { x: number; y: number } {
    return {
      x: Math.round(box.x + box.width / 2),
      y: Math.round(box.y + box.height / 2),
    };
  }

  /**
   * Locate `needle` in an OCR result and return the point to target.
   *
   * - A needle containing a space is matched against whole line texts and
   *   yields the center of the matching line's bounding box.
   * - Any other needle is matched against individual words and yields the
   *   center of the matching word.
   *
   * Matching is case-insensitive substring matching. When `fuzzy` is set and
   * no exact match exists anywhere in the result, the first candidate whose
   * bigram similarity reaches {@link ScreenReader.FUZZY_THRESHOLD} is used.
   *
   * @returns Coordinates in the OCR result's own coordinate space, or `null`
   *   when nothing matches or `needle` is empty / whitespace-only.
   */
  private findWordInOcrResult(
    result: OcrResponse,
    needle: string,
    fuzzy: boolean = false,
  ): { x: number; y: number } | null {
    const lower = needle.toLowerCase();
    // `"anything".includes("")` is always true, so an empty needle would
    // otherwise "match" the very first word on screen.
    if (lower.trim() === '') return null;

    const needleNorm = ScreenReader.normalize(needle);
    // A needle made only of punctuation normalizes to "", which compares equal
    // to every empty window / punctuation-only word; skip fuzzy in that case.
    const useFuzzy = fuzzy && needleNorm.length > 0;

    return lower.includes(' ')
      ? this.findPhrase(result, needle, lower, needleNorm, useFuzzy)
      : this.findSingleWord(result, needle, lower, needleNorm, useFuzzy);
  }

  /** Multi-word lookup: exact pass over all lines first, then an optional fuzzy pass. */
  private findPhrase(
    result: OcrResponse,
    needle: string,
    lower: string,
    needleNorm: string,
    useFuzzy: boolean,
  ): { x: number; y: number } | null {
    // Pass 1: exact matches win over any fuzzy candidate, wherever they appear.
    for (const line of result.lines) {
      if (line.words.length === 0) continue;
      if (line.text.toLowerCase().includes(lower)) {
        return this.lineBounds(line);
      }
    }
    if (!useFuzzy) return null;

    // Pass 2: slide a window slightly wider than the needle (+2 chars) over the
    // normalized line. The start index deliberately runs past the last full
    // window, so trailing windows shrink to as little as (needle length - 2).
    const windowLen = needleNorm.length + 2;
    for (const line of result.lines) {
      if (line.words.length === 0) continue;
      const lineNorm = ScreenReader.normalize(line.text);
      const lastStart = lineNorm.length - needleNorm.length + 2;
      for (let i = 0; i <= lastStart; i++) {
        const sim = ScreenReader.bigramSimilarity(needleNorm, lineNorm.slice(i, i + windowLen));
        if (sim >= ScreenReader.FUZZY_THRESHOLD) {
          logger.info({ needle, matched: line.text, similarity: sim.toFixed(2) }, 'Fuzzy nameplate match');
          return this.lineBounds(line);
        }
      }
    }
    return null;
  }

  /** Single-word lookup: exact pass over all words first, then an optional fuzzy pass. */
  private findSingleWord(
    result: OcrResponse,
    needle: string,
    lower: string,
    needleNorm: string,
    useFuzzy: boolean,
  ): { x: number; y: number } | null {
    // Pass 1: exact (case-insensitive substring) match.
    for (const line of result.lines) {
      for (const word of line.words) {
        if (word.text.toLowerCase().includes(lower)) {
          return ScreenReader.centerOf(word);
        }
      }
    }
    if (!useFuzzy) return null;

    // Pass 2: first word that is similar enough after normalization.
    for (const line of result.lines) {
      for (const word of line.words) {
        const sim = ScreenReader.bigramSimilarity(needleNorm, ScreenReader.normalize(word.text));
        if (sim >= ScreenReader.FUZZY_THRESHOLD) {
          logger.info({ needle, matched: word.text, similarity: sim.toFixed(2) }, 'Fuzzy word match');
          return ScreenReader.centerOf(word);
        }
      }
    }
    return null;
  }

  /**
   * Center of the bounding box that encloses every word of a line.
   *
   * The box is computed from the min/max extents of all words, so it does not
   * depend on word order or on all words sharing the same top edge.
   *
   * @throws Error if the line has no words (callers skip empty lines).
   */
  private lineBounds(line: { words: { x: number; y: number; width: number; height: number }[] }): { x: number; y: number } {
    if (line.words.length === 0) {
      throw new Error('lineBounds: line has no words');
    }
    let left = Infinity;
    let top = Infinity;
    let right = -Infinity;
    let bottom = -Infinity;
    for (const w of line.words) {
      left = Math.min(left, w.x);
      top = Math.min(top, w.y);
      right = Math.max(right, w.x + w.width);
      bottom = Math.max(bottom, w.y + w.height);
    }
    return {
      x: Math.round((left + right) / 2),
      y: Math.round((top + bottom) / 2),
    };
  }

  /**
   * Run OCR with the configured engine and screen preprocessing.
   * A `screenPreprocess` of `'none'` is sent to the daemon as "no preprocess".
   *
   * @param region Area to OCR; omit for the full screen.
   */
  private async ocrWithScreenSettings(region?: Region): Promise<OcrResponse> {
    const { engine, screenPreprocess } = this.settings;
    const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
    return this.daemon.ocr(region, engine, pp);
  }

  // ── Full-screen methods ─────────────────────────────────────────────

  /**
   * OCR the full screen and locate `searchText`.
   *
   * @param fuzzy Also accept approximate matches when no exact match exists.
   * @returns Screen coordinates of the match center, or `null` if not found.
   */
  async findTextOnScreen(
    searchText: string,
    fuzzy: boolean = false,
  ): Promise<{ x: number; y: number } | null> {
    const t = performance.now();
    const { engine } = this.settings;
    const result = await this.ocrWithScreenSettings();
    const pos = this.findWordInOcrResult(result, searchText, fuzzy);

    if (pos) {
      logger.info({ searchText, engine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
    } else {
      logger.info({ searchText, engine, totalMs: elapsed(t) }, 'Text not found on screen');
    }
    return pos;
  }

  /** OCR the full screen and return all recognized text. */
  async readFullScreen(): Promise<string> {
    const result = await this.ocrWithScreenSettings();
    return result.text;
  }

  // ── Region methods ──────────────────────────────────────────────────

  /**
   * OCR only `region` and locate `searchText` inside it.
   *
   * @param fuzzy Also accept approximate matches when no exact match exists
   *   (defaults to `false`, matching `findTextOnScreen`).
   * @returns The match center translated to screen coordinates, or `null`.
   */
  async findTextInRegion(
    region: Region,
    searchText: string,
    fuzzy: boolean = false,
  ): Promise<{ x: number; y: number } | null> {
    const t = performance.now();
    const result = await this.ocrWithScreenSettings(region);
    const pos = this.findWordInOcrResult(result, searchText, fuzzy);

    if (pos) {
      // OCR coordinates are relative to the region; offset back to screen space.
      const screenPos = { x: region.x + pos.x, y: region.y + pos.y };
      logger.info({ searchText, x: screenPos.x, y: screenPos.y, region, totalMs: elapsed(t) }, 'Found text in region');
      return screenPos;
    }

    logger.info({ searchText, region, totalMs: elapsed(t) }, 'Text not found in region');
    return null;
  }

  /** OCR only `region` and return all recognized text. */
  async readRegionText(region: Region): Promise<string> {
    const result = await this.ocrWithScreenSettings(region);
    return result.text;
  }

  /**
   * Whether `searchText` can be found inside `region`.
   *
   * @param fuzzy Also accept approximate matches (defaults to `false`).
   */
  async checkForText(region: Region, searchText: string, fuzzy: boolean = false): Promise<boolean> {
    const pos = await this.findTextInRegion(region, searchText, fuzzy);
    return pos !== null;
  }

  // ── Snapshot / Diff-OCR (for tooltip reading) ──────────────────────

  /**
   * Ask the daemon to take the reference snapshot used by diff-OCR.
   * Does nothing when the tooltip method is `'edge'`, which needs no reference frame.
   */
  async snapshot(): Promise<void> {
    if (this.settings.tooltipMethod === 'edge') return; // no reference frame needed
    await this.daemon.snapshot();
  }

  /**
   * Read a tooltip using the configured method: edge-OCR when
   * `tooltipMethod` is `'edge'`, diff-OCR otherwise. A `tooltipPreprocess`
   * of `'none'` is sent to the daemon as "no preprocess".
   *
   * @param savePath Optional path forwarded to the daemon.
   * @param region Optional area forwarded to the daemon.
   */
  async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
    const { engine, tooltipPreprocess, tooltipMethod, tooltipParams, edgeParams } = this.settings;
    const pp = tooltipPreprocess !== 'none' ? tooltipPreprocess : undefined;
    if (tooltipMethod === 'edge') {
      return this.daemon.edgeOcr(savePath, region, engine, pp, edgeParams);
    }
    return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams);
  }

  // ── Template matching ──────────────────────────────────────────────

  /**
   * Search for the template image at `templatePath`, optionally limited to `region`.
   *
   * @returns The daemon's match result, or `null` when there is no match.
   */
  async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
    const t = performance.now();
    const result = await this.daemon.templateMatch(templatePath, region);
    if (result) {
      logger.info({ templatePath, x: result.x, y: result.y, confidence: result.confidence.toFixed(3), ms: elapsed(t) }, 'Template match found');
    } else {
      logger.info({ templatePath, ms: elapsed(t) }, 'Template match not found');
    }
    return result;
  }

  // ── Save utilities ──────────────────────────────────────────────────

  /** Save a full-screen screenshot to `path`. */
  async saveScreenshot(path: string): Promise<void> {
    await this.daemon.saveScreenshot(path);
    logger.info({ path }, 'Screenshot saved');
  }

  /**
   * Save a timestamped full-screen screenshot into `dir`, creating the
   * directory (and any missing parents) first.
   *
   * @returns The paths of the files written (currently a single screenshot).
   */
  async saveDebugScreenshots(dir: string): Promise<string[]> {
    await mkdir(dir, { recursive: true });
    const ts = Date.now();
    const originalPath = join(dir, `${ts}-screenshot.png`);
    await this.daemon.saveScreenshot(originalPath);
    // Log only the file name; split on both separators so Windows paths work too.
    logger.info({ dir, files: [originalPath.split(/[\\/]/).pop()] }, 'Debug screenshot saved');
    return [originalPath];
  }

  /** Save a screenshot of only `region` to `path`. */
  async saveRegion(region: Region, path: string): Promise<void> {
    await this.daemon.saveScreenshot(path, region);
    logger.info({ path, region }, 'Region screenshot saved');
  }

  // ── Lifecycle ───────────────────────────────────────────────────────

  /** Stop the OCR daemon. */
  async dispose(): Promise<void> {
    await this.daemon.stop();
  }
}
|