added easyOCR

This commit is contained in:
Boki 2026-02-12 01:04:19 -05:00
parent 37d6678577
commit 9f208b0606
27 changed files with 1780 additions and 112 deletions

View file

@ -1,7 +1,7 @@
import { mkdir } from 'fs/promises';
import { join } from 'path';
import { logger } from '../util/logger.js';
import { OcrDaemon, type OcrResponse, type DiffOcrResponse } from './OcrDaemon.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
import type { Region } from '../types.js';
@ -12,6 +12,7 @@ function elapsed(start: number): string {
export class ScreenReader {
/** OCR daemon client that the OCR, diff-OCR and template-matching calls in this class are sent to. */
private daemon = new OcrDaemon();
/** Grid reader constructed on top of the same daemon instance. */
readonly grid = new GridReader(this.daemon);
/** Engine passed to the daemon by the `debug*` OCR methods; defaults to `'tesseract'`. */
debugOcrEngine: OcrEngine = 'tesseract';
// ── Screenshot capture ──────────────────────────────────────────────
@ -31,32 +32,122 @@ export class ScreenReader {
// ── OCR helpers ─────────────────────────────────────────────────────
/**
 * Sørensen–Dice coefficient over character bigrams, in the range 0..1.
 *
 * Bigrams are treated as a multiset: a bigram occurring twice in `a` can be
 * matched at most twice by `b`. When either string is shorter than two
 * characters there are no bigrams to compare, so the result is 1 for
 * identical strings and 0 otherwise.
 */
private static bigramSimilarity(a: string, b: string): number {
  const pairsInA = a.length - 1;
  const pairsInB = b.length - 1;
  if (pairsInA < 1 || pairsInB < 1) {
    return a === b ? 1 : 0;
  }

  // Count how many times each bigram of `a` is still available for matching.
  const remaining = new Map<string, number>();
  for (let start = 0; start < pairsInA; start++) {
    const pair = a.substring(start, start + 2);
    remaining.set(pair, (remaining.get(pair) ?? 0) + 1);
  }

  // Consume one available occurrence for every bigram of `b` that is found.
  let shared = 0;
  for (let start = 0; start < pairsInB; start++) {
    const pair = b.substring(start, start + 2);
    const left = remaining.get(pair) ?? 0;
    if (left <= 0) continue;
    shared += 1;
    remaining.set(pair, left - 1);
  }

  return (2 * shared) / (pairsInA + pairsInB);
}
/**
 * Normalize text for fuzzy comparison: lowercase it, then remove every
 * character that is not an ASCII letter or digit. Whitespace is removed
 * entirely (not collapsed), so "Foo  Bar!" becomes "foobar".
 */
private static normalize(s: string): string {
  const lowered = s.toLowerCase();
  return lowered.replace(/[^a-z0-9]/g, '');
}
/**
 * Locate `needle` in an OCR result and return the centre of the match.
 *
 * - A needle containing a space is matched against whole lines; the returned
 *   point is the centre of the matching line's bounding box.
 * - Otherwise the needle is matched against individual words; the returned
 *   point is the centre of the matching word's box.
 *
 * Exact (case-insensitive substring) matches always win: the whole result is
 * scanned for one before any fuzzy comparison happens, and the first exact
 * match in reading order is returned. Only when no exact match exists and
 * `fuzzy` is true are candidates scored with bigram similarity on normalized
 * text; the highest-scoring candidate at or above the threshold is returned
 * (earliest candidate wins ties).
 *
 * @param result OCR response whose lines and words are searched.
 * @param needle Text to look for.
 * @param fuzzy  Allow approximate matches when no exact match exists.
 * @returns Centre point of the match, or `null` when nothing matched.
 */
private findWordInOcrResult(
  result: OcrResponse,
  needle: string,
  fuzzy: boolean = false,
): { x: number; y: number } | null {
  const FUZZY_THRESHOLD = 0.55;
  const lower = needle.toLowerCase();
  const needleNorm = ScreenReader.normalize(needle);
  // A needle with no letters or digits normalizes to '' and would score a
  // perfect 1 against any empty window/word, so fuzzy matching is skipped.
  const canFuzzy = fuzzy && needleNorm.length > 0;

  const centerOf = (box: { x: number; y: number; width: number; height: number }) => ({
    x: Math.round(box.x + box.width / 2),
    y: Math.round(box.y + box.height / 2),
  });

  // Multi-word: match against the full line text.
  if (lower.includes(' ')) {
    // Pass 1: exact match anywhere in the result takes priority.
    for (const line of result.lines) {
      if (line.words.length === 0) continue;
      if (line.text.toLowerCase().includes(lower)) {
        return this.lineBounds(line);
      }
    }
    if (!canFuzzy) return null;

    // Pass 2: keep the best-scoring line instead of the first one over threshold.
    let bestLine: { pos: { x: number; y: number }; text: string; sim: number } | null = null;
    const windowLen = needleNorm.length;
    for (const line of result.lines) {
      if (line.words.length === 0) continue;
      const lineNorm = ScreenReader.normalize(line.text);

      // Slide a window of up to windowLen + 2 normalized characters over the
      // line. `slice` clamps at the end of the string, so the last offsets
      // compare progressively shorter tails (down to windowLen - 2 chars).
      let lineSim = 0;
      for (let i = 0; i <= lineNorm.length - windowLen + 2; i++) {
        const sim = ScreenReader.bigramSimilarity(needleNorm, lineNorm.slice(i, i + windowLen + 2));
        if (sim > lineSim) lineSim = sim;
      }

      if (lineSim >= FUZZY_THRESHOLD && (bestLine === null || lineSim > bestLine.sim)) {
        bestLine = { pos: this.lineBounds(line), text: line.text, sim: lineSim };
      }
    }

    if (bestLine === null) return null;
    logger.info({ needle, matched: bestLine.text, similarity: bestLine.sim.toFixed(2) }, 'Fuzzy nameplate match');
    return bestLine.pos;
  }

  // Single word, pass 1: exact match against individual words.
  for (const line of result.lines) {
    for (const word of line.words) {
      if (word.text.toLowerCase().includes(lower)) {
        return centerOf(word);
      }
    }
  }
  if (!canFuzzy) return null;

  // Single word, pass 2: best fuzzy match across all words.
  let bestWord: { pos: { x: number; y: number }; text: string; sim: number } | null = null;
  for (const line of result.lines) {
    for (const word of line.words) {
      const sim = ScreenReader.bigramSimilarity(needleNorm, ScreenReader.normalize(word.text));
      if (sim >= FUZZY_THRESHOLD && (bestWord === null || sim > bestWord.sim)) {
        bestWord = { pos: centerOf(word), text: word.text, sim };
      }
    }
  }

  if (bestWord === null) return null;
  logger.info({ needle, matched: bestWord.text, similarity: bestWord.sim.toFixed(2) }, 'Fuzzy word match');
  return bestWord.pos;
}
/**
 * Centre of the bounding box that encloses every word of a line.
 *
 * All four edges are taken over all words (min left/top, max right/bottom),
 * so the result does not depend on word order or on the first word happening
 * to be the tallest one.
 *
 * @param line Line whose `words` carry pixel boxes (`x`, `y`, `width`, `height`).
 * @returns Rounded centre point of the enclosing box.
 * @throws Error when `line.words` is empty, since no box can be formed.
 */
private lineBounds(line: { words: { x: number; y: number; width: number; height: number }[] }): { x: number; y: number } {
  if (line.words.length === 0) {
    throw new Error('lineBounds requires a line with at least one word');
  }

  let left = Infinity;
  let top = Infinity;
  let right = -Infinity;
  let bottom = -Infinity;
  for (const word of line.words) {
    left = Math.min(left, word.x);
    top = Math.min(top, word.y);
    right = Math.max(right, word.x + word.width);
    bottom = Math.max(bottom, word.y + word.height);
  }

  return {
    x: Math.round((left + right) / 2),
    y: Math.round((top + bottom) / 2),
  };
}
// ── Full-screen methods ─────────────────────────────────────────────
async findTextOnScreen(
searchText: string,
fuzzy: boolean = false,
): Promise<{ x: number; y: number } | null> {
const t = performance.now();
const result = await this.daemon.ocr();
const pos = this.findWordInOcrResult(result, searchText);
const pos = this.findWordInOcrResult(result, searchText, fuzzy);
if (pos) {
logger.info({ searchText, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
@ -112,6 +203,19 @@ export class ScreenReader {
return this.daemon.diffOcr(savePath, region);
}
// ── Template matching ──────────────────────────────────────────────
/**
 * Ask the daemon to match the template image at `templatePath`, optionally
 * restricted to `region`, and log the outcome together with the elapsed time.
 *
 * @returns The daemon's match result, or its falsy value when nothing matched.
 */
async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
  const startedAt = performance.now();
  const match = await this.daemon.templateMatch(templatePath, region);

  if (!match) {
    logger.info({ templatePath, ms: elapsed(startedAt) }, 'Template match not found');
    return match;
  }

  logger.info(
    {
      templatePath,
      x: match.x,
      y: match.y,
      confidence: match.confidence.toFixed(3),
      ms: elapsed(startedAt),
    },
    'Template match found',
  );
  return match;
}
// ── Save utilities ──────────────────────────────────────────────────
async saveScreenshot(path: string): Promise<void> {
@ -133,6 +237,43 @@ export class ScreenReader {
logger.info({ path, region }, 'Region screenshot saved');
}
// ── Debug OCR (alternative engines) ─────────────────────────────────
/**
 * Run the daemon's diff-OCR with the engine currently selected in
 * `debugOcrEngine` and log how long it took.
 *
 * The engine is read once up front so the log entry names the engine that
 * was actually used, even if `debugOcrEngine` is reassigned while the call
 * is in flight.
 *
 * @param savePath Forwarded unchanged to the daemon's `diffOcr`.
 * @param region   Forwarded unchanged to the daemon's `diffOcr`.
 */
async debugDiffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
  const t = performance.now();
  const engine = this.debugOcrEngine;
  const result = await this.daemon.diffOcr(savePath, region, engine);
  logger.info({ engine, ms: elapsed(t) }, 'debugDiffOcr');
  return result;
}
/**
 * Run OCR with the engine currently selected in `debugOcrEngine` and log how
 * long it took.
 *
 * The engine is read once up front so the log entry names the engine that
 * was actually used, even if `debugOcrEngine` is reassigned while the call
 * is in flight.
 *
 * @param region Forwarded unchanged to the daemon's `ocr`.
 */
async debugOcr(region?: Region): Promise<OcrResponse> {
  const t = performance.now();
  const engine = this.debugOcrEngine;
  const result = await this.daemon.ocr(region, engine);
  logger.info({ engine, ms: elapsed(t) }, 'debugOcr');
  return result;
}
/**
 * OCR with no region argument using the engine selected in `debugOcrEngine`
 * and return only the response's `text`.
 */
async debugReadFullScreen(): Promise<string> {
  const { text } = await this.daemon.ocr(undefined, this.debugOcrEngine);
  return text;
}
/**
 * OCR with no region argument using the engine selected in `debugOcrEngine`,
 * then search the result for `searchText`.
 *
 * The engine is read once up front so the log entry names the engine that
 * was actually used, even if `debugOcrEngine` is reassigned while the OCR
 * call is in flight.
 *
 * @param searchText Text to look for in the OCR result.
 * @param fuzzy      Forwarded to the word/line matcher to allow approximate matches.
 * @returns Centre point of the match, or `null` when the text was not found.
 */
async debugFindTextOnScreen(
  searchText: string,
  fuzzy: boolean = false,
): Promise<{ x: number; y: number } | null> {
  const t = performance.now();
  const engine = this.debugOcrEngine;
  const result = await this.daemon.ocr(undefined, engine);
  const pos = this.findWordInOcrResult(result, searchText, fuzzy);

  if (pos) {
    logger.info({ searchText, engine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'debugFindText found');
  } else {
    logger.info({ searchText, engine, totalMs: elapsed(t) }, 'debugFindText not found');
  }
  return pos;
}
// ── Lifecycle ───────────────────────────────────────────────────────
async dispose(): Promise<void> {