added easyOCR
This commit is contained in:
parent
37d6678577
commit
9f208b0606
27 changed files with 1780 additions and 112 deletions
|
|
@ -1,7 +1,7 @@
|
|||
import { mkdir } from 'fs/promises';
|
||||
import { join } from 'path';
|
||||
import { logger } from '../util/logger.js';
|
||||
import { OcrDaemon, type OcrResponse, type DiffOcrResponse } from './OcrDaemon.js';
|
||||
import { OcrDaemon, type OcrResponse, type OcrEngine, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
|
||||
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
|
||||
import type { Region } from '../types.js';
|
||||
|
||||
|
|
@ -12,6 +12,7 @@ function elapsed(start: number): string {
|
|||
export class ScreenReader {
|
||||
/** OCR daemon client; the OCR, diff-OCR and template-matching methods of this class delegate to it. */
private daemon = new OcrDaemon();
|
||||
/** Grid reader constructed with this instance's daemon. */
readonly grid = new GridReader(this.daemon);
|
||||
/** Engine passed to the daemon by the `debug*` OCR methods; initialised to 'tesseract'. */
debugOcrEngine: OcrEngine = 'tesseract';
|
||||
|
||||
// ── Screenshot capture ──────────────────────────────────────────────
|
||||
|
||||
|
|
@ -31,32 +32,122 @@ export class ScreenReader {
|
|||
|
||||
// ── OCR helpers ─────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Dice coefficient computed over character bigrams.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns A score in the range 0..1. When either string is shorter than two
 *          characters the result is 1 for identical strings and 0 otherwise.
 */
private static bigramSimilarity(a: string, b: string): number {
  // A string with fewer than two characters has no bigrams: fall back to equality.
  if (a.length < 2 || b.length < 2) {
    return a === b ? 1 : 0;
  }

  // Multiset of the bigrams of `a`: bigram -> occurrences not yet paired with `b`.
  const unpaired = new Map<string, number>();
  for (let start = 0; start + 1 < a.length; start++) {
    const pair = a.slice(start, start + 2);
    unpaired.set(pair, (unpaired.get(pair) ?? 0) + 1);
  }

  // Every bigram of `b` may consume at most one remaining occurrence from `a`.
  let shared = 0;
  for (let start = 0; start + 1 < b.length; start++) {
    const pair = b.slice(start, start + 2);
    const remaining = unpaired.get(pair) ?? 0;
    if (remaining > 0) {
      shared += 1;
      unpaired.set(pair, remaining - 1);
    }
  }

  const totalBigrams = a.length - 1 + b.length - 1;
  return (2 * shared) / totalBigrams;
}
|
||||
|
||||
/**
 * Canonical form used for fuzzy comparison: the input is lowercased and then
 * every character outside `a-z` / `0-9` is removed (whitespace included).
 */
private static normalize(s: string): string {
  const lowered = s.toLowerCase();
  return lowered.replace(/[^a-z0-9]/g, '');
}
|
||||
|
||||
/**
 * Locate `needle` in an OCR result and return the point to target.
 *
 * - A needle containing a space is compared against each line's full text and
 *   resolves to the point `lineBounds` computes for that line. Lines without
 *   words are ignored.
 * - Any other needle is compared against individual words and resolves to the
 *   center of the matching word's box.
 *
 * Exact matches (case-insensitive substring) always take priority: every
 * candidate is checked for an exact match before any fuzzy comparison is made,
 * so a loose fuzzy hit early in the result can no longer shadow an exact hit
 * that appears later. When no exact match exists and `fuzzy` is true, the
 * candidate with the highest bigram similarity at or above the threshold is
 * returned (the first one wins on ties).
 *
 * @param result - OCR response whose `lines` / `words` are searched.
 * @param needle - Text to look for.
 * @param fuzzy - Enables the similarity-based fallback.
 * @returns The target point, or `null` when nothing matches.
 */
private findWordInOcrResult(
  result: OcrResponse,
  needle: string,
  fuzzy: boolean = false,
): { x: number; y: number } | null {
  const FUZZY_THRESHOLD = 0.55;
  const lower = needle.toLowerCase();
  const needleNorm = ScreenReader.normalize(needle);

  const centerOf = (box: { x: number; y: number; width: number; height: number }): { x: number; y: number } => ({
    x: Math.round(box.x + box.width / 2),
    y: Math.round(box.y + box.height / 2),
  });

  // Multi-word: match against the full line text.
  if (lower.includes(' ')) {
    // Pass 1: exact substring match on any line.
    for (const line of result.lines) {
      if (line.words.length === 0) continue;
      if (line.text.toLowerCase().includes(lower)) {
        return this.lineBounds(line);
      }
    }
    if (!fuzzy) return null;

    // Pass 2: best fuzzy match. Each normalized line is scanned with windows
    // two characters wider than the normalized needle; the loop bound also
    // lets shorter trailing windows (and lines slightly shorter than the
    // needle) be compared.
    const needleLen = needleNorm.length;
    let bestSim = -1;
    let bestText = '';
    let bestPos: { x: number; y: number } | null = null;
    for (const line of result.lines) {
      if (line.words.length === 0) continue;
      const lineNorm = ScreenReader.normalize(line.text);
      for (let i = 0; i <= lineNorm.length - needleLen + 2; i++) {
        const window = lineNorm.slice(i, i + needleLen + 2);
        const sim = ScreenReader.bigramSimilarity(needleNorm, window);
        if (sim >= FUZZY_THRESHOLD && sim > bestSim) {
          bestSim = sim;
          bestText = line.text;
          bestPos = this.lineBounds(line);
        }
      }
    }
    if (bestPos === null) return null;
    logger.info({ needle, matched: bestText, similarity: bestSim.toFixed(2) }, 'Fuzzy nameplate match');
    return bestPos;
  }

  // Single word, pass 1: exact substring match on any word.
  for (const line of result.lines) {
    for (const word of line.words) {
      if (word.text.toLowerCase().includes(lower)) {
        return centerOf(word);
      }
    }
  }
  if (!fuzzy) return null;

  // Single word, pass 2: best fuzzy match across all words.
  let bestSim = -1;
  let bestText = '';
  let bestPos: { x: number; y: number } | null = null;
  for (const line of result.lines) {
    for (const word of line.words) {
      const sim = ScreenReader.bigramSimilarity(needleNorm, ScreenReader.normalize(word.text));
      if (sim >= FUZZY_THRESHOLD && sim > bestSim) {
        bestSim = sim;
        bestText = word.text;
        bestPos = centerOf(word);
      }
    }
  }
  if (bestPos === null) return null;
  logger.info({ needle, matched: bestText, similarity: bestSim.toFixed(2) }, 'Fuzzy word match');
  return bestPos;
}
|
||||
|
||||
/**
 * Center of the bounding box that encloses every word of a line.
 *
 * All four edges are taken over all words, so the result does not depend on
 * the order of `line.words` or on the first word having the highest top edge.
 *
 * @param line - Object carrying the word boxes of one OCR line.
 * @returns The rounded center point of the enclosing box.
 * @throws Error when `line.words` is empty (there is no box to measure).
 */
private lineBounds(line: { words: { x: number; y: number; width: number; height: number }[] }): { x: number; y: number } {
  const { words } = line;
  if (words.length === 0) {
    throw new Error('lineBounds: line has no words');
  }

  let left = Infinity;
  let top = Infinity;
  let right = -Infinity;
  let bottom = -Infinity;
  for (const word of words) {
    left = Math.min(left, word.x);
    top = Math.min(top, word.y);
    right = Math.max(right, word.x + word.width);
    bottom = Math.max(bottom, word.y + word.height);
  }

  return {
    x: Math.round((left + right) / 2),
    y: Math.round((top + bottom) / 2),
  };
}
|
||||
|
||||
// ── Full-screen methods ─────────────────────────────────────────────
|
||||
|
||||
async findTextOnScreen(
|
||||
searchText: string,
|
||||
fuzzy: boolean = false,
|
||||
): Promise<{ x: number; y: number } | null> {
|
||||
const t = performance.now();
|
||||
const result = await this.daemon.ocr();
|
||||
const pos = this.findWordInOcrResult(result, searchText);
|
||||
const pos = this.findWordInOcrResult(result, searchText, fuzzy);
|
||||
|
||||
if (pos) {
|
||||
logger.info({ searchText, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
|
||||
|
|
@ -112,6 +203,19 @@ export class ScreenReader {
|
|||
return this.daemon.diffOcr(savePath, region);
|
||||
}
|
||||
|
||||
// ── Template matching ──────────────────────────────────────────────
|
||||
|
||||
/**
 * Ask the daemon to template-match and log the outcome together with timing.
 *
 * @param templatePath - Forwarded unchanged to the daemon.
 * @param region - Optional region, forwarded unchanged to the daemon.
 * @returns Whatever the daemon returned (a match result or a falsy "no match").
 */
async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
  const startedAt = performance.now();
  const match = await this.daemon.templateMatch(templatePath, region);

  if (!match) {
    logger.info({ templatePath, ms: elapsed(startedAt) }, 'Template match not found');
    return match;
  }

  logger.info(
    {
      templatePath,
      x: match.x,
      y: match.y,
      confidence: match.confidence.toFixed(3),
      ms: elapsed(startedAt),
    },
    'Template match found',
  );
  return match;
}
|
||||
|
||||
// ── Save utilities ──────────────────────────────────────────────────
|
||||
|
||||
async saveScreenshot(path: string): Promise<void> {
|
||||
|
|
@ -133,6 +237,43 @@ export class ScreenReader {
|
|||
logger.info({ path, region }, 'Region screenshot saved');
|
||||
}
|
||||
|
||||
// ── Debug OCR (alternative engines) ─────────────────────────────────
|
||||
|
||||
/**
 * Diff-OCR through the daemon using `debugOcrEngine`, logging the engine and
 * the elapsed time.
 *
 * @param savePath - Optional path, forwarded unchanged to the daemon.
 * @param region - Optional region, forwarded unchanged to the daemon.
 * @returns The daemon's diff-OCR response.
 */
async debugDiffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
  const startedAt = performance.now();
  const response = await this.daemon.diffOcr(savePath, region, this.debugOcrEngine);

  const logContext = { engine: this.debugOcrEngine, ms: elapsed(startedAt) };
  logger.info(logContext, 'debugDiffOcr');

  return response;
}
|
||||
|
||||
/**
 * OCR through the daemon using `debugOcrEngine`, logging the engine and the
 * elapsed time.
 *
 * @param region - Optional region, forwarded unchanged to the daemon.
 * @returns The daemon's OCR response.
 */
async debugOcr(region?: Region): Promise<OcrResponse> {
  const startedAt = performance.now();
  const response = await this.daemon.ocr(region, this.debugOcrEngine);

  const logContext = { engine: this.debugOcrEngine, ms: elapsed(startedAt) };
  logger.info(logContext, 'debugOcr');

  return response;
}
|
||||
|
||||
/**
 * OCR with no region argument using `debugOcrEngine`.
 *
 * @returns The `text` field of the daemon's OCR response.
 */
async debugReadFullScreen(): Promise<string> {
  const { text } = await this.daemon.ocr(undefined, this.debugOcrEngine);
  return text;
}
|
||||
|
||||
/**
 * OCR with no region argument using `debugOcrEngine`, then search the result
 * for `searchText` and log whether it was found.
 *
 * @param searchText - Text to look for in the OCR result.
 * @param fuzzy - Forwarded to the word/line matcher.
 * @returns The point reported by the matcher, or `null` when there is no match.
 */
async debugFindTextOnScreen(
  searchText: string,
  fuzzy: boolean = false,
): Promise<{ x: number; y: number } | null> {
  const startedAt = performance.now();
  const ocrResult = await this.daemon.ocr(undefined, this.debugOcrEngine);
  const hit = this.findWordInOcrResult(ocrResult, searchText, fuzzy);

  if (!hit) {
    logger.info(
      { searchText, engine: this.debugOcrEngine, totalMs: elapsed(startedAt) },
      'debugFindText not found',
    );
    return hit;
  }

  logger.info(
    { searchText, engine: this.debugOcrEngine, x: hit.x, y: hit.y, totalMs: elapsed(startedAt) },
    'debugFindText found',
  );
  return hit;
}
|
||||
|
||||
// ── Lifecycle ───────────────────────────────────────────────────────
|
||||
|
||||
async dispose(): Promise<void> {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue