switched to new way :)
This commit is contained in:
parent
b03a2a25f1
commit
f22d182c8f
30 changed files with 0 additions and 0 deletions
|
|
@ -1,297 +0,0 @@
|
|||
import { mkdir } from 'fs/promises';
|
||||
import { join } from 'path';
|
||||
import { logger } from '../util/logger.js';
|
||||
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult, type TooltipMethod, type EdgeOcrParams } from './OcrDaemon.js';
|
||||
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
|
||||
import type { Region } from '../types.js';
|
||||
|
||||
/**
 * Formats the time elapsed since `start` as a whole number of milliseconds
 * followed by an `ms` suffix (for example `"42ms"`).
 *
 * @param start - A timestamp previously obtained from `performance.now()`.
 * @returns The elapsed time, rounded to zero decimal places, with an `ms` suffix.
 */
function elapsed(start: number): string {
  const deltaMs = performance.now() - start;
  return `${deltaMs.toFixed(0)}ms`;
}
|
||||
|
||||
/**
 * Tunable OCR options read by `ScreenReader` on every call.
 */
export interface OcrSettings {
  /** OCR engine forwarded to every daemon OCR request. */
  engine: OcrEngine;
  /**
   * Preprocessing used for full-screen and region OCR.
   * The value `'none'` means no preprocess argument is sent to the daemon.
   */
  screenPreprocess: OcrPreprocess;
  /**
   * Preprocessing used for tooltip OCR (`diffOcr`).
   * The value `'none'` means no preprocess argument is sent to the daemon.
   */
  tooltipPreprocess: OcrPreprocess;
  /**
   * Tooltip detection strategy. `'edge'` routes `diffOcr` to the daemon's edge OCR
   * and turns `snapshot()` into a no-op; any other value uses the daemon's diff OCR.
   */
  tooltipMethod: TooltipMethod;
  /** Parameters forwarded to the daemon's diff OCR. */
  tooltipParams: DiffOcrParams;
  /** Parameters forwarded to the daemon's edge OCR. */
  edgeParams: EdgeOcrParams;
  /**
   * NOTE(review): not read by `ScreenReader` itself; presumably consumed by callers
   * to decide whether debug images are written — confirm against usages.
   */
  saveDebugImages: boolean;
}
|
||||
|
||||
/**
 * Facade over the OCR daemon: screen capture, text lookup (exact and fuzzy),
 * tooltip OCR, template matching and screenshot saving.
 *
 * All OCR behaviour is driven by the mutable `settings` object, which is read
 * afresh on every call.
 */
export class ScreenReader {
  /** Minimum bigram (Dice) similarity, 0..1, for a fuzzy comparison to count as a match. */
  private static readonly FUZZY_THRESHOLD = 0.55;

  /**
   * Slack, in characters, used by the multi-word fuzzy search: windows are up to
   * this much longer than the normalized needle, and truncated tail windows are
   * allowed to be up to this much shorter.
   */
  private static readonly FUZZY_WINDOW_SLACK = 2;

  private daemon = new OcrDaemon();
  readonly grid = new GridReader(this.daemon);
  settings: OcrSettings = {
    engine: 'easyocr',
    screenPreprocess: 'none',
    tooltipPreprocess: 'tophat',
    tooltipMethod: 'diff',
    tooltipParams: {
      crop: { diffThresh: 10 },
      ocr: { kernelSize: 21 },
    },
    edgeParams: {
      crop: {},
      ocr: { kernelSize: 21 },
    },
    saveDebugImages: true,
  };

  /**
   * Warms up the OCR daemon and, for every engine except `'tesseract'`, issues one
   * small OCR request with the configured engine.
   *
   * Both steps are awaited, so the returned promise resolves only after the
   * warm-up request has completed (this is not fire-and-forget).
   */
  async warmup(): Promise<void> {
    await this.daemon.warmup();
    // OCR a small 100×100 region in the top-left corner; the result is discarded.
    // Presumably this forces the engine's model to load before the first real request.
    const { engine } = this.settings;
    if (engine !== 'tesseract') {
      await this.daemon.ocr({ x: 0, y: 0, width: 100, height: 100 }, engine);
    }
  }

  // ── Screenshot capture ──────────────────────────────────────────────

  /** Captures the screen via the daemon and logs how long the capture took. */
  async captureScreen(): Promise<Buffer> {
    const t = performance.now();
    const buf = await this.daemon.captureBuffer();
    logger.info({ ms: elapsed(t) }, 'captureScreen');
    return buf;
  }

  /** Captures the given region via the daemon and logs how long the capture took. */
  async captureRegion(region: Region): Promise<Buffer> {
    const t = performance.now();
    const buf = await this.daemon.captureBuffer(region);
    logger.info({ ms: elapsed(t) }, 'captureRegion');
    return buf;
  }

  // ── OCR helpers ─────────────────────────────────────────────────────

  /**
   * Bigram (Dice) similarity between two strings, in the range 0..1.
   *
   * Repeated bigrams are counted as a multiset. Strings shorter than two
   * characters have no bigrams, so they only score 1 when they are identical.
   */
  private static bigramSimilarity(a: string, b: string): number {
    if (a.length < 2 || b.length < 2) return a === b ? 1 : 0;

    // Remaining (not yet matched) occurrences of each bigram of `a`.
    const remaining = new Map<string, number>();
    for (let i = 0; i < a.length - 1; i++) {
      const bg = a.slice(i, i + 2);
      remaining.set(bg, (remaining.get(bg) ?? 0) + 1);
    }

    let matches = 0;
    for (let i = 0; i < b.length - 1; i++) {
      const bg = b.slice(i, i + 2);
      const count = remaining.get(bg);
      if (count !== undefined && count > 0) {
        matches++;
        remaining.set(bg, count - 1);
      }
    }
    return (2 * matches) / (a.length - 1 + b.length - 1);
  }

  /**
   * Normalizes text for fuzzy comparison: lowercases it and removes every
   * character outside `a-z` / `0-9` (spaces and punctuation are dropped, not collapsed).
   */
  private static normalize(s: string): string {
    return s.toLowerCase().replace(/[^a-z0-9]/g, '');
  }

  /** Returns the rounded center point of a single box. */
  private static boxCenter(box: { x: number; y: number; width: number; height: number }): { x: number; y: number } {
    return {
      x: Math.round(box.x + box.width / 2),
      y: Math.round(box.y + box.height / 2),
    };
  }

  /** Maps the `'none'` preprocess setting to `undefined` so no preprocess argument is sent. */
  private static preprocessArg(preprocess: OcrPreprocess): OcrPreprocess | undefined {
    return preprocess !== 'none' ? preprocess : undefined;
  }

  /**
   * Locates `needle` in an OCR result and returns the center of the match.
   *
   * A needle containing a space is matched against whole lines (returning the
   * line's center); otherwise it is matched against individual words. Exact
   * matching is a case-insensitive substring test. When `fuzzy` is true, a
   * bigram-similarity comparison on normalized text is tried after the exact
   * test fails for a given line/word.
   *
   * @param result - OCR response to search.
   * @param needle - Text to look for.
   * @param fuzzy - Whether to fall back to fuzzy matching.
   * @returns Center of the first match in the OCR result's own coordinates, or
   *   `null` when nothing matches or the needle is empty/whitespace-only.
   */
  private findWordInOcrResult(
    result: OcrResponse,
    needle: string,
    fuzzy: boolean = false,
  ): { x: number; y: number } | null {
    // An empty string is a substring of every string, so without this guard an
    // empty needle would "match" the first word or line it is compared against.
    if (needle.trim().length === 0) return null;

    const lower = needle.toLowerCase();
    const needleNorm = ScreenReader.normalize(needle);
    // A needle with no alphanumeric characters normalizes to '' and would
    // compare equal to any text that also normalizes to ''; skip fuzzy then.
    const useFuzzy = fuzzy && needleNorm.length > 0;

    return lower.includes(' ')
      ? this.findLineMatch(result, needle, lower, needleNorm, useFuzzy)
      : this.findWordMatch(result, needle, lower, needleNorm, useFuzzy);
  }

  /** Multi-word search: matches the needle against each line's full text. */
  private findLineMatch(
    result: OcrResponse,
    needle: string,
    lower: string,
    needleNorm: string,
    useFuzzy: boolean,
  ): { x: number; y: number } | null {
    const slack = ScreenReader.FUZZY_WINDOW_SLACK;

    for (const line of result.lines) {
      // A line without word boxes has no position to report.
      if (line.words.length === 0) continue;

      // Exact match
      if (line.text.toLowerCase().includes(lower)) {
        return this.lineBounds(line);
      }
      if (!useFuzzy) continue;

      // Fuzzy: slide a window over the normalized line. Each window is up to
      // `slack` characters longer than the needle; windows near the end are
      // truncated by the end of the line. Lines more than `slack` characters
      // shorter than the needle produce no window and are skipped.
      const lineNorm = ScreenReader.normalize(line.text);
      const windowSpan = needleNorm.length + slack;
      const lastStart = lineNorm.length - needleNorm.length + slack;
      for (let start = 0; start <= lastStart; start++) {
        const candidate = lineNorm.slice(start, start + windowSpan);
        const sim = ScreenReader.bigramSimilarity(needleNorm, candidate);
        if (sim >= ScreenReader.FUZZY_THRESHOLD) {
          logger.info({ needle, matched: line.text, similarity: sim.toFixed(2) }, 'Fuzzy nameplate match');
          return this.lineBounds(line);
        }
      }
    }
    return null;
  }

  /** Single-word search: matches the needle against each individual word. */
  private findWordMatch(
    result: OcrResponse,
    needle: string,
    lower: string,
    needleNorm: string,
    useFuzzy: boolean,
  ): { x: number; y: number } | null {
    for (const line of result.lines) {
      for (const word of line.words) {
        // Exact match
        if (word.text.toLowerCase().includes(lower)) {
          return ScreenReader.boxCenter(word);
        }
        if (!useFuzzy) continue;

        // Fuzzy match
        const sim = ScreenReader.bigramSimilarity(needleNorm, ScreenReader.normalize(word.text));
        if (sim >= ScreenReader.FUZZY_THRESHOLD) {
          logger.info({ needle, matched: word.text, similarity: sim.toFixed(2) }, 'Fuzzy word match');
          return ScreenReader.boxCenter(word);
        }
      }
    }
    return null;
  }

  /**
   * Returns the rounded center of the bounding box enclosing all of a line's words.
   *
   * The box is computed over every word, so it does not depend on word order
   * or on all words sharing the same top edge.
   *
   * @throws Error when the line has no words.
   */
  private lineBounds(line: { words: { x: number; y: number; width: number; height: number }[] }): { x: number; y: number } {
    if (line.words.length === 0) {
      throw new Error('lineBounds: line has no words');
    }

    let left = Infinity;
    let top = Infinity;
    let right = -Infinity;
    let bottom = -Infinity;
    for (const w of line.words) {
      left = Math.min(left, w.x);
      top = Math.min(top, w.y);
      right = Math.max(right, w.x + w.width);
      bottom = Math.max(bottom, w.y + w.height);
    }
    return {
      x: Math.round((left + right) / 2),
      y: Math.round((top + bottom) / 2),
    };
  }

  // ── Full-screen methods ─────────────────────────────────────────────

  /**
   * Runs OCR on the whole screen and looks for `searchText`.
   *
   * @param searchText - Text to find.
   * @param fuzzy - Whether to allow fuzzy matching.
   * @returns Center of the match, or `null` when not found.
   */
  async findTextOnScreen(
    searchText: string,
    fuzzy: boolean = false,
  ): Promise<{ x: number; y: number } | null> {
    const t = performance.now();
    const { engine, screenPreprocess } = this.settings;
    const pp = ScreenReader.preprocessArg(screenPreprocess);
    const result = await this.daemon.ocr(undefined, engine, pp);
    const pos = this.findWordInOcrResult(result, searchText, fuzzy);

    if (pos) {
      logger.info({ searchText, engine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
    } else {
      logger.info({ searchText, engine, totalMs: elapsed(t) }, 'Text not found on screen');
    }
    return pos;
  }

  /** Runs OCR on the whole screen and returns the recognized text. */
  async readFullScreen(): Promise<string> {
    const { engine, screenPreprocess } = this.settings;
    const pp = ScreenReader.preprocessArg(screenPreprocess);
    const result = await this.daemon.ocr(undefined, engine, pp);
    return result.text;
  }

  // ── Region methods ──────────────────────────────────────────────────

  /**
   * Runs OCR on `region` and looks for `searchText`.
   *
   * @param region - Screen region to OCR.
   * @param searchText - Text to find.
   * @param fuzzy - Whether to allow fuzzy matching (defaults to exact-only).
   * @returns Center of the match with the region's origin added back
   *   (screen coordinates), or `null` when not found.
   */
  async findTextInRegion(
    region: Region,
    searchText: string,
    fuzzy: boolean = false,
  ): Promise<{ x: number; y: number } | null> {
    const t = performance.now();
    const { engine, screenPreprocess } = this.settings;
    const pp = ScreenReader.preprocessArg(screenPreprocess);
    const result = await this.daemon.ocr(region, engine, pp);
    const pos = this.findWordInOcrResult(result, searchText, fuzzy);

    if (pos) {
      // Offset back to screen space
      const screenPos = { x: region.x + pos.x, y: region.y + pos.y };
      logger.info({ searchText, x: screenPos.x, y: screenPos.y, region, totalMs: elapsed(t) }, 'Found text in region');
      return screenPos;
    }

    logger.info({ searchText, region, totalMs: elapsed(t) }, 'Text not found in region');
    return null;
  }

  /** Runs OCR on `region` and returns the recognized text. */
  async readRegionText(region: Region): Promise<string> {
    const { engine, screenPreprocess } = this.settings;
    const pp = ScreenReader.preprocessArg(screenPreprocess);
    const result = await this.daemon.ocr(region, engine, pp);
    return result.text;
  }

  /**
   * Reports whether `searchText` is found in `region`.
   *
   * @param fuzzy - Whether to allow fuzzy matching (defaults to exact-only).
   */
  async checkForText(region: Region, searchText: string, fuzzy: boolean = false): Promise<boolean> {
    const pos = await this.findTextInRegion(region, searchText, fuzzy);
    return pos !== null;
  }

  // ── Snapshot / Diff-OCR (for tooltip reading) ──────────────────────

  /**
   * Asks the daemon to take a snapshot for later diffing.
   * Does nothing when the tooltip method is `'edge'`.
   */
  async snapshot(): Promise<void> {
    if (this.settings.tooltipMethod === 'edge') return; // no reference frame needed
    await this.daemon.snapshot();
  }

  /**
   * Runs tooltip OCR using the configured method: the daemon's edge OCR when
   * `tooltipMethod` is `'edge'`, otherwise its diff OCR.
   *
   * @param savePath - Optional path forwarded to the daemon.
   * @param region - Optional region forwarded to the daemon.
   */
  async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
    const { engine, tooltipPreprocess, tooltipMethod, tooltipParams, edgeParams } = this.settings;
    const pp = ScreenReader.preprocessArg(tooltipPreprocess);
    if (tooltipMethod === 'edge') {
      return this.daemon.edgeOcr(savePath, region, engine, pp, edgeParams);
    }
    return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams);
  }

  // ── Template matching ──────────────────────────────────────────────

  /**
   * Delegates template matching to the daemon and logs the outcome with timing.
   *
   * @returns The daemon's match result, or `null` when there is no match.
   */
  async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
    const t = performance.now();
    const result = await this.daemon.templateMatch(templatePath, region);
    if (result) {
      logger.info({ templatePath, x: result.x, y: result.y, confidence: result.confidence.toFixed(3), ms: elapsed(t) }, 'Template match found');
    } else {
      logger.info({ templatePath, ms: elapsed(t) }, 'Template match not found');
    }
    return result;
  }

  // ── Save utilities ──────────────────────────────────────────────────

  /** Saves a screenshot to `path` via the daemon. */
  async saveScreenshot(path: string): Promise<void> {
    await this.daemon.saveScreenshot(path);
    logger.info({ path }, 'Screenshot saved');
  }

  /**
   * Saves a timestamp-named screenshot into `dir`, creating the directory if needed.
   *
   * @returns A one-element array containing the path of the saved file.
   */
  async saveDebugScreenshots(dir: string): Promise<string[]> {
    await mkdir(dir, { recursive: true });
    const ts = Date.now();
    const originalPath = join(dir, `${ts}-screenshot.png`);
    await this.daemon.saveScreenshot(originalPath);
    // Log only the file name (last path segment, either separator style).
    logger.info({ dir, files: [originalPath.split(/[\\/]/).pop()] }, 'Debug screenshot saved');
    return [originalPath];
  }

  /** Saves a screenshot of `region` to `path` via the daemon. */
  async saveRegion(region: Region, path: string): Promise<void> {
    await this.daemon.saveScreenshot(path, region);
    logger.info({ path, region }, 'Region screenshot saved');
  }

  // ── Lifecycle ───────────────────────────────────────────────────────

  /** Stops the OCR daemon. */
  async dispose(): Promise<void> {
    await this.daemon.stop();
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue