switched to new way :)

This commit is contained in:
Boki 2026-02-13 01:12:11 -05:00
parent b03a2a25f1
commit f22d182c8f
30 changed files with 0 additions and 0 deletions

View file

@ -1,297 +0,0 @@
import { mkdir } from 'fs/promises';
import { join } from 'path';
import { logger } from '../util/logger.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult, type TooltipMethod, type EdgeOcrParams } from './OcrDaemon.js';
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
import type { Region } from '../types.js';
/**
 * Format the time elapsed since `start` as a whole number of milliseconds.
 *
 * @param start A timestamp previously obtained from `performance.now()`.
 * @returns The elapsed time rounded to zero decimals with an `ms` suffix, e.g. `"12ms"`.
 */
function elapsed(start: number): string {
  const deltaMs = performance.now() - start;
  return deltaMs.toFixed(0) + 'ms';
}
/**
 * Tunable OCR options held by `ScreenReader.settings` and read on each call.
 * The field notes below describe how the visible `ScreenReader` code uses each value.
 */
export interface OcrSettings {
/** OCR engine forwarded to every daemon OCR call; `warmup()` skips its extra OCR request when this is 'tesseract'. */
engine: OcrEngine;
/** Preprocess mode for full-screen and region OCR; 'none' is sent to the daemon as `undefined`. */
screenPreprocess: OcrPreprocess;
/** Preprocess mode for `diffOcr()` (tooltip reading); 'none' is sent to the daemon as `undefined`. */
tooltipPreprocess: OcrPreprocess;
/** Selects the tooltip pipeline: 'edge' routes `diffOcr()` to the daemon's `edgeOcr` and makes `snapshot()` a no-op; any other value uses the daemon's `diffOcr`. */
tooltipMethod: TooltipMethod;
/** Parameters passed to the daemon's `diffOcr` when the diff pipeline is active. */
tooltipParams: DiffOcrParams;
/** Parameters passed to the daemon's `edgeOcr` when `tooltipMethod` is 'edge'. */
edgeParams: EdgeOcrParams;
/** Not read by the visible `ScreenReader` code — presumably consumed by callers to decide whether to save debug images (TODO confirm). */
saveDebugImages: boolean;
}
/**
 * Facade over the OCR daemon: screen capture, OCR text lookup (exact or fuzzy),
 * tooltip OCR, template matching and screenshot saving.
 *
 * Every operation is delegated to a single `OcrDaemon` instance owned by this
 * object; call `dispose()` to stop it.
 */
export class ScreenReader {
  /** Minimum bigram similarity (0..1) for a fuzzy comparison to count as a match. */
  private static readonly FUZZY_THRESHOLD = 0.55;

  /**
   * Extra characters added to each fuzzy sliding window, and the amount by which
   * windows are allowed to run past the end of the line (yielding shorter windows).
   */
  private static readonly FUZZY_WINDOW_SLACK = 2;

  private readonly daemon = new OcrDaemon();

  /** Grid reader sharing this reader's daemon. */
  readonly grid = new GridReader(this.daemon);

  /** Mutable OCR settings; they are re-read on every call. */
  settings: OcrSettings = {
    engine: 'easyocr',
    screenPreprocess: 'none',
    tooltipPreprocess: 'tophat',
    tooltipMethod: 'diff',
    tooltipParams: {
      crop: { diffThresh: 10 },
      ocr: { kernelSize: 21 },
    },
    edgeParams: {
      crop: {},
      ocr: { kernelSize: 21 },
    },
    saveDebugImages: true,
  };

  /**
   * Warm up the OCR daemon and, for engines other than 'tesseract', the engine itself.
   *
   * After the daemon's own warmup, one OCR request over a 100×100 region at the
   * screen origin is issued so the engine's model gets loaded before the first
   * real request. The request is awaited, so the returned promise resolves only
   * once that OCR call has completed.
   */
  async warmup(): Promise<void> {
    await this.daemon.warmup();
    const { engine } = this.settings;
    if (engine !== 'tesseract') {
      // The OCR result is irrelevant; the call exists only to trigger the model load.
      await this.daemon.ocr({ x: 0, y: 0, width: 100, height: 100 }, engine);
    }
  }

  // ── Screenshot capture ──────────────────────────────────────────────

  /** Capture the whole screen via the daemon and log how long it took. */
  async captureScreen(): Promise<Buffer> {
    const t = performance.now();
    const buf = await this.daemon.captureBuffer();
    logger.info({ ms: elapsed(t) }, 'captureScreen');
    return buf;
  }

  /** Capture only `region` via the daemon and log how long it took. */
  async captureRegion(region: Region): Promise<Buffer> {
    const t = performance.now();
    const buf = await this.daemon.captureBuffer(region);
    logger.info({ ms: elapsed(t) }, 'captureRegion');
    return buf;
  }

  // ── OCR helpers ─────────────────────────────────────────────────────

  /**
   * Bigram (Dice) similarity between two strings, in the range 0..1.
   *
   * Bigrams are matched as a multiset, so a repeated bigram in `b` only counts
   * as often as it occurs in `a`. Strings shorter than two characters have no
   * bigrams and are compared for plain equality instead.
   */
  private static bigramSimilarity(a: string, b: string): number {
    if (a.length < 2 || b.length < 2) return a === b ? 1 : 0;

    const remaining = new Map<string, number>();
    for (let i = 0; i < a.length - 1; i++) {
      const bigram = a.slice(i, i + 2);
      remaining.set(bigram, (remaining.get(bigram) ?? 0) + 1);
    }

    let matches = 0;
    for (let i = 0; i < b.length - 1; i++) {
      const bigram = b.slice(i, i + 2);
      const available = remaining.get(bigram) ?? 0;
      if (available > 0) {
        matches++;
        remaining.set(bigram, available - 1);
      }
    }
    return (2 * matches) / (a.length - 1 + b.length - 1);
  }

  /**
   * Normalize text for fuzzy comparison: lowercase, then remove every character
   * that is not an ASCII letter or digit (whitespace and punctuation included).
   */
  private static normalize(s: string): string {
    return s.toLowerCase().replace(/[^a-z0-9]/g, '');
  }

  /** Rounded center point of an axis-aligned box. */
  private static boxCenter(box: { x: number; y: number; width: number; height: number }): { x: number; y: number } {
    return {
      x: Math.round(box.x + box.width / 2),
      y: Math.round(box.y + box.height / 2),
    };
  }

  /**
   * Slide a window across `lineNorm` and return the similarity of the first
   * window that reaches `FUZZY_THRESHOLD` against `needleNorm`, or `null`.
   *
   * Windows are `FUZZY_WINDOW_SLACK` characters longer than the needle, and the
   * start index may run the same amount past the last full-width position, so
   * shorter tail windows are compared as well. Lines more than the slack shorter
   * than the needle are never compared.
   */
  private static firstFuzzyWindowMatch(needleNorm: string, lineNorm: string): number | null {
    const slack = ScreenReader.FUZZY_WINDOW_SLACK;
    const span = needleNorm.length + slack;
    const lastStart = lineNorm.length - needleNorm.length + slack;
    for (let start = 0; start <= lastStart; start++) {
      const sim = ScreenReader.bigramSimilarity(needleNorm, lineNorm.slice(start, start + span));
      if (sim >= ScreenReader.FUZZY_THRESHOLD) return sim;
    }
    return null;
  }

  /**
   * Locate `needle` in an OCR result.
   *
   * A needle containing a space is matched against whole lines and yields the
   * center of the matching line's bounding box; otherwise it is matched against
   * individual words and yields that word's center. Exact matching is a
   * case-insensitive substring test. With `fuzzy`, a bigram-similarity match on
   * normalized text is tried whenever the exact test fails.
   *
   * @param result OCR output to search.
   * @param needle Text to look for.
   * @param fuzzy  Also accept approximate matches (default `false`).
   * @returns Coordinates in the OCR result's own space, or `null` when nothing matches.
   */
  private findWordInOcrResult(
    result: OcrResponse,
    needle: string,
    fuzzy = false,
  ): { x: number; y: number } | null {
    const lower = needle.toLowerCase();
    const needleNorm = ScreenReader.normalize(needle);
    // A needle that normalizes to '' (punctuation only) would "fuzzy match" any
    // empty normalized word or window, so fuzzy matching is skipped for it.
    const useFuzzy = fuzzy && needleNorm.length > 0;

    // Multi-word: match against the full line text.
    if (lower.includes(' ')) {
      for (const line of result.lines) {
        if (line.words.length === 0) continue;

        if (line.text.toLowerCase().includes(lower)) {
          return this.lineBounds(line);
        }

        if (useFuzzy) {
          const sim = ScreenReader.firstFuzzyWindowMatch(needleNorm, ScreenReader.normalize(line.text));
          if (sim !== null) {
            logger.info({ needle, matched: line.text, similarity: sim.toFixed(2) }, 'Fuzzy nameplate match');
            return this.lineBounds(line);
          }
        }
      }
      return null;
    }

    // Single word: match against individual words.
    for (const line of result.lines) {
      for (const word of line.words) {
        if (word.text.toLowerCase().includes(lower)) {
          return ScreenReader.boxCenter(word);
        }

        if (useFuzzy) {
          const sim = ScreenReader.bigramSimilarity(needleNorm, ScreenReader.normalize(word.text));
          if (sim >= ScreenReader.FUZZY_THRESHOLD) {
            logger.info({ needle, matched: word.text, similarity: sim.toFixed(2) }, 'Fuzzy word match');
            return ScreenReader.boxCenter(word);
          }
        }
      }
    }
    return null;
  }

  /**
   * Center of the bounding box enclosing all of a line's words.
   *
   * The box is the min/max over every word on both axes, so the result does not
   * depend on word order or on the first word being the topmost one.
   *
   * @throws Error when the line has no words.
   */
  private lineBounds(line: { words: { x: number; y: number; width: number; height: number }[] }): { x: number; y: number } {
    if (line.words.length === 0) {
      throw new Error('lineBounds: line has no words');
    }

    let left = Infinity;
    let top = Infinity;
    let right = -Infinity;
    let bottom = -Infinity;
    for (const word of line.words) {
      left = Math.min(left, word.x);
      top = Math.min(top, word.y);
      right = Math.max(right, word.x + word.width);
      bottom = Math.max(bottom, word.y + word.height);
    }
    return {
      x: Math.round((left + right) / 2),
      y: Math.round((top + bottom) / 2),
    };
  }

  // ── Full-screen methods ─────────────────────────────────────────────

  /**
   * OCR the whole screen and locate `searchText`.
   *
   * @param searchText Text to look for; a value containing a space is matched per line.
   * @param fuzzy      Also accept approximate matches (default `false`).
   * @returns The position of the match, or `null` when not found.
   */
  async findTextOnScreen(
    searchText: string,
    fuzzy = false,
  ): Promise<{ x: number; y: number } | null> {
    const t = performance.now();
    const { engine, screenPreprocess } = this.settings;
    const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
    const result = await this.daemon.ocr(undefined, engine, pp);
    const pos = this.findWordInOcrResult(result, searchText, fuzzy);
    if (pos) {
      logger.info({ searchText, engine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
    } else {
      logger.info({ searchText, engine, totalMs: elapsed(t) }, 'Text not found on screen');
    }
    return pos;
  }

  /** OCR the whole screen and return the recognized text. */
  async readFullScreen(): Promise<string> {
    const { engine, screenPreprocess } = this.settings;
    const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
    const result = await this.daemon.ocr(undefined, engine, pp);
    return result.text;
  }

  // ── Region methods ──────────────────────────────────────────────────

  /**
   * OCR `region` and locate `searchText` inside it.
   *
   * @param region     Screen region to OCR.
   * @param searchText Text to look for; a value containing a space is matched per line.
   * @param fuzzy      Also accept approximate matches (default `false`).
   * @returns The match position offset by the region's origin (screen space), or `null`.
   */
  async findTextInRegion(
    region: Region,
    searchText: string,
    fuzzy = false,
  ): Promise<{ x: number; y: number } | null> {
    const t = performance.now();
    const { engine, screenPreprocess } = this.settings;
    const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
    const result = await this.daemon.ocr(region, engine, pp);
    const pos = this.findWordInOcrResult(result, searchText, fuzzy);
    if (pos) {
      // Offset back to screen space
      const screenPos = { x: region.x + pos.x, y: region.y + pos.y };
      logger.info({ searchText, x: screenPos.x, y: screenPos.y, region, totalMs: elapsed(t) }, 'Found text in region');
      return screenPos;
    }
    logger.info({ searchText, region, totalMs: elapsed(t) }, 'Text not found in region');
    return null;
  }

  /** OCR `region` and return the recognized text. */
  async readRegionText(region: Region): Promise<string> {
    const { engine, screenPreprocess } = this.settings;
    const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
    const result = await this.daemon.ocr(region, engine, pp);
    return result.text;
  }

  /**
   * Report whether `searchText` can be found inside `region`.
   *
   * @param fuzzy Also accept approximate matches (default `false`).
   */
  async checkForText(region: Region, searchText: string, fuzzy = false): Promise<boolean> {
    const pos = await this.findTextInRegion(region, searchText, fuzzy);
    return pos !== null;
  }

  // ── Snapshot / Diff-OCR (for tooltip reading) ──────────────────────

  /**
   * Ask the daemon to take a snapshot for a later `diffOcr()` call.
   * Does nothing when `tooltipMethod` is 'edge', which needs no reference frame.
   */
  async snapshot(): Promise<void> {
    if (this.settings.tooltipMethod === 'edge') return;
    await this.daemon.snapshot();
  }

  /**
   * Run tooltip OCR using the pipeline selected by `settings.tooltipMethod`:
   * the daemon's `edgeOcr` with `edgeParams` for 'edge', otherwise its `diffOcr`
   * with `tooltipParams`. A `tooltipPreprocess` of 'none' is sent as `undefined`.
   *
   * @param savePath Optional path forwarded to the daemon unchanged.
   * @param region   Optional region forwarded to the daemon unchanged.
   */
  async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
    const { engine, tooltipPreprocess, tooltipMethod, tooltipParams, edgeParams } = this.settings;
    const pp = tooltipPreprocess !== 'none' ? tooltipPreprocess : undefined;
    if (tooltipMethod === 'edge') {
      return this.daemon.edgeOcr(savePath, region, engine, pp, edgeParams);
    }
    return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams);
  }

  // ── Template matching ──────────────────────────────────────────────

  /**
   * Delegate a template match to the daemon and log the outcome with timing.
   *
   * @param templatePath Path of the template image, forwarded to the daemon.
   * @param region       Optional region forwarded to the daemon.
   * @returns The daemon's match result, or `null` when it reports none.
   */
  async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
    const t = performance.now();
    const result = await this.daemon.templateMatch(templatePath, region);
    if (result) {
      logger.info({ templatePath, x: result.x, y: result.y, confidence: result.confidence.toFixed(3), ms: elapsed(t) }, 'Template match found');
    } else {
      logger.info({ templatePath, ms: elapsed(t) }, 'Template match not found');
    }
    return result;
  }

  // ── Save utilities ──────────────────────────────────────────────────

  /** Have the daemon save a screenshot to `path`. */
  async saveScreenshot(path: string): Promise<void> {
    await this.daemon.saveScreenshot(path);
    logger.info({ path }, 'Screenshot saved');
  }

  /**
   * Save a timestamped screenshot (`<Date.now()>-screenshot.png`) into `dir`,
   * creating the directory (recursively) when needed.
   *
   * @returns A one-element array holding the path of the saved file.
   */
  async saveDebugScreenshots(dir: string): Promise<string[]> {
    await mkdir(dir, { recursive: true });
    const ts = Date.now();
    const originalPath = join(dir, `${ts}-screenshot.png`);
    await this.daemon.saveScreenshot(originalPath);
    // Only the file name (last path segment, either separator) goes into the log.
    logger.info({ dir, files: [originalPath.split(/[\\/]/).pop()] }, 'Debug screenshot saved');
    return [originalPath];
  }

  /** Have the daemon save a screenshot of `region` to `path`. */
  async saveRegion(region: Region, path: string): Promise<void> {
    await this.daemon.saveScreenshot(path, region);
    logger.info({ path, region }, 'Region screenshot saved');
  }

  // ── Lifecycle ───────────────────────────────────────────────────────

  /** Stop the OCR daemon. */
  async dispose(): Promise<void> {
    await this.daemon.stop();
  }
}