poe2-bot/src-old/game/ScreenReader.ts
2026-02-13 01:12:11 -05:00

297 lines
11 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { mkdir } from 'fs/promises';
import { join } from 'path';
import { logger } from '../util/logger.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult, type TooltipMethod, type EdgeOcrParams } from './OcrDaemon.js';
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
import type { Region } from '../types.js';
function elapsed(start: number): string {
return `${(performance.now() - start).toFixed(0)}ms`;
}
export interface OcrSettings {
engine: OcrEngine;
screenPreprocess: OcrPreprocess;
tooltipPreprocess: OcrPreprocess;
tooltipMethod: TooltipMethod;
tooltipParams: DiffOcrParams;
edgeParams: EdgeOcrParams;
saveDebugImages: boolean;
}
export class ScreenReader {
private daemon = new OcrDaemon();
readonly grid = new GridReader(this.daemon);
settings: OcrSettings = {
engine: 'easyocr',
screenPreprocess: 'none',
tooltipPreprocess: 'tophat',
tooltipMethod: 'diff',
tooltipParams: {
crop: { diffThresh: 10 },
ocr: { kernelSize: 21 },
},
edgeParams: {
crop: {},
ocr: { kernelSize: 21 },
},
saveDebugImages: true,
};
/**
* Eagerly spawn the OCR daemon and warm up the EasyOCR model.
* Fire-and-forget a small OCR request so the Python model loads in the background.
*/
async warmup(): Promise<void> {
await this.daemon.warmup();
// Fire a small EasyOCR request to trigger Python model load
// Use a tiny 1×1 region to minimize work, we only care about loading the model
const { engine } = this.settings;
if (engine !== 'tesseract') {
await this.daemon.ocr({ x: 0, y: 0, width: 100, height: 100 }, engine);
}
}
// ── Screenshot capture ──────────────────────────────────────────────
async captureScreen(): Promise<Buffer> {
const t = performance.now();
const buf = await this.daemon.captureBuffer();
logger.info({ ms: elapsed(t) }, 'captureScreen');
return buf;
}
async captureRegion(region: Region): Promise<Buffer> {
const t = performance.now();
const buf = await this.daemon.captureBuffer(region);
logger.info({ ms: elapsed(t) }, 'captureRegion');
return buf;
}
// ── OCR helpers ─────────────────────────────────────────────────────
/** Bigram (Dice) similarity between two strings, 0..1. */
private static bigramSimilarity(a: string, b: string): number {
if (a.length < 2 || b.length < 2) return a === b ? 1 : 0;
const bigramsA = new Map<string, number>();
for (let i = 0; i < a.length - 1; i++) {
const bg = a.slice(i, i + 2);
bigramsA.set(bg, (bigramsA.get(bg) ?? 0) + 1);
}
let matches = 0;
for (let i = 0; i < b.length - 1; i++) {
const bg = b.slice(i, i + 2);
const count = bigramsA.get(bg);
if (count && count > 0) {
matches++;
bigramsA.set(bg, count - 1);
}
}
return (2 * matches) / (a.length - 1 + b.length - 1);
}
/** Normalize text for fuzzy comparison: lowercase, strip non-alphanumeric, collapse spaces. */
private static normalize(s: string): string {
return s.toLowerCase().replace(/[^a-z0-9]/g, '');
}
private findWordInOcrResult(
result: OcrResponse,
needle: string,
fuzzy: boolean = false,
): { x: number; y: number } | null {
const lower = needle.toLowerCase();
const FUZZY_THRESHOLD = 0.55;
// Multi-word: match against the full line text, return center of the line's bounding box
if (lower.includes(' ')) {
const needleNorm = ScreenReader.normalize(needle);
for (const line of result.lines) {
if (line.words.length === 0) continue;
const lineText = line.text.toLowerCase();
// Exact match
if (lineText.includes(lower)) {
return this.lineBounds(line);
}
// Fuzzy: normalize line text and check sliding windows
if (fuzzy) {
const lineNorm = ScreenReader.normalize(line.text);
// Check windows of similar length to the needle
const windowLen = needleNorm.length;
for (let i = 0; i <= lineNorm.length - windowLen + 2; i++) {
const window = lineNorm.slice(i, i + windowLen + 2);
const sim = ScreenReader.bigramSimilarity(needleNorm, window);
if (sim >= FUZZY_THRESHOLD) {
logger.info({ needle, matched: line.text, similarity: sim.toFixed(2) }, 'Fuzzy nameplate match');
return this.lineBounds(line);
}
}
}
}
return null;
}
// Single word: match against individual words
const needleNorm = ScreenReader.normalize(needle);
for (const line of result.lines) {
for (const word of line.words) {
// Exact match
if (word.text.toLowerCase().includes(lower)) {
return {
x: Math.round(word.x + word.width / 2),
y: Math.round(word.y + word.height / 2),
};
}
// Fuzzy match
if (fuzzy) {
const wordNorm = ScreenReader.normalize(word.text);
const sim = ScreenReader.bigramSimilarity(needleNorm, wordNorm);
if (sim >= FUZZY_THRESHOLD) {
logger.info({ needle, matched: word.text, similarity: sim.toFixed(2) }, 'Fuzzy word match');
return {
x: Math.round(word.x + word.width / 2),
y: Math.round(word.y + word.height / 2),
};
}
}
}
}
return null;
}
/** Get center of a line's bounding box from its words. */
private lineBounds(line: { words: { x: number; y: number; width: number; height: number }[] }): { x: number; y: number } {
const first = line.words[0];
const last = line.words[line.words.length - 1];
const x1 = first.x;
const y1 = first.y;
const x2 = last.x + last.width;
const y2 = Math.max(...line.words.map(w => w.y + w.height));
return {
x: Math.round((x1 + x2) / 2),
y: Math.round((y1 + y2) / 2),
};
}
// ── Full-screen methods ─────────────────────────────────────────────
async findTextOnScreen(
searchText: string,
fuzzy: boolean = false,
): Promise<{ x: number; y: number } | null> {
const t = performance.now();
const { engine, screenPreprocess } = this.settings;
const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
const result = await this.daemon.ocr(undefined, engine, pp);
const pos = this.findWordInOcrResult(result, searchText, fuzzy);
if (pos) {
logger.info({ searchText, engine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
} else {
logger.info({ searchText, engine, totalMs: elapsed(t) }, 'Text not found on screen');
}
return pos;
}
async readFullScreen(): Promise<string> {
const { engine, screenPreprocess } = this.settings;
const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
const result = await this.daemon.ocr(undefined, engine, pp);
return result.text;
}
// ── Region methods ──────────────────────────────────────────────────
async findTextInRegion(
region: Region,
searchText: string,
): Promise<{ x: number; y: number } | null> {
const t = performance.now();
const { engine, screenPreprocess } = this.settings;
const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
const result = await this.daemon.ocr(region, engine, pp);
const pos = this.findWordInOcrResult(result, searchText);
if (pos) {
// Offset back to screen space
const screenPos = { x: region.x + pos.x, y: region.y + pos.y };
logger.info({ searchText, x: screenPos.x, y: screenPos.y, region, totalMs: elapsed(t) }, 'Found text in region');
return screenPos;
}
logger.info({ searchText, region, totalMs: elapsed(t) }, 'Text not found in region');
return null;
}
async readRegionText(region: Region): Promise<string> {
const { engine, screenPreprocess } = this.settings;
const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
const result = await this.daemon.ocr(region, engine, pp);
return result.text;
}
async checkForText(region: Region, searchText: string): Promise<boolean> {
const pos = await this.findTextInRegion(region, searchText);
return pos !== null;
}
// ── Snapshot / Diff-OCR (for tooltip reading) ──────────────────────
async snapshot(): Promise<void> {
if (this.settings.tooltipMethod === 'edge') return; // no reference frame needed
await this.daemon.snapshot();
}
async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
const { engine, tooltipPreprocess, tooltipMethod, tooltipParams, edgeParams } = this.settings;
const pp = tooltipPreprocess !== 'none' ? tooltipPreprocess : undefined;
if (tooltipMethod === 'edge') {
return this.daemon.edgeOcr(savePath, region, engine, pp, edgeParams);
}
return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams);
}
// ── Template matching ──────────────────────────────────────────────
async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
const t = performance.now();
const result = await this.daemon.templateMatch(templatePath, region);
if (result) {
logger.info({ templatePath, x: result.x, y: result.y, confidence: result.confidence.toFixed(3), ms: elapsed(t) }, 'Template match found');
} else {
logger.info({ templatePath, ms: elapsed(t) }, 'Template match not found');
}
return result;
}
// ── Save utilities ──────────────────────────────────────────────────
async saveScreenshot(path: string): Promise<void> {
await this.daemon.saveScreenshot(path);
logger.info({ path }, 'Screenshot saved');
}
async saveDebugScreenshots(dir: string): Promise<string[]> {
await mkdir(dir, { recursive: true });
const ts = Date.now();
const originalPath = join(dir, `${ts}-screenshot.png`);
await this.daemon.saveScreenshot(originalPath);
logger.info({ dir, files: [originalPath.split(/[\\/]/).pop()] }, 'Debug screenshot saved');
return [originalPath];
}
async saveRegion(region: Region, path: string): Promise<void> {
await this.daemon.saveScreenshot(path, region);
logger.info({ path, region }, 'Region screenshot saved');
}
// ── Lifecycle ───────────────────────────────────────────────────────
async dispose(): Promise<void> {
await this.daemon.stop();
}
}