poe2-bot/src-old/game/OcrDaemon.ts
2026-02-13 01:12:11 -05:00

464 lines
13 KiB
TypeScript

import { spawn, type ChildProcess } from 'child_process';
import { join } from 'path';
import { logger } from '../util/logger.js';
import type { Region } from '../types.js';
// ── Types ───────────────────────────────────────────────────────────────────
/**
 * A single recognized word as reported by the daemon inside an {@link OcrLine}.
 * NOTE(review): `x`/`y`/`width`/`height` look like a bounding box in pixels;
 * the coordinate origin (screen vs. requested region) is not visible in this
 * file — confirm against the daemon.
 */
export interface OcrWord {
text: string;
x: number;
y: number;
width: number;
height: number;
}
/**
 * One line of OCR output: the line's text plus the individual words that the
 * daemon reported for it.
 */
export interface OcrLine {
text: string;
words: OcrWord[];
}
/**
 * Result of `OcrDaemon.ocr()`.
 * `ok` is always `true` because failed requests are surfaced as thrown errors.
 * `text` and `lines` fall back to `''` / `[]` when the daemon omits them.
 */
export interface OcrResponse {
ok: true;
text: string;
lines: OcrLine[];
}
/**
 * An entry of `GridScanResult.items`.
 * NOTE(review): presumably an item anchored at cell (`row`, `col`) that spans
 * `w` x `h` cells — the exact semantics are defined by the daemon; confirm.
 */
export interface GridItem {
row: number;
col: number;
w: number;
h: number;
}
/**
 * An entry of `GridScanResult.matches`.
 * NOTE(review): presumably a cell whose content resembles the target cell passed
 * to `gridScan` (`targetRow`/`targetCol`); the range of `similarity` is not
 * visible in this file — confirm against the daemon.
 */
export interface GridMatch {
row: number;
col: number;
similarity: number;
}
/**
 * Result of `OcrDaemon.gridScan()`.
 * `cells` and `items` default to empty arrays when the daemon omits them;
 * `matches` is left undefined when the daemon does not report any.
 */
export interface GridScanResult {
cells: boolean[][];
items: GridItem[];
matches?: GridMatch[];
}
/**
 * Result of `OcrDaemon.diffOcr()` and `OcrDaemon.edgeOcr()`.
 * `text` and `lines` default to `''` / `[]`; `region` is passed through from
 * the daemon response and may be absent.
 */
export interface DiffOcrResponse {
text: string;
lines: OcrLine[];
region?: Region;
}
/**
 * Result of `OcrDaemon.detectGrid()`.
 * `detected` is `false` when the daemon omits the flag; every other field is
 * copied verbatim from the daemon response and may be undefined.
 */
export interface DetectGridResult {
detected: boolean;
region?: Region;
cols?: number;
rows?: number;
cellWidth?: number;
cellHeight?: number;
}
/**
 * Successful result of `OcrDaemon.templateMatch()`.
 * The method returns `null` instead of an object with `found: false`, so
 * instances produced by this file always have `found === true`.
 * NOTE(review): units of `x`/`y`/`width`/`height` and the range of
 * `confidence` are defined by the daemon — confirm.
 */
export interface TemplateMatchResult {
found: boolean;
x: number;
y: number;
width: number;
height: number;
confidence: number;
}
/**
 * OCR backend selector. `'tesseract'` is treated as the default: it is never
 * written into a request, and any other engine gets a 120s request timeout.
 */
export type OcrEngine = 'tesseract' | 'easyocr' | 'paddleocr';
/**
 * Image preprocessing selector forwarded to the daemon as `preprocess`.
 * `ocr()` omits the field for `'none'`; `diffOcr()`/`edgeOcr()` forward any
 * provided value, including `'none'`.
 */
export type OcrPreprocess = 'none' | 'bgsub' | 'tophat';
/**
 * Optional cropping parameters for the `diff-ocr` command.
 * The object is forwarded to the daemon unchanged (as `params.crop`); the
 * meaning and defaults of each field are defined by the daemon, not here.
 */
export interface DiffCropParams {
diffThresh?: number;
rowThreshDiv?: number;
colThreshDiv?: number;
maxGap?: number;
trimCutoff?: number;
ocrPad?: number;
}
/**
 * Optional OCR tuning parameters shared by `DiffOcrParams` and `EdgeOcrParams`.
 * The object is forwarded to the daemon unchanged; the meaning and defaults of
 * each field are defined by the daemon.
 * NOTE(review): several fields look engine-specific (e.g. `psm` for Tesseract,
 * `linkThreshold`/`lowText`/`widthThs`/`paragraph` for EasyOCR) — confirm.
 */
export interface OcrParams {
kernelSize?: number;
upscale?: number;
useBackgroundSub?: boolean;
dimPercentile?: number;
textThresh?: number;
softThreshold?: boolean;
usePerLineOcr?: boolean;
lineGapTolerance?: number;
linePadY?: number;
psm?: number;
mergeGap?: number;
linkThreshold?: number;
textThreshold?: number;
lowText?: number;
widthThs?: number;
paragraph?: boolean;
}
/**
 * Parameter bundle accepted by `OcrDaemon.diffOcr()`; sent as the request's
 * `params` field only when it has at least one own key.
 */
export interface DiffOcrParams {
crop?: DiffCropParams;
ocr?: OcrParams;
}
/**
 * Tooltip detection strategy selector. Not used inside this file.
 * NOTE(review): presumably `'diff'` maps to `diffOcr()` and `'edge'` to
 * `edgeOcr()` in the callers — confirm.
 */
export type TooltipMethod = 'diff' | 'edge';
/**
 * Optional cropping parameters for the `edge-ocr` command.
 * The object is forwarded to the daemon unchanged (as `edgeParams.crop`); the
 * meaning and defaults of each field are defined by the daemon, not here.
 */
export interface EdgeCropParams {
cannyLow?: number;
cannyHigh?: number;
minLineLength?: number;
roiSize?: number;
densityThreshold?: number;
ocrPad?: number;
}
/**
 * Parameter bundle accepted by `OcrDaemon.edgeOcr()`; sent as the request's
 * `edgeParams` field only when it has at least one own key.
 */
export interface EdgeOcrParams {
crop?: EdgeCropParams;
ocr?: OcrParams;
}
/**
 * Wire format of a request sent to the daemon: one JSON object per line on
 * the daemon's stdin. Only `cmd` is mandatory; every other field is attached
 * by the public `OcrDaemon` method that needs it.
 *
 * Commands issued by this file: `ocr`, `capture`, `grid`, `detect-grid`,
 * `snapshot`, `diff-ocr`, `edge-ocr`, `screenshot`, `match-template`.
 */
interface DaemonRequest {
  cmd: string;
  region?: Region;
  /** Save path (`diff-ocr`, `edge-ocr`, `screenshot`) or template path (`match-template`). */
  path?: string;
  cols?: number;
  rows?: number;
  threshold?: number;
  /**
   * Target cell for the `grid` command. `gridScan` sends these fields, so they
   * are declared here to keep the request type in sync with what is actually
   * written to the daemon.
   */
  targetRow?: number;
  targetCol?: number;
  minCellSize?: number;
  maxCellSize?: number;
  /** Omitted for the default engine (`tesseract`). */
  engine?: string;
  preprocess?: string;
  params?: DiffOcrParams;
  edgeParams?: EdgeOcrParams;
  cursorX?: number;
  cursorY?: number;
}
/**
 * Wire format of a line emitted by the daemon on stdout (one JSON object per
 * line). It is a flat union of the fields of every command's reply; which
 * fields are populated depends on the command that was sent.
 */
interface DaemonResponse {
// `false` makes sendWithRetry throw, using `error` as the message when present.
ok: boolean;
// Startup handshake: a line with `ready` set resolves the pending startup wait.
ready?: boolean;
// ocr / diff-ocr / edge-ocr
text?: string;
lines?: OcrLine[];
// capture: image bytes, decoded by captureBuffer as base64
image?: string;
// grid
cells?: boolean[][];
items?: GridItem[];
matches?: GridMatch[];
// detect-grid (region is also read by diff-ocr / edge-ocr)
detected?: boolean;
region?: Region;
cols?: number;
rows?: number;
cellWidth?: number;
cellHeight?: number;
// match-template
found?: boolean;
x?: number;
y?: number;
width?: number;
height?: number;
confidence?: number;
// Error message accompanying `ok: false`
error?: string;
}
// ── OcrDaemon ───────────────────────────────────────────────────────────────
/** Path segments of the daemon executable used when no path is given to the constructor. */
const DEFAULT_EXE_SEGMENTS = [
  'tools',
  'OcrDaemon',
  'bin',
  'Release',
  'net8.0-windows10.0.19041.0',
  'OcrDaemon.exe',
];

/** Relative path to the daemon executable, joined with the platform separator. */
const DEFAULT_EXE = join(...DEFAULT_EXE_SEGMENTS);

/** Timeout in milliseconds for grid, detect-grid, snapshot, screenshot and match-template requests. */
const REQUEST_TIMEOUT = 5000;

/** Timeout in milliseconds for capture requests and for OCR requests using the default engine. */
const CAPTURE_TIMEOUT = 10000;
/**
 * Client for the external OcrDaemon executable.
 *
 * The daemon is spawned lazily (or eagerly via {@link warmup}) and spoken to
 * over stdio with newline-delimited JSON: one request object per line on
 * stdin, one response object per line on stdout. The daemon announces itself
 * with a `{ "ready": true }` line before it accepts requests.
 *
 * Requests are strictly serialized: only one request is in flight at a time
 * and further requests wait in a FIFO queue. A failed request (timeout,
 * daemon exit, or an `ok: false` response) causes the daemon to be restarted
 * and the request to be retried exactly once.
 */
export class OcrDaemon {
  /** Timeout (ms) for requests using a non-default engine, which may load or download models first. */
  private static readonly SLOW_ENGINE_TIMEOUT = 120_000;
  /** How long (ms) a freshly spawned daemon gets to emit its ready line. */
  private static readonly READY_TIMEOUT = 10_000;

  /** The daemon currently considered usable (or still starting), if any. */
  private proc: ChildProcess | null = null;
  private readonly exePath: string;

  /** In-progress startup, shared by concurrent callers so only one daemon is spawned. */
  private starting: Promise<void> | null = null;
  /** The process whose ready line `readyResolve`/`readyReject` are waiting for. */
  private startingProc: ChildProcess | null = null;
  private readyResolve: (() => void) | null = null;
  private readyReject: ((err: Error) => void) | null = null;
  private readyTimeout: ReturnType<typeof setTimeout> | null = null;

  /** Settlers of the single in-flight request. */
  private pendingResolve: ((resp: DaemonResponse) => void) | null = null;
  private pendingReject: ((err: Error) => void) | null = null;
  /** The process the in-flight request was written to. */
  private pendingProc: ChildProcess | null = null;
  private pendingTimer: ReturnType<typeof setTimeout> | null = null;

  /** Requests waiting for their turn; each entry carries its own timeout. */
  private queue: Array<{
    request: DaemonRequest;
    timeout: number;
    resolve: (resp: DaemonResponse) => void;
    reject: (err: Error) => void;
  }> = [];
  /** True while a request is in flight. */
  private processing = false;
  /** Unconsumed stdout text of the current daemon (may end in a partial line). */
  private buffer = '';
  /** Set by {@link stop}; disables the restart-and-retry behaviour. */
  private stopped = false;

  /**
   * @param exePath Path to the daemon executable; defaults to the bundled
   *   release build location.
   */
  constructor(exePath?: string) {
    this.exePath = exePath ?? DEFAULT_EXE;
  }

  // ── Public API ──────────────────────────────────────────────────────────

  /**
   * Runs OCR on the given region (or on whatever the daemon captures by
   * default when no region is given).
   *
   * @param region Area to capture.
   * @param engine OCR backend; `'tesseract'` is the default and is not sent.
   * @param preprocess Preprocessing step; `'none'` is not sent.
   * @returns Recognized text and lines (empty when the daemon omits them).
   * @throws Error when the request fails on both the first try and the retry.
   */
  async ocr(region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess): Promise<OcrResponse> {
    const req: DaemonRequest = { cmd: 'ocr' };
    if (region) req.region = region;
    if (engine && engine !== 'tesseract') req.engine = engine;
    if (preprocess && preprocess !== 'none') req.preprocess = preprocess;

    const resp = await this.sendWithRetry(req, OcrDaemon.timeoutFor(engine));
    return {
      ok: true,
      text: resp.text ?? '',
      lines: resp.lines ?? [],
    };
  }

  /**
   * Captures the given region and returns the image bytes.
   *
   * @throws Error when the request fails or the response carries no image.
   */
  async captureBuffer(region?: Region): Promise<Buffer> {
    const req: DaemonRequest = { cmd: 'capture' };
    if (region) req.region = region;

    const resp = await this.sendWithRetry(req, CAPTURE_TIMEOUT);
    if (resp.image == null) {
      // Fail with a descriptive message instead of the TypeError that
      // Buffer.from(undefined) would raise.
      throw new Error('Daemon capture response did not contain an image');
    }
    return Buffer.from(resp.image, 'base64');
  }

  /**
   * Scans a grid of `cols` x `rows` cells inside `region`.
   *
   * @param threshold Forwarded only when truthy (0 means "use the daemon default").
   * @param targetRow Forwarded only when non-negative.
   * @param targetCol Forwarded only when non-negative.
   */
  async gridScan(region: Region, cols: number, rows: number, threshold?: number, targetRow?: number, targetCol?: number): Promise<GridScanResult> {
    // The target cell is declared locally so no `any` cast is needed to attach it.
    const req: DaemonRequest & { targetRow?: number; targetCol?: number } = {
      cmd: 'grid',
      region,
      cols,
      rows,
    };
    if (threshold) req.threshold = threshold;
    if (targetRow != null && targetRow >= 0) req.targetRow = targetRow;
    if (targetCol != null && targetCol >= 0) req.targetCol = targetCol;

    const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
    return { cells: resp.cells ?? [], items: resp.items ?? [], matches: resp.matches ?? undefined };
  }

  /**
   * Asks the daemon to detect a grid inside `region`.
   *
   * @param minCellSize Forwarded only when truthy.
   * @param maxCellSize Forwarded only when truthy.
   */
  async detectGrid(region: Region, minCellSize?: number, maxCellSize?: number): Promise<DetectGridResult> {
    const req: DaemonRequest = { cmd: 'detect-grid', region };
    if (minCellSize) req.minCellSize = minCellSize;
    if (maxCellSize) req.maxCellSize = maxCellSize;

    const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
    return {
      detected: resp.detected ?? false,
      region: resp.region,
      cols: resp.cols,
      rows: resp.rows,
      cellWidth: resp.cellWidth,
      cellHeight: resp.cellHeight,
    };
  }

  /** Sends the `snapshot` command (the reference frame for a later `diffOcr`, presumably). */
  async snapshot(): Promise<void> {
    await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT);
  }

  /**
   * Sends the `diff-ocr` command.
   *
   * @param savePath Optional path forwarded to the daemon as `path`.
   * @param preprocess Forwarded whenever provided, including `'none'`.
   * @param params Forwarded only when it has at least one key.
   */
  async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess, params?: DiffOcrParams): Promise<DiffOcrResponse> {
    const req: DaemonRequest = { cmd: 'diff-ocr' };
    if (savePath) req.path = savePath;
    if (region) req.region = region;
    if (engine && engine !== 'tesseract') req.engine = engine;
    if (preprocess) req.preprocess = preprocess;
    if (params && Object.keys(params).length > 0) req.params = params;

    const resp = await this.sendWithRetry(req, OcrDaemon.timeoutFor(engine));
    return {
      text: resp.text ?? '',
      lines: resp.lines ?? [],
      region: resp.region,
    };
  }

  /**
   * Sends the `edge-ocr` command.
   *
   * @param savePath Optional path forwarded to the daemon as `path`.
   * @param preprocess Forwarded whenever provided, including `'none'`.
   * @param edgeParams Forwarded only when it has at least one key.
   * @param cursorX Forwarded when not null/undefined (0 is a valid value).
   * @param cursorY Forwarded when not null/undefined (0 is a valid value).
   */
  async edgeOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess, edgeParams?: EdgeOcrParams, cursorX?: number, cursorY?: number): Promise<DiffOcrResponse> {
    const req: DaemonRequest = { cmd: 'edge-ocr' };
    if (savePath) req.path = savePath;
    if (region) req.region = region;
    if (engine && engine !== 'tesseract') req.engine = engine;
    if (preprocess) req.preprocess = preprocess;
    if (edgeParams && Object.keys(edgeParams).length > 0) req.edgeParams = edgeParams;
    if (cursorX != null) req.cursorX = cursorX;
    if (cursorY != null) req.cursorY = cursorY;

    const resp = await this.sendWithRetry(req, OcrDaemon.timeoutFor(engine));
    return {
      text: resp.text ?? '',
      lines: resp.lines ?? [],
      region: resp.region,
    };
  }

  /** Asks the daemon to save a screenshot of `region` (if given) to `path`. */
  async saveScreenshot(path: string, region?: Region): Promise<void> {
    const req: DaemonRequest = { cmd: 'screenshot', path };
    if (region) req.region = region;
    await this.sendWithRetry(req, REQUEST_TIMEOUT);
  }

  /**
   * Searches for the template image at `templatePath`.
   *
   * @returns The match, or `null` when the daemon reports no match.
   */
  async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
    const req: DaemonRequest = { cmd: 'match-template', path: templatePath };
    if (region) req.region = region;

    const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
    if (!resp.found) return null;
    return {
      found: true,
      x: resp.x!,
      y: resp.y!,
      width: resp.width!,
      height: resp.height!,
      confidence: resp.confidence!,
    };
  }

  /** Eagerly spawn the daemon process so it's ready for the first real request. */
  async warmup(): Promise<void> {
    await this.ensureRunning();
  }

  /**
   * Kills the daemon and disables restart-and-retry. The in-flight request and
   * every queued request are rejected instead of being left hanging.
   */
  async stop(): Promise<void> {
    this.stopped = true;
    if (this.proc) this.terminate(this.proc, new Error('Daemon stopped'));
  }

  // ── Internal ────────────────────────────────────────────────────────────

  /** Picks the request timeout for an OCR-style command based on the engine. */
  private static timeoutFor(engine?: OcrEngine): number {
    // Python engines need longer timeout for first model load + download
    return engine && engine !== 'tesseract' ? OcrDaemon.SLOW_ENGINE_TIMEOUT : CAPTURE_TIMEOUT;
  }

  /**
   * Resolves once a live, ready daemon is available, spawning one if needed.
   * Concurrent callers share a single startup instead of each spawning a
   * process and overwriting each other's ready callbacks.
   */
  private ensureRunning(): Promise<void> {
    if (this.starting) return this.starting;

    const current = this.proc;
    // A process killed by a signal keeps exitCode === null, so signalCode
    // must be checked too.
    if (current && current.exitCode === null && current.signalCode === null) {
      return Promise.resolve();
    }

    this.proc = null;
    const starting = this.startDaemon().finally(() => {
      if (this.starting === starting) this.starting = null;
    });
    this.starting = starting;
    return starting;
  }

  /**
   * Spawns the daemon, wires up its stdio, and waits for the ready line.
   * Rejects when the process fails to spawn, exits early, is killed, or does
   * not become ready in time; in all of those cases `this.proc` is cleared.
   */
  private async startDaemon(): Promise<void> {
    this.buffer = '';
    logger.info({ exe: this.exePath }, 'Spawning OCR daemon');

    const proc = spawn(this.exePath, [], {
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsHide: true,
    });
    this.proc = proc;
    this.startingProc = proc;

    proc.stderr?.on('data', (data: Buffer) => {
      logger.warn({ daemon: data.toString().trim() }, 'OcrDaemon stderr');
    });

    // Without a listener, an EPIPE on stdin would be thrown as an uncaught exception.
    proc.stdin?.on('error', (err: Error) => {
      logger.warn({ err }, 'OcrDaemon stdin error');
    });

    // Emitted e.g. when the executable does not exist; unhandled it would
    // crash the host process and leave the ready wait hanging.
    proc.on('error', (err: Error) => {
      logger.warn({ err }, 'OcrDaemon process error');
      this.onDaemonGone(proc, err);
    });

    proc.on('exit', (code) => {
      logger.warn({ code }, 'OcrDaemon exited');
      this.onDaemonGone(proc, new Error(`Daemon exited with code ${code}`));
    });

    // Decode in the stream so multi-byte characters split across chunks survive.
    proc.stdout?.setEncoding('utf8');
    proc.stdout?.on('data', (chunk: string) => {
      // Ignore output of a daemon that has already been replaced.
      if (this.proc !== proc) return;
      this.buffer += chunk;
      this.processBuffer();
    });

    // Wait for ready signal
    await new Promise<void>((resolve, reject) => {
      this.readyResolve = resolve;
      this.readyReject = reject;
      this.readyTimeout = setTimeout(() => {
        // Kill the half-started process so it is not mistaken for a usable one.
        this.terminate(proc, new Error('Daemon did not become ready within 10s'));
      }, OcrDaemon.READY_TIMEOUT);
    });

    logger.info('OCR daemon ready');
  }

  /** Forgets the startup wait (timer and callbacks) without settling it. */
  private clearReady(): void {
    if (this.readyTimeout) clearTimeout(this.readyTimeout);
    this.readyTimeout = null;
    this.readyResolve = null;
    this.readyReject = null;
    this.startingProc = null;
  }

  /** Forgets the in-flight request (timer and callbacks) without settling it. */
  private clearPending(): void {
    if (this.pendingTimer) clearTimeout(this.pendingTimer);
    this.pendingTimer = null;
    this.pendingResolve = null;
    this.pendingReject = null;
    this.pendingProc = null;
    this.processing = false;
  }

  /** Kills `proc` and immediately fails everything that was waiting on it. */
  private terminate(proc: ChildProcess, reason: Error): void {
    proc.stdin?.end();
    proc.kill();
    this.onDaemonGone(proc, reason);
  }

  /**
   * Detaches `proc` from this client and rejects whatever was waiting on it
   * (startup wait and/or in-flight request). Calls for a process that is no
   * longer referenced are no-ops, so a late `exit` event of a replaced daemon
   * cannot disturb its successor.
   */
  private onDaemonGone(proc: ChildProcess, err: Error): void {
    if (this.proc === proc) this.proc = null;

    if (this.startingProc === proc) {
      const reject = this.readyReject;
      this.clearReady();
      reject?.(err);
    }

    if (this.pendingProc === proc && this.pendingReject) {
      // Settling the request also drains the queue; with no daemon left, the
      // queued requests are rejected and may be retried by their callers.
      this.pendingReject(err);
    }
  }

  /** Consumes every complete line in the stdout buffer and dispatches it. */
  private processBuffer(): void {
    let newlineIdx: number;
    while ((newlineIdx = this.buffer.indexOf('\n')) !== -1) {
      const line = this.buffer.slice(0, newlineIdx).trim();
      this.buffer = this.buffer.slice(newlineIdx + 1);
      if (!line) continue;

      let parsed: DaemonResponse;
      try {
        const value: unknown = JSON.parse(line);
        // Valid JSON that is not an object (e.g. `null`) cannot be a response.
        if (typeof value !== 'object' || value === null) {
          throw new Error('Daemon response is not a JSON object');
        }
        parsed = value as DaemonResponse;
      } catch {
        logger.warn({ line }, 'Failed to parse daemon response');
        continue;
      }

      // Handle ready signal
      if (parsed.ready && this.readyResolve) {
        const resolve = this.readyResolve;
        this.clearReady();
        resolve();
        continue;
      }

      // Handle normal response
      if (this.pendingResolve) {
        this.pendingResolve(parsed);
      }
    }
  }

  /** Queues a request and resolves with the daemon's raw response to it. */
  private async send(request: DaemonRequest, timeout: number): Promise<DaemonResponse> {
    await this.ensureRunning();
    return new Promise<DaemonResponse>((resolve, reject) => {
      this.queue.push({ request, timeout, resolve, reject });
      this.drainQueue();
    });
  }

  /**
   * Starts the next queued request if none is in flight. Each request is
   * timed with its own timeout. When no daemon is available the queued
   * requests are rejected rather than written to a dead pipe.
   */
  private drainQueue(): void {
    while (!this.processing) {
      const entry = this.queue.shift();
      if (!entry) return;

      const proc = this.proc;
      const stdin = proc?.stdin;
      if (!proc || !stdin || !stdin.writable) {
        entry.reject(new Error('Daemon is not running'));
        continue;
      }

      this.processing = true;
      this.pendingProc = proc;
      this.pendingTimer = setTimeout(() => {
        this.clearPending();
        entry.reject(new Error(`Daemon request timed out after ${entry.timeout}ms`));
        this.drainQueue();
      }, entry.timeout);
      this.pendingResolve = (resp) => {
        this.clearPending();
        entry.resolve(resp);
        this.drainQueue();
      };
      this.pendingReject = (err) => {
        this.clearPending();
        entry.reject(err);
        this.drainQueue();
      };

      stdin.write(JSON.stringify(entry.request) + '\n');
    }
  }

  /** Sends a request and converts an `ok: false` response into a thrown Error. */
  private async sendChecked(request: DaemonRequest, timeout: number, fallbackMessage: string): Promise<DaemonResponse> {
    const resp = await this.send(request, timeout);
    if (!resp.ok) throw new Error(resp.error ?? fallbackMessage);
    return resp;
  }

  /**
   * Sends a request; on any failure restarts the daemon and retries once.
   * No retry happens after {@link stop} has been called.
   *
   * @throws Error the failure of the retry (or of the first attempt when stopped).
   */
  private async sendWithRetry(request: DaemonRequest, timeout: number): Promise<DaemonResponse> {
    let usedProc: ChildProcess | null = null;
    try {
      await this.ensureRunning();
      usedProc = this.proc;
      return await this.sendChecked(request, timeout, 'Daemon returned error');
    } catch (err) {
      if (this.stopped) throw err;
      // Kill and retry once
      logger.warn({ err, cmd: request.cmd }, 'Daemon request failed, restarting');
      // Only kill the daemon this attempt used; a concurrent caller may
      // already have replaced it, and that replacement must survive.
      if (usedProc && this.proc === usedProc) {
        this.terminate(usedProc, new Error('Daemon restarted after a failed request'));
      }
      return this.sendChecked(request, timeout, 'Daemon returned error on retry');
    }
  }
}