// File viewer metadata: 464 lines, 13 KiB, TypeScript.
import { spawn, type ChildProcess } from 'child_process';
import { join } from 'path';
import { logger } from '../util/logger.js';
import type { Region } from '../types.js';

// ── Types ───────────────────────────────────────────────────────────────────

/**
 * One word entry of an {@link OcrLine}: the word's text plus a rectangle
 * described by `x`, `y`, `width` and `height`.
 *
 * NOTE(review): the rectangle is presumably in pixels of the captured image;
 * units and origin are defined by the daemon — confirm.
 */
export interface OcrWord {
  text: string;
  x: number;
  y: number;
  width: number;
  height: number;
}

/** One line of an OCR result: the line's text and its individual word entries. */
export interface OcrLine {
  text: string;
  words: OcrWord[];
}

/**
 * Result of `OcrDaemon.ocr()`. `ok` is always the literal `true`; failed
 * requests surface as rejected promises instead of a response object.
 */
export interface OcrResponse {
  ok: true;
  text: string;
  lines: OcrLine[];
}

/**
 * An entry of `GridScanResult.items`, positioned by `row`/`col` with an
 * extent of `w` by `h`.
 *
 * NOTE(review): `w`/`h` are presumably measured in grid cells — confirm
 * against the daemon.
 */
export interface GridItem {
  row: number;
  col: number;
  w: number;
  h: number;
}

/**
 * An entry of `GridScanResult.matches`: a grid position and a similarity score.
 *
 * NOTE(review): the range of `similarity` is defined by the daemon — confirm.
 */
export interface GridMatch {
  row: number;
  col: number;
  similarity: number;
}

/**
 * Result of `OcrDaemon.gridScan()`. `cells` and `items` default to empty
 * arrays when the daemon omits them; `matches` is left undefined when absent.
 */
export interface GridScanResult {
  cells: boolean[][];
  items: GridItem[];
  matches?: GridMatch[];
}

/**
 * Result of `OcrDaemon.diffOcr()` and `OcrDaemon.edgeOcr()`: recognized text,
 * its lines, and the region reported by the daemon (if any).
 */
export interface DiffOcrResponse {
  text: string;
  lines: OcrLine[];
  region?: Region;
}

/**
 * Result of `OcrDaemon.detectGrid()`. `detected` is `false` when the daemon
 * omits it; all other fields are passed through from the daemon unchanged
 * and may be undefined.
 */
export interface DetectGridResult {
  detected: boolean;
  region?: Region;
  cols?: number;
  rows?: number;
  cellWidth?: number;
  cellHeight?: number;
}

/**
 * Successful result of `OcrDaemon.templateMatch()`: a rectangle plus a
 * confidence value. `templateMatch()` returns `null` rather than an object
 * with `found: false`.
 *
 * NOTE(review): the range of `confidence` is defined by the daemon — confirm.
 */
export interface TemplateMatchResult {
  found: boolean;
  x: number;
  y: number;
  width: number;
  height: number;
  confidence: number;
}

/**
 * OCR engine selector. `'tesseract'` is treated as the default: the client
 * omits the `engine` field for it and uses the regular capture timeout, while
 * any other engine is sent explicitly and given a much longer timeout.
 */
export type OcrEngine = 'tesseract' | 'easyocr' | 'paddleocr';

/**
 * Preprocessing mode sent in the request's `preprocess` field.
 * `OcrDaemon.ocr()` omits the field for `'none'`; `diffOcr()` and `edgeOcr()`
 * send whatever value they are given.
 */
export type OcrPreprocess = 'none' | 'bgsub' | 'tophat';

/**
 * Optional crop tuning values carried in `DiffOcrParams.crop`. This module
 * forwards them to the daemon untouched; their exact meaning and defaults are
 * defined by the daemon.
 */
export interface DiffCropParams {
  diffThresh?: number;
  rowThreshDiv?: number;
  colThreshDiv?: number;
  maxGap?: number;
  trimCutoff?: number;
  ocrPad?: number;
}

/**
 * Optional OCR tuning values carried in `DiffOcrParams.ocr` and
 * `EdgeOcrParams.ocr`. This module forwards them to the daemon untouched;
 * their exact meaning and defaults are defined by the daemon.
 */
export interface OcrParams {
  kernelSize?: number;
  upscale?: number;
  useBackgroundSub?: boolean;
  dimPercentile?: number;
  textThresh?: number;
  softThreshold?: boolean;
  usePerLineOcr?: boolean;
  lineGapTolerance?: number;
  linePadY?: number;
  psm?: number;
  mergeGap?: number;
  linkThreshold?: number;
  textThreshold?: number;
  lowText?: number;
  widthThs?: number;
  paragraph?: boolean;
}

/**
 * Parameter bundle for `OcrDaemon.diffOcr()`; sent as the request's `params`
 * field when it has at least one key.
 */
export interface DiffOcrParams {
  crop?: DiffCropParams;
  ocr?: OcrParams;
}

/**
 * Names of the two tooltip detection methods. Not referenced elsewhere in
 * this file.
 *
 * NOTE(review): presumably selects between `diffOcr()` and `edgeOcr()` in
 * calling code — confirm.
 */
export type TooltipMethod = 'diff' | 'edge';

/**
 * Optional crop tuning values carried in `EdgeOcrParams.crop`. This module
 * forwards them to the daemon untouched; their exact meaning and defaults are
 * defined by the daemon.
 */
export interface EdgeCropParams {
  cannyLow?: number;
  cannyHigh?: number;
  minLineLength?: number;
  roiSize?: number;
  densityThreshold?: number;
  ocrPad?: number;
}

/**
 * Parameter bundle for `OcrDaemon.edgeOcr()`; sent as the request's
 * `edgeParams` field when it has at least one key.
 */
export interface EdgeOcrParams {
  crop?: EdgeCropParams;
  ocr?: OcrParams;
}

/**
 * A single command for the daemon. The object is serialized with
 * `JSON.stringify` and written to the daemon's stdin as one line. Only `cmd`
 * is mandatory; each command uses its own subset of the optional fields.
 */
interface DaemonRequest {
  cmd: string;
  region?: Region;
  path?: string;
  cols?: number;
  rows?: number;
  threshold?: number;
  minCellSize?: number;
  maxCellSize?: number;
  /** Set by the `grid` command when a non-negative target row is supplied. */
  targetRow?: number;
  /** Set by the `grid` command when a non-negative target column is supplied. */
  targetCol?: number;
  engine?: string;
  preprocess?: string;
  params?: DiffOcrParams;
  edgeParams?: EdgeOcrParams;
  cursorX?: number;
  cursorY?: number;
}

/**
 * One JSON line read from the daemon's stdout. `ready` marks the startup
 * signal; `ok: false` marks a failed command, with the reason in `error`.
 * The remaining fields form the union of every command's payload, which is
 * why all of them are optional.
 */
interface DaemonResponse {
  ok: boolean;
  ready?: boolean;
  text?: string;
  lines?: OcrLine[];
  image?: string;
  cells?: boolean[][];
  items?: GridItem[];
  matches?: GridMatch[];
  detected?: boolean;
  region?: Region;
  cols?: number;
  rows?: number;
  cellWidth?: number;
  cellHeight?: number;
  found?: boolean;
  x?: number;
  y?: number;
  width?: number;
  height?: number;
  confidence?: number;
  error?: string;
}

// ── OcrDaemon ───────────────────────────────────────────────────────────────

/**
 * Relative path of the daemon executable used when the `OcrDaemon`
 * constructor is not given an explicit path.
 */
const DEFAULT_EXE = join(
  'tools', 'OcrDaemon', 'bin', 'Release',
  'net8.0-windows10.0.19041.0', 'OcrDaemon.exe',
);

/** Timeout in milliseconds for the grid, snapshot, screenshot and template-match commands. */
const REQUEST_TIMEOUT = 5_000;
/** Timeout in milliseconds for capture and default-engine OCR commands. */
const CAPTURE_TIMEOUT = 10_000;

/**
 * Client for the external OCR daemon executable.
 *
 * The daemon is spawned lazily as a child process. Each request is a JSON
 * object written as a single line to the child's stdin; each response is a
 * JSON line read from its stdout. The first line expected from a fresh process
 * is a `{ "ready": true }` signal. Only one request is in flight at a time;
 * further requests wait in a FIFO queue. A failed request restarts the daemon
 * and is retried once.
 */
export class OcrDaemon {
  /** Timeout (ms) for non-tesseract engines, which may load or download a model first. */
  private static readonly SLOW_ENGINE_TIMEOUT = 120_000;
  /** How long (ms) a freshly spawned daemon may take to print its ready signal. */
  private static readonly READY_TIMEOUT = 10_000;

  private proc: ChildProcess | null = null;
  private readonly exePath: string;
  /** Resolves the startup promise of the process currently being started. */
  private readyResolve: (() => void) | null = null;
  private pendingResolve: ((resp: DaemonResponse) => void) | null = null;
  private pendingReject: ((err: Error) => void) | null = null;
  /** Process the in-flight request was written to; used to ignore stale exit events. */
  private pendingProc: ChildProcess | null = null;
  private queue: Array<{
    request: DaemonRequest;
    timeout: number;
    resolve: (resp: DaemonResponse) => void;
    reject: (err: Error) => void;
  }> = [];
  private processing = false;
  private buffer = '';
  private stopped = false;

  /**
   * @param exePath Path to the daemon executable; defaults to `DEFAULT_EXE`.
   */
  constructor(exePath?: string) {
    this.exePath = exePath ?? DEFAULT_EXE;
  }

  // ── Public API ──────────────────────────────────────────────────────────

  /**
   * Runs the `ocr` command.
   *
   * @param region Optional region to restrict the command to.
   * @param engine OCR engine; omitted from the request for `'tesseract'`.
   * @param preprocess Preprocessing mode; omitted from the request for `'none'`.
   * @returns Recognized text and lines (empty when the daemon omits them).
   * @throws Error when the request fails on both the first attempt and the retry.
   */
  async ocr(region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess): Promise<OcrResponse> {
    const req: DaemonRequest = { cmd: 'ocr' };
    if (region) req.region = region;
    if (engine && engine !== 'tesseract') req.engine = engine;
    if (preprocess && preprocess !== 'none') req.preprocess = preprocess;

    const resp = await this.sendWithRetry(req, OcrDaemon.timeoutFor(engine));
    return {
      ok: true,
      text: resp.text ?? '',
      lines: resp.lines ?? [],
    };
  }

  /**
   * Runs the `capture` command and decodes the base64 `image` field.
   *
   * @param region Optional region to restrict the command to.
   * @throws Error when the response carries no image data, or the request fails.
   */
  async captureBuffer(region?: Region): Promise<Buffer> {
    const req: DaemonRequest = { cmd: 'capture' };
    if (region) req.region = region;

    const resp = await this.sendWithRetry(req, CAPTURE_TIMEOUT);
    if (typeof resp.image !== 'string') {
      // Fail with a clear message instead of a TypeError from Buffer.from(undefined).
      throw new Error('Daemon capture response did not include image data');
    }
    return Buffer.from(resp.image, 'base64');
  }

  /**
   * Runs the `grid` command.
   *
   * @param threshold Sent only when truthy (so `0` is not forwarded).
   * @param targetRow Sent only when non-negative.
   * @param targetCol Sent only when non-negative.
   */
  async gridScan(region: Region, cols: number, rows: number, threshold?: number, targetRow?: number, targetCol?: number): Promise<GridScanResult> {
    // The local intersection type keeps the request typed without `as any`.
    const req: DaemonRequest & { targetRow?: number; targetCol?: number } = { cmd: 'grid', region, cols, rows };
    if (threshold) req.threshold = threshold;
    if (targetRow != null && targetRow >= 0) req.targetRow = targetRow;
    if (targetCol != null && targetCol >= 0) req.targetCol = targetCol;

    const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
    return { cells: resp.cells ?? [], items: resp.items ?? [], matches: resp.matches ?? undefined };
  }

  /**
   * Runs the `detect-grid` command.
   *
   * @param minCellSize Sent only when truthy.
   * @param maxCellSize Sent only when truthy.
   */
  async detectGrid(region: Region, minCellSize?: number, maxCellSize?: number): Promise<DetectGridResult> {
    const req: DaemonRequest = { cmd: 'detect-grid', region };
    if (minCellSize) req.minCellSize = minCellSize;
    if (maxCellSize) req.maxCellSize = maxCellSize;

    const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
    return {
      detected: resp.detected ?? false,
      region: resp.region,
      cols: resp.cols,
      rows: resp.rows,
      cellWidth: resp.cellWidth,
      cellHeight: resp.cellHeight,
    };
  }

  /** Runs the `snapshot` command; the response payload is ignored. */
  async snapshot(): Promise<void> {
    await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT);
  }

  /**
   * Runs the `diff-ocr` command.
   *
   * @param savePath Sent as `path` when provided.
   * @param preprocess Sent whenever provided, including `'none'`.
   * @param params Sent only when the object has at least one key.
   */
  async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess, params?: DiffOcrParams): Promise<DiffOcrResponse> {
    const req: DaemonRequest = { cmd: 'diff-ocr' };
    if (savePath) req.path = savePath;
    if (region) req.region = region;
    if (engine && engine !== 'tesseract') req.engine = engine;
    if (preprocess) req.preprocess = preprocess;
    if (params && Object.keys(params).length > 0) req.params = params;

    const resp = await this.sendWithRetry(req, OcrDaemon.timeoutFor(engine));
    return {
      text: resp.text ?? '',
      lines: resp.lines ?? [],
      region: resp.region,
    };
  }

  /**
   * Runs the `edge-ocr` command.
   *
   * @param savePath Sent as `path` when provided.
   * @param preprocess Sent whenever provided, including `'none'`.
   * @param edgeParams Sent only when the object has at least one key.
   * @param cursorX Sent whenever not null/undefined (so `0` is forwarded).
   * @param cursorY Sent whenever not null/undefined (so `0` is forwarded).
   */
  async edgeOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess, edgeParams?: EdgeOcrParams, cursorX?: number, cursorY?: number): Promise<DiffOcrResponse> {
    const req: DaemonRequest = { cmd: 'edge-ocr' };
    if (savePath) req.path = savePath;
    if (region) req.region = region;
    if (engine && engine !== 'tesseract') req.engine = engine;
    if (preprocess) req.preprocess = preprocess;
    if (edgeParams && Object.keys(edgeParams).length > 0) req.edgeParams = edgeParams;
    if (cursorX != null) req.cursorX = cursorX;
    if (cursorY != null) req.cursorY = cursorY;

    const resp = await this.sendWithRetry(req, OcrDaemon.timeoutFor(engine));
    return {
      text: resp.text ?? '',
      lines: resp.lines ?? [],
      region: resp.region,
    };
  }

  /** Runs the `screenshot` command with the given output path. */
  async saveScreenshot(path: string, region?: Region): Promise<void> {
    const req: DaemonRequest = { cmd: 'screenshot', path };
    if (region) req.region = region;
    await this.sendWithRetry(req, REQUEST_TIMEOUT);
  }

  /**
   * Runs the `match-template` command.
   *
   * @returns The match, or `null` when the daemon reports `found` as falsy.
   */
  async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
    const req: DaemonRequest = { cmd: 'match-template', path: templatePath };
    if (region) req.region = region;

    const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
    if (!resp.found) return null;

    // NOTE(review): the daemon is trusted to send all geometry fields with a
    // positive match; they are passed through unchecked.
    return {
      found: true,
      x: resp.x!,
      y: resp.y!,
      width: resp.width!,
      height: resp.height!,
      confidence: resp.confidence!,
    };
  }

  /** Eagerly spawn the daemon process so it's ready for the first real request. */
  async warmup(): Promise<void> {
    await this.ensureRunning();
  }

  /**
   * Marks the client as stopped (which disables the retry in
   * `sendWithRetry`) and terminates the daemon if one is running. The
   * in-flight request, if any, is rejected once the process reports its exit.
   */
  async stop(): Promise<void> {
    this.stopped = true;
    const proc = this.proc;
    if (proc) {
      this.proc = null;
      this.killProcess(proc);
    }
  }

  // ── Internal ────────────────────────────────────────────────────────────

  /** Picks the request timeout for an engine: long for non-tesseract engines. */
  private static timeoutFor(engine?: OcrEngine): number {
    // Python engines need longer timeout for first model load + download
    return engine && engine !== 'tesseract' ? OcrDaemon.SLOW_ENGINE_TIMEOUT : CAPTURE_TIMEOUT;
  }

  /** Closes the child's stdin and asks it to terminate. */
  private killProcess(proc: ChildProcess): void {
    proc.stdin?.end();
    proc.kill();
  }

  /**
   * Spawns the daemon unless a live one is already registered, then waits for
   * its ready signal.
   *
   * @throws Error when the process cannot be spawned, exits before becoming
   *   ready, or stays silent for `READY_TIMEOUT` ms. In all of these cases the
   *   process is killed and unregistered so the next call starts a new one.
   */
  private async ensureRunning(): Promise<void> {
    const current = this.proc;
    // A signal-killed child keeps exitCode === null, so check signalCode too.
    if (current && current.exitCode === null && current.signalCode === null) return;

    this.proc = null;
    this.buffer = '';

    logger.info({ exe: this.exePath }, 'Spawning OCR daemon');

    const proc = spawn(this.exePath, [], {
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsHide: true,
    });
    this.proc = proc;

    // Startup state lives in this closure, so events from an older process can
    // never settle the startup of a newer one.
    let failStartup: (err: Error) => void = () => undefined;
    const ready = new Promise<void>((resolve, reject) => {
      const timer = setTimeout(
        () => failStartup(new Error('Daemon did not become ready within 10s')),
        OcrDaemon.READY_TIMEOUT,
      );
      const succeed = (): void => {
        release();
        resolve();
      };
      const release = (): void => {
        clearTimeout(timer);
        if (this.readyResolve === succeed) this.readyResolve = null;
      };
      failStartup = (err) => {
        release();
        reject(err);
      };
      this.readyResolve = succeed;
    });

    // Shared by 'error' and 'exit': unregister the process, abort a pending
    // startup, and reject the in-flight request if it was sent to this process.
    const onGone = (err: Error): void => {
      if (this.proc === proc) this.proc = null;
      failStartup(err);
      if (this.pendingProc === proc && this.pendingReject) this.pendingReject(err);
    };

    // Without an 'error' listener a failed spawn becomes an uncaught exception.
    proc.on('error', (err) => {
      logger.warn({ err }, 'OcrDaemon process error');
      onGone(err);
    });

    proc.on('exit', (code) => {
      logger.warn({ code }, 'OcrDaemon exited');
      onGone(new Error(`Daemon exited with code ${code}`));
    });

    // Writing to a dead child raises 'error' on stdin; log it instead of
    // crashing. The affected request is rejected via 'exit' or its timeout.
    proc.stdin?.on('error', (err) => {
      logger.warn({ err }, 'OcrDaemon stdin error');
    });

    proc.stderr?.on('data', (data: Buffer) => {
      logger.warn({ daemon: data.toString().trim() }, 'OcrDaemon stderr');
    });

    // Decode in the stream so multi-byte characters split across chunks survive.
    proc.stdout?.setEncoding('utf8');
    proc.stdout?.on('data', (chunk: string) => {
      if (this.proc !== proc) return; // output of a replaced process is stale
      this.buffer += chunk;
      this.processBuffer();
    });

    // Wait for ready signal
    try {
      await ready;
    } catch (err) {
      // Never leave an unready process registered as "running".
      if (this.proc === proc) this.proc = null;
      this.killProcess(proc);
      throw err;
    }

    logger.info('OCR daemon ready');
  }

  /**
   * Consumes every complete line in the stdout buffer. A `ready` line settles
   * the startup promise; any other JSON object is handed to the in-flight
   * request. Lines that are not JSON objects are logged and skipped.
   */
  private processBuffer(): void {
    let newlineIdx: number;
    while ((newlineIdx = this.buffer.indexOf('\n')) !== -1) {
      const line = this.buffer.slice(0, newlineIdx).trim();
      this.buffer = this.buffer.slice(newlineIdx + 1);

      if (!line) continue;

      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        logger.warn({ line }, 'Failed to parse daemon response');
        continue;
      }

      // `null`, primitives and arrays are valid JSON but not a response object.
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        logger.warn({ line }, 'Failed to parse daemon response');
        continue;
      }
      const resp = parsed as DaemonResponse;

      // Handle ready signal
      if (resp.ready && this.readyResolve) {
        this.readyResolve();
        continue;
      }

      // Handle normal response (the handler clears the pending state itself)
      if (this.pendingResolve) {
        this.pendingResolve(resp);
      }
    }
  }

  /**
   * Makes sure the daemon is running, then queues the request.
   *
   * @param timeout Milliseconds to wait for the response once the request has
   *   been written to the daemon.
   */
  private async send(request: DaemonRequest, timeout: number): Promise<DaemonResponse> {
    await this.ensureRunning();

    return new Promise<DaemonResponse>((resolve, reject) => {
      this.queue.push({ request, timeout, resolve, reject });
      this.drainQueue();
    });
  }

  /**
   * Writes the next queued request to the daemon unless one is already in
   * flight. Each request is timed with its own timeout. Requests that cannot
   * be written because no daemon is available are rejected immediately.
   */
  private drainQueue(): void {
    if (this.processing) return;

    const next = this.queue.shift();
    if (!next) return;
    const { request, timeout, resolve, reject } = next;

    const proc = this.proc;
    const stdin = proc?.stdin;
    if (!proc || !stdin || !stdin.writable) {
      reject(new Error('Daemon is not running'));
      this.drainQueue();
      return;
    }

    this.processing = true;

    // Clear all in-flight state *before* draining, so the handlers installed
    // for the next request are never wiped by the previous one's cleanup.
    const settle = (): void => {
      clearTimeout(timer);
      this.pendingResolve = null;
      this.pendingReject = null;
      this.pendingProc = null;
      this.processing = false;
    };

    const timer = setTimeout(() => {
      settle();
      reject(new Error(`Daemon request timed out after ${timeout}ms`));
      this.drainQueue();
    }, timeout);

    this.pendingResolve = (resp) => {
      settle();
      resolve(resp);
      this.drainQueue();
    };

    this.pendingReject = (err) => {
      settle();
      reject(err);
      this.drainQueue();
    };

    this.pendingProc = proc;
    stdin.write(JSON.stringify(request) + '\n');
  }

  /**
   * Sends a request and, unless the client has been stopped, restarts the
   * daemon and retries once when the first attempt fails (transport error,
   * timeout, or an `ok: false` response).
   *
   * @throws Error when the retry fails as well, or the first attempt fails
   *   after `stop()` was called.
   */
  private async sendWithRetry(request: DaemonRequest, timeout: number): Promise<DaemonResponse> {
    let usedProc: ChildProcess | null = null;
    try {
      await this.ensureRunning();
      usedProc = this.proc;
      const resp = await this.send(request, timeout);
      if (!resp.ok) throw new Error(resp.error ?? 'Daemon returned error');
      return resp;
    } catch (err) {
      if (this.stopped) throw err;

      // Kill and retry once
      logger.warn({ err, cmd: request.cmd }, 'Daemon request failed, restarting');
      // Only kill the process this attempt used; another caller may already
      // have replaced it with a fresh one.
      if (usedProc && this.proc === usedProc) {
        this.proc = null;
        this.killProcess(usedProc);
      }

      const resp = await this.send(request, timeout);
      if (!resp.ok) throw new Error(resp.error ?? 'Daemon returned error on retry');
      return resp;
    }
  }
}