poe2-bot/tools/test-ocr.ts
2026-02-12 17:48:16 -05:00

484 lines
17 KiB
TypeScript

/**
* OCR test runner + parameter tuner.
*
* Usage:
* npx tsx tools/test-ocr.ts # test all combos with defaults
* npx tsx tools/test-ocr.ts paddleocr # filter to paddleocr combos
* npx tsx tools/test-ocr.ts --tune # tune all combos (coordinate descent)
* npx tsx tools/test-ocr.ts --tune easyocr # tune only easyocr combos
*/
import { OcrDaemon, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams } from '../src/game/OcrDaemon.js';
import { readFileSync } from 'fs';
import { join } from 'path';
// ── Types ──────────────────────────────────────────────────────────────────
/** One OCR regression case, as loaded from `cases.json` by `main`. */
interface TestCase {
/** Identifier printed in the "Loaded …" banner and in per-case result lines. */
id: string;
/** Path, relative to the tessdata directory, sent as `file` in the `diff-ocr` request. */
image: string;
/** Path, relative to the tessdata directory, sent as `file` in the `snapshot` request issued first. */
fullImage: string;
/** Text lines the OCR output is scored against. */
expected: string[];
}
/** One engine/preprocess pairing to test or tune. */
interface Combo {
/** OCR engine; added to the daemon request only when it is not 'tesseract'. */
engine: OcrEngine;
/** Preprocess mode; added to the daemon request only when it is not 'none'. */
preprocess: OcrPreprocess;
/** Display name; also what the command-line filter is substring-matched against. */
label: string;
}
/** Outcome of tuning a single combo. */
interface TuneResult {
/** Label of the combo that was tuned. */
label: string;
/** Best average score found across the test cases (fraction of expected lines matched). */
score: number;
/** Parameter set that produced `score`. */
params: DiffOcrParams;
/** Number of full test-suite evaluations performed while tuning. */
evals: number;
}
// ── Combos ─────────────────────────────────────────────────────────────────
/**
 * Every engine × preprocess pairing, grouped by engine (tesseract, easyocr,
 * paddleocr) and, within each engine, ordered bgsub → tophat → none.
 * Labels have the form `<engine>+<preprocess>`.
 */
const ALL_COMBOS: Combo[] = (['tesseract', 'easyocr', 'paddleocr'] as const).flatMap((engine) =>
  (['bgsub', 'tophat', 'none'] as const).map((preprocess) => ({
    engine,
    preprocess,
    label: `${engine}+${preprocess}`,
  })),
);
// ── Scoring ────────────────────────────────────────────────────────────────
/**
 * Levenshtein edit distance between `a` and `b`, compared per UTF-16 code unit
 * and case-sensitively.
 *
 * Only two rows of the DP table are kept at a time: `prev` is the row for the
 * first `i - 1` characters of `a`, `curr` the row being filled for `i`.
 *
 * @returns The minimum number of single-character insertions, deletions and
 *          substitutions turning `a` into `b`.
 */
function levenshtein(a: string, b: string): number {
  const cols = b.length;
  // Row 0: turning the empty prefix of `a` into the first j chars of `b` costs j.
  let prev: number[] = Array.from({ length: cols + 1 }, (_, j) => j);

  for (let i = 1; i <= a.length; i++) {
    // Column 0: deleting the first i chars of `a` costs i.
    const curr: number[] = [i];
    for (let j = 1; j <= cols; j++) {
      if (a[i - 1] === b[j - 1]) {
        curr[j] = prev[j - 1];
      } else {
        curr[j] = 1 + Math.min(prev[j], curr[j - 1], prev[j - 1]);
      }
    }
    prev = curr;
  }

  return prev[cols];
}
/**
 * Case-insensitive similarity of two strings.
 *
 * Both inputs are lowercased first and all measurements are taken on the
 * lowercased forms. Lowercasing can change a string's length (for example
 * "İ" lowercases to two code units), so normalising by the original lengths
 * could let the edit distance exceed the divisor and push the result below 0.
 *
 * @returns `1 - distance / maxLen`, i.e. 1 for identical strings (including
 *          two empty strings) down to 0 for strings with nothing in common.
 */
function similarity(a: string, b: string): number {
  const lowerA = a.toLowerCase();
  const lowerB = b.toLowerCase();
  const maxLen = Math.max(lowerA.length, lowerB.length);
  if (maxLen === 0) return 1;
  return 1 - levenshtein(lowerA, lowerB) / maxLen;
}
/**
 * Fraction of `expected` lines that have a sufficiently similar line in `actual`.
 *
 * Expected lines are processed in order. Each one greedily claims the still
 * unclaimed actual line with the highest similarity (the earliest wins a tie);
 * the claim only counts when that similarity is at least 0.75. A claimed
 * actual line cannot match a later expected line.
 *
 * @returns A value in [0, 1]; 1 when `expected` is empty.
 */
function scoreLines(expected: string[], actual: string[]): number {
  if (expected.length === 0) return 1;

  const MATCH_THRESHOLD = 0.75;
  const claimed: boolean[] = new Array<boolean>(actual.length).fill(false);
  let hits = 0;

  for (const wanted of expected) {
    let winner = -1;
    let winnerSim = 0;
    for (const [idx, candidate] of actual.entries()) {
      if (claimed[idx]) continue;
      const sim = similarity(wanted, candidate);
      if (sim > winnerSim) {
        winnerSim = sim;
        winner = idx;
      }
    }
    if (winner !== -1 && winnerSim >= MATCH_THRESHOLD) {
      claimed[winner] = true;
      hits += 1;
    }
  }

  return hits / expected.length;
}
/**
 * Same greedy matching as `scoreLines`, but also reports which lines matched.
 *
 * Each expected line, in order, claims the unclaimed actual line with the
 * highest similarity (earliest on ties) provided the similarity is >= 0.75.
 *
 * @returns `matched`: expected lines that found a partner;
 *          `missed`: expected lines that did not;
 *          `extra`: actual lines no expected line claimed;
 *          `score`: `matched.length / expected.length`, or 1 when `expected` is empty.
 */
function scoreLinesVerbose(expected: string[], actual: string[]): { matched: string[]; missed: string[]; extra: string[]; score: number } {
  const MATCH_THRESHOLD = 0.75;
  const claimed: boolean[] = new Array<boolean>(actual.length).fill(false);
  const matched: string[] = [];
  const missed: string[] = [];

  for (const wanted of expected) {
    let winner = -1;
    let winnerSim = 0;
    for (const [idx, candidate] of actual.entries()) {
      if (claimed[idx]) continue;
      const sim = similarity(wanted, candidate);
      if (sim > winnerSim) {
        winnerSim = sim;
        winner = idx;
      }
    }

    const accepted = winner !== -1 && winnerSim >= MATCH_THRESHOLD;
    if (!accepted) {
      missed.push(wanted);
      continue;
    }
    claimed[winner] = true;
    matched.push(wanted);
  }

  const extra = actual.filter((_, idx) => !claimed[idx]);
  const score = expected.length === 0 ? 1 : matched.length / expected.length;
  return { matched, missed, extra, score };
}
// ── Daemon helpers ─────────────────────────────────────────────────────────
/**
 * Runs one test case through the daemon and returns the recognised text lines.
 *
 * Two requests are sent in order: a `snapshot` of the case's full image, then
 * a `diff-ocr` of the case's image. Both paths are resolved against
 * `tessdataDir` and rewritten to use backslash separators.
 *
 * @param engine     Included in the request only when it is not 'tesseract'.
 * @param preprocess Included in the request only when it is not 'none'.
 * @param params     Included in the request only when it has at least one key.
 * @returns Trimmed, non-empty `text` values from the response's `lines`.
 */
async function runCase(
  daemon: OcrDaemon,
  tc: TestCase,
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<string[]> {
  const toBackslashPath = (relative: string): string =>
    join(tessdataDir, relative).replace(/\//g, '\\');
  const snapshotFile = toBackslashPath(tc.fullImage);
  const diffFile = toBackslashPath(tc.image);

  // sendWithRetry is not part of the typed OcrDaemon surface used here, hence the cast.
  const transport = daemon as any;
  await transport.sendWithRetry({ cmd: 'snapshot', file: snapshotFile }, 10_000);

  const hasParams = params !== undefined && params !== null && Object.keys(params).length > 0;
  // Key insertion order (cmd, file, engine, preprocess, params) is kept stable.
  const request: any = {
    cmd: 'diff-ocr',
    file: diffFile,
    ...(engine === 'tesseract' ? {} : { engine }),
    ...(preprocess === 'none' ? {} : { preprocess }),
    ...(hasParams ? { params } : {}),
  };

  // Non-tesseract engines get a two-minute budget; tesseract gets ten seconds.
  const timeoutMs = engine === 'tesseract' ? 10_000 : 120_000;
  const response = await transport.sendWithRetry(request, timeoutMs);

  const rawLines: any[] = response.lines ?? [];
  return rawLines
    .map((line: any) => (line.text ?? '').trim())
    .filter((text: string) => text !== '');
}
/**
 * Average `scoreLines` result over all test cases for one engine/preprocess
 * pairing and parameter set.
 *
 * Cases run strictly one after another. A case that throws (daemon error,
 * timeout, scoring failure) contributes 0 rather than aborting the run.
 *
 * @returns The sum of per-case scores divided by `cases.length`.
 */
async function scoreCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<number> {
  const scoreOne = async (tc: TestCase): Promise<number> => {
    try {
      const recognised = await runCase(daemon, tc, tessdataDir, engine, preprocess, params);
      return scoreLines(tc.expected, recognised);
    } catch {
      // A failing case deliberately counts as a zero score.
      return 0;
    }
  };

  let sum = 0;
  for (const tc of cases) {
    const caseScore = await scoreOne(tc);
    sum += caseScore;
  }
  return sum / cases.length;
}
// ── Parameter sweep definitions ────────────────────────────────────────────
/** A numeric crop parameter together with the candidate values the tuner tries for it. */
interface CropIntSweep {
name: keyof DiffCropParams;
values: number[];
}
/** A numeric OCR parameter together with the candidate values the tuner tries for it. */
interface OcrIntSweep {
name: keyof OcrParams;
values: number[];
}
/** A boolean OCR parameter together with the candidate values the tuner tries for it. */
interface OcrBoolSweep {
name: keyof OcrParams;
values: boolean[];
}
// Crop parameters swept in Phase A of tuning, in this order, once per crop round.
const CROP_SWEEPS: CropIntSweep[] = [
{ name: 'diffThresh', values: [10, 15, 20, 25, 30, 40, 50] },
{ name: 'maxGap', values: [5, 10, 15, 20, 25, 30] },
];
// Candidate values for crop.trimCutoff, swept after CROP_SWEEPS in every crop round.
const CROP_TRIM_VALUES = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];
// OCR parameters swept in Phase B for every preprocess mode.
const SHARED_OCR_SWEEPS: OcrIntSweep[] = [
{ name: 'upscale', values: [1, 2, 3] },
{ name: 'mergeGap', values: [0, 20, 40, 60, 80, 100] },
];
// Additional numeric OCR parameters swept only when the preprocess mode is 'bgsub'.
const BGSUB_INT_SWEEPS: OcrIntSweep[] = [
{ name: 'dimPercentile', values: [5, 10, 15, 20, 25, 30, 40, 50, 60] },
{ name: 'textThresh', values: [10, 20, 30, 40, 50, 60, 80, 100] },
];
// Additional boolean OCR parameters swept only when the preprocess mode is 'bgsub'.
const BGSUB_BOOL_SWEEPS: OcrBoolSweep[] = [
{ name: 'softThreshold', values: [false, true] },
];
// Additional OCR parameters swept only when the preprocess mode is 'tophat'.
const TOPHAT_SWEEPS: OcrIntSweep[] = [
{ name: 'kernelSize', values: [11, 15, 21, 25, 31, 41, 51, 61] },
];
// ── Default params per preprocess ──────────────────────────────────────────
/**
 * Builds the starting parameter set for a preprocess mode.
 *
 * All modes share the same crop defaults; the `ocr` section differs for
 * 'bgsub' and 'tophat', and any other mode gets only `upscale`.
 * A fresh object is returned on every call, so callers may mutate it.
 */
function defaultParams(preprocess: OcrPreprocess): DiffOcrParams {
  const crop: DiffCropParams = { diffThresh: 20, maxGap: 20, trimCutoff: 0.4 };

  switch (preprocess) {
    case 'bgsub':
      return {
        crop,
        ocr: { useBackgroundSub: true, upscale: 2, dimPercentile: 40, textThresh: 60, softThreshold: false },
      };
    case 'tophat':
      return {
        crop,
        ocr: { useBackgroundSub: false, upscale: 2, kernelSize: 41 },
      };
    default:
      // 'none'
      return { crop, ocr: { upscale: 2 } };
  }
}
/**
 * Deep-copies a parameter set via a JSON round trip, so a trial copy can be
 * mutated without touching the original. Properties whose value is
 * `undefined` are dropped by the serialisation.
 */
function cloneParams(p: DiffOcrParams): DiffOcrParams {
  const serialized = JSON.stringify(p);
  const copy: DiffOcrParams = JSON.parse(serialized);
  return copy;
}
// ── Coordinate descent tuner (two-phase: crop then OCR) ──────────────────
/**
 * Tunes one engine/preprocess combo by coordinate descent in two phases.
 *
 * Starting from `defaultParams(combo.preprocess)`:
 *  - Phase A sweeps the crop parameters (`CROP_SWEEPS`, then `trimCutoff`).
 *  - Phase B sweeps the OCR parameters that apply to the combo's preprocess
 *    mode, with the crop parameters left at whatever Phase A settled on.
 *
 * Each phase runs up to 3 rounds and stops early after a round in which no
 * parameter improved. Within a round every parameter is swept in turn: each
 * candidate value is scored with all other parameters held at their current
 * best, and the top-scoring candidate (earliest on ties) is adopted only if it
 * strictly beats the best score so far.
 *
 * Progress is written to stderr.
 *
 * @returns The combo label, best score, the tuned parameters, and how many
 *          full-suite evaluations were performed (including the baseline).
 */
async function tuneCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
): Promise<TuneResult> {
  type ParamGroup = 'crop' | 'ocr';
  type ParamSweep = { name: string; values: readonly (number | boolean)[] };

  const MAX_ROUNDS = 3;
  const params = defaultParams(combo.preprocess);

  const pct = (score: number): string => (score * 100).toFixed(1);
  const evaluate = (candidate: DiffOcrParams): Promise<number> =>
    scoreCombo(daemon, cases, tessdataDir, combo.engine, combo.preprocess, candidate);

  let bestScore = await evaluate(params);
  let evals = 1;
  process.stderr.write(` baseline: ${pct(bestScore)}% ${JSON.stringify(params)}\n`);

  // Sweeps one parameter; adopts the best candidate if it strictly improves on
  // bestScore. Returns whether params/bestScore were updated.
  const sweep = async (group: ParamGroup, name: string, values: readonly (number | boolean)[]): Promise<boolean> => {
    process.stderr.write(` ${group}.${name}: `);
    let bestVal: number | boolean | undefined;
    let bestValScore = -1;
    for (const v of values) {
      const trial = cloneParams(params);
      // Parameter names are data-driven, so the section is indexed dynamically.
      (trial[group] as any)[name] = v;
      const score = await evaluate(trial);
      evals++;
      process.stderr.write(`${v}=${pct(score)} `);
      if (score > bestValScore) {
        bestValScore = score;
        bestVal = v;
      }
    }
    process.stderr.write('\n');

    if (bestVal === undefined || !(bestValScore > bestScore)) return false;
    (params[group] as any)[name] = bestVal;
    bestScore = bestValScore;
    process.stderr.write(` -> ${group}.${name}=${bestVal} score=${pct(bestScore)}%\n`);
    return true;
  };

  // Runs up to MAX_ROUNDS passes over `sweeps`, stopping after a pass with no improvement.
  const runPhase = async (
    group: ParamGroup,
    roundLabel: string,
    endLabel: string,
    sweeps: readonly ParamSweep[],
  ): Promise<void> => {
    for (let round = 1; round <= MAX_ROUNDS; round++) {
      let improved = false;
      process.stderr.write(` --- ${roundLabel} Round ${round} ---\n`);
      for (const { name, values } of sweeps) {
        // Every sweep must run; do not short-circuit on `improved`.
        if (await sweep(group, name, values)) improved = true;
      }
      process.stderr.write(` End ${endLabel} round ${round}: ${pct(bestScore)}% (${evals} evals)\n`);
      if (!improved) break;
    }
  };

  // ── Phase A: Tune crop params ──
  process.stderr.write(`\n === Phase A: Crop Params ===\n`);
  const cropSweeps: ParamSweep[] = [
    ...CROP_SWEEPS,
    { name: 'trimCutoff', values: CROP_TRIM_VALUES },
  ];
  await runPhase('crop', 'Crop', 'crop', cropSweeps);

  // ── Phase B: Tune OCR params (crop is now locked) ──
  process.stderr.write(`\n === Phase B: OCR Params (crop locked) ===\n`);
  const ocrSweeps: ParamSweep[] = [...SHARED_OCR_SWEEPS];
  if (combo.preprocess === 'bgsub') {
    // Numeric sweeps first, then boolean ones.
    ocrSweeps.push(...BGSUB_INT_SWEEPS, ...BGSUB_BOOL_SWEEPS);
  } else if (combo.preprocess === 'tophat') {
    ocrSweeps.push(...TOPHAT_SWEEPS);
  }
  await runPhase('ocr', 'OCR', 'OCR', ocrSweeps);

  return { label: combo.label, score: bestScore, params, evals };
}
// ── Verbose test run ───────────────────────────────────────────────────────
/**
 * Runs every test case for one combo, printing a per-case report to stdout.
 *
 * For each case a `[PASS]`/`[FAIL]` line is printed (PASS means no expected
 * line was missed), followed by one `MISS:` line per unmatched expected line
 * and one `EXTRA:` line per unclaimed OCR line. A case that throws is reported
 * as `[ERROR]` and contributes 0 to the average.
 *
 * @param params Optional parameter overrides forwarded to the daemon.
 * @returns The sum of per-case scores divided by `cases.length`.
 */
async function testCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
  params?: DiffOcrParams,
): Promise<number> {
  let totalScore = 0;
  for (const tc of cases) {
    try {
      const actual = await runCase(daemon, tc, tessdataDir, combo.engine, combo.preprocess, params);
      const { matched, missed, extra, score } = scoreLinesVerbose(tc.expected, actual);
      totalScore += score;
      const status = missed.length === 0 ? 'PASS' : 'FAIL';
      console.log(` [${status}] ${tc.id} matched=${matched.length}/${tc.expected.length} extra=${extra.length} score=${score.toFixed(2)}`);
      for (const m of missed) console.log(` MISS: ${m}`);
      for (const e of extra) console.log(` EXTRA: ${e}`);
    } catch (err: unknown) {
      // Anything can be thrown; only Error instances are guaranteed a message.
      const reason = err instanceof Error ? err.message : String(err);
      console.log(` [ERROR] ${tc.id}: ${reason}`);
    }
  }
  return totalScore / cases.length;
}
// ── Main ───────────────────────────────────────────────────────────────────
/**
 * CLI entry point.
 *
 * Arguments: `--tune` switches from test mode to tune mode; the first argument
 * that does not start with `--` is a case-insensitive substring filter on
 * combo labels.
 *
 * Test cases are read from `cases.json` inside the daemon's tessdata
 * directory, which is resolved relative to the current working directory.
 *
 * @throws Error when the filter matches no combo or `cases.json` holds no
 *         cases; both are checked before the daemon is created.
 */
async function main(): Promise<void> {
  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  const args = process.argv.slice(2);
  const tuneMode = args.includes('--tune');
  const filterArg = args.find(a => !a.startsWith('--'))?.toLowerCase();
  const combos = filterArg
    ? ALL_COMBOS.filter(c => c.label.includes(filterArg))
    : ALL_COMBOS;
  if (combos.length === 0) {
    throw new Error(`No combos match filter "${filterArg}". Available: ${ALL_COMBOS.map(c => c.label).join(', ')}`);
  }

  const tessdataDir = join('tools', 'OcrDaemon', 'bin', 'Release', 'net8.0-windows10.0.19041.0', 'tessdata');
  const casesPath = join(tessdataDir, 'cases.json');
  const parsedCases: unknown = JSON.parse(readFileSync(casesPath, 'utf-8'));
  // An empty suite would make every average 0/0 = NaN, so refuse to run.
  if (!Array.isArray(parsedCases) || parsedCases.length === 0) {
    throw new Error(`No test cases found in ${casesPath}`);
  }
  const cases = parsedCases as TestCase[];
  console.log(`Loaded ${cases.length} test cases: ${cases.map(c => c.id).join(', ')}`);
  console.log(`Mode: ${tuneMode ? 'TUNE' : 'TEST'} Combos: ${combos.length}\n`);

  const daemon = new OcrDaemon();
  try {
    if (tuneMode) {
      // ── Tune mode: coordinate descent for each combo ──
      const tuneResults: TuneResult[] = [];
      for (const combo of combos) {
        console.log(`\n${'='.repeat(60)}`);
        console.log(` TUNING: ${combo.label}`);
        console.log(`${'='.repeat(60)}`);
        try {
          const result = await tuneCombo(daemon, cases, tessdataDir, combo);
          tuneResults.push(result);
          console.log(`\n Best: ${(result.score * 100).toFixed(1)}% (${result.evals} evals)`);
          console.log(` Params: ${JSON.stringify(result.params)}`);
          // Verbose run with best params
          console.log('');
          await testCombo(daemon, cases, tessdataDir, combo, result.params);
        } catch (err: unknown) {
          console.log(` ERROR: ${describeError(err)}`);
          tuneResults.push({ label: combo.label, score: 0, params: {}, evals: 0 });
        }
      }

      // Summary
      console.log(`\n${'='.repeat(70)}`);
      console.log(' TUNE RESULTS');
      console.log(`${'='.repeat(70)}`);
      const sorted = [...tuneResults].sort((a, b) => b.score - a.score);
      for (const r of sorted) {
        const bar = '#'.repeat(Math.round(r.score * 40));
        console.log(` ${r.label.padEnd(22)} ${(r.score * 100).toFixed(1).padStart(5)}% ${bar}`);
      }
      console.log(`\n BEST PARAMS PER COMBO:`);
      for (const r of sorted) {
        if (r.score > 0) {
          console.log(` ${r.label.padEnd(22)} ${JSON.stringify(r.params)}`);
        }
      }
    } else {
      // ── Test mode: defaults only ──
      const results: Record<string, number> = {};
      for (const combo of combos) {
        console.log(`\n${'='.repeat(60)}`);
        console.log(` ${combo.label}`);
        console.log(`${'='.repeat(60)}`);
        try {
          const score = await testCombo(daemon, cases, tessdataDir, combo);
          results[combo.label] = score;
          console.log(`\n Average: ${(score * 100).toFixed(1)}%`);
        } catch (err: unknown) {
          console.log(` ERROR: ${describeError(err)}`);
          results[combo.label] = 0;
        }
      }

      console.log(`\n${'='.repeat(60)}`);
      console.log(' SUMMARY');
      console.log(`${'='.repeat(60)}`);
      const sorted = Object.entries(results).sort((a, b) => b[1] - a[1]);
      for (const [label, score] of sorted) {
        const bar = '#'.repeat(Math.round(score * 40));
        console.log(` ${label.padEnd(22)} ${(score * 100).toFixed(1).padStart(5)}% ${bar}`);
      }
    }
  } finally {
    // Always shut the daemon down, even when a run above throws.
    await daemon.stop();
  }
}
// Kick off the CLI; any failure that escapes main() is printed and turns into exit code 1.
main().then(undefined, (failure: unknown) => {
  console.error(failure);
  process.exit(1);
});