/**
 * OCR test runner + parameter tuner.
 *
 * Usage:
 *   npx tsx tools/test-ocr.ts                 # test all combos with defaults
 *   npx tsx tools/test-ocr.ts paddleocr       # filter to paddleocr combos
 *   npx tsx tools/test-ocr.ts --tune          # tune all combos (coordinate descent)
 *   npx tsx tools/test-ocr.ts --tune easyocr  # tune only easyocr combos
 */

import { OcrDaemon, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams } from '../src/game/OcrDaemon.js';
import { readFileSync } from 'fs';
import { join } from 'path';

// ── Types ──────────────────────────────────────────────────────────────────

/** One entry of the `cases.json` fixture file loaded by `main`. */
interface TestCase {
  /** Identifier printed in the per-case report lines. */
  id: string;
  /** Image file (relative to the tessdata directory) sent with the `diff-ocr` command. */
  image: string;
  /** Image file (relative to the tessdata directory) sent with the `snapshot` command first. */
  fullImage: string;
  /** Text lines the OCR output is scored against. */
  expected: string[];
}

/** An OCR engine paired with a preprocessing mode. */
interface Combo {
  engine: OcrEngine;
  preprocess: OcrPreprocess;
  /** Display name; also the string the command-line filter is matched against. */
  label: string;
}

/** Outcome of tuning a single combo. */
interface TuneResult {
  /** Label of the combo that was tuned. */
  label: string;
  /** Best average score found (fraction of expected lines matched, 0..1). */
  score: number;
  /** Parameter set that produced `score`. */
  params: DiffOcrParams;
  /** Number of full test-suite evaluations performed while tuning. */
  evals: number;
}

// ── Combos ─────────────────────────────────────────────────────────────────

/**
 * Every engine × preprocess pairing the runner knows about.
 * Each label is `<engine>+<preprocess>`; the command-line filter does a
 * substring match against it.
 */
const ALL_COMBOS: Combo[] = [
  { engine: 'tesseract', preprocess: 'bgsub', label: 'tesseract+bgsub' },
  { engine: 'tesseract', preprocess: 'tophat', label: 'tesseract+tophat' },
  { engine: 'tesseract', preprocess: 'none', label: 'tesseract+none' },
  { engine: 'easyocr', preprocess: 'bgsub', label: 'easyocr+bgsub' },
  { engine: 'easyocr', preprocess: 'tophat', label: 'easyocr+tophat' },
  { engine: 'easyocr', preprocess: 'none', label: 'easyocr+none' },
  { engine: 'paddleocr', preprocess: 'bgsub', label: 'paddleocr+bgsub' },
  { engine: 'paddleocr', preprocess: 'tophat', label: 'paddleocr+tophat' },
  { engine: 'paddleocr', preprocess: 'none', label: 'paddleocr+none' },
];

// ── Scoring ────────────────────────────────────────────────────────────────

/**
 * Classic Levenshtein edit distance (insert / delete / substitute, each cost 1)
 * between `a` and `b`, computed with a full (m+1)×(n+1) table.
 */
function levenshtein(a: string, b: string): number {
  const m = a.length, n = b.length;
  const dp: number[][] = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0));
  // First column / row: distance from a prefix to the empty string.
  for (let i = 0; i <= m; i++) dp[i][0] = i;
  for (let j = 0; j <= n; j++) dp[0][j] = j;
  for (let i = 1; i <= m; i++) {
    for (let j = 1; j <= n; j++) {
      dp[i][j] = a[i - 1] === b[j - 1]
        ? dp[i - 1][j - 1]
        : 1 + Math.min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]);
    }
  }
  return dp[m][n];
}

/**
 * Case-insensitive similarity in [0, 1]: 1 minus the edit distance divided by
 * the longer length. Two empty strings are considered identical (1).
 */
function similarity(a: string, b: string): number {
  // Lower-case first and measure the lower-cased strings: case mapping can
  // change a string's length, and normalising by the pre-mapping length could
  // push the result below 0.
  const la = a.toLowerCase();
  const lb = b.toLowerCase();
  const maxLen = Math.max(la.length, lb.length);
  if (maxLen === 0) return 1;
  return 1 - levenshtein(la, lb) / maxLen;
}

/**
 * Fraction of `expected` lines that were matched by some line of `actual`.
 * Same matching rules as {@link scoreLinesVerbose}; returns only the score.
 */
function scoreLines(expected: string[], actual: string[]): number {
  return scoreLinesVerbose(expected, actual).score;
}

/**
 * Greedily matches each expected line (in order) to its most similar
 * still-unused actual line.
 *
 * A pairing counts only when the similarity is at least 0.75, and every actual
 * line can satisfy at most one expected line.
 *
 * @returns `matched` / `missed` expected lines, the `extra` actual lines that
 *   were never used, and `score` = matched / expected (1 when nothing is expected).
 */
function scoreLinesVerbose(expected: string[], actual: string[]): { matched: string[]; missed: string[]; extra: string[]; score: number } {
  const used = new Set<number>();
  const matched: string[] = [];
  const missed: string[] = [];
  for (const exp of expected) {
    let bestIdx = -1, bestSim = 0;
    for (let i = 0; i < actual.length; i++) {
      if (used.has(i)) continue;
      const sim = similarity(exp, actual[i]);
      // Strict '>' keeps the earliest candidate on ties.
      if (sim > bestSim) { bestSim = sim; bestIdx = i; }
    }
    if (bestIdx >= 0 && bestSim >= 0.75) {
      matched.push(exp);
      used.add(bestIdx);
    } else {
      missed.push(exp);
    }
  }
  const extra = actual.filter((_, i) => !used.has(i));
  return { matched, missed, extra, score: expected.length > 0 ? matched.length / expected.length : 1 };
}

// ── Daemon helpers ─────────────────────────────────────────────────────────

/**
 * Runs one test case through the daemon and returns the recognised text lines.
 *
 * Sends a `snapshot` command for the case's full image, then a `diff-ocr`
 * command for its test image. `engine`, `preprocess` and `params` are added to
 * the request only when they differ from the implicit defaults
 * (`tesseract`, `none`, no params).
 *
 * @param tessdataDir Directory the case's image paths are resolved against.
 * @returns Trimmed, non-empty `text` values from the response's `lines`.
 */
async function runCase(
  daemon: OcrDaemon,
  tc: TestCase,
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<string[]> {
  // The daemon is given backslash-separated paths.
  const toBackslashes = (p: string): string => p.replace(/\//g, '\\');
  const fullPath = toBackslashes(join(tessdataDir, tc.fullImage));
  const imagePath = toBackslashes(join(tessdataDir, tc.image));

  // NOTE(review): sendWithRetry is reached through an `any` cast — presumably
  // it is not part of OcrDaemon's public typing; confirm against OcrDaemon.
  const send = (payload: Record<string, unknown>, timeoutMs: number): Promise<any> =>
    (daemon as any).sendWithRetry(payload, timeoutMs);

  await send({ cmd: 'snapshot', file: fullPath }, 10_000);

  const req: Record<string, unknown> = { cmd: 'diff-ocr', file: imagePath };
  if (engine !== 'tesseract') req.engine = engine;
  if (preprocess !== 'none') req.preprocess = preprocess;
  if (params && Object.keys(params).length > 0) req.params = params;

  // Non-tesseract engines get a far longer timeout (120 s vs 10 s).
  const timeout = engine !== 'tesseract' ? 120_000 : 10_000;
  const resp = await send(req, timeout);

  return (resp.lines ?? [])
    .map((l: any) => (l.text ?? '').trim())
    .filter((l: string) => l.length > 0);
}

/**
 * Average `scoreLines` result over all test cases for one engine/preprocess
 * pairing and parameter set.
 *
 * A case whose daemon call throws contributes 0 to the average. Returns 0 when
 * `cases` is empty (instead of the NaN a 0/0 division would produce).
 */
async function scoreCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<number> {
  if (cases.length === 0) return 0;

  let totalScore = 0;
  for (const tc of cases) {
    try {
      const actual = await runCase(daemon, tc, tessdataDir, engine, preprocess, params);
      totalScore += scoreLines(tc.expected, actual);
    } catch {
      // Deliberately quiet: a failed case simply scores 0. This function is
      // called once per swept value by the tuner, whose progress output is a
      // single line per parameter; testCombo reports errors in detail.
    }
  }
  return totalScore / cases.length;
}

// ── Parameter sweep definitions ────────────────────────────────────────────

/** A numeric crop parameter and the candidate values to try for it. */
interface CropIntSweep {
  name: keyof DiffCropParams;
  values: number[];
}

/** A numeric OCR parameter and the candidate values to try for it. */
interface OcrIntSweep {
  name: keyof OcrParams;
  values: number[];
}

/** A boolean OCR parameter and the candidate values to try for it. */
interface OcrBoolSweep {
  name: keyof OcrParams;
  values: boolean[];
}

/** Crop parameters swept in tuning phase A (before `trimCutoff`). */
const CROP_SWEEPS: CropIntSweep[] = [
  { name: 'diffThresh', values: [10, 15, 20, 25, 30, 40, 50] },
  { name: 'maxGap', values: [5, 10, 15, 20, 25, 30] },
];

/** Candidate values for `crop.trimCutoff`, swept after CROP_SWEEPS in each crop round. */
const CROP_TRIM_VALUES = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];

/** OCR parameters swept for every preprocess mode. */
const SHARED_OCR_SWEEPS: OcrIntSweep[] = [
  { name: 'upscale', values: [1, 2, 3] },
  { name: 'mergeGap', values: [0, 20, 40, 60, 80, 100] },
];

/** Extra numeric OCR parameters swept only when preprocess is `bgsub`. */
const BGSUB_INT_SWEEPS: OcrIntSweep[] = [
  { name: 'dimPercentile', values: [5, 10, 15, 20, 25, 30, 40, 50, 60] },
  { name: 'textThresh', values: [10, 20, 30, 40, 50, 60, 80, 100] },
];

/** Extra boolean OCR parameters swept only when preprocess is `bgsub`. */
const BGSUB_BOOL_SWEEPS: OcrBoolSweep[] = [
  { name: 'softThreshold', values: [false, true] },
];

/** Extra numeric OCR parameters swept only when preprocess is `tophat`. */
const TOPHAT_SWEEPS: OcrIntSweep[] = [
  { name: 'kernelSize', values: [11, 15, 21, 25, 31, 41, 51, 61] },
];

// ── Default params per preprocess ──────────────────────────────────────────

/**
 * Starting parameter set for a preprocess mode; used as the tuner's baseline.
 *
 * The crop defaults are the same for every mode; the OCR defaults depend on
 * whether the mode is `bgsub`, `tophat`, or anything else (treated as `none`).
 * A fresh object is built on every call, so callers may mutate the result.
 */
function defaultParams(preprocess: OcrPreprocess): DiffOcrParams {
  const crop: DiffCropParams = { diffThresh: 20, maxGap: 20, trimCutoff: 0.4 };
  if (preprocess === 'bgsub') {
    return { crop, ocr: { useBackgroundSub: true, upscale: 2, dimPercentile: 40, textThresh: 60, softThreshold: false } };
  } else if (preprocess === 'tophat') {
    return { crop, ocr: { useBackgroundSub: false, upscale: 2, kernelSize: 41 } };
  }
  return { crop, ocr: { upscale: 2 } }; // none
}

/**
 * Deep-copies a parameter set via a JSON round trip, so a trial copy can be
 * modified without touching the original. Only JSON-representable values
 * survive the copy.
 */
function cloneParams(p: DiffOcrParams): DiffOcrParams {
  return JSON.parse(JSON.stringify(p));
}

// ── Coordinate descent tuner (two-phase: crop then OCR) ──────────────────

/**
 * Tunes one combo by coordinate descent, starting from `defaultParams`.
 *
 * Phase A sweeps the crop parameters (CROP_SWEEPS, then `trimCutoff`); phase B
 * sweeps the OCR parameters that apply to the combo's preprocess mode while
 * the crop parameters stay fixed. Each phase runs up to three rounds and stops
 * early after a round in which no parameter improved the score.
 *
 * For each parameter every candidate value is scored against the whole test
 * suite with the other parameters held at their current best. The best
 * candidate is adopted only if it strictly beats the current best score; ties
 * between candidates go to the earliest value. Progress is written to stderr.
 *
 * @returns The combo label, best score, the parameters that achieved it, and
 *   the number of suite evaluations performed.
 */
async function tuneCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
): Promise<TuneResult> {
  const MAX_ROUNDS = 3;
  const params = defaultParams(combo.preprocess);

  /** Scores the whole suite for this combo with the given parameters. */
  const evaluate = (p: DiffOcrParams): Promise<number> =>
    scoreCombo(daemon, cases, tessdataDir, combo.engine, combo.preprocess, p);

  let bestScore = await evaluate(params);
  let evals = 1;

  process.stderr.write(` baseline: ${(bestScore * 100).toFixed(1)}% ${JSON.stringify(params)}\n`);

  /**
   * Tries every candidate for `params[group][name]`, logging each score, and
   * commits the best one when it strictly improves on `bestScore`.
   * Returns true when a new value was committed.
   */
  const sweep = async (
    group: 'crop' | 'ocr',
    name: string,
    values: readonly (number | boolean)[],
  ): Promise<boolean> => {
    process.stderr.write(` ${group}.${name}: `);
    let bestVal: number | boolean | undefined;
    let bestValScore = -1;

    for (const v of values) {
      const trial = cloneParams(params);
      // Dynamic key write: `name` comes from the sweep tables, not a literal.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      (trial[group] as any)[name] = v;
      const score = await evaluate(trial);
      evals++;
      process.stderr.write(`${v}=${(score * 100).toFixed(1)} `);
      if (score > bestValScore) { bestValScore = score; bestVal = v; }
    }
    process.stderr.write('\n');

    // Written as a negated '>' so a NaN best score never triggers adoption.
    if (bestVal === undefined || !(bestValScore > bestScore)) return false;

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (params[group] as any)[name] = bestVal;
    bestScore = bestValScore;
    process.stderr.write(` -> ${group}.${name}=${bestVal} score=${(bestScore * 100).toFixed(1)}%\n`);
    return true;
  };

  // ── Phase A: Tune crop params ──
  process.stderr.write(`\n === Phase A: Crop Params ===\n`);

  for (let round = 0; round < MAX_ROUNDS; round++) {
    let improved = false;
    process.stderr.write(` --- Crop Round ${round + 1} ---\n`);

    for (const { name, values } of CROP_SWEEPS) {
      if (await sweep('crop', name, values)) improved = true;
    }
    // trimCutoff has its own candidate list and is swept last in each round.
    if (await sweep('crop', 'trimCutoff', CROP_TRIM_VALUES)) improved = true;

    process.stderr.write(` End crop round ${round + 1}: ${(bestScore * 100).toFixed(1)}% (${evals} evals)\n`);
    if (!improved) break;
  }

  // ── Phase B: Tune OCR params (crop is now locked) ──
  process.stderr.write(`\n === Phase B: OCR Params (crop locked) ===\n`);

  // Numeric sweeps first, then boolean ones.
  const ocrSweeps: Array<OcrIntSweep | OcrBoolSweep> = [...SHARED_OCR_SWEEPS];
  if (combo.preprocess === 'bgsub') {
    ocrSweeps.push(...BGSUB_INT_SWEEPS, ...BGSUB_BOOL_SWEEPS);
  } else if (combo.preprocess === 'tophat') {
    ocrSweeps.push(...TOPHAT_SWEEPS);
  }

  for (let round = 0; round < MAX_ROUNDS; round++) {
    let improved = false;
    process.stderr.write(` --- OCR Round ${round + 1} ---\n`);

    for (const { name, values } of ocrSweeps) {
      if (await sweep('ocr', name, values)) improved = true;
    }

    process.stderr.write(` End OCR round ${round + 1}: ${(bestScore * 100).toFixed(1)}% (${evals} evals)\n`);
    if (!improved) break;
  }

  return { label: combo.label, score: bestScore, params, evals };
}

// ── Verbose test run ───────────────────────────────────────────────────────

/**
 * Runs every test case for one combo and prints a PASS/FAIL line per case,
 * followed by its missed expected lines and unmatched extra lines.
 *
 * A case whose daemon call throws is reported as `[ERROR]` and contributes 0
 * to the average.
 *
 * @param params Optional parameter override passed to the daemon.
 * @returns The average per-case score (0 when `cases` is empty).
 */
async function testCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
  params?: DiffOcrParams,
): Promise<number> {
  // Avoid the 0/0 = NaN average when there is nothing to run.
  if (cases.length === 0) return 0;

  let totalScore = 0;
  for (const tc of cases) {
    try {
      const actual = await runCase(daemon, tc, tessdataDir, combo.engine, combo.preprocess, params);
      const { matched, missed, extra, score } = scoreLinesVerbose(tc.expected, actual);
      totalScore += score;
      // A case passes only when no expected line was missed; extras do not fail it.
      const status = missed.length === 0 ? 'PASS' : 'FAIL';
      console.log(` [${status}] ${tc.id} matched=${matched.length}/${tc.expected.length} extra=${extra.length} score=${score.toFixed(2)}`);
      for (const m of missed) console.log(` MISS: ${m}`);
      for (const e of extra) console.log(` EXTRA: ${e}`);
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      console.log(` [ERROR] ${tc.id}: ${message}`);
    }
  }
  return totalScore / cases.length;
}

// ── Main ───────────────────────────────────────────────────────────────────

/**
 * Command-line entry point.
 *
 * Arguments: `--tune` switches from test mode to tune mode; the first argument
 * that does not start with `--` is lower-cased and used as a substring filter
 * on combo labels.
 *
 * Test mode runs each selected combo with the daemon's default parameters and
 * prints a ranked summary. Tune mode runs `tuneCombo` for each selected combo,
 * re-runs the suite verbosely with the best parameters, and prints a ranked
 * summary plus the best parameters per combo.
 *
 * The daemon is always stopped before returning, including when a run throws.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const tuneMode = args.includes('--tune');
  const filterArg = args.find(a => !a.startsWith('--'))?.toLowerCase();

  const combos = filterArg
    ? ALL_COMBOS.filter(c => c.label.includes(filterArg))
    : ALL_COMBOS;

  // Paths are relative to the current working directory.
  const tessdataDir = join('tools', 'OcrDaemon', 'bin', 'Release', 'net8.0-windows10.0.19041.0', 'tessdata');
  const casesPath = join(tessdataDir, 'cases.json');
  // NOTE(review): the parsed JSON is trusted to match TestCase[]; it is not validated.
  const cases: TestCase[] = JSON.parse(readFileSync(casesPath, 'utf-8'));

  console.log(`Loaded ${cases.length} test cases: ${cases.map(c => c.id).join(', ')}`);
  console.log(`Mode: ${tuneMode ? 'TUNE' : 'TEST'} Combos: ${combos.length}\n`);

  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  /** Prints one summary row: label, percentage, and a bar of up to 40 '#'. */
  const printScoreBar = (label: string, score: number): void => {
    const bar = '#'.repeat(Math.round(score * 40));
    console.log(` ${label.padEnd(22)} ${(score * 100).toFixed(1).padStart(5)}% ${bar}`);
  };

  const daemon = new OcrDaemon();

  try {
    if (tuneMode) {
      // ── Tune mode: coordinate descent for each combo ──
      const tuneResults: TuneResult[] = [];

      for (const combo of combos) {
        console.log(`\n${'='.repeat(60)}`);
        console.log(` TUNING: ${combo.label}`);
        console.log(`${'='.repeat(60)}`);

        try {
          const result = await tuneCombo(daemon, cases, tessdataDir, combo);
          tuneResults.push(result);

          console.log(`\n Best: ${(result.score * 100).toFixed(1)}% (${result.evals} evals)`);
          console.log(` Params: ${JSON.stringify(result.params)}`);

          // Verbose run with best params
          console.log('');
          await testCombo(daemon, cases, tessdataDir, combo, result.params);
        } catch (err: unknown) {
          console.log(` ERROR: ${describeError(err)}`);
          // Keep the combo in the summary with a zero score.
          tuneResults.push({ label: combo.label, score: 0, params: {}, evals: 0 });
        }
      }

      // Summary
      console.log(`\n${'='.repeat(70)}`);
      console.log(' TUNE RESULTS');
      console.log(`${'='.repeat(70)}`);

      const sorted = tuneResults.sort((a, b) => b.score - a.score);
      for (const r of sorted) {
        printScoreBar(r.label, r.score);
      }

      console.log(`\n BEST PARAMS PER COMBO:`);
      for (const r of sorted) {
        if (r.score > 0) {
          console.log(` ${r.label.padEnd(22)} ${JSON.stringify(r.params)}`);
        }
      }
    } else {
      // ── Test mode: defaults only ──
      const results: Record<string, number> = {};

      for (const combo of combos) {
        console.log(`\n${'='.repeat(60)}`);
        console.log(` ${combo.label}`);
        console.log(`${'='.repeat(60)}`);

        try {
          const score = await testCombo(daemon, cases, tessdataDir, combo);
          results[combo.label] = score;
          console.log(`\n Average: ${(score * 100).toFixed(1)}%`);
        } catch (err: unknown) {
          console.log(` ERROR: ${describeError(err)}`);
          results[combo.label] = 0;
        }
      }

      console.log(`\n${'='.repeat(60)}`);
      console.log(' SUMMARY');
      console.log(`${'='.repeat(60)}`);

      const sorted = Object.entries(results).sort((a, b) => b[1] - a[1]);
      for (const [label, score] of sorted) {
        printScoreBar(label, score);
      }
    }
  } finally {
    // Always shut the daemon down, even if a run above threw unexpectedly.
    await daemon.stop();
  }
}

// Run the CLI; any unhandled failure is printed and turns into exit code 1.
main().catch(err => {
  console.error(err);
  process.exit(1);
});