/**
 * OCR test runner + parameter tuner.
 *
 * Usage:
 *   npx tsx tools/test-ocr.ts                  # test all combos with defaults
 *   npx tsx tools/test-ocr.ts paddleocr        # filter to paddleocr combos
 *   npx tsx tools/test-ocr.ts --tune           # tune all combos (coordinate descent)
 *   npx tsx tools/test-ocr.ts --tune easyocr   # tune only easyocr combos
 */
import { OcrDaemon, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams } from '../src/game/OcrDaemon.js';
import { readFileSync } from 'fs';
import { join } from 'path';

// ── Types ──────────────────────────────────────────────────────────────────

/** One entry of cases.json: a diff image, its full-frame snapshot and the expected text lines. */
interface TestCase {
  id: string;
  image: string;
  fullImage: string;
  expected: string[];
}

/** An engine/preprocess pairing under test. */
interface Combo {
  engine: OcrEngine;
  preprocess: OcrPreprocess;
  label: string;
}

/** Outcome of tuning one combo. */
interface TuneResult {
  label: string;
  score: number;
  params: DiffOcrParams;
  evals: number;
}

/** Result of matching expected lines against OCR output. */
interface LineMatchReport {
  matched: string[];
  missed: string[];
  extra: string[];
  score: number;
}

/** Shape of the daemon reply as consumed by runCase (only the fields read here). */
interface DiffOcrResponse {
  lines?: Array<{ text?: string }>;
}

/** A label/score pair for the summary tables. */
interface ScoreRow {
  label: string;
  score: number;
}

// ── Combos ─────────────────────────────────────────────────────────────────

const ALL_COMBOS: Combo[] = [
  { engine: 'tesseract', preprocess: 'bgsub', label: 'tesseract+bgsub' },
  { engine: 'tesseract', preprocess: 'tophat', label: 'tesseract+tophat' },
  { engine: 'tesseract', preprocess: 'none', label: 'tesseract+none' },
  { engine: 'easyocr', preprocess: 'bgsub', label: 'easyocr+bgsub' },
  { engine: 'easyocr', preprocess: 'tophat', label: 'easyocr+tophat' },
  { engine: 'easyocr', preprocess: 'none', label: 'easyocr+none' },
  { engine: 'paddleocr', preprocess: 'bgsub', label: 'paddleocr+bgsub' },
  { engine: 'paddleocr', preprocess: 'tophat', label: 'paddleocr+tophat' },
  { engine: 'paddleocr', preprocess: 'none', label: 'paddleocr+none' },
];

// ── Scoring ────────────────────────────────────────────────────────────────

/** Minimum similarity for an OCR line to count as a match for an expected line. */
const MATCH_THRESHOLD = 0.75;

/** Classic dynamic-programming edit distance (insert / delete / substitute, each cost 1). */
function levenshtein(a: string, b: string): number {
  const m = a.length;
  const n = b.length;
  const dp: number[][] = Array.from({ length: m + 1 }, () => Array<number>(n + 1).fill(0));
  for (let i = 0; i <= m; i++) dp[i][0] = i;
  for (let j = 0; j <= n; j++) dp[0][j] = j;
  for (let i = 1; i <= m; i++) {
    for (let j = 1; j <= n; j++) {
      dp[i][j] = a[i - 1] === b[j - 1]
        ? dp[i - 1][j - 1]
        : 1 + Math.min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]);
    }
  }
  return dp[m][n];
}

/** Case-insensitive similarity in [0, 1]; two empty strings are considered identical. */
function similarity(a: string, b: string): number {
  const maxLen = Math.max(a.length, b.length);
  if (maxLen === 0) return 1;
  return 1 - levenshtein(a.toLowerCase(), b.toLowerCase()) / maxLen;
}

/**
 * Greedily pairs each expected line (in order) with its most similar unused
 * actual line, and reports which expected lines were matched or missed and
 * which actual lines were left over.
 *
 * @returns matched/missed/extra lists plus `score` = matched / expected
 *          (1 when nothing is expected).
 */
function scoreLinesVerbose(expected: string[], actual: string[]): LineMatchReport {
  const used = new Set<number>();
  const matched: string[] = [];
  const missed: string[] = [];

  for (const exp of expected) {
    let bestIdx = -1;
    let bestSim = 0;
    for (let i = 0; i < actual.length; i++) {
      if (used.has(i)) continue;
      const sim = similarity(exp, actual[i]);
      if (sim > bestSim) {
        bestSim = sim;
        bestIdx = i;
      }
    }
    if (bestIdx >= 0 && bestSim >= MATCH_THRESHOLD) {
      matched.push(exp);
      used.add(bestIdx);
    } else {
      missed.push(exp);
    }
  }

  const extra = actual.filter((_, i) => !used.has(i));
  const score = expected.length > 0 ? matched.length / expected.length : 1;
  return { matched, missed, extra, score };
}

/** Fraction of expected lines found in `actual`; same matching rule as scoreLinesVerbose. */
function scoreLines(expected: string[], actual: string[]): number {
  return scoreLinesVerbose(expected, actual).score;
}

// ── Daemon helpers ─────────────────────────────────────────────────────────

/**
 * Sends a raw command object to the daemon.
 *
 * NOTE(review): `sendWithRetry` is reached through an `any` cast because it is
 * not part of the typed surface visible from this file (presumably private) —
 * confirm, and expose a typed method on OcrDaemon if possible.
 */
function sendToDaemon(daemon: OcrDaemon, req: Record<string, unknown>, timeoutMs: number): Promise<unknown> {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- single, deliberate escape hatch
  return (daemon as any).sendWithRetry(req, timeoutMs);
}

/**
 * Runs one test case: loads the full-frame snapshot, then requests a diff-OCR
 * of the case image, and returns the recognised lines (trimmed, empties dropped).
 *
 * @throws whatever the daemon call rejects with; callers decide how to score that.
 */
async function runCase(
  daemon: OcrDaemon,
  tc: TestCase,
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<string[]> {
  // Paths are sent to the daemon with backslash separators.
  const fullPath = join(tessdataDir, tc.fullImage).replace(/\//g, '\\');
  const imagePath = join(tessdataDir, tc.image).replace(/\//g, '\\');

  await sendToDaemon(daemon, { cmd: 'snapshot', file: fullPath }, 10_000);

  // Only non-default fields are attached to the request.
  const req: Record<string, unknown> = { cmd: 'diff-ocr', file: imagePath };
  if (engine !== 'tesseract') req.engine = engine;
  if (preprocess !== 'none') req.preprocess = preprocess;
  if (params && Object.keys(params).length > 0) req.params = params;

  // Non-tesseract engines get a much longer timeout.
  const timeoutMs = engine !== 'tesseract' ? 120_000 : 10_000;
  const resp = (await sendToDaemon(daemon, req, timeoutMs)) as DiffOcrResponse;

  return (resp.lines ?? [])
    .map(line => (line.text ?? '').trim())
    .filter(text => text.length > 0);
}

/**
 * Average scoreLines() over all cases for one engine/preprocess/params setting.
 * A case whose daemon call fails contributes 0. Returns 0 for an empty case
 * list instead of dividing by zero.
 */
async function scoreCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<number> {
  if (cases.length === 0) return 0;

  let totalScore = 0;
  for (const tc of cases) {
    try {
      const actual = await runCase(daemon, tc, tessdataDir, engine, preprocess, params);
      totalScore += scoreLines(tc.expected, actual);
    } catch {
      // Deliberate best-effort: an erroring case scores 0 so tuning can continue.
    }
  }
  return totalScore / cases.length;
}

// ── Parameter sweep definitions ────────────────────────────────────────────

interface CropIntSweep {
  name: keyof DiffCropParams;
  values: number[];
}

interface OcrIntSweep {
  name: keyof OcrParams;
  values: number[];
}

interface OcrBoolSweep {
  name: keyof OcrParams;
  values: boolean[];
}

const CROP_SWEEPS: CropIntSweep[] = [
  { name: 'diffThresh', values: [10, 15, 20, 25, 30, 40, 50] },
  { name: 'maxGap', values: [5, 10, 15, 20, 25, 30] },
];

const CROP_TRIM_VALUES = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];

const SHARED_OCR_SWEEPS: OcrIntSweep[] = [
  { name: 'upscale', values: [1, 2, 3] },
  { name: 'mergeGap', values: [0, 20, 40, 60, 80, 100] },
];

const BGSUB_INT_SWEEPS: OcrIntSweep[] = [
  { name: 'dimPercentile', values: [5, 10, 15, 20, 25, 30, 40, 50, 60] },
  { name: 'textThresh', values: [10, 20, 30, 40, 50, 60, 80, 100] },
];

const BGSUB_BOOL_SWEEPS: OcrBoolSweep[] = [
  { name: 'softThreshold', values: [false, true] },
];

const TOPHAT_SWEEPS: OcrIntSweep[] = [
  { name: 'kernelSize', values: [11, 15, 21, 25, 31, 41, 51, 61] },
];

// ── Default params per preprocess ──────────────────────────────────────────

/** Starting point for tuning; crop defaults are shared, OCR defaults depend on the preprocess. */
function defaultParams(preprocess: OcrPreprocess): DiffOcrParams {
  const crop: DiffCropParams = { diffThresh: 20, maxGap: 20, trimCutoff: 0.4 };
  if (preprocess === 'bgsub') {
    return { crop, ocr: { useBackgroundSub: true, upscale: 2, dimPercentile: 40, textThresh: 60, softThreshold: false } };
  } else if (preprocess === 'tophat') {
    return { crop, ocr: { useBackgroundSub: false, upscale: 2, kernelSize: 41 } };
  }
  return { crop, ocr: { upscale: 2 } }; // none
}

/** Deep copy via a JSON round trip (params are plain JSON data). */
function cloneParams(p: DiffOcrParams): DiffOcrParams {
  return JSON.parse(JSON.stringify(p));
}

// ── Coordinate descent tuner (two-phase: crop then OCR) ──────────────────

/** Upper bound on coordinate-descent rounds per phase. */
const MAX_ROUNDS = 3;

type ParamGroup = 'crop' | 'ocr';

/** One parameter to sweep, addressed as `<group>.<name>`. */
interface ParamSweep {
  group: ParamGroup;
  name: string;
  values: ReadonlyArray<number | boolean>;
}

/** Everything a sweep needs in order to score a trial. */
interface TuneContext {
  daemon: OcrDaemon;
  cases: TestCase[];
  tessdataDir: string;
  combo: Combo;
}

/** Mutable tuning state shared across sweeps. */
interface TuneState {
  params: DiffOcrParams;
  bestScore: number;
  evals: number;
}

/** Sets `params[group][name] = value`, creating the section if it is missing. */
function setParam(params: DiffOcrParams, group: ParamGroup, name: string, value: number | boolean): void {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- `name` is a runtime key of the chosen section
  const bag = params as any;
  bag[group] = { ...bag[group], [name]: value };
}

/**
 * Tries every candidate value for one parameter (all others held fixed) and
 * keeps the first value that reaches the highest score, but only if it
 * strictly beats the current best. Progress is written to stderr.
 *
 * @returns true when `state.params` was improved.
 */
async function sweepParam(ctx: TuneContext, state: TuneState, sweep: ParamSweep): Promise<boolean> {
  const { group, name, values } = sweep;
  const { daemon, cases, tessdataDir, combo } = ctx;

  process.stderr.write(` ${group}.${name}: `);
  let bestVal: number | boolean | undefined;
  let bestValScore = -1;
  for (const v of values) {
    const trial = cloneParams(state.params);
    setParam(trial, group, name, v);
    const score = await scoreCombo(daemon, cases, tessdataDir, combo.engine, combo.preprocess, trial);
    state.evals++;
    process.stderr.write(`${v}=${(score * 100).toFixed(1)} `);
    if (score > bestValScore) {
      bestValScore = score;
      bestVal = v;
    }
  }
  process.stderr.write('\n');

  if (bestVal !== undefined && bestValScore > state.bestScore) {
    setParam(state.params, group, name, bestVal);
    state.bestScore = bestValScore;
    process.stderr.write(` -> ${group}.${name}=${bestVal} score=${(state.bestScore * 100).toFixed(1)}%\n`);
    return true;
  }
  return false;
}

/** Repeats the given sweeps for up to MAX_ROUNDS rounds, stopping after a round with no improvement. */
async function runTuningRounds(
  ctx: TuneContext,
  state: TuneState,
  sweeps: ParamSweep[],
  headerLabel: string,
  footerLabel: string,
): Promise<void> {
  for (let round = 0; round < MAX_ROUNDS; round++) {
    let improved = false;
    process.stderr.write(` --- ${headerLabel} Round ${round + 1} ---\n`);
    for (const sweep of sweeps) {
      if (await sweepParam(ctx, state, sweep)) improved = true;
    }
    process.stderr.write(` End ${footerLabel} round ${round + 1}: ${(state.bestScore * 100).toFixed(1)}% (${state.evals} evals)\n`);
    if (!improved) break;
  }
}

/**
 * Tunes one combo by coordinate descent, starting from defaultParams():
 * phase A sweeps the crop parameters, phase B sweeps the OCR parameters
 * relevant to the combo's preprocess with the crop values held fixed.
 *
 * @returns the best score found, the parameters that produced it and the
 *          number of scoreCombo evaluations spent.
 */
async function tuneCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
): Promise<TuneResult> {
  const ctx: TuneContext = { daemon, cases, tessdataDir, combo };
  const params = defaultParams(combo.preprocess);
  const state: TuneState = {
    params,
    bestScore: await scoreCombo(daemon, cases, tessdataDir, combo.engine, combo.preprocess, params),
    evals: 1,
  };
  process.stderr.write(` baseline: ${(state.bestScore * 100).toFixed(1)}% ${JSON.stringify(state.params)}\n`);

  // ── Phase A: Tune crop params ──
  process.stderr.write(`\n === Phase A: Crop Params ===\n`);
  const cropSweeps: ParamSweep[] = [
    ...CROP_SWEEPS.map(({ name, values }): ParamSweep => ({ group: 'crop', name, values })),
    { group: 'crop', name: 'trimCutoff', values: CROP_TRIM_VALUES },
  ];
  await runTuningRounds(ctx, state, cropSweeps, 'Crop', 'crop');

  // ── Phase B: Tune OCR params (crop is now locked) ──
  process.stderr.write(`\n === Phase B: OCR Params (crop locked) ===\n`);
  const ocrIntSweeps: OcrIntSweep[] = [...SHARED_OCR_SWEEPS];
  const ocrBoolSweeps: OcrBoolSweep[] = [];
  if (combo.preprocess === 'bgsub') {
    ocrIntSweeps.push(...BGSUB_INT_SWEEPS);
    ocrBoolSweeps.push(...BGSUB_BOOL_SWEEPS);
  } else if (combo.preprocess === 'tophat') {
    ocrIntSweeps.push(...TOPHAT_SWEEPS);
  }
  // Integer sweeps run before boolean sweeps within each round.
  const ocrSweeps: ParamSweep[] = [...ocrIntSweeps, ...ocrBoolSweeps].map(
    ({ name, values }): ParamSweep => ({ group: 'ocr', name, values }),
  );
  await runTuningRounds(ctx, state, ocrSweeps, 'OCR', 'OCR');

  return { label: combo.label, score: state.bestScore, params: state.params, evals: state.evals };
}

// ── Verbose test run ───────────────────────────────────────────────────────

/** Best-effort message for a caught value of unknown type. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Runs every case for one combo, printing a PASS/FAIL line per case together
 * with missed and extra lines. A case that throws is reported and scores 0.
 *
 * @returns the average case score (0 for an empty case list).
 */
async function testCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
  params?: DiffOcrParams,
): Promise<number> {
  if (cases.length === 0) return 0;

  let totalScore = 0;
  for (const tc of cases) {
    try {
      const actual = await runCase(daemon, tc, tessdataDir, combo.engine, combo.preprocess, params);
      const { matched, missed, extra, score } = scoreLinesVerbose(tc.expected, actual);
      totalScore += score;
      const status = missed.length === 0 ? 'PASS' : 'FAIL';
      console.log(` [${status}] ${tc.id} matched=${matched.length}/${tc.expected.length} extra=${extra.length} score=${score.toFixed(2)}`);
      for (const m of missed) console.log(` MISS: ${m}`);
      for (const e of extra) console.log(` EXTRA: ${e}`);
    } catch (err: unknown) {
      console.log(` [ERROR] ${tc.id}: ${describeError(err)}`);
    }
  }
  return totalScore / cases.length;
}

// ── Main ───────────────────────────────────────────────────────────────────

/** Prints a banner followed by one bar-chart row per entry; rows are expected to be pre-sorted. */
function printScoreTable(title: string, width: number, rows: ScoreRow[]): void {
  console.log(`\n${'='.repeat(width)}`);
  console.log(title);
  console.log(`${'='.repeat(width)}`);
  for (const { label, score } of rows) {
    const bar = '#'.repeat(Math.round(score * 40));
    console.log(` ${label.padEnd(22)} ${(score * 100).toFixed(1).padStart(5)}% ${bar}`);
  }
}

/** Tune mode: coordinate descent for each combo, then a ranked summary with the best params. */
async function runTuneMode(daemon: OcrDaemon, cases: TestCase[], tessdataDir: string, combos: Combo[]): Promise<void> {
  const tuneResults: TuneResult[] = [];
  for (const combo of combos) {
    console.log(`\n${'='.repeat(60)}`);
    console.log(` TUNING: ${combo.label}`);
    console.log(`${'='.repeat(60)}`);
    try {
      const result = await tuneCombo(daemon, cases, tessdataDir, combo);
      tuneResults.push(result);
      console.log(`\n Best: ${(result.score * 100).toFixed(1)}% (${result.evals} evals)`);
      console.log(` Params: ${JSON.stringify(result.params)}`);
      // Verbose run with best params
      console.log('');
      await testCombo(daemon, cases, tessdataDir, combo, result.params);
    } catch (err: unknown) {
      console.log(` ERROR: ${describeError(err)}`);
      tuneResults.push({ label: combo.label, score: 0, params: {}, evals: 0 });
    }
  }

  // Summary (sort a copy so the collected results keep their run order)
  const sorted = [...tuneResults].sort((a, b) => b.score - a.score);
  printScoreTable(' TUNE RESULTS', 70, sorted);
  console.log(`\n BEST PARAMS PER COMBO:`);
  for (const r of sorted) {
    if (r.score > 0) {
      console.log(` ${r.label.padEnd(22)} ${JSON.stringify(r.params)}`);
    }
  }
}

/** Test mode: run each combo once with the daemon's defaults, then print a ranked summary. */
async function runTestMode(daemon: OcrDaemon, cases: TestCase[], tessdataDir: string, combos: Combo[]): Promise<void> {
  const results: ScoreRow[] = [];
  for (const combo of combos) {
    console.log(`\n${'='.repeat(60)}`);
    console.log(` ${combo.label}`);
    console.log(`${'='.repeat(60)}`);
    try {
      const score = await testCombo(daemon, cases, tessdataDir, combo);
      results.push({ label: combo.label, score });
      console.log(`\n Average: ${(score * 100).toFixed(1)}%`);
    } catch (err: unknown) {
      console.log(` ERROR: ${describeError(err)}`);
      results.push({ label: combo.label, score: 0 });
    }
  }

  const sorted = [...results].sort((a, b) => b.score - a.score);
  printScoreTable(' SUMMARY', 60, sorted);
}

/**
 * Entry point: parses `--tune` and an optional label filter from argv, loads
 * cases.json from the daemon's tessdata directory and runs the selected mode.
 * The daemon is stopped even when a mode throws.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const tuneMode = args.includes('--tune');
  // First non-flag argument is a case-insensitive substring filter on combo labels.
  const filterArg = args.find(a => !a.startsWith('--'))?.toLowerCase();

  const combos = filterArg
    ? ALL_COMBOS.filter(c => c.label.includes(filterArg))
    : ALL_COMBOS;

  const tessdataDir = join('tools', 'OcrDaemon', 'bin', 'Release', 'net8.0-windows10.0.19041.0', 'tessdata');
  const casesPath = join(tessdataDir, 'cases.json');
  const parsed: unknown = JSON.parse(readFileSync(casesPath, 'utf-8'));
  if (!Array.isArray(parsed)) {
    throw new Error(`${casesPath} must contain a JSON array of test cases`);
  }
  const cases = parsed as TestCase[];

  console.log(`Loaded ${cases.length} test cases: ${cases.map(c => c.id).join(', ')}`);
  console.log(`Mode: ${tuneMode ? 'TUNE' : 'TEST'} Combos: ${combos.length}\n`);

  const daemon = new OcrDaemon();
  try {
    if (tuneMode) {
      await runTuneMode(daemon, cases, tessdataDir, combos);
    } else {
      await runTestMode(daemon, cases, tessdataDir, combos);
    }
  } finally {
    // Always release the daemon, otherwise a failure would skip the shutdown.
    await daemon.stop();
  }
}

main().catch(err => {
  console.error(err);
  process.exit(1);
});