deleted old
This commit is contained in:
parent
4a65c8e17b
commit
696fd07e86
33 changed files with 1 additions and 6292 deletions
|
|
@ -1,104 +0,0 @@
|
|||
import { spawn } from 'child_process';
|
||||
import { join } from 'path';
|
||||
|
||||
// Release build output of the OcrDaemon project; the executable and its
// tessdata folder sit side by side in this directory.
const DAEMON_BIN_DIR = join('tools', 'OcrDaemon', 'bin', 'Release', 'net8.0-windows10.0.19041.0');

// Daemon executable spawned by run().
const EXE = join(DAEMON_BIN_DIR, 'OcrDaemon.exe');

// Data directory; the test images live in its `images` subfolder.
const TESSDATA = join(DAEMON_BIN_DIR, 'tessdata');

// Where the crop produced by the first run of each case is saved.
const SAVE_DIR = join(TESSDATA, 'images');
|
||||
|
||||
// Reference text lines per test-case id; run() fuzzy-matches the OCR output
// of each case against the list stored under its id.
const expected = {
  vertex1: [
    'The Vertex',
    'Tribal Mask',
    'Helmet',
    'Quality: +20%',
    'Evasion Rating: 79',
    'Energy Shield: 34',
    'Requires: Level 33',
    '16% Increased Life Regeneration Rate',
    'Has no Attribute Requirements',
    '+15% to Chaos Resistance',
    'Skill gems have no attribute requirements',
    '+3 to level of all skills',
    '15% increased mana cost efficiency',
    'Twice Corrupted',
    'Asking Price:',
    '7x Divine Orb',
  ],
  vertex2: [
    'The Vertex',
    'Tribal Mask',
    'Helmet',
    'Quality: +20%',
    'Evasion Rating: 182',
    'Energy Shield: 77',
    'Requires: Level 33',
    '+29 To Spirit',
    '+1 to Level of All Minion Skills',
    'Has no Attribute Requirements',
    '130% increased Evasion and Energy Shield',
    '27% Increased Critical Hit Chance',
    '+13% to Chaos Resistance',
    '+2 to level of all skills',
    'Twice Corrupted',
    'Asking Price:',
    '35x Divine Orb',
  ],
};
|
||||
|
||||
/**
 * Case-insensitive similarity of two strings in the range [0, 1].
 *
 * Computed as `1 - editDistance / longerLength` on the lower-cased inputs.
 * Equal strings (ignoring case) give 1; if exactly one side is empty the
 * result is 0.
 */
function levenshteinSim(a, b) {
  const left = a.toLowerCase();
  const right = b.toLowerCase();
  if (left === right) return 1;

  const rows = left.length;
  const cols = right.length;
  if (rows === 0 || cols === 0) return 0;

  // Only the previous DP row is needed to compute the next one.
  let prev = Array.from({ length: cols + 1 }, (_, j) => j);
  for (let i = 1; i <= rows; i++) {
    const curr = [i];
    for (let j = 1; j <= cols; j++) {
      const substitution = prev[j - 1] + (left[i - 1] === right[j - 1] ? 0 : 1);
      curr[j] = Math.min(prev[j] + 1, curr[j - 1] + 1, substitution);
    }
    prev = curr;
  }
  return 1 - prev[cols] / Math.max(rows, cols);
}
|
||||
|
||||
/**
 * Spawns the OcrDaemon executable, waits for its first JSON line, then for
 * each test case sends `snapshot` + `diff-ocr` (engine `easyocr`) three times.
 * The first iteration saves the crop and checks accuracy against `expected`;
 * the remaining iterations only report timing.
 *
 * The daemon protocol is line-delimited JSON over stdin/stdout with at most
 * one request in flight. If the daemon fails to start, exits, or its stdin
 * pipe errors, the pending wait is rejected instead of hanging, and the child
 * is always terminated on the way out.
 */
async function run() {
  const proc = spawn(EXE, [], { stdio: ['pipe', 'pipe', 'pipe'] });
  let buffer = '';
  let pending; // { resolve, reject } of the single in-flight wait, if any
  let fatal;   // first spawn/exit/pipe error; later waits reject immediately

  const fail = (err) => {
    if (!fatal) fatal = err;
    if (pending) {
      const { reject } = pending;
      pending = undefined;
      reject(fatal);
    }
  };
  proc.on('error', fail);
  proc.on('exit', (code, signal) => fail(new Error(`OcrDaemon exited (code=${code}, signal=${signal})`)));
  proc.stdin.on('error', fail);

  // Decode as a stream so a multi-byte character split across chunks survives.
  proc.stdout.setEncoding('utf8');
  proc.stdout.on('data', (data) => {
    buffer += data.toString();
    let idx;
    while ((idx = buffer.indexOf('\n')) !== -1) {
      const line = buffer.slice(0, idx).trim();
      buffer = buffer.slice(idx + 1);
      if (!line) continue;
      let message;
      // Lines that are not JSON are deliberately ignored.
      try { message = JSON.parse(line); } catch { continue; }
      // A message with nobody waiting for it is dropped.
      if (pending) {
        const { resolve } = pending;
        pending = undefined;
        resolve(message);
      }
    }
  });
  proc.stderr.on('data', (data) => process.stderr.write(data));

  const waitForMessage = () => new Promise((resolve, reject) => {
    if (fatal) reject(fatal);
    else pending = { resolve, reject };
  });

  const sendCmd = (cmd) => {
    // Register the waiter before writing so a fast reply cannot be missed.
    const reply = waitForMessage();
    if (!fatal) proc.stdin.write(JSON.stringify(cmd) + '\n');
    return reply;
  };

  try {
    // The first JSON line the daemon prints is awaited before any command is sent.
    await waitForMessage();

    const cases = [
      { id: 'vertex1', image: 'vertex1.png', snapshot: 'vertex-snapshot.png' },
      { id: 'vertex2', image: 'vertex2.png', snapshot: 'vertex-snapshot.png' },
    ];

    for (const tc of cases) {
      const snapPath = join(TESSDATA, 'images', tc.snapshot);
      const imgPath = join(TESSDATA, 'images', tc.image);

      // 3 runs: first saves crop, rest just timing
      for (let i = 0; i < 3; i++) {
        await sendCmd({ cmd: 'snapshot', file: snapPath });
        const savePath = i === 0 ? join(SAVE_DIR, `${tc.id}_easyocr_crop.png`) : undefined;
        const t0 = performance.now();
        const resp = await sendCmd({ cmd: 'diff-ocr', file: imgPath, engine: 'easyocr', ...(savePath ? { path: savePath } : {}) });
        const ms = (performance.now() - t0).toFixed(0);
        const region = resp.region;
        // Tolerate entries without text, as the TypeScript runner does.
        const lines = (resp.lines || []).map(l => (l.text ?? '').trim()).filter(l => l.length > 0);

        if (i === 0) {
          // Accuracy check on first run: greedy best match per expected line,
          // each OCR line consumed at most once.
          const exp = expected[tc.id];
          const used = new Set();
          let matched = 0, fuzzy = 0, missed = 0;
          for (const e of exp) {
            let bestIdx = -1, bestSim = 0;
            for (let j = 0; j < lines.length; j++) {
              if (used.has(j)) continue;
              const sim = levenshteinSim(e, lines[j]);
              if (sim > bestSim) { bestSim = sim; bestIdx = j; }
            }
            if (bestIdx >= 0 && bestSim >= 0.75) {
              used.add(bestIdx);
              if (bestSim >= 0.95) matched++; else fuzzy++;
            } else {
              missed++;
              console.log(` MISS: ${e}${bestIdx >= 0 ? ` (best: "${lines[bestIdx]}", sim=${bestSim.toFixed(2)})` : ''}`);
            }
          }
          console.log(`${tc.id}: ${ms}ms crop=${region?.width}x${region?.height} at (${region?.x},${region?.y}) ${matched} OK / ${fuzzy}~ / ${missed} miss lines=${lines.length}${savePath ? ' [saved]' : ''}`);
        } else {
          console.log(`${tc.id}: ${ms}ms crop=${region?.width}x${region?.height}`);
        }
      }
      console.log();
    }
  } finally {
    // Always release the child, including when a command failed.
    proc.stdin.end();
    proc.kill();
  }
}
|
||||
// Report a failure and make the process exit non-zero so callers can detect it.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
|
|
@ -1,484 +0,0 @@
|
|||
/**
|
||||
* OCR test runner + parameter tuner.
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx tools/test-ocr.ts # test all combos with defaults
|
||||
* npx tsx tools/test-ocr.ts paddleocr # filter to paddleocr combos
|
||||
* npx tsx tools/test-ocr.ts --tune # tune all combos (coordinate descent)
|
||||
* npx tsx tools/test-ocr.ts --tune easyocr # tune only easyocr combos
|
||||
*/
|
||||
import { OcrDaemon, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams } from '../src/game/OcrDaemon.js';
|
||||
import { readFileSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** One OCR regression case; `main` loads an array of these from `cases.json`. */
interface TestCase {
  /** Identifier printed in the test output. */
  id: string;
  /** File, relative to the tessdata directory, passed to the `diff-ocr` command. */
  image: string;
  /** File, relative to the tessdata directory, passed to the `snapshot` command. */
  fullImage: string;
  /** Text lines the OCR output is scored against. */
  expected: string[];
}
|
||||
|
||||
/** An OCR engine paired with a preprocessing mode. */
interface Combo {
  /** Engine name sent to the daemon. */
  engine: OcrEngine;
  /** Preprocessing mode sent to the daemon. */
  preprocess: OcrPreprocess;
  /** Display name; also what the command-line filter is matched against. */
  label: string;
}
|
||||
|
||||
/** Outcome of tuning one combo. */
interface TuneResult {
  /** Label of the combo that was tuned. */
  label: string;
  /** Best average score found, as a fraction (multiplied by 100 for display). */
  score: number;
  /** Parameter set that produced `score`. */
  params: DiffOcrParams;
  /** Number of scoring runs performed while tuning. */
  evals: number;
}
|
||||
|
||||
// ── Combos ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Every engine x preprocess pairing, grouped by engine (tesseract, easyocr,
 * paddleocr) and ordered bgsub, tophat, none within each group. Labels have
 * the form `engine+preprocess`.
 */
const ALL_COMBOS: Combo[] = (['tesseract', 'easyocr', 'paddleocr'] as const).flatMap((engine) =>
  (['bgsub', 'tophat', 'none'] as const).map((preprocess) => ({
    engine,
    preprocess,
    label: `${engine}+${preprocess}`,
  })),
);
|
||||
|
||||
// ── Scoring ────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Levenshtein edit distance between `a` and `b`, compared per UTF-16 code
 * unit and case-sensitively.
 *
 * @returns The minimum number of single-unit insertions, deletions and
 *   substitutions that turn `a` into `b`.
 */
function levenshtein(a: string, b: string): number {
  // Row 0 of the DP table: distance from the empty prefix of `a`.
  let previous: number[] = Array.from({ length: b.length + 1 }, (_, j) => j);

  for (let i = 1; i <= a.length; i++) {
    const current: number[] = [i];
    for (let j = 1; j <= b.length; j++) {
      current.push(
        a[i - 1] === b[j - 1]
          ? previous[j - 1]
          : 1 + Math.min(previous[j], current[j - 1], previous[j - 1]),
      );
    }
    previous = current;
  }

  return previous[b.length];
}
|
||||
|
||||
/**
 * Case-insensitive similarity of two strings in the range [0, 1]:
 * `1 - editDistance / longerLength`, where both the distance and the length
 * are taken from the lower-cased strings.
 *
 * @returns 1 for two empty strings or strings equal ignoring case; 0 when
 *   every position differs.
 */
function similarity(a: string, b: string): number {
  const lowerA = a.toLowerCase();
  const lowerB = b.toLowerCase();
  // Lower-casing can change a string's length (e.g. U+0130 becomes two code
  // units), so normalise by the lengths that were actually compared;
  // otherwise the result can fall below 0.
  const maxLen = Math.max(lowerA.length, lowerB.length);
  if (maxLen === 0) return 1;
  return 1 - levenshtein(lowerA, lowerB) / maxLen;
}
|
||||
|
||||
/**
 * Fraction of `expected` lines that have a match in `actual`.
 *
 * Expected lines are processed in order; each one claims the most similar
 * still-unclaimed actual line, and the claim counts only when the similarity
 * is at least 0.75. An actual line can be claimed once.
 *
 * @returns A value in [0, 1]; 1 when `expected` is empty.
 */
function scoreLines(expected: string[], actual: string[]): number {
  if (expected.length === 0) return 1;

  const claimed = new Set<number>();
  let hits = 0;

  for (const want of expected) {
    let winner = -1;
    let winnerSim = 0;
    for (const [idx, candidate] of actual.entries()) {
      if (claimed.has(idx)) continue;
      const sim = similarity(want, candidate);
      if (sim > winnerSim) {
        winnerSim = sim;
        winner = idx;
      }
    }
    if (winner < 0 || winnerSim < 0.75) continue;
    hits += 1;
    claimed.add(winner);
  }

  return hits / expected.length;
}
|
||||
|
||||
/**
 * Same greedy matching as `scoreLines`, but also reports which lines matched.
 *
 * @returns `matched` / `missed`: the expected lines that did / did not find
 *   an unclaimed actual line with similarity of at least 0.75; `extra`: the
 *   actual lines nobody claimed; `score`: matched fraction (1 when `expected`
 *   is empty).
 */
function scoreLinesVerbose(expected: string[], actual: string[]): { matched: string[]; missed: string[]; extra: string[]; score: number } {
  const taken = new Set<number>();
  const matched: string[] = [];
  const missed: string[] = [];

  for (const want of expected) {
    let winner = -1;
    let winnerSim = 0;
    for (const [idx, candidate] of actual.entries()) {
      if (taken.has(idx)) continue;
      const sim = similarity(want, candidate);
      if (sim > winnerSim) {
        winnerSim = sim;
        winner = idx;
      }
    }

    const accepted = winner >= 0 && winnerSim >= 0.75;
    if (accepted) taken.add(winner);
    (accepted ? matched : missed).push(want);
  }

  const extra: string[] = [];
  actual.forEach((line, idx) => {
    if (!taken.has(idx)) extra.push(line);
  });

  const score = expected.length === 0 ? 1 : matched.length / expected.length;
  return { matched, missed, extra, score };
}
|
||||
|
||||
// ── Daemon helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Runs one test case through the daemon and returns the recognised lines.
 *
 * Sends a `snapshot` command for the case's full image, then a `diff-ocr`
 * command for its image. `engine`, `preprocess` and `params` are added to the
 * request only when they differ from 'tesseract' / 'none' / empty.
 *
 * @returns The `text` of every response line, trimmed, with empty ones dropped.
 */
async function runCase(
  daemon: OcrDaemon,
  tc: TestCase,
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<string[]> {
  // File paths are sent to the daemon with backslash separators.
  const toBackslashPath = (file: string): string => join(tessdataDir, file).replace(/\//g, '\\');

  // NOTE(review): sendWithRetry is reached through an `any` cast — presumably
  // it is not part of OcrDaemon's public typing; confirm.
  const client = daemon as any;

  await client.sendWithRetry({ cmd: 'snapshot', file: toBackslashPath(tc.fullImage) }, 10_000);

  const hasParams = params !== undefined && Object.keys(params).length > 0;
  const request: any = {
    cmd: 'diff-ocr',
    file: toBackslashPath(tc.image),
    ...(engine !== 'tesseract' ? { engine } : {}),
    ...(preprocess !== 'none' ? { preprocess } : {}),
    ...(hasParams ? { params } : {}),
  };

  // Engines other than tesseract get a much longer timeout.
  const timeout = engine === 'tesseract' ? 10_000 : 120_000;
  const resp = await client.sendWithRetry(request, timeout);

  const texts: string[] = [];
  for (const line of resp.lines ?? []) {
    const text: string = (line.text ?? '').trim();
    if (text.length > 0) texts.push(text);
  }
  return texts;
}
|
||||
|
||||
/**
 * Average `scoreLines` result of an engine/preprocess/params setup over all
 * test cases.
 *
 * A case whose daemon call throws contributes 0 to the average; this is
 * deliberate so a single failing case does not abort a tuning sweep.
 *
 * @returns A value in [0, 1]; 0 when `cases` is empty (previously NaN, which
 *   makes every later score comparison false).
 */
async function scoreCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<number> {
  if (cases.length === 0) return 0;

  let totalScore = 0;
  for (const tc of cases) {
    try {
      const actual = await runCase(daemon, tc, tessdataDir, engine, preprocess, params);
      totalScore += scoreLines(tc.expected, actual);
    } catch {
      // error = 0 score for this case
    }
  }
  return totalScore / cases.length;
}
|
||||
|
||||
// ── Parameter sweep definitions ────────────────────────────────────────────
|
||||
|
||||
/** Candidate values to try for one numeric crop parameter. */
interface CropIntSweep {
  /** Key of the crop parameter being varied. */
  name: keyof DiffCropParams;
  /** Values tried in order. */
  values: number[];
}
|
||||
|
||||
/** Candidate values to try for one numeric OCR parameter. */
interface OcrIntSweep {
  /** Key of the OCR parameter being varied. */
  name: keyof OcrParams;
  /** Values tried in order. */
  values: number[];
}
|
||||
|
||||
/** Candidate values to try for one boolean OCR parameter. */
interface OcrBoolSweep {
  /** Key of the OCR parameter being varied. */
  name: keyof OcrParams;
  /** Values tried in order. */
  values: boolean[];
}
|
||||
|
||||
/** Crop parameters swept in phase A of tuning, for every combo. */
const CROP_SWEEPS: CropIntSweep[] = [
  {
    name: 'diffThresh',
    values: [10, 15, 20, 25, 30, 40, 50],
  },
  {
    name: 'maxGap',
    values: [5, 10, 15, 20, 25, 30],
  },
];

/** Candidate values for `crop.trimCutoff`, swept after CROP_SWEEPS in each crop round. */
const CROP_TRIM_VALUES = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];

/** OCR parameters swept in phase B for every combo. */
const SHARED_OCR_SWEEPS: OcrIntSweep[] = [
  {
    name: 'upscale',
    values: [1, 2, 3],
  },
  {
    name: 'mergeGap',
    values: [0, 20, 40, 60, 80, 100],
  },
];

/** Extra numeric OCR sweeps used only when preprocess is 'bgsub'. */
const BGSUB_INT_SWEEPS: OcrIntSweep[] = [
  {
    name: 'dimPercentile',
    values: [5, 10, 15, 20, 25, 30, 40, 50, 60],
  },
  {
    name: 'textThresh',
    values: [10, 20, 30, 40, 50, 60, 80, 100],
  },
];

/** Extra boolean OCR sweeps used only when preprocess is 'bgsub'. */
const BGSUB_BOOL_SWEEPS: OcrBoolSweep[] = [
  {
    name: 'softThreshold',
    values: [false, true],
  },
];

/** Extra numeric OCR sweeps used only when preprocess is 'tophat'. */
const TOPHAT_SWEEPS: OcrIntSweep[] = [
  {
    name: 'kernelSize',
    values: [11, 15, 21, 25, 31, 41, 51, 61],
  },
];
|
||||
|
||||
// ── Default params per preprocess ──────────────────────────────────────────
|
||||
|
||||
/**
 * Starting parameter set for a preprocessing mode; tuning begins from here.
 *
 * The crop settings are the same for every mode; only the `ocr` section
 * depends on `preprocess`. A fresh object is returned on every call.
 */
function defaultParams(preprocess: OcrPreprocess): DiffOcrParams {
  const crop: DiffCropParams = { diffThresh: 20, maxGap: 20, trimCutoff: 0.4 };

  switch (preprocess) {
    case 'bgsub':
      return {
        crop,
        ocr: { useBackgroundSub: true, upscale: 2, dimPercentile: 40, textThresh: 60, softThreshold: false },
      };
    case 'tophat':
      return {
        crop,
        ocr: { useBackgroundSub: false, upscale: 2, kernelSize: 41 },
      };
    default:
      // 'none'
      return { crop, ocr: { upscale: 2 } };
  }
}
|
||||
|
||||
/**
 * Deep copy of a parameter set via a JSON round trip, so a trial can be
 * modified without touching the current best parameters.
 */
function cloneParams(p: DiffOcrParams): DiffOcrParams {
  const serialized = JSON.stringify(p);
  return JSON.parse(serialized);
}
|
||||
|
||||
// ── Coordinate descent tuner (two-phase: crop then OCR) ──────────────────
|
||||
|
||||
/**
 * Tunes one combo by coordinate descent, starting from `defaultParams`.
 *
 * Phase A sweeps the crop parameters (CROP_SWEEPS, then `trimCutoff`);
 * phase B sweeps the OCR parameters that apply to the combo's preprocess
 * mode, with the crop settings left as phase A chose them. Each phase runs
 * up to three rounds and stops early after a round with no improvement.
 * Within a sweep every candidate value is scored with the other parameters
 * held fixed; the first value reaching the highest score is adopted only if
 * that score is strictly better than the best so far. Progress is written to
 * stderr.
 *
 * @returns The combo label, best score, the parameters that produced it and
 *   the number of scoring runs performed.
 */
async function tuneCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
): Promise<TuneResult> {
  type Section = 'crop' | 'ocr';
  type SweepValue = number | boolean;
  type Sweep = { name: string; values: readonly SweepValue[] };

  const MAX_ROUNDS = 3;
  const params = defaultParams(combo.preprocess);

  const evaluate = (candidate: DiffOcrParams): Promise<number> =>
    scoreCombo(daemon, cases, tessdataDir, combo.engine, combo.preprocess, candidate);

  let bestScore = await evaluate(params);
  let evals = 1;

  process.stderr.write(` baseline: ${(bestScore * 100).toFixed(1)}% ${JSON.stringify(params)}\n`);

  // Single place where a swept value is written into a parameter section.
  const assign = (target: DiffOcrParams, section: Section, name: string, value: SweepValue): void => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- value type varies per swept key
    (target[section] as any)[name] = value;
  };

  // Scores every candidate for one parameter; adopts the best one into
  // `params` when it strictly beats `bestScore`. Returns whether it did.
  const sweep = async (section: Section, name: string, values: readonly SweepValue[]): Promise<boolean> => {
    process.stderr.write(` ${section}.${name}: `);
    let bestVal: SweepValue | undefined;
    let bestValScore = -1;

    for (const v of values) {
      const trial = cloneParams(params);
      assign(trial, section, name, v);
      const score = await evaluate(trial);
      evals++;
      process.stderr.write(`${v}=${(score * 100).toFixed(1)} `);
      if (score > bestValScore) { bestValScore = score; bestVal = v; }
    }
    process.stderr.write('\n');

    if (bestVal === undefined || !(bestValScore > bestScore)) return false;

    assign(params, section, name, bestVal);
    bestScore = bestValScore;
    process.stderr.write(` -> ${section}.${name}=${bestVal} score=${(bestScore * 100).toFixed(1)}%\n`);
    return true;
  };

  // Repeats the given sweeps until a round brings no improvement or
  // MAX_ROUNDS is reached.
  const runRounds = async (section: Section, startLabel: string, endLabel: string, sweeps: readonly Sweep[]): Promise<void> => {
    for (let round = 0; round < MAX_ROUNDS; round++) {
      let improved = false;
      process.stderr.write(` --- ${startLabel} Round ${round + 1} ---\n`);

      for (const { name, values } of sweeps) {
        if (await sweep(section, name, values)) improved = true;
      }

      process.stderr.write(` End ${endLabel} round ${round + 1}: ${(bestScore * 100).toFixed(1)}% (${evals} evals)\n`);
      if (!improved) break;
    }
  };

  const toSweep = (s: { name: PropertyKey; values: readonly SweepValue[] }): Sweep =>
    ({ name: String(s.name), values: s.values });

  // ── Phase A: Tune crop params ──
  process.stderr.write(`\n === Phase A: Crop Params ===\n`);
  const cropSweeps: Sweep[] = [
    ...CROP_SWEEPS.map(toSweep),
    { name: 'trimCutoff', values: CROP_TRIM_VALUES },
  ];
  await runRounds('crop', 'Crop', 'crop', cropSweeps);

  // ── Phase B: Tune OCR params (crop is now locked) ──
  process.stderr.write(`\n === Phase B: OCR Params (crop locked) ===\n`);
  const ocrSweeps: Sweep[] = SHARED_OCR_SWEEPS.map(toSweep);
  if (combo.preprocess === 'bgsub') {
    // Numeric sweeps first, boolean sweeps last.
    ocrSweeps.push(...BGSUB_INT_SWEEPS.map(toSweep), ...BGSUB_BOOL_SWEEPS.map(toSweep));
  } else if (combo.preprocess === 'tophat') {
    ocrSweeps.push(...TOPHAT_SWEEPS.map(toSweep));
  }
  await runRounds('ocr', 'OCR', 'OCR', ocrSweeps);

  return { label: combo.label, score: bestScore, params, evals };
}
|
||||
|
||||
// ── Verbose test run ───────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Runs every case for one combo, printing a PASS/FAIL line per case followed
 * by its missed and extra lines, and returns the average score.
 *
 * A case whose daemon call throws is reported as `[ERROR]` and contributes 0.
 *
 * @returns A value in [0, 1]; 0 when `cases` is empty (previously NaN).
 */
async function testCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
  params?: DiffOcrParams,
): Promise<number> {
  if (cases.length === 0) return 0;

  let totalScore = 0;
  for (const tc of cases) {
    try {
      const actual = await runCase(daemon, tc, tessdataDir, combo.engine, combo.preprocess, params);
      const { matched, missed, extra, score } = scoreLinesVerbose(tc.expected, actual);
      totalScore += score;
      const status = missed.length === 0 ? 'PASS' : 'FAIL';
      console.log(` [${status}] ${tc.id} matched=${matched.length}/${tc.expected.length} extra=${extra.length} score=${score.toFixed(2)}`);
      for (const m of missed) console.log(` MISS: ${m}`);
      for (const e of extra) console.log(` EXTRA: ${e}`);
    } catch (err: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      const message = err instanceof Error ? err.message : String(err);
      console.log(` [ERROR] ${tc.id}: ${message}`);
    }
  }
  return totalScore / cases.length;
}
|
||||
|
||||
// ── Main ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Entry point. Parses the command line (`--tune` flag plus an optional label
 * filter), loads the test cases from `cases.json` in the tessdata directory,
 * then either tunes or tests every selected combo and prints a summary sorted
 * by score.
 *
 * The daemon is stopped in a `finally` block so it is released even when a
 * run throws.
 */
async function main(): Promise<void> {
  const args = process.argv.slice(2);
  const tuneMode = args.includes('--tune');
  // First argument that is not a flag, matched as a substring of combo labels.
  const filterArg = args.find(a => !a.startsWith('--'))?.toLowerCase();

  const combos = filterArg
    ? ALL_COMBOS.filter(c => c.label.includes(filterArg))
    : ALL_COMBOS;

  const tessdataDir = join('tools', 'OcrDaemon', 'bin', 'Release', 'net8.0-windows10.0.19041.0', 'tessdata');
  const casesPath = join(tessdataDir, 'cases.json');
  // NOTE(review): the parsed JSON is trusted to match TestCase[] without validation.
  const cases: TestCase[] = JSON.parse(readFileSync(casesPath, 'utf-8'));

  console.log(`Loaded ${cases.length} test cases: ${cases.map(c => c.id).join(', ')}`);
  console.log(`Mode: ${tuneMode ? 'TUNE' : 'TEST'} Combos: ${combos.length}\n`);

  // Thrown values are not guaranteed to be Error instances.
  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  const daemon = new OcrDaemon();

  try {
    if (tuneMode) {
      // ── Tune mode: coordinate descent for each combo ──
      const tuneResults: TuneResult[] = [];

      for (const combo of combos) {
        console.log(`\n${'='.repeat(60)}`);
        console.log(` TUNING: ${combo.label}`);
        console.log(`${'='.repeat(60)}`);

        try {
          const result = await tuneCombo(daemon, cases, tessdataDir, combo);
          tuneResults.push(result);

          console.log(`\n Best: ${(result.score * 100).toFixed(1)}% (${result.evals} evals)`);
          console.log(` Params: ${JSON.stringify(result.params)}`);

          // Verbose run with best params
          console.log('');
          await testCombo(daemon, cases, tessdataDir, combo, result.params);
        } catch (err: unknown) {
          console.log(` ERROR: ${describeError(err)}`);
          tuneResults.push({ label: combo.label, score: 0, params: {}, evals: 0 });
        }
      }

      // Summary
      console.log(`\n${'='.repeat(70)}`);
      console.log(' TUNE RESULTS');
      console.log(`${'='.repeat(70)}`);

      const sorted = [...tuneResults].sort((a, b) => b.score - a.score);
      for (const r of sorted) {
        const bar = '#'.repeat(Math.round(r.score * 40));
        console.log(` ${r.label.padEnd(22)} ${(r.score * 100).toFixed(1).padStart(5)}% ${bar}`);
      }

      console.log(`\n BEST PARAMS PER COMBO:`);
      for (const r of sorted) {
        if (r.score > 0) {
          console.log(` ${r.label.padEnd(22)} ${JSON.stringify(r.params)}`);
        }
      }
    } else {
      // ── Test mode: defaults only ──
      const results: Record<string, number> = {};

      for (const combo of combos) {
        console.log(`\n${'='.repeat(60)}`);
        console.log(` ${combo.label}`);
        console.log(`${'='.repeat(60)}`);

        try {
          const score = await testCombo(daemon, cases, tessdataDir, combo);
          results[combo.label] = score;
          console.log(`\n Average: ${(score * 100).toFixed(1)}%`);
        } catch (err: unknown) {
          console.log(` ERROR: ${describeError(err)}`);
          results[combo.label] = 0;
        }
      }

      console.log(`\n${'='.repeat(60)}`);
      console.log(' SUMMARY');
      console.log(`${'='.repeat(60)}`);

      const sorted = Object.entries(results).sort((a, b) => b[1] - a[1]);
      for (const [label, score] of sorted) {
        const bar = '#'.repeat(Math.round(score * 40));
        console.log(` ${label.padEnd(22)} ${(score * 100).toFixed(1).padStart(5)}% ${bar}`);
      }
    }
  } finally {
    // Release the daemon on failure as well as on success.
    await daemon.stop();
  }
}
|
||||
|
||||
// Start the runner; any uncaught failure is printed and the process exits with status 1.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue