deleted old

This commit is contained in:
Boki 2026-02-13 01:27:20 -05:00
parent 4a65c8e17b
commit 696fd07e86
33 changed files with 1 additions and 6292 deletions

View file

@ -1,104 +0,0 @@
import { spawn } from 'child_process';
import { join } from 'path';
// Build output directory of the OcrDaemon Release build; every path below hangs off it.
const OCR_DAEMON_BIN_DIR = join('tools', 'OcrDaemon', 'bin', 'Release', 'net8.0-windows10.0.19041.0');
// Daemon executable spawned by run().
const EXE = join(OCR_DAEMON_BIN_DIR, 'OcrDaemon.exe');
// tessdata directory that also holds the test images (under images/).
const TESSDATA = join(OCR_DAEMON_BIN_DIR, 'tessdata');
// Directory the first run of each case asks the daemon to write its crop into.
const SAVE_DIR = join(TESSDATA, 'images');
// Expected OCR text lines, keyed by test-case id (see the `cases` list in run()).
// run() fuzzy-matches each entry against the daemon's output with levenshteinSim,
// which lower-cases both sides, so letter case here does not affect matching.
const expected = {
// Lines expected for the 'vertex1' case (image vertex1.png).
vertex1: [
'The Vertex', 'Tribal Mask', 'Helmet', 'Quality: +20%',
'Evasion Rating: 79', 'Energy Shield: 34', 'Requires: Level 33',
'16% Increased Life Regeneration Rate', 'Has no Attribute Requirements',
'+15% to Chaos Resistance', 'Skill gems have no attribute requirements',
'+3 to level of all skills', '15% increased mana cost efficiency',
'Twice Corrupted', 'Asking Price:', '7x Divine Orb',
],
// Lines expected for the 'vertex2' case (image vertex2.png).
vertex2: [
'The Vertex', 'Tribal Mask', 'Helmet', 'Quality: +20%',
'Evasion Rating: 182', 'Energy Shield: 77', 'Requires: Level 33',
'+29 To Spirit', '+1 to Level of All Minion Skills',
'Has no Attribute Requirements', '130% increased Evasion and Energy Shield',
'27% Increased Critical Hit Chance', '+13% to Chaos Resistance',
'+2 to level of all skills', 'Twice Corrupted', 'Asking Price:', '35x Divine Orb',
],
};
/**
 * Case-insensitive similarity of two strings based on Levenshtein edit distance.
 *
 * @param a First string.
 * @param b Second string.
 * @returns 1 when the lower-cased strings are equal (including both empty),
 *          0 when exactly one of them is empty, otherwise
 *          `1 - distance / max(length)`.
 */
function levenshteinSim(a, b) {
  const left = a.toLowerCase();
  const right = b.toLowerCase();
  if (left === right) return 1;

  const leftLen = left.length;
  const rightLen = right.length;
  if (leftLen === 0 || rightLen === 0) return 0;

  // Only the previous DP row is needed to compute the next one.
  let prevRow = Array.from({ length: rightLen + 1 }, (_, col) => col);
  for (let row = 1; row <= leftLen; row++) {
    const currRow = [row];
    for (let col = 1; col <= rightLen; col++) {
      const substitution = prevRow[col - 1] + (left[row - 1] === right[col - 1] ? 0 : 1);
      const deletion = prevRow[col] + 1;
      const insertion = currRow[col - 1] + 1;
      currRow.push(Math.min(deletion, insertion, substitution));
    }
    prevRow = currRow;
  }
  return 1 - prevRow[rightLen] / Math.max(leftLen, rightLen);
}
/**
 * Benchmarks the OcrDaemon's `diff-ocr` command with the `easyocr` engine.
 *
 * Spawns the daemon and talks to it over newline-delimited JSON on
 * stdin/stdout, one request in flight at a time. For every case it runs
 * `snapshot` + `diff-ocr` three times: the first run asks the daemon to save
 * the crop and is checked against `expected`; the remaining runs only report
 * timing.
 *
 * Rejects if the daemon cannot be spawned, exits while a response is pending,
 * or its stdin fails, instead of leaving the pending request unresolved.
 * The daemon is always shut down before the promise settles.
 */
async function run() {
  const proc = spawn(EXE, [], { stdio: ['pipe', 'pipe', 'pipe'] });
  let buffer = '';
  // Resolver / rejecter of the single in-flight request (or of the initial wait).
  let resolveNext;
  let rejectNext;
  // First fatal daemon error seen; later commands fail fast with it.
  let fatal = null;

  const fail = (err) => {
    fatal = fatal ?? err;
    if (!rejectNext) return;
    const reject = rejectNext;
    resolveNext = null;
    rejectNext = null;
    reject(err);
  };

  // Registers the pending-response slot that the stdout handler fulfils.
  const nextResponse = (afterRegister) =>
    new Promise((resolve, reject) => {
      if (fatal) { reject(fatal); return; }
      resolveNext = resolve;
      rejectNext = reject;
      if (afterRegister) afterRegister();
    });

  proc.on('error', fail);
  // 'close' fires after stdout has been drained, so a final response is not lost.
  proc.on('close', (code, signal) => fail(new Error(`OcrDaemon exited (code=${code}, signal=${signal})`)));
  proc.stdin.on('error', fail);

  proc.stdout.on('data', (data) => {
    buffer += data.toString();
    let idx;
    while ((idx = buffer.indexOf('\n')) !== -1) {
      const line = buffer.slice(0, idx).trim();
      buffer = buffer.slice(idx + 1);
      if (!line) continue;
      try {
        const parsed = JSON.parse(line);
        if (resolveNext) {
          const resolve = resolveNext;
          resolveNext = null;
          rejectNext = null;
          resolve(parsed);
        }
      } catch {
        // Lines that are not valid JSON are deliberately ignored.
      }
    }
  });
  proc.stderr.on('data', (data) => process.stderr.write(data));

  function sendCmd(cmd) {
    return nextResponse(() => proc.stdin.write(JSON.stringify(cmd) + '\n'));
  }

  try {
    // Wait for the daemon's first JSON line before sending anything
    // (presumably a ready/handshake message; confirm against the daemon).
    await nextResponse();

    const cases = [
      { id: 'vertex1', image: 'vertex1.png', snapshot: 'vertex-snapshot.png' },
      { id: 'vertex2', image: 'vertex2.png', snapshot: 'vertex-snapshot.png' },
    ];
    for (const tc of cases) {
      const snapPath = join(TESSDATA, 'images', tc.snapshot);
      const imgPath = join(TESSDATA, 'images', tc.image);
      // 3 runs: first saves crop, rest just timing
      for (let i = 0; i < 3; i++) {
        await sendCmd({ cmd: 'snapshot', file: snapPath });
        const savePath = i === 0 ? join(SAVE_DIR, `${tc.id}_easyocr_crop.png`) : undefined;
        const t0 = performance.now();
        const resp = await sendCmd({ cmd: 'diff-ocr', file: imgPath, engine: 'easyocr', ...(savePath ? { path: savePath } : {}) });
        const ms = (performance.now() - t0).toFixed(0);
        const region = resp.region;
        const lines = (resp.lines || []).map(l => l.text.trim()).filter(l => l.length > 0);
        if (i === 0) {
          // Accuracy check on first run: greedily pair each expected line with
          // its most similar not-yet-used OCR line.
          const exp = expected[tc.id];
          const used = new Set();
          let matched = 0, fuzzy = 0, missed = 0;
          for (const e of exp) {
            let bestIdx = -1, bestSim = 0;
            for (let j = 0; j < lines.length; j++) {
              if (used.has(j)) continue;
              const sim = levenshteinSim(e, lines[j]);
              if (sim > bestSim) { bestSim = sim; bestIdx = j; }
            }
            if (bestIdx >= 0 && bestSim >= 0.75) {
              used.add(bestIdx);
              // >= 0.95 counts as an exact hit, 0.75..0.95 as a fuzzy hit.
              if (bestSim >= 0.95) matched++; else fuzzy++;
            } else {
              missed++;
              console.log(` MISS: ${e}${bestIdx >= 0 ? ` (best: "${lines[bestIdx]}", sim=${bestSim.toFixed(2)})` : ''}`);
            }
          }
          console.log(`${tc.id}: ${ms}ms crop=${region?.width}x${region?.height} at (${region?.x},${region?.y}) ${matched} OK / ${fuzzy}~ / ${missed} miss lines=${lines.length}${savePath ? ' [saved]' : ''}`);
        } else {
          console.log(`${tc.id}: ${ms}ms crop=${region?.width}x${region?.height}`);
        }
      }
      console.log();
    }
  } finally {
    // Always shut the daemon down, even when a request failed.
    proc.stdin?.end();
    proc.kill();
  }
}
// Entry point: log any failure and report it through a non-zero exit status
// so callers (shell scripts, CI) can detect that the benchmark did not finish.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View file

@ -1,484 +0,0 @@
/**
* OCR test runner + parameter tuner.
*
* Usage:
* npx tsx tools/test-ocr.ts # test all combos with defaults
* npx tsx tools/test-ocr.ts paddleocr # filter to paddleocr combos
* npx tsx tools/test-ocr.ts --tune # tune all combos (coordinate descent)
* npx tsx tools/test-ocr.ts --tune easyocr # tune only easyocr combos
*/
import { OcrDaemon, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams } from '../src/game/OcrDaemon.js';
import { readFileSync } from 'fs';
import { join } from 'path';
// ── Types ──────────────────────────────────────────────────────────────────
/** One OCR regression case, as read from `cases.json` in the tessdata directory. */
interface TestCase {
// Identifier shown in the test output.
id: string;
// Image path, joined onto the tessdata directory and sent as `file` of the `diff-ocr` command.
image: string;
// Image path, joined onto the tessdata directory and sent as `file` of the preceding `snapshot` command.
fullImage: string;
// Text lines the OCR output is fuzzy-matched against.
expected: string[];
}
/** An OCR engine paired with a preprocessing mode, plus the label used for filtering and display. */
interface Combo {
// Engine passed to the daemon's `diff-ocr` command.
engine: OcrEngine;
// Preprocess mode passed to the daemon's `diff-ocr` command.
preprocess: OcrPreprocess;
// Human-readable name; the CLI filter argument is matched against it as a substring.
label: string;
}
/** Outcome of tuning a single combo. */
interface TuneResult {
// Label of the combo that was tuned.
label: string;
// Best average score found (fraction of expected lines matched, averaged over cases).
score: number;
// Parameters that produced `score`.
params: DiffOcrParams;
// Number of scoreCombo evaluations spent.
evals: number;
}
// ── Combos ─────────────────────────────────────────────────────────────────
/**
 * Every engine × preprocess pairing, grouped by engine (tesseract, easyocr,
 * paddleocr) and, within an engine, ordered bgsub, tophat, none.
 * Labels have the form `<engine>+<preprocess>`.
 */
const ALL_COMBOS: Combo[] = (() => {
  const engines: OcrEngine[] = ['tesseract', 'easyocr', 'paddleocr'];
  const preprocessModes: OcrPreprocess[] = ['bgsub', 'tophat', 'none'];
  return engines.flatMap((engine) =>
    preprocessModes.map((preprocess) => ({
      engine,
      preprocess,
      label: `${engine}+${preprocess}`,
    })),
  );
})();
// ── Scoring ────────────────────────────────────────────────────────────────
/**
 * Levenshtein edit distance between two strings (case-sensitive).
 *
 * @param a First string.
 * @param b Second string.
 * @returns Minimum number of single-character insertions, deletions and
 *          substitutions that turn `a` into `b`.
 */
function levenshtein(a: string, b: string): number {
  // Row 0 of the DP table: turning the empty prefix of `a` into each prefix of `b`.
  let previous: number[] = Array.from({ length: b.length + 1 }, (_, col) => col);

  for (let row = 1; row <= a.length; row++) {
    const current: number[] = [row];
    for (let col = 1; col <= b.length; col++) {
      if (a[row - 1] === b[col - 1]) {
        current.push(previous[col - 1]);
      } else {
        current.push(1 + Math.min(previous[col], current[col - 1], previous[col - 1]));
      }
    }
    previous = current;
  }
  return previous[b.length];
}
/**
 * Case-insensitive similarity of two strings, `1 - distance / maxLength`.
 *
 * Both the edit distance and the normalising length are taken from the
 * lower-cased strings. Lower-casing can change a string's length (for
 * example "İ" becomes "i" plus a combining dot), and normalising by the
 * original length could then push the result below 0.
 *
 * @param a First string.
 * @param b Second string.
 * @returns A value in [0, 1]; 1 means equal ignoring case, and two empty
 *          strings are considered equal.
 */
function similarity(a: string, b: string): number {
  const lowerA = a.toLowerCase();
  const lowerB = b.toLowerCase();
  const maxLen = Math.max(lowerA.length, lowerB.length);
  if (maxLen === 0) return 1;
  return 1 - levenshtein(lowerA, lowerB) / maxLen;
}
/**
 * Fraction of `expected` lines that have a fuzzy match in `actual`.
 *
 * Delegates to scoreLinesVerbose so the matching rule (greedy best match per
 * expected line, each actual line used at most once, similarity >= 0.75)
 * lives in exactly one place.
 *
 * @param expected Lines that should be present.
 * @param actual Lines produced by OCR.
 * @returns A value in [0, 1]; 1 when `expected` is empty.
 */
function scoreLines(expected: string[], actual: string[]): number {
  return scoreLinesVerbose(expected, actual).score;
}
/**
 * Fuzzy-matches expected lines against OCR output and reports the details.
 *
 * Each expected line, in order, claims the most similar actual line that has
 * not been claimed yet; the pair counts as a match when the similarity is at
 * least 0.75.
 *
 * @param expected Lines that should be present.
 * @param actual Lines produced by OCR.
 * @returns `matched` / `missed` expected lines, `extra` actual lines nobody
 *          claimed, and `score` = matched / expected (1 when `expected` is empty).
 */
function scoreLinesVerbose(expected: string[], actual: string[]): { matched: string[]; missed: string[]; extra: string[]; score: number } {
  const claimed = new Set<number>();
  const matched: string[] = [];
  const missed: string[] = [];

  for (const wanted of expected) {
    let winner = -1;
    let winnerSim = 0;
    for (const [idx, candidate] of actual.entries()) {
      if (claimed.has(idx)) continue;
      const sim = similarity(wanted, candidate);
      if (sim > winnerSim) {
        winnerSim = sim;
        winner = idx;
      }
    }

    const accepted = winner >= 0 && winnerSim >= 0.75;
    if (!accepted) {
      missed.push(wanted);
      continue;
    }
    matched.push(wanted);
    claimed.add(winner);
  }

  const extra = actual.filter((_, idx) => !claimed.has(idx));
  const score = expected.length > 0 ? matched.length / expected.length : 1;
  return { matched, missed, extra, score };
}
// ── Daemon helpers ─────────────────────────────────────────────────────────
/**
 * Runs one test case through the daemon: a `snapshot` of the full image
 * followed by a `diff-ocr` of the case image.
 *
 * @param daemon Daemon to send the commands to.
 * @param tc Test case providing the two image paths.
 * @param tessdataDir Directory the image paths are joined onto.
 * @param engine OCR engine; only included in the request when not 'tesseract'.
 * @param preprocess Preprocess mode; only included in the request when not 'none'.
 * @param params Optional parameters; only included when the object has at least one key.
 * @returns The trimmed, non-empty text of each line in the response.
 */
async function runCase(
  daemon: OcrDaemon,
  tc: TestCase,
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<string[]> {
  // Paths are sent with backslash separators.
  const withBackslashes = (p: string): string => p.replace(/\//g, '\\');
  // sendWithRetry is reached through an `any` cast, as it is not callable via the typed interface here.
  const send = (request: unknown, timeoutMs: number): Promise<any> =>
    (daemon as any).sendWithRetry(request, timeoutMs);

  const snapshotFile = withBackslashes(join(tessdataDir, tc.fullImage));
  const diffFile = withBackslashes(join(tessdataDir, tc.image));

  await send({ cmd: 'snapshot', file: snapshotFile }, 10_000);

  const hasParams = params !== undefined && params !== null && Object.keys(params).length > 0;
  const request: Record<string, unknown> = {
    cmd: 'diff-ocr',
    file: diffFile,
    ...(engine !== 'tesseract' ? { engine } : {}),
    ...(preprocess !== 'none' ? { preprocess } : {}),
    ...(hasParams ? { params } : {}),
  };

  // Non-tesseract engines get a much longer timeout.
  const timeoutMs = engine === 'tesseract' ? 10_000 : 120_000;
  const response = await send(request, timeoutMs);

  const rawLines: any[] = response.lines ?? [];
  const texts: string[] = rawLines.map((entry: any) => (entry.text ?? '').trim());
  return texts.filter((text) => text.length > 0);
}
/**
 * Average scoreLines result over all cases for one engine/preprocess/params
 * setting. Cases are run one after another; a case that throws contributes 0.
 *
 * @returns Sum of per-case scores divided by the number of cases.
 */
async function scoreCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<number> {
  // Scores a single case; any failure counts as a score of 0.
  const scoreOne = async (testCase: TestCase): Promise<number> => {
    try {
      const ocrLines = await runCase(daemon, testCase, tessdataDir, engine, preprocess, params);
      return scoreLines(testCase.expected, ocrLines);
    } catch {
      return 0;
    }
  };

  let sum = 0;
  for (const testCase of cases) {
    const caseScore = await scoreOne(testCase);
    sum += caseScore;
  }
  return sum / cases.length;
}
// ── Parameter sweep definitions ────────────────────────────────────────────
/** Candidate values to try for one numeric crop parameter during tuning. */
interface CropIntSweep {
// Key of the crop parameter that is overwritten on each trial.
name: keyof DiffCropParams;
// Values tried, in order.
values: number[];
}
/** Candidate values to try for one numeric OCR parameter during tuning. */
interface OcrIntSweep {
// Key of the OCR parameter that is overwritten on each trial.
name: keyof OcrParams;
// Values tried, in order.
values: number[];
}
/** Candidate values to try for one boolean OCR parameter during tuning. */
interface OcrBoolSweep {
// Key of the OCR parameter that is overwritten on each trial.
name: keyof OcrParams;
// Values tried, in order.
values: boolean[];
}
// Crop parameters swept in tuning phase A, in this order.
const CROP_SWEEPS: CropIntSweep[] = [
{ name: 'diffThresh', values: [10, 15, 20, 25, 30, 40, 50] },
{ name: 'maxGap', values: [5, 10, 15, 20, 25, 30] },
];
// Candidate values for crop.trimCutoff, swept after CROP_SWEEPS in each phase-A round.
const CROP_TRIM_VALUES = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];
// OCR parameters swept in tuning phase B for every preprocess mode.
const SHARED_OCR_SWEEPS: OcrIntSweep[] = [
{ name: 'upscale', values: [1, 2, 3] },
{ name: 'mergeGap', values: [0, 20, 40, 60, 80, 100] },
];
// Extra numeric OCR parameters swept only when the preprocess mode is 'bgsub'.
const BGSUB_INT_SWEEPS: OcrIntSweep[] = [
{ name: 'dimPercentile', values: [5, 10, 15, 20, 25, 30, 40, 50, 60] },
{ name: 'textThresh', values: [10, 20, 30, 40, 50, 60, 80, 100] },
];
// Extra boolean OCR parameters swept only when the preprocess mode is 'bgsub'.
const BGSUB_BOOL_SWEEPS: OcrBoolSweep[] = [
{ name: 'softThreshold', values: [false, true] },
];
// Extra numeric OCR parameters swept only when the preprocess mode is 'tophat'.
const TOPHAT_SWEEPS: OcrIntSweep[] = [
{ name: 'kernelSize', values: [11, 15, 21, 25, 31, 41, 51, 61] },
];
// ── Default params per preprocess ──────────────────────────────────────────
/**
 * Starting parameters for a preprocess mode; used as the tuning baseline.
 *
 * The crop section is the same for every mode; the OCR section depends on
 * whether the mode is 'bgsub', 'tophat' or anything else.
 *
 * @param preprocess Preprocess mode to build defaults for.
 * @returns A freshly built parameter object.
 */
function defaultParams(preprocess: OcrPreprocess): DiffOcrParams {
  const crop: DiffCropParams = { diffThresh: 20, maxGap: 20, trimCutoff: 0.4 };

  switch (preprocess) {
    case 'bgsub':
      return {
        crop,
        ocr: { useBackgroundSub: true, upscale: 2, dimPercentile: 40, textThresh: 60, softThreshold: false },
      };
    case 'tophat':
      return {
        crop,
        ocr: { useBackgroundSub: false, upscale: 2, kernelSize: 41 },
      };
    default:
      // 'none'
      return { crop, ocr: { upscale: 2 } };
  }
}
/**
 * Deep-copies a parameter object via a JSON round trip, so trial mutations
 * never touch the original.
 *
 * @param p Parameters to copy.
 * @returns An independent copy of `p`.
 */
function cloneParams(p: DiffOcrParams): DiffOcrParams {
  const serialized = JSON.stringify(p);
  const copy: DiffOcrParams = JSON.parse(serialized);
  return copy;
}
// ── Coordinate descent tuner (two-phase: crop then OCR) ──────────────────
/**
 * Tunes one combo by coordinate descent, in two phases.
 *
 * Phase A sweeps the crop parameters (CROP_SWEEPS, then trimCutoff); phase B
 * sweeps the OCR parameters that apply to the combo's preprocess mode while
 * the crop parameters stay at their phase-A result. Each phase runs up to
 * three rounds and stops early after a round that improved nothing. Within a
 * sweep every candidate value is scored, and the first value reaching the
 * highest score is adopted only when that score is strictly better than the
 * best score so far.
 *
 * Progress is written to stderr.
 *
 * @param daemon Daemon used for all evaluations.
 * @param cases Test cases each candidate is scored on.
 * @param tessdataDir Directory containing the case images.
 * @param combo Engine/preprocess pair to tune.
 * @returns Best score, the parameters that achieved it, and the number of
 *          evaluations spent.
 */
async function tuneCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
): Promise<TuneResult> {
  type ParamGroup = 'crop' | 'ocr';
  type Sweep = { name: string | number; values: readonly (number | boolean)[] };

  const MAX_ROUNDS = 3;
  const params = defaultParams(combo.preprocess);
  let evals = 0;
  let bestScore = 0;

  // Formats a 0..1 score as a percentage with one decimal.
  const pct = (score: number): string => (score * 100).toFixed(1);

  // Scores one candidate parameter set and counts the evaluation.
  const evaluate = async (candidate: DiffOcrParams): Promise<number> => {
    const score = await scoreCombo(daemon, cases, tessdataDir, combo.engine, combo.preprocess, candidate);
    evals++;
    return score;
  };

  // Tries every value of one parameter; adopts the best one if it beats bestScore.
  const sweep = async (group: ParamGroup, { name, values }: Sweep): Promise<boolean> => {
    process.stderr.write(` ${group}.${name}: `);
    let bestVal: number | boolean | undefined;
    let bestValScore = -1;
    for (const v of values) {
      const trial = cloneParams(params);
      (trial[group] as any)[name] = v;
      const score = await evaluate(trial);
      process.stderr.write(`${v}=${pct(score)} `);
      if (score > bestValScore) { bestValScore = score; bestVal = v; }
    }
    process.stderr.write('\n');
    if (!(bestValScore > bestScore) || bestVal === undefined) return false;

    (params[group] as any)[name] = bestVal;
    bestScore = bestValScore;
    process.stderr.write(` -> ${group}.${name}=${bestVal} score=${pct(bestScore)}%\n`);
    return true;
  };

  // Repeats the given sweeps until a round brings no improvement or MAX_ROUNDS is hit.
  const runRounds = async (
    group: ParamGroup,
    headerLabel: string,
    footerLabel: string,
    sweeps: readonly Sweep[],
  ): Promise<void> => {
    for (let round = 1; round <= MAX_ROUNDS; round++) {
      let improved = false;
      process.stderr.write(` --- ${headerLabel} Round ${round} ---\n`);
      for (const entry of sweeps) {
        if (await sweep(group, entry)) improved = true;
      }
      process.stderr.write(` End ${footerLabel} round ${round}: ${pct(bestScore)}% (${evals} evals)\n`);
      if (!improved) break;
    }
  };

  bestScore = await evaluate(params);
  process.stderr.write(` baseline: ${pct(bestScore)}% ${JSON.stringify(params)}\n`);

  // ── Phase A: Tune crop params ──
  process.stderr.write(`\n === Phase A: Crop Params ===\n`);
  const cropSweeps: Sweep[] = [...CROP_SWEEPS, { name: 'trimCutoff', values: CROP_TRIM_VALUES }];
  await runRounds('crop', 'Crop', 'crop', cropSweeps);

  // ── Phase B: Tune OCR params (crop is now locked) ──
  process.stderr.write(`\n === Phase B: OCR Params (crop locked) ===\n`);
  // Numeric sweeps run before boolean sweeps within each round.
  const ocrSweeps: Sweep[] = [...SHARED_OCR_SWEEPS];
  if (combo.preprocess === 'bgsub') {
    ocrSweeps.push(...BGSUB_INT_SWEEPS, ...BGSUB_BOOL_SWEEPS);
  } else if (combo.preprocess === 'tophat') {
    ocrSweeps.push(...TOPHAT_SWEEPS);
  }
  await runRounds('ocr', 'OCR', 'OCR', ocrSweeps);

  return { label: combo.label, score: bestScore, params, evals };
}
// ── Verbose test run ───────────────────────────────────────────────────────
/**
 * Runs every case for one combo and prints a per-case report to stdout:
 * PASS/FAIL with counts, then each missed expected line and each unclaimed
 * OCR line. A case that throws is reported as ERROR and adds nothing to the
 * total.
 *
 * @param params Optional parameters forwarded to the daemon.
 * @returns Sum of per-case scores divided by the number of cases.
 */
async function testCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
  params?: DiffOcrParams,
): Promise<number> {
  let scoreSum = 0;

  for (const testCase of cases) {
    try {
      const ocrLines = await runCase(daemon, testCase, tessdataDir, combo.engine, combo.preprocess, params);
      const report = scoreLinesVerbose(testCase.expected, ocrLines);
      scoreSum += report.score;

      // A case passes only when no expected line was missed.
      const verdict = report.missed.length > 0 ? 'FAIL' : 'PASS';
      console.log(` [${verdict}] ${testCase.id} matched=${report.matched.length}/${testCase.expected.length} extra=${report.extra.length} score=${report.score.toFixed(2)}`);
      report.missed.forEach((line) => console.log(` MISS: ${line}`));
      report.extra.forEach((line) => console.log(` EXTRA: ${line}`));
    } catch (err: any) {
      console.log(` [ERROR] ${testCase.id}: ${err.message}`);
    }
  }

  return scoreSum / cases.length;
}
// ── Main ───────────────────────────────────────────────────────────────────
/**
 * CLI entry point.
 *
 * Reads the arguments (`--tune` switches to tune mode; the first argument
 * not starting with `--` filters combos by label substring), loads
 * `cases.json` from the tessdata directory, then either tunes or tests every
 * selected combo and prints a summary sorted by score, best first. The
 * daemon is stopped at the end.
 */
async function main() {
  const cliArgs = process.argv.slice(2);
  const tuneMode = cliArgs.includes('--tune');
  const filterArg = cliArgs.find((arg) => !arg.startsWith('--'))?.toLowerCase();
  const combos = filterArg
    ? ALL_COMBOS.filter((candidate) => candidate.label.includes(filterArg))
    : ALL_COMBOS;

  const tessdataDir = join('tools', 'OcrDaemon', 'bin', 'Release', 'net8.0-windows10.0.19041.0', 'tessdata');
  const cases: TestCase[] = JSON.parse(readFileSync(join(tessdataDir, 'cases.json'), 'utf-8'));
  console.log(`Loaded ${cases.length} test cases: ${cases.map((tc) => tc.id).join(', ')}`);
  console.log(`Mode: ${tuneMode ? 'TUNE' : 'TEST'} Combos: ${combos.length}\n`);

  // Prints a title framed by two '=' rules of the given width.
  const printBanner = (width: number, title: string): void => {
    const rule = '='.repeat(width);
    console.log(`\n${rule}`);
    console.log(title);
    console.log(`${rule}`);
  };
  // Prints one summary row: label, percentage and a bar of up to 40 '#'.
  const printScoreRow = (label: string, score: number): void => {
    const bar = '#'.repeat(Math.round(score * 40));
    console.log(` ${label.padEnd(22)} ${(score * 100).toFixed(1).padStart(5)}% ${bar}`);
  };

  const daemon = new OcrDaemon();

  if (tuneMode) {
    // ── Tune mode: coordinate descent for each combo ──
    const tuneResults: TuneResult[] = [];
    for (const combo of combos) {
      printBanner(60, ` TUNING: ${combo.label}`);
      try {
        const tuned = await tuneCombo(daemon, cases, tessdataDir, combo);
        tuneResults.push(tuned);
        console.log(`\n Best: ${(tuned.score * 100).toFixed(1)}% (${tuned.evals} evals)`);
        console.log(` Params: ${JSON.stringify(tuned.params)}`);
        // Verbose run with best params
        console.log('');
        await testCombo(daemon, cases, tessdataDir, combo, tuned.params);
      } catch (err: any) {
        console.log(` ERROR: ${err.message}`);
        tuneResults.push({ label: combo.label, score: 0, params: {}, evals: 0 });
      }
    }

    // Summary, best score first
    printBanner(70, ' TUNE RESULTS');
    tuneResults.sort((left, right) => right.score - left.score);
    for (const entry of tuneResults) {
      printScoreRow(entry.label, entry.score);
    }
    console.log(`\n BEST PARAMS PER COMBO:`);
    for (const entry of tuneResults) {
      if (entry.score <= 0) continue;
      console.log(` ${entry.label.padEnd(22)} ${JSON.stringify(entry.params)}`);
    }
  } else {
    // ── Test mode: defaults only ──
    const scoresByLabel = new Map<string, number>();
    for (const combo of combos) {
      printBanner(60, ` ${combo.label}`);
      try {
        const average = await testCombo(daemon, cases, tessdataDir, combo);
        scoresByLabel.set(combo.label, average);
        console.log(`\n Average: ${(average * 100).toFixed(1)}%`);
      } catch (err: any) {
        console.log(` ERROR: ${err.message}`);
        scoresByLabel.set(combo.label, 0);
      }
    }

    printBanner(60, ' SUMMARY');
    const ranked = [...scoresByLabel.entries()].sort((left, right) => right[1] - left[1]);
    for (const [label, score] of ranked) {
      printScoreRow(label, score);
    }
  }

  await daemon.stop();
}
// Any error escaping main() is fatal: print it and exit with status 1.
main().catch((failure) => {
  console.error(failure);
  process.exit(1);
});