poe2-bot/tools/OcrDaemon/TestRunner.cs
2026-02-12 22:07:54 -05:00

230 lines
7.9 KiB
C#

namespace OcrDaemon;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text.Json;
using Tesseract;
/// <summary>
/// Command-line regression runner for the OCR pipeline. Loads test cases from a JSON
/// file, runs OCR on each case image and reports, on standard error, which of the
/// expected lines were found in the OCR output.
/// </summary>
static class TestRunner
{
    /// <summary>Flag that turns on saving of intermediate images; may be followed by a directory.</summary>
    private const string SavePreFlag = "--save-pre";

    /// <summary>Directory name used when <c>--save-pre</c> is given without a value.</summary>
    private const string DefaultSavePreDir = "processed";

    // Case-insensitive so that JSON keys such as "id" / "beforeImage" bind to the PascalCase properties.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    /// <summary>
    /// Runs every test case from the cases file and prints a per-case report plus a summary.
    /// </summary>
    /// <param name="args">
    /// Optional cases-file path and optional <c>--save-pre [dir]</c> flag. The cases path is the
    /// first argument that is neither the flag nor the flag's value; when it is absent or blank,
    /// <c>tessdata/cases.json</c> under the application base directory is used. A relative
    /// <c>--save-pre</c> directory is resolved against the directory of the cases file.
    /// </param>
    /// <returns>
    /// 0 when no executed case failed; 1 when the cases file is missing, malformed or empty;
    /// 2 when at least one case failed. Skipped cases (missing image paths/files) do not count as failures.
    /// </returns>
    public static int Run(string[] args)
    {
        string baseDir = AppContext.BaseDirectory;
        var (casesArg, savePreDir) = ParseArgs(args);
        string casesPath = casesArg ?? Path.Combine(baseDir, "tessdata", "cases.json");

        if (!File.Exists(casesPath))
        {
            Console.Error.WriteLine($"cases.json not found: {casesPath}");
            return 1;
        }

        List<TestCase> cases;
        try
        {
            cases = JsonSerializer.Deserialize<List<TestCase>>(File.ReadAllText(casesPath), JsonOptions) ?? [];
        }
        catch (JsonException ex)
        {
            // Report malformed input the same way as the other setup failures instead of crashing.
            Console.Error.WriteLine($"Invalid cases.json ({casesPath}): {ex.Message}");
            return 1;
        }

        if (cases.Count == 0)
        {
            Console.Error.WriteLine("No test cases found.");
            return 1;
        }

        string tessdataPath = Path.Combine(baseDir, "tessdata");
        // Use the "poe2" traineddata when it is present next to the binary, otherwise fall back to "eng".
        string tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";
        using var engine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
        engine.DefaultPageSegMode = PageSegMode.SingleBlock;
        engine.SetVariable("preserve_interword_spaces", "1");
        var ocrHandler = new OcrHandler(engine);

        string casesDir = Path.GetDirectoryName(casesPath) ?? baseDir;
        if (!string.IsNullOrEmpty(savePreDir))
        {
            if (!Path.IsPathRooted(savePreDir))
            {
                savePreDir = Path.Combine(casesDir, savePreDir);
            }

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(savePreDir);
        }

        int totalExpected = 0;
        int totalMatched = 0;
        int caseFailures = 0;

        foreach (var tc in cases)
        {
            if (tc is null)
            {
                // A literal `null` element in the JSON array deserializes to a null entry.
                Console.Error.WriteLine("[SKIP] null test case entry");
                continue;
            }

            if (string.IsNullOrWhiteSpace(tc.Image))
            {
                Console.Error.WriteLine($"[SKIP] {tc.Id}: missing image path");
                continue;
            }

            string imagePath = ResolvePath(casesDir, tc.Image);
            if (!File.Exists(imagePath))
            {
                Console.Error.WriteLine($"[SKIP] {tc.Id}: image not found: {imagePath}");
                continue;
            }

            List<string>? actualSet;
            if (!string.IsNullOrWhiteSpace(tc.BeforeImage))
            {
                string beforePath = ResolvePath(casesDir, tc.BeforeImage);
                if (!File.Exists(beforePath))
                {
                    Console.Error.WriteLine($"[SKIP] {tc.Id}: before image not found: {beforePath}");
                    continue;
                }

                actualSet = RunDiffCase(ocrHandler, tc.Id, imagePath, beforePath, savePreDir);
                if (actualSet is null)
                {
                    // The handler reported an error; RunDiffCase has already logged it.
                    caseFailures++;
                    continue;
                }
            }
            else
            {
                actualSet = RunSingleImageCase(engine, tc.Id, imagePath, savePreDir);
            }

            // Expected may be null when the JSON contains an explicit `"expected": null`.
            var expectedLines = (tc.Expected ?? [])
                .Select(Normalize)
                .Where(s => s.Length > 0)
                .ToList();

            int matched = ReportCase(tc.Id, expectedLines, actualSet);
            totalExpected += expectedLines.Count;
            totalMatched += matched;
            if (matched < expectedLines.Count)
            {
                caseFailures++;
            }
        }

        Console.Error.WriteLine($"Summary: matched {totalMatched}/{totalExpected} lines, failed cases: {caseFailures}");
        return caseFailures == 0 ? 0 : 2;
    }

    /// <summary>
    /// Splits the command line into the optional cases-file path and the optional save directory.
    /// </summary>
    /// <returns>
    /// <c>CasesPath</c> is null when no usable path was supplied; <c>SavePreDir</c> is null when
    /// <c>--save-pre</c> was not given.
    /// </returns>
    private static (string? CasesPath, string? SavePreDir) ParseArgs(string[] args)
    {
        string? casesPath = null;
        string? savePreDir = null;
        bool casesSlotSeen = false;

        for (int i = 0; i < args.Length; i++)
        {
            string arg = args[i];
            if (string.Equals(arg, SavePreFlag, StringComparison.OrdinalIgnoreCase))
            {
                // The flag takes an optional value: consume the next argument unless it is another flag.
                if (i + 1 < args.Length && !args[i + 1].StartsWith("--", StringComparison.Ordinal))
                {
                    savePreDir = args[++i];
                }
                else
                {
                    savePreDir = DefaultSavePreDir;
                }

                continue;
            }

            // Only the first remaining argument is considered; a blank one selects the default path.
            if (!casesSlotSeen)
            {
                casesSlotSeen = true;
                casesPath = string.IsNullOrWhiteSpace(arg) ? null : arg;
            }
        }

        return (casesPath, savePreDir);
    }

    /// <summary>Returns <paramref name="path"/> unchanged when rooted, otherwise combined with <paramref name="baseDir"/>.</summary>
    private static string ResolvePath(string baseDir, string path)
    {
        return Path.IsPathRooted(path) ? path : Path.Combine(baseDir, path);
    }

    /// <summary>
    /// Runs a before/after case: snapshots the before image, then runs diff OCR on the case image.
    /// </summary>
    /// <returns>The normalized OCR lines, or null when the handler returned an <c>ErrorResponse</c>.</returns>
    private static List<string>? RunDiffCase(
        OcrHandler ocrHandler, string id, string imagePath, string beforePath, string? savePreDir)
    {
        // NOTE(review): the snapshot result is not inspected; if HandleSnapshot can fail, the diff
        // below may run against an earlier snapshot — confirm against OcrHandler.
        ocrHandler.HandleSnapshot(new Request { File = beforePath });

        string? savePath = null;
        if (!string.IsNullOrEmpty(savePreDir))
        {
            savePath = Path.Combine(savePreDir, $"{id}.raw.png");
        }

        // Path is presumably where the handler writes its intermediate image — verify in OcrHandler.
        var response = ocrHandler.HandleDiffOcr(new Request
        {
            File = imagePath,
            Path = savePath,
        });

        if (response is ErrorResponse err)
        {
            Console.Error.WriteLine($"[FAIL] {id}: {err.Error}");
            return null;
        }

        if (response is DiffOcrResponse diff)
        {
            return BuildActualSet(diff.Text, diff.Lines);
        }

        if (response is OcrResponse ocr)
        {
            return BuildActualSet(ocr.Text, ocr.Lines);
        }

        // Unknown response type: treat as "nothing recognised" so expected lines show up as missing.
        return [];
    }

    /// <summary>
    /// Runs a single-image case: preprocesses the image, optionally saves the preprocessed
    /// bitmap as <c>{id}.pre.png</c>, and runs the engine over it.
    /// </summary>
    private static List<string> RunSingleImageCase(
        TesseractEngine engine, string id, string imagePath, string? savePreDir)
    {
        using var bitmap = new Bitmap(imagePath);
        using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);
        if (!string.IsNullOrEmpty(savePreDir))
        {
            string outPath = Path.Combine(savePreDir, $"{id}.pre.png");
            processed.Save(outPath, System.Drawing.Imaging.ImageFormat.Png);
        }

        using var pix = ImageUtils.BitmapToPix(processed);
        using var page = engine.Process(pix);
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
        return MergeNormalized(lines.Select(l => l.Text), page.GetText());
    }

    /// <summary>
    /// Compares expected lines with the OCR output and prints the [OK]/[FAIL] report for one case.
    /// </summary>
    /// <returns>The number of expected lines that occur in <paramref name="actualSet"/>.</returns>
    private static int ReportCase(string id, List<string> expectedLines, List<string> actualSet)
    {
        // Hash lookup instead of List.Contains per expected line; the list is kept for ordered output.
        var actualLookup = new HashSet<string>(actualSet, StringComparer.Ordinal);
        var missing = expectedLines.Where(e => !actualLookup.Contains(e)).ToList();
        int matched = expectedLines.Count - missing.Count;

        if (missing.Count == 0)
        {
            Console.Error.WriteLine($"[OK] {id}: matched {matched}/{expectedLines.Count}");
            return matched;
        }

        Console.Error.WriteLine($"[FAIL] {id}: matched {matched}/{expectedLines.Count}");
        foreach (var line in missing)
        {
            Console.Error.WriteLine($" missing: {line}");
        }

        Console.Error.WriteLine(" actual:");
        foreach (var line in actualSet)
        {
            Console.Error.WriteLine($" > {line}");
        }

        return matched;
    }

    /// <summary>
    /// Canonical form used for comparison: trimmed, lower-cased (invariant culture) and with
    /// every run of whitespace collapsed to a single space. Null or blank input yields "".
    /// </summary>
    private static string Normalize(string? input)
    {
        if (string.IsNullOrWhiteSpace(input))
        {
            return string.Empty;
        }

        string lowered = input.Trim().ToLowerInvariant();
        var sb = new System.Text.StringBuilder(lowered.Length);
        bool inSpace = false;
        foreach (char c in lowered)
        {
            if (char.IsWhiteSpace(c))
            {
                if (!inSpace)
                {
                    sb.Append(' ');
                    inSpace = true;
                }

                continue;
            }

            inSpace = false;
            sb.Append(c);
        }

        return sb.ToString().Trim();
    }

    /// <summary>
    /// Builds the list of distinct normalized lines from a response's structured lines and its
    /// raw text (split on '\n'). A null <paramref name="text"/> or <paramref name="lines"/> is treated as empty.
    /// </summary>
    private static List<string> BuildActualSet(string text, List<OcrLineResult> lines)
    {
        var lineTexts = lines is null ? Enumerable.Empty<string>() : lines.Select(l => l.Text);
        return MergeNormalized(lineTexts, text);
    }

    /// <summary>
    /// Normalizes the structured line texts followed by the '\n'-separated raw text lines,
    /// drops empty results and removes duplicates while keeping first-occurrence order.
    /// </summary>
    private static List<string> MergeNormalized(IEnumerable<string?> lineTexts, string? rawText)
    {
        return lineTexts
            .Concat((rawText ?? string.Empty).Split('\n'))
            .Select(Normalize)
            .Where(s => s.Length > 0)
            .Distinct()
            .ToList();
    }

    /// <summary>One entry of the cases JSON file.</summary>
    private sealed class TestCase
    {
        /// <summary>Identifier used in log output and in the names of saved images.</summary>
        public string Id { get; set; } = "";

        /// <summary>Image to run OCR on; relative paths are resolved against the cases file's directory.</summary>
        public string Image { get; set; } = "";

        /// <summary>Optional "before" image; when set, the case runs through snapshot + diff OCR.</summary>
        public string? BeforeImage { get; set; }

        /// <summary>Lines that must appear (after normalization) in the OCR output.</summary>
        public List<string> Expected { get; set; } = [];
    }
}