namespace OcrDaemon;

using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text.Json;
using Tesseract;

/// <summary>
/// Command-line regression runner: loads OCR test cases from a JSON file, runs each
/// image through the OCR pipeline and reports which expected lines were recognised.
/// All diagnostics are written to standard error.
/// </summary>
static class TestRunner
{
    private const string SavePreFlag = "--save-pre";
    private const string DefaultSavePreDir = "processed";

    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    /// <summary>
    /// Runs every test case and prints a per-case and overall summary.
    /// </summary>
    /// <param name="args">
    /// Optional arguments: the first positional (non <c>--</c>) argument is the path to the
    /// cases file (default: <c>tessdata/cases.json</c> under the application base directory);
    /// <c>--save-pre [dir]</c> saves intermediate images into <c>dir</c>
    /// (default <c>processed</c>, resolved relative to the cases file when not rooted).
    /// </param>
    /// <returns>
    /// <c>0</c> when no case failed, <c>1</c> when the cases file is missing or contains no
    /// cases, <c>2</c> when at least one case failed.
    /// </returns>
    public static int Run(string[] args)
    {
        string baseDir = AppContext.BaseDirectory;
        var (requestedCasesPath, savePreDir) = ParseArgs(args);

        string casesPath = string.IsNullOrWhiteSpace(requestedCasesPath)
            ? Path.Combine(baseDir, "tessdata", "cases.json")
            : requestedCasesPath;

        if (!File.Exists(casesPath))
        {
            Console.Error.WriteLine($"cases.json not found: {casesPath}");
            return 1;
        }

        string json = File.ReadAllText(casesPath);
        var cases = JsonSerializer.Deserialize<List<TestCase>>(json, JsonOptions) ?? [];
        if (cases.Count == 0)
        {
            Console.Error.WriteLine("No test cases found.");
            return 1;
        }

        string tessdataPath = Path.Combine(baseDir, "tessdata");

        // Prefer the custom "poe2" model when its traineddata file is present; otherwise use "eng".
        string tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";

        using var engine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
        engine.DefaultPageSegMode = PageSegMode.SingleBlock;
        engine.SetVariable("preserve_interword_spaces", "1");
        var ocrHandler = new OcrHandler(engine);

        int totalExpected = 0;
        int totalMatched = 0;
        int caseFailures = 0;

        // Relative image paths in the cases file are resolved against the cases file's directory.
        string casesDir = Path.GetDirectoryName(casesPath) ?? baseDir;

        if (!string.IsNullOrEmpty(savePreDir))
        {
            if (!Path.IsPathRooted(savePreDir))
            {
                savePreDir = Path.Combine(casesDir, savePreDir);
            }

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(savePreDir);
        }

        foreach (var tc in cases)
        {
            // Skipped cases are reported but do not count as failures.
            if (string.IsNullOrWhiteSpace(tc.Image))
            {
                Console.Error.WriteLine($"[SKIP] {tc.Id}: missing image path");
                continue;
            }

            string imagePath = ResolvePath(casesDir, tc.Image);
            if (!File.Exists(imagePath))
            {
                Console.Error.WriteLine($"[SKIP] {tc.Id}: image not found: {imagePath}");
                continue;
            }

            List<string> actualSet;
            if (!string.IsNullOrWhiteSpace(tc.BeforeImage))
            {
                // Diff mode: a "before" image is supplied alongside the main image.
                string beforePath = ResolvePath(casesDir, tc.BeforeImage);
                if (!File.Exists(beforePath))
                {
                    Console.Error.WriteLine($"[SKIP] {tc.Id}: before image not found: {beforePath}");
                    continue;
                }

                // NOTE(review): presumably HandleSnapshot stores the "before" frame that
                // HandleDiffOcr then compares against; confirm in OcrHandler.
                ocrHandler.HandleSnapshot(new Request { File = beforePath });

                string? savePath = null;
                if (!string.IsNullOrEmpty(savePreDir))
                {
                    savePath = Path.Combine(savePreDir, $"{tc.Id}.raw.png");
                }

                var response = ocrHandler.HandleDiffOcr(new Request
                {
                    File = imagePath,
                    Path = savePath,
                });

                if (response is ErrorResponse err)
                {
                    Console.Error.WriteLine($"[FAIL] {tc.Id}: {err.Error}");
                    caseFailures++;
                    continue;
                }

                if (response is DiffOcrResponse diff)
                {
                    actualSet = BuildActualSet(diff.Text, diff.Lines.Select(l => l.Text));
                }
                else if (response is OcrResponse ocr)
                {
                    actualSet = BuildActualSet(ocr.Text, ocr.Lines.Select(l => l.Text));
                }
                else
                {
                    actualSet = [];
                }
            }
            else
            {
                actualSet = OcrSingleImage(engine, imagePath, savePreDir, tc.Id);
            }

            // An explicit JSON null for "expected" leaves the property null; treat it as empty.
            var expectedLines = (tc.Expected ?? [])
                .Select(e => Normalize(e))
                .Where(s => s.Length > 0)
                .ToList();

            int matched = ReportCase(tc.Id, expectedLines, actualSet);
            totalExpected += expectedLines.Count;
            totalMatched += matched;
            if (matched < expectedLines.Count)
            {
                caseFailures++;
            }
        }

        Console.Error.WriteLine($"Summary: matched {totalMatched}/{totalExpected} lines, failed cases: {caseFailures}");
        return caseFailures == 0 ? 0 : 2;
    }

    /// <summary>
    /// Extracts the cases-file path and the save directory from the command line.
    /// The cases path is the first argument that is neither an option (<c>--…</c>) nor the
    /// value consumed by <c>--save-pre</c>, so the flag may appear before or after the path.
    /// </summary>
    private static (string? CasesPath, string? SavePreDir) ParseArgs(string[] args)
    {
        static bool IsOption(string? s) => s is not null && s.StartsWith("--", StringComparison.Ordinal);

        string? casesPath = null;
        bool casesPathSeen = false;
        string? savePreDir = null;

        for (int i = 0; i < args.Length; i++)
        {
            string? arg = args[i];

            if (string.Equals(arg, SavePreFlag, StringComparison.OrdinalIgnoreCase))
            {
                // The directory is optional: only consume the next argument when it is not another option.
                if (i + 1 < args.Length && !IsOption(args[i + 1]))
                {
                    savePreDir = args[i + 1];
                    i++;
                }
                else
                {
                    savePreDir = DefaultSavePreDir;
                }

                continue;
            }

            // Only the first positional argument is considered; unknown options are ignored.
            if (!casesPathSeen && !IsOption(arg))
            {
                casesPath = arg;
                casesPathSeen = true;
            }
        }

        return (casesPath, savePreDir);
    }

    /// <summary>Returns <paramref name="path"/> unchanged when rooted, otherwise combined with <paramref name="baseDir"/>.</summary>
    private static string ResolvePath(string baseDir, string path) =>
        Path.IsPathRooted(path) ? path : Path.Combine(baseDir, path);

    /// <summary>
    /// Preprocesses a single image, runs it through the engine and returns the distinct
    /// normalized lines. When <paramref name="savePreDir"/> is set, the preprocessed bitmap
    /// is saved there as <c>{caseId}.pre.png</c>.
    /// </summary>
    private static List<string> OcrSingleImage(TesseractEngine engine, string imagePath, string? savePreDir, string caseId)
    {
        using var bitmap = new Bitmap(imagePath);
        using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);

        if (!string.IsNullOrEmpty(savePreDir))
        {
            string outPath = Path.Combine(savePreDir, $"{caseId}.pre.png");
            processed.Save(outPath, System.Drawing.Imaging.ImageFormat.Png);
        }

        using var pix = ImageUtils.BitmapToPix(processed);
        using var page = engine.Process(pix);

        // Materialize both sources while the page is still alive (it is disposed on return).
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
        var lineTexts = lines.Select(l => l.Text).ToList();
        string? rawText = page.GetText();

        return BuildActualSet(rawText, lineTexts);
    }

    /// <summary>
    /// Prints the outcome of one case and returns how many expected lines were found
    /// in <paramref name="actualSet"/> (exact match on normalized text).
    /// </summary>
    private static int ReportCase(string id, List<string> expectedLines, List<string> actualSet)
    {
        // Ordinal set for membership tests; the list is kept for printing in original order.
        var actualLookup = new HashSet<string>(actualSet, StringComparer.Ordinal);
        var missing = expectedLines.Where(e => !actualLookup.Contains(e)).ToList();
        int matched = expectedLines.Count - missing.Count;

        if (missing.Count > 0)
        {
            Console.Error.WriteLine($"[FAIL] {id}: matched {matched}/{expectedLines.Count}");
            foreach (var line in missing)
            {
                Console.Error.WriteLine($" missing: {line}");
            }

            Console.Error.WriteLine(" actual:");
            foreach (var line in actualSet)
            {
                Console.Error.WriteLine($" > {line}");
            }
        }
        else
        {
            Console.Error.WriteLine($"[OK] {id}: matched {matched}/{expectedLines.Count}");
        }

        return matched;
    }

    /// <summary>
    /// Canonical form used for comparison: trimmed, lower-cased (invariant culture) and with
    /// every run of whitespace collapsed to a single space. Null or blank input yields <c>""</c>.
    /// </summary>
    private static string Normalize(string? input)
    {
        if (string.IsNullOrWhiteSpace(input))
        {
            return string.Empty;
        }

        string lowered = input.Trim().ToLowerInvariant();
        var sb = new System.Text.StringBuilder(lowered.Length);
        bool inSpace = false;

        foreach (char c in lowered)
        {
            if (char.IsWhiteSpace(c))
            {
                if (!inSpace)
                {
                    sb.Append(' ');
                    inSpace = true;
                }

                continue;
            }

            inSpace = false;
            sb.Append(c);
        }

        return sb.ToString().Trim();
    }

    /// <summary>
    /// Merges the per-line texts with the newline-split full text into one list of distinct,
    /// normalized, non-empty lines (line texts first, in encounter order).
    /// </summary>
    /// <param name="text">Full recognised text; null is treated as empty.</param>
    /// <param name="lineTexts">Raw text of each recognised line.</param>
    private static List<string> BuildActualSet(string? text, IEnumerable<string?> lineTexts)
    {
        var fromLines = lineTexts
            .Select(t => Normalize(t))
            .Where(s => s.Length > 0);

        var fromText = (text ?? string.Empty).Split('\n')
            .Select(t => Normalize(t))
            .Where(s => s.Length > 0);

        return fromLines.Concat(fromText).Distinct().ToList();
    }

    /// <summary>One entry of the cases JSON file (property names are matched case-insensitively).</summary>
    private sealed class TestCase
    {
        /// <summary>Identifier used in log output and in saved image file names.</summary>
        public string Id { get; set; } = "";

        /// <summary>Path of the image to OCR; relative paths are resolved against the cases file's directory.</summary>
        public string Image { get; set; } = "";

        /// <summary>Optional "before" image; when set, the case runs through the snapshot + diff OCR path.</summary>
        public string? BeforeImage { get; set; }

        /// <summary>Lines that must appear (after normalization) in the OCR output.</summary>
        public List<string> Expected { get; set; } = [];
    }
}