test
This commit is contained in:
parent
f74e3e1c85
commit
9845e7f9bf
1 changed files with 232 additions and 0 deletions
232
tools/OcrDaemon/TestRunner.cs
Normal file
232
tools/OcrDaemon/TestRunner.cs
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.Drawing;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text.Json;
|
||||
using Tesseract;
|
||||
|
||||
static class TestRunner
|
||||
{
|
||||
/// <summary>
/// Serializer settings used when reading the cases file: JSON property names are
/// matched against <see cref="TestCase"/> members without regard to letter case.
/// </summary>
private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };
|
||||
|
||||
/// <summary>
/// Runs every OCR case listed in a JSON cases file and reports, per case, how many of the
/// expected lines were found in the normalised OCR output. All progress and results are
/// written to standard error.
/// </summary>
/// <param name="args">
/// Command-line arguments. The first argument that is not an option (does not start with
/// <c>--</c> and was not consumed as the value of <c>--save-pre</c>) is the path of the cases
/// file; when absent or blank, <c>tessdata/cases.json</c> under the application base directory
/// is used. <c>--save-pre [dir]</c> saves the intermediate images; <c>dir</c> defaults to
/// <c>processed</c> and a relative value is resolved against the cases file's directory.
/// </param>
/// <returns>
/// 0 when no executed case failed; 1 when the cases file is missing, is not valid JSON, or
/// contains no cases; 2 when at least one case failed. Skipped cases do not count as failures.
/// </returns>
public static int Run(string[] args)
{
    string baseDir = AppContext.BaseDirectory;
    string? savePreDir = null;
    string? casesArg = null;

    for (int i = 0; i < args.Length; i++)
    {
        string arg = args[i];

        if (string.Equals(arg, "--save-pre", StringComparison.OrdinalIgnoreCase))
        {
            // The directory value is optional: only consume the next argument when it is not another option.
            if (i + 1 < args.Length && !args[i + 1].StartsWith("--", StringComparison.Ordinal))
            {
                savePreDir = args[i + 1];
                i++;
            }
            else
            {
                savePreDir = "processed";
            }

            continue;
        }

        // The cases path is the first positional argument, wherever it appears. Reading args[0]
        // unconditionally would treat a leading "--save-pre" as the cases file.
        if (casesArg is null && !arg.StartsWith("--", StringComparison.Ordinal))
        {
            casesArg = arg;
        }
    }

    string casesPath = !string.IsNullOrWhiteSpace(casesArg)
        ? casesArg
        : Path.Combine(baseDir, "tessdata", "cases.json");

    if (!File.Exists(casesPath))
    {
        Console.Error.WriteLine($"cases.json not found: {casesPath}");
        return 1;
    }

    List<TestCase> cases;
    try
    {
        string json = File.ReadAllText(casesPath);
        cases = JsonSerializer.Deserialize<List<TestCase>>(json, JsonOptions) ?? [];
    }
    catch (JsonException ex)
    {
        // Report malformed input the same way as the other unusable-input conditions instead of crashing.
        Console.Error.WriteLine($"Invalid cases file {casesPath}: {ex.Message}");
        return 1;
    }

    // A literal null element in the JSON array deserializes to a null entry; ignore those.
    cases.RemoveAll(tc => tc is null);

    if (cases.Count == 0)
    {
        Console.Error.WriteLine("No test cases found.");
        return 1;
    }

    string tessdataPath = Path.Combine(baseDir, "tessdata");
    // Use the "poe2" language data when its traineddata file is present, otherwise "eng".
    string tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";

    using var engine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
    engine.DefaultPageSegMode = PageSegMode.SingleBlock;
    engine.SetVariable("preserve_interword_spaces", "1");
    var ocrHandler = new OcrHandler(engine);

    int totalExpected = 0;
    int totalMatched = 0;
    int caseFailures = 0;

    string casesDir = Path.GetDirectoryName(casesPath) ?? baseDir;
    if (!string.IsNullOrEmpty(savePreDir))
    {
        if (!Path.IsPathRooted(savePreDir))
        {
            savePreDir = Path.Combine(casesDir, savePreDir);
        }

        // CreateDirectory does nothing when the directory already exists, so no prior check is needed.
        Directory.CreateDirectory(savePreDir);
    }

    foreach (var tc in cases)
    {
        if (string.IsNullOrWhiteSpace(tc.Image))
        {
            Console.Error.WriteLine($"[SKIP] {tc.Id}: missing image path");
            continue;
        }

        string imagePath = Path.IsPathRooted(tc.Image)
            ? tc.Image
            : Path.Combine(casesDir, tc.Image);

        if (!File.Exists(imagePath))
        {
            Console.Error.WriteLine($"[SKIP] {tc.Id}: image not found: {imagePath}");
            continue;
        }

        var options = new OcrOptions();
        List<string> actualSet;

        if (!string.IsNullOrWhiteSpace(tc.BeforeImage))
        {
            // Before/after case: snapshot the "before" image, then diff-OCR the main image.
            string beforePath = Path.IsPathRooted(tc.BeforeImage)
                ? tc.BeforeImage
                : Path.Combine(casesDir, tc.BeforeImage);

            if (!File.Exists(beforePath))
            {
                Console.Error.WriteLine($"[SKIP] {tc.Id}: before image not found: {beforePath}");
                continue;
            }

            // NOTE(review): the result of HandleSnapshot is not inspected here — confirm it cannot fail silently.
            ocrHandler.HandleSnapshot(new Request { File = beforePath });

            string? savePath = null;
            if (!string.IsNullOrEmpty(savePreDir))
            {
                savePath = Path.Combine(savePreDir, $"{tc.Id}.raw.png");
            }

            var response = ocrHandler.HandleDiffOcr(new Request
            {
                File = imagePath,
                Ocr = options,
                Path = savePath,
            });

            if (response is ErrorResponse err)
            {
                Console.Error.WriteLine($"[FAIL] {tc.Id}: {err.Error}");
                caseFailures++;
                continue;
            }

            if (response is DiffOcrResponse diff)
            {
                actualSet = BuildActualSet(diff.Text, diff.Lines);
            }
            else if (response is OcrResponse ocr)
            {
                actualSet = BuildActualSet(ocr.Text, ocr.Lines);
            }
            else
            {
                actualSet = [];
            }
        }
        else
        {
            // Single-image case: preprocess the image and run the engine on it directly.
            using var bitmap = new Bitmap(imagePath);
            using var processed = ImagePreprocessor.PreprocessForOcr(bitmap, options);

            if (!string.IsNullOrEmpty(savePreDir))
            {
                string outPath = Path.Combine(savePreDir, $"{tc.Id}.pre.png");
                processed.Save(outPath, System.Drawing.Imaging.ImageFormat.Png);
            }

            using var pix = ImageUtils.BitmapToPix(processed);
            using var page = engine.Process(pix);

            var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0, minConfidence: options.MinConfidence);
            var actualLines = lines.Select(l => Normalize(l.Text)).Where(s => s.Length > 0).ToList();

            var rawText = page.GetText() ?? string.Empty;
            var rawLines = rawText.Split('\n')
                .Select(Normalize)
                .Where(s => s.Length > 0)
                .ToList();

            // Accept a match from either the extracted lines or the page's raw text.
            actualSet = actualLines.Concat(rawLines).Distinct().ToList();
        }

        // "expected": null in the JSON leaves the property null; treat it as no expectations.
        var expectedLines = (tc.Expected ?? [])
            .Select(Normalize)
            .Where(s => s.Length > 0)
            .ToList();

        // Hash lookup for membership; actualSet itself keeps its order for the report below.
        var actualLookup = new HashSet<string>(actualSet);
        var missing = expectedLines.Where(e => !actualLookup.Contains(e)).ToList();
        int matched = expectedLines.Count - missing.Count;

        totalExpected += expectedLines.Count;
        totalMatched += matched;

        if (matched < expectedLines.Count)
        {
            caseFailures++;
            Console.Error.WriteLine($"[FAIL] {tc.Id}: matched {matched}/{expectedLines.Count}");
            foreach (var line in missing)
            {
                Console.Error.WriteLine($"  missing: {line}");
            }

            Console.Error.WriteLine("  actual:");
            foreach (var line in actualSet)
            {
                Console.Error.WriteLine($"    > {line}");
            }
        }
        else
        {
            Console.Error.WriteLine($"[OK] {tc.Id}: matched {matched}/{expectedLines.Count}");
        }
    }

    Console.Error.WriteLine($"Summary: matched {totalMatched}/{totalExpected} lines, failed cases: {caseFailures}");
    return caseFailures == 0 ? 0 : 2;
}
|
||||
|
||||
/// <summary>
/// Produces the canonical form used for comparing OCR lines: surrounding whitespace removed,
/// invariant lower case, and every run of whitespace characters collapsed to a single space.
/// </summary>
/// <param name="input">Text to canonicalise; null, empty, or all-whitespace input is accepted.</param>
/// <returns>The canonical text, or an empty string when <paramref name="input"/> has no visible content.</returns>
private static string Normalize(string input)
{
    if (string.IsNullOrWhiteSpace(input))
    {
        return string.Empty;
    }

    string lowered = input.Trim().ToLowerInvariant();
    var builder = new System.Text.StringBuilder(lowered.Length);
    bool previousWasSpace = false;

    for (int index = 0; index < lowered.Length; index++)
    {
        char current = lowered[index];
        bool isSpace = char.IsWhiteSpace(current);

        if (!isSpace)
        {
            builder.Append(current);
        }
        else if (!previousWasSpace)
        {
            // Only the first character of a whitespace run emits a separator.
            builder.Append(' ');
        }

        previousWasSpace = isSpace;
    }

    return builder.ToString().Trim();
}
|
||||
|
||||
/// <summary>
/// Combines the per-line OCR results and the newline-separated full text into one list of
/// normalised, non-empty, distinct strings. Entries taken from <paramref name="lines"/> come
/// first, followed by entries from <paramref name="text"/> that were not already present.
/// </summary>
/// <param name="text">Full OCR text; a null value is treated as empty.</param>
/// <param name="lines">Per-line OCR results whose <c>Text</c> is used.</param>
/// <returns>The distinct normalised strings in first-seen order.</returns>
private static List<string> BuildActualSet(string text, List<OcrLineResult> lines)
{
    var fromLines = lines.Select(entry => entry.Text);
    var fromText = (text ?? string.Empty).Split('\n');

    // Normalise both sources in one pass; Distinct keeps the first occurrence of each value.
    return fromLines
        .Concat(fromText)
        .Select(candidate => Normalize(candidate))
        .Where(candidate => candidate.Length > 0)
        .Distinct()
        .ToList();
}
|
||||
|
||||
/// <summary>
/// One entry of the cases file, populated by JSON deserialization.
/// </summary>
private sealed class TestCase
{
    /// <summary>Identifier shown in log lines and used as the stem of saved image file names.</summary>
    public string Id { get; set; } = string.Empty;

    /// <summary>Path of the image to recognise; a relative path is resolved against the cases file's directory.</summary>
    public string Image { get; set; } = string.Empty;

    /// <summary>
    /// Optional "before" image. When set, the case is run as a snapshot of this image followed
    /// by a diff OCR of <see cref="Image"/>; a relative path is resolved like <see cref="Image"/>.
    /// </summary>
    public string? BeforeImage { get; set; } = null;

    /// <summary>Lines that must all appear in the normalised OCR output for the case to pass.</summary>
    public List<string> Expected { get; set; } = new List<string>();
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue