// C# source · 623 lines · 26 KiB
namespace OcrDaemon;
|
|
|
|
using System.Drawing;
|
|
using System.Text.Json;
|
|
using System.Text.Json.Serialization;
|
|
using Tesseract;
|
|
|
|
static class Daemon
|
|
{
|
|
/// <summary>
/// Serializer settings shared by request parsing, crop.json loading and response writing:
/// camelCase property names, and null-valued properties are omitted when writing.
/// </summary>
private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
{
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
};
|
|
|
|
/// <summary>
/// Daemon entry point. Creates the Tesseract engine, writes a ready response, then reads
/// one JSON request per line from stdin and writes one JSON response per request to stdout.
/// </summary>
/// <returns>
/// 0 once stdin reaches end-of-stream; 1 if the Tesseract engine could not be created
/// (an error response is written first).
/// </returns>
public static int Run()
{
    ScreenCapture.InitDpiAwareness();

    // Pre-create the Tesseract OCR engine (reused across all requests).
    // The "poe2" model is used when its traineddata file is present; otherwise "eng".
    var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";
    TesseractEngine? tessEngine = null;
    try
    {
        tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
        tessEngine.DefaultPageSegMode = PageSegMode.SingleBlock;
        tessEngine.SetVariable("preserve_interword_spaces", "1");

        // Optional per-language dictionaries: only wired up when the files exist.
        var userWordsPath = Path.Combine(tessdataPath, $"{tessLang}.user-words");
        var userPatternsPath = Path.Combine(tessdataPath, $"{tessLang}.user-patterns");
        if (File.Exists(userWordsPath))
        {
            tessEngine.SetVariable("user_words_file", userWordsPath);
            var lineCount = File.ReadAllLines(userWordsPath).Length;
            Console.Error.WriteLine($"Loaded user-words: {lineCount} words from {userWordsPath}");
        }
        if (File.Exists(userPatternsPath))
        {
            tessEngine.SetVariable("user_patterns_file", userPatternsPath);
            var lineCount = File.ReadAllLines(userPatternsPath).Length;
            Console.Error.WriteLine($"Loaded user-patterns: {lineCount} patterns from {userPatternsPath}");
        }
        Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
    }
    catch (Exception ex)
    {
        // The engine may have been constructed before configuration failed; release it.
        tessEngine?.Dispose();
        // Name the traineddata file that was actually selected, not always "eng".
        WriteResponse(new ErrorResponse($"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/{tessLang}.traineddata exists."));
        return 1;
    }

    try
    {
        // Signal ready
        WriteResponse(new ReadyResponse());

        var ocrHandler = new OcrHandler(tessEngine);
        var gridHandler = new GridHandler();
        var detectGridHandler = new DetectGridHandler();
        var templateMatchHandler = new TemplateMatchHandler();
        var edgeCropHandler = new EdgeCropHandler();
        var pythonBridge = new PythonOcrBridge();

        try
        {
            // Main loop: read one JSON line, handle, write one JSON line
            string? line;
            while ((line = Console.In.ReadLine()) != null)
            {
                line = line.Trim();
                if (line.Length == 0) continue;

                try
                {
                    var request = JsonSerializer.Deserialize<Request>(line, JsonOptions);
                    if (request == null)
                    {
                        WriteResponse(new ErrorResponse("Failed to parse request"));
                        continue;
                    }

                    // Commands are matched case-insensitively.
                    object response = request.Cmd?.ToLowerInvariant() switch
                    {
                        "ocr" => HandleOcrPipeline(ocrHandler, pythonBridge, request),
                        "screenshot" => ocrHandler.HandleScreenshot(request),
                        "capture" => ocrHandler.HandleCapture(request),
                        "snapshot" => ocrHandler.HandleSnapshot(request),
                        "diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
                        "edge-ocr" => HandleEdgeOcrPipeline(ocrHandler, edgeCropHandler, pythonBridge, request),
                        "test" => ocrHandler.HandleTest(request),
                        "tune" => ocrHandler.HandleTune(request),
                        "crop-test" => HandleCropTest(ocrHandler, edgeCropHandler, request),
                        "crop-tune" => HandleCropTune(ocrHandler, request),
                        "grid" => gridHandler.HandleGrid(request),
                        "detect-grid" => detectGridHandler.HandleDetectGrid(request),
                        "match-template" => templateMatchHandler.HandleTemplateMatch(request),
                        _ => new ErrorResponse($"Unknown command: {request.Cmd}"),
                    };
                    WriteResponse(response);
                }
                catch (Exception ex)
                {
                    // A failing request must not take the daemon down: report it and keep serving.
                    WriteResponse(new ErrorResponse(ex.Message));
                }
            }
        }
        finally
        {
            // Runs even if reading stdin or writing a response throws outside the per-request catch.
            pythonBridge.Dispose();
        }

        return 0;
    }
    finally
    {
        tessEngine.Dispose();
    }
}
|
|
|
|
/// <summary>
/// Unified OCR pipeline for full/region captures.
/// Capture → optional preprocess → route to engine (tesseract / easyocr / paddleocr).
/// </summary>
/// <param name="ocrHandler">Handler used for the Tesseract paths.</param>
/// <param name="pythonBridge">Bridge that receives the bitmap for every engine other than "tesseract".</param>
/// <param name="request">
/// Request whose Engine (default "tesseract"), Preprocess (default "none"), File, Region and
/// Params.Ocr.KernelSize (default 41) are read here.
/// </param>
/// <returns>
/// The selected engine's result, or an <c>ErrorResponse</c> when "bgsub" preprocessing is requested.
/// </returns>
private static object HandleOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var preprocess = request.Preprocess ?? "none";

    // No preprocess + tesseract = original fast path
    if (engine == "tesseract" && preprocess == "none")
        return ocrHandler.HandleOcr(request);

    // Reject bgsub up front: there is no reference frame here, so capturing first would be wasted work.
    if (preprocess == "bgsub")
        return new ErrorResponse("bgsub preprocess requires a reference frame; use diff-ocr instead.");

    // Capture
    using var bitmap = ScreenCapture.CaptureOrLoad(request.File, request.Region);

    // Preprocess: "tophat" runs the preprocessor; any other value is treated as "none" (plain copy).
    using var processed = preprocess == "tophat"
        ? ImagePreprocessor.PreprocessForOcr(bitmap, kernelSize: request.Params?.Ocr.KernelSize ?? 41)
        : (Bitmap)bitmap.Clone();

    // Route to engine
    if (engine == "tesseract")
    {
        // Without an explicit region the whole processed bitmap is described, anchored at (0,0).
        var region = request.Region != null
            ? new RegionRect { X = request.Region.X, Y = request.Region.Y, Width = request.Region.Width, Height = request.Region.Height }
            : new RegionRect { X = 0, Y = 0, Width = processed.Width, Height = processed.Height };
        return ocrHandler.RunTesseractOnBitmap(processed, region);
    }

    // easyocr, paddleocr
    // NOTE(review): unlike the diff/edge pipelines, no region offset is applied to the bridge result here — confirm intended.
    return pythonBridge.OcrFromBitmap(processed, engine);
}
|
|
|
|
/// <summary>
/// Unified diff-OCR pipeline for tooltip detection.
/// DiffCrop → preprocess (default=bgsub) → route to engine.
/// </summary>
/// <param name="ocrHandler">Handler providing the diff crop and the Tesseract run.</param>
/// <param name="pythonBridge">Bridge used for every engine other than "tesseract".</param>
/// <param name="request">
/// Request whose Engine, Preprocess, Params, Threshold and Path are read here. A positive
/// Threshold overrides the crop parameters' DiffThresh. A non-empty Path makes the method
/// save the preprocessed crop there and the full frame next to it with a ".full" infix.
/// </param>
/// <returns>
/// The Tesseract result, a <c>DiffOcrResponse</c> for Python engines (word coordinates shifted
/// by the crop region's X/Y), or an empty <c>OcrResponse</c> when DiffCrop returns null.
/// </returns>
private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";

    // No engine override + no preprocess override + no params = original Tesseract path
    if (engine == "tesseract" && request.Preprocess == null && request.Params == null)
        return ocrHandler.HandleDiffOcr(request);

    var isPythonEngine = engine is "easyocr" or "paddleocr";
    var p = request.Params ?? new DiffOcrParams();
    var cropParams = p.Crop;
    var ocrParams = p.Ocr;
    if (request.Threshold > 0) cropParams.DiffThresh = request.Threshold;

    // Determine preprocess mode: explicit request.Preprocess > params.UseBackgroundSub > default "bgsub"
    string preprocess;
    if (request.Preprocess != null)
        preprocess = request.Preprocess;
    else if (request.Params != null)
        preprocess = ocrParams.UseBackgroundSub ? "bgsub" : "tophat";
    else
        preprocess = "bgsub";

    var sw = System.Diagnostics.Stopwatch.StartNew();

    var cropResult = ocrHandler.DiffCrop(request, cropParams);
    if (cropResult == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, refCropped, current, region) = cropResult.Value;
    using var _current = current;

    // Preprocess — only sees ocrParams
    Bitmap processed;
    try
    {
        if (preprocess == "bgsub")
        {
            // Python engines get the crop at native scale; Tesseract uses the configured upscale.
            int upscale = isPythonEngine ? 1 : ocrParams.Upscale;
            processed = ImagePreprocessor.PreprocessWithBackgroundSub(
                cropped, refCropped, dimPercentile: ocrParams.DimPercentile, textThresh: ocrParams.TextThresh,
                upscale: upscale, softThreshold: ocrParams.SoftThreshold);
        }
        else if (preprocess == "tophat")
        {
            processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize);
        }
        else // "none"
        {
            processed = (Bitmap)cropped.Clone();
        }
    }
    finally
    {
        // Release the crops even when preprocessing throws: the daemon keeps running after a
        // failed request, so bitmaps leaked here would accumulate.
        cropped.Dispose();
        refCropped.Dispose();
    }

    var diffMs = sw.ElapsedMilliseconds;
    using var _processed = processed;

    // Save debug images if path provided
    if (!string.IsNullOrEmpty(request.Path))
    {
        var dir = Path.GetDirectoryName(request.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        // Save preprocessed crop
        processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));

        // Full frame goes next to it, e.g. "x.png" -> "x.full.png".
        var ext = Path.GetExtension(request.Path);
        var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
        current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
    }

    // Route to engine
    sw.Restart();
    if (engine == "tesseract")
    {
        var result = ocrHandler.RunTesseractOnBitmap(processed, region);
        var ocrMs = sw.ElapsedMilliseconds;
        Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
        return result;
    }
    else // easyocr, paddleocr
    {
        var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
        var ocrMs = sw.ElapsedMilliseconds;
        Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");

        // Offset word coordinates to screen space
        foreach (var line in ocrResult.Lines)
        {
            foreach (var word in line.Words)
            {
                word.X += region.X;
                word.Y += region.Y;
            }
        }

        return new DiffOcrResponse
        {
            Text = ocrResult.Text,
            Lines = ocrResult.Lines,
            Region = region,
        };
    }
}
|
|
|
|
/// <summary>
/// Edge-based tooltip detection pipeline.
/// EdgeCrop → preprocess (tophat only; bgsub falls back to tophat) → route to engine.
/// </summary>
/// <param name="ocrHandler">Handler providing the Tesseract run.</param>
/// <param name="edgeCropHandler">Handler that locates and crops the region.</param>
/// <param name="pythonBridge">Bridge used for every engine other than "tesseract".</param>
/// <param name="request">
/// Request whose Engine, Preprocess, EdgeParams and Path are read here. A non-empty Path makes
/// the method save the preprocessed crop there and the full capture next to it with a ".full" infix.
/// </param>
/// <returns>
/// The Tesseract result, a <c>DiffOcrResponse</c> for Python engines (word coordinates shifted
/// by the crop region's X/Y), or an empty <c>OcrResponse</c> when EdgeCrop returns null.
/// </returns>
private static object HandleEdgeOcrPipeline(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var ep = request.EdgeParams ?? new EdgeOcrParams();
    var cropParams = ep.Crop;
    var ocrParams = ep.Ocr;

    // Edge method only supports tophat (no reference frame for bgsub)
    string preprocess = request.Preprocess ?? "tophat";
    if (preprocess == "bgsub") preprocess = "tophat";

    var sw = System.Diagnostics.Stopwatch.StartNew();

    var cropResult = edgeCropHandler.EdgeCrop(request, cropParams);
    if (cropResult == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, fullCapture, region) = cropResult.Value;
    using var _fullCapture = fullCapture;

    // Preprocess
    Bitmap processed;
    try
    {
        if (preprocess == "tophat")
        {
            processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize, upscale: ocrParams.Upscale);
        }
        else // "none"
        {
            processed = (Bitmap)cropped.Clone();
        }
    }
    finally
    {
        // Release the crop even when preprocessing throws: the daemon keeps running after a
        // failed request, so a bitmap leaked here would accumulate.
        cropped.Dispose();
    }

    var cropMs = sw.ElapsedMilliseconds;
    using var _processed = processed;

    // Save debug images if path provided
    if (!string.IsNullOrEmpty(request.Path))
    {
        var dir = Path.GetDirectoryName(request.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));

        // Full capture goes next to it, e.g. "x.png" -> "x.full.png".
        var ext = Path.GetExtension(request.Path);
        var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
        fullCapture.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
    }

    // Route to engine
    sw.Restart();
    if (engine == "tesseract")
    {
        var result = ocrHandler.RunTesseractOnBitmap(processed, region, pad: cropParams.OcrPad, upscale: ocrParams.Upscale);
        var ocrMs = sw.ElapsedMilliseconds;
        Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}");
        return result;
    }
    else // easyocr, paddleocr
    {
        var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
        var ocrMs = sw.ElapsedMilliseconds;
        Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}");

        // Offset word coordinates by the crop region's origin
        foreach (var line in ocrResult.Lines)
        {
            foreach (var word in line.Words)
            {
                word.X += region.X;
                word.Y += region.Y;
            }
        }

        return new DiffOcrResponse
        {
            Text = ocrResult.Text,
            Lines = ocrResult.Lines,
            Region = region,
        };
    }
}
|
|
|
|
/// <summary>
/// Tunes DiffCropParams by coordinate descent: each parameter is swept over a fixed list of
/// candidates while the others stay put, and a candidate is kept only when it strictly raises
/// the average IoU against the crop.json ground truth. Stops after three rounds, or earlier
/// when a full round brings no improvement.
/// </summary>
/// <param name="ocrHandler">Handler used to load each snapshot and run the diff crop.</param>
/// <param name="request">Request whose Params.Crop, when present, is the starting point (and is updated in place).</param>
/// <returns>A <c>CropTuneResponse</c> with the best score, parameters and evaluation count, or an <c>ErrorResponse</c>.</returns>
private static object HandleCropTune(OcrHandler ocrHandler, Request request)
{
    var dataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var groundTruthPath = Path.Combine(dataDir, "crop.json");
    if (!File.Exists(groundTruthPath))
    {
        return new ErrorResponse($"crop.json not found at {groundTruthPath}");
    }

    var allCases = JsonSerializer.Deserialize<List<CropTestCase>>(File.ReadAllText(groundTruthPath), JsonOptions);
    if (allCases == null || allCases.Count == 0)
    {
        return new ErrorResponse("No test cases in crop.json");
    }

    // Keep only the cases whose image and snapshot are both on disk.
    var usable = new List<(CropTestCase tc, string imagePath, string snapshotPath)>();
    foreach (var candidateCase in allCases)
    {
        var image = Path.Combine(dataDir, candidateCase.Image);
        var snapshot = Path.Combine(dataDir, candidateCase.SnapshotImage);
        bool bothPresent = File.Exists(image) && File.Exists(snapshot);
        if (bothPresent)
        {
            usable.Add((candidateCase, image, snapshot));
        }
    }
    if (usable.Count == 0)
    {
        return new ErrorResponse("No valid test cases found");
    }

    // Average IoU of the diff crop against ground truth over all usable cases.
    // A case where DiffCrop yields nothing adds 0 but still counts in the divisor.
    double AverageIoU(DiffCropParams candidate)
    {
        double sum = 0;
        foreach (var (tc, imagePath, snapshotPath) in usable)
        {
            ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
            var crop = ocrHandler.DiffCrop(new Request { File = imagePath }, candidate);
            if (crop == null)
            {
                continue;
            }

            // Only the region is needed; release the bitmaps straight away.
            var (cropped, refCropped, frame, region) = crop.Value;
            cropped.Dispose();
            refCropped.Dispose();
            frame.Dispose();

            int gotLeft = region.X, gotTop = region.Y;
            int gotRight = region.X + region.Width, gotBottom = region.Y + region.Height;
            int wantLeft = tc.TopLeft.X, wantTop = tc.TopLeft.Y;
            int wantRight = tc.BottomRight.X, wantBottom = tc.BottomRight.Y;

            int overlapW = Math.Max(0, Math.Min(gotRight, wantRight) - Math.Max(gotLeft, wantLeft));
            int overlapH = Math.Max(0, Math.Min(gotBottom, wantBottom) - Math.Max(gotTop, wantTop));
            double overlap = (double)overlapW * overlapH;
            double wantW = wantRight - wantLeft, wantH = wantBottom - wantTop;
            double combined = (double)region.Width * region.Height + wantW * wantH - overlap;
            sum += combined > 0 ? overlap / combined : 0;
        }
        return sum / usable.Count;
    }

    // Field-by-field copy so a trial never touches the current best.
    DiffCropParams Copy(DiffCropParams source) => new()
    {
        DiffThresh = source.DiffThresh,
        RowThreshDiv = source.RowThreshDiv,
        ColThreshDiv = source.ColThreshDiv,
        MaxGap = source.MaxGap,
        TrimCutoff = source.TrimCutoff,
        OcrPad = source.OcrPad,
    };

    // Start from provided params or defaults
    var champion = request.Params?.Crop ?? new DiffCropParams();
    double championScore = AverageIoU(champion);
    int evaluations = 1;
    Console.Error.WriteLine($" crop-tune: baseline avgIoU={championScore:F4} {champion}");

    var intSweeps = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
    {
        ("diffThresh", [5, 10, 15, 20, 25, 30, 40], (c, v) => c.DiffThresh = v),
        ("rowThreshDiv", [20, 30, 40, 50, 60, 80, 100], (c, v) => c.RowThreshDiv = v),
        ("colThreshDiv", [5, 8, 10, 12, 15, 20], (c, v) => c.ColThreshDiv = v),
        ("maxGap", [5, 10, 15, 20, 25, 30], (c, v) => c.MaxGap = v),
    };
    double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5];

    const int maxRounds = 3;
    for (int roundNo = 1; roundNo <= maxRounds; roundNo++)
    {
        bool anyGain = false;
        Console.Error.WriteLine($"--- Round {roundNo} ---");

        // Integer-valued parameters
        foreach (var sweep in intSweeps)
        {
            Console.Error.Write($" {sweep.Name}: ");
            int topValue = 0;
            double topScore = -1;

            foreach (int candidate in sweep.Values)
            {
                var trial = Copy(champion);
                sweep.Set(trial, candidate);
                double trialScore = AverageIoU(trial);
                evaluations++;
                Console.Error.Write($"{candidate}={trialScore:F4} ");
                if (trialScore > topScore)
                {
                    topScore = trialScore;
                    topValue = candidate;
                }
            }
            Console.Error.WriteLine();

            if (topScore > championScore)
            {
                sweep.Set(champion, topValue);
                championScore = topScore;
                anyGain = true;
                Console.Error.WriteLine($" -> {sweep.Name}={topValue} avgIoU={championScore:F4}");
            }
        }

        // trimCutoff sweep (the only floating-point parameter)
        Console.Error.Write($" trimCutoff: ");
        double topTrim = champion.TrimCutoff;
        double topTrimScore = championScore;

        foreach (double candidate in trimValues)
        {
            var trial = Copy(champion);
            trial.TrimCutoff = candidate;
            double trialScore = AverageIoU(trial);
            evaluations++;
            Console.Error.Write($"{candidate:F2}={trialScore:F4} ");
            if (trialScore > topTrimScore)
            {
                topTrimScore = trialScore;
                topTrim = candidate;
            }
        }
        Console.Error.WriteLine();

        if (topTrimScore > championScore)
        {
            champion.TrimCutoff = topTrim;
            championScore = topTrimScore;
            anyGain = true;
            Console.Error.WriteLine($" -> trimCutoff={topTrim:F2} avgIoU={championScore:F4}");
        }

        Console.Error.WriteLine($" End of round {roundNo}: avgIoU={championScore:F4} {champion}");
        if (!anyGain)
        {
            break;
        }
    }

    Console.Error.WriteLine($"\n crop-tune: BEST avgIoU={championScore:F4} {champion} evals={evaluations}");

    return new CropTuneResponse
    {
        BestAvgIoU = championScore,
        BestParams = champion,
        Iterations = evaluations,
    };
}
|
|
|
|
/// <summary>
/// Crop accuracy test: runs diff and/or edge crop on test cases from crop.json,
/// computes IoU and per-edge deltas vs ground truth.
/// </summary>
/// <param name="ocrHandler">Handler used to load each snapshot and run the diff crop.</param>
/// <param name="edgeCropHandler">Handler used to run the edge crop.</param>
/// <param name="request">
/// Request whose Engine field selects the method ("diff" when absent) and whose Params.Crop /
/// EdgeParams.Crop supply the crop parameters (defaults when absent).
/// </param>
/// <returns>
/// A <c>CropTestResponse</c> with per-case results and their average IoU, or an
/// <c>ErrorResponse</c> when the method is not recognised or crop.json is missing or empty.
/// Cases with missing image files are reported with IoU 0 and are included in the average.
/// </returns>
private static object HandleCropTest(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, Request request)
{
    var method = request.Engine ?? "diff"; // reuse engine field: "diff", "edge", or "both"

    // An unrecognised method would run no crop at all and report IoU 0 for every case,
    // which is indistinguishable from a genuinely bad crop — fail loudly instead.
    if (method is not ("diff" or "edge" or "both"))
        return new ErrorResponse($"Unknown crop-test method: {method} (expected diff, edge, or both)");

    var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var casesPath = Path.Combine(tessdataDir, "crop.json");
    if (!File.Exists(casesPath))
        return new ErrorResponse($"crop.json not found at {casesPath}");

    var json = File.ReadAllText(casesPath);
    var cases = JsonSerializer.Deserialize<List<CropTestCase>>(json, JsonOptions);
    if (cases == null || cases.Count == 0)
        return new ErrorResponse("No test cases in crop.json");

    var diffParams = request.Params?.Crop ?? new DiffCropParams();
    var edgeParams = request.EdgeParams?.Crop ?? new EdgeCropParams();

    var results = new List<CropTestResult>();

    foreach (var tc in cases)
    {
        var imagePath = Path.Combine(tessdataDir, tc.Image);
        var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage);

        if (!File.Exists(imagePath) || !File.Exists(snapshotPath))
        {
            Console.Error.WriteLine($" crop-test: SKIP {tc.Id} — missing files");
            results.Add(new CropTestResult { Id = tc.Id, IoU = 0 });
            continue;
        }

        // Expected region
        int expX = tc.TopLeft.X;
        int expY = tc.TopLeft.Y;
        int expW = tc.BottomRight.X - tc.TopLeft.X;
        int expH = tc.BottomRight.Y - tc.TopLeft.Y;
        var expected = new RegionRect { X = expX, Y = expY, Width = expW, Height = expH };

        RegionRect? actual = null;

        if (method is "diff" or "both")
        {
            // Load snapshot as reference
            ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
            var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, diffParams);
            if (cropResult != null)
            {
                // Only the region is scored; release the bitmaps right away.
                var (cropped, refCropped, current, region) = cropResult.Value;
                actual = region;
                cropped.Dispose();
                refCropped.Dispose();
                current.Dispose();
            }
        }

        // NOTE(review): "both" currently runs only the diff crop — the edge crop is gated on
        // exactly "edge", and a result carries a single Actual region. Confirm intended semantics.
        if (method == "edge")
        {
            // Default cursor to center of ground-truth bbox if not specified
            int cx = tc.CursorX ?? (tc.TopLeft.X + tc.BottomRight.X) / 2;
            int cy = tc.CursorY ?? (tc.TopLeft.Y + tc.BottomRight.Y) / 2;
            var cropResult = edgeCropHandler.EdgeCrop(
                new Request { File = imagePath, CursorX = cx, CursorY = cy },
                edgeParams);
            if (cropResult != null)
            {
                var (cropped, fullCapture, region) = cropResult.Value;
                actual = region;
                cropped.Dispose();
                fullCapture.Dispose();
            }
        }

        // Compute IoU and deltas (all stay 0 when no crop was produced)
        double iou = 0;
        int dTop = 0, dLeft = 0, dRight = 0, dBottom = 0;
        if (actual != null)
        {
            int ax1 = actual.X, ay1 = actual.Y;
            int ax2 = actual.X + actual.Width, ay2 = actual.Y + actual.Height;
            int ex1 = expX, ey1 = expY, ex2 = tc.BottomRight.X, ey2 = tc.BottomRight.Y;

            int ix1 = Math.Max(ax1, ex1), iy1 = Math.Max(ay1, ey1);
            int ix2 = Math.Min(ax2, ex2), iy2 = Math.Min(ay2, ey2);
            int iw = Math.Max(0, ix2 - ix1), ih = Math.Max(0, iy2 - iy1);
            double intersection = (double)iw * ih;
            double union = (double)actual.Width * actual.Height + (double)expW * expH - intersection;
            iou = union > 0 ? intersection / union : 0;

            dTop = ay1 - ey1; // positive = crop starts too low
            dLeft = ax1 - ex1; // positive = crop starts too far right
            dRight = ax2 - ex2; // positive = crop ends too far right
            dBottom = ay2 - ey2; // positive = crop ends too low
        }

        Console.Error.WriteLine($" crop-test #{tc.Id}: IoU={iou:F3} expected=({expX},{expY})+{expW}x{expH} actual={FormatRegion(actual)} delta T={dTop:+0;-#} L={dLeft:+0;-#} R={dRight:+0;-#} B={dBottom:+0;-#}");

        results.Add(new CropTestResult
        {
            Id = tc.Id,
            IoU = iou,
            Expected = expected,
            Actual = actual,
            DeltaTop = dTop,
            DeltaLeft = dLeft,
            DeltaRight = dRight,
            DeltaBottom = dBottom,
        });
    }

    double avgIoU = results.Count > 0 ? results.Average(r => r.IoU) : 0;
    Console.Error.WriteLine($" crop-test: method={method} avgIoU={avgIoU:F3} ({results.Count} cases)");

    return new CropTestResponse
    {
        Method = method,
        AvgIoU = avgIoU,
        Results = results,
    };
}
|
|
|
|
/// <summary>
/// Renders a region as "(X,Y)+WidthxHeight" for log output, or "null" when there is no region.
/// </summary>
private static string FormatRegion(RegionRect? r)
{
    if (r != null)
    {
        return $"({r.X},{r.Y})+{r.Width}x{r.Height}";
    }

    return "null";
}
|
|
|
|
/// <summary>
/// Serializes a response with the shared JSON options, writes it to stdout as a single line,
/// and flushes immediately.
/// </summary>
private static void WriteResponse(object response)
{
    string payload = JsonSerializer.Serialize(response, JsonOptions);
    var stdout = Console.Out;
    stdout.WriteLine(payload);
    stdout.Flush();
}
|
|
}
|