work on new crop

This commit is contained in:
Boki 2026-02-12 22:07:54 -05:00
parent 9845e7f9bf
commit a7fab55d44
22 changed files with 975 additions and 10 deletions

View file

@ -55,6 +55,7 @@ static class Daemon
var gridHandler = new GridHandler();
var detectGridHandler = new DetectGridHandler();
var templateMatchHandler = new TemplateMatchHandler();
var edgeCropHandler = new EdgeCropHandler();
var pythonBridge = new PythonOcrBridge();
// Main loop: read one JSON line, handle, write one JSON line
@ -80,8 +81,11 @@ static class Daemon
"capture" => ocrHandler.HandleCapture(request),
"snapshot" => ocrHandler.HandleSnapshot(request),
"diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
"edge-ocr" => HandleEdgeOcrPipeline(ocrHandler, edgeCropHandler, pythonBridge, request),
"test" => ocrHandler.HandleTest(request),
"tune" => ocrHandler.HandleTune(request),
"crop-test" => HandleCropTest(ocrHandler, edgeCropHandler, request),
"crop-tune" => HandleCropTune(ocrHandler, request),
"grid" => gridHandler.HandleGrid(request),
"detect-grid" => detectGridHandler.HandleDetectGrid(request),
"match-template" => templateMatchHandler.HandleTemplateMatch(request),
@ -251,6 +255,365 @@ static class Daemon
}
}
/// <summary>
/// Edge-based tooltip detection pipeline.
/// EdgeCrop → preprocess (tophat only; bgsub falls back to tophat) → route to engine.
/// </summary>
/// <param name="ocrHandler">Runs Tesseract on the preprocessed crop when the engine is "tesseract".</param>
/// <param name="edgeCropHandler">Locates and crops the tooltip region around the cursor.</param>
/// <param name="pythonBridge">Receives the preprocessed crop for every engine other than "tesseract".</param>
/// <param name="request">
/// Reads Engine (default "tesseract"), Preprocess (default "tophat"), EdgeParams and Path.
/// When Path is set, the preprocessed crop is saved there and the full capture next to it
/// with ".full" inserted before the extension.
/// </param>
/// <returns>
/// An empty <c>OcrResponse</c> when no tooltip region was found; otherwise the Tesseract result,
/// or a <c>DiffOcrResponse</c> whose word coordinates have been shifted by the crop origin.
/// </returns>
private static object HandleEdgeOcrPipeline(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var ep = request.EdgeParams ?? new EdgeOcrParams();
    var cropParams = ep.Crop;
    var ocrParams = ep.Ocr;

    // Edge method only supports tophat (no reference frame for bgsub).
    // Any value other than "tophat"/"bgsub" is treated as "none" below.
    string preprocess = request.Preprocess ?? "tophat";
    if (preprocess == "bgsub") preprocess = "tophat";

    var sw = System.Diagnostics.Stopwatch.StartNew();
    var cropResult = edgeCropHandler.EdgeCrop(request, cropParams);
    if (cropResult == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, fullCapture, region) = cropResult.Value;
    using var _fullCapture = fullCapture;

    // Preprocess. The raw crop is released in a finally block so it is not leaked
    // when preprocessing throws (this handler runs inside a long-lived daemon loop).
    Bitmap processed;
    try
    {
        if (preprocess == "tophat")
        {
            processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize, upscale: ocrParams.Upscale);
        }
        else // "none"
        {
            processed = (Bitmap)cropped.Clone();
        }
    }
    finally
    {
        cropped.Dispose();
    }
    using var _processed = processed;
    var cropMs = sw.ElapsedMilliseconds;

    // Save debug images if path provided
    if (!string.IsNullOrEmpty(request.Path))
    {
        var dir = Path.GetDirectoryName(request.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));

        // "shot.png" -> "shot.full.png"
        var ext = Path.GetExtension(request.Path);
        var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
        fullCapture.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
    }

    // Shared timing line for both engine branches.
    void LogTiming(long ocrMs) =>
        Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}");

    // Route to engine
    sw.Restart();
    if (engine == "tesseract")
    {
        var result = ocrHandler.RunTesseractOnBitmap(processed, region, pad: cropParams.OcrPad, upscale: ocrParams.Upscale);
        LogTiming(sw.ElapsedMilliseconds);
        return result;
    }

    // easyocr, paddleocr (any non-tesseract engine name is forwarded to the bridge as-is)
    var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
    LogTiming(sw.ElapsedMilliseconds);

    // Shift word boxes by the crop origin so they are relative to the full capture.
    foreach (var line in ocrResult.Lines)
    {
        foreach (var word in line.Words)
        {
            word.X += region.X;
            word.Y += region.Y;
        }
    }

    return new DiffOcrResponse
    {
        Text = ocrResult.Text,
        Lines = ocrResult.Lines,
        Region = region,
    };
}
/// <summary>
/// Tunes <c>DiffCropParams</c> one parameter at a time (coordinate descent), keeping whichever
/// value gives the highest mean IoU against the ground-truth boxes in tessdata/crop.json.
/// </summary>
/// <param name="ocrHandler">Provides HandleSnapshot and DiffCrop; both are re-run for every case on every evaluation.</param>
/// <param name="request">Optional starting point in Params.Crop; a default DiffCropParams is used otherwise.</param>
/// <returns>
/// A <c>CropTuneResponse</c> with the best score, the winning parameters and the number of
/// evaluations performed, or an <c>ErrorResponse</c> when crop.json or all of its images are missing.
/// </returns>
private static object HandleCropTune(OcrHandler ocrHandler, Request request)
{
    var dataRoot = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var manifestPath = Path.Combine(dataRoot, "crop.json");
    if (!File.Exists(manifestPath))
    {
        return new ErrorResponse($"crop.json not found at {manifestPath}");
    }

    var allCases = JsonSerializer.Deserialize<List<CropTestCase>>(File.ReadAllText(manifestPath), JsonOptions);
    if (allCases == null || allCases.Count == 0)
    {
        return new ErrorResponse("No test cases in crop.json");
    }

    // Keep only the cases whose image and snapshot both exist on disk.
    var usable = new List<(CropTestCase Truth, string Image, string Snapshot)>();
    foreach (var entry in allCases)
    {
        var imageFile = Path.Combine(dataRoot, entry.Image);
        var snapshotFile = Path.Combine(dataRoot, entry.SnapshotImage);
        if (!File.Exists(imageFile) || !File.Exists(snapshotFile))
        {
            continue;
        }
        usable.Add((entry, imageFile, snapshotFile));
    }
    if (usable.Count == 0)
    {
        return new ErrorResponse("No valid test cases found");
    }

    // Mean IoU over all usable cases; a case where DiffCrop finds nothing contributes 0.
    double MeanIoU(DiffCropParams candidateParams)
    {
        double sum = 0;
        foreach (var (truth, img, snap) in usable)
        {
            ocrHandler.HandleSnapshot(new Request { File = snap });
            var crop = ocrHandler.DiffCrop(new Request { File = img }, candidateParams);
            if (crop == null)
            {
                continue;
            }

            // Only the detected region is needed; release the bitmaps immediately.
            var (cropped, refCropped, current, found) = crop.Value;
            cropped.Dispose();
            refCropped.Dispose();
            current.Dispose();

            int overlapLeft = Math.Max(found.X, truth.TopLeft.X);
            int overlapTop = Math.Max(found.Y, truth.TopLeft.Y);
            int overlapRight = Math.Min(found.X + found.Width, truth.BottomRight.X);
            int overlapBottom = Math.Min(found.Y + found.Height, truth.BottomRight.Y);
            int overlapW = Math.Max(0, overlapRight - overlapLeft);
            int overlapH = Math.Max(0, overlapBottom - overlapTop);
            double overlapArea = (double)overlapW * overlapH;

            double truthW = truth.BottomRight.X - truth.TopLeft.X;
            double truthH = truth.BottomRight.Y - truth.TopLeft.Y;
            double unionArea = (double)found.Width * found.Height + truthW * truthH - overlapArea;
            if (unionArea > 0)
            {
                sum += overlapArea / unionArea;
            }
        }
        return sum / usable.Count;
    }

    // Trial copies carry exactly these six settings.
    DiffCropParams Copy(DiffCropParams src)
    {
        var copy = new DiffCropParams();
        copy.DiffThresh = src.DiffThresh;
        copy.RowThreshDiv = src.RowThreshDiv;
        copy.ColThreshDiv = src.ColThreshDiv;
        copy.MaxGap = src.MaxGap;
        copy.TrimCutoff = src.TrimCutoff;
        copy.OcrPad = src.OcrPad;
        return copy;
    }

    // Start from provided params or defaults
    var incumbent = request.Params?.Crop ?? new DiffCropParams();
    double incumbentScore = MeanIoU(incumbent);
    int evalCount = 1;
    Console.Error.WriteLine($" crop-tune: baseline avgIoU={incumbentScore:F4} {incumbent}");

    var integerAxes = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
    {
        ("diffThresh", new[] { 5, 10, 15, 20, 25, 30, 40 }, (target, value) => target.DiffThresh = value),
        ("rowThreshDiv", new[] { 20, 30, 40, 50, 60, 80, 100 }, (target, value) => target.RowThreshDiv = value),
        ("colThreshDiv", new[] { 5, 8, 10, 12, 15, 20 }, (target, value) => target.ColThreshDiv = value),
        ("maxGap", new[] { 5, 10, 15, 20, 25, 30 }, (target, value) => target.MaxGap = value),
    };
    var trimCandidates = new[] { 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5 };

    const int RoundLimit = 3;
    for (int pass = 1; pass <= RoundLimit; pass++)
    {
        bool progressed = false;
        Console.Error.WriteLine($"--- Round {pass} ---");

        // Integer-valued parameters: try every candidate, adopt the best if it beats the incumbent.
        foreach (var axis in integerAxes)
        {
            Console.Error.Write($" {axis.Name}: ");
            int winner = 0;
            double winnerScore = -1;
            foreach (int option in axis.Values)
            {
                var probe = Copy(incumbent);
                axis.Set(probe, option);
                double probeScore = MeanIoU(probe);
                evalCount++;
                Console.Error.Write($"{option}={probeScore:F4} ");
                if (probeScore > winnerScore)
                {
                    winnerScore = probeScore;
                    winner = option;
                }
            }
            Console.Error.WriteLine();

            if (winnerScore > incumbentScore)
            {
                axis.Set(incumbent, winner);
                incumbentScore = winnerScore;
                progressed = true;
                Console.Error.WriteLine($" -> {axis.Name}={winner} avgIoU={incumbentScore:F4}");
            }
        }

        // trimCutoff sweep (the only floating-point parameter)
        Console.Error.Write($" trimCutoff: ");
        double trimWinner = incumbent.TrimCutoff;
        double trimWinnerScore = incumbentScore;
        foreach (double option in trimCandidates)
        {
            var probe = Copy(incumbent);
            probe.TrimCutoff = option;
            double probeScore = MeanIoU(probe);
            evalCount++;
            Console.Error.Write($"{option:F2}={probeScore:F4} ");
            if (probeScore > trimWinnerScore)
            {
                trimWinnerScore = probeScore;
                trimWinner = option;
            }
        }
        Console.Error.WriteLine();

        if (trimWinnerScore > incumbentScore)
        {
            incumbent.TrimCutoff = trimWinner;
            incumbentScore = trimWinnerScore;
            progressed = true;
            Console.Error.WriteLine($" -> trimCutoff={trimWinner:F2} avgIoU={incumbentScore:F4}");
        }

        Console.Error.WriteLine($" End of round {pass}: avgIoU={incumbentScore:F4} {incumbent}");

        // A full pass without improvement means the search has converged.
        if (!progressed)
        {
            break;
        }
    }

    Console.Error.WriteLine($"\n crop-tune: BEST avgIoU={incumbentScore:F4} {incumbent} evals={evalCount}");
    return new CropTuneResponse
    {
        BestAvgIoU = incumbentScore,
        BestParams = incumbent,
        Iterations = evalCount,
    };
}
/// <summary>
/// Crop accuracy test: runs the diff or edge crop on the test cases from tessdata/crop.json
/// and computes IoU and per-edge deltas vs ground truth.
/// </summary>
/// <param name="ocrHandler">Provides HandleSnapshot and DiffCrop for the "diff" method.</param>
/// <param name="edgeCropHandler">Provides EdgeCrop for the "edge" method.</param>
/// <param name="request">
/// Engine selects the method: "diff" (default), "edge" or "both".
/// Params.Crop and EdgeParams.Crop override the respective default parameters.
/// </param>
/// <returns>
/// A <c>CropTestResponse</c> with one result per case (skipped cases score IoU 0), or an
/// <c>ErrorResponse</c> when crop.json is missing/empty or the method name is not recognised.
/// </returns>
/// <remarks>
/// NOTE(review): "both" is evaluated exactly like "diff" — each result carries a single
/// Actual region and the edge crop only runs for "edge". Confirm whether "both" should
/// report the two methods separately.
/// </remarks>
private static object HandleCropTest(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, Request request)
{
    var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var casesPath = Path.Combine(tessdataDir, "crop.json");
    if (!File.Exists(casesPath))
        return new ErrorResponse($"crop.json not found at {casesPath}");

    var json = File.ReadAllText(casesPath);
    var cases = JsonSerializer.Deserialize<List<CropTestCase>>(json, JsonOptions);
    if (cases == null || cases.Count == 0)
        return new ErrorResponse("No test cases in crop.json");

    var method = request.Engine ?? "diff"; // reuse engine field: "diff", "edge", or "both"
    bool runDiff = method is "diff" or "both";
    bool runEdge = method == "edge";

    // Reject typos explicitly instead of silently reporting IoU 0 for every case.
    if (!runDiff && !runEdge)
        return new ErrorResponse($"Unknown crop-test method '{method}' (expected diff, edge or both)");

    var diffParams = request.Params?.Crop ?? new DiffCropParams();
    var edgeParams = request.EdgeParams?.Crop ?? new EdgeCropParams();

    var results = new List<CropTestResult>();
    foreach (var tc in cases)
    {
        var imagePath = Path.Combine(tessdataDir, tc.Image);
        var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage);

        // The snapshot is only read by the diff crop, so an edge-only run must not
        // skip a case just because its reference frame is absent.
        bool missingFiles = !File.Exists(imagePath) || (runDiff && !File.Exists(snapshotPath));
        if (missingFiles)
        {
            Console.Error.WriteLine($" crop-test: SKIP {tc.Id} — missing files");
            results.Add(new CropTestResult { Id = tc.Id, IoU = 0 });
            continue;
        }

        // Expected region (BottomRight is treated as the exclusive far corner).
        int expX = tc.TopLeft.X;
        int expY = tc.TopLeft.Y;
        int expW = tc.BottomRight.X - tc.TopLeft.X;
        int expH = tc.BottomRight.Y - tc.TopLeft.Y;
        var expected = new RegionRect { X = expX, Y = expY, Width = expW, Height = expH };

        RegionRect? actual = null;

        if (runDiff)
        {
            // Load snapshot as reference
            ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
            var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, diffParams);
            if (cropResult != null)
            {
                var (cropped, refCropped, current, region) = cropResult.Value;
                actual = region;
                cropped.Dispose();
                refCropped.Dispose();
                current.Dispose();
            }
        }

        if (runEdge)
        {
            // Default cursor to center of ground-truth bbox if not specified
            int cx = tc.CursorX ?? (tc.TopLeft.X + tc.BottomRight.X) / 2;
            int cy = tc.CursorY ?? (tc.TopLeft.Y + tc.BottomRight.Y) / 2;
            var cropResult = edgeCropHandler.EdgeCrop(
                new Request { File = imagePath, CursorX = cx, CursorY = cy },
                edgeParams);
            if (cropResult != null)
            {
                var (cropped, fullCapture, region) = cropResult.Value;
                actual = region;
                cropped.Dispose();
                fullCapture.Dispose();
            }
        }

        // Compute IoU and deltas (all stay 0 when no region was detected)
        double iou = 0;
        int dTop = 0, dLeft = 0, dRight = 0, dBottom = 0;
        if (actual != null)
        {
            int ax1 = actual.X, ay1 = actual.Y;
            int ax2 = actual.X + actual.Width, ay2 = actual.Y + actual.Height;
            int ex1 = expX, ey1 = expY, ex2 = tc.BottomRight.X, ey2 = tc.BottomRight.Y;

            int iw = Math.Max(0, Math.Min(ax2, ex2) - Math.Max(ax1, ex1));
            int ih = Math.Max(0, Math.Min(ay2, ey2) - Math.Max(ay1, ey1));
            double intersection = (double)iw * ih;
            double union = (double)actual.Width * actual.Height + (double)expW * expH - intersection;
            iou = union > 0 ? intersection / union : 0;

            dTop = ay1 - ey1;    // positive = crop starts too low
            dLeft = ax1 - ex1;   // positive = crop starts too far right
            dRight = ax2 - ex2;  // positive = crop ends too far right
            dBottom = ay2 - ey2; // positive = crop ends too low
        }

        Console.Error.WriteLine($" crop-test #{tc.Id}: IoU={iou:F3} expected=({expX},{expY})+{expW}x{expH} actual={FormatRegion(actual)} delta T={dTop:+0;-#} L={dLeft:+0;-#} R={dRight:+0;-#} B={dBottom:+0;-#}");

        results.Add(new CropTestResult
        {
            Id = tc.Id,
            IoU = iou,
            Expected = expected,
            Actual = actual,
            DeltaTop = dTop,
            DeltaLeft = dLeft,
            DeltaRight = dRight,
            DeltaBottom = dBottom,
        });
    }

    double avgIoU = results.Count > 0 ? results.Average(r => r.IoU) : 0;
    Console.Error.WriteLine($" crop-test: method={method} avgIoU={avgIoU:F3} ({results.Count} cases)");

    return new CropTestResponse
    {
        Method = method,
        AvgIoU = avgIoU,
        Results = results,
    };
}
/// <summary>
/// Renders a region as "(x,y)+WxH" for log output, or the literal "null" when there is none.
/// </summary>
private static string FormatRegion(RegionRect? r)
{
    if (r is null)
    {
        return "null";
    }

    return $"({r.X},{r.Y})+{r.Width}x{r.Height}";
}
private static void WriteResponse(object response)
{
var json = JsonSerializer.Serialize(response, JsonOptions);

View file

@ -0,0 +1,205 @@
namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
/// <summary>
/// Finds a dark rectangular panel (tooltip) around the cursor by scanning outward from the
/// cursor position for the extent of dark pixels, then crops it out of the capture.
/// </summary>
class EdgeCropHandler
{
    [StructLayout(LayoutKind.Sequential)]
    private struct POINT { public int X, Y; }

    [DllImport("user32.dll")]
    private static extern bool GetCursorPos(out POINT lpPoint);

    // Minimum accepted size (px) of the detected panel on either axis.
    private const int MinExtent = 50;

    // Minimum number of dark scan lines required before the percentiles are trusted.
    private const int MinSamples = 10;

    /// <summary>
    /// Detects the dark region surrounding the cursor and returns it together with the capture.
    /// </summary>
    /// <param name="req">
    /// CursorX/CursorY give the seed point in capture pixel coordinates; when either is absent
    /// the current cursor position from GetCursorPos is used. File is forwarded to
    /// ScreenCapture.CaptureOrLoad.
    /// </param>
    /// <param name="p">Detection thresholds; see the inline comments for how each one is used.</param>
    /// <returns>
    /// The cropped bitmap, the full capture and the crop rectangle — the caller owns and must
    /// dispose both bitmaps. Returns null (with the capture already disposed) when the cursor
    /// lies outside the capture, too few dark scan lines were found, or the panel is smaller
    /// than 50 px on either axis.
    /// </returns>
    public (Bitmap cropped, Bitmap fullCapture, RegionRect region)? EdgeCrop(Request req, EdgeCropParams p)
    {
        int cursorX, cursorY;
        if (req.CursorX.HasValue && req.CursorY.HasValue)
        {
            cursorX = req.CursorX.Value;
            cursorY = req.CursorY.Value;
        }
        else if (GetCursorPos(out var pt))
        {
            cursorX = pt.X;
            cursorY = pt.Y;
        }
        else
        {
            Console.Error.WriteLine(" edge-crop: GetCursorPos failed");
            return null;
        }

        var fullCapture = ScreenCapture.CaptureOrLoad(req.File, null);

        // Ownership of fullCapture passes to the caller only on success; every other exit
        // (early return or exception) disposes it in the finally block.
        bool handedOff = false;
        try
        {
            int w = fullCapture.Width;
            int h = fullCapture.Height;

            // The scans index the pixel buffer directly from the cursor position, so a cursor
            // outside the capture would read the wrong row or run past the end of the buffer.
            if (cursorX < 0 || cursorX >= w || cursorY < 0 || cursorY >= h)
            {
                Console.Error.WriteLine($" edge-crop: cursor ({cursorX},{cursorY}) outside capture {w}x{h}");
                return null;
            }

            byte[] px;
            int stride;
            var bmpData = fullCapture.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
            try
            {
                stride = bmpData.Stride;
                px = new byte[stride * h];
                Marshal.Copy(bmpData.Scan0, px, 0, px.Length);
            }
            finally
            {
                fullCapture.UnlockBits(bmpData);
            }

            int darkThresh = p.DarkThresh;
            int colGap = p.RunGapTolerance;
            int maxGap = p.MaxGap;

            // Percentile denominators come from request JSON; guard against 0 (division by zero).
            int rowDiv = Math.Max(1, p.RowThreshDiv);
            int colDiv = Math.Max(1, p.ColThreshDiv);

            // ── Phase 1: Per-row horizontal extent ──
            // Scan left/right from cursorX per row. Gap tolerance bridges through text.
            // Percentile-based filtering for robustness.
            int bandHalf = p.MinDarkRun; // repurpose: half-height of horizontal scan band
            int bandTop = Math.Max(0, cursorY - bandHalf);
            int bandBot = Math.Min(h - 1, cursorY + bandHalf);

            var leftExtents = new List<int>();
            var rightExtents = new List<int>();
            for (int y = bandTop; y <= bandBot; y++)
            {
                int rowOff = y * stride;

                // Rows where the cursor column itself is bright are not part of the panel.
                if (!IsDark(px, rowOff + cursorX * 4, darkThresh)) continue;

                leftExtents.Add(ScanDarkExtent(px, rowOff, 4, cursorX, -1, w, darkThresh, colGap));
                rightExtents.Add(ScanDarkExtent(px, rowOff, 4, cursorX, +1, w, darkThresh, colGap));
            }

            if (leftExtents.Count < MinSamples)
            {
                Console.Error.WriteLine($" edge-crop: too few dark rows ({leftExtents.Count})");
                return null;
            }

            leftExtents.Sort();
            rightExtents.Sort();

            // Use RowThreshDiv/ColThreshDiv as percentile denominators
            // e.g., RowThreshDiv=4 → 25th percentile for left, ColThreshDiv=4 → 75th for right
            int leftPctIdx = Math.Clamp(leftExtents.Count / rowDiv, 0, leftExtents.Count - 1);
            int rightPctIdx = Math.Clamp(rightExtents.Count * (colDiv - 1) / colDiv, 0, rightExtents.Count - 1);

            int bestColStart = leftExtents[leftPctIdx];
            int bestColEnd = rightExtents[rightPctIdx];
            int rw = bestColEnd - bestColStart + 1;

            Console.Error.WriteLine($" edge-crop: horizontal: left={bestColStart} right={bestColEnd} ({rw}px) samples={leftExtents.Count} pctL={leftPctIdx}/{leftExtents.Count} pctR={rightPctIdx}/{rightExtents.Count}");

            if (rw < MinExtent)
            {
                Console.Error.WriteLine($" edge-crop: horizontal extent too small");
                return null;
            }

            // ── Phase 2: Per-column vertical extent ──
            // Scan only the columns around the cursor, limited to the detected horizontal extent.
            int colBandHalf = rw / 3;
            int colBandLeft = Math.Max(bestColStart, cursorX - colBandHalf);
            int colBandRight = Math.Min(bestColEnd, cursorX + colBandHalf);

            // Asymmetric gap: larger upward to bridge header decorations (~30-40px bright)
            int maxGapUp = maxGap * 3;

            var topExtents = new List<int>();
            var bottomExtents = new List<int>();
            for (int x = colBandLeft; x <= colBandRight; x++)
            {
                int colOff = x * 4;
                if (!IsDark(px, cursorY * stride + colOff, darkThresh)) continue;

                topExtents.Add(ScanDarkExtent(px, colOff, stride, cursorY, -1, h, darkThresh, maxGapUp));
                bottomExtents.Add(ScanDarkExtent(px, colOff, stride, cursorY, +1, h, darkThresh, maxGap));
            }

            if (topExtents.Count < MinSamples)
            {
                Console.Error.WriteLine($" edge-crop: too few dark columns ({topExtents.Count})");
                return null;
            }

            topExtents.Sort();
            bottomExtents.Sort();

            int topPctIdx = Math.Clamp(topExtents.Count / rowDiv, 0, topExtents.Count - 1);
            int botPctIdx = Math.Clamp(bottomExtents.Count * (colDiv - 1) / colDiv, 0, bottomExtents.Count - 1);

            int bestRowStart = topExtents[topPctIdx];
            int bestRowEnd = bottomExtents[botPctIdx];
            int rh = bestRowEnd - bestRowStart + 1;

            Console.Error.WriteLine($" edge-crop: vertical: top={bestRowStart} bottom={bestRowEnd} ({rh}px) samples={topExtents.Count}");

            if (rh < MinExtent)
            {
                Console.Error.WriteLine($" edge-crop: vertical extent too small");
                return null;
            }

            Console.Error.WriteLine($" edge-crop: result ({bestColStart},{bestRowStart}) {rw}x{rh}");

            var cropRect = new Rectangle(bestColStart, bestRowStart, rw, rh);
            var cropped = fullCapture.Clone(cropRect, PixelFormat.Format32bppArgb);
            var region = new RegionRect { X = bestColStart, Y = bestRowStart, Width = rw, Height = rh };

            handedOff = true;
            return (cropped, fullCapture, region);
        }
        finally
        {
            if (!handedOff)
                fullCapture.Dispose();
        }
    }

    // True when the mean of the three colour bytes at the given byte offset is below the threshold.
    private static bool IsDark(byte[] px, int offset, int darkThresh) =>
        (px[offset] + px[offset + 1] + px[offset + 2]) / 3 < darkThresh;

    // Walks along one axis from origin in direction dir (-1 or +1) and returns the farthest
    // dark position reached; stops after more than gapTolerance consecutive bright pixels.
    // The byte offset of position pos is fixedOffset + pos * posStride.
    private static int ScanDarkExtent(byte[] px, int fixedOffset, int posStride, int origin, int dir, int length, int darkThresh, int gapTolerance)
    {
        int edge = origin;
        int gap = 0;
        for (int pos = origin + dir; pos >= 0 && pos < length; pos += dir)
        {
            if (IsDark(px, fixedOffset + pos * posStride, darkThresh))
            {
                edge = pos;
                gap = 0;
            }
            else if (++gap > gapTolerance)
            {
                break;
            }
        }
        return edge;
    }
}

View file

@ -48,6 +48,15 @@ class Request
[JsonPropertyName("params")]
public DiffOcrParams? Params { get; set; }
[JsonPropertyName("edgeParams")]
public EdgeOcrParams? EdgeParams { get; set; }
[JsonPropertyName("cursorX")]
public int? CursorX { get; set; }
[JsonPropertyName("cursorY")]
public int? CursorY { get; set; }
}
class RegionRect
@ -336,6 +345,47 @@ sealed class DiffOcrParams
public override string ToString() => $"[{Crop}] [{Ocr}]";
}
/// <summary>
/// Tunable settings for the edge-based crop (read by EdgeCropHandler.EdgeCrop), deserialized
/// from the "crop" object of "edgeParams".
/// </summary>
sealed class EdgeCropParams
{
// A pixel counts as dark when the integer mean of its three colour bytes is below this value.
[JsonPropertyName("darkThresh")]
public int DarkThresh { get; set; } = 40;
// Despite the name, EdgeCrop uses this as the half-height (in rows) of the band
// around the cursor that is scanned horizontally.
[JsonPropertyName("minDarkRun")]
public int MinDarkRun { get; set; } = 200;
// Number of consecutive bright pixels the horizontal (left/right) scan may bridge.
[JsonPropertyName("runGapTolerance")]
public int RunGapTolerance { get; set; } = 15;
// Percentile denominator for the left and top edges: sorted index = count / RowThreshDiv.
[JsonPropertyName("rowThreshDiv")]
public int RowThreshDiv { get; set; } = 40;
// Percentile denominator for the right and bottom edges:
// sorted index = count * (ColThreshDiv - 1) / ColThreshDiv.
[JsonPropertyName("colThreshDiv")]
public int ColThreshDiv { get; set; } = 8;
// Consecutive bright pixels the downward scan may bridge; the upward scan allows 3x this.
[JsonPropertyName("maxGap")]
public int MaxGap { get; set; } = 15;
// NOTE(review): not read by EdgeCrop in the code shown here — confirm whether it is still needed.
[JsonPropertyName("trimCutoff")]
public double TrimCutoff { get; set; } = 0.3;
// Passed as the "pad" argument to RunTesseractOnBitmap by the edge-ocr pipeline.
[JsonPropertyName("ocrPad")]
public int OcrPad { get; set; } = 10;
// Log-friendly summary; OcrPad is not included.
public override string ToString() =>
$"darkThresh={DarkThresh} minRun={MinDarkRun} runGap={RunGapTolerance} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} rowDiv={RowThreshDiv} colDiv={ColThreshDiv}";
}
/// <summary>
/// Parameter bundle for the "edge-ocr" command ("edgeParams" in the request JSON):
/// crop detection settings plus OCR/preprocessing settings.
/// </summary>
sealed class EdgeOcrParams
{
// Settings handed to EdgeCropHandler.EdgeCrop.
[JsonPropertyName("crop")]
public EdgeCropParams Crop { get; set; } = new();
// Preprocessing/OCR settings; the edge pipeline reads KernelSize and Upscale from it
// and forwards the whole object to the Python OCR bridge.
[JsonPropertyName("ocr")]
public OcrParams Ocr { get; set; } = new();
// Log-friendly summary of both parameter groups.
public override string ToString() => $"[{Crop}] [{Ocr}]";
}
class TestCase
{
[JsonPropertyName("id")]
@ -404,3 +454,95 @@ class TuneResponse
[JsonPropertyName("iterations")]
public int Iterations { get; set; }
}
// ── Crop test models ────────────────────────────────────────────────────────
/// <summary>
/// Integer pixel coordinate used for the ground-truth corners in crop.json.
/// </summary>
class PointXY
{
// Horizontal pixel position.
[JsonPropertyName("x")]
public int X { get; set; }
// Vertical pixel position.
[JsonPropertyName("y")]
public int Y { get; set; }
}
/// <summary>
/// One ground-truth entry of tessdata/crop.json, consumed by the crop-test and crop-tune commands.
/// </summary>
class CropTestCase
{
// Identifier echoed in log lines and in the matching CropTestResult.
[JsonPropertyName("id")]
public string Id { get; set; } = "";
// Image containing the tooltip; resolved relative to the tessdata directory.
[JsonPropertyName("image")]
public string Image { get; set; } = "";
// Reference frame loaded via HandleSnapshot before the diff crop; also relative to tessdata.
[JsonPropertyName("snapshotImage")]
public string SnapshotImage { get; set; } = "";
// Top-left corner of the expected crop.
[JsonPropertyName("topLeft")]
public PointXY TopLeft { get; set; } = new();
// Far corner of the expected crop; expected width/height are computed as
// BottomRight - TopLeft, i.e. this corner is treated as exclusive.
[JsonPropertyName("bottomRight")]
public PointXY BottomRight { get; set; } = new();
// Optional cursor seed for the edge crop; crop-test falls back to the centre of the
// ground-truth box when it is absent.
[JsonPropertyName("cursorX")]
public int? CursorX { get; set; }
[JsonPropertyName("cursorY")]
public int? CursorY { get; set; }
}
/// <summary>
/// Outcome of one crop-test case: detected vs expected region, their IoU and per-edge offsets.
/// </summary>
class CropTestResult
{
// Id of the CropTestCase this result belongs to.
[JsonPropertyName("id")]
public string Id { get; set; } = "";
// Intersection-over-union of Actual and Expected; 0 when nothing was detected
// or the case was skipped.
[JsonPropertyName("iou")]
public double IoU { get; set; }
// Ground-truth rectangle from crop.json (left at its default for skipped cases).
[JsonPropertyName("expected")]
public RegionRect Expected { get; set; } = new();
// Detected rectangle, or null when the crop found nothing or the case was skipped.
[JsonPropertyName("actual")]
public RegionRect? Actual { get; set; }
// Edge offsets, actual minus expected, in pixels (all 0 when Actual is null).
// Positive top = crop starts too low.
[JsonPropertyName("deltaTop")]
public int DeltaTop { get; set; }
// Positive left = crop starts too far right.
[JsonPropertyName("deltaLeft")]
public int DeltaLeft { get; set; }
// Positive right = crop ends too far right.
[JsonPropertyName("deltaRight")]
public int DeltaRight { get; set; }
// Positive bottom = crop ends too low.
[JsonPropertyName("deltaBottom")]
public int DeltaBottom { get; set; }
}
/// <summary>
/// Response of the "crop-test" command.
/// </summary>
class CropTestResponse
{
// Always true; failures are reported with an ErrorResponse instead.
[JsonPropertyName("ok")]
public bool Ok => true;
// The method string the test ran with (taken from the request's engine field).
[JsonPropertyName("method")]
public string Method { get; set; } = "";
// Mean IoU over all results, including skipped cases (which count as 0).
[JsonPropertyName("avgIoU")]
public double AvgIoU { get; set; }
// One entry per case in crop.json, in file order.
[JsonPropertyName("results")]
public List<CropTestResult> Results { get; set; } = [];
}
/// <summary>
/// Response of the "crop-tune" command.
/// </summary>
class CropTuneResponse
{
// Always true; failures are reported with an ErrorResponse instead.
[JsonPropertyName("ok")]
public bool Ok => true;
// Highest mean IoU reached during the sweep.
[JsonPropertyName("bestAvgIoU")]
public double BestAvgIoU { get; set; }
// Diff-crop parameters that produced BestAvgIoU.
[JsonPropertyName("bestParams")]
public DiffCropParams BestParams { get; set; } = new();
// Total number of parameter-set evaluations performed (baseline included),
// not the number of sweep rounds.
[JsonPropertyName("iterations")]
public int Iterations { get; set; }
}

View file

@ -26,6 +26,9 @@
<None Update="tessdata\cases.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="tessdata\crop.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="tessdata\poe2.user-words" Condition="Exists('tessdata\poe2.user-words')">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>

View file

@ -269,6 +269,36 @@ class OcrHandler(TesseractEngine engine)
int maxX = Math.Min(bestColEnd, w - 1);
int maxY = Math.Min(bestRowEnd, h - 1);
// Boundary extension: scan outward from detected edges with a relaxed threshold
// to capture low-signal regions (e.g. ornamental tooltip headers)
int extRowThresh = Math.Max(1, rowThresh / 4);
int extColThresh = Math.Max(1, colThresh / 4);
int extTop = Math.Max(0, minY - maxGap);
for (int y = minY - 1; y >= extTop; y--)
{
if (rowCounts[y] >= extRowThresh) minY = y;
else break;
}
int extBottom = Math.Min(h - 1, maxY + maxGap);
for (int y = maxY + 1; y <= extBottom; y++)
{
if (rowCounts[y] >= extRowThresh) maxY = y;
else break;
}
int extLeft = Math.Max(0, minX - maxGap);
for (int x = minX - 1; x >= extLeft; x--)
{
if (colCounts[x] >= extColThresh) minX = x;
else break;
}
int extRight = Math.Min(w - 1, maxX + maxGap);
for (int x = maxX + 1; x <= extRight; x++)
{
if (colCounts[x] >= extColThresh) maxX = x;
else break;
}
// Trim low-density edges on both axes to avoid oversized crops.
int colSpan = maxX - minX + 1;
if (colSpan > 50)

View file

@ -91,7 +91,6 @@ static class TestRunner
continue;
}
var options = new OcrOptions();
List<string> actualSet;
if (!string.IsNullOrWhiteSpace(tc.BeforeImage))
@ -115,7 +114,6 @@ static class TestRunner
var response = ocrHandler.HandleDiffOcr(new Request
{
File = imagePath,
Ocr = options,
Path = savePath,
});
@ -136,7 +134,7 @@ static class TestRunner
else
{
using var bitmap = new Bitmap(imagePath);
using var processed = ImagePreprocessor.PreprocessForOcr(bitmap, options);
using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);
if (!string.IsNullOrEmpty(savePreDir))
{
@ -146,7 +144,7 @@ static class TestRunner
using var pix = ImageUtils.BitmapToPix(processed);
using var page = engine.Process(pix);
var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0, minConfidence: options.MinConfidence);
var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
var actualLines = lines.Select(l => Normalize(l.Text)).Where(s => s.Length > 0).ToList();
var rawText = page.GetText() ?? string.Empty;

View file

@ -0,0 +1,93 @@
[
{
"id": "1",
"image": "images/tooltip1.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 0,
"y": 84
},
"bottomRight": {
"x": 1185,
"y": 774
}
},
{
"id": "2",
"image": "images/tooltip2.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 304,
"y": 0
},
"bottomRight": {
"x": 983,
"y": 470
}
},
{
"id": "3",
"image": "images/tooltip3.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 473,
"y": 334
},
"bottomRight": {
"x": 1114,
"y": 914
}
},
{
"id": "4",
"image": "images/tooltip4.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 209,
"y": 264
},
"bottomRight": {
"x": 1097,
"y": 915
}
},
{
"id": "5",
"image": "images/tooltip5.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 763,
"y": 0
},
"bottomRight": {
"x": 1874,
"y": 560
}
},
{
"id": "6",
"image": "images/tooltip6.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 1541,
"y": 154
},
"bottomRight": {
"x": 2348,
"y": 614
}
},
{
"id": "7",
"image": "images/tooltip7.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 1921,
"y": 40
},
"bottomRight": {
"x": 2558,
"y": 370
}
}
]

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.2 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 MiB