diff --git a/crop-test-cmd.txt b/crop-test-cmd.txt new file mode 100644 index 0000000..09ac825 --- /dev/null +++ b/crop-test-cmd.txt @@ -0,0 +1 @@ +{"cmd":"crop-test","engine":"diff"} diff --git a/crop-test-stderr.txt b/crop-test-stderr.txt new file mode 100644 index 0000000..a6f68d4 --- /dev/null +++ b/crop-test-stderr.txt @@ -0,0 +1,2 @@ +{"ok":true,"ready":true} +{"ok":true,"method":"edge","avgIoU":0.7689866918165986,"results":[{"id":"1","iou":0.9028985507246376,"expected":{"x":0,"y":84,"width":1185,"height":690},"actual":{"x":0,"y":117,"width":1185,"height":623},"deltaTop":33,"deltaLeft":0,"deltaRight":0,"deltaBottom":-34},{"id":"2","iou":0.6861386480207926,"expected":{"x":304,"y":0,"width":679,"height":470},"actual":{"x":428,"y":40,"width":564,"height":474},"deltaTop":40,"deltaLeft":124,"deltaRight":9,"deltaBottom":44},{"id":"3","iou":0.8734518726233722,"expected":{"x":473,"y":334,"width":641,"height":580},"actual":{"x":472,"y":373,"width":609,"height":548},"deltaTop":39,"deltaLeft":-1,"deltaRight":-33,"deltaBottom":7},{"id":"4","iou":0.4827177898385173,"expected":{"x":209,"y":264,"width":888,"height":651},"actual":{"x":0,"y":294,"width":767,"height":634},"deltaTop":30,"deltaLeft":-209,"deltaRight":-330,"deltaBottom":13},{"id":"5","iou":0.8933684252502293,"expected":{"x":763,"y":0,"width":1111,"height":560},"actual":{"x":758,"y":39,"width":1080,"height":523},"deltaTop":39,"deltaLeft":-5,"deltaRight":-36,"deltaBottom":2},{"id":"6","iou":0.9159954398801851,"expected":{"x":1541,"y":154,"width":807,"height":460},"actual":{"x":1486,"y":157,"width":870,"height":460},"deltaTop":3,"deltaLeft":-55,"deltaRight":8,"deltaBottom":3},{"id":"7","iou":0.6283361163784564,"expected":{"x":1921,"y":40,"width":637,"height":330},"actual":{"x":1946,"y":72,"width":447,"height":302},"deltaTop":32,"deltaLeft":25,"deltaRight":-165,"deltaBottom":4}]} diff --git a/crop-test-stdout.txt b/crop-test-stdout.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/game/OcrDaemon.ts 
b/src/game/OcrDaemon.ts
index 396bf1e..780b0b3 100644
--- a/src/game/OcrDaemon.ts
+++ b/src/game/OcrDaemon.ts
@@ -104,6 +104,41 @@ export interface DiffOcrParams {
   ocr?: OcrParams;
 }
 
+/** Tooltip crop strategy: 'diff' needs a prior snapshot frame, 'edge' works on the current frame alone. */
+export type TooltipMethod = 'diff' | 'edge';
+
+/**
+ * Crop parameters forwarded to the daemon's `edge-ocr` command.
+ *
+ * The first group uses the JSON property names declared on the daemon's
+ * EdgeCropParams model (tools/OcrDaemon/Models.cs). The second group is what
+ * the settings page sends today; the daemon model declares no properties with
+ * those names, so they presumably do not influence the crop — TODO confirm.
+ */
+export interface EdgeCropParams {
+  darkThresh?: number;
+  minDarkRun?: number;
+  runGapTolerance?: number;
+  rowThreshDiv?: number;
+  colThreshDiv?: number;
+  maxGap?: number;
+  trimCutoff?: number;
+  ocrPad?: number;
+
+  // Keys currently produced by the settings page (kept for compatibility).
+  cannyLow?: number;
+  cannyHigh?: number;
+  minLineLength?: number;
+  roiSize?: number;
+  densityThreshold?: number;
+}
+
+/** Parameter bundle for edge-based tooltip OCR: crop detection plus OCR preprocessing. */
+export interface EdgeOcrParams {
+  crop?: EdgeCropParams;
+  ocr?: OcrParams;
+}
+
 interface DaemonRequest {
   cmd: string;
   region?: Region;
@@ -116,6 +151,9 @@ interface DaemonRequest {
   engine?: string;
   preprocess?: string;
   params?: DiffOcrParams;
+  edgeParams?: EdgeOcrParams;
+  cursorX?: number;
+  cursorY?: number;
 }
 
 interface DaemonResponse {
@@ -236,6 +274,46 @@ export class OcrDaemon {
     };
   }
 
+  /**
+   * Locate and OCR the tooltip via the daemon's `edge-ocr` command, which crops
+   * around the cursor in the current frame (no prior snapshot() required).
+   *
+   * @param savePath   Sent as `path`; the daemon saves its debug images there.
+   * @param region     Sent as `region` when provided.
+   * @param engine     OCR engine; left out of the request for 'tesseract', the daemon's default.
+   * @param preprocess Preprocess mode; the daemon substitutes 'tophat' for 'bgsub' on this path.
+   * @param edgeParams Crop/OCR tuning; omitted when the object has no keys.
+   * @param cursorX    Scan origin X. The daemon falls back to the OS cursor unless both X and Y are sent.
+   * @param cursorY    Scan origin Y (see cursorX).
+   * @returns Recognised text and lines (empty when the daemon returns none) plus the detected region.
+   */
+  async edgeOcr(
+    savePath?: string,
+    region?: Region,
+    engine?: OcrEngine,
+    preprocess?: OcrPreprocess,
+    edgeParams?: EdgeOcrParams,
+    cursorX?: number,
+    cursorY?: number,
+  ): Promise<DiffOcrResponse> {
+    const req: DaemonRequest = { cmd: 'edge-ocr' };
+    if (savePath) req.path = savePath;
+    if (region) req.region = region;
+    if (engine && engine !== 'tesseract') req.engine = engine;
+    if (preprocess) req.preprocess = preprocess;
+    if (edgeParams && Object.keys(edgeParams).length > 0) req.edgeParams = edgeParams;
+    if (cursorX != null) req.cursorX = cursorX;
+    if (cursorY != null) req.cursorY = cursorY;
+    // Non-tesseract engines get a longer deadline than the default capture timeout.
+    const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
+    const resp = await this.sendWithRetry(req, timeout);
+    return {
+      text: resp.text ?? '',
+      lines: resp.lines ?? [],
+      region: resp.region,
+    };
+  }
+
   async saveScreenshot(path: string, region?: Region): Promise {
     const req: DaemonRequest = { cmd: 'screenshot', path };
     if (region) req.region = region;
diff --git a/src/game/ScreenReader.ts b/src/game/ScreenReader.ts
index f2bd8dd..728e989 100644
--- a/src/game/ScreenReader.ts
+++ b/src/game/ScreenReader.ts
@@ -1,7 +1,7 @@
 import { mkdir } from 'fs/promises';
 import { join } from 'path';
 import { logger } from '../util/logger.js';
-import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
+import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult, type TooltipMethod, type EdgeOcrParams } from './OcrDaemon.js';
 import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
 import type { Region } from '../types.js';
 
@@ -13,7 +13,9 @@ export interface OcrSettings {
   engine: OcrEngine;
   screenPreprocess: OcrPreprocess;
   tooltipPreprocess: OcrPreprocess;
+  tooltipMethod: TooltipMethod;
   tooltipParams: DiffOcrParams;
+  edgeParams: EdgeOcrParams;
   saveDebugImages: boolean;
 }
 
@@ -24,10 +26,15 @@ export class ScreenReader {
     engine: 'easyocr',
     screenPreprocess: 'none',
     tooltipPreprocess: 'tophat',
+    tooltipMethod: 'diff',
     tooltipParams: {
       crop: { diffThresh: 10 },
       ocr: { kernelSize: 21 },
     },
+    edgeParams: {
+      crop: {},
+      ocr: { kernelSize: 21 },
+    },
     saveDebugImages: true,
   };
 
@@ -235,12 +242,16 @@ export class ScreenReader {
   // ── Snapshot / Diff-OCR (for tooltip reading) ──────────────────────
 
   async snapshot(): Promise<void> {
+    if (this.settings.tooltipMethod === 'edge') return; // no reference frame needed
     await this.daemon.snapshot();
   }
 
   async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
-    const { engine, tooltipPreprocess, tooltipParams } = this.settings;
+    const { engine, tooltipPreprocess, tooltipMethod, tooltipParams, edgeParams } = this.settings;
     const pp = tooltipPreprocess !== 'none' ? tooltipPreprocess : undefined;
+    if (tooltipMethod === 'edge') {
+      return this.daemon.edgeOcr(savePath, region, engine, pp, edgeParams);
+    }
     return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams);
   }
 
diff --git a/src/server/index.html b/src/server/index.html
index af798f3..fd04e32 100644
--- a/src/server/index.html
+++ b/src/server/index.html
@@ -581,6 +581,14 @@
+
+
Tooltip Method
+ +
+
Tooltip Preprocess
-
-
Crop Detection
+
+
Crop Detection (Diff)
@@ -608,6 +616,32 @@
+ +
OCR Processing
@@ -1160,7 +1194,24 @@ const screenPp = document.getElementById('ocrScreenPreprocess').value; document.getElementById('screenTophatParams').style.display = screenPp === 'tophat' ? '' : 'none'; - const tooltipPp = document.getElementById('ocrTooltipPreprocess').value; + const method = document.getElementById('ocrTooltipMethod').value; + const isEdge = method === 'edge'; + + // Show/hide method-specific crop params + document.getElementById('diffCropParams').style.display = isEdge ? 'none' : ''; + document.getElementById('edgeCropParams').style.display = isEdge ? '' : 'none'; + + // Disable bgsub when edge (no reference frame) + const ppSelect = document.getElementById('ocrTooltipPreprocess'); + const bgsubOption = ppSelect.querySelector('option[value="bgsub"]'); + if (isEdge) { + bgsubOption.disabled = true; + if (ppSelect.value === 'bgsub') ppSelect.value = 'tophat'; + } else { + bgsubOption.disabled = false; + } + + const tooltipPp = ppSelect.value; document.getElementById('tooltipBgsubParams').style.display = tooltipPp === 'bgsub' ? '' : 'none'; document.getElementById('tooltipTophatParams').style.display = tooltipPp === 'tophat' ? '' : 'none'; @@ -1175,11 +1226,20 @@ if (!data.ok) return; document.getElementById('ocrEngine').value = data.engine || 'easyocr'; document.getElementById('ocrScreenPreprocess').value = data.screenPreprocess || 'none'; + document.getElementById('ocrTooltipMethod').value = data.tooltipMethod || 'diff'; document.getElementById('ocrTooltipPreprocess').value = data.tooltipPreprocess || 'tophat'; document.getElementById('ocrSaveDebugImages').checked = data.saveDebugImages !== false; const tp = data.tooltipParams || {}; const crop = tp.crop || {}; const ocr = tp.ocr || {}; + // Edge params + const ep = data.edgeParams || {}; + const edgeCrop = ep.crop || {}; + document.getElementById('ocrCannyLow').value = edgeCrop.cannyLow ?? 50; + document.getElementById('ocrCannyHigh').value = edgeCrop.cannyHigh ?? 
150; + document.getElementById('ocrMinLineLength').value = edgeCrop.minLineLength ?? 100; + document.getElementById('ocrRoiSize').value = edgeCrop.roiSize ?? 1400; + document.getElementById('ocrDensityThreshold').value = edgeCrop.densityThreshold ?? 0.15; document.getElementById('ocrDiffThresh').value = crop.diffThresh ?? 20; document.getElementById('ocrMaxGap').value = crop.maxGap ?? 20; document.getElementById('ocrTrimCutoff').value = crop.trimCutoff ?? 0.4; @@ -1237,11 +1297,29 @@ if (!isNaN(wt)) tooltipParams.ocr.widthThs = wt; } + const tooltipMethod = document.getElementById('ocrTooltipMethod').value; + + const edgeParams = { + crop: { + cannyLow: parseInt(document.getElementById('ocrCannyLow').value) || 50, + cannyHigh: parseInt(document.getElementById('ocrCannyHigh').value) || 150, + minLineLength: parseInt(document.getElementById('ocrMinLineLength').value) || 100, + roiSize: parseInt(document.getElementById('ocrRoiSize').value) || 1400, + densityThreshold: parseFloat(document.getElementById('ocrDensityThreshold').value) || 0.15, + }, + ocr: { + upscale: parseInt(document.getElementById('ocrUpscale').value) || 2, + kernelSize: parseInt(document.getElementById('ocrTooltipKernel').value) || 21, + }, + }; + const body = { engine, screenPreprocess: screenPp, + tooltipMethod, tooltipPreprocess: tooltipPp, tooltipParams, + edgeParams, saveDebugImages: document.getElementById('ocrSaveDebugImages').checked, }; diff --git a/src/server/routes/debug.ts b/src/server/routes/debug.ts index fe2bc65..a3b8acd 100644 --- a/src/server/routes/debug.ts +++ b/src/server/routes/debug.ts @@ -5,7 +5,7 @@ import { sleep } from '../../util/sleep.js'; import { GRID_LAYOUTS } from '../../game/GridReader.js'; import type { Bot } from '../../bot/Bot.js'; import type { Server } from '../Server.js'; -import type { OcrEngine, OcrPreprocess, DiffOcrParams } from '../../game/OcrDaemon.js'; +import type { OcrEngine, OcrPreprocess, DiffOcrParams, TooltipMethod, EdgeOcrParams } from 
'../../game/OcrDaemon.js'; import type { OcrSettings } from '../../game/ScreenReader.js'; export function debugRoutes(bot: Bot, server: Server): Router { @@ -30,7 +30,9 @@ export function debugRoutes(bot: Bot, server: Server): Router { if (body.engine && ['tesseract', 'easyocr', 'paddleocr'].includes(body.engine)) s.engine = body.engine; if (body.screenPreprocess && ['none', 'bgsub', 'tophat'].includes(body.screenPreprocess)) s.screenPreprocess = body.screenPreprocess; if (body.tooltipPreprocess && ['none', 'bgsub', 'tophat'].includes(body.tooltipPreprocess)) s.tooltipPreprocess = body.tooltipPreprocess; + if (body.tooltipMethod && ['diff', 'edge'].includes(body.tooltipMethod)) s.tooltipMethod = body.tooltipMethod; if (body.tooltipParams != null) s.tooltipParams = body.tooltipParams; + if (body.edgeParams != null) s.edgeParams = body.edgeParams; if (body.saveDebugImages != null) s.saveDebugImages = body.saveDebugImages; server.broadcastLog('info', `OCR settings updated: engine=${s.engine} screen=${s.screenPreprocess} tooltip=${s.tooltipPreprocess}`); res.json({ ok: true }); diff --git a/tools/OcrDaemon/Daemon.cs b/tools/OcrDaemon/Daemon.cs index c87b3b0..387813b 100644 --- a/tools/OcrDaemon/Daemon.cs +++ b/tools/OcrDaemon/Daemon.cs @@ -55,6 +55,7 @@ static class Daemon var gridHandler = new GridHandler(); var detectGridHandler = new DetectGridHandler(); var templateMatchHandler = new TemplateMatchHandler(); + var edgeCropHandler = new EdgeCropHandler(); var pythonBridge = new PythonOcrBridge(); // Main loop: read one JSON line, handle, write one JSON line @@ -80,8 +81,11 @@ static class Daemon "capture" => ocrHandler.HandleCapture(request), "snapshot" => ocrHandler.HandleSnapshot(request), "diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request), + "edge-ocr" => HandleEdgeOcrPipeline(ocrHandler, edgeCropHandler, pythonBridge, request), "test" => ocrHandler.HandleTest(request), "tune" => ocrHandler.HandleTune(request), + "crop-test" => 
HandleCropTest(ocrHandler, edgeCropHandler, request), + "crop-tune" => HandleCropTune(ocrHandler, request), "grid" => gridHandler.HandleGrid(request), "detect-grid" => detectGridHandler.HandleDetectGrid(request), "match-template" => templateMatchHandler.HandleTemplateMatch(request), @@ -251,6 +255,365 @@ static class Daemon } } + /// + /// Edge-based tooltip detection pipeline. + /// EdgeCrop → preprocess (tophat only; bgsub falls back to tophat) → route to engine. + /// + private static object HandleEdgeOcrPipeline(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, PythonOcrBridge pythonBridge, Request request) + { + var engine = request.Engine ?? "tesseract"; + var isPythonEngine = engine is "easyocr" or "paddleocr"; + var ep = request.EdgeParams ?? new EdgeOcrParams(); + var cropParams = ep.Crop; + var ocrParams = ep.Ocr; + + // Edge method only supports tophat (no reference frame for bgsub) + string preprocess = request.Preprocess ?? "tophat"; + if (preprocess == "bgsub") preprocess = "tophat"; + + var sw = System.Diagnostics.Stopwatch.StartNew(); + + var cropResult = edgeCropHandler.EdgeCrop(request, cropParams); + if (cropResult == null) + return new OcrResponse { Text = "", Lines = [] }; + + var (cropped, fullCapture, region) = cropResult.Value; + using var _fullCapture = fullCapture; + + // Preprocess + Bitmap processed; + if (preprocess == "tophat") + { + processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize, upscale: ocrParams.Upscale); + } + else // "none" + { + processed = (Bitmap)cropped.Clone(); + } + cropped.Dispose(); + + var cropMs = sw.ElapsedMilliseconds; + using var _processed = processed; + + // Save debug images if path provided + if (!string.IsNullOrEmpty(request.Path)) + { + var dir = Path.GetDirectoryName(request.Path); + if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir)) + Directory.CreateDirectory(dir); + processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path)); + + var ext = 
Path.GetExtension(request.Path); + var fullPath = Path.ChangeExtension(request.Path, ".full" + ext); + fullCapture.Save(fullPath, ImageUtils.GetImageFormat(fullPath)); + } + + // Route to engine + sw.Restart(); + if (engine == "tesseract") + { + var result = ocrHandler.RunTesseractOnBitmap(processed, region, pad: cropParams.OcrPad, upscale: ocrParams.Upscale); + var ocrMs = sw.ElapsedMilliseconds; + Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}"); + return result; + } + else // easyocr, paddleocr + { + var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams); + var ocrMs = sw.ElapsedMilliseconds; + Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}"); + + foreach (var line in ocrResult.Lines) + foreach (var word in line.Words) + { + word.X += region.X; + word.Y += region.Y; + } + + return new DiffOcrResponse + { + Text = ocrResult.Text, + Lines = ocrResult.Lines, + Region = region, + }; + } + } + + /// + /// Coordinate-descent sweep over DiffCropParams to maximise avgIoU on crop.json ground truth. 
+ /// + private static object HandleCropTune(OcrHandler ocrHandler, Request request) + { + var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata"); + var casesPath = Path.Combine(tessdataDir, "crop.json"); + if (!File.Exists(casesPath)) + return new ErrorResponse($"crop.json not found at {casesPath}"); + + var json = File.ReadAllText(casesPath); + var cases = JsonSerializer.Deserialize>(json, JsonOptions); + if (cases == null || cases.Count == 0) + return new ErrorResponse("No test cases in crop.json"); + + // Preload valid test cases + var validCases = new List<(CropTestCase tc, string imagePath, string snapshotPath)>(); + foreach (var tc in cases) + { + var imagePath = Path.Combine(tessdataDir, tc.Image); + var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage); + if (File.Exists(imagePath) && File.Exists(snapshotPath)) + validCases.Add((tc, imagePath, snapshotPath)); + } + if (validCases.Count == 0) + return new ErrorResponse("No valid test cases found"); + + // Score function: compute avgIoU for a set of crop params + double ScoreCropParams(DiffCropParams cp) + { + double totalIoU = 0; + foreach (var (tc, imagePath, snapshotPath) in validCases) + { + ocrHandler.HandleSnapshot(new Request { File = snapshotPath }); + var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, cp); + if (cropResult == null) continue; + + var (cropped, refCropped, current, region) = cropResult.Value; + cropped.Dispose(); refCropped.Dispose(); current.Dispose(); + + int ax1 = region.X, ay1 = region.Y; + int ax2 = region.X + region.Width, ay2 = region.Y + region.Height; + int ex1 = tc.TopLeft.X, ey1 = tc.TopLeft.Y, ex2 = tc.BottomRight.X, ey2 = tc.BottomRight.Y; + + int ix1 = Math.Max(ax1, ex1), iy1 = Math.Max(ay1, ey1); + int ix2 = Math.Min(ax2, ex2), iy2 = Math.Min(ay2, ey2); + int iw = Math.Max(0, ix2 - ix1), ih = Math.Max(0, iy2 - iy1); + double intersection = (double)iw * ih; + double expW = ex2 - ex1, expH = ey2 - ey1; + double union = 
(double)region.Width * region.Height + expW * expH - intersection; + totalIoU += union > 0 ? intersection / union : 0; + } + return totalIoU / validCases.Count; + } + + DiffCropParams CloneCrop(DiffCropParams p) => new() + { + DiffThresh = p.DiffThresh, RowThreshDiv = p.RowThreshDiv, + ColThreshDiv = p.ColThreshDiv, MaxGap = p.MaxGap, + TrimCutoff = p.TrimCutoff, OcrPad = p.OcrPad, + }; + + // Start from provided params or defaults + var best = request.Params?.Crop ?? new DiffCropParams(); + double bestScore = ScoreCropParams(best); + int totalEvals = 1; + Console.Error.WriteLine($" crop-tune: baseline avgIoU={bestScore:F4} {best}"); + + var intSweeps = new (string Name, int[] Values, Action Set)[] + { + ("diffThresh", [5, 10, 15, 20, 25, 30, 40], (c, v) => c.DiffThresh = v), + ("rowThreshDiv", [20, 30, 40, 50, 60, 80, 100], (c, v) => c.RowThreshDiv = v), + ("colThreshDiv", [5, 8, 10, 12, 15, 20], (c, v) => c.ColThreshDiv = v), + ("maxGap", [5, 10, 15, 20, 25, 30], (c, v) => c.MaxGap = v), + }; + double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5]; + + const int maxRounds = 3; + for (int round = 0; round < maxRounds; round++) + { + bool improved = false; + Console.Error.WriteLine($"--- Round {round + 1} ---"); + + foreach (var (name, values, set) in intSweeps) + { + Console.Error.Write($" {name}: "); + int bestVal = 0; + double bestValScore = -1; + + foreach (int v in values) + { + var trial = CloneCrop(best); + set(trial, v); + double score = ScoreCropParams(trial); + totalEvals++; + Console.Error.Write($"{v}={score:F4} "); + if (score > bestValScore) { bestValScore = score; bestVal = v; } + } + Console.Error.WriteLine(); + + if (bestValScore > bestScore) + { + set(best, bestVal); + bestScore = bestValScore; + improved = true; + Console.Error.WriteLine($" -> {name}={bestVal} avgIoU={bestScore:F4}"); + } + } + + // trimCutoff sweep + { + Console.Error.Write($" trimCutoff: "); + double bestTrim = best.TrimCutoff; + double bestTrimScore = bestScore; + + 
foreach (double v in trimValues) + { + var trial = CloneCrop(best); + trial.TrimCutoff = v; + double score = ScoreCropParams(trial); + totalEvals++; + Console.Error.Write($"{v:F2}={score:F4} "); + if (score > bestTrimScore) { bestTrimScore = score; bestTrim = v; } + } + Console.Error.WriteLine(); + + if (bestTrimScore > bestScore) + { + best.TrimCutoff = bestTrim; + bestScore = bestTrimScore; + improved = true; + Console.Error.WriteLine($" -> trimCutoff={bestTrim:F2} avgIoU={bestScore:F4}"); + } + } + + Console.Error.WriteLine($" End of round {round + 1}: avgIoU={bestScore:F4} {best}"); + if (!improved) break; + } + + Console.Error.WriteLine($"\n crop-tune: BEST avgIoU={bestScore:F4} {best} evals={totalEvals}"); + + return new CropTuneResponse + { + BestAvgIoU = bestScore, + BestParams = best, + Iterations = totalEvals, + }; + } + + /// + /// Crop accuracy test: runs diff and/or edge crop on test cases from crop.json, + /// computes IoU and per-edge deltas vs ground truth. + /// + private static object HandleCropTest(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, Request request) + { + var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata"); + var casesPath = Path.Combine(tessdataDir, "crop.json"); + if (!File.Exists(casesPath)) + return new ErrorResponse($"crop.json not found at {casesPath}"); + + var json = File.ReadAllText(casesPath); + var cases = JsonSerializer.Deserialize>(json, JsonOptions); + if (cases == null || cases.Count == 0) + return new ErrorResponse("No test cases in crop.json"); + + var method = request.Engine ?? "diff"; // reuse engine field: "diff", "edge", or "both" + var diffParams = request.Params?.Crop ?? new DiffCropParams(); + var edgeParams = request.EdgeParams?.Crop ?? 
new EdgeCropParams(); + + var results = new List(); + + foreach (var tc in cases) + { + var imagePath = Path.Combine(tessdataDir, tc.Image); + var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage); + + if (!File.Exists(imagePath) || !File.Exists(snapshotPath)) + { + Console.Error.WriteLine($" crop-test: SKIP {tc.Id} — missing files"); + results.Add(new CropTestResult { Id = tc.Id, IoU = 0 }); + continue; + } + + // Expected region + int expX = tc.TopLeft.X; + int expY = tc.TopLeft.Y; + int expW = tc.BottomRight.X - tc.TopLeft.X; + int expH = tc.BottomRight.Y - tc.TopLeft.Y; + var expected = new RegionRect { X = expX, Y = expY, Width = expW, Height = expH }; + + RegionRect? actual = null; + + if (method is "diff" or "both") + { + // Load snapshot as reference + ocrHandler.HandleSnapshot(new Request { File = snapshotPath }); + var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, diffParams); + if (cropResult != null) + { + var (cropped, refCropped, current, region) = cropResult.Value; + actual = region; + cropped.Dispose(); + refCropped.Dispose(); + current.Dispose(); + } + } + + if (method == "edge") + { + // Default cursor to center of ground-truth bbox if not specified + int cx = tc.CursorX ?? (tc.TopLeft.X + tc.BottomRight.X) / 2; + int cy = tc.CursorY ?? 
(tc.TopLeft.Y + tc.BottomRight.Y) / 2; + var cropResult = edgeCropHandler.EdgeCrop( + new Request { File = imagePath, CursorX = cx, CursorY = cy }, + edgeParams); + if (cropResult != null) + { + var (cropped, fullCapture, region) = cropResult.Value; + actual = region; + cropped.Dispose(); + fullCapture.Dispose(); + } + } + + // Compute IoU and deltas + double iou = 0; + int dTop = 0, dLeft = 0, dRight = 0, dBottom = 0; + if (actual != null) + { + int ax1 = actual.X, ay1 = actual.Y; + int ax2 = actual.X + actual.Width, ay2 = actual.Y + actual.Height; + int ex1 = expX, ey1 = expY, ex2 = tc.BottomRight.X, ey2 = tc.BottomRight.Y; + + int ix1 = Math.Max(ax1, ex1), iy1 = Math.Max(ay1, ey1); + int ix2 = Math.Min(ax2, ex2), iy2 = Math.Min(ay2, ey2); + int iw = Math.Max(0, ix2 - ix1), ih = Math.Max(0, iy2 - iy1); + double intersection = (double)iw * ih; + double union = (double)actual.Width * actual.Height + (double)expW * expH - intersection; + iou = union > 0 ? intersection / union : 0; + + dTop = ay1 - ey1; // positive = crop starts too low + dLeft = ax1 - ex1; // positive = crop starts too far right + dRight = ax2 - ex2; // positive = crop ends too far right + dBottom = ay2 - ey2; // positive = crop ends too low + } + + Console.Error.WriteLine($" crop-test #{tc.Id}: IoU={iou:F3} expected=({expX},{expY})+{expW}x{expH} actual={FormatRegion(actual)} delta T={dTop:+0;-#} L={dLeft:+0;-#} R={dRight:+0;-#} B={dBottom:+0;-#}"); + + results.Add(new CropTestResult + { + Id = tc.Id, + IoU = iou, + Expected = expected, + Actual = actual, + DeltaTop = dTop, + DeltaLeft = dLeft, + DeltaRight = dRight, + DeltaBottom = dBottom, + }); + } + + double avgIoU = results.Count > 0 ? results.Average(r => r.IoU) : 0; + Console.Error.WriteLine($" crop-test: method={method} avgIoU={avgIoU:F3} ({results.Count} cases)"); + + return new CropTestResponse + { + Method = method, + AvgIoU = avgIoU, + Results = results, + }; + } + + private static string FormatRegion(RegionRect? r) => + r != null ? 
$"({r.X},{r.Y})+{r.Width}x{r.Height}" : "null";
+
     private static void WriteResponse(object response)
     {
         var json = JsonSerializer.Serialize(response, JsonOptions);
diff --git a/tools/OcrDaemon/EdgeCropHandler.cs b/tools/OcrDaemon/EdgeCropHandler.cs
new file mode 100644
index 0000000..28c2c89
--- /dev/null
+++ b/tools/OcrDaemon/EdgeCropHandler.cs
@@ -0,0 +1,216 @@
+namespace OcrDaemon;
+
+using System.Drawing;
+using System.Drawing.Imaging;
+using System.Runtime.InteropServices;
+
+/// <summary>
+/// Locates a dark tooltip panel by scanning outward from the cursor position in the
+/// captured frame; no reference snapshot is involved.
+/// </summary>
+class EdgeCropHandler
+{
+    /// <summary>Minimum number of scan lines that must start on a dark pixel.</summary>
+    private const int MinSamples = 10;
+
+    /// <summary>Minimum accepted width and height (px) of the detected panel.</summary>
+    private const int MinExtent = 50;
+
+    [StructLayout(LayoutKind.Sequential)]
+    private struct POINT { public int X, Y; }
+
+    [DllImport("user32.dll")]
+    private static extern bool GetCursorPos(out POINT lpPoint);
+
+    /// <summary>
+    /// Captures the screen (or loads <c>req.File</c>) and crops the dark region surrounding the cursor.
+    /// </summary>
+    /// <param name="req">Request; CursorX/CursorY override the OS cursor position when both are set.</param>
+    /// <param name="p">Scan thresholds, gap tolerances and percentile divisors.</param>
+    /// <returns>
+    /// The cropped bitmap, the full capture and the crop rectangle (the caller disposes both
+    /// bitmaps), or null when no plausible panel is found. On every null return and on any
+    /// exception the capture is disposed here.
+    /// </returns>
+    public (Bitmap cropped, Bitmap fullCapture, RegionRect region)? EdgeCrop(Request req, EdgeCropParams p)
+    {
+        int cursorX, cursorY;
+        if (req.CursorX.HasValue && req.CursorY.HasValue)
+        {
+            cursorX = req.CursorX.Value;
+            cursorY = req.CursorY.Value;
+        }
+        else if (GetCursorPos(out var pt))
+        {
+            cursorX = pt.X;
+            cursorY = pt.Y;
+        }
+        else
+        {
+            Console.Error.WriteLine(" edge-crop: GetCursorPos failed");
+            return null;
+        }
+
+        var fullCapture = ScreenCapture.CaptureOrLoad(req.File, null);
+        bool handedOff = false;
+        try
+        {
+            int w = fullCapture.Width;
+            int h = fullCapture.Height;
+
+            // The cursor seeds every scan; outside the image it would index past the pixel buffer.
+            if (cursorX < 0 || cursorX >= w || cursorY < 0 || cursorY >= h)
+            {
+                Console.Error.WriteLine($" edge-crop: cursor ({cursorX},{cursorY}) outside {w}x{h} capture");
+                return null;
+            }
+
+            byte[] px;
+            int stride;
+            var bmpData = fullCapture.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
+            try
+            {
+                stride = bmpData.Stride;
+                px = new byte[stride * h];
+                Marshal.Copy(bmpData.Scan0, px, 0, px.Length);
+            }
+            finally
+            {
+                fullCapture.UnlockBits(bmpData);
+            }
+
+            int darkThresh = p.DarkThresh;
+            int colGap = p.RunGapTolerance;
+            int maxGap = p.MaxGap;
+            // Divisors come from request JSON; clamp so 0 or negatives cannot cause a divide-by-zero.
+            int rowDiv = Math.Max(1, p.RowThreshDiv);
+            int colDiv = Math.Max(1, p.ColThreshDiv);
+
+            // ── Phase 1: Per-row horizontal extent ──
+            // Scan left/right from cursorX per row. Gap tolerance bridges through text.
+            // Percentile-based filtering for robustness.
+            int bandHalf = p.MinDarkRun; // repurpose: half-height of horizontal scan band
+            int bandTop = Math.Max(0, cursorY - bandHalf);
+            int bandBot = Math.Min(h - 1, cursorY + bandHalf);
+
+            var leftExtents = new List<int>();
+            var rightExtents = new List<int>();
+
+            for (int y = bandTop; y <= bandBot; y++)
+            {
+                int rowOff = y * stride;
+                if (Brightness(px, rowOff + cursorX * 4) >= darkThresh) continue;
+
+                leftExtents.Add(ScanDark(px, rowOff, 4, cursorX, -1, -1, darkThresh, colGap));
+                rightExtents.Add(ScanDark(px, rowOff, 4, cursorX, w, 1, darkThresh, colGap));
+            }
+
+            if (leftExtents.Count < MinSamples)
+            {
+                Console.Error.WriteLine($" edge-crop: too few dark rows ({leftExtents.Count})");
+                return null;
+            }
+
+            leftExtents.Sort();
+            rightExtents.Sort();
+
+            // Use RowThreshDiv/ColThreshDiv as percentile denominators
+            // e.g., RowThreshDiv=4 → 25th percentile for left, ColThreshDiv=4 → 75th for right
+            int leftPctIdx = Math.Clamp(leftExtents.Count / rowDiv, 0, leftExtents.Count - 1);
+            int rightPctIdx = Math.Clamp(rightExtents.Count * (colDiv - 1) / colDiv, 0, rightExtents.Count - 1);
+
+            int bestColStart = leftExtents[leftPctIdx];
+            int bestColEnd = rightExtents[rightPctIdx];
+
+            Console.Error.WriteLine($" edge-crop: horizontal: left={bestColStart} right={bestColEnd} ({bestColEnd - bestColStart + 1}px) samples={leftExtents.Count} pctL={leftPctIdx}/{leftExtents.Count} pctR={rightPctIdx}/{rightExtents.Count}");
+
+            if (bestColEnd - bestColStart + 1 < MinExtent)
+            {
+                Console.Error.WriteLine($" edge-crop: horizontal extent too small");
+                return null;
+            }
+
+            // ── Phase 2: Per-column vertical extent ──
+            int colBandHalf = (bestColEnd - bestColStart + 1) / 3;
+            int colBandLeft = Math.Max(bestColStart, cursorX - colBandHalf);
+            int colBandRight = Math.Min(bestColEnd, cursorX + colBandHalf);
+
+            var topExtents = new List<int>();
+            var bottomExtents = new List<int>();
+
+            // Asymmetric gap: larger upward to bridge header decorations (~30-40px bright)
+            int maxGapUp = maxGap * 3;
+
+            for (int x = colBandLeft; x <= colBandRight; x++)
+            {
+                int colOff = x * 4;
+                if (Brightness(px, cursorY * stride + colOff) >= darkThresh) continue;
+
+                topExtents.Add(ScanDark(px, colOff, stride, cursorY, -1, -1, darkThresh, maxGapUp));
+                bottomExtents.Add(ScanDark(px, colOff, stride, cursorY, h, 1, darkThresh, maxGap));
+            }
+
+            if (topExtents.Count < MinSamples)
+            {
+                Console.Error.WriteLine($" edge-crop: too few dark columns ({topExtents.Count})");
+                return null;
+            }
+
+            topExtents.Sort();
+            bottomExtents.Sort();
+
+            int topPctIdx = Math.Clamp(topExtents.Count / rowDiv, 0, topExtents.Count - 1);
+            int botPctIdx = Math.Clamp(bottomExtents.Count * (colDiv - 1) / colDiv, 0, bottomExtents.Count - 1);
+
+            int bestRowStart = topExtents[topPctIdx];
+            int bestRowEnd = bottomExtents[botPctIdx];
+
+            Console.Error.WriteLine($" edge-crop: vertical: top={bestRowStart} bottom={bestRowEnd} ({bestRowEnd - bestRowStart + 1}px) samples={topExtents.Count}");
+
+            if (bestRowEnd - bestRowStart + 1 < MinExtent)
+            {
+                Console.Error.WriteLine($" edge-crop: vertical extent too small");
+                return null;
+            }
+
+            int rw = bestColEnd - bestColStart + 1;
+            int rh = bestRowEnd - bestRowStart + 1;
+
+            Console.Error.WriteLine($" edge-crop: result ({bestColStart},{bestRowStart}) {rw}x{rh}");
+
+            var cropRect = new Rectangle(bestColStart, bestRowStart, rw, rh);
+            var cropped = fullCapture.Clone(cropRect, PixelFormat.Format32bppArgb);
+            var region = new RegionRect { X = bestColStart, Y = bestRowStart, Width = rw, Height = rh };
+
+            handedOff = true;
+            return (cropped, fullCapture, region);
+        }
+        finally
+        {
+            // Ownership passes to the caller only on success.
+            if (!handedOff) fullCapture.Dispose();
+        }
+    }
+
+    /// <summary>Mean of the first three channel bytes of the 32bpp pixel starting at <paramref name="i"/>.</summary>
+    private static int Brightness(byte[] px, int i) => (px[i] + px[i + 1] + px[i + 2]) / 3;
+
+    /// <summary>
+    /// Walks one pixel at a time from <paramref name="start"/> towards <paramref name="limit"/>
+    /// (exclusive) and returns the last dark coordinate reached before more than
+    /// <paramref name="maxGap"/> consecutive bright pixels are seen.
+    /// </summary>
+    /// <param name="baseOff">Byte offset of the fixed row (horizontal scan) or column (vertical scan).</param>
+    /// <param name="step">Bytes between neighbouring pixels along the scan axis (4, or the stride).</param>
+    private static int ScanDark(byte[] px, int baseOff, int step, int start, int limit, int dir, int darkThresh, int maxGap)
+    {
+        int edge = start;
+        int gap = 0;
+        for (int c = start + dir; c != limit; c += dir)
+        {
+            if (Brightness(px, baseOff + c * step) < darkThresh) { edge = c; gap = 0; }
+            else if (++gap > maxGap) break;
+        }
+        return edge;
+    }
+}
diff --git a/tools/OcrDaemon/Models.cs b/tools/OcrDaemon/Models.cs
index 4eee68d..910e07f 100644
--- a/tools/OcrDaemon/Models.cs
+++ b/tools/OcrDaemon/Models.cs
@@ -48,6 +48,15 @@ class Request
 
     [JsonPropertyName("params")]
     public DiffOcrParams? Params { get; set; }
+
+    [JsonPropertyName("edgeParams")]
+    public EdgeOcrParams? EdgeParams { get; set; }
+
+    [JsonPropertyName("cursorX")]
+    public int? CursorX { get; set; }
+
+    [JsonPropertyName("cursorY")]
+    public int?
CursorY { get; set; } } class RegionRect @@ -336,6 +345,47 @@ sealed class DiffOcrParams public override string ToString() => $"[{Crop}] [{Ocr}]"; } +sealed class EdgeCropParams +{ + [JsonPropertyName("darkThresh")] + public int DarkThresh { get; set; } = 40; + + [JsonPropertyName("minDarkRun")] + public int MinDarkRun { get; set; } = 200; + + [JsonPropertyName("runGapTolerance")] + public int RunGapTolerance { get; set; } = 15; + + [JsonPropertyName("rowThreshDiv")] + public int RowThreshDiv { get; set; } = 40; + + [JsonPropertyName("colThreshDiv")] + public int ColThreshDiv { get; set; } = 8; + + [JsonPropertyName("maxGap")] + public int MaxGap { get; set; } = 15; + + [JsonPropertyName("trimCutoff")] + public double TrimCutoff { get; set; } = 0.3; + + [JsonPropertyName("ocrPad")] + public int OcrPad { get; set; } = 10; + + public override string ToString() => + $"darkThresh={DarkThresh} minRun={MinDarkRun} runGap={RunGapTolerance} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} rowDiv={RowThreshDiv} colDiv={ColThreshDiv}"; +} + +sealed class EdgeOcrParams +{ + [JsonPropertyName("crop")] + public EdgeCropParams Crop { get; set; } = new(); + + [JsonPropertyName("ocr")] + public OcrParams Ocr { get; set; } = new(); + + public override string ToString() => $"[{Crop}] [{Ocr}]"; +} + class TestCase { [JsonPropertyName("id")] @@ -404,3 +454,95 @@ class TuneResponse [JsonPropertyName("iterations")] public int Iterations { get; set; } } + +// ── Crop test models ──────────────────────────────────────────────────────── + +class PointXY +{ + [JsonPropertyName("x")] + public int X { get; set; } + + [JsonPropertyName("y")] + public int Y { get; set; } +} + +class CropTestCase +{ + [JsonPropertyName("id")] + public string Id { get; set; } = ""; + + [JsonPropertyName("image")] + public string Image { get; set; } = ""; + + [JsonPropertyName("snapshotImage")] + public string SnapshotImage { get; set; } = ""; + + [JsonPropertyName("topLeft")] + public PointXY TopLeft { get; set; } = 
new(); + + [JsonPropertyName("bottomRight")] + public PointXY BottomRight { get; set; } = new(); + + [JsonPropertyName("cursorX")] + public int? CursorX { get; set; } + + [JsonPropertyName("cursorY")] + public int? CursorY { get; set; } +} + +class CropTestResult +{ + [JsonPropertyName("id")] + public string Id { get; set; } = ""; + + [JsonPropertyName("iou")] + public double IoU { get; set; } + + [JsonPropertyName("expected")] + public RegionRect Expected { get; set; } = new(); + + [JsonPropertyName("actual")] + public RegionRect? Actual { get; set; } + + [JsonPropertyName("deltaTop")] + public int DeltaTop { get; set; } + + [JsonPropertyName("deltaLeft")] + public int DeltaLeft { get; set; } + + [JsonPropertyName("deltaRight")] + public int DeltaRight { get; set; } + + [JsonPropertyName("deltaBottom")] + public int DeltaBottom { get; set; } +} + +class CropTestResponse +{ + [JsonPropertyName("ok")] + public bool Ok => true; + + [JsonPropertyName("method")] + public string Method { get; set; } = ""; + + [JsonPropertyName("avgIoU")] + public double AvgIoU { get; set; } + + [JsonPropertyName("results")] + public List Results { get; set; } = []; +} + +class CropTuneResponse +{ + [JsonPropertyName("ok")] + public bool Ok => true; + + [JsonPropertyName("bestAvgIoU")] + public double BestAvgIoU { get; set; } + + [JsonPropertyName("bestParams")] + public DiffCropParams BestParams { get; set; } = new(); + + [JsonPropertyName("iterations")] + public int Iterations { get; set; } +} diff --git a/tools/OcrDaemon/OcrDaemon.csproj b/tools/OcrDaemon/OcrDaemon.csproj index 2bc7b1d..848b63c 100644 --- a/tools/OcrDaemon/OcrDaemon.csproj +++ b/tools/OcrDaemon/OcrDaemon.csproj @@ -26,6 +26,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/tools/OcrDaemon/OcrHandler.cs b/tools/OcrDaemon/OcrHandler.cs index 9b2be98..f9d8d85 100644 --- a/tools/OcrDaemon/OcrHandler.cs +++ b/tools/OcrDaemon/OcrHandler.cs @@ -269,6 +269,36 @@ class OcrHandler(TesseractEngine engine) int 
maxX = Math.Min(bestColEnd, w - 1); int maxY = Math.Min(bestRowEnd, h - 1); + // Boundary extension: scan outward from detected edges with a relaxed threshold + // to capture low-signal regions (e.g. ornamental tooltip headers) + int extRowThresh = Math.Max(1, rowThresh / 4); + int extColThresh = Math.Max(1, colThresh / 4); + + int extTop = Math.Max(0, minY - maxGap); + for (int y = minY - 1; y >= extTop; y--) + { + if (rowCounts[y] >= extRowThresh) minY = y; + else break; + } + int extBottom = Math.Min(h - 1, maxY + maxGap); + for (int y = maxY + 1; y <= extBottom; y++) + { + if (rowCounts[y] >= extRowThresh) maxY = y; + else break; + } + int extLeft = Math.Max(0, minX - maxGap); + for (int x = minX - 1; x >= extLeft; x--) + { + if (colCounts[x] >= extColThresh) minX = x; + else break; + } + int extRight = Math.Min(w - 1, maxX + maxGap); + for (int x = maxX + 1; x <= extRight; x++) + { + if (colCounts[x] >= extColThresh) maxX = x; + else break; + } + // Trim low-density edges on both axes to avoid oversized crops. 
int colSpan = maxX - minX + 1; if (colSpan > 50) diff --git a/tools/OcrDaemon/TestRunner.cs b/tools/OcrDaemon/TestRunner.cs index c8171a3..7f22edc 100644 --- a/tools/OcrDaemon/TestRunner.cs +++ b/tools/OcrDaemon/TestRunner.cs @@ -91,7 +91,6 @@ static class TestRunner continue; } - var options = new OcrOptions(); List actualSet; if (!string.IsNullOrWhiteSpace(tc.BeforeImage)) @@ -115,7 +114,6 @@ static class TestRunner var response = ocrHandler.HandleDiffOcr(new Request { File = imagePath, - Ocr = options, Path = savePath, }); @@ -136,7 +134,7 @@ static class TestRunner else { using var bitmap = new Bitmap(imagePath); - using var processed = ImagePreprocessor.PreprocessForOcr(bitmap, options); + using var processed = ImagePreprocessor.PreprocessForOcr(bitmap); if (!string.IsNullOrEmpty(savePreDir)) { @@ -146,7 +144,7 @@ static class TestRunner using var pix = ImageUtils.BitmapToPix(processed); using var page = engine.Process(pix); - var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0, minConfidence: options.MinConfidence); + var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0); var actualLines = lines.Select(l => Normalize(l.Text)).Where(s => s.Length > 0).ToList(); var rawText = page.GetText() ?? 
string.Empty; diff --git a/tools/OcrDaemon/tessdata/crop.json b/tools/OcrDaemon/tessdata/crop.json new file mode 100644 index 0000000..ee1e3fb --- /dev/null +++ b/tools/OcrDaemon/tessdata/crop.json @@ -0,0 +1,93 @@ +[ + { + "id": "1", + "image": "images/tooltip1.png", + "snapshotImage": "images/tooltip-snapshot.png", + "topLeft": { + "x": 0, + "y": 84 + }, + "bottomRight": { + "x": 1185, + "y": 774 + } + }, + { + "id": "2", + "image": "images/tooltip2.png", + "snapshotImage": "images/tooltip-snapshot.png", + "topLeft": { + "x": 304, + "y": 0 + }, + "bottomRight": { + "x": 983, + "y": 470 + } + }, + { + "id": "3", + "image": "images/tooltip3.png", + "snapshotImage": "images/tooltip-snapshot.png", + "topLeft": { + "x": 473, + "y": 334 + }, + "bottomRight": { + "x": 1114, + "y": 914 + } + }, + { + "id": "4", + "image": "images/tooltip4.png", + "snapshotImage": "images/tooltip-snapshot.png", + "topLeft": { + "x": 209, + "y": 264 + }, + "bottomRight": { + "x": 1097, + "y": 915 + } + }, + { + "id": "5", + "image": "images/tooltip5.png", + "snapshotImage": "images/tooltip-snapshot.png", + "topLeft": { + "x": 763, + "y": 0 + }, + "bottomRight": { + "x": 1874, + "y": 560 + } + }, + { + "id": "6", + "image": "images/tooltip6.png", + "snapshotImage": "images/tooltip-snapshot.png", + "topLeft": { + "x": 1541, + "y": 154 + }, + "bottomRight": { + "x": 2348, + "y": 614 + } + }, + { + "id": "7", + "image": "images/tooltip7.png", + "snapshotImage": "images/tooltip-snapshot.png", + "topLeft": { + "x": 1921, + "y": 40 + }, + "bottomRight": { + "x": 2558, + "y": 370 + } + } +] \ No newline at end of file diff --git a/tools/OcrDaemon/tessdata/images/tooltip-snapshot.png b/tools/OcrDaemon/tessdata/images/tooltip-snapshot.png new file mode 100644 index 0000000..efea45b Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/tooltip-snapshot.png differ diff --git a/tools/OcrDaemon/tessdata/images/tooltip1.png b/tools/OcrDaemon/tessdata/images/tooltip1.png new file mode 100644 index 
0000000..87e7b8f Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/tooltip1.png differ diff --git a/tools/OcrDaemon/tessdata/images/tooltip2.png b/tools/OcrDaemon/tessdata/images/tooltip2.png new file mode 100644 index 0000000..258cee7 Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/tooltip2.png differ diff --git a/tools/OcrDaemon/tessdata/images/tooltip3.png b/tools/OcrDaemon/tessdata/images/tooltip3.png new file mode 100644 index 0000000..fca2916 Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/tooltip3.png differ diff --git a/tools/OcrDaemon/tessdata/images/tooltip4.png b/tools/OcrDaemon/tessdata/images/tooltip4.png new file mode 100644 index 0000000..b0a1de4 Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/tooltip4.png differ diff --git a/tools/OcrDaemon/tessdata/images/tooltip5.png b/tools/OcrDaemon/tessdata/images/tooltip5.png new file mode 100644 index 0000000..76911a6 Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/tooltip5.png differ diff --git a/tools/OcrDaemon/tessdata/images/tooltip6.png b/tools/OcrDaemon/tessdata/images/tooltip6.png new file mode 100644 index 0000000..3cb1780 Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/tooltip6.png differ diff --git a/tools/OcrDaemon/tessdata/images/tooltip7.png b/tools/OcrDaemon/tessdata/images/tooltip7.png new file mode 100644 index 0000000..2770ba5 Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/tooltip7.png differ