namespace OcrDaemon;

using System.Drawing;
using System.Text.Json;
using System.Text.Json.Serialization;
using Tesseract;

/// <summary>
/// Line-oriented JSON daemon: reads one JSON request per line from stdin, dispatches it on
/// its <c>cmd</c> field and writes one JSON response per line to stdout.
/// Diagnostics are written to stderr.
/// </summary>
static class Daemon
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Creates the shared Tesseract engine, signals readiness and serves requests until stdin closes.
    /// </summary>
    /// <returns>0 when stdin reaches end-of-stream; 1 when the Tesseract engine could not be set up.</returns>
    public static int Run()
    {
        ScreenCapture.InitDpiAwareness();

        // Pre-create the Tesseract OCR engine (reused across all requests)
        var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
        var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";

        TesseractEngine? tessEngine = null;
        try
        {
            tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
            tessEngine.DefaultPageSegMode = PageSegMode.SingleBlock;
            tessEngine.SetVariable("preserve_interword_spaces", "1");

            var userWordsPath = Path.Combine(tessdataPath, $"{tessLang}.user-words");
            var userPatternsPath = Path.Combine(tessdataPath, $"{tessLang}.user-patterns");

            if (File.Exists(userWordsPath))
            {
                tessEngine.SetVariable("user_words_file", userWordsPath);
                var lineCount = File.ReadAllLines(userWordsPath).Length;
                Console.Error.WriteLine($"Loaded user-words: {lineCount} words from {userWordsPath}");
            }

            if (File.Exists(userPatternsPath))
            {
                tessEngine.SetVariable("user_patterns_file", userPatternsPath);
                var lineCount = File.ReadAllLines(userPatternsPath).Length;
                Console.Error.WriteLine($"Loaded user-patterns: {lineCount} patterns from {userPatternsPath}");
            }

            Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
        }
        catch (Exception ex)
        {
            // The engine may have been constructed before a later setup step failed.
            tessEngine?.Dispose();

            // Name the traineddata file that was actually selected, not always "eng".
            WriteResponse(new ErrorResponse(
                $"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/{tessLang}.traineddata exists."));
            return 1;
        }

        try
        {
            // Signal ready
            WriteResponse(new ReadyResponse());
            ServeRequests(tessEngine);
            return 0;
        }
        finally
        {
            tessEngine.Dispose();
        }
    }

    /// <summary>
    /// Main loop: read one JSON line, handle, write one JSON line. Returns when stdin is closed.
    /// Exceptions thrown while parsing or handling a request are reported as an
    /// <c>ErrorResponse</c> and the loop continues.
    /// </summary>
    private static void ServeRequests(TesseractEngine tessEngine)
    {
        var ocrHandler = new OcrHandler(tessEngine);
        var gridHandler = new GridHandler();
        var detectGridHandler = new DetectGridHandler();
        var templateMatchHandler = new TemplateMatchHandler();
        var edgeCropHandler = new EdgeCropHandler();
        var pythonBridge = new PythonOcrBridge();

        try
        {
            string? line;
            while ((line = Console.In.ReadLine()) != null)
            {
                line = line.Trim();
                if (line.Length == 0)
                    continue;

                try
                {
                    var request = JsonSerializer.Deserialize<Request>(line, JsonOptions);
                    if (request == null)
                    {
                        WriteResponse(new ErrorResponse("Failed to parse request"));
                        continue;
                    }

                    object response = request.Cmd?.ToLowerInvariant() switch
                    {
                        "ocr" => HandleOcrPipeline(ocrHandler, pythonBridge, request),
                        "screenshot" => ocrHandler.HandleScreenshot(request),
                        "capture" => ocrHandler.HandleCapture(request),
                        "snapshot" => ocrHandler.HandleSnapshot(request),
                        "diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
                        "edge-ocr" => HandleEdgeOcrPipeline(ocrHandler, edgeCropHandler, pythonBridge, request),
                        "test" => ocrHandler.HandleTest(request),
                        "tune" => ocrHandler.HandleTune(request),
                        "crop-test" => HandleCropTest(ocrHandler, edgeCropHandler, request),
                        "crop-tune" => HandleCropTune(ocrHandler, request),
                        "grid" => gridHandler.HandleGrid(request),
                        "detect-grid" => detectGridHandler.HandleDetectGrid(request),
                        "match-template" => templateMatchHandler.HandleTemplateMatch(request),
                        _ => new ErrorResponse($"Unknown command: {request.Cmd}"),
                    };
                    WriteResponse(response);
                }
                catch (Exception ex)
                {
                    WriteResponse(new ErrorResponse(ex.Message));
                }
            }
        }
        finally
        {
            // Release the bridge even if an exception escapes the loop.
            pythonBridge.Dispose();
        }
    }

    /// <summary>
    /// Unified OCR pipeline for full/region captures.
    /// Capture → optional preprocess → route to engine (tesseract / easyocr / paddleocr).
    /// </summary>
    /// <remarks>
    /// Engine defaults to "tesseract" and preprocess to "none". Any preprocess value other than
    /// "tophat" or "bgsub" is treated as "none"; any engine other than "tesseract" is forwarded
    /// to the Python bridge. "bgsub" is rejected with an <c>ErrorResponse</c>.
    /// </remarks>
    private static object HandleOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
    {
        var engine = request.Engine ?? "tesseract";
        var preprocess = request.Preprocess ?? "none";
        var kernelSize = request.Params?.Ocr.KernelSize ?? 41;

        // No preprocess + tesseract = original fast path
        if (engine == "tesseract" && preprocess == "none")
            return ocrHandler.HandleOcr(request);

        // Capture
        using var bitmap = ScreenCapture.CaptureOrLoad(request.File, request.Region);

        // Preprocess
        Bitmap processed;
        if (preprocess == "tophat")
        {
            processed = ImagePreprocessor.PreprocessForOcr(bitmap, kernelSize: kernelSize);
        }
        else if (preprocess == "bgsub")
        {
            return new ErrorResponse("bgsub preprocess requires a reference frame; use diff-ocr instead.");
        }
        else // "none"
        {
            processed = (Bitmap)bitmap.Clone();
        }

        using var _processed = processed;

        // Route to engine
        if (engine == "tesseract")
        {
            var region = request.Region != null
                ? new RegionRect
                {
                    X = request.Region.X,
                    Y = request.Region.Y,
                    Width = request.Region.Width,
                    Height = request.Region.Height,
                }
                : new RegionRect { X = 0, Y = 0, Width = processed.Width, Height = processed.Height };
            return ocrHandler.RunTesseractOnBitmap(processed, region);
        }

        // easyocr, paddleocr
        return pythonBridge.OcrFromBitmap(processed, engine);
    }

    /// <summary>
    /// Unified diff-OCR pipeline for tooltip detection.
    /// DiffCrop → preprocess (default=bgsub) → route to engine.
    /// </summary>
    /// <remarks>
    /// Returns an empty <c>OcrResponse</c> when the diff crop finds nothing. When
    /// <c>request.Path</c> is set, the preprocessed crop and the full capture are saved as
    /// debug images.
    /// </remarks>
    private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
    {
        var engine = request.Engine ?? "tesseract";
        var isPythonEngine = engine is "easyocr" or "paddleocr";
        var p = request.Params ?? new DiffOcrParams();
        var cropParams = p.Crop;
        var ocrParams = p.Ocr;

        if (request.Threshold > 0)
            cropParams.DiffThresh = request.Threshold;

        // Determine preprocess mode: explicit request.Preprocess > params.UseBackgroundSub > default "bgsub"
        string preprocess;
        if (request.Preprocess != null)
            preprocess = request.Preprocess;
        else if (request.Params != null)
            preprocess = ocrParams.UseBackgroundSub ? "bgsub" : "tophat";
        else
            preprocess = "bgsub";

        // No engine override + no preprocess override + no params = original Tesseract path
        if (engine == "tesseract" && request.Preprocess == null && request.Params == null)
            return ocrHandler.HandleDiffOcr(request);

        var sw = System.Diagnostics.Stopwatch.StartNew();

        var cropResult = ocrHandler.DiffCrop(request, cropParams);
        if (cropResult == null)
            return new OcrResponse { Text = "", Lines = [] };

        var (cropped, refCropped, current, region) = cropResult.Value;
        using var _current = current;

        // Preprocess — only sees ocrParams
        Bitmap processed;
        try
        {
            if (preprocess == "bgsub")
            {
                // Python engines always get the crop at 1x; only Tesseract uses the configured upscale.
                int upscale = isPythonEngine ? 1 : ocrParams.Upscale;
                processed = ImagePreprocessor.PreprocessWithBackgroundSub(
                    cropped,
                    refCropped,
                    dimPercentile: ocrParams.DimPercentile,
                    textThresh: ocrParams.TextThresh,
                    upscale: upscale,
                    softThreshold: ocrParams.SoftThreshold);
            }
            else if (preprocess == "tophat")
            {
                processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize);
            }
            else // "none"
            {
                processed = (Bitmap)cropped.Clone();
            }
        }
        finally
        {
            // The crops are no longer needed once preprocessing has run (or failed).
            cropped.Dispose();
            refCropped.Dispose();
        }

        var diffMs = sw.ElapsedMilliseconds;
        using var _processed = processed;

        // Save debug images if path provided
        if (!string.IsNullOrEmpty(request.Path))
            SaveDebugImages(request.Path, processed, current);

        // Route to engine
        sw.Restart();
        if (engine == "tesseract")
        {
            var result = ocrHandler.RunTesseractOnBitmap(processed, region);
            var ocrMs = sw.ElapsedMilliseconds;
            Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
            return result;
        }
        else // easyocr, paddleocr
        {
            var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
            var ocrMs = sw.ElapsedMilliseconds;
            Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");

            // Offset word coordinates to screen space
            foreach (var line in ocrResult.Lines)
            {
                foreach (var word in line.Words)
                {
                    word.X += region.X;
                    word.Y += region.Y;
                }
            }

            return new DiffOcrResponse
            {
                Text = ocrResult.Text,
                Lines = ocrResult.Lines,
                Region = region,
            };
        }
    }

    /// <summary>
    /// Edge-based tooltip detection pipeline.
    /// EdgeCrop → preprocess (tophat only; bgsub falls back to tophat) → route to engine.
    /// </summary>
    /// <remarks>
    /// Returns an empty <c>OcrResponse</c> when the edge crop finds nothing. When
    /// <c>request.Path</c> is set, the preprocessed crop and the full capture are saved as
    /// debug images.
    /// </remarks>
    private static object HandleEdgeOcrPipeline(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, PythonOcrBridge pythonBridge, Request request)
    {
        var engine = request.Engine ?? "tesseract";
        var ep = request.EdgeParams ?? new EdgeOcrParams();
        var cropParams = ep.Crop;
        var ocrParams = ep.Ocr;

        // Edge method only supports tophat (no reference frame for bgsub)
        string preprocess = request.Preprocess ?? "tophat";
        if (preprocess == "bgsub")
            preprocess = "tophat";

        var sw = System.Diagnostics.Stopwatch.StartNew();

        var cropResult = edgeCropHandler.EdgeCrop(request, cropParams);
        if (cropResult == null)
            return new OcrResponse { Text = "", Lines = [] };

        var (cropped, fullCapture, region) = cropResult.Value;
        using var _fullCapture = fullCapture;

        // Preprocess
        Bitmap processed;
        try
        {
            if (preprocess == "tophat")
            {
                processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize, upscale: ocrParams.Upscale);
            }
            else // "none"
            {
                processed = (Bitmap)cropped.Clone();
            }
        }
        finally
        {
            // The crop is no longer needed once preprocessing has run (or failed).
            cropped.Dispose();
        }

        var cropMs = sw.ElapsedMilliseconds;
        using var _processed = processed;

        // Save debug images if path provided
        if (!string.IsNullOrEmpty(request.Path))
            SaveDebugImages(request.Path, processed, fullCapture);

        // Route to engine
        sw.Restart();
        if (engine == "tesseract")
        {
            var result = ocrHandler.RunTesseractOnBitmap(processed, region, pad: cropParams.OcrPad, upscale: ocrParams.Upscale);
            var ocrMs = sw.ElapsedMilliseconds;
            Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}");
            return result;
        }
        else // easyocr, paddleocr
        {
            var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
            var ocrMs = sw.ElapsedMilliseconds;
            Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}");

            // Offset word coordinates to screen space
            foreach (var line in ocrResult.Lines)
            {
                foreach (var word in line.Words)
                {
                    word.X += region.X;
                    word.Y += region.Y;
                }
            }

            return new DiffOcrResponse
            {
                Text = ocrResult.Text,
                Lines = ocrResult.Lines,
                Region = region,
            };
        }
    }

    /// <summary>
    /// Coordinate-descent sweep over DiffCropParams to maximise avgIoU on crop.json ground truth.
    /// </summary>
    /// <remarks>
    /// Starts from <c>request.Params.Crop</c> (or defaults), sweeps one parameter at a time over
    /// a fixed list of candidate values, and runs at most three rounds, stopping early when a
    /// round yields no improvement. The starting params object is updated in place.
    /// </remarks>
    private static object HandleCropTune(OcrHandler ocrHandler, Request request)
    {
        var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
        var casesPath = Path.Combine(tessdataDir, "crop.json");
        if (!File.Exists(casesPath))
            return new ErrorResponse($"crop.json not found at {casesPath}");

        var json = File.ReadAllText(casesPath);
        var cases = JsonSerializer.Deserialize<List<CropTestCase>>(json, JsonOptions);
        if (cases == null || cases.Count == 0)
            return new ErrorResponse("No test cases in crop.json");

        // Preload valid test cases
        var validCases = new List<(CropTestCase tc, string imagePath, string snapshotPath)>();
        foreach (var tc in cases)
        {
            var imagePath = Path.Combine(tessdataDir, tc.Image);
            var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage);
            if (File.Exists(imagePath) && File.Exists(snapshotPath))
                validCases.Add((tc, imagePath, snapshotPath));
        }

        if (validCases.Count == 0)
            return new ErrorResponse("No valid test cases found");

        // Score function: compute avgIoU for a set of crop params.
        // Cases where the crop finds nothing contribute 0 but still count in the average.
        double ScoreCropParams(DiffCropParams cp)
        {
            double totalIoU = 0;
            foreach (var (tc, imagePath, snapshotPath) in validCases)
            {
                ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
                var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, cp);
                if (cropResult == null)
                    continue;

                var (cropped, refCropped, current, region) = cropResult.Value;
                cropped.Dispose();
                refCropped.Dispose();
                current.Dispose();

                totalIoU += ComputeIoU(region, tc.TopLeft.X, tc.TopLeft.Y, tc.BottomRight.X, tc.BottomRight.Y);
            }

            return totalIoU / validCases.Count;
        }

        DiffCropParams CloneCrop(DiffCropParams p) => new()
        {
            DiffThresh = p.DiffThresh,
            RowThreshDiv = p.RowThreshDiv,
            ColThreshDiv = p.ColThreshDiv,
            MaxGap = p.MaxGap,
            TrimCutoff = p.TrimCutoff,
            OcrPad = p.OcrPad,
        };

        // Start from provided params or defaults
        var best = request.Params?.Crop ?? new DiffCropParams();
        double bestScore = ScoreCropParams(best);
        int totalEvals = 1;
        Console.Error.WriteLine($" crop-tune: baseline avgIoU={bestScore:F4} {best}");

        var intSweeps = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
        {
            ("diffThresh", [5, 10, 15, 20, 25, 30, 40], (c, v) => c.DiffThresh = v),
            ("rowThreshDiv", [20, 30, 40, 50, 60, 80, 100], (c, v) => c.RowThreshDiv = v),
            ("colThreshDiv", [5, 8, 10, 12, 15, 20], (c, v) => c.ColThreshDiv = v),
            ("maxGap", [5, 10, 15, 20, 25, 30], (c, v) => c.MaxGap = v),
        };
        double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5];

        const int maxRounds = 3;
        for (int round = 0; round < maxRounds; round++)
        {
            bool improved = false;
            Console.Error.WriteLine($"--- Round {round + 1} ---");

            foreach (var (name, values, set) in intSweeps)
            {
                Console.Error.Write($" {name}: ");
                int bestVal = 0;
                double bestValScore = -1;
                foreach (int v in values)
                {
                    var trial = CloneCrop(best);
                    set(trial, v);
                    double score = ScoreCropParams(trial);
                    totalEvals++;
                    Console.Error.Write($"{v}={score:F4} ");
                    if (score > bestValScore)
                    {
                        bestValScore = score;
                        bestVal = v;
                    }
                }

                Console.Error.WriteLine();

                // Only adopt the candidate when it strictly beats the current best.
                if (bestValScore > bestScore)
                {
                    set(best, bestVal);
                    bestScore = bestValScore;
                    improved = true;
                    Console.Error.WriteLine($" -> {name}={bestVal} avgIoU={bestScore:F4}");
                }
            }

            // trimCutoff sweep
            Console.Error.Write($" trimCutoff: ");
            double bestTrim = best.TrimCutoff;
            double bestTrimScore = bestScore;
            foreach (double v in trimValues)
            {
                var trial = CloneCrop(best);
                trial.TrimCutoff = v;
                double score = ScoreCropParams(trial);
                totalEvals++;
                Console.Error.Write($"{v:F2}={score:F4} ");
                if (score > bestTrimScore)
                {
                    bestTrimScore = score;
                    bestTrim = v;
                }
            }

            Console.Error.WriteLine();
            if (bestTrimScore > bestScore)
            {
                best.TrimCutoff = bestTrim;
                bestScore = bestTrimScore;
                improved = true;
                Console.Error.WriteLine($" -> trimCutoff={bestTrim:F2} avgIoU={bestScore:F4}");
            }

            Console.Error.WriteLine($" End of round {round + 1}: avgIoU={bestScore:F4} {best}");
            if (!improved)
                break;
        }

        Console.Error.WriteLine($"\n crop-tune: BEST avgIoU={bestScore:F4} {best} evals={totalEvals}");

        return new CropTuneResponse
        {
            BestAvgIoU = bestScore,
            BestParams = best,
            Iterations = totalEvals,
        };
    }

    /// <summary>
    /// Crop accuracy test: runs diff and/or edge crop on test cases from crop.json,
    /// computes IoU and per-edge deltas vs ground truth.
    /// </summary>
    /// <remarks>
    /// The method is taken from <c>request.Engine</c> (default "diff"). Cases whose image or
    /// snapshot file is missing are recorded with IoU 0 and therefore lower the average.
    /// </remarks>
    private static object HandleCropTest(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, Request request)
    {
        var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
        var casesPath = Path.Combine(tessdataDir, "crop.json");
        if (!File.Exists(casesPath))
            return new ErrorResponse($"crop.json not found at {casesPath}");

        var json = File.ReadAllText(casesPath);
        var cases = JsonSerializer.Deserialize<List<CropTestCase>>(json, JsonOptions);
        if (cases == null || cases.Count == 0)
            return new ErrorResponse("No test cases in crop.json");

        var method = request.Engine ?? "diff"; // reuse engine field: "diff", "edge", or "both"
        var diffParams = request.Params?.Crop ?? new DiffCropParams();
        var edgeParams = request.EdgeParams?.Crop ?? new EdgeCropParams();

        var results = new List<CropTestResult>();
        foreach (var tc in cases)
        {
            var imagePath = Path.Combine(tessdataDir, tc.Image);
            var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage);
            if (!File.Exists(imagePath) || !File.Exists(snapshotPath))
            {
                Console.Error.WriteLine($" crop-test: SKIP {tc.Id} — missing files");
                results.Add(new CropTestResult { Id = tc.Id, IoU = 0 });
                continue;
            }

            // Expected region
            int expX = tc.TopLeft.X;
            int expY = tc.TopLeft.Y;
            int expW = tc.BottomRight.X - tc.TopLeft.X;
            int expH = tc.BottomRight.Y - tc.TopLeft.Y;
            var expected = new RegionRect { X = expX, Y = expY, Width = expW, Height = expH };

            RegionRect? actual = null;

            if (method is "diff" or "both")
            {
                // Load snapshot as reference
                ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
                var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, diffParams);
                if (cropResult != null)
                {
                    var (cropped, refCropped, current, region) = cropResult.Value;
                    actual = region;
                    cropped.Dispose();
                    refCropped.Dispose();
                    current.Dispose();
                }
            }

            // NOTE(review): "both" only runs the diff crop here; the edge crop runs for "edge"
            // alone. Confirm whether "both" was meant to exercise the edge crop as well.
            if (method == "edge")
            {
                // Default cursor to center of ground-truth bbox if not specified
                int cx = tc.CursorX ?? (tc.TopLeft.X + tc.BottomRight.X) / 2;
                int cy = tc.CursorY ?? (tc.TopLeft.Y + tc.BottomRight.Y) / 2;
                var cropResult = edgeCropHandler.EdgeCrop(
                    new Request { File = imagePath, CursorX = cx, CursorY = cy },
                    edgeParams);
                if (cropResult != null)
                {
                    var (cropped, fullCapture, region) = cropResult.Value;
                    actual = region;
                    cropped.Dispose();
                    fullCapture.Dispose();
                }
            }

            // Compute IoU and deltas
            double iou = 0;
            int dTop = 0, dLeft = 0, dRight = 0, dBottom = 0;
            if (actual != null)
            {
                iou = ComputeIoU(actual, expX, expY, tc.BottomRight.X, tc.BottomRight.Y);

                dTop = actual.Y - expY;                                    // positive = crop starts too low
                dLeft = actual.X - expX;                                   // positive = crop starts too far right
                dRight = actual.X + actual.Width - tc.BottomRight.X;       // positive = crop ends too far right
                dBottom = actual.Y + actual.Height - tc.BottomRight.Y;     // positive = crop ends too low
            }

            Console.Error.WriteLine($" crop-test #{tc.Id}: IoU={iou:F3} expected=({expX},{expY})+{expW}x{expH} actual={FormatRegion(actual)} delta T={dTop:+0;-#} L={dLeft:+0;-#} R={dRight:+0;-#} B={dBottom:+0;-#}");

            results.Add(new CropTestResult
            {
                Id = tc.Id,
                IoU = iou,
                Expected = expected,
                Actual = actual,
                DeltaTop = dTop,
                DeltaLeft = dLeft,
                DeltaRight = dRight,
                DeltaBottom = dBottom,
            });
        }

        double avgIoU = results.Count > 0 ? results.Average(r => r.IoU) : 0;
        Console.Error.WriteLine($" crop-test: method={method} avgIoU={avgIoU:F3} ({results.Count} cases)");

        return new CropTestResponse
        {
            Method = method,
            AvgIoU = avgIoU,
            Results = results,
        };
    }

    /// <summary>
    /// Intersection-over-union of <paramref name="actual"/> and the expected rectangle given by
    /// its top-left (<paramref name="ex1"/>, <paramref name="ey1"/>) and bottom-right
    /// (<paramref name="ex2"/>, <paramref name="ey2"/>) corners. Returns 0 when the union is not positive.
    /// </summary>
    private static double ComputeIoU(RegionRect actual, int ex1, int ey1, int ex2, int ey2)
    {
        int ax1 = actual.X, ay1 = actual.Y;
        int ax2 = actual.X + actual.Width, ay2 = actual.Y + actual.Height;

        int iw = Math.Max(0, Math.Min(ax2, ex2) - Math.Max(ax1, ex1));
        int ih = Math.Max(0, Math.Min(ay2, ey2) - Math.Max(ay1, ey1));

        // Areas are computed in double to avoid int overflow on large regions.
        double intersection = (double)iw * ih;
        double expectedArea = (double)(ex2 - ex1) * (ey2 - ey1);
        double union = (double)actual.Width * actual.Height + expectedArea - intersection;

        return union > 0 ? intersection / union : 0;
    }

    /// <summary>
    /// Saves the preprocessed crop to <paramref name="path"/> and the full capture next to it
    /// with ".full" inserted before the extension, creating the target directory if needed.
    /// </summary>
    private static void SaveDebugImages(string path, Image processed, Image fullCapture)
    {
        var dir = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);

        // Save preprocessed crop
        processed.Save(path, ImageUtils.GetImageFormat(path));

        var ext = Path.GetExtension(path);
        var fullPath = Path.ChangeExtension(path, ".full" + ext);
        fullCapture.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
    }

    /// <summary>Formats a region as "(x,y)+WxH" for log output, or "null" when absent.</summary>
    private static string FormatRegion(RegionRect? r) =>
        r != null ? $"({r.X},{r.Y})+{r.Width}x{r.Height}" : "null";

    /// <summary>Serialises <paramref name="response"/> as a single JSON line on stdout and flushes.</summary>
    private static void WriteResponse(object response)
    {
        var json = JsonSerializer.Serialize(response, JsonOptions);
        Console.Out.WriteLine(json);
        Console.Out.Flush();
    }
}