namespace OcrDaemon;

using System.Drawing;
using System.Text.Json;
using System.Text.Json.Serialization;
using Tesseract;

/// <summary>
/// Line-oriented JSON daemon: reads one JSON request per line from standard input,
/// dispatches it on its <c>cmd</c> field and writes one JSON response per line to
/// standard output. Diagnostics go to standard error.
/// </summary>
static class Daemon
{
    // Shared serializer settings for both requests and responses:
    // camelCase property names, null-valued properties omitted on write.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Creates the Tesseract engine, emits a ready response and then serves requests
    /// until standard input reaches end-of-stream.
    /// </summary>
    /// <returns>
    /// <c>0</c> after standard input is exhausted; <c>1</c> if the Tesseract engine
    /// could not be created (an error response is written first).
    /// </returns>
    public static int Run()
    {
        ScreenCapture.InitDpiAwareness();

        // Pre-create the Tesseract OCR engine (reused across all requests).
        // "poe2" is used when its traineddata file is present, otherwise "eng".
        var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
        var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";

        TesseractEngine? tessEngine = null;
        try
        {
            tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
            tessEngine.DefaultPageSegMode = PageSegMode.SingleBlock;
            tessEngine.SetVariable("preserve_interword_spaces", "1");

            ApplyOptionalEngineFile(
                tessEngine,
                "user_words_file",
                Path.Combine(tessdataPath, $"{tessLang}.user-words"),
                "user-words",
                "words");
            ApplyOptionalEngineFile(
                tessEngine,
                "user_patterns_file",
                Path.Combine(tessdataPath, $"{tessLang}.user-patterns"),
                "user-patterns",
                "patterns");

            Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
        }
        catch (Exception ex)
        {
            // The engine may have been constructed before a later setup step failed.
            tessEngine?.Dispose();
            WriteResponse(new ErrorResponse($"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/eng.traineddata exists."));
            return 1;
        }

        PythonOcrBridge? pythonBridge = null;
        try
        {
            // Signal ready
            WriteResponse(new ReadyResponse());

            var ocrHandler = new OcrHandler(tessEngine);
            var gridHandler = new GridHandler();
            var detectGridHandler = new DetectGridHandler();
            var templateMatchHandler = new TemplateMatchHandler();
            pythonBridge = new PythonOcrBridge();

            // Main loop: read one JSON line, handle, write one JSON line
            string? line;
            while ((line = Console.In.ReadLine()) != null)
            {
                line = line.Trim();
                if (line.Length == 0)
                {
                    continue;
                }

                try
                {
                    var request = JsonSerializer.Deserialize<Request>(line, JsonOptions);
                    if (request == null)
                    {
                        WriteResponse(new ErrorResponse("Failed to parse request"));
                        continue;
                    }

                    // Command names are matched case-insensitively.
                    object response = request.Cmd?.ToLowerInvariant() switch
                    {
                        "ocr" => HandleOcrPipeline(ocrHandler, pythonBridge, request),
                        "screenshot" => ocrHandler.HandleScreenshot(request),
                        "capture" => ocrHandler.HandleCapture(request),
                        "snapshot" => ocrHandler.HandleSnapshot(request),
                        "diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
                        "test" => ocrHandler.HandleTest(request),
                        "tune" => ocrHandler.HandleTune(request),
                        "grid" => gridHandler.HandleGrid(request),
                        "detect-grid" => detectGridHandler.HandleDetectGrid(request),
                        "match-template" => templateMatchHandler.HandleTemplateMatch(request),
                        _ => new ErrorResponse($"Unknown command: {request.Cmd}"),
                    };
                    WriteResponse(response);
                }
                catch (Exception ex)
                {
                    // A failing request must not terminate the daemon; report and keep serving.
                    WriteResponse(new ErrorResponse(ex.Message));
                }
            }

            return 0;
        }
        finally
        {
            // Release the bridge and the engine even if the read loop itself throws.
            pythonBridge?.Dispose();
            tessEngine.Dispose();
        }
    }

    /// <summary>
    /// If <paramref name="path"/> exists, points the engine variable
    /// <paramref name="variable"/> at it and logs the file's line count to standard error.
    /// Does nothing when the file is absent.
    /// </summary>
    private static void ApplyOptionalEngineFile(
        TesseractEngine engine, string variable, string path, string label, string unit)
    {
        if (!File.Exists(path))
        {
            return;
        }

        engine.SetVariable(variable, path);
        var lineCount = File.ReadAllLines(path).Length;
        Console.Error.WriteLine($"Loaded {label}: {lineCount} {unit} from {path}");
    }

    /// <summary>
    /// Unified OCR pipeline for full/region captures.
    /// Capture → optional preprocess → route to engine (tesseract / easyocr / paddleocr).
    /// </summary>
    /// <returns>
    /// The handler/engine result, or an <c>ErrorResponse</c> when <c>bgsub</c>
    /// preprocessing is requested (it needs a reference frame).
    /// </returns>
    private static object HandleOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
    {
        var engine = request.Engine ?? "tesseract";
        var preprocess = request.Preprocess ?? "none";
        var kernelSize = request.Params?.KernelSize ?? 41;

        // No preprocess + tesseract = original fast path
        if (engine == "tesseract" && preprocess == "none")
        {
            return ocrHandler.HandleOcr(request);
        }

        // Reject bgsub before capturing: the capture would be thrown away anyway.
        if (preprocess == "bgsub")
        {
            return new ErrorResponse("bgsub preprocess requires a reference frame; use diff-ocr instead.");
        }

        // Capture
        using var bitmap = ScreenCapture.CaptureOrLoad(request.File, request.Region);

        // Preprocess: "tophat" runs the preprocessor; any other value is treated as "none".
        using Bitmap processed = preprocess == "tophat"
            ? ImagePreprocessor.PreprocessForOcr(bitmap, kernelSize: kernelSize)
            : (Bitmap)bitmap.Clone();

        // Route to engine
        if (engine == "tesseract")
        {
            // Without an explicit region, the region is the whole processed image at origin (0, 0).
            var region = request.Region != null
                ? new RegionRect
                {
                    X = request.Region.X,
                    Y = request.Region.Y,
                    Width = request.Region.Width,
                    Height = request.Region.Height,
                }
                : new RegionRect { X = 0, Y = 0, Width = processed.Width, Height = processed.Height };
            return ocrHandler.RunTesseractOnBitmap(processed, region);
        }

        // easyocr, paddleocr
        return pythonBridge.OcrFromBitmap(processed, engine);
    }

    /// <summary>
    /// Unified diff-OCR pipeline for tooltip detection.
    /// DiffCrop → preprocess (default=bgsub) → route to engine.
    /// </summary>
    /// <returns>
    /// The OCR result for the cropped region; an empty <c>OcrResponse</c> when
    /// <c>DiffCrop</c> yields no crop.
    /// </returns>
    private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
    {
        var engine = request.Engine ?? "tesseract";

        // No engine override + no preprocess override + no params = original Tesseract path
        if (engine == "tesseract" && request.Preprocess == null && request.Params == null)
        {
            return ocrHandler.HandleDiffOcr(request);
        }

        var isPythonEngine = engine is "easyocr" or "paddleocr";

        // Work on a copy so the threshold override does not modify the request's params.
        var p = request.Params?.Clone() ?? new DiffOcrParams();
        if (request.Threshold > 0)
        {
            p.DiffThresh = request.Threshold;
        }

        // Determine preprocess mode: explicit request.Preprocess > params.UseBackgroundSub > default "bgsub"
        string preprocess;
        if (request.Preprocess != null)
        {
            preprocess = request.Preprocess;
        }
        else if (request.Params != null)
        {
            preprocess = p.UseBackgroundSub ? "bgsub" : "tophat";
        }
        else
        {
            preprocess = "bgsub";
        }

        var sw = System.Diagnostics.Stopwatch.StartNew();

        var cropResult = ocrHandler.DiffCrop(request, p);
        if (cropResult == null)
        {
            return new OcrResponse { Text = "", Lines = [] };
        }

        var (cropped, refCropped, current, region) = cropResult.Value;
        using var currentScope = current;

        // Preprocess
        Bitmap processed;
        try
        {
            if (preprocess == "bgsub")
            {
                // Python engines receive the image without upscaling (factor 1).
                int upscale = isPythonEngine ? 1 : p.Upscale;
                processed = ImagePreprocessor.PreprocessWithBackgroundSub(
                    cropped,
                    refCropped,
                    dimPercentile: p.DimPercentile,
                    textThresh: p.TextThresh,
                    upscale: upscale,
                    softThreshold: p.SoftThreshold);
            }
            else if (preprocess == "tophat")
            {
                processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: p.KernelSize);
            }
            else // "none"
            {
                processed = (Bitmap)cropped.Clone();
            }
        }
        finally
        {
            // The crops are no longer needed once preprocessing finishes, or fails.
            cropped.Dispose();
            refCropped.Dispose();
        }

        var diffMs = sw.ElapsedMilliseconds;
        using var processedScope = processed;

        // Save debug images if path provided
        if (!string.IsNullOrEmpty(request.Path))
        {
            var dir = Path.GetDirectoryName(request.Path);
            if (!string.IsNullOrEmpty(dir))
            {
                // CreateDirectory is a no-op when the directory already exists.
                Directory.CreateDirectory(dir);
            }

            // Save preprocessed crop
            processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));

            // Save the full current frame next to it as "<name>.full<ext>".
            var ext = Path.GetExtension(request.Path);
            var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
            current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
        }

        // Route to engine
        sw.Restart();
        if (engine == "tesseract")
        {
            var result = ocrHandler.RunTesseractOnBitmap(processed, region);
            LogTiming(sw.ElapsedMilliseconds);
            return result;
        }

        // easyocr, paddleocr
        var ocrResult = pythonBridge.OcrFromBitmap(processed, engine);
        LogTiming(sw.ElapsedMilliseconds);

        // Offset word coordinates to screen space
        foreach (var ocrLine in ocrResult.Lines)
        {
            foreach (var word in ocrLine.Words)
            {
                word.X += region.X;
                word.Y += region.Y;
            }
        }

        return new DiffOcrResponse
        {
            Text = ocrResult.Text,
            Lines = ocrResult.Lines,
            Region = region,
        };

        // Writes the per-request timing summary to standard error.
        void LogTiming(long ocrMs) =>
            Console.Error.WriteLine($"  diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
    }

    /// <summary>
    /// Serializes <paramref name="response"/> with the shared JSON options and writes it
    /// as a single line to standard output, flushing immediately.
    /// </summary>
    private static void WriteResponse(object response)
    {
        var json = JsonSerializer.Serialize(response, JsonOptions);
        Console.Out.WriteLine(json);
        Console.Out.Flush();
    }
}