namespace OcrDaemon;

using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using Tesseract;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;

/// <summary>
/// Handles the OCR-related daemon commands: plain OCR, screenshot, capture,
/// reference snapshot, and "diff OCR" (OCR of the region that got darker
/// between the stored reference frame and a newly obtained frame).
/// </summary>
class OcrHandler(TesseractEngine engine)
{
    /// <summary>Number of bytes per pixel in <see cref="PixelFormat.Format32bppArgb"/>.</summary>
    private const int BytesPerPixel = 4;

    /// <summary>Default summed B+G+R darkening required to mark a pixel as changed.</summary>
    private const int DefaultDiffThreshold = 30;

    /// <summary>Maximum number of consecutive inactive rows/columns tolerated inside one run.</summary>
    private const int MaxGap = 15;

    /// <summary>Minimum run length (in both directions) for a region to count as a tooltip.</summary>
    private const int MinTooltipSize = 50;

    /// <summary>Right-edge trimming only applies to regions wider than this and never trims below it.</summary>
    private const int MinTrimWidth = 100;

    /// <summary>Frame stored by <see cref="HandleSnapshot"/>; owned (and disposed) by this handler.</summary>
    private Bitmap? _referenceFrame;

    /// <summary>
    /// Runs OCR on the bitmap returned by <c>ScreenCapture.CaptureOrLoad</c> for the
    /// request's file/region and returns the recognized text and lines.
    /// </summary>
    /// <param name="req">Request supplying <c>File</c> and <c>Region</c>.</param>
    /// <returns>An <c>OcrResponse</c> with the page text and extracted lines (offsets 0,0).</returns>
    public object HandleOcr(Request req)
    {
        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var pix = ImageUtils.BitmapToPix(bitmap);
        using var page = engine.Process(pix);

        var text = page.GetText();
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
        return new OcrResponse { Text = text, Lines = lines };
    }

    /// <summary>
    /// Saves an image to <c>req.Path</c>. If a reference frame exists, that frame is
    /// saved (the same image used for diff-ocr); otherwise a new frame is obtained.
    /// </summary>
    /// <param name="req">Request supplying <c>Path</c> and, when no reference frame exists, <c>File</c>/<c>Region</c>.</param>
    /// <returns>An <c>OkResponse</c>, or an <c>ErrorResponse</c> when <c>Path</c> is missing.</returns>
    public object HandleScreenshot(Request req)
    {
        if (string.IsNullOrEmpty(req.Path))
        {
            return new ErrorResponse("screenshot command requires 'path'");
        }

        var reference = _referenceFrame;
        var bitmap = reference ?? ScreenCapture.CaptureOrLoad(req.File, req.Region);
        try
        {
            SaveImage(bitmap, req.Path);
        }
        finally
        {
            // Only a frame obtained for this call is ours to dispose; the reference
            // frame stays alive for later diff-ocr calls. The finally block makes
            // sure a failed save does not leak the temporary bitmap.
            if (!ReferenceEquals(bitmap, reference))
            {
                bitmap.Dispose();
            }
        }

        return new OkResponse();
    }

    /// <summary>
    /// Obtains a frame for the request's file/region and returns it as a
    /// Base64-encoded PNG.
    /// </summary>
    /// <param name="req">Request supplying <c>File</c> and <c>Region</c>.</param>
    /// <returns>A <c>CaptureResponse</c> whose <c>Image</c> is the Base64 PNG data.</returns>
    public object HandleCapture(Request req)
    {
        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);

        var base64 = Convert.ToBase64String(ms.ToArray());
        return new CaptureResponse { Image = base64 };
    }

    /// <summary>
    /// Replaces the stored reference frame with a newly obtained frame.
    /// </summary>
    /// <param name="req">Request supplying <c>File</c> and <c>Region</c>.</param>
    /// <returns>An <c>OkResponse</c>.</returns>
    public object HandleSnapshot(Request req)
    {
        // Obtain the new frame first: if this throws, the previous reference frame
        // is still valid instead of being left as a disposed object.
        var fresh = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        _referenceFrame?.Dispose();
        _referenceFrame = fresh;
        return new OkResponse();
    }

    /// <summary>
    /// Compares a newly obtained frame against the stored reference frame, locates
    /// the densest rectangular region of pixels that got darker, and runs OCR on
    /// that region of the new frame.
    /// </summary>
    /// <param name="req">
    /// Request supplying <c>File</c>, optional <c>Threshold</c> (values &lt;= 0 fall back
    /// to 30), <c>Debug</c>, and optional <c>Path</c> to which the raw crop and a
    /// ".pre" preprocessed copy are written.
    /// </param>
    /// <returns>
    /// An <c>ErrorResponse</c> when no reference frame is stored; an empty
    /// <c>OcrResponse</c> when nothing changed or no region of at least 50x50 was
    /// found; otherwise a <c>DiffOcrResponse</c> with text, lines and the region.
    /// </returns>
    public object HandleDiffOcr(Request req)
    {
        var reference = _referenceFrame;
        if (reference == null)
        {
            return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
        }

        using var current = ScreenCapture.CaptureOrLoad(req.File, null);

        // Compare only the area both frames have in common.
        int w = Math.Min(reference.Width, current.Width);
        int h = Math.Min(reference.Height, current.Height);

        // Raw pixels of both frames as tightly packed rows (w * 4 bytes each), so a
        // single index is valid for both buffers even if the bitmaps' strides differ.
        byte[] refPx = ReadPixels(reference, w, h);
        byte[] curPx = ReadPixels(current, w, h);

        // Detect pixels that got DARKER (tooltip = dark overlay).
        // This filters out item highlight glow (brighter) and cursor changes.
        int diffThresh = req.Threshold > 0 ? req.Threshold : DefaultDiffThreshold;
        bool[] changed = new bool[w * h];
        int[] rowCounts = new int[h];
        int totalChanged = 0;
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                int pixel = y * w + x;
                int i = pixel * BytesPerPixel;

                // Byte order used here is B, G, R (alpha at i + 3 is ignored).
                int darkerB = refPx[i] - curPx[i];
                int darkerG = refPx[i + 1] - curPx[i + 1];
                int darkerR = refPx[i + 2] - curPx[i + 2];
                if (darkerB + darkerG + darkerR > diffThresh)
                {
                    changed[pixel] = true;
                    rowCounts[y]++;
                    totalChanged++;
                }
            }
        }

        bool debug = req.Debug;
        if (totalChanged == 0)
        {
            if (debug)
            {
                Console.Error.WriteLine(" diff-ocr: no changes detected");
            }

            return new OcrResponse { Text = "", Lines = [] };
        }

        // Two-pass density detection:
        //   Pass 1: find the row range using full-width row counts.
        //   Pass 2: find the column range using only pixels within that row range.
        // This makes the column threshold relative to tooltip height, not screen height.

        // Pass 1: longest run of rows with enough changed pixels.
        int rowThresh = w / 30; // ~3% of width
        var (bestRowStart, bestRowEnd, bestRowLen) = FindLongestRun(rowCounts, rowThresh, MaxGap);

        // Pass 2: count changed pixels per column, but only within the detected row range.
        int[] colCounts = new int[w];
        for (int y = bestRowStart; y <= bestRowEnd; y++)
        {
            for (int x = 0; x < w; x++)
            {
                if (changed[y * w + x])
                {
                    colCounts[x]++;
                }
            }
        }

        int tooltipHeight = bestRowEnd - bestRowStart + 1;
        int colThresh = tooltipHeight / 15; // ~7% of tooltip height
        var (bestColStart, bestColEnd, bestColLen) = FindLongestRun(colCounts, colThresh, MaxGap);

        // Log density detection results
        Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");

        if (bestRowLen < MinTooltipSize || bestColLen < MinTooltipSize)
        {
            Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
            return new OcrResponse { Text = "", Lines = [] };
        }

        const int pad = 0;
        int minX = Math.Max(bestColStart - pad, 0);
        int minY = Math.Max(bestRowStart - pad, 0);
        int maxX = Math.Min(bestColEnd + pad, w - 1);
        int maxY = Math.Min(bestRowEnd + pad, h - 1);

        // Dynamic right-edge trim: if the rightmost columns are much sparser than
        // the tooltip body, trim them. This handles the ~5% of cases where ambient
        // noise extends the detected region slightly on the right.
        int colSpan = maxX - minX + 1;
        if (colSpan > MinTrimWidth)
        {
            // Average (mean) column density over the middle 50% of the range.
            int q1 = minX + colSpan / 4;
            int q3 = minX + colSpan * 3 / 4;
            long midSum = 0;
            int midCount = 0;
            for (int x = q1; x <= q3; x++)
            {
                midSum += colCounts[x];
                midCount++;
            }

            double avgMidDensity = (double)midSum / midCount;
            double cutoff = avgMidDensity * 0.3; // column must have >=30% of avg density

            // Trim from the right while below cutoff, keeping more than MinTrimWidth columns.
            while (maxX > minX + MinTrimWidth && colCounts[maxX] < cutoff)
            {
                maxX--;
            }
        }

        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;

        if (debug)
        {
            Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
        }

        // Simple crop of the tooltip region from the current frame (no per-pixel masking).
        // Per the original author's note, the preprocessing step is expected to
        // suppress background text.
        using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);

        // Save before/after preprocessing images if a path is provided.
        var outPath = req.Path;
        if (!string.IsNullOrEmpty(outPath))
        {
            SaveImage(cropped, outPath);
            if (debug)
            {
                Console.Error.WriteLine($" diff-ocr: saved raw to {outPath}");
            }
        }

        // Pre-process the crop for OCR (implemented in ImagePreprocessor).
        using var processed = ImagePreprocessor.PreprocessForOcr(cropped);

        // Save the preprocessed version alongside the raw one ("name.pre.ext").
        if (!string.IsNullOrEmpty(outPath))
        {
            var ext = Path.GetExtension(outPath);
            var prePath = Path.ChangeExtension(outPath, ".pre" + ext);
            SaveImage(processed, prePath);
            if (debug)
            {
                Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
            }
        }

        using var pix = ImageUtils.BitmapToPix(processed);
        using var page = engine.Process(pix);
        var text = page.GetText();

        // Line coordinates are offset by the crop origin so they refer to the full frame.
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: minX, offsetY: minY);

        return new DiffOcrResponse
        {
            Text = text,
            Lines = lines,
            Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
        };
    }

    /// <summary>
    /// Copies the top-left <paramref name="width"/> x <paramref name="height"/> area of
    /// <paramref name="source"/> into a tightly packed 32bpp buffer (row length
    /// <c>width * 4</c> bytes, no stride padding).
    /// </summary>
    private static byte[] ReadPixels(Bitmap source, int width, int height)
    {
        int rowBytes = width * BytesPerPixel;
        var pixels = new byte[rowBytes * height];

        var data = source.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            // Copy row by row: the locked data's stride may be larger than rowBytes.
            for (int y = 0; y < height; y++)
            {
                Marshal.Copy(IntPtr.Add(data.Scan0, y * data.Stride), pixels, y * rowBytes, rowBytes);
            }
        }
        finally
        {
            source.UnlockBits(data);
        }

        return pixels;
    }

    /// <summary>
    /// Finds the longest run of indices whose count is at least <paramref name="threshold"/>,
    /// where gaps of up to <paramref name="maxGap"/> inactive entries do not end a run.
    /// Returns (0, 0, 0) when no entry is active.
    /// </summary>
    private static (int Start, int End, int Length) FindLongestRun(int[] counts, int threshold, int maxGap)
    {
        int bestStart = 0, bestEnd = 0, bestLen = 0;
        int runStart = -1, lastActive = -1;

        for (int i = 0; i < counts.Length; i++)
        {
            if (counts[i] >= threshold)
            {
                if (runStart < 0)
                {
                    runStart = i;
                }

                lastActive = i;
            }
            else if (runStart >= 0 && i - lastActive > maxGap)
            {
                // Gap too large: close the current run (it ends at the last active index).
                int len = lastActive - runStart + 1;
                if (len > bestLen)
                {
                    bestStart = runStart;
                    bestEnd = lastActive;
                    bestLen = len;
                }

                runStart = -1;
            }
        }

        // Close a run that is still open at the end of the array.
        if (runStart >= 0)
        {
            int len = lastActive - runStart + 1;
            if (len > bestLen)
            {
                bestStart = runStart;
                bestEnd = lastActive;
                bestLen = len;
            }
        }

        return (bestStart, bestEnd, bestLen);
    }

    /// <summary>
    /// Saves <paramref name="image"/> to <paramref name="path"/> in the format chosen by
    /// <c>ImageUtils.GetImageFormat</c>, creating the target directory if needed.
    /// </summary>
    private static void SaveImage(System.Drawing.Image image, string path)
    {
        var format = ImageUtils.GetImageFormat(path);
        var dir = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(dir))
        {
            // No-op when the directory already exists.
            Directory.CreateDirectory(dir);
        }

        image.Save(path, format);
    }
}