namespace OcrDaemon;

using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using Tesseract;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;

/// <summary>
/// Handles the OCR-related daemon commands: plain OCR, screenshot, capture,
/// reference snapshot and differential OCR against that snapshot.
/// </summary>
/// <remarks>
/// The handler keeps at most one reference frame (set by <see cref="HandleSnapshot"/>)
/// that is reused by <see cref="HandleScreenshot"/> and <see cref="HandleDiffOcr"/>.
/// </remarks>
class OcrHandler(TesseractEngine engine)
{
    // Frame stored by the last 'snapshot' command; owned (and disposed) by this handler.
    private Bitmap? _referenceFrame;

    /// <summary>
    /// Captures (or loads) an image, preprocesses it and runs the OCR engine on it.
    /// </summary>
    /// <param name="req">Request supplying the file/region to capture and the OCR options.</param>
    /// <returns>An <c>OcrResponse</c> with the recognized text and the extracted lines.</returns>
    public object HandleOcr(Request req)
    {
        var options = NormalizeOptions(req.Ocr);

        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var processed = ImagePreprocessor.PreprocessForOcr(bitmap, options);
        using var pix = ImageUtils.BitmapToPix(processed);
        using var page = engine.Process(pix);

        var text = page.GetText();
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0, minConfidence: options.MinConfidence);
        return new OcrResponse { Text = text, Lines = lines };
    }

    /// <summary>
    /// Saves an image to <c>req.Path</c>. If a reference frame exists, that frame is
    /// saved (the same image used for diff-ocr); otherwise a new frame is captured.
    /// </summary>
    /// <param name="req">Request whose <c>Path</c> names the output file.</param>
    /// <returns>
    /// An <c>OkResponse</c> on success, or an <c>ErrorResponse</c> when no path was supplied.
    /// </returns>
    public object HandleScreenshot(Request req)
    {
        if (string.IsNullOrEmpty(req.Path))
            return new ErrorResponse("screenshot command requires 'path'");

        var reference = _referenceFrame;
        var bitmap = reference ?? ScreenCapture.CaptureOrLoad(req.File, req.Region);
        try
        {
            var format = ImageUtils.GetImageFormat(req.Path);
            EnsureParentDirectory(req.Path);
            bitmap.Save(req.Path, format);
        }
        finally
        {
            // Only a freshly captured frame is ours to release; the reference frame
            // must stay alive for later diff-ocr calls. The finally block guarantees
            // the temporary frame is released even when saving fails.
            if (!ReferenceEquals(bitmap, reference))
                bitmap.Dispose();
        }

        return new OkResponse();
    }

    /// <summary>
    /// Captures (or loads) an image and returns it as a base64-encoded PNG.
    /// </summary>
    /// <param name="req">Request supplying the file/region to capture.</param>
    /// <returns>A <c>CaptureResponse</c> carrying the encoded image.</returns>
    public object HandleCapture(Request req)
    {
        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);

        var base64 = Convert.ToBase64String(ms.ToArray());
        return new CaptureResponse { Image = base64 };
    }

    /// <summary>
    /// Captures (or loads) a frame and stores it as the reference for later diff-ocr calls.
    /// </summary>
    /// <param name="req">Request supplying the file/region to capture.</param>
    /// <returns>An <c>OkResponse</c>.</returns>
    public object HandleSnapshot(Request req)
    {
        // Capture first: if the capture throws, the previous reference frame stays
        // valid instead of being left behind as a disposed bitmap.
        var fresh = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        _referenceFrame?.Dispose();
        _referenceFrame = fresh;
        return new OkResponse();
    }

    /// <summary>
    /// Compares the current frame with the stored reference frame, locates the most
    /// significant changed region, and runs OCR on that region of the current frame.
    /// </summary>
    /// <param name="req">
    /// Request supplying the file to load, OCR options, an optional minimum threshold,
    /// a debug flag, and an optional path where the cropped images are saved.
    /// </param>
    /// <returns>
    /// An <c>ErrorResponse</c> when no reference frame is stored; an empty
    /// <c>OcrResponse</c> when no usable change is found; otherwise a
    /// <c>DiffOcrResponse</c> with the text, lines and the region that was read.
    /// </returns>
    public object HandleDiffOcr(Request req)
    {
        var options = NormalizeOptions(req.Ocr);

        var reference = _referenceFrame;
        if (reference is null)
            return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");

        using var current = ScreenCapture.CaptureOrLoad(req.File, null);

        // Only the area common to both frames is compared.
        int w = Math.Min(reference.Width, current.Width);
        int h = Math.Min(reference.Height, current.Height);

        // Raw pixels for both frames. Each buffer keeps its own stride: the two
        // bitmaps may have different widths, so their row pitches can differ.
        byte[] refPx = CopyPixels(reference, w, h, out int refStride);
        byte[] curPx = CopyPixels(current, w, h, out int curStride);

        bool debug = req.Debug;

        // Per-pixel luminance difference (reference - current); positive means the
        // pixel got darker. Statistics are gathered over darkened pixels only.
        int[] delta = new int[w * h];
        long sum = 0;
        long sumSq = 0;
        int count = 0;
        for (int y = 0; y < h; y++)
        {
            int refRow = y * refStride;
            int curRow = y * curStride;
            int outRow = y * w;
            for (int x = 0; x < w; x++)
            {
                int d = Luminance(refPx, refRow + x * 4) - Luminance(curPx, curRow + x * 4);
                delta[outRow + x] = d;
                if (d > 0)
                {
                    sum += d;
                    sumSq += (long)d * d;
                    count++;
                }
            }
        }

        if (count == 0)
        {
            if (debug) Console.Error.WriteLine(" diff-ocr: no darkening detected");
            return new OcrResponse { Text = "", Lines = [] };
        }

        double mean = (double)sum / count;
        double variance = Math.Max(0, (double)sumSq / count - mean * mean);
        double std = Math.Sqrt(variance);

        // Darkening must exceed mean + 2 sigma (but never less than the floor);
        // brightening uses half of that threshold, with the same floor.
        int minThresh = req.Threshold > 0 ? req.Threshold : 20;
        int diffThresh = (int)Math.Round(Math.Max(mean + 2.0 * std, minThresh));
        int brightThresh = Math.Max(minThresh, diffThresh / 2);

        bool[] changed = new bool[w * h];
        int totalChanged = 0;
        for (int i = 0; i < delta.Length; i++)
        {
            int d = delta[i];
            if (d >= diffThresh || d <= -brightThresh)
            {
                changed[i] = true;
                totalChanged++;
            }
        }

        if (totalChanged == 0)
        {
            if (debug) Console.Error.WriteLine(" diff-ocr: no changes detected after threshold");
            return new OcrResponse { Text = "", Lines = [] };
        }

        // Morphological close (radius 3) followed by open (radius 1).
        bool[] closed = ErodeMask(DilateMask(changed, w, h, radius: 3), w, h, radius: 3);
        bool[] cleaned = DilateMask(ErodeMask(closed, w, h, radius: 1), w, h, radius: 1);

        if (!TryFindBestComponent(cleaned, delta, w, h, minArea: (w * h) / 1000, out var compBounds))
        {
            if (debug) Console.Error.WriteLine(" diff-ocr: no tooltip-sized region found");
            return new OcrResponse { Text = "", Lines = [] };
        }

        // Grow the component's bounding box by a padding (plus extra room on the
        // right-hand side), clamped to the compared area.
        int pad = Math.Clamp(Math.Min(compBounds.Width, compBounds.Height) / 20, 6, 20);
        int extraRight = Math.Clamp(compBounds.Width / 6, 12, 80);
        int minX = Math.Max(compBounds.Left - pad, 0);
        int minY = Math.Max(compBounds.Top - pad, 0);
        int maxX = Math.Min(compBounds.Right - 1 + pad + extraRight, w - 1);
        int maxY = Math.Min(compBounds.Bottom - 1 + pad, h - 1);
        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;

        if (debug) Console.Error.WriteLine($" diff-ocr: changed={totalChanged} thresh={diffThresh} mean={mean:F1} std={std:F1} region=({minX},{minY}) {rw}x{rh}");

        // Simple crop of the tooltip region from the current frame (no per-pixel masking).
        // The top-hat preprocessing will handle suppressing background text.
        using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);

        // Save the raw crop if a path is provided.
        if (!string.IsNullOrEmpty(req.Path))
        {
            EnsureParentDirectory(req.Path);
            cropped.Save(req.Path, ImageUtils.GetImageFormat(req.Path));
            if (debug) Console.Error.WriteLine($" diff-ocr: saved raw to {req.Path}");
        }

        // Pre-process the crop for OCR.
        using var processed = ImagePreprocessor.PreprocessForOcr(cropped, options);

        // Save the preprocessed version alongside the raw one ("name.pre.ext").
        if (!string.IsNullOrEmpty(req.Path) && options.Preprocess)
        {
            var ext = Path.GetExtension(req.Path);
            var prePath = Path.ChangeExtension(req.Path, ".pre" + ext);
            processed.Save(prePath, ImageUtils.GetImageFormat(prePath));
            if (debug) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
        }

        using var pix = ImageUtils.BitmapToPix(processed);
        using var page = engine.Process(pix);

        var text = page.GetText();
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: minX, offsetY: minY, minConfidence: options.MinConfidence);

        return new DiffOcrResponse
        {
            Text = text,
            Lines = lines,
            Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
        };
    }

    /// <summary>Creates the directory that will contain <paramref name="path"/> if it does not exist yet.</summary>
    private static void EnsureParentDirectory(string path)
    {
        var dir = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
    }

    /// <summary>
    /// Copies the top-left <paramref name="w"/> x <paramref name="h"/> area of
    /// <paramref name="source"/> as 32bpp ARGB bytes.
    /// </summary>
    /// <param name="source">Bitmap to read from.</param>
    /// <param name="w">Width of the area, in pixels.</param>
    /// <param name="h">Height of the area, in pixels.</param>
    /// <param name="stride">Row pitch, in bytes, of the returned buffer.</param>
    /// <returns>A buffer of <c>stride * h</c> bytes.</returns>
    private static byte[] CopyPixels(Bitmap source, int w, int h, out int stride)
    {
        var data = source.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            stride = data.Stride;
            var pixels = new byte[data.Stride * h];
            Marshal.Copy(data.Scan0, pixels, 0, pixels.Length);
            return pixels;
        }
        finally
        {
            // Always release the lock, even if the allocation or copy fails.
            source.UnlockBits(data);
        }
    }

    /// <summary>
    /// Integer luminance (30% R, 59% G, 11% B) of the pixel whose bytes start at
    /// <paramref name="offset"/>, read in B, G, R order.
    /// </summary>
    private static int Luminance(byte[] pixels, int offset)
    {
        int b = pixels[offset];
        int g = pixels[offset + 1];
        int r = pixels[offset + 2];
        return (r * 30 + g * 59 + b * 11) / 100;
    }

    /// <summary>
    /// Sets a pixel when any pixel in its (2r+1) x (2r+1) window, clipped to the image, is set.
    /// Returns <paramref name="src"/> itself when <paramref name="radius"/> is not positive.
    /// </summary>
    private static bool[] DilateMask(bool[] src, int w, int h, int radius) =>
        radius <= 0 ? src : ApplyWindow(src, w, h, radius, probe: true);

    /// <summary>
    /// Keeps a pixel only when every pixel in its (2r+1) x (2r+1) window, clipped to the image, is set.
    /// Returns <paramref name="src"/> itself when <paramref name="radius"/> is not positive.
    /// </summary>
    private static bool[] ErodeMask(bool[] src, int w, int h, int radius) =>
        radius <= 0 ? src : ApplyWindow(src, w, h, radius, probe: false);

    /// <summary>
    /// Shared window scan for dilation and erosion: each output pixel becomes
    /// <paramref name="probe"/> when its window contains a pixel equal to
    /// <paramref name="probe"/>, and the opposite value otherwise.
    /// </summary>
    private static bool[] ApplyWindow(bool[] src, int w, int h, int radius, bool probe)
    {
        var dst = new bool[w * h];
        for (int y = 0; y < h; y++)
        {
            int y0 = Math.Max(0, y - radius);
            int y1 = Math.Min(h - 1, y + radius);
            for (int x = 0; x < w; x++)
            {
                int x0 = Math.Max(0, x - radius);
                int x1 = Math.Min(w - 1, x + radius);

                bool found = false;
                for (int yy = y0; yy <= y1 && !found; yy++)
                {
                    int row = yy * w;
                    for (int xx = x0; xx <= x1; xx++)
                    {
                        if (src[row + xx] == probe)
                        {
                            found = true;
                            break;
                        }
                    }
                }

                // found + probe=true  -> set (dilate); found + probe=false -> cleared (erode).
                dst[y * w + x] = found == probe;
            }
        }
        return dst;
    }

    /// <summary>
    /// Finds the 8-connected component of <paramref name="mask"/> with the highest
    /// score, where score = area * (area / bounding-box area) * (average positive delta).
    /// </summary>
    /// <param name="mask">Row-major mask of candidate pixels, <paramref name="w"/> x <paramref name="h"/>.</param>
    /// <param name="delta">Row-major per-pixel differences; only positive values contribute.</param>
    /// <param name="w">Mask width.</param>
    /// <param name="h">Mask height.</param>
    /// <param name="minArea">Components with fewer pixels than this are ignored.</param>
    /// <param name="bounds">Bounding box of the winning component, or <see cref="Rectangle.Empty"/>.</param>
    /// <returns><c>true</c> when a component with a positive score was found.</returns>
    private static bool TryFindBestComponent(bool[] mask, int[] delta, int w, int h, int minArea, out Rectangle bounds)
    {
        bounds = Rectangle.Empty;

        bool[] visited = new bool[w * h];
        double bestScore = 0;
        Rectangle bestBounds = Rectangle.Empty;

        // Array-backed BFS queue; every pixel is enqueued at most once.
        int[] qx = new int[w * h];
        int[] qy = new int[w * h];

        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                int idx = y * w + x;
                if (!mask[idx] || visited[idx]) continue;

                int head = 0, tail = 0;
                qx[tail] = x;
                qy[tail] = y;
                tail++;
                visited[idx] = true;

                int minX = x, maxX = x, minY = y, maxY = y;
                int area = 0;
                long sumDelta = 0;

                while (head < tail)
                {
                    int cx = qx[head];
                    int cy = qy[head];
                    head++;

                    area++;
                    int d = delta[cy * w + cx];
                    if (d > 0) sumDelta += d;

                    if (cx < minX) minX = cx;
                    if (cx > maxX) maxX = cx;
                    if (cy < minY) minY = cy;
                    if (cy > maxY) maxY = cy;

                    // Visit the 3x3 neighbourhood (the centre is already visited).
                    for (int ny = cy - 1; ny <= cy + 1; ny++)
                    {
                        if (ny < 0 || ny >= h) continue;
                        int row = ny * w;
                        for (int nx = cx - 1; nx <= cx + 1; nx++)
                        {
                            if (nx < 0 || nx >= w) continue;
                            int nidx = row + nx;
                            if (!mask[nidx] || visited[nidx]) continue;

                            visited[nidx] = true;
                            qx[tail] = nx;
                            qy[tail] = ny;
                            tail++;
                        }
                    }
                }

                if (area < minArea) continue;

                int rectW = maxX - minX + 1;
                int rectH = maxY - minY + 1;
                int rectArea = rectW * rectH;
                double fillRatio = rectArea > 0 ? (double)area / rectArea : 0;
                double avgDelta = area > 0 ? (double)sumDelta / area : 0;
                double score = area * fillRatio * avgDelta;

                if (score > bestScore)
                {
                    bestScore = score;
                    bestBounds = Rectangle.FromLTRB(minX, minY, maxX + 1, maxY + 1);
                }
            }
        }

        if (bestScore <= 0) return false;

        bounds = bestBounds;
        return true;
    }

    /// <summary>
    /// Returns usable OCR options: a new instance when <paramref name="options"/> is null,
    /// otherwise the same instance with out-of-range values corrected in place
    /// (odd kernel size of at least 3, scale of at least 1, non-negative confidence).
    /// </summary>
    private static OcrOptions NormalizeOptions(OcrOptions? options)
    {
        var normalized = options ?? new OcrOptions();

        if (normalized.KernelSize < 3) normalized.KernelSize = 3;
        if (normalized.KernelSize % 2 == 0) normalized.KernelSize += 1;
        if (normalized.Scale < 1) normalized.Scale = 1;
        if (normalized.MinConfidence < 0) normalized.MinConfidence = 0;

        return normalized;
    }
}