added easyOCR

2026-02-12 01:04:19 -05:00 · 2026-02-12 01:04:19 -05:00 · 9f208b0606
commit 9f208b0606
parent 37d6678577
27 changed files with 1780 additions and 112 deletions
--- a/tools/OcrDaemon/Daemon.cs
+++ b/tools/OcrDaemon/Daemon.cs
@ -53,6 +53,8 @@ static class Daemon
        var ocrHandler = new OcrHandler(tessEngine);
        var gridHandler = new GridHandler();
        var detectGridHandler = new DetectGridHandler();
+        var templateMatchHandler = new TemplateMatchHandler();
+        var pythonBridge = new PythonOcrBridge();

        // Main loop: read one JSON line, handle, write one JSON line
        string? line;
@ -72,16 +74,21 @@ static class Daemon

                object response = request.Cmd?.ToLowerInvariant() switch
                {
+                    "ocr" when request.Engine is "easyocr"
+                                  => pythonBridge.HandleOcr(request, request.Engine),
                    "ocr"         => ocrHandler.HandleOcr(request),
                    "screenshot"  => ocrHandler.HandleScreenshot(request),
                    "capture"     => ocrHandler.HandleCapture(request),
                    "snapshot"    => ocrHandler.HandleSnapshot(request),
+                    "diff-ocr" when request.Engine is "easyocr"
+                                  => HandleDiffOcrPython(ocrHandler, pythonBridge, request),
                    "diff-ocr"    => ocrHandler.HandleDiffOcr(request),
                    "test"        => ocrHandler.HandleTest(request),
                    "tune"        => ocrHandler.HandleTune(request),
                    "grid"        => gridHandler.HandleGrid(request),
-                    "detect-grid" => detectGridHandler.HandleDetectGrid(request),
-                    _             => new ErrorResponse($"Unknown command: {request.Cmd}"),
+                    "detect-grid"     => detectGridHandler.HandleDetectGrid(request),
+                    "match-template"  => templateMatchHandler.HandleTemplateMatch(request),
+                    _                 => new ErrorResponse($"Unknown command: {request.Cmd}"),
                };
                WriteResponse(response);
            }
@ -91,9 +98,59 @@ static class Daemon
            }
        }

+        pythonBridge.Dispose();
        return 0;
    }

+    /// <summary>
+    /// Handles a <c>diff-ocr</c> request whose OCR step is delegated to the Python bridge:
+    /// runs the diff/crop stage via <see cref="OcrHandler.DiffCrop"/>, optionally saves the
+    /// crop to <c>request.Path</c>, sends the crop to Python, and shifts the returned word
+    /// coordinates from crop space into screen space.
+    /// </summary>
+    /// <returns>
+    /// An empty <see cref="OcrResponse"/> when <c>DiffCrop</c> yields no region, otherwise a
+    /// <see cref="DiffOcrResponse"/> with the recognized text and the detected region.
+    /// </returns>
+    private static object HandleDiffOcrPython(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
+    {
+        var sw = System.Diagnostics.Stopwatch.StartNew();
+        var p = request.Threshold > 0
+            ? new DiffOcrParams { DiffThresh = request.Threshold }
+            : new DiffOcrParams();
+
+        var cropResult = ocrHandler.DiffCrop(request, p);
+        if (cropResult == null)
+            return new OcrResponse { Text = "", Lines = [] };
+
+        var (cropped, refCropped, current, region) = cropResult.Value;
+        // DiffCrop hands ownership of all three bitmaps to the caller. Bind each one to a
+        // using declaration so they are released even if saving or the Python call throws.
+        using var _current = current;
+        using var _cropped = cropped;
+        using var _refCropped = refCropped;
+        var diffMs = sw.ElapsedMilliseconds;
+
+        // Save crop to requested path if provided
+        if (!string.IsNullOrEmpty(request.Path))
+        {
+            var dir = Path.GetDirectoryName(request.Path);
+            if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
+                Directory.CreateDirectory(dir);
+            cropped.Save(request.Path, ImageUtils.GetImageFormat(request.Path));
+        }
+
+        // Send crop to Python via base64 over pipe (no temp file I/O)
+        sw.Restart();
+        var ocrResult = pythonBridge.OcrFromBitmap(cropped, request.Engine!);
+        var ocrMs = sw.ElapsedMilliseconds;
+
+        Console.Error.WriteLine($"  diff-ocr-python: diff={diffMs}ms ocr={ocrMs}ms total={diffMs + ocrMs}ms crop={region.Width}x{region.Height}");
+
+        // Offset word coordinates to screen space
+        foreach (var line in ocrResult.Lines)
+            foreach (var word in line.Words)
+            {
+                word.X += region.X;
+                word.Y += region.Y;
+            }
+
+        return new DiffOcrResponse
+        {
+            Text = ocrResult.Text,
+            Lines = ocrResult.Lines,
+            Region = region,
+        };
+    }
+
    private static void WriteResponse(object response)
    {
        var json = JsonSerializer.Serialize(response, JsonOptions);
--- a/tools/OcrDaemon/GridHandler.cs
+++ b/tools/OcrDaemon/GridHandler.cs
@ -69,12 +69,13 @@ class GridHandler
                templateSum += templateGray[ty * templateW + tx];
                innerCount++;
            }
+        double tmplMean = innerCount > 0 ? (double)templateSum / innerCount : 0;

-        // Threshold for mean absolute difference — default 6
-        double diffThreshold = req.Threshold > 0 ? req.Threshold : 2;
+        // Threshold for brightness-normalized MAD
+        double diffThreshold = req.Threshold > 0 ? req.Threshold : 5;
        bool debug = req.Debug;

-        if (debug) Console.Error.WriteLine($"Grid: {cols}x{rows}, cellW={cellW:F1}, cellH={cellH:F1}, border={border}, threshold={diffThreshold}");
+        if (debug) Console.Error.WriteLine($"Grid: {cols}x{rows}, cellW={cellW:F1}, cellH={cellH:F1}, border={border}, threshold={diffThreshold}, tmplMean={tmplMean:F1}");

        var cells = new List<List<bool>>();
        for (int row = 0; row < rows; row++)
@ -88,21 +89,30 @@ class GridHandler
                int cw = (int)Math.Min(cellW, captureW - cx0);
                int ch = (int)Math.Min(cellH, bitmap.Height - cy0);

-                // Compare inner pixels of cell vs template
-                long diffSum = 0;
-                int compared = 0;
                int innerW = Math.Min(cw, templateW) - border;
                int innerH = Math.Min(ch, templateH) - border;
+
+                // First pass: compute cell region mean brightness
+                long cellSum = 0;
+                int compared = 0;
                for (int py = border; py < innerH; py++)
-                {
                    for (int px = border; px < innerW; px++)
                    {
-                        int cellVal = captureGray[(cy0 + py) * captureW + (cx0 + px)];
-                        int tmplVal = templateGray[py * templateW + px];
-                        diffSum += Math.Abs(cellVal - tmplVal);
+                        cellSum += captureGray[(cy0 + py) * captureW + (cx0 + px)];
                        compared++;
                    }
-                }
+                double cellMean = compared > 0 ? (double)cellSum / compared : 0;
+                double offset = cellMean - tmplMean;
+
+                // Second pass: MAD on brightness-normalized values
+                long diffSum = 0;
+                for (int py = border; py < innerH; py++)
+                    for (int px = border; px < innerW; px++)
+                    {
+                        double cellVal = captureGray[(cy0 + py) * captureW + (cx0 + px)];
+                        double tmplVal = templateGray[py * templateW + px];
+                        diffSum += (long)Math.Abs(cellVal - tmplVal - offset);
+                    }
                double meanDiff = compared > 0 ? (double)diffSum / compared : 0;
                bool occupied = meanDiff > diffThreshold;
                rowList.Add(occupied);
--- a/tools/OcrDaemon/Models.cs
+++ b/tools/OcrDaemon/Models.cs
@ -39,6 +39,9 @@ class Request

    [JsonPropertyName("targetCol")]
    public int TargetCol { get; set; } = -1;
+
+    [JsonPropertyName("engine")]
+    public string? Engine { get; set; }
 }

 class RegionRect
@ -209,6 +212,30 @@ class DetectGridResponse
    public double CellHeight { get; set; }
 }

+/// <summary>
+/// JSON response for the <c>match-template</c> command. Properties that a handler does not
+/// set keep their default values (0 / false).
+/// </summary>
+class TemplateMatchResponse
+{
+    /// <summary>Always <c>true</c>; read-only.</summary>
+    [JsonPropertyName("ok")]
+    public bool Ok => true;
+
+    /// <summary>Whether the template was located; TemplateMatchHandler sets this to false when the template is larger than the screenshot or the best score is below the threshold.</summary>
+    [JsonPropertyName("found")]
+    public bool Found { get; set; }
+
+    /// <summary>X coordinate of the match; TemplateMatchHandler reports the match center, offset by the request region's X when a region is given.</summary>
+    [JsonPropertyName("x")]
+    public int X { get; set; }
+
+    /// <summary>Y coordinate of the match; TemplateMatchHandler reports the match center, offset by the request region's Y when a region is given.</summary>
+    [JsonPropertyName("y")]
+    public int Y { get; set; }
+
+    /// <summary>Width of the matched area; TemplateMatchHandler fills this with the template's column count.</summary>
+    [JsonPropertyName("width")]
+    public int Width { get; set; }
+
+    /// <summary>Height of the matched area; TemplateMatchHandler fills this with the template's row count.</summary>
+    [JsonPropertyName("height")]
+    public int Height { get; set; }
+
+    /// <summary>Best match score; TemplateMatchHandler fills this with the maximum of the CCoeffNormed result map.</summary>
+    [JsonPropertyName("confidence")]
+    public double Confidence { get; set; }
+}
+
 class DiffOcrParams
 {
    [JsonPropertyName("diffThresh")]
--- a/tools/OcrDaemon/OcrHandler.cs
+++ b/tools/OcrDaemon/OcrHandler.cs
@ -3,6 +3,8 @@ namespace OcrDaemon;
 using System.Drawing;
 using System.Drawing.Imaging;
 using System.Runtime.InteropServices;
+using System.Threading;
+using System.Threading.Tasks;
 using System.Text.Json;
 using OpenCvSharp;
 using OpenCvSharp.Extensions;
@ -61,17 +63,20 @@ class OcrHandler(TesseractEngine engine)
        ? new DiffOcrParams { DiffThresh = req.Threshold }
        : new DiffOcrParams());

-    public object HandleDiffOcr(Request req, DiffOcrParams p)
+    /// <summary>
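+    /// Diff detection + crop only. Returns the current-frame crop, the matching reference-frame
+    /// crop, the full current frame and the detected region; returns null if no reference
+    /// snapshot is stored or no tooltip-sized change is detected. The caller owns and must
+    /// dispose all three returned bitmaps.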
+    /// </summary>
+    public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffOcrParams p)
    {
        if (_referenceFrame == null)
-            return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
+            return null;

-        using var current = ScreenCapture.CaptureOrLoad(req.File, null);
+        var current = ScreenCapture.CaptureOrLoad(req.File, null);

        int w = Math.Min(_referenceFrame.Width, current.Width);
        int h = Math.Min(_referenceFrame.Height, current.Height);

-        // Get raw pixels for both frames
        var refData = _referenceFrame.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        byte[] refPx = new byte[refData.Stride * h];
        Marshal.Copy(refData.Scan0, refPx, 0, refPx.Length);
@ -83,49 +88,34 @@ class OcrHandler(TesseractEngine engine)
        Marshal.Copy(curData.Scan0, curPx, 0, curPx.Length);
        current.UnlockBits(curData);

-        // Detect pixels that got DARKER (tooltip = dark overlay).
-        // This filters out item highlight glow (brighter) and cursor changes.
        int diffThresh = p.DiffThresh;
-        bool[] changed = new bool[w * h];
-        int totalChanged = 0;

-        for (int y = 0; y < h; y++)
+        // Pass 1: parallel row diff — compute rowCounts[] directly, no changed[] array
+        int[] rowCounts = new int[h];
+        Parallel.For(0, h, y =>
        {
+            int count = 0;
+            int rowOffset = y * stride;
            for (int x = 0; x < w; x++)
            {
-                int i = y * stride + x * 4;
-                int darkerB = refPx[i] - curPx[i];
-                int darkerG = refPx[i + 1] - curPx[i + 1];
-                int darkerR = refPx[i + 2] - curPx[i + 2];
-                if (darkerB + darkerG + darkerR > diffThresh)
-                {
-                    changed[y * w + x] = true;
-                    totalChanged++;
-                }
+                int i = rowOffset + x * 4;
+                int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
+                if (darker > diffThresh)
+                    count++;
            }
-        }
+            rowCounts[y] = count;
+        });

-        bool debug = req.Debug;
+        int totalChanged = 0;
+        for (int y = 0; y < h; y++) totalChanged += rowCounts[y];

        if (totalChanged == 0)
        {
-            if (debug) Console.Error.WriteLine("  diff-ocr: no changes detected");
-            return new OcrResponse { Text = "", Lines = [] };
+            current.Dispose();
+            return null;
        }

-        // Two-pass density detection:
-        // Pass 1: Find row range using full-width row counts
-        // Pass 2: Find column range using only pixels within detected row range
-        // This makes the column threshold relative to tooltip height, not screen height.
        int maxGap = p.MaxGap;
-
-        // Pass 1: count changed pixels per row, find longest active run
-        int[] rowCounts = new int[h];
-        for (int y = 0; y < h; y++)
-            for (int x = 0; x < w; x++)
-                if (changed[y * w + x])
-                    rowCounts[y]++;
-
        int rowThresh = w / p.RowThreshDiv;
        int bestRowStart = 0, bestRowEnd = 0, bestRowLen = 0;
        int curRowStart = -1, lastActiveRow = -1;
@ -149,12 +139,46 @@ class OcrHandler(TesseractEngine engine)
            if (len > bestRowLen) { bestRowStart = curRowStart; bestRowEnd = lastActiveRow; bestRowLen = len; }
        }

-        // Pass 2: count changed pixels per column, but only within the detected row range
+        // Pass 2: parallel column diff — only within the row range, recompute from raw pixels
        int[] colCounts = new int[w];
-        for (int y = bestRowStart; y <= bestRowEnd; y++)
-            for (int x = 0; x < w; x++)
-                if (changed[y * w + x])
-                    colCounts[x]++;
+        int rowRangeLen = bestRowEnd - bestRowStart + 1;
+        if (rowRangeLen <= 200)
+        {
+            // Small range: serial is faster than Parallel overhead
+            for (int y = bestRowStart; y <= bestRowEnd; y++)
+            {
+                int rowOffset = y * stride;
+                for (int x = 0; x < w; x++)
+                {
+                    int i = rowOffset + x * 4;
+                    int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
+                    if (darker > diffThresh)
+                        colCounts[x]++;
+                }
+            }
+        }
+        else
+        {
+            Parallel.For(bestRowStart, bestRowEnd + 1,
+                () => new int[w],
+                (y, _, localCols) =>
+                {
+                    int rowOffset = y * stride;
+                    for (int x = 0; x < w; x++)
+                    {
+                        int i = rowOffset + x * 4;
+                        int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
+                        if (darker > diffThresh)
+                            localCols[x]++;
+                    }
+                    return localCols;
+                },
+                localCols =>
+                {
+                    for (int x = 0; x < w; x++)
+                        Interlocked.Add(ref colCounts[x], localCols[x]);
+                });
+        }

        int tooltipHeight = bestRowEnd - bestRowStart + 1;
        int colThresh = tooltipHeight / p.ColThreshDiv;
@ -181,13 +205,13 @@ class OcrHandler(TesseractEngine engine)
            if (len > bestColLen) { bestColStart = curColStart; bestColEnd = lastActiveCol; bestColLen = len; }
        }

-        // Log density detection results
        Console.Error.WriteLine($"  diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");

        if (bestRowLen < 50 || bestColLen < 50)
        {
            Console.Error.WriteLine($"  diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
-            return new OcrResponse { Text = "", Lines = [] };
+            current.Dispose();
+            return null;
        }

        int minX = bestColStart;
@ -195,13 +219,9 @@ class OcrHandler(TesseractEngine engine)
        int maxX = Math.Min(bestColEnd, w - 1);
        int maxY = Math.Min(bestRowEnd, h - 1);

-        // Dynamic right-edge trim: if the rightmost columns are much sparser than
-        // the tooltip body, trim them. This handles the ~5% of cases where ambient
-        // noise extends the detected region slightly on the right.
        int colSpan = maxX - minX + 1;
        if (colSpan > 100)
        {
-            // Compute median column density in the middle 50% of the range
            int q1 = minX + colSpan / 4;
            int q3 = minX + colSpan * 3 / 4;
            long midSum = 0;
@ -209,21 +229,38 @@ class OcrHandler(TesseractEngine engine)
            for (int x = q1; x <= q3; x++) { midSum += colCounts[x]; midCount++; }
            double avgMidDensity = (double)midSum / midCount;
            double cutoff = avgMidDensity * p.TrimCutoff;
-
-            // Trim from right while below cutoff
            while (maxX > minX + 100 && colCounts[maxX] < cutoff)
                maxX--;
        }
        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;

-        if (debug) Console.Error.WriteLine($"  diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
+        var cropped = CropFromBytes(curPx, stride, minX, minY, rw, rh);
+        var refCropped = CropFromBytes(refPx, stride, minX, minY, rw, rh);
+        var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };

-        // Crop tooltip region from both current and reference frames
-        using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
-        using var refCropped = _referenceFrame.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
+        Console.Error.WriteLine($"  diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");

-        // Save before/after preprocessing images if path is provided
+        return (cropped, refCropped, current, region);
+    }
+
+    public object HandleDiffOcr(Request req, DiffOcrParams p)
+    {
+        if (_referenceFrame == null)
+            return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
+
+        var cropResult = DiffCrop(req, p);
+        if (cropResult == null)
+            return new OcrResponse { Text = "", Lines = [] };
+
+        var (cropped, refCropped, current, region) = cropResult.Value;
+        using var _current = current;
+        using var _cropped = cropped;
+        using var _refCropped = refCropped;
+        bool debug = req.Debug;
+        int minX = region.X, minY = region.Y, rw = region.Width, rh = region.Height;
+
+        // Save raw crop if path is provided
        if (!string.IsNullOrEmpty(req.Path))
        {
            var dir = Path.GetDirectoryName(req.Path);
@ -634,6 +671,24 @@ class OcrHandler(TesseractEngine engine)
        };
    }

+    /// <summary>
+    /// Fast crop from raw pixel bytes — avoids slow GDI+ Bitmap.Clone().
+    /// Copies a <paramref name="cropW"/> x <paramref name="cropH"/> window of 4-byte pixels,
+    /// row by row, from <paramref name="px"/> into a new 32bpp ARGB bitmap.
+    /// </summary>
+    /// <param name="px">Source pixel buffer, 4 bytes per pixel.</param>
+    /// <param name="srcStride">Number of bytes per source row.</param>
+    /// <param name="cropX">Left edge of the crop window, in pixels.</param>
+    /// <param name="cropY">Top edge of the crop window, in pixels.</param>
+    /// <param name="cropW">Crop width in pixels.</param>
+    /// <param name="cropH">Crop height in pixels.</param>
+    /// <returns>A new bitmap owned by the caller.</returns>
+    private static Bitmap CropFromBytes(byte[] px, int srcStride, int cropX, int cropY, int cropW, int cropH)
+    {
+        var bmp = new Bitmap(cropW, cropH, PixelFormat.Format32bppArgb);
+        try
+        {
+            var data = bmp.LockBits(new Rectangle(0, 0, cropW, cropH), ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);
+            try
+            {
+                int dstStride = data.Stride;
+                int rowBytes = cropW * 4;
+                for (int y = 0; y < cropH; y++)
+                {
+                    int srcOffset = (cropY + y) * srcStride + cropX * 4;
+                    Marshal.Copy(px, srcOffset, data.Scan0 + y * dstStride, rowBytes);
+                }
+            }
+            finally
+            {
+                // Always release the lock, even if a row copy throws (e.g. window out of range).
+                bmp.UnlockBits(data);
+            }
+            return bmp;
+        }
+        catch
+        {
+            // The caller never receives the bitmap on failure, so release it here.
+            bmp.Dispose();
+            throw;
+        }
+    }
+
    private static double LevenshteinSimilarity(string a, string b)
    {
        a = a.ToLowerInvariant();
--- a/tools/OcrDaemon/PythonOcrBridge.cs
+++ b/tools/OcrDaemon/PythonOcrBridge.cs
@ -0,0 +1,193 @@
+namespace OcrDaemon;
+
+using System.Diagnostics;
+using System.Drawing;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+using SdImageFormat = System.Drawing.Imaging.ImageFormat;
+
+/// <summary>
+/// Manages a persistent Python subprocess running <c>python-ocr/daemon.py</c>.
+/// Lazy-starts on first request, reuses the process for subsequent calls, and respawns it
+/// if it has exited. Same stdin/stdout JSON-per-line protocol as the C# daemon itself.
+/// </summary>
+class PythonOcrBridge : IDisposable
+{
+    private static readonly JsonSerializerOptions JsonOptions = new()
+    {
+        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
+        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
+    };
+
+    // Only ever holds a process that started successfully and completed the ready handshake,
+    // so HasExited / StandardInput are always safe to touch on it.
+    private Process? _proc;
+    private readonly string _daemonScript;
+    private readonly string _pythonExe;
+    // Serializes spawn + request/response so replies cannot interleave on the shared pipes.
+    private readonly object _lock = new();
+
+    public PythonOcrBridge()
+    {
+        // Resolve paths relative to this exe
+        var exeDir = AppContext.BaseDirectory;
+        // exeDir = tools/OcrDaemon/bin/Release/net8.0-.../
+        // Walk up 4 levels to tools/
+        var toolsDir = Path.GetFullPath(Path.Combine(exeDir, "..", "..", "..", ".."));
+        _daemonScript = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", "daemon.py"));
+
+        // Use the venv Python if it exists, otherwise fall back to system python
+        var venvPython = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", ".venv", "Scripts", "python.exe"));
+        _pythonExe = File.Exists(venvPython) ? venvPython : "python";
+    }
+
+    /// <summary>
+    /// Run OCR on a screen region using the specified Python engine.
+    /// Captures (or loads) the image, saves it to a temp PNG, sends the path to Python,
+    /// and returns the resulting <see cref="OcrResponse"/>. The temp file is deleted afterwards.
+    /// </summary>
+    public object HandleOcr(Request req, string engine)
+    {
+        var tmpPath = Path.Combine(Path.GetTempPath(), $"ocr_{Guid.NewGuid():N}.png");
+        try
+        {
+            using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
+            bitmap.Save(tmpPath, SdImageFormat.Png);
+            return OcrFromFile(tmpPath, engine);
+        }
+        finally
+        {
+            // Best-effort cleanup: a leftover temp file must not mask the OCR result or error.
+            try { File.Delete(tmpPath); } catch { /* ignore */ }
+        }
+    }
+
+    /// <summary>
+    /// Run OCR on an already-saved image file via the Python engine.
+    /// </summary>
+    public OcrResponse OcrFromFile(string imagePath, string engine)
+    {
+        var pyReq = new { cmd = "ocr", engine, imagePath };
+        return SendPythonRequest(pyReq);
+    }
+
+    /// <summary>
+    /// Run OCR on a bitmap via the Python engine (base64 PNG over pipe, no temp file).
+    /// </summary>
+    public OcrResponse OcrFromBitmap(Bitmap bitmap, string engine)
+    {
+        using var ms = new MemoryStream();
+        bitmap.Save(ms, SdImageFormat.Png);
+        var imageBase64 = Convert.ToBase64String(ms.ToArray());
+
+        var pyReq = new { cmd = "ocr", engine, imageBase64 };
+        return SendPythonRequest(pyReq);
+    }
+
+    /// <summary>
+    /// Sends one JSON request line to the Python daemon (starting it if needed) and parses
+    /// the single JSON response line.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">
+    /// The daemon closed its output, the reply could not be parsed, or the reply had <c>ok = false</c>.
+    /// </exception>
+    private OcrResponse SendPythonRequest(object pyReq)
+    {
+        var json = JsonSerializer.Serialize(pyReq, JsonOptions);
+
+        string responseLine;
+        lock (_lock)
+        {
+            // Spawn/respawn under the same lock as the exchange so _proc is never
+            // replaced while another request is using its pipes.
+            var proc = EnsureRunning();
+            proc.StandardInput.WriteLine(json);
+            proc.StandardInput.Flush();
+            responseLine = proc.StandardOutput.ReadLine()
+                ?? throw new InvalidOperationException("Python daemon returned null");
+        }
+
+        var resp = JsonSerializer.Deserialize<PythonResponse>(responseLine, JsonOptions)
+            ?? throw new InvalidOperationException("Failed to parse Python OCR response");
+        if (!resp.Ok)
+            throw new InvalidOperationException(resp.Error ?? "Python OCR failed");
+
+        return new OcrResponse
+        {
+            Text = resp.Text ?? "",
+            Lines = resp.Lines ?? [],
+        };
+    }
+
+    /// <summary>
+    /// Returns the running daemon process, spawning a new one (and waiting for its ready
+    /// line) when none is running. Must be called while holding <c>_lock</c>.
+    /// </summary>
+    private Process EnsureRunning()
+    {
+        if (_proc is { HasExited: false } running)
+            return running;
+
+        _proc?.Dispose();
+        _proc = null;
+
+        if (!File.Exists(_daemonScript))
+            throw new FileNotFoundException($"Python OCR daemon not found at {_daemonScript}", _daemonScript);
+
+        Console.Error.WriteLine($"Spawning Python OCR daemon: {_pythonExe} {_daemonScript}");
+
+        var proc = new Process
+        {
+            StartInfo = new ProcessStartInfo
+            {
+                FileName = _pythonExe,
+                Arguments = $"\"{_daemonScript}\"",
+                UseShellExecute = false,
+                RedirectStandardInput = true,
+                RedirectStandardOutput = true,
+                RedirectStandardError = true,
+                CreateNoWindow = true,
+            }
+        };
+
+        // Forward the daemon's stderr to ours so its log lines stay visible.
+        proc.ErrorDataReceived += (_, e) =>
+        {
+            if (!string.IsNullOrEmpty(e.Data))
+                Console.Error.WriteLine($"[python-ocr] {e.Data}");
+        };
+
+        try
+        {
+            proc.Start();
+            proc.BeginErrorReadLine();
+
+            // Block until the daemon prints its ready line. NOTE: no timeout is enforced here;
+            // a daemon that starts but never writes a line will block this call.
+            var readyLine = proc.StandardOutput.ReadLine()
+                ?? throw new InvalidOperationException("Python OCR daemon exited before ready signal");
+
+            var ready = JsonSerializer.Deserialize<PythonResponse>(readyLine, JsonOptions);
+            if (ready?.Ready != true)
+                throw new InvalidOperationException($"Python OCR daemon did not send ready signal: {readyLine}");
+        }
+        catch
+        {
+            // Never publish a process that failed to start or to complete the handshake:
+            // an unstarted Process throws on HasExited, and a half-started one would be
+            // mistaken for a ready daemon on the next call.
+            StopProcess(proc);
+            throw;
+        }
+
+        _proc = proc;
+        Console.Error.WriteLine("Python OCR daemon ready");
+        return proc;
+    }
+
+    /// <summary>
+    /// Best-effort shutdown: closes the daemon's stdin, waits up to 3 seconds for it to
+    /// exit, kills it if it is still running, then disposes the handle. Never throws.
+    /// </summary>
+    private static void StopProcess(Process proc)
+    {
+        try
+        {
+            if (!proc.HasExited)
+            {
+                proc.StandardInput.Close();
+                if (!proc.WaitForExit(3000))
+                    proc.Kill();
+            }
+        }
+        catch (Exception ex)
+        {
+            // Shutdown is best effort (the process may never have started or may have
+            // exited concurrently); report and continue rather than fail the caller.
+            Console.Error.WriteLine($"[python-ocr] shutdown: {ex.Message}");
+        }
+        finally
+        {
+            proc.Dispose();
+        }
+    }
+
+    public void Dispose()
+    {
+        var proc = _proc;
+        _proc = null;
+        if (proc != null)
+            StopProcess(proc);
+    }
+
+    /// <summary>Shape of one JSON line received from the Python daemon.</summary>
+    private sealed class PythonResponse
+    {
+        [JsonPropertyName("ok")]
+        public bool Ok { get; set; }
+
+        [JsonPropertyName("ready")]
+        public bool? Ready { get; set; }
+
+        [JsonPropertyName("text")]
+        public string? Text { get; set; }
+
+        [JsonPropertyName("lines")]
+        public List<OcrLineResult>? Lines { get; set; }
+
+        [JsonPropertyName("error")]
+        public string? Error { get; set; }
+    }
+}
--- a/tools/OcrDaemon/TemplateMatchHandler.cs
+++ b/tools/OcrDaemon/TemplateMatchHandler.cs
@ -0,0 +1,60 @@
+namespace OcrDaemon;
+
+using System.Drawing;
+using System.Drawing.Imaging;
+using OpenCvSharp;
+using OpenCvSharp.Extensions;
+
+/// <summary>
+/// Handles the <c>match-template</c> command: finds the best location of a template image
+/// inside a captured or loaded screenshot using OpenCV's CCoeffNormed template matching.
+/// </summary>
+class TemplateMatchHandler
+{
+    /// <summary>
+    /// Matches the template at <c>req.Path</c> against the screenshot described by
+    /// <c>req.File</c> / <c>req.Region</c>.
+    /// </summary>
+    /// <returns>
+    /// An <see cref="ErrorResponse"/> when the template path is missing or unreadable;
+    /// otherwise a <see cref="TemplateMatchResponse"/> whose X/Y are the center of the best
+    /// match (offset by the region origin) when the score reaches the threshold.
+    /// </returns>
+    public object HandleTemplateMatch(Request req)
+    {
+        var templatePath = req.Path;
+        if (string.IsNullOrEmpty(templatePath))
+            return new ErrorResponse("match-template command requires 'path' (template image file)");
+
+        if (!System.IO.File.Exists(templatePath))
+            return new ErrorResponse($"Template file not found: {templatePath}");
+
+        using var captured = ScreenCapture.CaptureOrLoad(req.File, req.Region);
+        using var capturedMat = BitmapConverter.ToMat(captured);
+        using var needle = Cv2.ImRead(templatePath, ImreadModes.Color);
+
+        if (needle.Empty())
+            return new ErrorResponse($"Failed to load template image: {templatePath}");
+
+        // The template is loaded as 3-channel color, so drop the alpha channel from a
+        // 4-channel capture; any other channel count is copied through unchanged.
+        using var haystack = new Mat();
+        bool hasAlpha = capturedMat.Channels() == 4;
+        if (!hasAlpha)
+            capturedMat.CopyTo(haystack);
+        else
+            Cv2.CvtColor(capturedMat, haystack, ColorConversionCodes.BGRA2BGR);
+
+        // A template larger than the screenshot cannot match anywhere.
+        bool templateFits = needle.Rows <= haystack.Rows && needle.Cols <= haystack.Cols;
+        if (!templateFits)
+            return new TemplateMatchResponse { Found = false };
+
+        using var scores = new Mat();
+        Cv2.MatchTemplate(haystack, needle, scores, TemplateMatchModes.CCoeffNormed);
+        Cv2.MinMaxLoc(scores, out _, out double bestScore, out _, out OpenCvSharp.Point bestLoc);
+
+        // A positive request threshold is divided by 100; otherwise 0.7 is used.
+        double minScore;
+        if (req.Threshold > 0)
+            minScore = req.Threshold / 100.0;
+        else
+            minScore = 0.7;
+
+        if (bestScore < minScore)
+            return new TemplateMatchResponse { Found = false, Confidence = bestScore };
+
+        // Report the match center, shifted by the region origin when a region was given.
+        int originX = req.Region?.X ?? 0;
+        int originY = req.Region?.Y ?? 0;
+        int centerX = originX + bestLoc.X + needle.Cols / 2;
+        int centerY = originY + bestLoc.Y + needle.Rows / 2;
+
+        return new TemplateMatchResponse
+        {
+            Found = true,
+            X = centerX,
+            Y = centerY,
+            Width = needle.Cols,
+            Height = needle.Rows,
+            Confidence = bestScore,
+        };
+    }
+}
--- a/tools/OcrDaemon/tessdata/images/vertex1_crop.png
+++ b/tools/OcrDaemon/tessdata/images/vertex1_crop.png
--- a/tools/OcrDaemon/tessdata/images/vertex1_tight.png
+++ b/tools/OcrDaemon/tessdata/images/vertex1_tight.png
--- a/tools/OcrDaemon/tessdata/images/vertex2_crop.png
+++ b/tools/OcrDaemon/tessdata/images/vertex2_crop.png
--- a/tools/OcrDaemon/tessdata/images/vertex2_tight.png
+++ b/tools/OcrDaemon/tessdata/images/vertex2_tight.png
--- a/tools/python-ocr/pycache/daemon.cpython-313.pyc
+++ b/tools/python-ocr/pycache/daemon.cpython-313.pyc
--- a/tools/python-ocr/daemon.py
+++ b/tools/python-ocr/daemon.py
@ -0,0 +1,157 @@
+"""
+Persistent Python OCR daemon (stdin/stdout JSON-per-line protocol).
+
+Supports EasyOCR engine, lazy-loaded on first use.
+Managed as a subprocess by the C# OcrDaemon.
+
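+Startup:  {"ok": true, "ready": true} is written once before any request is read.
+Request:  {"cmd": "ocr", "engine": "easyocr", "imagePath": "C:\\temp\\screenshot.png"}
+          or {"cmd": "ocr", "engine": "easyocr", "imageBase64": "<base64-encoded image>"}
+Response: {"ok": true, "text": "...", "lines": [{"text": "...", "words": [...]}]}
+Error:    {"ok": false, "error": "..."}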
+"""
+
+import sys
+import json
+
+_easyocr_reader = None
+
+
+def _redirect_stdout_to_stderr():
+    """Point ``sys.stdout`` at stderr and return the stdout object that was replaced.
+
+    Keeps library ``print()`` output off the stream that carries the JSON protocol.
+    """
+    previous, sys.stdout = sys.stdout, sys.stderr
+    return previous
+
+
+def _restore_stdout(real_stdout):
+    """Reinstall ``real_stdout`` as ``sys.stdout``; callers in this file pass the object
+    returned by ``_redirect_stdout_to_stderr``."""
+    sys.stdout = real_stdout
+
+
+def get_easyocr():
+    """Return the shared EasyOCR reader, creating it on first use.
+
+    The reader is cached in the module-level ``_easyocr_reader``. While the library is
+    imported and the reader constructed, stdout is redirected to stderr so that anything
+    EasyOCR prints cannot corrupt the JSON protocol.
+    """
+    global _easyocr_reader
+    if _easyocr_reader is not None:
+        return _easyocr_reader
+
+    sys.stderr.write("Loading EasyOCR model...\n")
+    sys.stderr.flush()
+    saved_stdout = _redirect_stdout_to_stderr()
+    try:
+        import easyocr
+        _easyocr_reader = easyocr.Reader(["en"], gpu=True)
+    finally:
+        _restore_stdout(saved_stdout)
+    sys.stderr.write("EasyOCR model loaded.\n")
+    sys.stderr.flush()
+    return _easyocr_reader
+
+
+def bbox_to_rect(corners):
+    """Turn a 4-corner bbox ``[[x1,y1],[x2,y2],[x3,y3],[x4,y4]]`` into an axis-aligned
+    ``(x, y, width, height)`` tuple of ints.
+
+    Each extreme is truncated to int before the width/height subtraction.
+    """
+    left = int(min(pt[0] for pt in corners))
+    top = int(min(pt[1] for pt in corners))
+    right = int(max(pt[0] for pt in corners))
+    bottom = int(max(pt[1] for pt in corners))
+    return left, top, right - left, bottom - top
+
+
+def split_into_words(text, x, y, width, height):
+    """Split a detection's text into words with proportional bounding boxes.
+
+    A detection with zero or one whitespace-separated part is returned as a single word
+    covering the whole box. Otherwise each word gets a share of ``width`` proportional to
+    its character count (at least 1 pixel, truncated to int); words are laid out left to
+    right from ``x`` and all share ``y`` and ``height``. Whitespace between words is not
+    given any width, so the boxes are approximate.
+    """
+    parts = text.split()
+    if len(parts) <= 1:
+        return [{"text": text.strip(), "x": x, "y": y, "width": width, "height": height}]
+
+    # str.split() never yields empty strings, so with two or more parts the total is
+    # always >= 2 and the division below cannot be by zero.
+    total_chars = sum(len(p) for p in parts)
+
+    words = []
+    cx = x
+    for part in parts:
+        w = max(1, int(width * len(part) / total_chars))
+        words.append({"text": part, "x": cx, "y": y, "width": w, "height": height})
+        cx += w
+    return words
+
+
+def run_easyocr(image_path):
+    """Run EasyOCR on the image file at ``image_path`` and return the response dict
+    produced by ``run_easyocr_array``."""
+    from PIL import Image
+    import numpy as np
+    # Close the file as soon as the pixels are copied out, so the caller can delete it.
+    with Image.open(image_path) as im:
+        img = np.array(im)
+    return run_easyocr_array(img)
+
+
+def run_easyocr_array(img):
+    """Run EasyOCR on an image array and build the protocol response.
+
+    Returns ``{"ok": True, "text": ..., "lines": [...]}`` where each line carries the
+    stripped detection text and its words; detections whose text is blank are dropped.
+    """
+    reader = get_easyocr()
+
+    # EasyOCR may print during inference; keep that off the protocol stream.
+    saved_stdout = _redirect_stdout_to_stderr()
+    try:
+        # batch_size=32: batch GPU recognition of detected text regions
+        detections = reader.readtext(img, batch_size=32)
+    finally:
+        _restore_stdout(saved_stdout)
+
+    # detections: [(bbox_4corners, text, conf), ...]
+    lines = []
+    for bbox, text, _conf in detections:
+        stripped = text.strip()
+        if not stripped:
+            continue
+        x, y, w, h = bbox_to_rect(bbox)
+        lines.append({"text": stripped, "words": split_into_words(text, x, y, w, h)})
+
+    full_text = "\n".join(entry["text"] for entry in lines)
+    return {"ok": True, "text": full_text, "lines": lines}
+
+
+def load_image(req):
+    """Load the request's image as a numpy array.
+
+    ``imageBase64`` (a base64-encoded image) takes precedence over ``imagePath`` (a file
+    path). Returns ``None`` when neither key holds a non-empty value.
+    """
+    from PIL import Image
+    import numpy as np
+
+    image_base64 = req.get("imageBase64")
+    if image_base64:
+        import base64
+        import io
+        img_bytes = base64.b64decode(image_base64)
+        with Image.open(io.BytesIO(img_bytes)) as im:
+            return np.array(im)
+
+    image_path = req.get("imagePath")
+    if image_path:
+        # Release the file handle deterministically: the C# side deletes its temp file
+        # right after the reply, which fails on Windows while the file is still open.
+        with Image.open(image_path) as im:
+            return np.array(im)
+
+    return None
+
+
+def handle_request(req):
+    """Dispatch one decoded request dict and return the response dict.
+
+    Only ``cmd == "ocr"`` with ``engine == "easyocr"`` is supported; every other case
+    yields an ``{"ok": False, "error": ...}`` response. The image is loaded before the
+    engine name is checked.
+    """
+    command = req.get("cmd")
+    if command != "ocr":
+        return {"ok": False, "error": f"Unknown command: {command}"}
+
+    engine = req.get("engine", "")
+    img = load_image(req)
+    if img is None:
+        return {"ok": False, "error": "Missing imagePath or imageBase64"}
+
+    if engine != "easyocr":
+        return {"ok": False, "error": f"Unknown engine: {engine}"}
+    return run_easyocr_array(img)
+
+
+def main():
+    """Serve the JSON-per-line protocol: announce readiness, then answer each non-blank
+    stdin line with exactly one JSON line on stdout until stdin is closed.
+
+    Any exception raised while decoding or handling a request is reported as an
+    ``{"ok": False, "error": ...}`` response instead of ending the loop.
+    """
+    def emit(payload):
+        # One JSON document per line, flushed at once so the parent process can read it.
+        sys.stdout.write(json.dumps(payload) + "\n")
+        sys.stdout.flush()
+
+    # Signal ready
+    emit({"ok": True, "ready": True})
+
+    for raw in sys.stdin:
+        raw = raw.strip()
+        if raw:
+            try:
+                reply = handle_request(json.loads(raw))
+            except Exception as exc:
+                reply = {"ok": False, "error": str(exc)}
+            emit(reply)
+
+
+# Run the request loop only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/tools/python-ocr/requirements.txt
+++ b/tools/python-ocr/requirements.txt
@ -0,0 +1,3 @@
+easyocr
+pillow
+numpy