working on crop

This commit is contained in:
Boki 2026-02-12 17:48:16 -05:00
parent 93e2234c4e
commit f74e3e1c85
12 changed files with 1135 additions and 220 deletions

View file

@ -108,7 +108,7 @@ static class Daemon
var engine = request.Engine ?? "tesseract";
var preprocess = request.Preprocess ?? "none";
var kernelSize = request.Params?.KernelSize ?? 41;
var kernelSize = request.Params?.Ocr.KernelSize ?? 41;
// No preprocess + tesseract = original fast path
if (engine == "tesseract" && preprocess == "none")
@ -155,15 +155,17 @@ static class Daemon
{
var engine = request.Engine ?? "tesseract";
var isPythonEngine = engine is "easyocr" or "paddleocr";
var p = request.Params?.Clone() ?? new DiffOcrParams();
if (request.Threshold > 0) p.DiffThresh = request.Threshold;
var p = request.Params ?? new DiffOcrParams();
var cropParams = p.Crop;
var ocrParams = p.Ocr;
if (request.Threshold > 0) cropParams.DiffThresh = request.Threshold;
// Determine preprocess mode: explicit request.Preprocess > params.UseBackgroundSub > default "bgsub"
string preprocess;
if (request.Preprocess != null)
preprocess = request.Preprocess;
else if (request.Params != null)
preprocess = p.UseBackgroundSub ? "bgsub" : "tophat";
preprocess = ocrParams.UseBackgroundSub ? "bgsub" : "tophat";
else
preprocess = "bgsub";
@ -173,25 +175,25 @@ static class Daemon
var sw = System.Diagnostics.Stopwatch.StartNew();
var cropResult = ocrHandler.DiffCrop(request, p);
var cropResult = ocrHandler.DiffCrop(request, cropParams);
if (cropResult == null)
return new OcrResponse { Text = "", Lines = [] };
var (cropped, refCropped, current, region) = cropResult.Value;
using var _current = current;
// Preprocess
// Preprocess — only sees ocrParams
Bitmap processed;
if (preprocess == "bgsub")
{
int upscale = isPythonEngine ? 1 : p.Upscale;
int upscale = isPythonEngine ? 1 : ocrParams.Upscale;
processed = ImagePreprocessor.PreprocessWithBackgroundSub(
cropped, refCropped, dimPercentile: p.DimPercentile, textThresh: p.TextThresh,
upscale: upscale, softThreshold: p.SoftThreshold);
cropped, refCropped, dimPercentile: ocrParams.DimPercentile, textThresh: ocrParams.TextThresh,
upscale: upscale, softThreshold: ocrParams.SoftThreshold);
}
else if (preprocess == "tophat")
{
processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: p.KernelSize);
processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize);
}
else // "none"
{
@ -228,7 +230,7 @@ static class Daemon
}
else // easyocr, paddleocr
{
var ocrResult = pythonBridge.OcrFromBitmap(processed, engine);
var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
var ocrMs = sw.ElapsedMilliseconds;
Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");

View file

@ -242,7 +242,7 @@ class TemplateMatchResponse
public double Confidence { get; set; }
}
class DiffOcrParams
sealed class DiffCropParams
{
[JsonPropertyName("diffThresh")]
public int DiffThresh { get; set; } = 20;
@ -259,6 +259,16 @@ class DiffOcrParams
[JsonPropertyName("trimCutoff")]
public double TrimCutoff { get; set; } = 0.4;
[JsonPropertyName("ocrPad")]
public int OcrPad { get; set; } = 10;
public override string ToString() =>
$"diffThresh={DiffThresh} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} rowThreshDiv={RowThreshDiv} colThreshDiv={ColThreshDiv} ocrPad={OcrPad}";
}
sealed class OcrParams
{
// preprocessing
[JsonPropertyName("kernelSize")]
public int KernelSize { get; set; } = 41;
@ -277,9 +287,7 @@ class DiffOcrParams
[JsonPropertyName("softThreshold")]
public bool SoftThreshold { get; set; } = false;
[JsonPropertyName("ocrPad")]
public int OcrPad { get; set; } = 10;
// Tesseract-specific
[JsonPropertyName("usePerLineOcr")]
public bool UsePerLineOcr { get; set; } = false;
@ -292,12 +300,40 @@ class DiffOcrParams
[JsonPropertyName("psm")]
public int Psm { get; set; } = 6;
public DiffOcrParams Clone() => (DiffOcrParams)MemberwiseClone();
// post-merge / Python engine tuning
[JsonPropertyName("mergeGap")]
public int MergeGap { get; set; } = 0;
[JsonPropertyName("linkThreshold")]
public double? LinkThreshold { get; set; }
[JsonPropertyName("textThreshold")]
public double? TextThreshold { get; set; }
[JsonPropertyName("lowText")]
public double? LowText { get; set; }
[JsonPropertyName("widthThs")]
public double? WidthThs { get; set; }
[JsonPropertyName("paragraph")]
public bool? Paragraph { get; set; }
public override string ToString() =>
UseBackgroundSub
? $"bgSub dimPct={DimPercentile} textThresh={TextThresh} soft={SoftThreshold} ocrPad={OcrPad} perLine={UsePerLineOcr} lineGap={LineGapTolerance} linePadY={LinePadY} psm={Psm} diffThresh={DiffThresh} rowThreshDiv={RowThreshDiv} colThreshDiv={ColThreshDiv} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} upscale={Upscale}"
: $"topHat kernelSize={KernelSize} ocrPad={OcrPad} perLine={UsePerLineOcr} lineGap={LineGapTolerance} linePadY={LinePadY} psm={Psm} diffThresh={DiffThresh} rowThreshDiv={RowThreshDiv} colThreshDiv={ColThreshDiv} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} upscale={Upscale}";
? $"bgSub dimPct={DimPercentile} textThresh={TextThresh} soft={SoftThreshold} upscale={Upscale} mergeGap={MergeGap}"
: $"topHat kernel={KernelSize} upscale={Upscale} mergeGap={MergeGap}";
}
// Aggregates the two parameter groups used by the diff-OCR pipeline:
// diff/crop detection settings and preprocessing/OCR settings.
// Deserialized from the request's "params" JSON object.
sealed class DiffOcrParams
{
// Settings for diff-based tooltip region detection and cropping.
[JsonPropertyName("crop")]
public DiffCropParams Crop { get; set; } = new();
// Settings for image preprocessing and the OCR engines.
[JsonPropertyName("ocr")]
public OcrParams Ocr { get; set; } = new();
// Compact dump of both groups; written to stderr by the tuning loops.
public override string ToString() => $"[{Crop}] [{Ocr}]";
}
class TestCase

View file

@ -14,6 +14,7 @@ using SdImageFormat = System.Drawing.Imaging.ImageFormat;
class OcrHandler(TesseractEngine engine)
{
private Bitmap? _referenceFrame;
private RegionRect? _referenceRegion;
public object HandleOcr(Request req)
{
@ -56,31 +57,79 @@ class OcrHandler(TesseractEngine engine)
{
_referenceFrame?.Dispose();
_referenceFrame = ScreenCapture.CaptureOrLoad(req.File, req.Region);
_referenceRegion = req.Region == null
? null
: new RegionRect { X = req.Region.X, Y = req.Region.Y, Width = req.Region.Width, Height = req.Region.Height };
return new OkResponse();
}
public object HandleDiffOcr(Request req) => HandleDiffOcr(req, req.Threshold > 0
? new DiffOcrParams { DiffThresh = req.Threshold }
? new DiffOcrParams { Crop = new DiffCropParams { DiffThresh = req.Threshold } }
: new DiffOcrParams());
/// <summary>
/// Diff detection + crop only. Returns the raw tooltip crop bitmap and region,
/// or null if no tooltip detected. Caller is responsible for disposing the bitmap.
/// </summary>
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffOcrParams p)
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffCropParams c)
{
if (_referenceFrame == null)
return null;
var current = ScreenCapture.CaptureOrLoad(req.File, null);
var diffRegion = req.Region ?? _referenceRegion;
int baseX = diffRegion?.X ?? 0;
int baseY = diffRegion?.Y ?? 0;
var current = ScreenCapture.CaptureOrLoad(req.File, diffRegion);
int w = Math.Min(_referenceFrame.Width, current.Width);
int h = Math.Min(_referenceFrame.Height, current.Height);
Bitmap refForDiff = _referenceFrame;
bool disposeRef = false;
var refData = _referenceFrame.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
if (diffRegion != null)
{
if (_referenceRegion == null)
{
var croppedRef = CropBitmap(_referenceFrame, diffRegion);
if (croppedRef == null)
{
current.Dispose();
return null;
}
refForDiff = croppedRef;
disposeRef = true;
}
else if (!RegionsEqual(diffRegion, _referenceRegion))
{
int offX = diffRegion.X - _referenceRegion.X;
int offY = diffRegion.Y - _referenceRegion.Y;
if (offX < 0 || offY < 0 || offX + diffRegion.Width > _referenceFrame.Width || offY + diffRegion.Height > _referenceFrame.Height)
{
current.Dispose();
return null;
}
var croppedRef = CropBitmap(_referenceFrame, new RegionRect
{
X = offX,
Y = offY,
Width = diffRegion.Width,
Height = diffRegion.Height,
});
if (croppedRef == null)
{
current.Dispose();
return null;
}
refForDiff = croppedRef;
disposeRef = true;
}
}
int w = Math.Min(refForDiff.Width, current.Width);
int h = Math.Min(refForDiff.Height, current.Height);
var refData = refForDiff.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
byte[] refPx = new byte[refData.Stride * h];
Marshal.Copy(refData.Scan0, refPx, 0, refPx.Length);
_referenceFrame.UnlockBits(refData);
refForDiff.UnlockBits(refData);
int stride = refData.Stride;
var curData = current.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
@ -88,7 +137,7 @@ class OcrHandler(TesseractEngine engine)
Marshal.Copy(curData.Scan0, curPx, 0, curPx.Length);
current.UnlockBits(curData);
int diffThresh = p.DiffThresh;
int diffThresh = c.DiffThresh;
// Pass 1: parallel row diff — compute rowCounts[] directly, no changed[] array
int[] rowCounts = new int[h];
@ -112,11 +161,12 @@ class OcrHandler(TesseractEngine engine)
if (totalChanged == 0)
{
current.Dispose();
if (disposeRef) refForDiff.Dispose();
return null;
}
int maxGap = p.MaxGap;
int rowThresh = w / p.RowThreshDiv;
int maxGap = c.MaxGap;
int rowThresh = w / c.RowThreshDiv;
int bestRowStart = 0, bestRowEnd = 0, bestRowLen = 0;
int curRowStart = -1, lastActiveRow = -1;
for (int y = 0; y < h; y++)
@ -180,7 +230,7 @@ class OcrHandler(TesseractEngine engine)
}
int tooltipHeight = bestRowEnd - bestRowStart + 1;
int colThresh = tooltipHeight / p.ColThreshDiv;
int colThresh = tooltipHeight / c.ColThreshDiv;
int bestColStart = 0, bestColEnd = 0, bestColLen = 0;
int curColStart = -1, lastActiveCol = -1;
@ -210,6 +260,7 @@ class OcrHandler(TesseractEngine engine)
{
Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
current.Dispose();
if (disposeRef) refForDiff.Dispose();
return null;
}
@ -218,37 +269,73 @@ class OcrHandler(TesseractEngine engine)
int maxX = Math.Min(bestColEnd, w - 1);
int maxY = Math.Min(bestRowEnd, h - 1);
// Trim low-density edges on both axes to avoid oversized crops.
int colSpan = maxX - minX + 1;
if (colSpan > 100)
if (colSpan > 50)
{
int q1 = minX + colSpan / 4;
int q3 = minX + colSpan * 3 / 4;
long midSum = 0;
int midCount = 0;
for (int x = q1; x <= q3; x++) { midSum += colCounts[x]; midCount++; }
double avgMidDensity = (double)midSum / midCount;
double cutoff = avgMidDensity * p.TrimCutoff;
while (maxX > minX + 100 && colCounts[maxX] < cutoff)
double avgMidDensity = (double)midSum / Math.Max(1, midCount);
double cutoff = avgMidDensity * c.TrimCutoff;
while (minX < maxX - 50 && colCounts[minX] < cutoff)
minX++;
while (maxX > minX + 50 && colCounts[maxX] < cutoff)
maxX--;
}
int rowSpan = maxY - minY + 1;
if (rowSpan > 50)
{
int q1 = minY + rowSpan / 4;
int q3 = minY + rowSpan * 3 / 4;
long midSum = 0;
int midCount = 0;
for (int y = q1; y <= q3; y++) { midSum += rowCounts[y]; midCount++; }
double avgMidDensity = (double)midSum / Math.Max(1, midCount);
double cutoff = avgMidDensity * c.TrimCutoff;
while (minY < maxY - 50 && rowCounts[minY] < cutoff)
minY++;
while (maxY > minY + 50 && rowCounts[maxY] < cutoff)
maxY--;
}
int rw = maxX - minX + 1;
int rh = maxY - minY + 1;
var cropped = CropFromBytes(curPx, stride, minX, minY, rw, rh);
var refCropped = CropFromBytes(refPx, stride, minX, minY, rw, rh);
var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };
var region = new RegionRect { X = baseX + minX, Y = baseY + minY, Width = rw, Height = rh };
Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
if (disposeRef) refForDiff.Dispose();
return (cropped, refCropped, current, region);
}
// True when two regions have identical origin and size. Used to decide
// whether the stored reference frame's region already matches the
// requested diff region (and so needs no re-cropping).
private static bool RegionsEqual(RegionRect a, RegionRect b) =>
a.X == b.X && a.Y == b.Y && a.Width == b.Width && a.Height == b.Height;
// Clones a sub-rectangle of <paramref name="src"/>, clamping the rectangle
// to the bitmap bounds. Returns null when the clamped rectangle is empty
// (i.e. the region lies entirely outside the bitmap).
// NOTE(review): a negative region.X/Y is clamped to 0 but Width/Height are
// not reduced by the clipped amount, so the crop shifts rather than shrinks
// — confirm callers never pass negative origins.
private static Bitmap? CropBitmap(Bitmap src, RegionRect region)
{
int cx = Math.Max(0, region.X);
int cy = Math.Max(0, region.Y);
int cw = Math.Min(region.Width, src.Width - cx);
int ch = Math.Min(region.Height, src.Height - cy);
if (cw <= 0 || ch <= 0)
return null;
return src.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
}
public object HandleDiffOcr(Request req, DiffOcrParams p)
{
if (_referenceFrame == null)
return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
var cropResult = DiffCrop(req, p);
var cropResult = DiffCrop(req, p.Crop);
if (cropResult == null)
return new OcrResponse { Text = "", Lines = [] };
@ -270,14 +357,15 @@ class OcrHandler(TesseractEngine engine)
}
// Pre-process for OCR — get Mat for per-line detection and padding
var ocr = p.Ocr;
Mat processedMat;
if (p.UseBackgroundSub)
if (ocr.UseBackgroundSub)
{
processedMat = ImagePreprocessor.PreprocessWithBackgroundSubMat(cropped, refCropped, p.DimPercentile, p.TextThresh, p.Upscale, p.SoftThreshold);
processedMat = ImagePreprocessor.PreprocessWithBackgroundSubMat(cropped, refCropped, ocr.DimPercentile, ocr.TextThresh, ocr.Upscale, ocr.SoftThreshold);
}
else
{
using var topHatBmp = ImagePreprocessor.PreprocessForOcr(cropped, p.KernelSize, p.Upscale);
using var topHatBmp = ImagePreprocessor.PreprocessForOcr(cropped, ocr.KernelSize, ocr.Upscale);
processedMat = BitmapConverter.ToMat(topHatBmp);
}
using var _processedMat = processedMat; // ensure disposal
@ -296,25 +384,25 @@ class OcrHandler(TesseractEngine engine)
if (debug) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
}
int pad = p.OcrPad;
int upscale = p.Upscale > 0 ? p.Upscale : 1;
int pad = p.Crop.OcrPad;
int upscale = ocr.Upscale > 0 ? ocr.Upscale : 1;
var lines = new List<OcrLineResult>();
// Per-line OCR: detect text lines via horizontal projection, OCR each individually
if (p.UsePerLineOcr)
if (ocr.UsePerLineOcr)
{
// DetectTextLines needs binary input; if soft threshold produced grayscale, binarize a copy
int minRowPx = Math.Max(processedMat.Cols / 200, 3);
using var detectionMat = p.SoftThreshold ? new Mat() : null;
if (p.SoftThreshold)
using var detectionMat = ocr.SoftThreshold ? new Mat() : null;
if (ocr.SoftThreshold)
Cv2.Threshold(processedMat, detectionMat!, 128, 255, ThresholdTypes.Binary);
var lineDetectInput = p.SoftThreshold ? detectionMat! : processedMat;
var textLines = ImagePreprocessor.DetectTextLines(lineDetectInput, minRowPixels: minRowPx, gapTolerance: p.LineGapTolerance * upscale);
var lineDetectInput = ocr.SoftThreshold ? detectionMat! : processedMat;
var textLines = ImagePreprocessor.DetectTextLines(lineDetectInput, minRowPixels: minRowPx, gapTolerance: ocr.LineGapTolerance * upscale);
if (debug) Console.Error.WriteLine($" diff-ocr: detected {textLines.Count} text lines");
if (textLines.Count > 0)
{
int linePadY = p.LinePadY;
int linePadY = ocr.LinePadY;
foreach (var (yStart, yEnd) in textLines)
{
int y0 = Math.Max(yStart - linePadY, 0);
@ -330,7 +418,7 @@ class OcrHandler(TesseractEngine engine)
using var lineBmp = BitmapConverter.ToBitmap(padded);
using var linePix = ImageUtils.BitmapToPix(lineBmp);
using var linePage = engine.Process(linePix, (PageSegMode)p.Psm);
using var linePage = engine.Process(linePix, (PageSegMode)ocr.Psm);
// Extract words, adjusting coordinates back to screen space
// Word coords are in padded image space → subtract pad, add line offset, scale to original, add region offset
@ -386,7 +474,7 @@ class OcrHandler(TesseractEngine engine)
Cv2.CopyMakeBorder(processedMat, padded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);
using var bmp = BitmapConverter.ToBitmap(padded);
using var pix = ImageUtils.BitmapToPix(bmp);
using var page = engine.Process(pix, (PageSegMode)p.Psm);
using var page = engine.Process(pix, (PageSegMode)ocr.Psm);
var text = page.GetText();
// Adjust word coordinates: subtract padding offset
@ -430,77 +518,161 @@ class OcrHandler(TesseractEngine engine)
public object HandleTest(Request req) => RunTestCases(new DiffOcrParams(), verbose: true);
private static DiffOcrParams CloneParams(DiffOcrParams p)
{
var json = JsonSerializer.Serialize(p);
return JsonSerializer.Deserialize<DiffOcrParams>(json)!;
}
public object HandleTune(Request req)
{
int totalEvals = 0;
// --- Phase 1: Tune top-hat approach ---
Console.Error.WriteLine("\n========== Phase 1: Top-Hat ==========");
var topHat = new DiffOcrParams { UseBackgroundSub = false };
double topHatScore = TuneParams(topHat, ref totalEvals, tuneTopHat: true, tuneBgSub: false);
// --- Phase A: Tune crop params ---
Console.Error.WriteLine("\n========== Phase A: Crop Params ==========");
var best = new DiffOcrParams();
double bestScore = TuneCropParams(best, ref totalEvals);
// --- Phase 2: Tune background-subtraction approach ---
Console.Error.WriteLine("\n========== Phase 2: Background Subtraction ==========");
// Start bgSub from the best detection params found in phase 1
var bgSub = topHat.Clone();
bgSub.UseBackgroundSub = true;
double bgSubScore = TuneParams(bgSub, ref totalEvals, tuneTopHat: false, tuneBgSub: true);
// --- Phase B: Tune OCR params (top-hat) ---
Console.Error.WriteLine("\n========== Phase B: OCR — Top-Hat ==========");
var topHat = CloneParams(best);
topHat.Ocr.UseBackgroundSub = false;
double topHatScore = TuneOcrParams(topHat, ref totalEvals, tuneTopHat: true, tuneBgSub: false);
// --- Phase C: Tune OCR params (background-subtraction) ---
Console.Error.WriteLine("\n========== Phase C: OCR — Background Subtraction ==========");
var bgSub = CloneParams(best);
bgSub.Ocr.UseBackgroundSub = true;
double bgSubScore = TuneOcrParams(bgSub, ref totalEvals, tuneTopHat: false, tuneBgSub: true);
// Pick the winner
var best = bgSubScore > topHatScore ? bgSub : topHat;
double bestScore = Math.Max(topHatScore, bgSubScore);
var winner = bgSubScore > topHatScore ? bgSub : topHat;
double winnerScore = Math.Max(topHatScore, bgSubScore);
Console.Error.WriteLine($"\n========== Result ==========");
Console.Error.WriteLine($" Top-Hat: {topHatScore:F3} {topHat}");
Console.Error.WriteLine($" BgSub: {bgSubScore:F3} {bgSub}");
Console.Error.WriteLine($" Winner: {(best.UseBackgroundSub ? "BgSub" : "TopHat")} evals={totalEvals}\n");
Console.Error.WriteLine($" Winner: {(winner.Ocr.UseBackgroundSub ? "BgSub" : "TopHat")} evals={totalEvals}\n");
// Final verbose report with best params
RunTestCases(best, verbose: true);
RunTestCases(winner, verbose: true);
return new TuneResponse
{
BestScore = bestScore,
BestParams = best,
BestScore = winnerScore,
BestParams = winner,
Iterations = totalEvals,
};
}
private double TuneParams(DiffOcrParams best, ref int totalEvals, bool tuneTopHat, bool tuneBgSub)
private double TuneCropParams(DiffOcrParams best, ref int totalEvals)
{
double bestScore = ScoreParams(best);
Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");
// Detection params (shared by both approaches)
var sharedSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
var cropSweeps = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
{
("diffThresh", [10, 15, 20, 25, 30, 40, 50, 60], (p, v) => p.DiffThresh = v),
("rowThreshDiv", [10, 15, 20, 25, 30, 40, 50, 60], (p, v) => p.RowThreshDiv = v),
("colThreshDiv", [5, 8, 10, 12, 15, 20, 25, 30], (p, v) => p.ColThreshDiv = v),
("maxGap", [5, 8, 10, 12, 15, 20, 25, 30], (p, v) => p.MaxGap = v),
("upscale", [1, 2, 3], (p, v) => p.Upscale = v),
("ocrPad", [0, 5, 10, 15, 20, 30], (p, v) => p.OcrPad = v),
("psm", [4, 6, 11, 13], (p, v) => p.Psm = v),
};
// Top-hat specific
var topHatSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
{
("kernelSize", [11, 15, 19, 21, 25, 31, 35, 41, 51], (p, v) => p.KernelSize = v),
};
// Background-subtraction specific
var bgSubSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
{
("dimPercentile", [5, 10, 15, 20, 25, 30, 40, 50], (p, v) => p.DimPercentile = v),
("textThresh", [10, 15, 20, 25, 30, 40, 50, 60, 80], (p, v) => p.TextThresh = v),
("lineGapTolerance", [3, 5, 8, 10, 15], (p, v) => p.LineGapTolerance = v),
("linePadY", [5, 10, 15, 20], (p, v) => p.LinePadY = v),
("diffThresh", [10, 15, 20, 25, 30, 40, 50, 60], (c, v) => c.DiffThresh = v),
("rowThreshDiv", [10, 15, 20, 25, 30, 40, 50, 60], (c, v) => c.RowThreshDiv = v),
("colThreshDiv", [5, 8, 10, 12, 15, 20, 25, 30], (c, v) => c.ColThreshDiv = v),
("maxGap", [5, 8, 10, 12, 15, 20, 25, 30], (c, v) => c.MaxGap = v),
("ocrPad", [0, 5, 10, 15, 20, 30], (c, v) => c.OcrPad = v),
};
double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];
var allIntSweeps = sharedSweeps
const int maxRounds = 3;
for (int round = 0; round < maxRounds; round++)
{
bool improved = false;
Console.Error.WriteLine($"--- Round {round + 1} ---");
foreach (var (name, values, set) in cropSweeps)
{
Console.Error.Write($" {name}: ");
int bestVal = 0;
double bestValScore = -1;
foreach (int v in values)
{
var trial = CloneParams(best);
set(trial.Crop, v);
double score = ScoreParams(trial);
totalEvals++;
Console.Error.Write($"{v}={score:F3} ");
if (score > bestValScore) { bestValScore = score; bestVal = v; }
}
Console.Error.WriteLine();
if (bestValScore > bestScore)
{
set(best.Crop, bestVal);
bestScore = bestValScore;
improved = true;
Console.Error.WriteLine($" → {name}={bestVal} score={bestScore:F3}");
}
}
// Sweep trimCutoff
{
Console.Error.Write($" trimCutoff: ");
double bestTrim = best.Crop.TrimCutoff;
double bestTrimScore = bestScore;
foreach (double v in trimValues)
{
var trial = CloneParams(best);
trial.Crop.TrimCutoff = v;
double score = ScoreParams(trial);
totalEvals++;
Console.Error.Write($"{v:F2}={score:F3} ");
if (score > bestTrimScore) { bestTrimScore = score; bestTrim = v; }
}
Console.Error.WriteLine();
if (bestTrimScore > bestScore)
{
best.Crop.TrimCutoff = bestTrim;
bestScore = bestTrimScore;
improved = true;
Console.Error.WriteLine($" → trimCutoff={bestTrim:F2} score={bestScore:F3}");
}
}
Console.Error.WriteLine($" End of round {round + 1}: score={bestScore:F3} {best}");
if (!improved) break;
}
return bestScore;
}
private double TuneOcrParams(DiffOcrParams best, ref int totalEvals, bool tuneTopHat, bool tuneBgSub)
{
double bestScore = ScoreParams(best);
Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");
var sharedOcrSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
{
("upscale", [1, 2, 3], (o, v) => o.Upscale = v),
("psm", [4, 6, 11, 13], (o, v) => o.Psm = v),
};
// Top-hat specific
var topHatSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
{
("kernelSize", [11, 15, 19, 21, 25, 31, 35, 41, 51], (o, v) => o.KernelSize = v),
};
// Background-subtraction specific
var bgSubSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
{
("dimPercentile", [5, 10, 15, 20, 25, 30, 40, 50], (o, v) => o.DimPercentile = v),
("textThresh", [10, 15, 20, 25, 30, 40, 50, 60, 80], (o, v) => o.TextThresh = v),
("lineGapTolerance", [3, 5, 8, 10, 15], (o, v) => o.LineGapTolerance = v),
("linePadY", [5, 10, 15, 20], (o, v) => o.LinePadY = v),
};
var allOcrSweeps = sharedOcrSweeps
.Concat(tuneTopHat ? topHatSweeps : [])
.Concat(tuneBgSub ? bgSubSweeps : [])
.ToArray();
@ -511,7 +683,7 @@ class OcrHandler(TesseractEngine engine)
bool improved = false;
Console.Error.WriteLine($"--- Round {round + 1} ---");
foreach (var (name, values, set) in allIntSweeps)
foreach (var (name, values, set) in allOcrSweeps)
{
Console.Error.Write($" {name}: ");
int bestVal = 0;
@ -519,8 +691,8 @@ class OcrHandler(TesseractEngine engine)
foreach (int v in values)
{
var trial = best.Clone();
set(trial, v);
var trial = CloneParams(best);
set(trial.Ocr, v);
double score = ScoreParams(trial);
totalEvals++;
Console.Error.Write($"{v}={score:F3} ");
@ -530,39 +702,13 @@ class OcrHandler(TesseractEngine engine)
if (bestValScore > bestScore)
{
set(best, bestVal);
set(best.Ocr, bestVal);
bestScore = bestValScore;
improved = true;
Console.Error.WriteLine($" → {name}={bestVal} score={bestScore:F3}");
}
}
// Sweep trimCutoff
{
Console.Error.Write($" trimCutoff: ");
double bestTrim = best.TrimCutoff;
double bestTrimScore = bestScore;
foreach (double v in trimValues)
{
var trial = best.Clone();
trial.TrimCutoff = v;
double score = ScoreParams(trial);
totalEvals++;
Console.Error.Write($"{v:F2}={score:F3} ");
if (score > bestTrimScore) { bestTrimScore = score; bestTrim = v; }
}
Console.Error.WriteLine();
if (bestTrimScore > bestScore)
{
best.TrimCutoff = bestTrim;
bestScore = bestTrimScore;
improved = true;
Console.Error.WriteLine($" → trimCutoff={bestTrim:F2} score={bestScore:F3}");
}
}
Console.Error.WriteLine($" End of round {round + 1}: score={bestScore:F3} {best}");
if (!improved) break;
}

View file

@ -60,18 +60,19 @@ class PythonOcrBridge : IDisposable
/// <summary>
/// Run OCR on an already-saved image file via the Python engine.
/// </summary>
public OcrResponse OcrFromFile(string imagePath, string engine)
public OcrResponse OcrFromFile(string imagePath, string engine, OcrParams? ocrParams = null)
{
EnsureRunning();
var pyReq = new { cmd = "ocr", engine, imagePath };
var pyReq = BuildPythonRequest(engine, ocrParams);
pyReq["imagePath"] = imagePath;
return SendPythonRequest(pyReq);
}
/// <summary>
/// Run OCR on a bitmap via the Python engine (base64 PNG over pipe, no temp file).
/// </summary>
public OcrResponse OcrFromBitmap(Bitmap bitmap, string engine)
public OcrResponse OcrFromBitmap(Bitmap bitmap, string engine, OcrParams? ocrParams = null)
{
EnsureRunning();
@ -79,10 +80,26 @@ class PythonOcrBridge : IDisposable
bitmap.Save(ms, SdImageFormat.Png);
var imageBase64 = Convert.ToBase64String(ms.ToArray());
var pyReq = new { cmd = "ocr", engine, imageBase64 };
var pyReq = BuildPythonRequest(engine, ocrParams);
pyReq["imageBase64"] = imageBase64;
return SendPythonRequest(pyReq);
}
/// <summary>
/// Builds the base request dictionary sent to the Python OCR helper.
/// Tuning values from <paramref name="ocrParams"/> are added only when
/// explicitly set (MergeGap &gt; 0, nullable fields non-null), so the
/// Python side falls back to its own defaults otherwise.
/// </summary>
private static Dictionary<string, object?> BuildPythonRequest(string engine, OcrParams? ocrParams)
{
var request = new Dictionary<string, object?>
{
["cmd"] = "ocr",
["engine"] = engine,
};
if (ocrParams is null)
return request;
if (ocrParams.MergeGap > 0)
request["mergeGap"] = ocrParams.MergeGap;
if (ocrParams.LinkThreshold is double linkThreshold)
request["linkThreshold"] = linkThreshold;
if (ocrParams.TextThreshold is double textThreshold)
request["textThreshold"] = textThreshold;
if (ocrParams.LowText is double lowText)
request["lowText"] = lowText;
if (ocrParams.WidthThs is double widthThs)
request["widthThs"] = widthThs;
if (ocrParams.Paragraph is bool paragraph)
request["paragraph"] = paragraph;
return request;
}
private OcrResponse SendPythonRequest(object pyReq)
{
var json = JsonSerializer.Serialize(pyReq, JsonOptions);

View file

@ -71,6 +71,51 @@ def split_into_words(text, x, y, width, height):
return words
def merge_nearby_detections(items, merge_gap):
    """Coalesce horizontally adjacent OCR detections sharing a baseline.

    items: list of {"text", "x", "y", "w", "h"} boxes.
    Two boxes are joined when their vertical overlap exceeds 50% of the
    shorter box's height AND the horizontal gap between them lies in
    [0, merge_gap]. Returns a new list of copied dicts; the input boxes
    are never mutated. With no items or merge_gap <= 0, the input list
    is returned unchanged.
    """
    if not items or merge_gap <= 0:
        return items
    # Sort by vertical center then left edge so same-line boxes are adjacent.
    ordered = sorted(items, key=lambda box: (box["y"] + box["h"] / 2, box["x"]))
    result = [dict(ordered[0])]
    for box in ordered[1:]:
        prev = result[-1]
        shorter = min(prev["h"], box["h"])
        y_overlap = min(prev["y"] + prev["h"], box["y"] + box["h"]) - max(prev["y"], box["y"])
        gap = box["x"] - (prev["x"] + prev["w"])
        same_line = shorter > 0 and y_overlap / shorter > 0.5
        if same_line and 0 <= gap <= merge_gap:
            # Grow the previous box to the union rectangle and join the text.
            left = min(prev["x"], box["x"])
            top = min(prev["y"], box["y"])
            right = max(prev["x"] + prev["w"], box["x"] + box["w"])
            bottom = max(prev["y"] + prev["h"], box["y"] + box["h"])
            prev["x"], prev["y"] = left, top
            prev["w"], prev["h"] = right - left, bottom - top
            prev["text"] = prev["text"] + " " + box["text"]
        else:
            result.append(dict(box))
    return result
def items_to_response(items):
    """Build an OcrResponse-shaped dict from detection items.

    items: list of {"text", "x", "y", "w", "h"}.
    Each item becomes one entry in "lines" with per-word boxes computed by
    split_into_words; "text" is every line's text joined with newlines.
    """
    line_entries = [
        {
            "text": item["text"],
            "words": split_into_words(item["text"], item["x"], item["y"], item["w"], item["h"]),
        }
        for item in items
    ]
    full_text = "\n".join(item["text"] for item in items)
    return {"ok": True, "text": full_text, "lines": line_entries}
def run_easyocr(image_path):
from PIL import Image
import numpy as np
@ -78,27 +123,28 @@ def run_easyocr(image_path):
return run_easyocr_array(img)
def run_easyocr_array(img):
def run_easyocr_array(img, merge_gap=0, **easyocr_kwargs):
reader = get_easyocr()
# Redirect stdout during inference — easyocr can print warnings
real_stdout = _redirect_stdout_to_stderr()
try:
# batch_size=32: batch GPU recognition of detected text regions
results = reader.readtext(img, batch_size=32)
results = reader.readtext(img, batch_size=32, **easyocr_kwargs)
finally:
_restore_stdout(real_stdout)
# results: [(bbox_4corners, text, conf), ...]
lines = []
all_text_parts = []
items = []
for bbox, text, conf in results:
if not text.strip():
continue
x, y, w, h = bbox_to_rect(bbox)
words = split_into_words(text, x, y, w, h)
lines.append({"text": text.strip(), "words": words})
all_text_parts.append(text.strip())
return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
items.append({"text": text.strip(), "x": x, "y": y, "w": w, "h": h})
if merge_gap > 0:
items = merge_nearby_detections(items, merge_gap)
return items_to_response(items)
def get_paddleocr():
@ -106,10 +152,18 @@ def get_paddleocr():
if _paddle_ocr is None:
sys.stderr.write("Loading PaddleOCR model...\n")
sys.stderr.flush()
import os
os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")
real_stdout = _redirect_stdout_to_stderr()
try:
from paddleocr import PaddleOCR
_paddle_ocr = PaddleOCR(use_angle_cls=True, lang="en", use_gpu=True, show_log=False)
_paddle_ocr = PaddleOCR(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=False,
lang="en",
ocr_version="PP-OCRv4",
)
finally:
_restore_stdout(real_stdout)
sys.stderr.write("PaddleOCR model loaded.\n")
@ -117,28 +171,41 @@ def get_paddleocr():
return _paddle_ocr
def run_paddleocr_array(img):
def run_paddleocr_array(img, merge_gap=0):
ocr = get_paddleocr()
# Ensure RGB 3-channel
if len(img.shape) == 2:
import numpy as np
img = np.stack([img, img, img], axis=-1)
elif img.shape[2] == 4:
img = img[:, :, :3]
real_stdout = _redirect_stdout_to_stderr()
try:
results = ocr.ocr(img, cls=True)
results = ocr.predict(img)
finally:
_restore_stdout(real_stdout)
lines = []
all_text_parts = []
# PaddleOCR returns [page_results], each item is [bbox_4corners, (text, conf)]
if results and results[0]:
for item in results[0]:
bbox, (text, conf) = item
items = []
# PaddleOCR 3.4: results is list of OCRResult objects
for res in results:
texts = res.get("rec_texts", []) if hasattr(res, "get") else getattr(res, "rec_texts", [])
polys = res.get("dt_polys", []) if hasattr(res, "get") else getattr(res, "dt_polys", [])
for i, text in enumerate(texts):
if not text.strip():
continue
x, y, w, h = bbox_to_rect(bbox)
words = split_into_words(text, x, y, w, h)
lines.append({"text": text.strip(), "words": words})
all_text_parts.append(text.strip())
return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
if i < len(polys):
bbox = polys[i]
x, y, w, h = bbox_to_rect(bbox)
else:
x, y, w, h = 0, 0, 0, 0
items.append({"text": text.strip(), "x": x, "y": y, "w": w, "h": h})
if merge_gap > 0:
items = merge_nearby_detections(items, merge_gap)
return items_to_response(items)
def load_image(req):
@ -170,10 +237,22 @@ def handle_request(req):
if img is None:
return {"ok": False, "error": "Missing imagePath or imageBase64"}
merge_gap = req.get("mergeGap", 0)
if engine == "easyocr":
return run_easyocr_array(img)
easyocr_kwargs = {}
for json_key, py_param in [
("linkThreshold", "link_threshold"),
("textThreshold", "text_threshold"),
("lowText", "low_text"),
("widthThs", "width_ths"),
("paragraph", "paragraph"),
]:
if json_key in req:
easyocr_kwargs[py_param] = req[json_key]
return run_easyocr_array(img, merge_gap=merge_gap, **easyocr_kwargs)
elif engine == "paddleocr":
return run_paddleocr_array(img)
return run_paddleocr_array(img, merge_gap=merge_gap)
else:
return {"ok": False, "error": f"Unknown engine: {engine}"}

484
tools/test-ocr.ts Normal file
View file

@ -0,0 +1,484 @@
/**
* OCR test runner + parameter tuner.
*
* Usage:
* npx tsx tools/test-ocr.ts # test all combos with defaults
* npx tsx tools/test-ocr.ts paddleocr # filter to paddleocr combos
* npx tsx tools/test-ocr.ts --tune # tune all combos (coordinate descent)
* npx tsx tools/test-ocr.ts --tune easyocr # tune only easyocr combos
*/
import { OcrDaemon, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams } from '../src/game/OcrDaemon.js';
import { readFileSync } from 'fs';
import { join } from 'path';
// ── Types ──────────────────────────────────────────────────────────────────
/** One OCR fixture loaded from cases.json (see main()). */
interface TestCase {
/** Unique case name, used in log lines. */
id: string;
/** Path (relative to the tessdata dir) of the changed image sent to diff-ocr. */
image: string;
/** Path of the full reference frame sent first as the `snapshot`. */
fullImage: string;
/** Text lines the OCR is expected to recover; scoring matches against these. */
expected: string[];
}
/** One engine × preprocess pairing under test. */
interface Combo {
engine: OcrEngine;
preprocess: OcrPreprocess;
/** Display name, e.g. "tesseract+bgsub"; also matched by the CLI filter arg. */
label: string;
}
/** Outcome of tuning one combo. */
interface TuneResult {
label: string;
/** Best average score achieved, in [0, 1]. */
score: number;
/** Parameter set that produced `score`. */
params: DiffOcrParams;
/** Number of scoreCombo evaluations spent reaching it. */
evals: number;
}
// ── Combos ─────────────────────────────────────────────────────────────────
// Every engine × preprocess combination the runner knows about. A positional
// CLI argument filters this list by substring match on `label`; list order is
// the order combos are tested/tuned.
const ALL_COMBOS: Combo[] = [
{ engine: 'tesseract', preprocess: 'bgsub', label: 'tesseract+bgsub' },
{ engine: 'tesseract', preprocess: 'tophat', label: 'tesseract+tophat' },
{ engine: 'tesseract', preprocess: 'none', label: 'tesseract+none' },
{ engine: 'easyocr', preprocess: 'bgsub', label: 'easyocr+bgsub' },
{ engine: 'easyocr', preprocess: 'tophat', label: 'easyocr+tophat' },
{ engine: 'easyocr', preprocess: 'none', label: 'easyocr+none' },
{ engine: 'paddleocr', preprocess: 'bgsub', label: 'paddleocr+bgsub' },
{ engine: 'paddleocr', preprocess: 'tophat', label: 'paddleocr+tophat' },
{ engine: 'paddleocr', preprocess: 'none', label: 'paddleocr+none' },
];
// ── Scoring ────────────────────────────────────────────────────────────────
/**
 * Levenshtein edit distance between `a` and `b` (unit-cost insert/delete/substitute).
 *
 * Uses the Wagner–Fischer DP with two rolling rows instead of the full
 * (m+1)×(n+1) matrix — same result, O(n) memory instead of O(m·n).
 */
function levenshtein(a: string, b: string): number {
  const m = a.length, n = b.length;
  if (m === 0) return n;
  if (n === 0) return m;
  let prev = new Array<number>(n + 1);
  let curr = new Array<number>(n + 1);
  for (let j = 0; j <= n; j++) prev[j] = j;
  for (let i = 1; i <= m; i++) {
    curr[0] = i;
    for (let j = 1; j <= n; j++) {
      curr[j] = a[i - 1] === b[j - 1]
        ? prev[j - 1]
        : 1 + Math.min(prev[j], curr[j - 1], prev[j - 1]);
    }
    // Swap rows: `prev` becomes the row just computed.
    [prev, curr] = [curr, prev];
  }
  return prev[n];
}
/**
 * Case-insensitive similarity in [0, 1]: 1 means identical, 0 means nothing
 * in common. Defined as 1 - levenshtein/maxLen; two empty strings score 1.
 */
function similarity(a: string, b: string): number {
  const longest = Math.max(a.length, b.length);
  if (longest === 0) return 1;
  const distance = levenshtein(a.toLowerCase(), b.toLowerCase());
  return 1 - distance / longest;
}
/**
 * Fraction of `expected` lines that find a sufficiently similar (>= 0.75),
 * not-yet-claimed line in `actual`. Greedy 1:1 matching in expected order;
 * an empty expectation scores a perfect 1.
 */
function scoreLines(expected: string[], actual: string[]): number {
  if (expected.length === 0) return 1;
  const claimed = new Set<number>();
  let hits = 0;
  for (const want of expected) {
    let candidate = -1;
    let candidateSim = 0;
    actual.forEach((got, idx) => {
      if (claimed.has(idx)) return;
      const sim = similarity(want, got);
      if (sim > candidateSim) { candidateSim = sim; candidate = idx; }
    });
    if (candidate >= 0 && candidateSim >= 0.75) {
      hits++;
      claimed.add(candidate);
    }
  }
  return hits / expected.length;
}
/**
 * Like scoreLines, but also reports which expected lines matched, which were
 * missed, and which actual lines were left unclaimed ("extra"). Same greedy
 * 1:1 matching with a 0.75 similarity floor.
 */
function scoreLinesVerbose(expected: string[], actual: string[]): { matched: string[]; missed: string[]; extra: string[]; score: number } {
  const claimed = new Set<number>();
  const matched: string[] = [];
  const missed: string[] = [];
  for (const want of expected) {
    let candidate = -1;
    let candidateSim = 0;
    for (let i = 0; i < actual.length; i++) {
      if (claimed.has(i)) continue;
      const sim = similarity(want, actual[i]);
      if (sim > candidateSim) { candidateSim = sim; candidate = i; }
    }
    if (candidate >= 0 && candidateSim >= 0.75) {
      matched.push(want);
      claimed.add(candidate);
    } else {
      missed.push(want);
    }
  }
  const extra: string[] = [];
  actual.forEach((line, i) => { if (!claimed.has(i)) extra.push(line); });
  const score = expected.length > 0 ? matched.length / expected.length : 1;
  return { matched, missed, extra, score };
}
// ── Daemon helpers ─────────────────────────────────────────────────────────
/**
 * Run a single test case through the daemon: snapshot the full reference
 * frame, then diff-OCR the changed image with the requested engine,
 * preprocess mode, and params. Returns the non-empty recognized lines.
 */
async function runCase(
  daemon: OcrDaemon,
  tc: TestCase,
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<string[]> {
  // The daemon is a Windows process; normalize separators to backslashes.
  const toWinPath = (p: string) => p.replace(/\//g, '\\');
  const refPath = toWinPath(join(tessdataDir, tc.fullImage));
  const diffPath = toWinPath(join(tessdataDir, tc.image));

  await (daemon as any).sendWithRetry({ cmd: 'snapshot', file: refPath }, 10_000);

  const request: any = { cmd: 'diff-ocr', file: diffPath };
  if (engine !== 'tesseract') request.engine = engine;
  if (preprocess !== 'none') request.preprocess = preprocess;
  if (params && Object.keys(params).length > 0) request.params = params;

  // Python engines (easyocr/paddleocr) are far slower, especially on first model load.
  const timeoutMs = engine !== 'tesseract' ? 120_000 : 10_000;
  const response = await (daemon as any).sendWithRetry(request, timeoutMs);

  const recognized: string[] = [];
  for (const line of response.lines ?? []) {
    const text = (line.text ?? '').trim();
    if (text.length > 0) recognized.push(text);
  }
  return recognized;
}
/**
 * Average scoreLines over all cases for one engine/preprocess (and optional
 * params). A case whose run throws contributes 0 to the average.
 */
async function scoreCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  engine: OcrEngine,
  preprocess: OcrPreprocess,
  params?: DiffOcrParams,
): Promise<number> {
  let sum = 0;
  for (const tc of cases) {
    try {
      const lines = await runCase(daemon, tc, tessdataDir, engine, preprocess, params);
      sum += scoreLines(tc.expected, lines);
    } catch {
      // Failed run: score 0 for this case, keep going.
    }
  }
  return sum / cases.length;
}
// ── Parameter sweep definitions ────────────────────────────────────────────
/** Sweep of an integer crop parameter (tuner Phase A). */
interface CropIntSweep {
name: keyof DiffCropParams;
values: number[];
}
/** Sweep of an integer OCR parameter (tuner Phase B). */
interface OcrIntSweep {
name: keyof OcrParams;
values: number[];
}
/** Sweep of a boolean OCR parameter (tuner Phase B). */
interface OcrBoolSweep {
name: keyof OcrParams;
values: boolean[];
}
// Phase A: integer crop parameters swept for every combo.
const CROP_SWEEPS: CropIntSweep[] = [
{ name: 'diffThresh', values: [10, 15, 20, 25, 30, 40, 50] },
{ name: 'maxGap', values: [5, 10, 15, 20, 25, 30] },
];
// trimCutoff is fractional, so it is swept separately from the integer sweeps.
const CROP_TRIM_VALUES = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];
// Phase B: OCR parameters swept regardless of preprocess mode.
const SHARED_OCR_SWEEPS: OcrIntSweep[] = [
{ name: 'upscale', values: [1, 2, 3] },
{ name: 'mergeGap', values: [0, 20, 40, 60, 80, 100] },
];
// Phase B extras used only when preprocess === 'bgsub' (see tuneCombo).
const BGSUB_INT_SWEEPS: OcrIntSweep[] = [
{ name: 'dimPercentile', values: [5, 10, 15, 20, 25, 30, 40, 50, 60] },
{ name: 'textThresh', values: [10, 20, 30, 40, 50, 60, 80, 100] },
];
const BGSUB_BOOL_SWEEPS: OcrBoolSweep[] = [
{ name: 'softThreshold', values: [false, true] },
];
// Phase B extra used only when preprocess === 'tophat'.
const TOPHAT_SWEEPS: OcrIntSweep[] = [
{ name: 'kernelSize', values: [11, 15, 21, 25, 31, 41, 51, 61] },
];
// ── Default params per preprocess ──────────────────────────────────────────
/**
 * Baseline DiffOcrParams for a preprocess mode; the tuner starts from these
 * and the test mode runs them as-is.
 */
function defaultParams(preprocess: OcrPreprocess): DiffOcrParams {
  const crop: DiffCropParams = { diffThresh: 20, maxGap: 20, trimCutoff: 0.4 };
  switch (preprocess) {
    case 'bgsub':
      return { crop, ocr: { useBackgroundSub: true, upscale: 2, dimPercentile: 40, textThresh: 60, softThreshold: false } };
    case 'tophat':
      return { crop, ocr: { useBackgroundSub: false, upscale: 2, kernelSize: 41 } };
    default:
      // 'none': no preprocess-specific knobs, only upscaling.
      return { crop, ocr: { upscale: 2 } };
  }
}
/**
 * Deep-copy a parameter set so a trial mutation never leaks into the
 * committed params. structuredClone is faster than a JSON round-trip and,
 * unlike JSON.parse(JSON.stringify(...)), preserves `undefined`-valued keys.
 */
function cloneParams(p: DiffOcrParams): DiffOcrParams {
  return structuredClone(p);
}
// ── Coordinate descent tuner (two-phase: crop then OCR) ──────────────────
/**
 * Two-phase coordinate-descent tuner for one engine+preprocess combo.
 *
 * Phase A sweeps the crop parameters (CROP_SWEEPS plus the fractional
 * trimCutoff); once a full round yields no improvement they are locked.
 * Phase B then sweeps the OCR parameters: the shared sweeps plus the
 * preprocess-specific extras (bgsub ints/bools or tophat kernelSize).
 * Each phase runs at most MAX_ROUNDS rounds. A candidate value is committed
 * only when it STRICTLY beats the current best average score, so ties keep
 * the earlier/default value.
 *
 * Returns the best score, the params that produced it, and the eval count.
 */
async function tuneCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
): Promise<TuneResult> {
  const params = defaultParams(combo.preprocess);
  let bestScore = await scoreCombo(daemon, cases, tessdataDir, combo.engine, combo.preprocess, params);
  let evals = 1;
  process.stderr.write(` baseline: ${(bestScore * 100).toFixed(1)}% ${JSON.stringify(params)}\n`);

  // Sweep one parameter (under params.crop or params.ocr) across `values`,
  // committing the best value only if it strictly improves bestScore.
  // Returns true when a value was committed. This replaces three copies of
  // the same loop (crop ints, trimCutoff, OCR ints/bools) in the original.
  const sweepOne = async (target: 'crop' | 'ocr', name: string, values: (number | boolean)[]): Promise<boolean> => {
    process.stderr.write(` ${target}.${name}: `);
    let bestVal: number | boolean | undefined;
    let bestValScore = -1;
    for (const v of values) {
      const trial = cloneParams(params);
      (trial[target] as any)[name] = v;
      const score = await scoreCombo(daemon, cases, tessdataDir, combo.engine, combo.preprocess, trial);
      evals++;
      process.stderr.write(`${v}=${(score * 100).toFixed(1)} `);
      if (score > bestValScore) { bestValScore = score; bestVal = v; }
    }
    process.stderr.write('\n');
    if (bestValScore > bestScore && bestVal !== undefined) {
      (params[target] as any)[name] = bestVal;
      bestScore = bestValScore;
      process.stderr.write(` -> ${target}.${name}=${bestVal} score=${(bestScore * 100).toFixed(1)}%\n`);
      return true;
    }
    return false;
  };

  const MAX_ROUNDS = 3;

  // ── Phase A: Tune crop params ──
  process.stderr.write(`\n === Phase A: Crop Params ===\n`);
  for (let round = 0; round < MAX_ROUNDS; round++) {
    let improved = false;
    process.stderr.write(` --- Crop Round ${round + 1} ---\n`);
    for (const { name, values } of CROP_SWEEPS) {
      if (await sweepOne('crop', name, values)) improved = true;
    }
    // trimCutoff is fractional, hence its own value list.
    if (await sweepOne('crop', 'trimCutoff', CROP_TRIM_VALUES)) improved = true;
    process.stderr.write(` End crop round ${round + 1}: ${(bestScore * 100).toFixed(1)}% (${evals} evals)\n`);
    if (!improved) break;
  }

  // ── Phase B: Tune OCR params (crop is now locked) ──
  process.stderr.write(`\n === Phase B: OCR Params (crop locked) ===\n`);
  const ocrIntSweeps: OcrIntSweep[] = [...SHARED_OCR_SWEEPS];
  const ocrBoolSweeps: OcrBoolSweep[] = [];
  if (combo.preprocess === 'bgsub') {
    ocrIntSweeps.push(...BGSUB_INT_SWEEPS);
    ocrBoolSweeps.push(...BGSUB_BOOL_SWEEPS);
  } else if (combo.preprocess === 'tophat') {
    ocrIntSweeps.push(...TOPHAT_SWEEPS);
  }
  for (let round = 0; round < MAX_ROUNDS; round++) {
    let improved = false;
    process.stderr.write(` --- OCR Round ${round + 1} ---\n`);
    for (const { name, values } of ocrIntSweeps) {
      if (await sweepOne('ocr', name, values)) improved = true;
    }
    for (const { name, values } of ocrBoolSweeps) {
      if (await sweepOne('ocr', name, values)) improved = true;
    }
    process.stderr.write(` End OCR round ${round + 1}: ${(bestScore * 100).toFixed(1)}% (${evals} evals)\n`);
    if (!improved) break;
  }

  return { label: combo.label, score: bestScore, params, evals };
}
// ── Verbose test run ───────────────────────────────────────────────────────
/**
 * Verbose test run for one combo: prints PASS/FAIL per case with missed and
 * extra lines, and returns the average score across all cases. A case that
 * throws is logged as [ERROR] and contributes 0.
 */
async function testCombo(
  daemon: OcrDaemon,
  cases: TestCase[],
  tessdataDir: string,
  combo: Combo,
  params?: DiffOcrParams,
): Promise<number> {
  let sum = 0;
  for (const tc of cases) {
    try {
      const actual = await runCase(daemon, tc, tessdataDir, combo.engine, combo.preprocess, params);
      const { matched, missed, extra, score } = scoreLinesVerbose(tc.expected, actual);
      sum += score;
      const status = missed.length === 0 ? 'PASS' : 'FAIL';
      console.log(` [${status}] ${tc.id} matched=${matched.length}/${tc.expected.length} extra=${extra.length} score=${score.toFixed(2)}`);
      missed.forEach(m => console.log(` MISS: ${m}`));
      extra.forEach(e => console.log(` EXTRA: ${e}`));
    } catch (err: any) {
      console.log(` [ERROR] ${tc.id}: ${err.message}`);
    }
  }
  return sum / cases.length;
}
// ── Main ───────────────────────────────────────────────────────────────────
// Entry point. TEST mode runs every (filtered) combo with default params;
// TUNE mode runs the coordinate-descent tuner per combo, then replays the
// best params verbosely. Both modes end with a sorted summary bar chart.
async function main() {
  const args = process.argv.slice(2);
  const tuneMode = args.includes('--tune');
  // First non-flag argument is a substring filter on combo labels (e.g. "paddleocr").
  const filterArg = args.find(a => !a.startsWith('--'))?.toLowerCase();
  const combos = filterArg
    ? ALL_COMBOS.filter(c => c.label.includes(filterArg))
    : ALL_COMBOS;
  // Fixtures (cases.json + images) live in the built daemon's tessdata folder.
  const tessdataDir = join('tools', 'OcrDaemon', 'bin', 'Release', 'net8.0-windows10.0.19041.0', 'tessdata');
  const casesPath = join(tessdataDir, 'cases.json');
  const cases: TestCase[] = JSON.parse(readFileSync(casesPath, 'utf-8'));
  console.log(`Loaded ${cases.length} test cases: ${cases.map(c => c.id).join(', ')}`);
  console.log(`Mode: ${tuneMode ? 'TUNE' : 'TEST'} Combos: ${combos.length}\n`);
  // NOTE(review): daemon.stop() below is not in a finally block — if anything
  // throws outside the per-combo try/catch, a spawned daemon process could
  // linger. Confirm OcrDaemon spawns lazily / cleans up on parent exit.
  const daemon = new OcrDaemon();
  if (tuneMode) {
    // ── Tune mode: coordinate descent for each combo ──
    const tuneResults: TuneResult[] = [];
    for (const combo of combos) {
      console.log(`\n${'='.repeat(60)}`);
      console.log(` TUNING: ${combo.label}`);
      console.log(`${'='.repeat(60)}`);
      try {
        const result = await tuneCombo(daemon, cases, tessdataDir, combo);
        tuneResults.push(result);
        console.log(`\n Best: ${(result.score * 100).toFixed(1)}% (${result.evals} evals)`);
        console.log(` Params: ${JSON.stringify(result.params)}`);
        // Verbose run with best params
        console.log('');
        await testCombo(daemon, cases, tessdataDir, combo, result.params);
      } catch (err: any) {
        // A combo that fails entirely still appears in the summary with score 0.
        console.log(` ERROR: ${err.message}`);
        tuneResults.push({ label: combo.label, score: 0, params: {}, evals: 0 });
      }
    }
    // Summary
    console.log(`\n${'='.repeat(70)}`);
    console.log(' TUNE RESULTS');
    console.log(`${'='.repeat(70)}`);
    // Best score first; sort() mutates tuneResults in place, which is fine here.
    const sorted = tuneResults.sort((a, b) => b.score - a.score);
    for (const r of sorted) {
      const bar = '#'.repeat(Math.round(r.score * 40));
      console.log(` ${r.label.padEnd(22)} ${(r.score * 100).toFixed(1).padStart(5)}% ${bar}`);
    }
    console.log(`\n BEST PARAMS PER COMBO:`);
    for (const r of sorted) {
      if (r.score > 0) {
        console.log(` ${r.label.padEnd(22)} ${JSON.stringify(r.params)}`);
      }
    }
  } else {
    // ── Test mode: defaults only ──
    const results: Record<string, number> = {};
    for (const combo of combos) {
      console.log(`\n${'='.repeat(60)}`);
      console.log(` ${combo.label}`);
      console.log(`${'='.repeat(60)}`);
      try {
        const score = await testCombo(daemon, cases, tessdataDir, combo);
        results[combo.label] = score;
        console.log(`\n Average: ${(score * 100).toFixed(1)}%`);
      } catch (err: any) {
        console.log(` ERROR: ${err.message}`);
        results[combo.label] = 0;
      }
    }
    console.log(`\n${'='.repeat(60)}`);
    console.log(' SUMMARY');
    console.log(`${'='.repeat(60)}`);
    const sorted = Object.entries(results).sort((a, b) => b[1] - a[1]);
    for (const [label, score] of sorted) {
      const bar = '#'.repeat(Math.round(score * 40));
      console.log(` ${label.padEnd(22)} ${(score * 100).toFixed(1).padStart(5)}% ${bar}`);
    }
  }
  await daemon.stop();
}
// Top-level error handler: print the failure and exit non-zero so callers/CI notice.
main().catch(err => {
  console.error(err);
  process.exit(1);
});