From cf5d944fd1c7279b4a2c0f3b4f76de0c96e556f2 Mon Sep 17 00:00:00 2001
From: Boki <boki@stare.gg>
Date: Thu, 12 Feb 2026 11:24:31 -0500
Subject: [PATCH] finished easyocr and pipeline

---
 src/dashboard/DashboardServer.ts |  24 ++++-
 src/dashboard/index.html         |  25 +++++
 src/game/OcrDaemon.ts            |  11 ++-
 src/game/ScreenReader.ts         |  19 ++--
 tools/OcrDaemon/Daemon.cs        | 151 +++++++++++++++++++++++--------
 tools/OcrDaemon/Models.cs        |   3 +
 tools/OcrDaemon/OcrHandler.cs    |  27 ++++++
 tools/python-ocr/daemon.py       |  43 +++++++++
 8 files changed, 252 insertions(+), 51 deletions(-)
diff --git a/src/dashboard/DashboardServer.ts b/src/dashboard/DashboardServer.ts
index 39f2c27..2bf573c 100644
--- a/src/dashboard/DashboardServer.ts
+++ b/src/dashboard/DashboardServer.ts
@@ -8,7 +8,7 @@ import { logger } from '../util/logger.js';
 import { sleep } from '../util/sleep.js';
 import type { BotController } from './BotController.js';
 import type { ScreenReader } from '../game/ScreenReader.js';
-import type { OcrEngine } from '../game/OcrDaemon.js';
+import type { OcrEngine, OcrPreprocess } from '../game/OcrDaemon.js';
 import { GRID_LAYOUTS } from '../game/GridReader.js';
 import type { GameController } from '../game/GameController.js';
 
@@ -131,8 +131,8 @@ export class DashboardServer {
     this.app.post('/api/debug/ocr-engine', (req, res) => {
       if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
       const { engine } = req.body as { engine: string };
-      if (!['tesseract', 'easyocr'].includes(engine)) {
-        res.status(400).json({ error: 'Invalid engine. Must be tesseract or easyocr.' });
+      if (!['tesseract', 'easyocr', 'paddleocr'].includes(engine)) {
+        res.status(400).json({ error: 'Invalid engine. Must be tesseract, easyocr, or paddleocr.' });
         return;
       }
       this.debug.screenReader.debugOcrEngine = engine as OcrEngine;
@@ -140,6 +140,24 @@ export class DashboardServer {
       res.json({ ok: true });
     });
 
+    // OCR preprocess selection
+    this.app.get('/api/debug/ocr-preprocess', (_req, res) => {
+      if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
+      res.json({ ok: true, preprocess: this.debug.screenReader.debugPreprocess });
+    });
+
+    this.app.post('/api/debug/ocr-preprocess', (req, res) => {
+      if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
+      const { preprocess } = req.body as { preprocess: string };
+      if (!['none', 'bgsub', 'tophat'].includes(preprocess)) {
+        res.status(400).json({ error: 'Invalid preprocess. Must be none, bgsub, or tophat.' });
+        return;
+      }
+      this.debug.screenReader.debugPreprocess = preprocess as OcrPreprocess;
+      this.broadcastLog('info', `OCR preprocess set to: ${preprocess}`);
+      res.json({ ok: true });
+    });
+
     this.app.post('/api/debug/ocr', async (_req, res) => {
       if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
       try {
diff --git a/src/dashboard/index.html b/src/dashboard/index.html
index 986992f..5babde6 100644
--- a/src/dashboard/index.html
+++ b/src/dashboard/index.html
@@ -455,6 +455,12 @@
         <select id="ocrEngineSelect" onchange="setOcrEngine(this.value)" style="padding:6px 10px;background:#0d1117;border:1px solid #30363d;border-radius:6px;color:#e6edf3;font-size:13px">
           <option value="tesseract">Tesseract</option>
           <option value="easyocr">EasyOCR</option>
+          <option value="paddleocr">PaddleOCR</option>
+        </select>
+        <select id="ocrPreprocessSelect" onchange="setOcrPreprocess(this.value)" style="padding:6px 10px;background:#0d1117;border:1px solid #30363d;border-radius:6px;color:#e6edf3;font-size:13px">
+          <option value="none">No Preprocess</option>
+          <option value="bgsub" selected>BgSub</option>
+          <option value="tophat">TopHat</option>
         </select>
         <button onclick="debugScreenshot()">Screenshot</button>
         <button onclick="debugOcr()">OCR Screen</button>
@@ -1004,8 +1010,27 @@
     } catch {}
   }
 
+  // Send the selected preprocess mode to the server; log failures instead of leaving an unhandled rejection.
+  async function setOcrPreprocess(preprocess) {
+    try {
+      const res = await fetch('/api/debug/ocr-preprocess', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ preprocess }) });
+      if (!res.ok) console.error('setOcrPreprocess failed: HTTP ' + res.status);
+    } catch (err) { console.error('setOcrPreprocess failed', err); }
+  }
+
+  // Sync the preprocess dropdown with the server's current setting; any failure leaves the dropdown untouched.
+  async function loadOcrPreprocess() {
+    try {
+      const response = await fetch('/api/debug/ocr-preprocess');
+      const payload = await response.json();
+      if (!payload.ok || !payload.preprocess) return;
+      document.getElementById('ocrPreprocessSelect').value = payload.preprocess;
+    } catch {}
+  }
+
   connect();
   loadOcrEngine();
+  loadOcrPreprocess();
 </script>
 </body>
 </html>
diff --git a/src/game/OcrDaemon.ts b/src/game/OcrDaemon.ts
index 73b4fa4..21f9a2a 100644
--- a/src/game/OcrDaemon.ts
+++ b/src/game/OcrDaemon.ts
@@ -67,7 +67,9 @@ export interface TemplateMatchResult {
   confidence: number;
 }
 
-export type OcrEngine = 'tesseract' | 'easyocr';
+export type OcrEngine = 'tesseract' | 'easyocr' | 'paddleocr';
+
+export type OcrPreprocess = 'none' | 'bgsub' | 'tophat';
 
 interface DaemonRequest {
   cmd: string;
@@ -79,6 +81,7 @@ interface DaemonRequest {
   minCellSize?: number;
   maxCellSize?: number;
   engine?: string;
+  preprocess?: string;
 }
 
 interface DaemonResponse {
@@ -133,10 +136,11 @@ export class OcrDaemon {
 
   // ── Public API ──────────────────────────────────────────────────────────
 
-  async ocr(region?: Region, engine?: OcrEngine): Promise<OcrResponse> {
+  async ocr(region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess): Promise<OcrResponse> {
     const req: DaemonRequest = { cmd: 'ocr' };
     if (region) req.region = region;
     if (engine && engine !== 'tesseract') req.engine = engine;
+    if (preprocess && preprocess !== 'none') req.preprocess = preprocess;
     // Python engines need longer timeout for first model load + download
     const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
     const resp = await this.sendWithRetry(req, timeout);
@@ -182,11 +186,12 @@ export class OcrDaemon {
     await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT);
   }
 
-  async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine): Promise<DiffOcrResponse> {
+  async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess): Promise<DiffOcrResponse> {
     const req: DaemonRequest = { cmd: 'diff-ocr' };
     if (savePath) req.path = savePath;
     if (region) req.region = region;
     if (engine && engine !== 'tesseract') req.engine = engine;
+    if (preprocess) req.preprocess = preprocess;
     const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
     const resp = await this.sendWithRetry(req, timeout);
     return {
diff --git a/src/game/ScreenReader.ts b/src/game/ScreenReader.ts
index 7b513f3..94e1afb 100644
--- a/src/game/ScreenReader.ts
+++ b/src/game/ScreenReader.ts
@@ -1,7 +1,7 @@
 import { mkdir } from 'fs/promises';
 import { join } from 'path';
 import { logger } from '../util/logger.js';
-import { OcrDaemon, type OcrResponse, type OcrEngine, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
+import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
 import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
 import type { Region } from '../types.js';
 
@@ -13,6 +13,7 @@ export class ScreenReader {
   private daemon = new OcrDaemon();
   readonly grid = new GridReader(this.daemon);
   debugOcrEngine: OcrEngine = 'tesseract';
+  debugPreprocess: OcrPreprocess = 'bgsub';
 
   // ── Screenshot capture ──────────────────────────────────────────────
 
@@ -241,20 +242,20 @@ export class ScreenReader {
 
   async debugDiffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
     const t = performance.now();
-    const result = await this.daemon.diffOcr(savePath, region, this.debugOcrEngine);
-    logger.info({ engine: this.debugOcrEngine, ms: elapsed(t) }, 'debugDiffOcr');
+    const result = await this.daemon.diffOcr(savePath, region, this.debugOcrEngine, this.debugPreprocess);
+    logger.info({ engine: this.debugOcrEngine, preprocess: this.debugPreprocess, ms: elapsed(t) }, 'debugDiffOcr');
     return result;
   }
 
   async debugOcr(region?: Region): Promise<OcrResponse> {
     const t = performance.now();
-    const result = await this.daemon.ocr(region, this.debugOcrEngine);
-    logger.info({ engine: this.debugOcrEngine, ms: elapsed(t) }, 'debugOcr');
+    const result = await this.daemon.ocr(region, this.debugOcrEngine, this.debugPreprocess);
+    logger.info({ engine: this.debugOcrEngine, preprocess: this.debugPreprocess, ms: elapsed(t) }, 'debugOcr');
     return result;
   }
 
   async debugReadFullScreen(): Promise<string> {
-    const result = await this.daemon.ocr(undefined, this.debugOcrEngine);
+    const result = await this.daemon.ocr(undefined, this.debugOcrEngine, this.debugPreprocess);
     return result.text;
   }
 
@@ -263,13 +264,13 @@ export class ScreenReader {
     fuzzy: boolean = false,
   ): Promise<{ x: number; y: number } | null> {
     const t = performance.now();
-    const result = await this.daemon.ocr(undefined, this.debugOcrEngine);
+    const result = await this.daemon.ocr(undefined, this.debugOcrEngine, this.debugPreprocess);
     const pos = this.findWordInOcrResult(result, searchText, fuzzy);
 
     if (pos) {
-      logger.info({ searchText, engine: this.debugOcrEngine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'debugFindText found');
+      logger.info({ searchText, engine: this.debugOcrEngine, preprocess: this.debugPreprocess, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'debugFindText found');
     } else {
-      logger.info({ searchText, engine: this.debugOcrEngine, totalMs: elapsed(t) }, 'debugFindText not found');
+      logger.info({ searchText, engine: this.debugOcrEngine, preprocess: this.debugPreprocess, totalMs: elapsed(t) }, 'debugFindText not found');
     }
     return pos;
   }
diff --git a/tools/OcrDaemon/Daemon.cs b/tools/OcrDaemon/Daemon.cs
index 2fc36d5..6cfb898 100644
--- a/tools/OcrDaemon/Daemon.cs
+++ b/tools/OcrDaemon/Daemon.cs
@@ -1,5 +1,6 @@
 namespace OcrDaemon;
 
+using System.Drawing;
 using System.Text.Json;
 using System.Text.Json.Serialization;
 using Tesseract;
@@ -74,15 +75,11 @@ static class Daemon
 
                 object response = request.Cmd?.ToLowerInvariant() switch
                 {
-                    "ocr" when request.Engine is "easyocr"
-                                  => pythonBridge.HandleOcr(request, request.Engine),
-                    "ocr"         => ocrHandler.HandleOcr(request),
+                    "ocr"         => HandleOcrPipeline(ocrHandler, pythonBridge, request),
                     "screenshot"  => ocrHandler.HandleScreenshot(request),
                     "capture"     => ocrHandler.HandleCapture(request),
                     "snapshot"    => ocrHandler.HandleSnapshot(request),
-                    "diff-ocr" when request.Engine is "easyocr"
-                                  => HandleDiffOcrPython(ocrHandler, pythonBridge, request),
-                    "diff-ocr"    => ocrHandler.HandleDiffOcr(request),
+                    "diff-ocr"    => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
                     "test"        => ocrHandler.HandleTest(request),
                     "tune"        => ocrHandler.HandleTune(request),
                     "grid"        => gridHandler.HandleGrid(request),
@@ -102,11 +99,67 @@ static class Daemon
         return 0;
     }
 
-    private static object HandleDiffOcrPython(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
+    /// <summary>
+    /// Unified OCR pipeline for full/region captures.
+    /// Capture → optional preprocess → route to engine (tesseract / easyocr / paddleocr).
+    /// </summary>
+    /// <remarks>
+    /// "bgsub" needs a reference frame that only diff-ocr has. ScreenReader sends it by default, so it
+    /// degrades to "none" with a stderr warning instead of failing every plain OCR request; any value
+    /// other than "tophat" likewise means no preprocessing.
+    /// </remarks>
+    /// <param name="ocrHandler">Tesseract-side handler (fast path and RunTesseractOnBitmap).</param>
+    /// <param name="pythonBridge">Bridge used for every non-tesseract engine.</param>
+    /// <param name="request">Parsed request; Engine, Preprocess, File and Region are read here.</param>
+    /// <returns>The OCR response object produced by the selected engine's handler.</returns>
+    private static object HandleOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
+    {
+        var engine = request.Engine ?? "tesseract";
+        var preprocess = request.Preprocess ?? "none";
+        if (preprocess == "bgsub")
+        {
+            Console.Error.WriteLine("  ocr-pipeline: bgsub needs a reference frame (diff-ocr only); continuing without preprocess");
+            preprocess = "none";
+        }
+
+        // No preprocess + tesseract = original fast path
+        if (engine == "tesseract" && preprocess == "none")
+            return ocrHandler.HandleOcr(request);
+
+        // Capture, then preprocess into a separately owned bitmap; both are disposed on every exit path.
+        using var bitmap = ScreenCapture.CaptureOrLoad(request.File, request.Region);
+        using var processed = preprocess == "tophat"
+            ? ImagePreprocessor.PreprocessForOcr(bitmap)
+            : (Bitmap)bitmap.Clone();
+
+        // Every non-tesseract engine (easyocr, paddleocr) goes through the Python bridge.
+        if (engine != "tesseract")
+        {
+            return pythonBridge.OcrFromBitmap(processed, engine);
+        }
+
+        // Tesseract: pass the requested region, or a 0,0 full-image rect when none was given.
+        var region = request.Region != null
+            ? new RegionRect { X = request.Region.X, Y = request.Region.Y, Width = request.Region.Width, Height = request.Region.Height }
+            : new RegionRect { X = 0, Y = 0, Width = processed.Width, Height = processed.Height };
+        return ocrHandler.RunTesseractOnBitmap(processed, region);
+    }
+
+    /// <summary>
+    /// Unified diff-OCR pipeline for tooltip detection.
+    /// DiffCrop → preprocess (default=bgsub) → route to engine.
+    /// </summary>
+    private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
+    {
+        var engine = request.Engine ?? "tesseract";
+        var preprocess = request.Preprocess ?? "bgsub";
+        var isPythonEngine = engine is "easyocr" or "paddleocr";
+
+        // No engine override + no preprocess override = original Tesseract path (supports test/tune params)
+        if (engine == "tesseract" && request.Preprocess == null)
+            return ocrHandler.HandleDiffOcr(request);
+
         var sw = System.Diagnostics.Stopwatch.StartNew();
-        // Use default params (same wide crop as Tesseract path).
-        // Background subtraction below eliminates stash items from the image.
         var p = new DiffOcrParams();
         if (request.Threshold > 0) p.DiffThresh = request.Threshold;
 
@@ -117,46 +170,72 @@ static class Daemon
         var (cropped, refCropped, current, region) = cropResult.Value;
         using var _current = current;
 
-        // Apply background subtraction to isolate tooltip text.
-        // This removes stash items and game world — only tooltip text remains.
-        // No upscale (upscale=1) to keep the image small for EasyOCR speed.
-        // Hard threshold (softThreshold=false) produces clean binary for OCR.
-        using var processed = ImagePreprocessor.PreprocessWithBackgroundSub(
-            cropped, refCropped, dimPercentile: 40, textThresh: 60, upscale: 1, softThreshold: false);
+        // Preprocess
+        Bitmap processed;
+        if (preprocess == "bgsub")
+        {
+            int upscale = isPythonEngine ? 1 : 2;
+            processed = ImagePreprocessor.PreprocessWithBackgroundSub(
+                cropped, refCropped, dimPercentile: 40, textThresh: 60, upscale: upscale, softThreshold: false);
+        }
+        else if (preprocess == "tophat")
+        {
+            processed = ImagePreprocessor.PreprocessForOcr(cropped);
+        }
+        else // "none"
+        {
+            processed = (Bitmap)cropped.Clone();
+        }
         cropped.Dispose();
         refCropped.Dispose();
-        var diffMs = sw.ElapsedMilliseconds;
 
-        // Save processed crop if path provided
+        var diffMs = sw.ElapsedMilliseconds;
+        using var _processed = processed;
+
+        // Save debug images if path provided
         if (!string.IsNullOrEmpty(request.Path))
         {
             var dir = Path.GetDirectoryName(request.Path);
             if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
                 Directory.CreateDirectory(dir);
+            // Save preprocessed crop
             processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));
+
+            var ext = Path.GetExtension(request.Path);
+            var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
+            current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
         }
 
-        // Send processed image to Python OCR via base64
+        // Route to engine
         sw.Restart();
-        var ocrResult = pythonBridge.OcrFromBitmap(processed, request.Engine!);
-        var ocrMs = sw.ElapsedMilliseconds;
-
-        Console.Error.WriteLine($"  diff-ocr-python: diff={diffMs}ms ocr={ocrMs}ms total={diffMs + ocrMs}ms crop={region.Width}x{region.Height}");
-
-        // Offset word coordinates to screen space
-        foreach (var line in ocrResult.Lines)
-            foreach (var word in line.Words)
-            {
-                word.X += region.X;
-                word.Y += region.Y;
-            }
-
-        return new DiffOcrResponse
+        if (engine == "tesseract")
         {
-            Text = ocrResult.Text,
-            Lines = ocrResult.Lines,
-            Region = region,
-        };
+            var result = ocrHandler.RunTesseractOnBitmap(processed, region);
+            var ocrMs = sw.ElapsedMilliseconds;
+            Console.Error.WriteLine($"  diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
+            return result;
+        }
+        else // easyocr, paddleocr
+        {
+            var ocrResult = pythonBridge.OcrFromBitmap(processed, engine);
+            var ocrMs = sw.ElapsedMilliseconds;
+            Console.Error.WriteLine($"  diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
+
+            // Offset word coordinates to screen space
+            foreach (var line in ocrResult.Lines)
+                foreach (var word in line.Words)
+                {
+                    word.X += region.X;
+                    word.Y += region.Y;
+                }
+
+            return new DiffOcrResponse
+            {
+                Text = ocrResult.Text,
+                Lines = ocrResult.Lines,
+                Region = region,
+            };
+        }
     }
 
     private static void WriteResponse(object response)
diff --git a/tools/OcrDaemon/Models.cs b/tools/OcrDaemon/Models.cs
index 55427b0..03ef8c4 100644
--- a/tools/OcrDaemon/Models.cs
+++ b/tools/OcrDaemon/Models.cs
@@ -42,6 +42,9 @@ class Request
 
     [JsonPropertyName("engine")]
     public string? Engine { get; set; }
+
+    [JsonPropertyName("preprocess")]
+    public string? Preprocess { get; set; }
 }
 
 class RegionRect
diff --git a/tools/OcrDaemon/OcrHandler.cs b/tools/OcrDaemon/OcrHandler.cs
index 04dbe07..26cfe46 100644
--- a/tools/OcrDaemon/OcrHandler.cs
+++ b/tools/OcrDaemon/OcrHandler.cs
@@ -401,6 +401,33 @@ class OcrHandler(TesseractEngine engine)
         }
     }
 
+    /// <summary>Run Tesseract on an already-preprocessed bitmap: convert to Mat, add a white border of <paramref name="pad"/> pixels per side, and OCR with page-segmentation mode <paramref name="psm"/> (default 6).</summary>
+    /// <param name="region">Used to offset the extracted line coordinates and echoed back as <c>Region</c> in the response.</param>
+    /// <param name="upscale">Divisor applied to <paramref name="pad"/> (integer division) when computing the offsets; values &lt;= 0 are treated as 1.</param>
+    /// <remarks>NOTE(review): the offset math presumes the bitmap was enlarged by <paramref name="upscale"/> — confirm for callers that pass a 1x bitmap while leaving the default of 2.</remarks>
+    public DiffOcrResponse RunTesseractOnBitmap(Bitmap processedBmp, RegionRect region, int pad = 10, int upscale = 2, int psm = 6)
+    {
+        using var processedMat = BitmapConverter.ToMat(processedBmp);
+        using var padded = new Mat();
+        Cv2.CopyMakeBorder(processedMat, padded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White); // constant white border, pad px on every side
+        using var bmp = BitmapConverter.ToBitmap(padded);
+        using var pix = ImageUtils.BitmapToPix(bmp);
+        using var page = engine.Process(pix, (PageSegMode)psm);
+
+        var text = page.GetText();
+        int effUpscale = upscale > 0 ? upscale : 1; // guards the divisions below against zero/negative upscale
+        var lines = ImageUtils.ExtractLinesFromPage(page,
+            offsetX: region.X - pad / effUpscale,
+            offsetY: region.Y - pad / effUpscale);
+
+        return new DiffOcrResponse
+        {
+            Text = text,
+            Lines = lines,
+            Region = region,
+        };
+    }
+
     public object HandleTest(Request req) => RunTestCases(new DiffOcrParams(), verbose: true);
 
     public object HandleTune(Request req)
diff --git a/tools/python-ocr/daemon.py b/tools/python-ocr/daemon.py
index 1d38b60..116a1ac 100644
--- a/tools/python-ocr/daemon.py
+++ b/tools/python-ocr/daemon.py
@@ -12,6 +12,7 @@ import sys
 import json
 
 _easyocr_reader = None
+_paddle_ocr = None
 
 
 def _redirect_stdout_to_stderr():
@@ -100,6 +101,46 @@ def run_easyocr_array(img):
     return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
 
 
+def get_paddleocr():
+    """Return the module-wide PaddleOCR instance, constructing it on the first call."""
+    global _paddle_ocr
+    if _paddle_ocr is not None:
+        return _paddle_ocr
+    print("Loading PaddleOCR model...", file=sys.stderr, flush=True)
+    saved_stdout = _redirect_stdout_to_stderr()
+    try:
+        from paddleocr import PaddleOCR
+        _paddle_ocr = PaddleOCR(use_angle_cls=True, lang="en", use_gpu=True, show_log=False)
+    finally:
+        _restore_stdout(saved_stdout)
+    print("PaddleOCR model loaded.", file=sys.stderr, flush=True)
+    return _paddle_ocr
+
+
+def run_paddleocr_array(img):
+    """OCR ``img`` with PaddleOCR and return {"ok", "text", "lines"}, skipping whitespace-only detections."""
+    reader = get_paddleocr()
+
+    saved_stdout = _redirect_stdout_to_stderr()
+    try:
+        pages = reader.ocr(img, cls=True)
+    finally:
+        _restore_stdout(saved_stdout)
+
+    # PaddleOCR returns [page_results]; each item is [bbox_4corners, (text, conf)].
+    detections = (pages[0] if pages else None) or []
+    lines = []
+    for bbox, (raw_text, _conf) in detections:
+        stripped = raw_text.strip()
+        if not stripped:
+            continue
+        x, y, w, h = bbox_to_rect(bbox)
+        # Word boxes are split from the unstripped text, exactly as received.
+        lines.append({"text": stripped, "words": split_into_words(raw_text, x, y, w, h)})
+    full_text = "\n".join(entry["text"] for entry in lines)
+    return {"ok": True, "text": full_text, "lines": lines}
+
+
 def load_image(req):
     """Load image from either imagePath (file) or imageBase64 (base64-encoded PNG)."""
     from PIL import Image
@@ -131,6 +172,8 @@ def handle_request(req):
 
     if engine == "easyocr":
         return run_easyocr_array(img)
+    elif engine == "paddleocr":
+        return run_paddleocr_array(img)
     else:
         return {"ok": False, "error": f"Unknown engine: {engine}"}