added easyOCR

2026-02-12 01:04:19 -05:00 · 2026-02-12 01:04:19 -05:00 · 9f208b0606
commit 9f208b0606
parent 37d6678577
27 changed files with 1780 additions and 112 deletions
--- a/tools/python-ocr/daemon.py
+++ b/tools/python-ocr/daemon.py
@@ -0,0 +1,157 @@
+"""
+Persistent Python OCR daemon (stdin/stdout JSON-per-line protocol).
+
+Supports EasyOCR engine, lazy-loaded on first use.
+Managed as a subprocess by the C# OcrDaemon.
+
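+On startup the daemon writes {"ok": true, "ready": true} once, then answers one request per line.
+
+Request:  {"cmd": "ocr", "engine": "easyocr", "imagePath": "C:\\temp\\screenshot.png"}
+          {"cmd": "ocr", "engine": "easyocr", "imageBase64": "<base64-encoded image>"}
+          (imageBase64 takes priority when both fields are present)
+Response: {"ok": true, "text": "...", "lines": [{"text": "...", "words": [...]}]}
+Failure:  {"ok": false, "error": "..."}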
+"""
+
+import sys
+import json
+
+# Module-level cache for the EasyOCR reader; stays None until get_easyocr() builds it.
+_easyocr_reader = None
+
+
+def _redirect_stdout_to_stderr():
+    """Point ``sys.stdout`` at ``sys.stderr`` and hand back the previous stdout.
+
+    Keeps stray ``print()`` output from libraries off the JSON protocol
+    stream. Pass the returned object to ``_restore_stdout`` to undo this.
+    """
+    previous, sys.stdout = sys.stdout, sys.stderr
+    return previous
+
+
+def _restore_stdout(real_stdout):
+    """Reinstall ``real_stdout`` (as returned by ``_redirect_stdout_to_stderr``) as ``sys.stdout``."""
+    sys.stdout = real_stdout
+
+
+def get_easyocr():
+    """Return the shared EasyOCR reader, constructing it on the first call.
+
+    The reader is built for English (``["en"]``) with ``gpu=True`` and kept
+    in the module-level ``_easyocr_reader`` so later calls reuse it.
+    Progress messages go to stderr; stdout is redirected while the library
+    is imported and the reader is built.
+    """
+    global _easyocr_reader
+    if _easyocr_reader is not None:
+        return _easyocr_reader
+
+    sys.stderr.write("Loading EasyOCR model...\n")
+    sys.stderr.flush()
+    # EasyOCR prints download progress to stdout, which would corrupt the
+    # JSON protocol, so send it to stderr for the duration of the load.
+    saved_stdout = _redirect_stdout_to_stderr()
+    try:
+        import easyocr
+        _easyocr_reader = easyocr.Reader(["en"], gpu=True)
+    finally:
+        _restore_stdout(saved_stdout)
+    sys.stderr.write("EasyOCR model loaded.\n")
+    sys.stderr.flush()
+    return _easyocr_reader
+
+
+def bbox_to_rect(corners):
+    """Return the axis-aligned box around a 4-corner bbox as ``(x, y, width, height)``.
+
+    ``corners`` is ``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]``. Each extreme
+    is truncated to ``int`` before the width and height are computed.
+    """
+    left = int(min(point[0] for point in corners))
+    top = int(min(point[1] for point in corners))
+    right = int(max(point[0] for point in corners))
+    bottom = int(max(point[1] for point in corners))
+    return left, top, right - left, bottom - top
+
+
+def split_into_words(text, x, y, width, height):
+    """Split a detection's text into words with estimated bounding boxes.
+
+    The detection box is divided horizontally in proportion to character
+    offsets within the stripped text, so the whitespace between words takes
+    up its share of the width and the last word ends at the box's right edge.
+    Every word keeps the detection's ``y`` and ``height``.
+
+    Args:
+        text: Recognized text of one detection.
+        x, y, width, height: Axis-aligned box of the detection.
+
+    Returns:
+        A list of ``{"text", "x", "y", "width", "height"}`` dicts, one per
+        whitespace-separated word. Text with at most one word yields a single
+        entry covering the whole box.
+    """
+    stripped = text.strip()
+    parts = stripped.split()
+    if len(parts) <= 1:
+        return [{"text": stripped, "x": x, "y": y, "width": width, "height": height}]
+
+    # At least two non-empty words remain here, so the length is never zero.
+    total_chars = len(stripped)
+    words = []
+    cursor = 0
+    for part in parts:
+        # Words come from split() in order, so each is found at or after cursor.
+        start = stripped.index(part, cursor)
+        cursor = start + len(part)
+        left = x + int(width * start / total_chars)
+        right = x + int(width * cursor / total_chars)
+        words.append({
+            "text": part,
+            "x": left,
+            "y": y,
+            # Keep every box at least one unit wide, even in very narrow detections.
+            "width": max(1, right - left),
+            "height": height,
+        })
+    return words
+
+
+def run_easyocr(image_path):
+    """Run EasyOCR on the image file at ``image_path``.
+
+    Returns whatever ``run_easyocr_array`` returns for the decoded pixels.
+    """
+    from PIL import Image
+    import numpy as np
+
+    # Image.open is lazy and keeps the file open; the context manager closes
+    # it deterministically once the pixels have been copied into the array.
+    with Image.open(image_path) as image:
+        pixels = np.array(image)
+    return run_easyocr_array(pixels)
+
+
+def run_easyocr_array(img):
+    """Run EasyOCR on an image array and build the daemon's OCR response.
+
+    Detections whose text is empty after stripping are dropped. Each kept
+    detection becomes one entry in ``lines`` with its stripped text and the
+    word boxes from ``split_into_words``; ``text`` joins the line texts with
+    newlines.
+
+    Returns:
+        ``{"ok": True, "text": ..., "lines": [...]}``.
+    """
+    reader = get_easyocr()
+
+    # easyocr can print warnings during inference; keep them off the protocol stream.
+    saved_stdout = _redirect_stdout_to_stderr()
+    try:
+        # batch_size=32: batch GPU recognition of detected text regions
+        detections = reader.readtext(img, batch_size=32)
+    finally:
+        _restore_stdout(saved_stdout)
+
+    # Each detection is (bbox_4corners, text, confidence); confidence is unused.
+    lines = []
+    for bbox, text, _conf in detections:
+        stripped = text.strip()
+        if not stripped:
+            continue
+        words = split_into_words(text, *bbox_to_rect(bbox))
+        lines.append({"text": stripped, "words": words})
+    full_text = "\n".join(line["text"] for line in lines)
+    return {"ok": True, "text": full_text, "lines": lines}
+
+
+def load_image(req):
+    """Decode the request's image into a NumPy array.
+
+    ``imageBase64`` (base64-encoded image bytes) takes priority over
+    ``imagePath`` (a file path) when both are present.
+
+    Returns:
+        The image as a NumPy array, or ``None`` when neither field is set.
+    """
+    image_base64 = req.get("imageBase64")
+    image_path = req.get("imagePath")
+    if not image_base64 and not image_path:
+        # Nothing to decode; skip the heavy imports entirely.
+        return None
+
+    from PIL import Image
+    import numpy as np
+
+    if image_base64:
+        import base64
+        import io
+        source = io.BytesIO(base64.b64decode(image_base64))
+    else:
+        source = image_path
+
+    # Image.open is lazy; the context manager closes the underlying file
+    # once the pixels have been copied into the array.
+    with Image.open(source) as image:
+        return np.array(image)
+
+
+def handle_request(req):
+    """Dispatch one decoded JSON request and return the response dict.
+
+    Only ``{"cmd": "ocr", "engine": "easyocr", ...}`` is supported. Every
+    rejected request yields ``{"ok": False, "error": "..."}``.
+
+    Args:
+        req: The decoded JSON value of one request line.
+
+    Returns:
+        The OCR result from ``run_easyocr_array`` or an error dict.
+    """
+    # json.loads may produce a list, string or number; only objects have fields.
+    if not isinstance(req, dict):
+        return {"ok": False, "error": "Request must be a JSON object"}
+
+    cmd = req.get("cmd")
+    if cmd != "ocr":
+        return {"ok": False, "error": f"Unknown command: {cmd}"}
+
+    # Validate the engine before decoding the image so a bad request is
+    # rejected without paying for the image load.
+    engine = req.get("engine", "")
+    if engine != "easyocr":
+        return {"ok": False, "error": f"Unknown engine: {engine}"}
+
+    img = load_image(req)
+    if img is None:
+        return {"ok": False, "error": "Missing imagePath or imageBase64"}
+    return run_easyocr_array(img)
+
+
+def main():
+    """Serve OCR requests over stdin/stdout, one JSON document per line.
+
+    Writes ``{"ok": true, "ready": true}`` first, then answers every
+    non-blank stdin line with exactly one JSON line on stdout. Any failure
+    while parsing, handling or serializing a request is reported as
+    ``{"ok": false, "error": "..."}`` so one bad request cannot stop the loop.
+    """
+    # Signal ready
+    sys.stdout.write(json.dumps({"ok": True, "ready": True}) + "\n")
+    sys.stdout.flush()
+
+    for line in sys.stdin:
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            req = json.loads(line)
+            # Serialize inside the try: a response json cannot encode must
+            # become an error reply rather than an uncaught exception.
+            payload = json.dumps(handle_request(req))
+        except Exception as e:
+            # Top-level boundary: fall back to the exception type when the
+            # message is empty so the caller never gets a blank error.
+            payload = json.dumps({"ok": False, "error": str(e) or type(e).__name__})
+        sys.stdout.write(payload + "\n")
+        sys.stdout.flush()
+
+
+# Run the request loop only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()