switched to new way

This commit is contained in:
Boki 2026-02-13 01:12:51 -05:00
parent f22d182c8f
commit 4a65c8e17b
96 changed files with 4991 additions and 10025 deletions

View file

@ -2,7 +2,7 @@
Persistent Python OCR daemon (stdin/stdout JSON-per-line protocol).
Supports EasyOCR engine, lazy-loaded on first use.
Managed as a subprocess by the C# OcrDaemon.
Managed as a subprocess by PythonOcrBridge in Poe2Trade.Screen.
Request: {"cmd": "ocr", "engine": "easyocr", "imagePath": "C:\\temp\\screenshot.png"}
Response: {"ok": true, "text": "...", "lines": [{"text": "...", "words": [...]}]}
@ -12,7 +12,6 @@ import sys
import json
_easyocr_reader = None
_paddle_ocr = None
def _redirect_stdout_to_stderr():
@ -116,13 +115,6 @@ def items_to_response(items):
return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
def run_easyocr(image_path):
    """Run EasyOCR on an image stored on disk.

    The file is decoded with Pillow, converted to a NumPy array and handed
    to ``run_easyocr_array`` with that function's default settings.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Whatever ``run_easyocr_array`` returns for the decoded pixels.
    """
    from PIL import Image
    import numpy as np

    # Open the image in a context manager so the underlying file handle is
    # released deterministically instead of whenever the Image object is
    # garbage-collected. np.array() forces the pixel data to be read while
    # the file is still open and yields an array independent of the file.
    with Image.open(image_path) as image:
        img = np.array(image)
    return run_easyocr_array(img)
def run_easyocr_array(img, merge_gap=0, **easyocr_kwargs):
reader = get_easyocr()
@ -147,67 +139,6 @@ def run_easyocr_array(img, merge_gap=0, **easyocr_kwargs):
return items_to_response(items)
def get_paddleocr():
    """Return the module-wide PaddleOCR instance, creating it on first call.

    The instance is cached in the ``_paddle_ocr`` global, so the model is
    only constructed once per process. Progress messages are written to
    stderr before and after construction.

    Returns:
        The cached ``PaddleOCR`` object.
    """
    global _paddle_ocr

    # Fast path: an earlier call already built the model.
    if _paddle_ocr is not None:
        return _paddle_ocr

    sys.stderr.write("Loading PaddleOCR model...\n")
    sys.stderr.flush()

    import os

    # Only set the flag when the caller's environment has not already
    # chosen a value for it.
    os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")

    model_options = {
        "use_doc_orientation_classify": False,
        "use_doc_unwarping": False,
        "use_textline_orientation": False,
        "lang": "en",
        "ocr_version": "PP-OCRv4",
    }

    # NOTE(review): stdout is swapped out for the duration of the import and
    # construction, presumably so library chatter cannot leak into the
    # JSON-per-line stdout protocol -- confirm against the helper.
    saved_stdout = _redirect_stdout_to_stderr()
    try:
        from paddleocr import PaddleOCR

        _paddle_ocr = PaddleOCR(**model_options)
    finally:
        # Always put stdout back, even if loading the model failed.
        _restore_stdout(saved_stdout)

    sys.stderr.write("PaddleOCR model loaded.\n")
    sys.stderr.flush()
    return _paddle_ocr
def run_paddleocr_array(img, merge_gap=0):
    """Run PaddleOCR on an in-memory image and build the response dict.

    Args:
        img: Image array exposing ``.shape``; 2-D input is expanded to three
            identical channels and 4-channel input is cut down to its first
            three channels before recognition.
        merge_gap: When greater than zero, detections are passed through
            ``merge_nearby_detections`` with this value before the response
            is built.

    Returns:
        The result of ``items_to_response`` for the collected detections.
    """
    engine = get_paddleocr()

    # Ensure RGB 3-channel
    if len(img.shape) == 2:
        import numpy as np

        img = np.stack([img, img, img], axis=-1)
    elif img.shape[2] == 4:
        img = img[:, :, :3]

    # NOTE(review): stdout is redirected around the prediction call,
    # presumably to keep library output off the protocol stream -- confirm.
    saved_stdout = _redirect_stdout_to_stderr()
    try:
        results = engine.predict(img)
    finally:
        _restore_stdout(saved_stdout)

    detections = []
    # PaddleOCR 3.4: results is list of OCRResult objects
    for result in results:
        # Results may be mapping-like (have .get) or plain attribute holders.
        if hasattr(result, "get"):
            texts = result.get("rec_texts", [])
        else:
            texts = getattr(result, "rec_texts", [])
        if hasattr(result, "get"):
            polys = result.get("dt_polys", [])
        else:
            polys = getattr(result, "dt_polys", [])

        for index, raw_text in enumerate(texts):
            cleaned = raw_text.strip()
            if not cleaned:
                # Skip whitespace-only recognitions.
                continue
            if index < len(polys):
                x, y, w, h = bbox_to_rect(polys[index])
            else:
                # No polygon for this text: fall back to an all-zero box.
                x, y, w, h = 0, 0, 0, 0
            detections.append(
                {"text": cleaned, "x": x, "y": y, "w": w, "h": h}
            )

    if merge_gap > 0:
        detections = merge_nearby_detections(detections, merge_gap)
    return items_to_response(detections)
def load_image(req):
"""Load image from either imagePath (file) or imageBase64 (base64-encoded PNG)."""
from PIL import Image
@ -232,29 +163,23 @@ def handle_request(req):
if cmd != "ocr":
return {"ok": False, "error": f"Unknown command: {cmd}"}
engine = req.get("engine", "")
img = load_image(req)
if img is None:
return {"ok": False, "error": "Missing imagePath or imageBase64"}
merge_gap = req.get("mergeGap", 0)
if engine == "easyocr":
easyocr_kwargs = {}
for json_key, py_param in [
("linkThreshold", "link_threshold"),
("textThreshold", "text_threshold"),
("lowText", "low_text"),
("widthThs", "width_ths"),
("paragraph", "paragraph"),
]:
if json_key in req:
easyocr_kwargs[py_param] = req[json_key]
return run_easyocr_array(img, merge_gap=merge_gap, **easyocr_kwargs)
elif engine == "paddleocr":
return run_paddleocr_array(img, merge_gap=merge_gap)
else:
return {"ok": False, "error": f"Unknown engine: {engine}"}
easyocr_kwargs = {}
for json_key, py_param in [
("linkThreshold", "link_threshold"),
("textThreshold", "text_threshold"),
("lowText", "low_text"),
("widthThs", "width_ths"),
("paragraph", "paragraph"),
]:
if json_key in req:
easyocr_kwargs[py_param] = req[json_key]
return run_easyocr_array(img, merge_gap=merge_gap, **easyocr_kwargs)
def main():