added easyOCR
This commit is contained in:
parent
37d6678577
commit
9f208b0606
27 changed files with 1780 additions and 112 deletions
157
tools/python-ocr/daemon.py
Normal file
157
tools/python-ocr/daemon.py
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
"""
|
||||
Persistent Python OCR daemon (stdin/stdout JSON-per-line protocol).
|
||||
|
||||
Supports EasyOCR engine, lazy-loaded on first use.
|
||||
Managed as a subprocess by the C# OcrDaemon.
|
||||
|
||||
Request:  {"cmd": "ocr", "engine": "easyocr", "imagePath": "C:\\temp\\screenshot.png"}  ("imageBase64": "<base64-encoded image>" may be sent instead of "imagePath")
|
||||
Response: {"ok": true, "text": "...", "lines": [{"text": "...", "words": [...]}]}
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
|
||||
# Cached EasyOCR reader; stays None until get_easyocr() builds it on first use.
_easyocr_reader = None
|
||||
|
||||
|
||||
def _redirect_stdout_to_stderr():
    """Point ``sys.stdout`` at ``sys.stderr`` and return the stream it replaced.

    While the redirect is active, library ``print()`` calls land on stderr, so
    they cannot corrupt the JSON-per-line protocol spoken on the real stdout.
    Hand the returned stream to ``_restore_stdout`` to undo the redirect.
    """
    previous, sys.stdout = sys.stdout, sys.stderr
    return previous
|
||||
|
||||
|
||||
def _restore_stdout(real_stdout):
    """Make *real_stdout* the active ``sys.stdout`` again.

    *real_stdout* is the stream previously returned by
    ``_redirect_stdout_to_stderr``.
    """
    sys.stdout = real_stdout
|
||||
|
||||
|
||||
def get_easyocr():
    """Return the shared EasyOCR reader, creating it on the first call.

    The reader is stored in the module-level ``_easyocr_reader`` so the model
    is only loaded once per process. It is created for the ``"en"`` language
    list with ``gpu=True``. Progress messages are written to stderr.
    """
    global _easyocr_reader
    if _easyocr_reader is not None:
        return _easyocr_reader

    sys.stderr.write("Loading EasyOCR model...\n")
    sys.stderr.flush()

    # EasyOCR prints download progress to stdout; keep that output off the
    # protocol stream for the duration of the import and model load.
    saved_stdout = _redirect_stdout_to_stderr()
    try:
        import easyocr

        _easyocr_reader = easyocr.Reader(["en"], gpu=True)
    finally:
        _restore_stdout(saved_stdout)

    sys.stderr.write("EasyOCR model loaded.\n")
    sys.stderr.flush()
    return _easyocr_reader
|
||||
|
||||
|
||||
def bbox_to_rect(corners):
    """Collapse a 4-corner bbox into an axis-aligned ``(x, y, width, height)`` tuple.

    *corners* is a sequence of ``[x, y]`` points, e.g.
    ``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]``. Each extreme is converted
    with ``int()`` (truncating any fractional part) before the width and
    height are derived from them.
    """
    left = int(min(point[0] for point in corners))
    top = int(min(point[1] for point in corners))
    right = int(max(point[0] for point in corners))
    bottom = int(max(point[1] for point in corners))
    return left, top, right - left, bottom - top
|
||||
|
||||
|
||||
def split_into_words(text, x, y, width, height):
    """Split a detection's text into words with estimated bounding boxes.

    The detection box is treated as a row of equal-width character cells: one
    cell per character of every word plus one cell for each gap between
    adjacent words. Each word's left and right edges are computed from its
    cumulative cell offsets, so the gaps between words are accounted for and
    the last word always ends at ``x + width``.

    Args:
        text: Recognised text for one detection.
        x, y, width, height: Axis-aligned box of the whole detection.

    Returns:
        A list of ``{"text", "x", "y", "width", "height"}`` dicts, one per
        whitespace-separated word. Text with zero or one word yields a single
        entry covering the whole box (with the text stripped).
    """
    parts = text.split()
    if len(parts) <= 1:
        return [{"text": text.strip(), "x": x, "y": y, "width": width, "height": height}]

    # str.split() never yields empty parts, so with two or more words this
    # total is always positive and needs no zero guard.
    total_cells = sum(len(part) for part in parts) + len(parts) - 1

    words = []
    cell = 0
    for part in parts:
        # Derive both edges from cumulative offsets rather than summing
        # truncated widths, which would let rounding error accumulate.
        left = x + int(width * cell / total_cells)
        right = x + int(width * (cell + len(part)) / total_cells)
        words.append({
            "text": part,
            "x": left,
            "y": y,
            # Never report a zero-width box, even for very narrow detections.
            "width": max(1, right - left),
            "height": height,
        })
        cell += len(part) + 1  # skip the gap that follows this word
    return words
|
||||
|
||||
|
||||
def run_easyocr(image_path):
    """Run EasyOCR on the image file at *image_path*.

    The file is opened with Pillow, converted to a NumPy array and passed to
    ``run_easyocr_array``, whose response dict is returned unchanged. Any
    error raised while opening or decoding the file propagates to the caller.
    """
    from PIL import Image
    import numpy as np

    # Image.open() is lazy and keeps the file handle open. The context manager
    # closes it deterministically once the pixels are copied into the array,
    # which matters in a long-running daemon.
    with Image.open(image_path) as image:
        img = np.array(image)
    return run_easyocr_array(img)
|
||||
|
||||
|
||||
def run_easyocr_array(img):
    """Run EasyOCR on an already-loaded image and build the response dict.

    Returns ``{"ok": True, "text": ..., "lines": [...]}`` where each line holds
    the stripped detection text and its per-word boxes, and ``"text"`` is all
    line texts joined with newlines. Detections whose text is blank are
    dropped.
    """
    reader = get_easyocr()

    # EasyOCR can print warnings during inference; keep them off the protocol stream.
    saved_stdout = _redirect_stdout_to_stderr()
    try:
        # batch_size=32: batch GPU recognition of detected text regions.
        detections = reader.readtext(img, batch_size=32)
    finally:
        _restore_stdout(saved_stdout)

    # Each detection is (bbox_4corners, text, confidence); confidence is unused.
    lines = []
    for bbox, text, _conf in detections:
        stripped = text.strip()
        if not stripped:
            continue
        words = split_into_words(text, *bbox_to_rect(bbox))
        lines.append({"text": stripped, "words": words})

    full_text = "\n".join(entry["text"] for entry in lines)
    return {"ok": True, "text": full_text, "lines": lines}
|
||||
|
||||
|
||||
def load_image(req):
    """Load the request's image as a NumPy array.

    The image comes from ``req["imageBase64"]`` (base64-encoded image bytes)
    when that key is present and non-empty, otherwise from the file named by
    ``req["imagePath"]``.

    Returns:
        The decoded image as a NumPy array, or ``None`` when the request
        supplies neither source.

    Errors from base64 decoding or from Pillow (missing file, undecodable
    data) propagate to the caller.
    """
    image_base64 = req.get("imageBase64")
    image_path = req.get("imagePath")
    if not image_base64 and not image_path:
        # Nothing to load: bail out before paying for the heavy imports.
        return None

    from PIL import Image
    import numpy as np

    if image_base64:
        import base64
        import io

        source = io.BytesIO(base64.b64decode(image_base64))
    else:
        source = image_path

    # Image.open() is lazy; the context manager releases the underlying file
    # as soon as the pixel data has been copied into the array.
    with Image.open(source) as image:
        return np.array(image)
|
||||
|
||||
|
||||
def handle_request(req):
    """Dispatch one decoded JSON request and return the response dict.

    Cheap validation runs first (request shape, command, engine), so an
    unsupported request is rejected without reading or decoding any image.

    Args:
        req: The decoded JSON request, expected to be a dict with ``"cmd"``,
            ``"engine"`` and either ``"imagePath"`` or ``"imageBase64"``.

    Returns:
        The OCR response on success, or ``{"ok": False, "error": ...}`` when
        the request is malformed, names an unknown command or engine, or
        carries no image.
    """
    if not isinstance(req, dict):
        # Valid JSON that is not an object (e.g. a list or a string).
        return {"ok": False, "error": "Request must be a JSON object"}

    cmd = req.get("cmd")
    if cmd != "ocr":
        return {"ok": False, "error": f"Unknown command: {cmd}"}

    engine = req.get("engine", "")
    if engine != "easyocr":
        return {"ok": False, "error": f"Unknown engine: {engine}"}

    img = load_image(req)
    if img is None:
        return {"ok": False, "error": "Missing imagePath or imageBase64"}

    return run_easyocr_array(img)
|
||||
|
||||
|
||||
def main():
    """Run the daemon loop: one JSON request per stdin line, one JSON reply per stdout line.

    A ``{"ok": true, "ready": true}`` line is written first to signal
    readiness. Blank input lines are skipped. Any exception raised while
    parsing, handling or serializing a request is reported as
    ``{"ok": false, "error": ...}`` and the loop carries on; it ends when
    stdin reaches EOF.
    """
    # Signal ready
    sys.stdout.write(json.dumps({"ok": True, "ready": True}) + "\n")
    sys.stdout.flush()

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        try:
            # Serialize inside the try: a response json cannot encode must
            # become an error reply rather than terminate the daemon.
            payload = json.dumps(handle_request(json.loads(line)))
        except Exception as e:
            # Top-level boundary: always answer so the parent process never
            # blocks waiting for a reply.
            payload = json.dumps({"ok": False, "error": str(e)})
        sys.stdout.write(payload + "\n")
        sys.stdout.flush()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Start the request loop only when run as a script, not when imported.
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue