finished easyocr and pipeline
This commit is contained in:
parent
735b6f7157
commit
cf5d944fd1
8 changed files with 252 additions and 51 deletions
|
|
@ -12,6 +12,7 @@ import sys
|
|||
import json
|
||||
|
||||
_easyocr_reader = None
|
||||
_paddle_ocr = None
|
||||
|
||||
|
||||
def _redirect_stdout_to_stderr():
|
||||
|
|
@ -100,6 +101,46 @@ def run_easyocr_array(img):
|
|||
return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
|
||||
|
||||
|
||||
def get_paddleocr():
    """Return the shared PaddleOCR engine, building it on the first call.

    The engine is cached in the module-level ``_paddle_ocr`` so later calls
    return the same object without loading anything again. Progress messages
    are written to stderr around the load.

    Returns:
        The cached ``PaddleOCR`` instance.
    """
    global _paddle_ocr

    # Already built by an earlier call: nothing to load.
    if _paddle_ocr is not None:
        return _paddle_ocr

    sys.stderr.write("Loading PaddleOCR model...\n")
    sys.stderr.flush()

    # stdout is swapped out for the duration of the import and construction
    # (presumably so library chatter cannot land on this process's real
    # stdout -- confirm against the helper). It is restored even on failure.
    saved_stdout = _redirect_stdout_to_stderr()
    try:
        # Imported lazily so the dependency is only needed when this engine is used.
        from paddleocr import PaddleOCR

        engine = PaddleOCR(
            use_angle_cls=True,
            lang="en",
            use_gpu=True,
            show_log=False,
        )
        _paddle_ocr = engine
    finally:
        _restore_stdout(saved_stdout)

    sys.stderr.write("PaddleOCR model loaded.\n")
    sys.stderr.flush()
    return _paddle_ocr
|
||||
|
||||
|
||||
def run_paddleocr_array(img):
    """Run PaddleOCR on ``img`` and package the recognized text per line.

    Args:
        img: Image passed straight through to ``PaddleOCR.ocr``.

    Returns:
        A dict with ``"ok"`` set to ``True``, ``"text"`` holding every
        non-blank recognized line joined by newlines, and ``"lines"`` holding
        one ``{"text", "words"}`` entry per non-blank line.
    """
    engine = get_paddleocr()

    # Same stdout swap as during model load; always undone afterwards.
    saved_stdout = _redirect_stdout_to_stderr()
    try:
        results = engine.ocr(img, cls=True)
    finally:
        _restore_stdout(saved_stdout)

    # PaddleOCR returns [page_results]; each item is [bbox_4corners, (text, conf)].
    # An empty or missing first page simply yields no detections.
    detections = results[0] if results and results[0] else []

    lines = []
    all_text_parts = []
    for bbox, (text, _conf) in detections:
        stripped = text.strip()
        if not stripped:
            # Whitespace-only recognitions carry no content.
            continue
        x, y, w, h = bbox_to_rect(bbox)
        # The word splitter receives the raw (unstripped) text plus the line rect.
        words = split_into_words(text, x, y, w, h)
        lines.append({"text": stripped, "words": words})
        all_text_parts.append(stripped)

    return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
|
||||
|
||||
|
||||
def load_image(req):
|
||||
"""Load image from either imagePath (file) or imageBase64 (base64-encoded PNG)."""
|
||||
from PIL import Image
|
||||
|
|
@ -131,6 +172,8 @@ def handle_request(req):
|
|||
|
||||
if engine == "easyocr":
|
||||
return run_easyocr_array(img)
|
||||
elif engine == "paddleocr":
|
||||
return run_paddleocr_array(img)
|
||||
else:
|
||||
return {"ok": False, "error": f"Unknown engine: {engine}"}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue