finished easyocr and pipeline
This commit is contained in:
parent
735b6f7157
commit
cf5d944fd1
8 changed files with 252 additions and 51 deletions
|
|
@ -8,7 +8,7 @@ import { logger } from '../util/logger.js';
|
||||||
import { sleep } from '../util/sleep.js';
|
import { sleep } from '../util/sleep.js';
|
||||||
import type { BotController } from './BotController.js';
|
import type { BotController } from './BotController.js';
|
||||||
import type { ScreenReader } from '../game/ScreenReader.js';
|
import type { ScreenReader } from '../game/ScreenReader.js';
|
||||||
import type { OcrEngine } from '../game/OcrDaemon.js';
|
import type { OcrEngine, OcrPreprocess } from '../game/OcrDaemon.js';
|
||||||
import { GRID_LAYOUTS } from '../game/GridReader.js';
|
import { GRID_LAYOUTS } from '../game/GridReader.js';
|
||||||
import type { GameController } from '../game/GameController.js';
|
import type { GameController } from '../game/GameController.js';
|
||||||
|
|
||||||
|
|
@ -131,8 +131,8 @@ export class DashboardServer {
|
||||||
this.app.post('/api/debug/ocr-engine', (req, res) => {
|
this.app.post('/api/debug/ocr-engine', (req, res) => {
|
||||||
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
|
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
|
||||||
const { engine } = req.body as { engine: string };
|
const { engine } = req.body as { engine: string };
|
||||||
if (!['tesseract', 'easyocr'].includes(engine)) {
|
if (!['tesseract', 'easyocr', 'paddleocr'].includes(engine)) {
|
||||||
res.status(400).json({ error: 'Invalid engine. Must be tesseract or easyocr.' });
|
res.status(400).json({ error: 'Invalid engine. Must be tesseract, easyocr, or paddleocr.' });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
this.debug.screenReader.debugOcrEngine = engine as OcrEngine;
|
this.debug.screenReader.debugOcrEngine = engine as OcrEngine;
|
||||||
|
|
@ -140,6 +140,24 @@ export class DashboardServer {
|
||||||
res.json({ ok: true });
|
res.json({ ok: true });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// OCR preprocess selection
|
||||||
|
this.app.get('/api/debug/ocr-preprocess', (_req, res) => {
|
||||||
|
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
|
||||||
|
res.json({ ok: true, preprocess: this.debug.screenReader.debugPreprocess });
|
||||||
|
});
|
||||||
|
|
||||||
|
this.app.post('/api/debug/ocr-preprocess', (req, res) => {
|
||||||
|
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
|
||||||
|
const { preprocess } = req.body as { preprocess: string };
|
||||||
|
if (!['none', 'bgsub', 'tophat'].includes(preprocess)) {
|
||||||
|
res.status(400).json({ error: 'Invalid preprocess. Must be none, bgsub, or tophat.' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.debug.screenReader.debugPreprocess = preprocess as OcrPreprocess;
|
||||||
|
this.broadcastLog('info', `OCR preprocess set to: ${preprocess}`);
|
||||||
|
res.json({ ok: true });
|
||||||
|
});
|
||||||
|
|
||||||
this.app.post('/api/debug/ocr', async (_req, res) => {
|
this.app.post('/api/debug/ocr', async (_req, res) => {
|
||||||
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
|
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -455,6 +455,12 @@
|
||||||
<select id="ocrEngineSelect" onchange="setOcrEngine(this.value)" style="padding:6px 10px;background:#0d1117;border:1px solid #30363d;border-radius:6px;color:#e6edf3;font-size:13px">
|
<select id="ocrEngineSelect" onchange="setOcrEngine(this.value)" style="padding:6px 10px;background:#0d1117;border:1px solid #30363d;border-radius:6px;color:#e6edf3;font-size:13px">
|
||||||
<option value="tesseract">Tesseract</option>
|
<option value="tesseract">Tesseract</option>
|
||||||
<option value="easyocr">EasyOCR</option>
|
<option value="easyocr">EasyOCR</option>
|
||||||
|
<option value="paddleocr">PaddleOCR</option>
|
||||||
|
</select>
|
||||||
|
<select id="ocrPreprocessSelect" onchange="setOcrPreprocess(this.value)" style="padding:6px 10px;background:#0d1117;border:1px solid #30363d;border-radius:6px;color:#e6edf3;font-size:13px">
|
||||||
|
<option value="none">No Preprocess</option>
|
||||||
|
<option value="bgsub" selected>BgSub</option>
|
||||||
|
<option value="tophat">TopHat</option>
|
||||||
</select>
|
</select>
|
||||||
<button onclick="debugScreenshot()">Screenshot</button>
|
<button onclick="debugScreenshot()">Screenshot</button>
|
||||||
<button onclick="debugOcr()">OCR Screen</button>
|
<button onclick="debugOcr()">OCR Screen</button>
|
||||||
|
|
@ -1004,8 +1010,27 @@
|
||||||
} catch {}
|
} catch {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Ask the server to switch the OCR preprocessing mode used by the debug tools.
 *
 * Called from the preprocess <select>'s onchange handler, so nothing awaits
 * the returned promise; every failure is therefore handled here instead of
 * being left to surface as an unhandled rejection.
 *
 * @param {string} preprocess - Mode to select ('none', 'bgsub' or 'tophat').
 * @returns {Promise<boolean>} true when the server accepted the change,
 *   false when the request failed or was rejected.
 */
async function setOcrPreprocess(preprocess) {
  try {
    const res = await fetch('/api/debug/ocr-preprocess', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ preprocess }),
    });
    if (res.ok) {
      return true;
    }
    // The endpoint answers 400/503 with a JSON body carrying an `error` field;
    // tolerate a body that is not JSON so the status is still reported.
    const detail = await res.json().catch(() => ({}));
    console.error(`Failed to set OCR preprocess: ${detail.error || res.status}`);
    return false;
  } catch (err) {
    console.error('Failed to set OCR preprocess', err);
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Sync the preprocess <select> with the mode the server currently uses.
 *
 * Best-effort: the dashboard keeps working if the lookup fails, so failures
 * are logged rather than thrown. A non-2xx answer (the endpoint replies 503
 * when debug tools are unavailable) leaves the control untouched.
 *
 * @returns {Promise<void>}
 */
async function loadOcrPreprocess() {
  try {
    const res = await fetch('/api/debug/ocr-preprocess');
    if (!res.ok) {
      return;
    }
    const data = await res.json();
    const select = document.getElementById('ocrPreprocessSelect');
    if (select && data.ok && data.preprocess) {
      select.value = data.preprocess;
    }
  } catch (err) {
    console.warn('Could not load OCR preprocess setting', err);
  }
}
|
||||||
|
|
||||||
connect();
|
connect();
|
||||||
loadOcrEngine();
|
loadOcrEngine();
|
||||||
|
loadOcrPreprocess();
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,9 @@ export interface TemplateMatchResult {
|
||||||
confidence: number;
|
confidence: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type OcrEngine = 'tesseract' | 'easyocr';
|
export type OcrEngine = 'tesseract' | 'easyocr' | 'paddleocr';
|
||||||
|
|
||||||
|
export type OcrPreprocess = 'none' | 'bgsub' | 'tophat';
|
||||||
|
|
||||||
interface DaemonRequest {
|
interface DaemonRequest {
|
||||||
cmd: string;
|
cmd: string;
|
||||||
|
|
@ -79,6 +81,7 @@ interface DaemonRequest {
|
||||||
minCellSize?: number;
|
minCellSize?: number;
|
||||||
maxCellSize?: number;
|
maxCellSize?: number;
|
||||||
engine?: string;
|
engine?: string;
|
||||||
|
preprocess?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface DaemonResponse {
|
interface DaemonResponse {
|
||||||
|
|
@ -133,10 +136,11 @@ export class OcrDaemon {
|
||||||
|
|
||||||
// ── Public API ──────────────────────────────────────────────────────────
|
// ── Public API ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
async ocr(region?: Region, engine?: OcrEngine): Promise<OcrResponse> {
|
async ocr(region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess): Promise<OcrResponse> {
|
||||||
const req: DaemonRequest = { cmd: 'ocr' };
|
const req: DaemonRequest = { cmd: 'ocr' };
|
||||||
if (region) req.region = region;
|
if (region) req.region = region;
|
||||||
if (engine && engine !== 'tesseract') req.engine = engine;
|
if (engine && engine !== 'tesseract') req.engine = engine;
|
||||||
|
if (preprocess && preprocess !== 'none') req.preprocess = preprocess;
|
||||||
// Python engines need longer timeout for first model load + download
|
// Python engines need longer timeout for first model load + download
|
||||||
const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
|
const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
|
||||||
const resp = await this.sendWithRetry(req, timeout);
|
const resp = await this.sendWithRetry(req, timeout);
|
||||||
|
|
@ -182,11 +186,12 @@ export class OcrDaemon {
|
||||||
await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT);
|
await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine): Promise<DiffOcrResponse> {
|
async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess): Promise<DiffOcrResponse> {
|
||||||
const req: DaemonRequest = { cmd: 'diff-ocr' };
|
const req: DaemonRequest = { cmd: 'diff-ocr' };
|
||||||
if (savePath) req.path = savePath;
|
if (savePath) req.path = savePath;
|
||||||
if (region) req.region = region;
|
if (region) req.region = region;
|
||||||
if (engine && engine !== 'tesseract') req.engine = engine;
|
if (engine && engine !== 'tesseract') req.engine = engine;
|
||||||
|
if (preprocess) req.preprocess = preprocess;
|
||||||
const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
|
const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
|
||||||
const resp = await this.sendWithRetry(req, timeout);
|
const resp = await this.sendWithRetry(req, timeout);
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import { mkdir } from 'fs/promises';
|
import { mkdir } from 'fs/promises';
|
||||||
import { join } from 'path';
|
import { join } from 'path';
|
||||||
import { logger } from '../util/logger.js';
|
import { logger } from '../util/logger.js';
|
||||||
import { OcrDaemon, type OcrResponse, type OcrEngine, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
|
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
|
||||||
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
|
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
|
||||||
import type { Region } from '../types.js';
|
import type { Region } from '../types.js';
|
||||||
|
|
||||||
|
|
@ -13,6 +13,7 @@ export class ScreenReader {
|
||||||
private daemon = new OcrDaemon();
|
private daemon = new OcrDaemon();
|
||||||
readonly grid = new GridReader(this.daemon);
|
readonly grid = new GridReader(this.daemon);
|
||||||
debugOcrEngine: OcrEngine = 'tesseract';
|
debugOcrEngine: OcrEngine = 'tesseract';
|
||||||
|
debugPreprocess: OcrPreprocess = 'bgsub';
|
||||||
|
|
||||||
// ── Screenshot capture ──────────────────────────────────────────────
|
// ── Screenshot capture ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
@ -241,20 +242,20 @@ export class ScreenReader {
|
||||||
|
|
||||||
async debugDiffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
|
async debugDiffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
|
||||||
const t = performance.now();
|
const t = performance.now();
|
||||||
const result = await this.daemon.diffOcr(savePath, region, this.debugOcrEngine);
|
const result = await this.daemon.diffOcr(savePath, region, this.debugOcrEngine, this.debugPreprocess);
|
||||||
logger.info({ engine: this.debugOcrEngine, ms: elapsed(t) }, 'debugDiffOcr');
|
logger.info({ engine: this.debugOcrEngine, preprocess: this.debugPreprocess, ms: elapsed(t) }, 'debugDiffOcr');
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
async debugOcr(region?: Region): Promise<OcrResponse> {
|
async debugOcr(region?: Region): Promise<OcrResponse> {
|
||||||
const t = performance.now();
|
const t = performance.now();
|
||||||
const result = await this.daemon.ocr(region, this.debugOcrEngine);
|
const result = await this.daemon.ocr(region, this.debugOcrEngine, this.debugPreprocess);
|
||||||
logger.info({ engine: this.debugOcrEngine, ms: elapsed(t) }, 'debugOcr');
|
logger.info({ engine: this.debugOcrEngine, preprocess: this.debugPreprocess, ms: elapsed(t) }, 'debugOcr');
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
async debugReadFullScreen(): Promise<string> {
|
async debugReadFullScreen(): Promise<string> {
|
||||||
const result = await this.daemon.ocr(undefined, this.debugOcrEngine);
|
const result = await this.daemon.ocr(undefined, this.debugOcrEngine, this.debugPreprocess);
|
||||||
return result.text;
|
return result.text;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -263,13 +264,13 @@ export class ScreenReader {
|
||||||
fuzzy: boolean = false,
|
fuzzy: boolean = false,
|
||||||
): Promise<{ x: number; y: number } | null> {
|
): Promise<{ x: number; y: number } | null> {
|
||||||
const t = performance.now();
|
const t = performance.now();
|
||||||
const result = await this.daemon.ocr(undefined, this.debugOcrEngine);
|
const result = await this.daemon.ocr(undefined, this.debugOcrEngine, this.debugPreprocess);
|
||||||
const pos = this.findWordInOcrResult(result, searchText, fuzzy);
|
const pos = this.findWordInOcrResult(result, searchText, fuzzy);
|
||||||
|
|
||||||
if (pos) {
|
if (pos) {
|
||||||
logger.info({ searchText, engine: this.debugOcrEngine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'debugFindText found');
|
logger.info({ searchText, engine: this.debugOcrEngine, preprocess: this.debugPreprocess, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'debugFindText found');
|
||||||
} else {
|
} else {
|
||||||
logger.info({ searchText, engine: this.debugOcrEngine, totalMs: elapsed(t) }, 'debugFindText not found');
|
logger.info({ searchText, engine: this.debugOcrEngine, preprocess: this.debugPreprocess, totalMs: elapsed(t) }, 'debugFindText not found');
|
||||||
}
|
}
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
namespace OcrDaemon;
|
namespace OcrDaemon;
|
||||||
|
|
||||||
|
using System.Drawing;
|
||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
using System.Text.Json.Serialization;
|
using System.Text.Json.Serialization;
|
||||||
using Tesseract;
|
using Tesseract;
|
||||||
|
|
@ -74,15 +75,11 @@ static class Daemon
|
||||||
|
|
||||||
object response = request.Cmd?.ToLowerInvariant() switch
|
object response = request.Cmd?.ToLowerInvariant() switch
|
||||||
{
|
{
|
||||||
"ocr" when request.Engine is "easyocr"
|
"ocr" => HandleOcrPipeline(ocrHandler, pythonBridge, request),
|
||||||
=> pythonBridge.HandleOcr(request, request.Engine),
|
|
||||||
"ocr" => ocrHandler.HandleOcr(request),
|
|
||||||
"screenshot" => ocrHandler.HandleScreenshot(request),
|
"screenshot" => ocrHandler.HandleScreenshot(request),
|
||||||
"capture" => ocrHandler.HandleCapture(request),
|
"capture" => ocrHandler.HandleCapture(request),
|
||||||
"snapshot" => ocrHandler.HandleSnapshot(request),
|
"snapshot" => ocrHandler.HandleSnapshot(request),
|
||||||
"diff-ocr" when request.Engine is "easyocr"
|
"diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
|
||||||
=> HandleDiffOcrPython(ocrHandler, pythonBridge, request),
|
|
||||||
"diff-ocr" => ocrHandler.HandleDiffOcr(request),
|
|
||||||
"test" => ocrHandler.HandleTest(request),
|
"test" => ocrHandler.HandleTest(request),
|
||||||
"tune" => ocrHandler.HandleTune(request),
|
"tune" => ocrHandler.HandleTune(request),
|
||||||
"grid" => gridHandler.HandleGrid(request),
|
"grid" => gridHandler.HandleGrid(request),
|
||||||
|
|
@ -102,11 +99,67 @@ static class Daemon
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static object HandleDiffOcrPython(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
|
/// <summary>
/// Unified OCR pipeline for full/region captures.
/// Capture → optional preprocess → route to engine (tesseract / easyocr / paddleocr).
/// </summary>
/// <remarks>
/// <para>Engine defaults to "tesseract" and preprocess to "none" when the request omits them;
/// that combination is delegated unchanged to <c>ocrHandler.HandleOcr</c>.</para>
/// <para>"bgsub" is rejected with an <c>ErrorResponse</c> because this path has no reference
/// frame to subtract; callers that want background subtraction must use the diff-ocr command.
/// Any preprocess value other than "tophat" or "bgsub" is treated as "none", and any engine
/// other than "tesseract" is forwarded to the Python bridge.</para>
/// </remarks>
/// <param name="ocrHandler">Handler used for the Tesseract paths.</param>
/// <param name="pythonBridge">Bridge used for the non-Tesseract engines.</param>
/// <param name="request">Parsed daemon request (engine, preprocess, file, region).</param>
/// <returns>The engine's response object, or an <c>ErrorResponse</c> for unsupported input.</returns>
private static object HandleOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var preprocess = request.Preprocess ?? "none";

    // No preprocess + tesseract = original fast path
    if (engine == "tesseract" && preprocess == "none")
        return ocrHandler.HandleOcr(request);

    // Validate before capturing so an unsupported request does not pay for a screen grab.
    if (preprocess == "bgsub")
        return new ErrorResponse("bgsub preprocess requires a reference frame; use diff-ocr instead.");

    // Capture
    using var bitmap = ScreenCapture.CaptureOrLoad(request.File, request.Region);

    // Preprocess: "tophat" produces a new image; everything else OCRs a copy of the capture
    // so that `processed` always owns its own bitmap and can be disposed independently.
    using var processed = preprocess == "tophat"
        ? ImagePreprocessor.PreprocessForOcr(bitmap)
        : (Bitmap)bitmap.Clone();

    // Route to engine
    if (engine != "tesseract")
        return pythonBridge.OcrFromBitmap(processed, engine); // easyocr, paddleocr

    // Without an explicit region the whole processed image is the region, anchored at (0, 0).
    var region = request.Region != null
        ? new RegionRect { X = request.Region.X, Y = request.Region.Y, Width = request.Region.Width, Height = request.Region.Height }
        : new RegionRect { X = 0, Y = 0, Width = processed.Width, Height = processed.Height };
    return ocrHandler.RunTesseractOnBitmap(processed, region);
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Unified diff-OCR pipeline for tooltip detection.
|
||||||
|
/// DiffCrop → preprocess (default=bgsub) → route to engine.
|
||||||
|
/// </summary>
|
||||||
|
private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
|
||||||
|
{
|
||||||
|
var engine = request.Engine ?? "tesseract";
|
||||||
|
var preprocess = request.Preprocess ?? "bgsub";
|
||||||
|
var isPythonEngine = engine is "easyocr" or "paddleocr";
|
||||||
|
|
||||||
|
// No engine override + no preprocess override = original Tesseract path (supports test/tune params)
|
||||||
|
if (engine == "tesseract" && request.Preprocess == null)
|
||||||
|
return ocrHandler.HandleDiffOcr(request);
|
||||||
|
|
||||||
var sw = System.Diagnostics.Stopwatch.StartNew();
|
var sw = System.Diagnostics.Stopwatch.StartNew();
|
||||||
// Use default params (same wide crop as Tesseract path).
|
|
||||||
// Background subtraction below eliminates stash items from the image.
|
|
||||||
var p = new DiffOcrParams();
|
var p = new DiffOcrParams();
|
||||||
if (request.Threshold > 0) p.DiffThresh = request.Threshold;
|
if (request.Threshold > 0) p.DiffThresh = request.Threshold;
|
||||||
|
|
||||||
|
|
@ -117,46 +170,72 @@ static class Daemon
|
||||||
var (cropped, refCropped, current, region) = cropResult.Value;
|
var (cropped, refCropped, current, region) = cropResult.Value;
|
||||||
using var _current = current;
|
using var _current = current;
|
||||||
|
|
||||||
// Apply background subtraction to isolate tooltip text.
|
// Preprocess
|
||||||
// This removes stash items and game world — only tooltip text remains.
|
Bitmap processed;
|
||||||
// No upscale (upscale=1) to keep the image small for EasyOCR speed.
|
if (preprocess == "bgsub")
|
||||||
// Hard threshold (softThreshold=false) produces clean binary for OCR.
|
{
|
||||||
using var processed = ImagePreprocessor.PreprocessWithBackgroundSub(
|
int upscale = isPythonEngine ? 1 : 2;
|
||||||
cropped, refCropped, dimPercentile: 40, textThresh: 60, upscale: 1, softThreshold: false);
|
processed = ImagePreprocessor.PreprocessWithBackgroundSub(
|
||||||
|
cropped, refCropped, dimPercentile: 40, textThresh: 60, upscale: upscale, softThreshold: false);
|
||||||
|
}
|
||||||
|
else if (preprocess == "tophat")
|
||||||
|
{
|
||||||
|
processed = ImagePreprocessor.PreprocessForOcr(cropped);
|
||||||
|
}
|
||||||
|
else // "none"
|
||||||
|
{
|
||||||
|
processed = (Bitmap)cropped.Clone();
|
||||||
|
}
|
||||||
cropped.Dispose();
|
cropped.Dispose();
|
||||||
refCropped.Dispose();
|
refCropped.Dispose();
|
||||||
var diffMs = sw.ElapsedMilliseconds;
|
|
||||||
|
|
||||||
// Save processed crop if path provided
|
var diffMs = sw.ElapsedMilliseconds;
|
||||||
|
using var _processed = processed;
|
||||||
|
|
||||||
|
// Save debug images if path provided
|
||||||
if (!string.IsNullOrEmpty(request.Path))
|
if (!string.IsNullOrEmpty(request.Path))
|
||||||
{
|
{
|
||||||
var dir = Path.GetDirectoryName(request.Path);
|
var dir = Path.GetDirectoryName(request.Path);
|
||||||
if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
|
if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
|
||||||
Directory.CreateDirectory(dir);
|
Directory.CreateDirectory(dir);
|
||||||
|
// Save preprocessed crop
|
||||||
processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));
|
processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));
|
||||||
|
|
||||||
|
var ext = Path.GetExtension(request.Path);
|
||||||
|
var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
|
||||||
|
current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send processed image to Python OCR via base64
|
// Route to engine
|
||||||
sw.Restart();
|
sw.Restart();
|
||||||
var ocrResult = pythonBridge.OcrFromBitmap(processed, request.Engine!);
|
if (engine == "tesseract")
|
||||||
var ocrMs = sw.ElapsedMilliseconds;
|
|
||||||
|
|
||||||
Console.Error.WriteLine($" diff-ocr-python: diff={diffMs}ms ocr={ocrMs}ms total={diffMs + ocrMs}ms crop={region.Width}x{region.Height}");
|
|
||||||
|
|
||||||
// Offset word coordinates to screen space
|
|
||||||
foreach (var line in ocrResult.Lines)
|
|
||||||
foreach (var word in line.Words)
|
|
||||||
{
|
|
||||||
word.X += region.X;
|
|
||||||
word.Y += region.Y;
|
|
||||||
}
|
|
||||||
|
|
||||||
return new DiffOcrResponse
|
|
||||||
{
|
{
|
||||||
Text = ocrResult.Text,
|
var result = ocrHandler.RunTesseractOnBitmap(processed, region);
|
||||||
Lines = ocrResult.Lines,
|
var ocrMs = sw.ElapsedMilliseconds;
|
||||||
Region = region,
|
Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
|
||||||
};
|
return result;
|
||||||
|
}
|
||||||
|
else // easyocr, paddleocr
|
||||||
|
{
|
||||||
|
var ocrResult = pythonBridge.OcrFromBitmap(processed, engine);
|
||||||
|
var ocrMs = sw.ElapsedMilliseconds;
|
||||||
|
Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
|
||||||
|
|
||||||
|
// Offset word coordinates to screen space
|
||||||
|
foreach (var line in ocrResult.Lines)
|
||||||
|
foreach (var word in line.Words)
|
||||||
|
{
|
||||||
|
word.X += region.X;
|
||||||
|
word.Y += region.Y;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new DiffOcrResponse
|
||||||
|
{
|
||||||
|
Text = ocrResult.Text,
|
||||||
|
Lines = ocrResult.Lines,
|
||||||
|
Region = region,
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void WriteResponse(object response)
|
private static void WriteResponse(object response)
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,9 @@ class Request
|
||||||
|
|
||||||
[JsonPropertyName("engine")]
|
[JsonPropertyName("engine")]
|
||||||
public string? Engine { get; set; }
|
public string? Engine { get; set; }
|
||||||
|
|
||||||
|
[JsonPropertyName("preprocess")]
|
||||||
|
public string? Preprocess { get; set; }
|
||||||
}
|
}
|
||||||
|
|
||||||
class RegionRect
|
class RegionRect
|
||||||
|
|
|
||||||
|
|
@ -401,6 +401,33 @@ class OcrHandler(TesseractEngine engine)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// OCRs a bitmap that has already been preprocessed: adds a white border of
/// <paramref name="pad"/> pixels, runs Tesseract with the requested page segmentation mode,
/// and reports word positions shifted by the supplied region's origin.
/// </summary>
/// <param name="processedBmp">Preprocessed image to recognise.</param>
/// <param name="region">Region the image was taken from; echoed back in the response.</param>
/// <param name="pad">Border width, in pixels, added on every side before OCR.</param>
/// <param name="upscale">Divisor applied to <paramref name="pad"/> when computing the coordinate offset; values below 1 count as 1.</param>
/// <param name="psm">Tesseract page segmentation mode, cast to <c>PageSegMode</c>.</param>
/// <returns>Recognised text, per-line word data, and the region passed in.</returns>
public DiffOcrResponse RunTesseractOnBitmap(Bitmap processedBmp, RegionRect region, int pad = 10, int upscale = 2, int psm = 6)
{
    // Surround the image with a constant white border before recognition.
    using var source = BitmapConverter.ToMat(processedBmp);
    using var bordered = new Mat();
    Cv2.CopyMakeBorder(source, bordered, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);

    using var borderedBitmap = BitmapConverter.ToBitmap(bordered);
    using var pix = ImageUtils.BitmapToPix(borderedBitmap);
    using var page = engine.Process(pix, (PageSegMode)psm);

    var recognisedText = page.GetText();

    // The border shifts every word by `pad`; undo that (scaled down by the upscale factor)
    // while moving coordinates to the region's origin.
    int scale = upscale <= 0 ? 1 : upscale;
    int borderOffset = pad / scale;
    var recognisedLines = ImageUtils.ExtractLinesFromPage(page,
        offsetX: region.X - borderOffset,
        offsetY: region.Y - borderOffset);

    var response = new DiffOcrResponse
    {
        Text = recognisedText,
        Lines = recognisedLines,
        Region = region,
    };
    return response;
}
|
||||||
|
|
||||||
public object HandleTest(Request req) => RunTestCases(new DiffOcrParams(), verbose: true);
|
public object HandleTest(Request req) => RunTestCases(new DiffOcrParams(), verbose: true);
|
||||||
|
|
||||||
public object HandleTune(Request req)
|
public object HandleTune(Request req)
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import sys
|
||||||
import json
|
import json
|
||||||
|
|
||||||
_easyocr_reader = None
|
_easyocr_reader = None
|
||||||
|
_paddle_ocr = None
|
||||||
|
|
||||||
|
|
||||||
def _redirect_stdout_to_stderr():
|
def _redirect_stdout_to_stderr():
|
||||||
|
|
@ -100,6 +101,46 @@ def run_easyocr_array(img):
|
||||||
return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
|
return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
|
||||||
|
|
||||||
|
|
||||||
|
def get_paddleocr():
    """Return the module-wide PaddleOCR instance, building it on first use.

    The import and construction run between _redirect_stdout_to_stderr() and
    _restore_stdout(); progress messages are written to stderr.
    """
    global _paddle_ocr
    if _paddle_ocr is not None:
        return _paddle_ocr

    sys.stderr.write("Loading PaddleOCR model...\n")
    sys.stderr.flush()

    saved_stdout = _redirect_stdout_to_stderr()
    try:
        # Imported lazily so the dependency is only needed when this engine is requested.
        from paddleocr import PaddleOCR
        _paddle_ocr = PaddleOCR(use_angle_cls=True, lang="en", use_gpu=True, show_log=False)
    finally:
        _restore_stdout(saved_stdout)

    sys.stderr.write("PaddleOCR model loaded.\n")
    sys.stderr.flush()
    return _paddle_ocr
|
||||||
|
|
||||||
|
|
||||||
|
def run_paddleocr_array(img):
    """Run PaddleOCR on an image and return the response dict for the caller.

    Returns {"ok": True, "text": <lines joined by newlines>, "lines": [...]},
    where each line entry holds the stripped text and its word boxes.
    Detections whose text is blank after stripping are dropped.
    """
    engine = get_paddleocr()

    saved_stdout = _redirect_stdout_to_stderr()
    try:
        pages = engine.ocr(img, cls=True)
    finally:
        _restore_stdout(saved_stdout)

    # PaddleOCR returns [page_results], each item is [bbox_4corners, (text, conf)]
    detections = pages[0] if pages and pages[0] else []

    line_entries = []
    for bbox, (text, _conf) in detections:
        stripped = text.strip()
        if not stripped:
            continue
        x, y, w, h = bbox_to_rect(bbox)
        # Word splitting receives the raw (unstripped) text, as before.
        line_entries.append({"text": stripped, "words": split_into_words(text, x, y, w, h)})

    full_text = "\n".join(entry["text"] for entry in line_entries)
    return {"ok": True, "text": full_text, "lines": line_entries}
|
||||||
|
|
||||||
|
|
||||||
def load_image(req):
|
def load_image(req):
|
||||||
"""Load image from either imagePath (file) or imageBase64 (base64-encoded PNG)."""
|
"""Load image from either imagePath (file) or imageBase64 (base64-encoded PNG)."""
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
@ -131,6 +172,8 @@ def handle_request(req):
|
||||||
|
|
||||||
if engine == "easyocr":
|
if engine == "easyocr":
|
||||||
return run_easyocr_array(img)
|
return run_easyocr_array(img)
|
||||||
|
elif engine == "paddleocr":
|
||||||
|
return run_paddleocr_array(img)
|
||||||
else:
|
else:
|
||||||
return {"ok": False, "error": f"Unknown engine: {engine}"}
|
return {"ok": False, "error": f"Unknown engine: {engine}"}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue