finished easyocr and pipeline

This commit is contained in:
Boki 2026-02-12 11:24:31 -05:00
parent 735b6f7157
commit cf5d944fd1
8 changed files with 252 additions and 51 deletions

View file

@ -1,5 +1,6 @@
namespace OcrDaemon;
using System.Drawing;
using System.Text.Json;
using System.Text.Json.Serialization;
using Tesseract;
@ -74,15 +75,11 @@ static class Daemon
object response = request.Cmd?.ToLowerInvariant() switch
{
"ocr" when request.Engine is "easyocr"
=> pythonBridge.HandleOcr(request, request.Engine),
"ocr" => ocrHandler.HandleOcr(request),
"ocr" => HandleOcrPipeline(ocrHandler, pythonBridge, request),
"screenshot" => ocrHandler.HandleScreenshot(request),
"capture" => ocrHandler.HandleCapture(request),
"snapshot" => ocrHandler.HandleSnapshot(request),
"diff-ocr" when request.Engine is "easyocr"
=> HandleDiffOcrPython(ocrHandler, pythonBridge, request),
"diff-ocr" => ocrHandler.HandleDiffOcr(request),
"diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
"test" => ocrHandler.HandleTest(request),
"tune" => ocrHandler.HandleTune(request),
"grid" => gridHandler.HandleGrid(request),
@ -102,11 +99,67 @@ static class Daemon
return 0;
}
private static object HandleDiffOcrPython(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
/// <summary>
/// Unified OCR pipeline for full/region captures.
/// Capture → optional preprocess → route to engine (tesseract / easyocr / paddleocr).
/// </summary>
/// <param name="ocrHandler">Handler used for the plain Tesseract fast path and for running Tesseract on a prepared bitmap.</param>
/// <param name="pythonBridge">Bridge that receives the prepared bitmap for every engine other than "tesseract".</param>
/// <param name="request">
/// Incoming request. A null <c>Engine</c> is treated as "tesseract" and a null <c>Preprocess</c> as "none".
/// </param>
/// <returns>
/// The response object produced by the selected engine, or an <c>ErrorResponse</c> when "bgsub"
/// preprocessing is requested (it needs a reference frame, which only diff-ocr provides).
/// </returns>
private static object HandleOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var preprocess = request.Preprocess ?? "none";

    // No preprocess + tesseract = original fast path
    if (engine == "tesseract" && preprocess == "none")
    {
        return ocrHandler.HandleOcr(request);
    }

    // Reject bgsub up front: it can never succeed here, so there is no point in
    // paying for a capture (or failing on one) before reporting the error.
    if (preprocess == "bgsub")
    {
        return new ErrorResponse("bgsub preprocess requires a reference frame; use diff-ocr instead.");
    }

    // Capture
    using var bitmap = ScreenCapture.CaptureOrLoad(request.File, request.Region);

    // Preprocess: "tophat" runs the preprocessor; any other value is handled as "none"
    // and works on a copy of the capture.
    using Bitmap processed = preprocess == "tophat"
        ? ImagePreprocessor.PreprocessForOcr(bitmap)
        : (Bitmap)bitmap.Clone();

    // Route to engine
    if (engine == "tesseract")
    {
        // Use the requested region when one was given; otherwise describe the whole processed image.
        var region = request.Region != null
            ? new RegionRect { X = request.Region.X, Y = request.Region.Y, Width = request.Region.Width, Height = request.Region.Height }
            : new RegionRect { X = 0, Y = 0, Width = processed.Width, Height = processed.Height };
        return ocrHandler.RunTesseractOnBitmap(processed, region);
    }

    // easyocr, paddleocr (any non-tesseract engine name is forwarded to the Python bridge as-is)
    return pythonBridge.OcrFromBitmap(processed, engine);
}
/// <summary>
/// Unified diff-OCR pipeline for tooltip detection.
/// DiffCrop → preprocess (default=bgsub) → route to engine.
/// </summary>
private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
var engine = request.Engine ?? "tesseract";
var preprocess = request.Preprocess ?? "bgsub";
var isPythonEngine = engine is "easyocr" or "paddleocr";
// No engine override + no preprocess override = original Tesseract path (supports test/tune params)
if (engine == "tesseract" && request.Preprocess == null)
return ocrHandler.HandleDiffOcr(request);
var sw = System.Diagnostics.Stopwatch.StartNew();
// Use default params (same wide crop as Tesseract path).
// Background subtraction below eliminates stash items from the image.
var p = new DiffOcrParams();
if (request.Threshold > 0) p.DiffThresh = request.Threshold;
@ -117,46 +170,72 @@ static class Daemon
var (cropped, refCropped, current, region) = cropResult.Value;
using var _current = current;
// Apply background subtraction to isolate tooltip text.
// This removes stash items and game world — only tooltip text remains.
// No upscale (upscale=1) to keep the image small for EasyOCR speed.
// Hard threshold (softThreshold=false) produces clean binary for OCR.
using var processed = ImagePreprocessor.PreprocessWithBackgroundSub(
cropped, refCropped, dimPercentile: 40, textThresh: 60, upscale: 1, softThreshold: false);
// Preprocess
Bitmap processed;
if (preprocess == "bgsub")
{
int upscale = isPythonEngine ? 1 : 2;
processed = ImagePreprocessor.PreprocessWithBackgroundSub(
cropped, refCropped, dimPercentile: 40, textThresh: 60, upscale: upscale, softThreshold: false);
}
else if (preprocess == "tophat")
{
processed = ImagePreprocessor.PreprocessForOcr(cropped);
}
else // "none"
{
processed = (Bitmap)cropped.Clone();
}
cropped.Dispose();
refCropped.Dispose();
var diffMs = sw.ElapsedMilliseconds;
// Save processed crop if path provided
var diffMs = sw.ElapsedMilliseconds;
using var _processed = processed;
// Save debug images if path provided
if (!string.IsNullOrEmpty(request.Path))
{
var dir = Path.GetDirectoryName(request.Path);
if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
Directory.CreateDirectory(dir);
// Save preprocessed crop
processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));
var ext = Path.GetExtension(request.Path);
var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
}
// Send processed image to Python OCR via base64
// Route to engine
sw.Restart();
var ocrResult = pythonBridge.OcrFromBitmap(processed, request.Engine!);
var ocrMs = sw.ElapsedMilliseconds;
Console.Error.WriteLine($" diff-ocr-python: diff={diffMs}ms ocr={ocrMs}ms total={diffMs + ocrMs}ms crop={region.Width}x{region.Height}");
// Offset word coordinates to screen space
foreach (var line in ocrResult.Lines)
foreach (var word in line.Words)
{
word.X += region.X;
word.Y += region.Y;
}
return new DiffOcrResponse
if (engine == "tesseract")
{
Text = ocrResult.Text,
Lines = ocrResult.Lines,
Region = region,
};
var result = ocrHandler.RunTesseractOnBitmap(processed, region);
var ocrMs = sw.ElapsedMilliseconds;
Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
return result;
}
else // easyocr, paddleocr
{
var ocrResult = pythonBridge.OcrFromBitmap(processed, engine);
var ocrMs = sw.ElapsedMilliseconds;
Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
// Offset word coordinates to screen space
foreach (var line in ocrResult.Lines)
foreach (var word in line.Words)
{
word.X += region.X;
word.Y += region.Y;
}
return new DiffOcrResponse
{
Text = ocrResult.Text,
Lines = ocrResult.Lines,
Region = region,
};
}
}
private static void WriteResponse(object response)