switched to new way

This commit is contained in:
Boki 2026-02-13 01:12:51 -05:00
parent f22d182c8f
commit 4a65c8e17b
96 changed files with 4991 additions and 10025 deletions

View file

@ -1,623 +0,0 @@
namespace OcrDaemon;
using System.Drawing;
using System.Text.Json;
using System.Text.Json.Serialization;
using Tesseract;
static class Daemon
{
// Serializer settings shared by every JSON read and write in this class:
// property names use camelCase and null-valued properties are omitted when writing.
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};
/// <summary>
/// Daemon entry point. Creates the Tesseract engine, writes a ready message, then
/// serves requests: one JSON line is read from stdin, dispatched on its <c>cmd</c>
/// field, and answered with exactly one JSON line on stdout.
/// </summary>
/// <returns>
/// 0 once stdin reaches end-of-stream; 1 if the Tesseract engine could not be created
/// (an error response is written first).
/// </returns>
public static int Run()
{
    ScreenCapture.InitDpiAwareness();

    // Pre-create the Tesseract OCR engine (reused across all requests).
    // The custom "poe2" model is preferred when its traineddata file is present.
    var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";
    TesseractEngine tessEngine;
    try
    {
        tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
        tessEngine.DefaultPageSegMode = PageSegMode.SingleBlock;
        tessEngine.SetVariable("preserve_interword_spaces", "1");

        // Optional per-language dictionaries; each is applied only if its file exists.
        var userWordsPath = Path.Combine(tessdataPath, $"{tessLang}.user-words");
        var userPatternsPath = Path.Combine(tessdataPath, $"{tessLang}.user-patterns");
        if (File.Exists(userWordsPath))
        {
            tessEngine.SetVariable("user_words_file", userWordsPath);
            var lineCount = File.ReadAllLines(userWordsPath).Length;
            Console.Error.WriteLine($"Loaded user-words: {lineCount} words from {userWordsPath}");
        }
        if (File.Exists(userPatternsPath))
        {
            tessEngine.SetVariable("user_patterns_file", userPatternsPath);
            var lineCount = File.ReadAllLines(userPatternsPath).Length;
            Console.Error.WriteLine($"Loaded user-patterns: {lineCount} patterns from {userPatternsPath}");
        }
        Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
    }
    catch (Exception ex)
    {
        // Name the traineddata file that was actually requested (poe2 or eng).
        WriteResponse(new ErrorResponse($"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/{tessLang}.traineddata exists."));
        return 1;
    }

    try
    {
        // Signal ready
        WriteResponse(new ReadyResponse());

        var ocrHandler = new OcrHandler(tessEngine);
        var gridHandler = new GridHandler();
        var detectGridHandler = new DetectGridHandler();
        var templateMatchHandler = new TemplateMatchHandler();
        var edgeCropHandler = new EdgeCropHandler();
        var pythonBridge = new PythonOcrBridge();
        try
        {
            // Main loop: read one JSON line, handle, write one JSON line
            string? line;
            while ((line = Console.In.ReadLine()) != null)
            {
                line = line.Trim();
                if (line.Length == 0) continue;

                try
                {
                    var request = JsonSerializer.Deserialize<Request>(line, JsonOptions);
                    if (request == null)
                    {
                        WriteResponse(new ErrorResponse("Failed to parse request"));
                        continue;
                    }

                    object response = request.Cmd?.ToLowerInvariant() switch
                    {
                        "ocr" => HandleOcrPipeline(ocrHandler, pythonBridge, request),
                        "screenshot" => ocrHandler.HandleScreenshot(request),
                        "capture" => ocrHandler.HandleCapture(request),
                        "snapshot" => ocrHandler.HandleSnapshot(request),
                        "diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
                        "edge-ocr" => HandleEdgeOcrPipeline(ocrHandler, edgeCropHandler, pythonBridge, request),
                        "test" => ocrHandler.HandleTest(request),
                        "tune" => ocrHandler.HandleTune(request),
                        "crop-test" => HandleCropTest(ocrHandler, edgeCropHandler, request),
                        "crop-tune" => HandleCropTune(ocrHandler, request),
                        "grid" => gridHandler.HandleGrid(request),
                        "detect-grid" => detectGridHandler.HandleDetectGrid(request),
                        "match-template" => templateMatchHandler.HandleTemplateMatch(request),
                        _ => new ErrorResponse($"Unknown command: {request.Cmd}"),
                    };
                    WriteResponse(response);
                }
                catch (Exception ex)
                {
                    // A failing request must not take the daemon down; report it and keep serving.
                    WriteResponse(new ErrorResponse(ex.Message));
                }
            }
        }
        finally
        {
            // Runs even if stdin/stdout fail mid-loop, so the bridge is always released.
            pythonBridge.Dispose();
        }
    }
    finally
    {
        tessEngine.Dispose();
    }

    return 0;
}
/// <summary>
/// Unified OCR pipeline for full/region captures.
/// Capture → optional preprocess → route to engine (tesseract / easyocr / paddleocr).
/// </summary>
/// <param name="ocrHandler">Handler that runs Tesseract and the plain (no-preprocess) OCR path.</param>
/// <param name="pythonBridge">Bridge used for any engine other than "tesseract".</param>
/// <param name="request">
/// The request. <c>Engine</c> defaults to "tesseract" and <c>Preprocess</c> to "none";
/// the top-hat kernel size comes from <c>Params.Ocr.KernelSize</c> (41 when no params are given).
/// </param>
/// <returns>The OCR response, or an <c>ErrorResponse</c> when "bgsub" preprocessing is requested.</returns>
private static object HandleOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var preprocess = request.Preprocess ?? "none";

    // No preprocess + tesseract = original fast path
    if (engine == "tesseract" && preprocess == "none")
        return ocrHandler.HandleOcr(request);

    // Reject bgsub before capturing: it can never succeed here, so don't pay for a capture.
    if (preprocess == "bgsub")
        return new ErrorResponse("bgsub preprocess requires a reference frame; use diff-ocr instead.");

    var kernelSize = request.Params?.Ocr.KernelSize ?? 41;

    // Capture
    using var bitmap = ScreenCapture.CaptureOrLoad(request.File, request.Region);

    // Preprocess: "tophat" filters the capture; any other value passes a copy through unchanged.
    using var processed = preprocess == "tophat"
        ? ImagePreprocessor.PreprocessForOcr(bitmap, kernelSize: kernelSize)
        : (Bitmap)bitmap.Clone();

    // Route to engine
    if (engine == "tesseract")
    {
        // Without an explicit region, the whole processed image is reported as the region.
        var region = request.Region != null
            ? new RegionRect { X = request.Region.X, Y = request.Region.Y, Width = request.Region.Width, Height = request.Region.Height }
            : new RegionRect { X = 0, Y = 0, Width = processed.Width, Height = processed.Height };
        return ocrHandler.RunTesseractOnBitmap(processed, region);
    }

    // easyocr, paddleocr
    return pythonBridge.OcrFromBitmap(processed, engine);
}
/// <summary>
/// Unified diff-OCR pipeline for tooltip detection.
/// DiffCrop → preprocess (default=bgsub) → route to engine.
/// </summary>
/// <param name="ocrHandler">Handler providing <c>DiffCrop</c>, the legacy diff-OCR path and Tesseract OCR.</param>
/// <param name="pythonBridge">Bridge used for any engine other than "tesseract".</param>
/// <param name="request">
/// The request. A positive <c>Threshold</c> overrides <c>Params.Crop.DiffThresh</c>;
/// when <c>Path</c> is set, the preprocessed crop and the full frame are saved for debugging.
/// </param>
/// <returns>
/// The OCR result; an empty <c>OcrResponse</c> when <c>DiffCrop</c> finds nothing.
/// For non-Tesseract engines, word coordinates are shifted by the crop region's origin.
/// </returns>
private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var isPythonEngine = engine is "easyocr" or "paddleocr";
    var p = request.Params ?? new DiffOcrParams();
    var cropParams = p.Crop;
    var ocrParams = p.Ocr;
    if (request.Threshold > 0) cropParams.DiffThresh = request.Threshold;

    // Determine preprocess mode: explicit request.Preprocess > params.UseBackgroundSub > default "bgsub"
    string preprocess;
    if (request.Preprocess != null)
        preprocess = request.Preprocess;
    else if (request.Params != null)
        preprocess = ocrParams.UseBackgroundSub ? "bgsub" : "tophat";
    else
        preprocess = "bgsub";

    // No engine override + no preprocess override + no params = original Tesseract path
    if (engine == "tesseract" && request.Preprocess == null && request.Params == null)
        return ocrHandler.HandleDiffOcr(request);

    var sw = System.Diagnostics.Stopwatch.StartNew();
    var cropResult = ocrHandler.DiffCrop(request, cropParams);
    if (cropResult == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, refCropped, current, region) = cropResult.Value;
    using var _current = current;

    // Preprocess — only sees ocrParams.
    // The two crops are released in finally so they are not leaked if preprocessing throws.
    Bitmap processed;
    try
    {
        if (preprocess == "bgsub")
        {
            // Python engines receive the image at native scale.
            int upscale = isPythonEngine ? 1 : ocrParams.Upscale;
            processed = ImagePreprocessor.PreprocessWithBackgroundSub(
                cropped, refCropped, dimPercentile: ocrParams.DimPercentile, textThresh: ocrParams.TextThresh,
                upscale: upscale, softThreshold: ocrParams.SoftThreshold);
        }
        else if (preprocess == "tophat")
        {
            processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize);
        }
        else // "none"
        {
            processed = (Bitmap)cropped.Clone();
        }
    }
    finally
    {
        cropped.Dispose();
        refCropped.Dispose();
    }

    // Covers both the diff crop and the preprocessing step.
    var diffMs = sw.ElapsedMilliseconds;
    using var _processed = processed;

    // Save debug images if path provided
    if (!string.IsNullOrEmpty(request.Path))
    {
        var dir = Path.GetDirectoryName(request.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);

        // Save preprocessed crop
        processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));

        // Full frame goes next to it as "<name>.full<ext>"
        var ext = Path.GetExtension(request.Path);
        var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
        current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
    }

    // Route to engine
    sw.Restart();
    if (engine == "tesseract")
    {
        var result = ocrHandler.RunTesseractOnBitmap(processed, region);
        var ocrMs = sw.ElapsedMilliseconds;
        Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");
        return result;
    }
    else // easyocr, paddleocr
    {
        var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
        var ocrMs = sw.ElapsedMilliseconds;
        Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");

        // Offset word coordinates to screen space
        foreach (var line in ocrResult.Lines)
            foreach (var word in line.Words)
            {
                word.X += region.X;
                word.Y += region.Y;
            }

        return new DiffOcrResponse
        {
            Text = ocrResult.Text,
            Lines = ocrResult.Lines,
            Region = region,
        };
    }
}
/// <summary>
/// Edge-based tooltip detection pipeline.
/// EdgeCrop → preprocess (tophat only; bgsub falls back to tophat) → route to engine.
/// </summary>
/// <param name="ocrHandler">Handler that runs Tesseract on the processed crop.</param>
/// <param name="edgeCropHandler">Handler that locates and crops the tooltip region.</param>
/// <param name="pythonBridge">Bridge used for any engine other than "tesseract".</param>
/// <param name="request">
/// The request. <c>EdgeParams</c> supplies crop/OCR parameters (defaults when absent);
/// when <c>Path</c> is set, the preprocessed crop and the full capture are saved for debugging.
/// </param>
/// <returns>
/// The OCR result; an empty <c>OcrResponse</c> when <c>EdgeCrop</c> finds nothing.
/// For non-Tesseract engines, word coordinates are shifted by the crop region's origin.
/// </returns>
private static object HandleEdgeOcrPipeline(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var ep = request.EdgeParams ?? new EdgeOcrParams();
    var cropParams = ep.Crop;
    var ocrParams = ep.Ocr;

    // Edge method only supports tophat (no reference frame for bgsub)
    string preprocess = request.Preprocess ?? "tophat";
    if (preprocess == "bgsub") preprocess = "tophat";

    var sw = System.Diagnostics.Stopwatch.StartNew();
    var cropResult = edgeCropHandler.EdgeCrop(request, cropParams);
    if (cropResult == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, fullCapture, region) = cropResult.Value;
    using var _fullCapture = fullCapture;

    // Preprocess. The crop is released in finally so it is not leaked if preprocessing throws.
    Bitmap processed;
    try
    {
        if (preprocess == "tophat")
        {
            processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize, upscale: ocrParams.Upscale);
        }
        else // "none"
        {
            processed = (Bitmap)cropped.Clone();
        }
    }
    finally
    {
        cropped.Dispose();
    }

    // Covers both the edge crop and the preprocessing step.
    var cropMs = sw.ElapsedMilliseconds;
    using var _processed = processed;

    // Save debug images if path provided
    if (!string.IsNullOrEmpty(request.Path))
    {
        var dir = Path.GetDirectoryName(request.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);

        processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));

        // Full capture goes next to it as "<name>.full<ext>"
        var ext = Path.GetExtension(request.Path);
        var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
        fullCapture.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
    }

    // Route to engine
    sw.Restart();
    if (engine == "tesseract")
    {
        var result = ocrHandler.RunTesseractOnBitmap(processed, region, pad: cropParams.OcrPad, upscale: ocrParams.Upscale);
        var ocrMs = sw.ElapsedMilliseconds;
        Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}");
        return result;
    }
    else // easyocr, paddleocr
    {
        var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
        var ocrMs = sw.ElapsedMilliseconds;
        Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}");

        // Offset word coordinates to screen space
        foreach (var line in ocrResult.Lines)
            foreach (var word in line.Words)
            {
                word.X += region.X;
                word.Y += region.Y;
            }

        return new DiffOcrResponse
        {
            Text = ocrResult.Text,
            Lines = ocrResult.Lines,
            Region = region,
        };
    }
}
/// <summary>
/// Coordinate-descent sweep over DiffCropParams to maximise avgIoU on crop.json ground truth.
/// Test cases are read from tessdata/crop.json under the application base directory; one
/// parameter is swept at a time while the others stay at their current best values.
/// </summary>
/// <param name="ocrHandler">Handler used to load each reference snapshot and run the diff crop.</param>
/// <param name="request">Optional starting point: <c>Params.Crop</c> is used when present, otherwise defaults.</param>
/// <returns>
/// A <c>CropTuneResponse</c> with the best score, parameters and evaluation count, or an
/// <c>ErrorResponse</c> when crop.json is missing, empty, or has no case whose files exist.
/// </returns>
private static object HandleCropTune(OcrHandler ocrHandler, Request request)
{
var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
var casesPath = Path.Combine(tessdataDir, "crop.json");
if (!File.Exists(casesPath))
return new ErrorResponse($"crop.json not found at {casesPath}");
var json = File.ReadAllText(casesPath);
var cases = JsonSerializer.Deserialize<List<CropTestCase>>(json, JsonOptions);
if (cases == null || cases.Count == 0)
return new ErrorResponse("No test cases in crop.json");
// Preload valid test cases (only those whose image and snapshot files both exist)
var validCases = new List<(CropTestCase tc, string imagePath, string snapshotPath)>();
foreach (var tc in cases)
{
var imagePath = Path.Combine(tessdataDir, tc.Image);
var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage);
if (File.Exists(imagePath) && File.Exists(snapshotPath))
validCases.Add((tc, imagePath, snapshotPath));
}
if (validCases.Count == 0)
return new ErrorResponse("No valid test cases found");
// Score function: compute avgIoU for a set of crop params
double ScoreCropParams(DiffCropParams cp)
{
double totalIoU = 0;
foreach (var (tc, imagePath, snapshotPath) in validCases)
{
// Load this case's snapshot as the reference, then diff the test image against it.
// NOTE(review): HandleSnapshot's return value is ignored — confirm it cannot report a failure here.
ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, cp);
// No crop found: the case adds 0 to the total but still counts in the divisor below.
if (cropResult == null) continue;
var (cropped, refCropped, current, region) = cropResult.Value;
// Only the region is needed for scoring; release the bitmaps immediately.
cropped.Dispose(); refCropped.Dispose(); current.Dispose();
// a* = actual crop rectangle, e* = expected (ground truth), i* = their intersection.
int ax1 = region.X, ay1 = region.Y;
int ax2 = region.X + region.Width, ay2 = region.Y + region.Height;
int ex1 = tc.TopLeft.X, ey1 = tc.TopLeft.Y, ex2 = tc.BottomRight.X, ey2 = tc.BottomRight.Y;
int ix1 = Math.Max(ax1, ex1), iy1 = Math.Max(ay1, ey1);
int ix2 = Math.Min(ax2, ex2), iy2 = Math.Min(ay2, ey2);
// Clamp to 0 so non-overlapping rectangles yield an empty intersection.
int iw = Math.Max(0, ix2 - ix1), ih = Math.Max(0, iy2 - iy1);
double intersection = (double)iw * ih;
double expW = ex2 - ex1, expH = ey2 - ey1;
double union = (double)region.Width * region.Height + expW * expH - intersection;
totalIoU += union > 0 ? intersection / union : 0;
}
return totalIoU / validCases.Count;
}
// Copies the six tunable fields so a trial can be modified without touching the current best.
DiffCropParams CloneCrop(DiffCropParams p) => new()
{
DiffThresh = p.DiffThresh, RowThreshDiv = p.RowThreshDiv,
ColThreshDiv = p.ColThreshDiv, MaxGap = p.MaxGap,
TrimCutoff = p.TrimCutoff, OcrPad = p.OcrPad,
};
// Start from provided params or defaults.
// `best` is updated in place below, so a caller-supplied Params.Crop object is modified.
var best = request.Params?.Crop ?? new DiffCropParams();
double bestScore = ScoreCropParams(best);
int totalEvals = 1;
Console.Error.WriteLine($" crop-tune: baseline avgIoU={bestScore:F4} {best}");
// Integer parameters to sweep: name (for logging), candidate values, and a setter.
var intSweeps = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
{
("diffThresh", [5, 10, 15, 20, 25, 30, 40], (c, v) => c.DiffThresh = v),
("rowThreshDiv", [20, 30, 40, 50, 60, 80, 100], (c, v) => c.RowThreshDiv = v),
("colThreshDiv", [5, 8, 10, 12, 15, 20], (c, v) => c.ColThreshDiv = v),
("maxGap", [5, 10, 15, 20, 25, 30], (c, v) => c.MaxGap = v),
};
double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5];
// At most three full passes over all parameters; stops early once a pass improves nothing.
const int maxRounds = 3;
for (int round = 0; round < maxRounds; round++)
{
bool improved = false;
Console.Error.WriteLine($"--- Round {round + 1} ---");
foreach (var (name, values, set) in intSweeps)
{
Console.Error.Write($" {name}: ");
// Starting at -1 means the first candidate always becomes the sweep's provisional best.
int bestVal = 0;
double bestValScore = -1;
foreach (int v in values)
{
var trial = CloneCrop(best);
set(trial, v);
double score = ScoreCropParams(trial);
totalEvals++;
Console.Error.Write($"{v}={score:F4} ");
if (score > bestValScore) { bestValScore = score; bestVal = v; }
}
Console.Error.WriteLine();
// Adopt the sweep winner only if it strictly beats the overall best so far.
if (bestValScore > bestScore)
{
set(best, bestVal);
bestScore = bestValScore;
improved = true;
Console.Error.WriteLine($" -> {name}={bestVal} avgIoU={bestScore:F4}");
}
}
// trimCutoff sweep (double-valued, so handled separately from the int sweeps;
// here the provisional best starts from the current value and score)
{
Console.Error.Write($" trimCutoff: ");
double bestTrim = best.TrimCutoff;
double bestTrimScore = bestScore;
foreach (double v in trimValues)
{
var trial = CloneCrop(best);
trial.TrimCutoff = v;
double score = ScoreCropParams(trial);
totalEvals++;
Console.Error.Write($"{v:F2}={score:F4} ");
if (score > bestTrimScore) { bestTrimScore = score; bestTrim = v; }
}
Console.Error.WriteLine();
if (bestTrimScore > bestScore)
{
best.TrimCutoff = bestTrim;
bestScore = bestTrimScore;
improved = true;
Console.Error.WriteLine($" -> trimCutoff={bestTrim:F2} avgIoU={bestScore:F4}");
}
}
Console.Error.WriteLine($" End of round {round + 1}: avgIoU={bestScore:F4} {best}");
if (!improved) break;
}
Console.Error.WriteLine($"\n crop-tune: BEST avgIoU={bestScore:F4} {best} evals={totalEvals}");
// Iterations reports the number of scoring evaluations, including the baseline.
return new CropTuneResponse
{
BestAvgIoU = bestScore,
BestParams = best,
Iterations = totalEvals,
};
}
/// <summary>
/// Crop accuracy test: runs diff and/or edge crop on test cases from crop.json,
/// computes IoU and per-edge deltas vs ground truth.
/// Test cases are read from tessdata/crop.json under the application base directory.
/// </summary>
/// <param name="ocrHandler">Handler used to load the reference snapshot and run the diff crop.</param>
/// <param name="edgeCropHandler">Handler used to run the edge crop.</param>
/// <param name="request">
/// <c>Engine</c> selects the crop method ("diff" when absent); <c>Params.Crop</c> and
/// <c>EdgeParams.Crop</c> supply the crop parameters (defaults when absent).
/// </param>
/// <returns>
/// A <c>CropTestResponse</c> with per-case results and the average IoU, or an
/// <c>ErrorResponse</c> when crop.json is missing or empty.
/// </returns>
private static object HandleCropTest(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, Request request)
{
var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
var casesPath = Path.Combine(tessdataDir, "crop.json");
if (!File.Exists(casesPath))
return new ErrorResponse($"crop.json not found at {casesPath}");
var json = File.ReadAllText(casesPath);
var cases = JsonSerializer.Deserialize<List<CropTestCase>>(json, JsonOptions);
if (cases == null || cases.Count == 0)
return new ErrorResponse("No test cases in crop.json");
var method = request.Engine ?? "diff"; // reuse engine field: "diff", "edge", or "both"
var diffParams = request.Params?.Crop ?? new DiffCropParams();
var edgeParams = request.EdgeParams?.Crop ?? new EdgeCropParams();
var results = new List<CropTestResult>();
foreach (var tc in cases)
{
var imagePath = Path.Combine(tessdataDir, tc.Image);
var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage);
if (!File.Exists(imagePath) || !File.Exists(snapshotPath))
{
// Skipped cases are still recorded with IoU 0, so they lower the reported average.
Console.Error.WriteLine($" crop-test: SKIP {tc.Id} — missing files");
results.Add(new CropTestResult { Id = tc.Id, IoU = 0 });
continue;
}
// Expected region
int expX = tc.TopLeft.X;
int expY = tc.TopLeft.Y;
int expW = tc.BottomRight.X - tc.TopLeft.X;
int expH = tc.BottomRight.Y - tc.TopLeft.Y;
var expected = new RegionRect { X = expX, Y = expY, Width = expW, Height = expH };
RegionRect? actual = null;
if (method is "diff" or "both")
{
// Load snapshot as reference
ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, diffParams);
if (cropResult != null)
{
var (cropped, refCropped, current, region) = cropResult.Value;
actual = region;
// Only the region is needed; release the bitmaps immediately.
cropped.Dispose();
refCropped.Dispose();
current.Dispose();
}
}
// NOTE(review): "both" is accepted above but this branch only runs for exactly "edge",
// so "both" currently behaves like "diff". Any other method value leaves `actual` null
// and every case scores 0. Confirm the intended behaviour before changing.
if (method == "edge")
{
// Default cursor to center of ground-truth bbox if not specified
int cx = tc.CursorX ?? (tc.TopLeft.X + tc.BottomRight.X) / 2;
int cy = tc.CursorY ?? (tc.TopLeft.Y + tc.BottomRight.Y) / 2;
var cropResult = edgeCropHandler.EdgeCrop(
new Request { File = imagePath, CursorX = cx, CursorY = cy },
edgeParams);
if (cropResult != null)
{
var (cropped, fullCapture, region) = cropResult.Value;
actual = region;
cropped.Dispose();
fullCapture.Dispose();
}
}
// Compute IoU and deltas (all stay 0 when no crop was produced)
double iou = 0;
int dTop = 0, dLeft = 0, dRight = 0, dBottom = 0;
if (actual != null)
{
// a* = actual crop rectangle, e* = expected (ground truth), i* = their intersection.
int ax1 = actual.X, ay1 = actual.Y;
int ax2 = actual.X + actual.Width, ay2 = actual.Y + actual.Height;
int ex1 = expX, ey1 = expY, ex2 = tc.BottomRight.X, ey2 = tc.BottomRight.Y;
int ix1 = Math.Max(ax1, ex1), iy1 = Math.Max(ay1, ey1);
int ix2 = Math.Min(ax2, ex2), iy2 = Math.Min(ay2, ey2);
// Clamp to 0 so non-overlapping rectangles yield an empty intersection.
int iw = Math.Max(0, ix2 - ix1), ih = Math.Max(0, iy2 - iy1);
double intersection = (double)iw * ih;
double union = (double)actual.Width * actual.Height + (double)expW * expH - intersection;
iou = union > 0 ? intersection / union : 0;
dTop = ay1 - ey1; // positive = crop starts too low
dLeft = ax1 - ex1; // positive = crop starts too far right
dRight = ax2 - ex2; // positive = crop ends too far right
dBottom = ay2 - ey2; // positive = crop ends too low
}
// The "+0;-#" format prints an explicit sign on each delta.
Console.Error.WriteLine($" crop-test #{tc.Id}: IoU={iou:F3} expected=({expX},{expY})+{expW}x{expH} actual={FormatRegion(actual)} delta T={dTop:+0;-#} L={dLeft:+0;-#} R={dRight:+0;-#} B={dBottom:+0;-#}");
results.Add(new CropTestResult
{
Id = tc.Id,
IoU = iou,
Expected = expected,
Actual = actual,
DeltaTop = dTop,
DeltaLeft = dLeft,
DeltaRight = dRight,
DeltaBottom = dBottom,
});
}
double avgIoU = results.Count > 0 ? results.Average(r => r.IoU) : 0;
Console.Error.WriteLine($" crop-test: method={method} avgIoU={avgIoU:F3} ({results.Count} cases)");
return new CropTestResponse
{
Method = method,
AvgIoU = avgIoU,
Results = results,
};
}
/// <summary>
/// Renders a region as "(X,Y)+WidthxHeight" for log output, or the literal "null"
/// when no region is supplied.
/// </summary>
private static string FormatRegion(RegionRect? r)
{
    if (r == null)
    {
        return "null";
    }

    return $"({r.X},{r.Y})+{r.Width}x{r.Height}";
}
/// <summary>
/// Serializes <paramref name="response"/> with the shared JSON options and writes it to
/// stdout as a single line, flushing immediately so the reader sees it without delay.
/// </summary>
private static void WriteResponse(object response)
{
    var stdout = Console.Out;
    stdout.WriteLine(JsonSerializer.Serialize(response, JsonOptions));
    stdout.Flush();
}
}

View file

@ -1,190 +0,0 @@
namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
class DetectGridHandler
{
/// <summary>
/// Detects a periodic cell grid inside the requested region.
/// Pass 1 scans horizontal bands for columns with periodic "very dark pixel density";
/// pass 2 checks the best candidates for matching vertical periodicity.
/// </summary>
/// <param name="req">
/// Must carry <c>Region</c>. <c>MinCellSize</c>/<c>MaxCellSize</c> bound the cell period
/// (defaults 20 and 70 when not positive); <c>Debug</c> enables diagnostics on stderr.
/// </param>
/// <returns>
/// A <c>DetectGridResponse</c> (with <c>Detected = false</c> when nothing qualifies),
/// or an <c>ErrorResponse</c> when no region is supplied.
/// </returns>
public object HandleDetectGrid(Request req)
{
    if (req.Region == null)
        return new ErrorResponse("detect-grid requires region");

    int minCell = req.MinCellSize > 0 ? req.MinCellSize : 20;
    int maxCell = req.MaxCellSize > 0 ? req.MaxCellSize : 70;
    bool debug = req.Debug;

    // Only the grayscale copy is needed afterwards; `using` releases the bitmap even on failure.
    int w, h;
    byte[] gray;
    using (Bitmap bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region))
    {
        w = bitmap.Width;
        h = bitmap.Height;
        gray = ReadGrayscale(bitmap, w, h);
    }

    // ── Pass 1: Scan horizontal bands using "very dark pixel density" ──
    // Grid lines are nearly all very dark (density ~0.9), cell interiors are
    // partially dark (0.3-0.5), game world is mostly bright (density ~0.05).
    // This creates clear periodic peaks at grid line positions.
    int bandH = 200;
    int bandStep = 40;
    const int veryDarkPixelThresh = 12; // pixels below this brightness = "very dark"
    const double gridSegThresh = 0.25; // density above this = potential grid column
    var candidates = new List<(int bandY, int cellW, double hAc, int hLeft, int hRight)>();

    for (int by = 0; by + bandH <= h; by += bandStep)
    {
        // "Very dark pixel density" per column: fraction of pixels below threshold
        double[] darkDensity = new double[w];
        for (int x = 0; x < w; x++)
        {
            int count = 0;
            for (int y = by; y < by + bandH; y++)
            {
                if (gray[y * w + x] < veryDarkPixelThresh) count++;
            }
            darkDensity[x] = (double)count / bandH;
        }

        // Find segments where density > gridSegThresh (grid panel regions)
        var gridSegs = SignalProcessing.FindDarkDensitySegments(darkDensity, gridSegThresh, 200);
        foreach (var (segLeft, segRight) in gridSegs)
        {
            // Extract segment and run AC
            int segLen = segRight - segLeft;
            double[] segment = new double[segLen];
            Array.Copy(darkDensity, segLeft, segment, 0, segLen);
            var (period, acScore) = SignalProcessing.FindPeriodWithScore(segment, minCell, maxCell);
            if (period <= 0) continue;

            // FindGridExtent within the segment
            var (extLeft, extRight) = SignalProcessing.FindGridExtent(segment, period);
            if (extLeft < 0) continue;

            // Map back to full image coordinates
            int absLeft = segLeft + extLeft;
            int absRight = segLeft + extRight;
            int extent = absRight - absLeft;

            // Require at least 8 cells wide AND 200px absolute minimum
            if (extent < period * 8 || extent < 200) continue;

            if (debug) Console.Error.WriteLine(
                $" Band y={by}: seg=[{segLeft}-{segRight}] period={period}, AC={acScore:F3}, " +
                $"extent={absLeft}-{absRight}={extent}px ({extent / period} cells)");
            candidates.Add((by, period, acScore, absLeft, absRight));
        }
    }

    if (debug) Console.Error.WriteLine($"Pass 1: {candidates.Count} candidates");

    // Sort by score = AC * extent (prefer large strongly-periodic areas)
    candidates.Sort((a, b) =>
    {
        double sa = a.hAc * (a.hRight - a.hLeft);
        double sb = b.hAc * (b.hRight - b.hLeft);
        return sb.CompareTo(sa);
    });

    // ── Pass 2: Verify vertical periodicity ──
    // Only the ten best-scoring candidates are examined; the first that passes wins.
    foreach (var cand in candidates.Take(10))
    {
        int colSpan = cand.hRight - cand.hLeft;
        if (colSpan < cand.cellW * 3) continue;

        // Row "very dark pixel density" within the detected column range
        double[] rowDensity = new double[h];
        for (int y = 0; y < h; y++)
        {
            int count = 0;
            for (int x = cand.hLeft; x < cand.hRight; x++)
            {
                if (gray[y * w + x] < veryDarkPixelThresh) count++;
            }
            rowDensity[y] = (double)count / colSpan;
        }

        // Find grid panel vertical segment
        var vGridSegs = SignalProcessing.FindDarkDensitySegments(rowDensity, gridSegThresh, 100);
        if (vGridSegs.Count == 0) continue;

        // Use the largest segment
        var (vSegTop, vSegBottom) = vGridSegs.OrderByDescending(s => s.end - s.start).First();
        int vSegLen = vSegBottom - vSegTop;
        double[] vSegment = new double[vSegLen];
        Array.Copy(rowDensity, vSegTop, vSegment, 0, vSegLen);
        var (cellH, vAc) = SignalProcessing.FindPeriodWithScore(vSegment, minCell, maxCell);
        if (cellH <= 0) continue;

        var (extTop, extBottom) = SignalProcessing.FindGridExtent(vSegment, cellH);
        if (extTop < 0) continue;

        int top = vSegTop + extTop;
        int bottom = vSegTop + extBottom;
        int vExtent = bottom - top;

        // Require at least 3 rows tall AND 100px absolute minimum
        if (vExtent < cellH * 3 || vExtent < 100) continue;

        if (debug) Console.Error.WriteLine(
            $" 2D candidate: cellW={cand.cellW}, cellH={cellH}, " +
            $"region=({cand.hLeft},{top})-({cand.hRight},{bottom}), " +
            $"vAC={vAc:F3}, extent={vExtent}px ({vExtent / cellH} rows)");

        // ── Found a valid 2D grid ──
        int gridW = cand.hRight - cand.hLeft;
        int gridH = bottom - top;
        int cols = Math.Max(2, (int)Math.Round((double)gridW / cand.cellW));
        int rows = Math.Max(2, (int)Math.Round((double)gridH / cellH));

        // Snap grid dimensions to exact multiples of cell size
        gridW = cols * cand.cellW;
        gridH = rows * cellH;

        if (debug) Console.Error.WriteLine(
            $" => cols={cols}, rows={rows}, gridW={gridW}, gridH={gridH}");

        // Coordinates are reported relative to the screen, not to the captured region.
        return new DetectGridResponse
        {
            Detected = true,
            Region = new RegionRect
            {
                X = req.Region.X + cand.hLeft,
                Y = req.Region.Y + top,
                Width = gridW,
                Height = gridH,
            },
            Cols = cols,
            Rows = rows,
            CellWidth = Math.Round((double)gridW / cols, 1),
            CellHeight = Math.Round((double)gridH / rows, 1),
        };
    }

    if (debug) Console.Error.WriteLine(" No valid 2D grid found");
    return new DetectGridResponse { Detected = false };
}

/// <summary>
/// Copies the bitmap's 32bpp pixels and reduces them to one brightness byte per pixel
/// (the integer mean of the three colour channels), stored row-major with width <paramref name="w"/>.
/// </summary>
private static byte[] ReadGrayscale(Bitmap bitmap, int w, int h)
{
    var bmpData = bitmap.LockBits(
        new Rectangle(0, 0, w, h),
        ImageLockMode.ReadOnly,
        PixelFormat.Format32bppArgb
    );
    int stride;
    byte[] pixels;
    try
    {
        stride = bmpData.Stride;
        pixels = new byte[stride * h];
        Marshal.Copy(bmpData.Scan0, pixels, 0, pixels.Length);
    }
    finally
    {
        // Always unlock, even if the copy fails, so the bitmap is not left locked.
        bitmap.UnlockBits(bmpData);
    }

    byte[] gray = new byte[w * h];
    for (int y = 0; y < h; y++)
        for (int x = 0; x < w; x++)
        {
            int i = y * stride + x * 4;
            gray[y * w + x] = (byte)((pixels[i] + pixels[i + 1] + pixels[i + 2]) / 3);
        }
    return gray;
}
}

View file

@ -1,244 +0,0 @@
namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
class EdgeCropHandler
{
// Interop struct matching the layout user32 expects for a point: two sequential 32-bit ints.
[StructLayout(LayoutKind.Sequential)]
private struct POINT { public int X, Y; }
// P/Invoke binding for user32's GetCursorPos; used when a request carries no explicit cursor position.
[DllImport("user32.dll")]
private static extern bool GetCursorPos(out POINT lpPoint);
/// <summary>
/// Finds the dark panel surrounding the cursor and crops it out of a full capture.
/// Phase 1 measures, per row, how far dark pixels extend left and right of the cursor;
/// phase 2 does the same vertically per column. Percentiles of those extents give the box.
/// </summary>
/// <param name="req">
/// Supplies the optional image file and cursor position. When <c>CursorX</c>/<c>CursorY</c>
/// are not both set, the current OS cursor position is used.
/// </param>
/// <param name="p">
/// Crop parameters. Several fields are repurposed here: <c>MinDarkRun</c> is the half-height
/// of the horizontal scan band, and <c>RowThreshDiv</c>/<c>ColThreshDiv</c> are percentile
/// denominators for the low and high edges respectively.
/// </param>
/// <returns>
/// The cropped bitmap, the full capture and the crop region, or null when too few dark
/// rows/columns were found or the region is smaller than 50px in either dimension.
/// On success the caller owns both bitmaps; on null or on an exception the capture is disposed here.
/// </returns>
public (Bitmap cropped, Bitmap fullCapture, RegionRect region)? EdgeCrop(Request req, EdgeCropParams p)
{
    int cursorX, cursorY;
    if (req.CursorX.HasValue && req.CursorY.HasValue)
    {
        cursorX = req.CursorX.Value;
        cursorY = req.CursorY.Value;
    }
    else
    {
        GetCursorPos(out var pt);
        cursorX = pt.X;
        cursorY = pt.Y;
    }

    var fullCapture = ScreenCapture.CaptureOrLoad(req.File, null);

    // Set only when the capture is handed to the caller; every other exit disposes it.
    bool ownershipTransferred = false;
    try
    {
        int w = fullCapture.Width;
        int h = fullCapture.Height;
        cursorX = Math.Clamp(cursorX, 0, w - 1);
        cursorY = Math.Clamp(cursorY, 0, h - 1);

        var bmpData = fullCapture.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        int stride;
        byte[] px;
        try
        {
            stride = bmpData.Stride;
            px = new byte[stride * h];
            Marshal.Copy(bmpData.Scan0, px, 0, px.Length);
        }
        finally
        {
            fullCapture.UnlockBits(bmpData);
        }

        int darkThresh = p.DarkThresh;
        int colGap = p.RunGapTolerance;
        int maxGap = p.MaxGap;

        // ── Phase 1: Per-row horizontal extent ──
        // Scan left/right from cursorX per row. Gap tolerance bridges through text.
        // Percentile-based filtering for robustness.
        int bandHalf = p.MinDarkRun; // repurpose: half-height of horizontal scan band
        int bandTop = Math.Max(0, cursorY - bandHalf);
        int bandBot = Math.Min(h - 1, cursorY + bandHalf);
        var leftExtents = new List<int>();
        var rightExtents = new List<int>();

        for (int y = bandTop; y <= bandBot; y++)
        {
            int rowOff = y * stride;
            int seedX = FindDarkSeedInRow(px, stride, w, rowOff, cursorX, darkThresh, seedRadius: 6);
            if (seedX < 0) continue;

            // Walk left until more than colGap consecutive non-dark pixels are seen.
            int leftEdge = seedX;
            int gap = 0;
            for (int x = seedX - 1; x >= 0; x--)
            {
                int i = rowOff + x * 4;
                int brightness = (px[i] + px[i + 1] + px[i + 2]) / 3;
                if (brightness < darkThresh) { leftEdge = x; gap = 0; }
                else if (++gap > colGap) break;
            }

            // Same walk to the right.
            int rightEdge = seedX;
            gap = 0;
            for (int x = seedX + 1; x < w; x++)
            {
                int i = rowOff + x * 4;
                int brightness = (px[i] + px[i + 1] + px[i + 2]) / 3;
                if (brightness < darkThresh) { rightEdge = x; gap = 0; }
                else if (++gap > colGap) break;
            }

            leftExtents.Add(leftEdge);
            rightExtents.Add(rightEdge);
        }

        if (leftExtents.Count < 10)
        {
            Console.Error.WriteLine($" edge-crop: too few dark rows ({leftExtents.Count})");
            return null;
        }

        leftExtents.Sort();
        rightExtents.Sort();

        // Use RowThreshDiv/ColThreshDiv as percentile denominators
        // e.g., RowThreshDiv=4 → 25th percentile for left, ColThreshDiv=4 → 75th for right
        int leftPctIdx = leftExtents.Count / p.RowThreshDiv;
        int rightPctIdx = rightExtents.Count * (p.ColThreshDiv - 1) / p.ColThreshDiv;
        leftPctIdx = Math.Clamp(leftPctIdx, 0, leftExtents.Count - 1);
        rightPctIdx = Math.Clamp(rightPctIdx, 0, rightExtents.Count - 1);
        int bestColStart = leftExtents[leftPctIdx];
        int bestColEnd = rightExtents[rightPctIdx];
        Console.Error.WriteLine($" edge-crop: horizontal: left={bestColStart} right={bestColEnd} ({bestColEnd - bestColStart + 1}px) samples={leftExtents.Count} pctL={leftPctIdx}/{leftExtents.Count} pctR={rightPctIdx}/{rightExtents.Count}");

        if (bestColEnd - bestColStart + 1 < 50)
        {
            Console.Error.WriteLine($" edge-crop: horizontal extent too small");
            return null;
        }

        // ── Phase 2: Per-column vertical extent ──
        // Scan only a band of columns around the cursor, limited to the horizontal extent found above.
        int colBandHalf = (bestColEnd - bestColStart + 1) / 3;
        int colBandLeft = Math.Max(bestColStart, cursorX - colBandHalf);
        int colBandRight = Math.Min(bestColEnd, cursorX + colBandHalf);
        var topExtents = new List<int>();
        var bottomExtents = new List<int>();

        // Asymmetric gap: larger upward to bridge header decorations (~30-40px bright)
        int maxGapUp = maxGap * 3;

        for (int x = colBandLeft; x <= colBandRight; x++)
        {
            int seedY = FindDarkSeedInColumn(px, stride, h, x, cursorY, darkThresh, seedRadius: 6);
            if (seedY < 0) continue;

            int topEdge = seedY;
            int gap = 0;
            for (int y = seedY - 1; y >= 0; y--)
            {
                int i = y * stride + x * 4;
                int brightness = (px[i] + px[i + 1] + px[i + 2]) / 3;
                if (brightness < darkThresh) { topEdge = y; gap = 0; }
                else if (++gap > maxGapUp) break;
            }

            int bottomEdge = seedY;
            gap = 0;
            for (int y = seedY + 1; y < h; y++)
            {
                int i = y * stride + x * 4;
                int brightness = (px[i] + px[i + 1] + px[i + 2]) / 3;
                if (brightness < darkThresh) { bottomEdge = y; gap = 0; }
                else if (++gap > maxGap) break;
            }

            topExtents.Add(topEdge);
            bottomExtents.Add(bottomEdge);
        }

        if (topExtents.Count < 10)
        {
            Console.Error.WriteLine($" edge-crop: too few dark columns ({topExtents.Count})");
            return null;
        }

        topExtents.Sort();
        bottomExtents.Sort();

        // Both lists always have the same length; each index is derived from the list it indexes.
        int topPctIdx = topExtents.Count / p.RowThreshDiv;
        int botPctIdx = bottomExtents.Count * (p.ColThreshDiv - 1) / p.ColThreshDiv;
        topPctIdx = Math.Clamp(topPctIdx, 0, topExtents.Count - 1);
        botPctIdx = Math.Clamp(botPctIdx, 0, bottomExtents.Count - 1);
        int bestRowStart = topExtents[topPctIdx];
        int bestRowEnd = bottomExtents[botPctIdx];
        Console.Error.WriteLine($" edge-crop: vertical: top={bestRowStart} bottom={bestRowEnd} ({bestRowEnd - bestRowStart + 1}px) samples={topExtents.Count}");

        if (bestRowEnd - bestRowStart + 1 < 50)
        {
            Console.Error.WriteLine($" edge-crop: vertical extent too small");
            return null;
        }

        int minX = bestColStart;
        int minY = bestRowStart;
        int maxX = bestColEnd;
        int maxY = bestRowEnd;
        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;
        Console.Error.WriteLine($" edge-crop: result ({minX},{minY}) {rw}x{rh}");

        if (rw < 50 || rh < 50)
        {
            Console.Error.WriteLine($" edge-crop: region too small ({rw}x{rh})");
            return null;
        }

        var cropRect = new Rectangle(minX, minY, rw, rh);
        var cropped = fullCapture.Clone(cropRect, PixelFormat.Format32bppArgb);
        var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };

        ownershipTransferred = true;
        return (cropped, fullCapture, region);
    }
    finally
    {
        if (!ownershipTransferred)
            fullCapture.Dispose();
    }
}
/// <summary>
/// Looks for the dark pixel nearest to <paramref name="cursorX"/> within one row of a
/// 4-bytes-per-pixel buffer, searching outward and checking the left side before the right
/// at each distance. A pixel is dark when the integer mean of its first three bytes is
/// below <paramref name="darkThresh"/>.
/// </summary>
/// <returns>The x coordinate of the first dark pixel found, or -1 if none lies within reach.</returns>
private static int FindDarkSeedInRow(byte[] px, int stride, int w, int rowOff, int cursorX, int darkThresh, int seedRadius)
{
    // The reach is capped by the distance to the nearer row end, so both probes stay in bounds.
    int reach = Math.Min(seedRadius, Math.Min(cursorX, w - 1 - cursorX));

    bool IsDark(int x)
    {
        int offset = rowOff + x * 4;
        return (px[offset] + px[offset + 1] + px[offset + 2]) / 3 < darkThresh;
    }

    for (int distance = 0; distance <= reach; distance++)
    {
        int left = cursorX - distance;
        if (IsDark(left))
        {
            return left;
        }

        int right = cursorX + distance;
        if (IsDark(right))
        {
            return right;
        }
    }

    return -1;
}
/// <summary>
/// Searches one pixel column for the dark pixel closest to <paramref name="cursorY"/>.
/// Pixels are read as 4 bytes each; brightness is the integer mean of the first three bytes.
/// </summary>
/// <param name="px">Raw pixel buffer (4 bytes per pixel).</param>
/// <param name="stride">Bytes per image row.</param>
/// <param name="h">Image height in pixels.</param>
/// <param name="x">Column to scan.</param>
/// <param name="cursorY">Row the search expands from.</param>
/// <param name="darkThresh">A pixel is dark when its brightness is strictly below this value.</param>
/// <param name="seedRadius">Maximum distance (in pixels) from <paramref name="cursorY"/> to search.</param>
/// <returns>The y coordinate of the nearest dark pixel (upper side wins ties), or -1 when none is found.</returns>
private static int FindDarkSeedInColumn(byte[] px, int stride, int h, int x, int cursorY, int darkThresh, int seedRadius)
{
    // A cursor outside the image has no neighbourhood to search; keep reporting "not found".
    if (cursorY < 0 || cursorY >= h) return -1;

    // Search up to seedRadius on EACH side independently. Clamping to the nearest edge
    // (the previous behaviour) meant a cursor near the top/bottom never looked inward.
    int maxR = Math.Min(seedRadius, Math.Max(cursorY, h - 1 - cursorY));
    for (int r = 0; r <= maxR; r++)
    {
        int above = cursorY - r;
        if (above >= 0 && IsDark(above)) return above;

        // r == 0 would re-test the same pixel as 'above'.
        int below = cursorY + r;
        if (r > 0 && below < h && IsDark(below)) return below;
    }
    return -1;

    bool IsDark(int y)
    {
        int i = y * stride + x * 4;
        return (px[i] + px[i + 1] + px[i + 2]) / 3 < darkThresh;
    }
}
}

View file

@ -1,357 +0,0 @@
namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
/// <summary>
/// Handles the "grid" command: classifies every cell of a captured grid region as empty or
/// occupied by comparing it with an empty-cell template, merges adjacent occupied cells into
/// items, and optionally finds cells that look like a chosen target cell.
/// </summary>
class GridHandler
{
    // Empty-cell templates, loaded lazily by LoadTemplatesIfNeeded.
    // Gray buffers are tightly packed (width * height bytes); ARGB buffers keep the bitmap stride.
    private byte[]? _emptyTemplate70Gray;
    private byte[]? _emptyTemplate70Argb;
    private int _emptyTemplate70W, _emptyTemplate70H, _emptyTemplate70Stride;
    private byte[]? _emptyTemplate35Gray;
    private byte[]? _emptyTemplate35Argb;
    private int _emptyTemplate35W, _emptyTemplate35H, _emptyTemplate35Stride;

    /// <summary>
    /// Scans the requested region as a <c>Cols</c> x <c>Rows</c> grid.
    /// </summary>
    /// <param name="req">Request carrying region, grid dimensions, optional threshold, debug flag and target cell.</param>
    /// <returns>
    /// A <see cref="GridResponse"/> with per-cell occupancy, merged items and (when a valid occupied
    /// target cell was given) visually similar cells; or an <see cref="ErrorResponse"/> when the
    /// request is incomplete or no empty-cell template could be loaded.
    /// </returns>
    public object HandleGrid(Request req)
    {
        if (req.Region == null || req.Cols <= 0 || req.Rows <= 0)
            return new ErrorResponse("grid command requires region, cols, rows");

        LoadTemplatesIfNeeded();
        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        int cols = req.Cols;
        int rows = req.Rows;
        float cellW = (float)bitmap.Width / cols;
        float cellH = (float)bitmap.Height / rows;

        // Pick the empty template based on cell size: the 35px one for small cells, else the 70px one.
        int nominalCell = (int)Math.Round(cellW);
        byte[] templateGray;
        int templateW, templateH;
        if (nominalCell <= 40 && _emptyTemplate35Gray != null)
        {
            templateGray = _emptyTemplate35Gray;
            templateW = _emptyTemplate35W;
            templateH = _emptyTemplate35H;
        }
        else if (_emptyTemplate70Gray != null)
        {
            templateGray = _emptyTemplate70Gray;
            templateW = _emptyTemplate70W;
            templateH = _emptyTemplate70H;
        }
        else
        {
            return new ErrorResponse("Empty cell templates not found in assets/");
        }

        // Only the grayscale plane is used below; the ARGB copy and its stride are discarded.
        var (captureGray, _, _) = ImageUtils.BitmapToGrayAndArgb(bitmap);
        int captureW = bitmap.Width;
        int captureH = bitmap.Height;

        // Border to skip (outer pixels may differ between cells)
        int border = Math.Max(2, nominalCell / 10);

        // Pre-compute template average for the inner region
        long templateSum = 0;
        int innerCount = 0;
        for (int ty = border; ty < templateH - border; ty++)
            for (int tx = border; tx < templateW - border; tx++)
            {
                templateSum += templateGray[ty * templateW + tx];
                innerCount++;
            }
        double tmplMean = innerCount > 0 ? (double)templateSum / innerCount : 0;

        // Threshold for brightness-normalized MAD (non-positive request value selects the default of 5)
        double diffThreshold = req.Threshold > 0 ? req.Threshold : 5;
        bool debug = req.Debug;
        if (debug) Console.Error.WriteLine($"Grid: {cols}x{rows}, cellW={cellW:F1}, cellH={cellH:F1}, border={border}, threshold={diffThreshold}, tmplMean={tmplMean:F1}");

        var cells = new List<List<bool>>();
        for (int row = 0; row < rows; row++)
        {
            var rowList = new List<bool>();
            var debugDiffs = new List<string>();
            for (int col = 0; col < cols; col++)
            {
                int cx0 = (int)(col * cellW);
                int cy0 = (int)(row * cellH);
                int cw = (int)Math.Min(cellW, captureW - cx0);
                int ch = (int)Math.Min(cellH, captureH - cy0);
                int innerW = Math.Min(cw, templateW) - border;
                int innerH = Math.Min(ch, templateH) - border;

                // First pass: compute cell region mean brightness
                long cellSum = 0;
                int compared = 0;
                for (int py = border; py < innerH; py++)
                    for (int px = border; px < innerW; px++)
                    {
                        cellSum += captureGray[(cy0 + py) * captureW + (cx0 + px)];
                        compared++;
                    }
                double cellMean = compared > 0 ? (double)cellSum / compared : 0;
                double offset = cellMean - tmplMean;

                // Second pass: MAD on brightness-normalized values.
                // Each term is truncated to an integer, as before, so existing thresholds keep their meaning.
                long diffSum = 0;
                for (int py = border; py < innerH; py++)
                    for (int px = border; px < innerW; px++)
                    {
                        double cellVal = captureGray[(cy0 + py) * captureW + (cx0 + px)];
                        double tmplVal = templateGray[py * templateW + px];
                        diffSum += (long)Math.Abs(cellVal - tmplVal - offset);
                    }
                double meanDiff = compared > 0 ? (double)diffSum / compared : 0;
                bool occupied = meanDiff > diffThreshold;
                rowList.Add(occupied);
                if (debug) debugDiffs.Add($"{meanDiff,5:F1}{(occupied ? "*" : " ")}");
            }
            cells.Add(rowList);
            if (debug) Console.Error.WriteLine($" Row {row,2}: {string.Join(" ", debugDiffs)}");
        }

        // ── Item detection: compare border pixels to empty template (grayscale) ──
        // Items have a colored tint behind them that shows through grid lines.
        // Compare each cell's border strip against the template's border pixels.
        // If they differ → item tint present → cells belong to same item.
        int[] parent = new int[rows * cols];
        for (int i = 0; i < parent.Length; i++) parent[i] = i;
        int Find(int x) { while (parent[x] != x) { parent[x] = parent[parent[x]]; x = parent[x]; } return x; }
        void Union(int a, int b) { parent[Find(a)] = Find(b); }

        int stripWidth = Math.Max(2, border / 2);
        int stripInset = (int)(cellW * 0.15);
        double borderDiffThresh = 15.0;
        for (int row = 0; row < rows; row++)
        {
            for (int col = 0; col < cols; col++)
            {
                if (!cells[row][col]) continue;
                int cx0 = (int)(col * cellW);
                int cy0 = (int)(row * cellH);

                // Check right neighbor
                if (col + 1 < cols && cells[row][col + 1])
                {
                    long diffSum = 0; int cnt = 0;
                    int xStart = (int)((col + 1) * cellW) - stripWidth;
                    int yFrom = cy0 + stripInset;
                    int yTo = (int)((row + 1) * cellH) - stripInset;
                    for (int sy = yFrom; sy < yTo; sy += 2)
                    {
                        int tmplY = sy - cy0;
                        // Cells taller than the template would otherwise index past the template buffer.
                        if (tmplY < 0 || tmplY >= templateH) continue;
                        for (int sx = xStart; sx < xStart + stripWidth * 2; sx++)
                        {
                            if (sx < 0 || sx >= captureW) continue;
                            int tmplX = sx - cx0;
                            if (tmplX < 0 || tmplX >= templateW) continue;
                            diffSum += Math.Abs(captureGray[sy * captureW + sx] - templateGray[tmplY * templateW + tmplX]);
                            cnt++;
                        }
                    }
                    double meanDiff = cnt > 0 ? (double)diffSum / cnt : 0;
                    if (debug) Console.Error.WriteLine($" H ({row},{col})->({row},{col+1}): {meanDiff:F1}{(meanDiff > borderDiffThresh ? " SAME" : "")}");
                    if (meanDiff > borderDiffThresh)
                        Union(row * cols + col, row * cols + col + 1);
                }

                // Check bottom neighbor
                if (row + 1 < rows && cells[row + 1][col])
                {
                    long diffSum = 0; int cnt = 0;
                    int yStart = (int)((row + 1) * cellH) - stripWidth;
                    int xFrom = cx0 + stripInset;
                    int xTo = (int)((col + 1) * cellW) - stripInset;
                    for (int sx = xFrom; sx < xTo; sx += 2)
                    {
                        int tmplX = sx - cx0;
                        // Cells wider than the template would otherwise wrap into the next template row.
                        if (tmplX < 0 || tmplX >= templateW) continue;
                        for (int sy = yStart; sy < yStart + stripWidth * 2; sy++)
                        {
                            if (sy < 0 || sy >= captureH) continue;
                            int tmplY = sy - cy0;
                            if (tmplY < 0 || tmplY >= templateH) continue;
                            diffSum += Math.Abs(captureGray[sy * captureW + sx] - templateGray[tmplY * templateW + tmplX]);
                            cnt++;
                        }
                    }
                    double meanDiff = cnt > 0 ? (double)diffSum / cnt : 0;
                    if (debug) Console.Error.WriteLine($" V ({row},{col})->({row+1},{col}): {meanDiff:F1}{(meanDiff > borderDiffThresh ? " SAME" : "")}");
                    if (meanDiff > borderDiffThresh)
                        Union(row * cols + col, (row + 1) * cols + col);
                }
            }
        }

        // Extract items from union-find groups
        var groups = new Dictionary<int, List<(int row, int col)>>();
        for (int row = 0; row < rows; row++)
            for (int col = 0; col < cols; col++)
                if (cells[row][col])
                {
                    int root = Find(row * cols + col);
                    if (!groups.TryGetValue(root, out var members))
                    {
                        members = [];
                        groups[root] = members;
                    }
                    members.Add((row, col));
                }

        // Each item is reported as the bounding box (in cells) of its group.
        var items = new List<GridItem>();
        foreach (var group in groups.Values)
        {
            int minR = group.Min(c => c.row);
            int maxR = group.Max(c => c.row);
            int minC = group.Min(c => c.col);
            int maxC = group.Max(c => c.col);
            items.Add(new GridItem { Row = minR, Col = minC, W = maxC - minC + 1, H = maxR - minR + 1 });
        }
        if (debug)
        {
            Console.Error.WriteLine($" Items found: {items.Count}");
            foreach (var item in items)
                Console.Error.WriteLine($" ({item.Row},{item.Col}) {item.W}x{item.H}");
        }

        // ── Visual matching: find cells similar to target ──
        List<GridMatch>? matches = null;
        if (req.TargetRow >= 0 && req.TargetCol >= 0 &&
            req.TargetRow < rows && req.TargetCol < cols &&
            cells[req.TargetRow][req.TargetCol])
        {
            matches = FindMatchingCells(
                captureGray, captureW, captureH,
                cells, rows, cols, cellW, cellH, border,
                req.TargetRow, req.TargetCol, debug);
        }
        return new GridResponse { Cells = cells, Items = items, Matches = matches };
    }

    /// <summary>
    /// Find all occupied cells visually similar to the target cell using full-resolution NCC
    /// (normalized cross-correlation) over each cell's inner area (cell minus <paramref name="border"/>).
    /// </summary>
    /// <returns>
    /// Cells whose NCC against the target is at least 0.70. Empty when the inner area is 4px or
    /// smaller in either dimension, the target does not fit in the image, or the target is nearly
    /// uniform (standard deviation below 3).
    /// </returns>
    private List<GridMatch> FindMatchingCells(
        byte[] gray, int imgW, int imgH,
        List<List<bool>> cells, int rows, int cols,
        float cellW, float cellH, int border,
        int targetRow, int targetCol, bool debug)
    {
        int innerW = (int)cellW - border * 2;
        int innerH = (int)cellH - border * 2;
        if (innerW <= 4 || innerH <= 4) return [];

        int tCx0 = (int)(targetCol * cellW) + border;
        int tCy0 = (int)(targetRow * cellH) + border;
        int tInnerW = Math.Min(innerW, imgW - tCx0);
        int tInnerH = Math.Min(innerH, imgH - tCy0);
        if (tInnerW < innerW || tInnerH < innerH) return [];

        int n = innerW * innerH;

        // Pre-compute target cell pixels and stats
        double[] targetPixels = new double[n];
        double tMean = 0;
        for (int py = 0; py < innerH; py++)
            for (int px = 0; px < innerW; px++)
            {
                double v = gray[(tCy0 + py) * imgW + (tCx0 + px)];
                targetPixels[py * innerW + px] = v;
                tMean += v;
            }
        tMean /= n;
        double tStd = 0;
        for (int i = 0; i < n; i++)
            tStd += (targetPixels[i] - tMean) * (targetPixels[i] - tMean);
        tStd = Math.Sqrt(tStd / n);
        if (debug) Console.Error.WriteLine($" Match target ({targetRow},{targetCol}): {innerW}x{innerH} ({n}px), mean={tMean:F1}, std={tStd:F1}");

        // A nearly flat target correlates with anything; refuse to match.
        if (tStd < 3.0) return [];

        double matchThreshold = 0.70;
        var matches = new List<GridMatch>();
        for (int row = 0; row < rows; row++)
        {
            for (int col = 0; col < cols; col++)
            {
                if (!cells[row][col]) continue;
                if (row == targetRow && col == targetCol) continue;
                int cx0 = (int)(col * cellW) + border;
                int cy0 = (int)(row * cellH) + border;
                int cInnerW = Math.Min(innerW, imgW - cx0);
                int cInnerH = Math.Min(innerH, imgH - cy0);
                if (cInnerW < innerW || cInnerH < innerH) continue;

                // Compute NCC at full resolution
                double cMean = 0;
                for (int py = 0; py < innerH; py++)
                    for (int px = 0; px < innerW; px++)
                        cMean += gray[(cy0 + py) * imgW + (cx0 + px)];
                cMean /= n;
                double cStd = 0, cross = 0;
                for (int py = 0; py < innerH; py++)
                    for (int px = 0; px < innerW; px++)
                    {
                        double cv = gray[(cy0 + py) * imgW + (cx0 + px)] - cMean;
                        double tv = targetPixels[py * innerW + px] - tMean;
                        cStd += cv * cv;
                        cross += tv * cv;
                    }
                cStd = Math.Sqrt(cStd / n);
                double ncc = (tStd > 0 && cStd > 0) ? cross / (n * tStd * cStd) : 0;
                if (debug && ncc > 0.5)
                    Console.Error.WriteLine($" ({row},{col}): NCC={ncc:F3}");
                if (ncc >= matchThreshold)
                    matches.Add(new GridMatch { Row = row, Col = col, Similarity = Math.Round(ncc, 3) });
            }
        }
        if (debug) Console.Error.WriteLine($" Matches for ({targetRow},{targetCol}): {matches.Count}");
        return matches;
    }

    /// <summary>
    /// Loads whichever empty-cell templates are not in memory yet. Each template is tracked
    /// independently, so a missing <c>empty70.png</c> no longer causes <c>empty35.png</c> to be
    /// decoded again on every request.
    /// </summary>
    private void LoadTemplatesIfNeeded()
    {
        if (_emptyTemplate70Gray != null && _emptyTemplate35Gray != null) return;

        // Look for templates relative to exe directory
        var exeDir = AppContext.BaseDirectory;
        // Templates are in assets/ at project root — walk up from bin/Release/net8.0-.../
        var projectRoot = Path.GetFullPath(Path.Combine(exeDir, "..", "..", "..", "..", ".."));

        if (_emptyTemplate70Gray == null)
        {
            var t70Path = Path.Combine(projectRoot, "assets", "empty70.png");
            if (File.Exists(t70Path))
            {
                using var bmp = new Bitmap(t70Path);
                _emptyTemplate70W = bmp.Width;
                _emptyTemplate70H = bmp.Height;
                (_emptyTemplate70Gray, _emptyTemplate70Argb, _emptyTemplate70Stride) = ImageUtils.BitmapToGrayAndArgb(bmp);
            }
        }

        if (_emptyTemplate35Gray == null)
        {
            var t35Path = Path.Combine(projectRoot, "assets", "empty35.png");
            if (File.Exists(t35Path))
            {
                using var bmp = new Bitmap(t35Path);
                _emptyTemplate35W = bmp.Width;
                _emptyTemplate35H = bmp.Height;
                (_emptyTemplate35Gray, _emptyTemplate35Argb, _emptyTemplate35Stride) = ImageUtils.BitmapToGrayAndArgb(bmp);
            }
        }
    }
}

View file

@ -1,234 +0,0 @@
namespace OcrDaemon;
using System.Drawing;
using OpenCvSharp;
using OpenCvSharp.Extensions;
/// <summary>
/// OpenCV-based image preprocessing used to turn tooltip screenshots into OCR-friendly images.
/// </summary>
static class ImagePreprocessor
{
    /// <summary>
    /// Pre-process an image for OCR using morphological white top-hat filtering.
    /// Isolates bright tooltip text, suppresses dim background text visible through overlay.
    /// Pipeline: grayscale → morphological top-hat → Otsu binary → upscale
    /// </summary>
    /// <param name="src">Source bitmap.</param>
    /// <param name="kernelSize">Side length of the square structuring element.</param>
    /// <param name="upscale">Integer upscale factor; values of 1 or less skip the resize.</param>
    /// <returns>A new bitmap with dark text on a white background (caller must dispose).</returns>
    public static Bitmap PreprocessForOcr(Bitmap src, int kernelSize = 41, int upscale = 2)
    {
        using var mat = BitmapConverter.ToMat(src);
        using var gray = ToGray(mat);

        // Morphological white top-hat: isolates bright text on dark background
        using var kernel = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(kernelSize, kernelSize));
        using var tophat = new Mat();
        Cv2.MorphologyEx(gray, tophat, MorphTypes.TopHat, kernel);

        // Otsu binarization: automatic threshold, black text on white
        using var binary = new Mat();
        Cv2.Threshold(tophat, binary, 0, 255, ThresholdTypes.BinaryInv | ThresholdTypes.Otsu);

        // Upscale for better LSTM recognition
        if (upscale > 1)
        {
            using var upscaled = new Mat();
            Cv2.Resize(binary, upscaled, new OpenCvSharp.Size(binary.Width * upscale, binary.Height * upscale),
                interpolation: InterpolationFlags.Cubic);
            return BitmapConverter.ToBitmap(upscaled);
        }
        return BitmapConverter.ToBitmap(binary);
    }

    /// <summary>
    /// Background-subtraction preprocessing: uses the reference frame to remove
    /// background bleed-through from the semi-transparent tooltip overlay.
    /// Pipeline: estimate dimming factor → subtract expected background → threshold → upscale
    /// Returns the upscaled binary Mat directly (caller must dispose).
    /// </summary>
    /// <param name="tooltipCrop">Crop of the current frame containing the tooltip.</param>
    /// <param name="referenceCrop">Crop of the reference frame; must be at least as large as <paramref name="tooltipCrop"/>.</param>
    /// <param name="dimPercentile">Percentile (0-100) of the current/reference ratios used as the dimming factor.</param>
    /// <param name="textThresh">Signal level at or below which a pixel is treated as background.</param>
    /// <param name="upscale">Integer upscale factor; values of 1 or less skip the resize.</param>
    /// <param name="softThreshold">True for contrast-stretched grayscale output, false for hard binary output.</param>
    /// <exception cref="ArgumentException">The reference crop is smaller than the tooltip crop.</exception>
    public static Mat PreprocessWithBackgroundSubMat(Bitmap tooltipCrop, Bitmap referenceCrop,
        int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
    {
        using var curMat = BitmapConverter.ToMat(tooltipCrop);
        using var refMat = BitmapConverter.ToMat(referenceCrop);
        using var curGray = ToGray(curMat);
        using var refGray = ToGray(refMat);
        int rows = curGray.Rows, cols = curGray.Cols;

        // The loops below read the reference through raw pointers using the tooltip's
        // dimensions; a smaller reference would be read out of bounds.
        if (refGray.Rows < rows || refGray.Cols < cols)
        {
            throw new ArgumentException(
                $"Reference crop ({refGray.Cols}x{refGray.Rows}) is smaller than tooltip crop ({cols}x{rows}).",
                nameof(referenceCrop));
        }

        // Estimate the dimming factor of the tooltip overlay.
        // For non-text pixels: current ≈ reference × dim_factor
        // Collect ratios where reference is bright enough to be meaningful
        var ratios = new List<double>();
        unsafe
        {
            byte* curPtr = (byte*)curGray.Data;
            byte* refPtr = (byte*)refGray.Data;
            int curStep = (int)curGray.Step();
            int refStep = (int)refGray.Step();
            for (int y = 0; y < rows; y++)
                for (int x = 0; x < cols; x++)
                {
                    byte r = refPtr[y * refStep + x];
                    byte c = curPtr[y * curStep + x];
                    if (r > 30) // skip very dark reference pixels (no signal)
                        ratios.Add((double)c / r);
                }
        }
        if (ratios.Count == 0)
        {
            // Fallback: use top-hat preprocessing, convert to Mat
            using var fallbackBmp = PreprocessForOcr(tooltipCrop, 41, upscale);
            return BitmapConverter.ToMat(fallbackBmp);
        }

        // Use a low percentile of ratios as the dimming factor.
        // Text pixels have high ratios (bright on dark), overlay pixels have low ratios.
        // A low percentile captures the overlay dimming, ignoring text.
        ratios.Sort();
        int idx = Math.Clamp(ratios.Count * dimPercentile / 100, 0, ratios.Count - 1);
        double dimFactor = ratios[idx];
        // Clamp to sane range
        dimFactor = Math.Clamp(dimFactor, 0.05, 0.95);

        // Subtract expected background: text_signal = current - reference × dimFactor
        using var textSignal = new Mat(rows, cols, MatType.CV_8UC1);
        unsafe
        {
            byte* curPtr = (byte*)curGray.Data;
            byte* refPtr = (byte*)refGray.Data;
            byte* outPtr = (byte*)textSignal.Data;
            int curStep = (int)curGray.Step();
            int refStep = (int)refGray.Step();
            int outStep = (int)textSignal.Step();
            for (int y = 0; y < rows; y++)
                for (int x = 0; x < cols; x++)
                {
                    double expected = refPtr[y * refStep + x] * dimFactor;
                    double signal = curPtr[y * curStep + x] - expected;
                    outPtr[y * outStep + x] = (byte)Math.Clamp(signal, 0, 255);
                }
        }

        Mat result;
        if (softThreshold)
        {
            // Soft threshold: clip below textThresh, contrast-stretch, invert.
            // Produces grayscale anti-aliased text on white background,
            // matching the training data format (text2image renders).
            result = new Mat(rows, cols, MatType.CV_8UC1);
            unsafe
            {
                byte* srcPtr = (byte*)textSignal.Data;
                byte* dstPtr = (byte*)result.Data;
                int srcStep = (int)textSignal.Step();
                int dstStep = (int)result.Step();

                // Find max signal above threshold for contrast stretch
                // (starts at 1 so the division below can never be by zero)
                int maxClipped = 1;
                for (int y = 0; y < rows; y++)
                    for (int x = 0; x < cols; x++)
                    {
                        int val = srcPtr[y * srcStep + x] - textThresh;
                        if (val > maxClipped) maxClipped = val;
                    }

                // Clip, stretch, invert: background → 255 (white), text → dark
                for (int y = 0; y < rows; y++)
                    for (int x = 0; x < cols; x++)
                    {
                        int clipped = srcPtr[y * srcStep + x] - textThresh;
                        if (clipped <= 0)
                        {
                            dstPtr[y * dstStep + x] = 255; // background
                        }
                        else
                        {
                            int stretched = clipped * 255 / maxClipped;
                            dstPtr[y * dstStep + x] = (byte)(255 - stretched); // invert
                        }
                    }
            }
        }
        else
        {
            // Hard binary threshold (original behavior)
            result = new Mat();
            Cv2.Threshold(textSignal, result, textThresh, 255, ThresholdTypes.BinaryInv);
        }

        // UpscaleMat always returns a new Mat, so the intermediate can be disposed here.
        using var _result = result;
        return UpscaleMat(result, upscale);
    }

    /// <summary>
    /// Background-subtraction preprocessing returning a Bitmap (convenience wrapper).
    /// </summary>
    public static Bitmap PreprocessWithBackgroundSub(Bitmap tooltipCrop, Bitmap referenceCrop,
        int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
    {
        using var mat = PreprocessWithBackgroundSubMat(tooltipCrop, referenceCrop, dimPercentile, textThresh, upscale, softThreshold);
        return BitmapConverter.ToBitmap(mat);
    }

    /// <summary>
    /// Detect text lines via horizontal projection on a binary image.
    /// Binary should be inverted: text=black(0), background=white(255).
    /// Returns list of (yStart, yEnd) row ranges for each detected text line.
    /// </summary>
    /// <param name="binary">Image read as one byte per pixel.</param>
    /// <param name="minRowPixels">Minimum number of dark pixels for a row to count as text.</param>
    /// <param name="gapTolerance">A line ends once more than this many consecutive rows are inactive.</param>
    public static List<(int yStart, int yEnd)> DetectTextLines(
        Mat binary, int minRowPixels = 2, int gapTolerance = 5)
    {
        int rows = binary.Rows, cols = binary.Cols;

        // Count dark (text) pixels per row — use < 128 threshold since
        // cubic upscaling introduces anti-aliased intermediate values
        var rowCounts = new int[rows];
        unsafe
        {
            byte* ptr = (byte*)binary.Data;
            int step = (int)binary.Step();
            for (int y = 0; y < rows; y++)
                for (int x = 0; x < cols; x++)
                    if (ptr[y * step + x] < 128)
                        rowCounts[y]++;
        }

        // Group into contiguous runs with gap tolerance
        var lines = new List<(int yStart, int yEnd)>();
        int lineStart = -1, lastActive = -1;
        for (int y = 0; y < rows; y++)
        {
            if (rowCounts[y] >= minRowPixels)
            {
                if (lineStart < 0) lineStart = y;
                lastActive = y;
            }
            else if (lineStart >= 0 && y - lastActive > gapTolerance)
            {
                lines.Add((lineStart, lastActive));
                lineStart = -1;
            }
        }
        // Flush a line that runs to the bottom of the image.
        if (lineStart >= 0)
            lines.Add((lineStart, lastActive));
        return lines;
    }

    /// <summary>Returns a new Mat (caller must dispose). Does NOT dispose src.</summary>
    private static Mat UpscaleMat(Mat src, int factor)
    {
        if (factor > 1)
        {
            var upscaled = new Mat();
            Cv2.Resize(src, upscaled, new OpenCvSharp.Size(src.Width * factor, src.Height * factor),
                interpolation: InterpolationFlags.Cubic);
            return upscaled;
        }
        return src.Clone();
    }

    /// <summary>
    /// Converts a Mat to single-channel grayscale regardless of whether the source bitmap was
    /// 1-, 3- or 4-channel. Returns a new Mat (caller must dispose). Does NOT dispose src.
    /// </summary>
    private static Mat ToGray(Mat src)
    {
        var gray = new Mat();
        try
        {
            switch (src.Channels())
            {
                case 1:
                    src.CopyTo(gray);
                    break;
                case 3:
                    Cv2.CvtColor(src, gray, ColorConversionCodes.BGR2GRAY);
                    break;
                default:
                    Cv2.CvtColor(src, gray, ColorConversionCodes.BGRA2GRAY);
                    break;
            }
            return gray;
        }
        catch
        {
            gray.Dispose();
            throw;
        }
    }
}

View file

@ -1,89 +0,0 @@
namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using Tesseract;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
/// <summary>
/// Conversion helpers between System.Drawing bitmaps, Tesseract types and raw pixel buffers.
/// </summary>
static class ImageUtils
{
    /// <summary>
    /// Converts a bitmap to a Tesseract <see cref="Pix"/> by PNG-encoding it in memory.
    /// </summary>
    public static Pix BitmapToPix(Bitmap bitmap)
    {
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        return Pix.LoadFromMemory(ms.ToArray());
    }

    /// <summary>
    /// Walks a recognized page line by line and collects the words of each line.
    /// Blank words and words with confidence below 50 are dropped; lines left without words are omitted.
    /// </summary>
    /// <param name="page">Recognized Tesseract page.</param>
    /// <param name="offsetX">Added to every word's X coordinate.</param>
    /// <param name="offsetY">Added to every word's Y coordinate.</param>
    public static List<OcrLineResult> ExtractLinesFromPage(Page page, int offsetX, int offsetY)
    {
        var lines = new List<OcrLineResult>();
        using var iter = page.GetIterator();
        if (iter == null) return lines;
        iter.Begin();
        do
        {
            var words = new List<OcrWordResult>();
            do
            {
                var wordText = iter.GetText(PageIteratorLevel.Word);
                if (string.IsNullOrWhiteSpace(wordText)) continue;
                float conf = iter.GetConfidence(PageIteratorLevel.Word);
                if (conf < 50) continue; // reject low-confidence garbage from background bleed
                if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var bounds))
                {
                    words.Add(new OcrWordResult
                    {
                        Text = wordText.Trim(),
                        X = bounds.X1 + offsetX,
                        Y = bounds.Y1 + offsetY,
                        Width = bounds.Width,
                        Height = bounds.Height,
                    });
                }
            } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
            if (words.Count > 0)
            {
                var lineText = string.Join(" ", words.Select(w => w.Text));
                lines.Add(new OcrLineResult { Text = lineText, Words = words });
            }
        } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.TextLine));
        return lines;
    }

    /// <summary>
    /// Copies a bitmap's pixels as 32bpp ARGB and derives a grayscale plane from them.
    /// </summary>
    /// <returns>
    /// <c>gray</c>: tightly packed width*height bytes, each the integer mean of the pixel's first three bytes;
    /// <c>argb</c>: the raw locked bytes (stride * height);
    /// <c>stride</c>: bytes per row of <c>argb</c>.
    /// </returns>
    public static (byte[] gray, byte[] argb, int stride) BitmapToGrayAndArgb(Bitmap bmp)
    {
        int w = bmp.Width, h = bmp.Height;
        var data = bmp.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        byte[] argb;
        int stride;
        try
        {
            stride = data.Stride;
            argb = new byte[stride * h];
            Marshal.Copy(data.Scan0, argb, 0, argb.Length);
        }
        finally
        {
            // Always release the lock, even if the allocation or copy throws.
            bmp.UnlockBits(data);
        }

        byte[] gray = new byte[w * h];
        for (int y = 0; y < h; y++)
            for (int x = 0; x < w; x++)
            {
                int i = y * stride + x * 4;
                gray[y * w + x] = (byte)((argb[i] + argb[i + 1] + argb[i + 2]) / 3);
            }
        return (gray, argb, stride);
    }

    /// <summary>
    /// Maps a file extension to an image format: .jpg/.jpeg → JPEG, .bmp → BMP, anything else → PNG.
    /// </summary>
    public static SdImageFormat GetImageFormat(string path)
    {
        var ext = Path.GetExtension(path).ToLowerInvariant();
        return ext switch
        {
            ".jpg" or ".jpeg" => SdImageFormat.Jpeg,
            ".bmp" => SdImageFormat.Bmp,
            _ => SdImageFormat.Png,
        };
    }
}

View file

@ -1,548 +0,0 @@
namespace OcrDaemon;
using System.Text.Json.Serialization;
/// <summary>
/// A daemon request as deserialized from JSON. Each property is bound to the JSON key named
/// in its <c>JsonPropertyName</c> attribute; which properties matter depends on <see cref="Cmd"/>.
/// </summary>
class Request
{
/// <summary>Command name selecting the handler.</summary>
[JsonPropertyName("cmd")]
public string? Cmd { get; set; }
/// <summary>Screen region to operate on; the grid command rejects requests without it.</summary>
[JsonPropertyName("region")]
public RegionRect? Region { get; set; }
[JsonPropertyName("path")]
public string? Path { get; set; }
/// <summary>Grid column count; must be positive for the grid command.</summary>
[JsonPropertyName("cols")]
public int Cols { get; set; }
/// <summary>Grid row count; must be positive for the grid command.</summary>
[JsonPropertyName("rows")]
public int Rows { get; set; }
/// <summary>Occupancy threshold for the grid command; values of 0 or less select its default of 5.</summary>
[JsonPropertyName("threshold")]
public int Threshold { get; set; }
[JsonPropertyName("minCellSize")]
public int MinCellSize { get; set; }
[JsonPropertyName("maxCellSize")]
public int MaxCellSize { get; set; }
/// <summary>Optional image file passed to <c>ScreenCapture.CaptureOrLoad</c> together with the region.</summary>
[JsonPropertyName("file")]
public string? File { get; set; }
/// <summary>When true, handlers write diagnostic output to stderr.</summary>
[JsonPropertyName("debug")]
public bool Debug { get; set; }
/// <summary>Row of the target cell for visual matching; defaults to -1, which disables matching.</summary>
[JsonPropertyName("targetRow")]
public int TargetRow { get; set; } = -1;
/// <summary>Column of the target cell for visual matching; defaults to -1, which disables matching.</summary>
[JsonPropertyName("targetCol")]
public int TargetCol { get; set; } = -1;
[JsonPropertyName("engine")]
public string? Engine { get; set; }
[JsonPropertyName("preprocess")]
public string? Preprocess { get; set; }
[JsonPropertyName("params")]
public DiffOcrParams? Params { get; set; }
[JsonPropertyName("edgeParams")]
public EdgeOcrParams? EdgeParams { get; set; }
[JsonPropertyName("cursorX")]
public int? CursorX { get; set; }
[JsonPropertyName("cursorY")]
public int? CursorY { get; set; }
}
/// <summary>
/// Axis-aligned rectangle (x, y, width, height) used in requests and responses.
/// </summary>
class RegionRect
{
[JsonPropertyName("x")]
public int X { get; set; }
[JsonPropertyName("y")]
public int Y { get; set; }
[JsonPropertyName("width")]
public int Width { get; set; }
[JsonPropertyName("height")]
public int Height { get; set; }
}
/// <summary>
/// Message emitted once startup succeeds; always serializes <c>ok</c> and <c>ready</c> as true.
/// </summary>
class ReadyResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
[JsonPropertyName("ready")]
public bool Ready => true;
}
/// <summary>
/// Minimal success response: serializes only <c>ok</c> = true.
/// </summary>
class OkResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
}
/// <summary>
/// Failure response: serializes <c>ok</c> = false plus the error message given to the constructor.
/// </summary>
class ErrorResponse(string message)
{
[JsonPropertyName("ok")]
public bool Ok => false;
/// <summary>The message passed to the primary constructor.</summary>
[JsonPropertyName("error")]
public string Error => message;
}
/// <summary>
/// Successful OCR result: the recognized text plus its per-line breakdown.
/// </summary>
class OcrResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
[JsonPropertyName("text")]
public string Text { get; set; } = "";
[JsonPropertyName("lines")]
public List<OcrLineResult> Lines { get; set; } = [];
}
/// <summary>
/// Same shape as <see cref="OcrResponse"/> with an additional optional <see cref="Region"/>.
/// </summary>
class DiffOcrResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
[JsonPropertyName("text")]
public string Text { get; set; } = "";
[JsonPropertyName("lines")]
public List<OcrLineResult> Lines { get; set; } = [];
// NOTE(review): presumably the detected crop region the text was read from — confirm against the handler.
[JsonPropertyName("region")]
public RegionRect? Region { get; set; }
}
/// <summary>
/// One recognized text line and the words it is made of.
/// </summary>
class OcrLineResult
{
/// <summary>Line text; <c>ImageUtils.ExtractLinesFromPage</c> builds it by joining the word texts with single spaces.</summary>
[JsonPropertyName("text")]
public string Text { get; set; } = "";
[JsonPropertyName("words")]
public List<OcrWordResult> Words { get; set; } = [];
}
/// <summary>
/// One recognized word with its bounding box.
/// </summary>
class OcrWordResult
{
[JsonPropertyName("text")]
public string Text { get; set; } = "";
/// <summary>Left edge; <c>ImageUtils.ExtractLinesFromPage</c> adds the caller's X offset to the OCR box.</summary>
[JsonPropertyName("x")]
public int X { get; set; }
/// <summary>Top edge; <c>ImageUtils.ExtractLinesFromPage</c> adds the caller's Y offset to the OCR box.</summary>
[JsonPropertyName("y")]
public int Y { get; set; }
[JsonPropertyName("width")]
public int Width { get; set; }
[JsonPropertyName("height")]
public int Height { get; set; }
}
/// <summary>
/// Successful capture result carrying the image as a string.
/// </summary>
class CaptureResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
// NOTE(review): encoding of this string (e.g. base64 vs. file path) is not visible here — confirm with the producer.
[JsonPropertyName("image")]
public string Image { get; set; } = "";
}
/// <summary>
/// Result of a grid scan as produced by <c>GridHandler.HandleGrid</c>.
/// </summary>
class GridResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
/// <summary>Occupancy flags indexed as <c>Cells[row][col]</c>; true means the cell is occupied.</summary>
[JsonPropertyName("cells")]
public List<List<bool>> Cells { get; set; } = [];
/// <summary>Bounding boxes of merged groups of occupied cells.</summary>
[JsonPropertyName("items")]
public List<GridItem>? Items { get; set; }
/// <summary>Cells similar to the target cell; null when no valid occupied target was requested.</summary>
[JsonPropertyName("matches")]
public List<GridMatch>? Matches { get; set; }
}
/// <summary>
/// An item on the grid, expressed in cell units as the bounding box of its cell group.
/// </summary>
class GridItem
{
/// <summary>Topmost row of the item.</summary>
[JsonPropertyName("row")]
public int Row { get; set; }
/// <summary>Leftmost column of the item.</summary>
[JsonPropertyName("col")]
public int Col { get; set; }
/// <summary>Width in cells.</summary>
[JsonPropertyName("w")]
public int W { get; set; }
/// <summary>Height in cells.</summary>
[JsonPropertyName("h")]
public int H { get; set; }
}
/// <summary>
/// A grid cell that visually resembles the requested target cell.
/// </summary>
class GridMatch
{
[JsonPropertyName("row")]
public int Row { get; set; }
[JsonPropertyName("col")]
public int Col { get; set; }
/// <summary>Normalized cross-correlation against the target, rounded to 3 decimals by the grid handler.</summary>
[JsonPropertyName("similarity")]
public double Similarity { get; set; }
}
/// <summary>
/// Response shape for grid detection: a detected flag plus region, dimensions and cell size.
/// </summary>
// NOTE(review): the producing handler is not visible here; field meanings are taken from their names — confirm.
class DetectGridResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
[JsonPropertyName("detected")]
public bool Detected { get; set; }
[JsonPropertyName("region")]
public RegionRect? Region { get; set; }
[JsonPropertyName("cols")]
public int Cols { get; set; }
[JsonPropertyName("rows")]
public int Rows { get; set; }
[JsonPropertyName("cellWidth")]
public double CellWidth { get; set; }
[JsonPropertyName("cellHeight")]
public double CellHeight { get; set; }
}
/// <summary>
/// Response shape for template matching: a found flag, a rectangle and a confidence value.
/// </summary>
// NOTE(review): the producing handler is not visible here; the confidence scale is unknown — confirm.
class TemplateMatchResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
[JsonPropertyName("found")]
public bool Found { get; set; }
[JsonPropertyName("x")]
public int X { get; set; }
[JsonPropertyName("y")]
public int Y { get; set; }
[JsonPropertyName("width")]
public int Width { get; set; }
[JsonPropertyName("height")]
public int Height { get; set; }
[JsonPropertyName("confidence")]
public double Confidence { get; set; }
}
/// <summary>
/// Tunable parameters for the diff-based crop step, bound from JSON.
/// </summary>
sealed class DiffCropParams
{
    [JsonPropertyName("diffThresh")]
    public int DiffThresh { get; set; } = 20;

    [JsonPropertyName("rowThreshDiv")]
    public int RowThreshDiv { get; set; } = 40;

    [JsonPropertyName("colThreshDiv")]
    public int ColThreshDiv { get; set; } = 8;

    [JsonPropertyName("maxGap")]
    public int MaxGap { get; set; } = 20;

    [JsonPropertyName("trimCutoff")]
    public double TrimCutoff { get; set; } = 0.4;

    [JsonPropertyName("ocrPad")]
    public int OcrPad { get; set; } = 10;

    /// <summary>
    /// One-line summary of all parameters. Formatted with the invariant culture so that
    /// <see cref="TrimCutoff"/> always prints with a '.' decimal separator, whatever the
    /// machine's regional settings.
    /// </summary>
    public override string ToString() =>
        FormattableString.Invariant(
            $"diffThresh={DiffThresh} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} rowThreshDiv={RowThreshDiv} colThreshDiv={ColThreshDiv} ocrPad={OcrPad}");
}
/// <summary>
/// Tunable parameters for OCR preprocessing and recognition, bound from JSON.
/// </summary>
sealed class OcrParams
{
// preprocessing
[JsonPropertyName("kernelSize")]
public int KernelSize { get; set; } = 41;
[JsonPropertyName("upscale")]
public int Upscale { get; set; } = 2;
/// <summary>Selects which parameter set <see cref="ToString"/> reports: background-subtraction (true) or top-hat (false).</summary>
[JsonPropertyName("useBackgroundSub")]
public bool UseBackgroundSub { get; set; } = true;
[JsonPropertyName("dimPercentile")]
public int DimPercentile { get; set; } = 40;
[JsonPropertyName("textThresh")]
public int TextThresh { get; set; } = 60;
[JsonPropertyName("softThreshold")]
public bool SoftThreshold { get; set; } = false;
// Tesseract-specific
[JsonPropertyName("usePerLineOcr")]
public bool UsePerLineOcr { get; set; } = false;
[JsonPropertyName("lineGapTolerance")]
public int LineGapTolerance { get; set; } = 10;
[JsonPropertyName("linePadY")]
public int LinePadY { get; set; } = 20;
[JsonPropertyName("psm")]
public int Psm { get; set; } = 6;
// post-merge / Python engine tuning
[JsonPropertyName("mergeGap")]
public int MergeGap { get; set; } = 0;
[JsonPropertyName("linkThreshold")]
public double? LinkThreshold { get; set; }
[JsonPropertyName("textThreshold")]
public double? TextThreshold { get; set; }
[JsonPropertyName("lowText")]
public double? LowText { get; set; }
[JsonPropertyName("widthThs")]
public double? WidthThs { get; set; }
[JsonPropertyName("paragraph")]
public bool? Paragraph { get; set; }
/// <summary>
/// One-line summary of the preprocessing settings: the background-subtraction values when
/// <see cref="UseBackgroundSub"/> is true, otherwise the top-hat kernel size.
/// </summary>
public override string ToString() =>
UseBackgroundSub
? $"bgSub dimPct={DimPercentile} textThresh={TextThresh} soft={SoftThreshold} upscale={Upscale} mergeGap={MergeGap}"
: $"topHat kernel={KernelSize} upscale={Upscale} mergeGap={MergeGap}";
}
/// <summary>
/// Parameter bundle pairing diff-crop settings with OCR settings; both default to fresh instances.
/// </summary>
sealed class DiffOcrParams
{
[JsonPropertyName("crop")]
public DiffCropParams Crop { get; set; } = new();
[JsonPropertyName("ocr")]
public OcrParams Ocr { get; set; } = new();
/// <summary>Both parts' summaries, each wrapped in square brackets.</summary>
public override string ToString() => $"[{Crop}] [{Ocr}]";
}
/// <summary>
/// Tunable parameters for the edge-based crop step, bound from JSON.
/// </summary>
sealed class EdgeCropParams
{
    [JsonPropertyName("darkThresh")]
    public int DarkThresh { get; set; } = 40;

    [JsonPropertyName("minDarkRun")]
    public int MinDarkRun { get; set; } = 200;

    [JsonPropertyName("runGapTolerance")]
    public int RunGapTolerance { get; set; } = 15;

    [JsonPropertyName("rowThreshDiv")]
    public int RowThreshDiv { get; set; } = 40;

    [JsonPropertyName("colThreshDiv")]
    public int ColThreshDiv { get; set; } = 8;

    [JsonPropertyName("maxGap")]
    public int MaxGap { get; set; } = 15;

    [JsonPropertyName("trimCutoff")]
    public double TrimCutoff { get; set; } = 0.3;

    [JsonPropertyName("ocrPad")]
    public int OcrPad { get; set; } = 10;

    /// <summary>
    /// One-line summary of the crop parameters (<see cref="OcrPad"/> is not included).
    /// Formatted with the invariant culture so that <see cref="TrimCutoff"/> always prints
    /// with a '.' decimal separator, whatever the machine's regional settings.
    /// </summary>
    public override string ToString() =>
        FormattableString.Invariant(
            $"darkThresh={DarkThresh} minRun={MinDarkRun} runGap={RunGapTolerance} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} rowDiv={RowThreshDiv} colDiv={ColThreshDiv}");
}
/// <summary>
/// Parameter bundle pairing edge-crop settings with OCR settings; both default to fresh instances.
/// </summary>
sealed class EdgeOcrParams
{
[JsonPropertyName("crop")]
public EdgeCropParams Crop { get; set; } = new();
[JsonPropertyName("ocr")]
public OcrParams Ocr { get; set; } = new();
/// <summary>Both parts' summaries, each wrapped in square brackets.</summary>
public override string ToString() => $"[{Crop}] [{Ocr}]";
}
/// <summary>
/// One OCR test case: an id, two image references and the list of expected strings.
/// </summary>
// NOTE(review): presumably loaded from tessdata/cases.json and Image/FullImage are file paths — confirm with the test runner.
class TestCase
{
[JsonPropertyName("id")]
public string Id { get; set; } = "";
[JsonPropertyName("image")]
public string Image { get; set; } = "";
[JsonPropertyName("fullImage")]
public string FullImage { get; set; } = "";
[JsonPropertyName("expected")]
public List<string> Expected { get; set; } = [];
}
/// <summary>
/// Outcome of one OCR test case: pass flag, score and the matched / missed / extra strings.
/// </summary>
// NOTE(review): the score's scale and the pass criterion are decided by the test runner, not visible here.
class TestCaseResult
{
[JsonPropertyName("id")]
public string Id { get; set; } = "";
[JsonPropertyName("passed")]
public bool Passed { get; set; }
[JsonPropertyName("score")]
public double Score { get; set; }
[JsonPropertyName("matched")]
public List<string> Matched { get; set; } = [];
[JsonPropertyName("missed")]
public List<string> Missed { get; set; } = [];
[JsonPropertyName("extra")]
public List<string> Extra { get; set; } = [];
}
/// <summary>
/// Aggregate result of an OCR test run: pass/fail/total counters plus the per-case results.
/// </summary>
class TestResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
[JsonPropertyName("passed")]
public int Passed { get; set; }
[JsonPropertyName("failed")]
public int Failed { get; set; }
[JsonPropertyName("total")]
public int Total { get; set; }
[JsonPropertyName("results")]
public List<TestCaseResult> Results { get; set; } = [];
}
/// <summary>
/// Result of a parameter-tuning run: the best score, the parameters that achieved it and the iteration count.
/// </summary>
class TuneResponse
{
[JsonPropertyName("ok")]
public bool Ok => true;
[JsonPropertyName("bestScore")]
public double BestScore { get; set; }
[JsonPropertyName("bestParams")]
public DiffOcrParams BestParams { get; set; } = new();
[JsonPropertyName("iterations")]
public int Iterations { get; set; }
}
// ── Crop test models ────────────────────────────────────────────────────────
/// <summary>
/// Integer 2D point, serialized as <c>x</c> / <c>y</c>.
/// </summary>
class PointXY
{
[JsonPropertyName("x")]
public int X { get; set; }
[JsonPropertyName("y")]
public int Y { get; set; }
}
/// <summary>
/// One crop test case: two image references, the expected rectangle given by its corner points,
/// and an optional cursor position.
/// </summary>
// NOTE(review): whether BottomRight is inclusive is decided by the crop test runner — confirm.
class CropTestCase
{
[JsonPropertyName("id")]
public string Id { get; set; } = "";
[JsonPropertyName("image")]
public string Image { get; set; } = "";
[JsonPropertyName("snapshotImage")]
public string SnapshotImage { get; set; } = "";
[JsonPropertyName("topLeft")]
public PointXY TopLeft { get; set; } = new();
[JsonPropertyName("bottomRight")]
public PointXY BottomRight { get; set; } = new();
[JsonPropertyName("cursorX")]
public int? CursorX { get; set; }
[JsonPropertyName("cursorY")]
public int? CursorY { get; set; }
}
/// <summary>
/// Outcome of one crop test case: IoU, the expected and actual rectangles, and per-edge deltas.
/// </summary>
// NOTE(review): sign convention of the Delta* values (actual - expected or the reverse) is not visible here — confirm.
class CropTestResult
{
[JsonPropertyName("id")]
public string Id { get; set; } = "";
[JsonPropertyName("iou")]
public double IoU { get; set; }
[JsonPropertyName("expected")]
public RegionRect Expected { get; set; } = new();
/// <summary>Nullable: may be absent from the result.</summary>
[JsonPropertyName("actual")]
public RegionRect? Actual { get; set; }
[JsonPropertyName("deltaTop")]
public int DeltaTop { get; set; }
[JsonPropertyName("deltaLeft")]
public int DeltaLeft { get; set; }
[JsonPropertyName("deltaRight")]
public int DeltaRight { get; set; }
[JsonPropertyName("deltaBottom")]
public int DeltaBottom { get; set; }
}
/// <summary>
/// Success response for a crop test run: the method name, the average overlap score and the
/// per-case results.
/// </summary>
class CropTestResponse
{
    /// <summary>Always <c>true</c>; marks this as a success payload.</summary>
    [JsonPropertyName("ok")] public bool Ok { get; } = true;

    /// <summary>Name of the crop method that was evaluated.</summary>
    [JsonPropertyName("method")] public string Method { get; set; } = string.Empty;

    /// <summary>Average of the per-case overlap scores.</summary>
    [JsonPropertyName("avgIoU")] public double AvgIoU { get; set; }

    /// <summary>One entry per crop test case.</summary>
    [JsonPropertyName("results")] public List<CropTestResult> Results { get; set; } = new();
}
/// <summary>
/// Success response for a crop tuning run: best average overlap score, the crop parameters
/// that produced it, and the number of evaluations.
/// </summary>
class CropTuneResponse
{
    /// <summary>Always <c>true</c>; marks this as a success payload.</summary>
    [JsonPropertyName("ok")] public bool Ok { get; } = true;

    /// <summary>Best average overlap score found.</summary>
    [JsonPropertyName("bestAvgIoU")] public double BestAvgIoU { get; set; }

    /// <summary>Crop parameters that achieved <see cref="BestAvgIoU"/>.</summary>
    [JsonPropertyName("bestParams")] public DiffCropParams BestParams { get; set; } = new DiffCropParams();

    /// <summary>Number of evaluations performed.</summary>
    [JsonPropertyName("iterations")] public int Iterations { get; set; }
}

View file

@ -1,43 +0,0 @@
<!-- OcrDaemon project file: console executable for .NET 8 on Windows. -->
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings: executable output, nullable reference types, implicit usings, unsafe code allowed. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0-windows10.0.19041.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<!-- NuGet dependencies: OpenCvSharp (with Windows native runtime), System.Drawing and Tesseract. -->
<ItemGroup>
<PackageReference Include="OpenCvSharp4" Version="4.11.0.20250507" />
<PackageReference Include="OpenCvSharp4.Extensions" Version="4.11.0.20250507" />
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.11.0.20250507" />
<PackageReference Include="System.Drawing.Common" Version="8.0.12" />
<PackageReference Include="Tesseract" Version="5.2.0" />
</ItemGroup>
<!-- tessdata files copied next to the executable when newer than the existing copy. -->
<ItemGroup>
<None Update="tessdata\eng.traineddata">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<!-- Optional poe2 model: only touched when the file exists in the source tree. -->
<None Update="tessdata\poe2.traineddata" Condition="Exists('tessdata\poe2.traineddata')">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<!-- Test-case definition files. -->
<None Update="tessdata\cases.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="tessdata\crop.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<!-- Optional poe2 user-words / user-patterns files, again only when present. -->
<None Update="tessdata\poe2.user-words" Condition="Exists('tessdata\poe2.user-words')">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="tessdata\poe2.user-patterns" Condition="Exists('tessdata\poe2.user-patterns')">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<!-- Every file directly under tessdata\images (non-recursive glob). -->
<None Include="tessdata\images\*">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

View file

@ -1,916 +0,0 @@
namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using System.Text.Json;
using OpenCvSharp;
using OpenCvSharp.Extensions;
using Tesseract;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
class OcrHandler(TesseractEngine engine)
{
private Bitmap? _referenceFrame;
private RegionRect? _referenceRegion;
/// <summary>
/// Obtains an image through <c>ScreenCapture.CaptureOrLoad</c> using the request's file and
/// region, then runs the shared Tesseract engine over the whole image.
/// </summary>
/// <param name="req">Request supplying <c>File</c> and <c>Region</c>.</param>
/// <returns>An <c>OcrResponse</c> with the page text and its lines (zero coordinate offset).</returns>
public object HandleOcr(Request req)
{
    using var source = ScreenCapture.CaptureOrLoad(req.File, req.Region);
    using var pixImage = ImageUtils.BitmapToPix(source);
    using var ocrPage = engine.Process(pixImage);

    var fullText = ocrPage.GetText();
    var pageLines = ImageUtils.ExtractLinesFromPage(ocrPage, offsetX: 0, offsetY: 0);

    return new OcrResponse
    {
        Text = fullText,
        Lines = pageLines,
    };
}
/// <summary>
/// Saves an image to <c>req.Path</c>. When a reference snapshot is stored, that exact frame
/// is written (the same image diff-ocr compares against); otherwise a new frame is obtained
/// via <c>ScreenCapture.CaptureOrLoad</c>.
/// </summary>
/// <param name="req">Request; <c>Path</c> is required, <c>File</c>/<c>Region</c> select the capture.</param>
/// <returns><c>ErrorResponse</c> when <c>Path</c> is missing, otherwise <c>OkResponse</c>.</returns>
public object HandleScreenshot(Request req)
{
    if (string.IsNullOrEmpty(req.Path))
        return new ErrorResponse("screenshot command requires 'path'");

    // Only a frame captured here is owned by this call; the stored reference frame must
    // stay alive for later diff-ocr requests.
    Bitmap? owned = null;
    try
    {
        var bitmap = _referenceFrame ?? (owned = ScreenCapture.CaptureOrLoad(req.File, req.Region));
        var format = ImageUtils.GetImageFormat(req.Path);
        var dir = Path.GetDirectoryName(req.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        bitmap.Save(req.Path, format);
        return new OkResponse();
    }
    finally
    {
        // Runs on failure too, so a failed save no longer leaks the temporary bitmap.
        owned?.Dispose();
    }
}
/// <summary>
/// Obtains an image through <c>ScreenCapture.CaptureOrLoad</c> and returns it PNG-encoded
/// as a base64 string.
/// </summary>
/// <param name="req">Request supplying <c>File</c> and <c>Region</c>.</param>
/// <returns>A <c>CaptureResponse</c> whose <c>Image</c> is the base64 PNG.</returns>
public object HandleCapture(Request req)
{
    using var frame = ScreenCapture.CaptureOrLoad(req.File, req.Region);
    using var pngBuffer = new MemoryStream();
    frame.Save(pngBuffer, SdImageFormat.Png);

    byte[] pngBytes = pngBuffer.ToArray();
    string encoded = Convert.ToBase64String(pngBytes);
    return new CaptureResponse { Image = encoded };
}
/// <summary>
/// Stores a new reference frame (and the region it was taken with) for later diff-ocr calls.
/// </summary>
/// <param name="req">Request supplying <c>File</c> and <c>Region</c> for the capture.</param>
/// <returns><c>OkResponse</c>.</returns>
public object HandleSnapshot(Request req)
{
    // Capture before touching the stored frame: if the capture throws, the previous
    // snapshot stays valid instead of leaving a disposed bitmap in _referenceFrame.
    var frame = ScreenCapture.CaptureOrLoad(req.File, req.Region);

    var previous = _referenceFrame;
    _referenceFrame = frame;
    // Copy the region so later changes to the request object cannot alter the stored one.
    _referenceRegion = req.Region == null
        ? null
        : new RegionRect { X = req.Region.X, Y = req.Region.Y, Width = req.Region.Width, Height = req.Region.Height };
    previous?.Dispose();
    return new OkResponse();
}
/// <summary>
/// Diff-OCR entry point using default parameters, except that a positive
/// <c>req.Threshold</c> overrides the crop diff threshold.
/// </summary>
public object HandleDiffOcr(Request req)
{
    DiffOcrParams parameters;
    if (req.Threshold > 0)
    {
        var crop = new DiffCropParams { DiffThresh = req.Threshold };
        parameters = new DiffOcrParams { Crop = crop };
    }
    else
    {
        parameters = new DiffOcrParams();
    }
    return HandleDiffOcr(req, parameters);
}
/// <summary>
/// Diff detection + crop only. Compares the stored reference frame with a freshly obtained
/// frame, finds the largest block of pixels that became darker, and crops both frames to it.
/// </summary>
/// <param name="req">Request; <c>File</c> and <c>Region</c> select the current frame
/// (the snapshot's region is used when the request has none).</param>
/// <param name="c">Crop tuning parameters (diff threshold, row/column divisors, max gap, trim cutoff).</param>
/// <returns>
/// <c>null</c> when there is no reference frame, the regions cannot be aligned, nothing changed,
/// or the detected block is smaller than 50 px on either axis. Otherwise the crop of the current
/// frame, the matching crop of the reference, the full current frame, and the crop rectangle
/// (offset by the region origin). The caller owns and must dispose all three bitmaps.
/// </returns>
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffCropParams c)
{
    if (_referenceFrame == null)
        return null;

    var diffRegion = req.Region ?? _referenceRegion;
    int baseX = diffRegion?.X ?? 0;
    int baseY = diffRegion?.Y ?? 0;

    var current = ScreenCapture.CaptureOrLoad(req.File, diffRegion);
    Bitmap? ownedRef = null;   // cropped copy of the reference frame, if one had to be made
    bool handedOff = false;    // true once 'current' is returned to the caller
    try
    {
        Bitmap refForDiff = _referenceFrame;
        if (diffRegion != null)
        {
            if (_referenceRegion == null)
            {
                // Reference has no region of its own: cut the requested region out of it.
                ownedRef = CropBitmap(_referenceFrame, diffRegion);
                if (ownedRef == null)
                    return null;
                refForDiff = ownedRef;
            }
            else if (!RegionsEqual(diffRegion, _referenceRegion))
            {
                // Requested region must lie fully inside the snapshot's region.
                int offX = diffRegion.X - _referenceRegion.X;
                int offY = diffRegion.Y - _referenceRegion.Y;
                if (offX < 0 || offY < 0 || offX + diffRegion.Width > _referenceFrame.Width || offY + diffRegion.Height > _referenceFrame.Height)
                    return null;
                ownedRef = CropBitmap(_referenceFrame, new RegionRect
                {
                    X = offX,
                    Y = offY,
                    Width = diffRegion.Width,
                    Height = diffRegion.Height,
                });
                if (ownedRef == null)
                    return null;
                refForDiff = ownedRef;
            }
        }

        int w = Math.Min(refForDiff.Width, current.Width);
        int h = Math.Min(refForDiff.Height, current.Height);

        // Each buffer keeps its OWN stride: when the two bitmaps differ in width their
        // strides differ too, and indexing one buffer with the other's stride is wrong.
        byte[] refPx = ReadArgbPixels(refForDiff, w, h, out int refStride);
        byte[] curPx = ReadArgbPixels(current, w, h, out int curStride);
        int diffThresh = c.DiffThresh;

        // Pass 1: parallel row diff — compute rowCounts[] directly, no changed[] array
        int[] rowCounts = new int[h];
        Parallel.For(0, h, y =>
        {
            int count = 0;
            int refRow = y * refStride;
            int curRow = y * curStride;
            for (int x = 0; x < w; x++)
            {
                if (ChannelDrop(refPx, refRow + x * 4, curPx, curRow + x * 4) > diffThresh)
                    count++;
            }
            rowCounts[y] = count;
        });

        int totalChanged = 0;
        for (int y = 0; y < h; y++) totalChanged += rowCounts[y];
        if (totalChanged == 0)
            return null;

        int maxGap = c.MaxGap;
        int rowThresh = w / c.RowThreshDiv;
        var (bestRowStart, bestRowEnd, bestRowLen) = FindLongestRun(rowCounts, rowThresh, maxGap);

        // Pass 2: column diff — only within the row range, recomputed from raw pixels
        int[] colCounts = new int[w];
        void AccumulateColumns(int y, int[] target)
        {
            int refRow = y * refStride;
            int curRow = y * curStride;
            for (int x = 0; x < w; x++)
            {
                if (ChannelDrop(refPx, refRow + x * 4, curPx, curRow + x * 4) > diffThresh)
                    target[x]++;
            }
        }

        int rowRangeLen = bestRowEnd - bestRowStart + 1;
        if (rowRangeLen <= 200)
        {
            // Small ranges are processed on the calling thread.
            for (int y = bestRowStart; y <= bestRowEnd; y++)
                AccumulateColumns(y, colCounts);
        }
        else
        {
            // Per-thread histograms merged atomically at the end.
            Parallel.For(bestRowStart, bestRowEnd + 1,
                () => new int[w],
                (y, _, localCols) =>
                {
                    AccumulateColumns(y, localCols);
                    return localCols;
                },
                localCols =>
                {
                    for (int x = 0; x < w; x++)
                        Interlocked.Add(ref colCounts[x], localCols[x]);
                });
        }

        int tooltipHeight = rowRangeLen;
        int colThresh = tooltipHeight / c.ColThreshDiv;
        var (bestColStart, bestColEnd, bestColLen) = FindLongestRun(colCounts, colThresh, maxGap);

        Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");
        if (bestRowLen < MinTooltipSpan || bestColLen < MinTooltipSpan)
        {
            Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
            return null;
        }

        int minX = bestColStart;
        int minY = bestRowStart;
        int maxX = Math.Min(bestColEnd, w - 1);
        int maxY = Math.Min(bestRowEnd, h - 1);

        // Boundary extension: scan outward from detected edges with a relaxed threshold
        // to capture low-signal regions (e.g. ornamental tooltip headers)
        ExtendRange(rowCounts, ref minY, ref maxY, Math.Max(1, rowThresh / 4), maxGap);
        ExtendRange(colCounts, ref minX, ref maxX, Math.Max(1, colThresh / 4), maxGap);

        // Trim low-density edges on both axes to avoid oversized crops.
        TrimLowDensityEdges(colCounts, ref minX, ref maxX, c.TrimCutoff);
        TrimLowDensityEdges(rowCounts, ref minY, ref maxY, c.TrimCutoff);

        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;
        var cropped = CropFromBytes(curPx, curStride, minX, minY, rw, rh);
        var refCropped = CropFromBytes(refPx, refStride, minX, minY, rw, rh);
        var region = new RegionRect { X = baseX + minX, Y = baseY + minY, Width = rw, Height = rh };
        Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");

        handedOff = true;
        return (cropped, refCropped, current, region);
    }
    finally
    {
        // Covers every early return and any exception.
        ownedRef?.Dispose();
        if (!handedOff) current.Dispose();
    }
}

/// <summary>Minimum run length (and trim floor) in pixels for a detected block.</summary>
private const int MinTooltipSpan = 50;

/// <summary>Copies the top-left w×h area of <paramref name="bmp"/> as 32bpp ARGB bytes and reports the row stride.</summary>
private static byte[] ReadArgbPixels(Bitmap bmp, int w, int h, out int stride)
{
    var data = bmp.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
    try
    {
        stride = data.Stride;
        var px = new byte[stride * h];
        Marshal.Copy(data.Scan0, px, 0, px.Length);
        return px;
    }
    finally
    {
        bmp.UnlockBits(data);
    }
}

/// <summary>Sum over the first three channel bytes of (reference − current); positive when the pixel got darker.</summary>
private static int ChannelDrop(byte[] refPx, int refIndex, byte[] curPx, int curIndex) =>
    (refPx[refIndex] - curPx[curIndex])
    + (refPx[refIndex + 1] - curPx[curIndex + 1])
    + (refPx[refIndex + 2] - curPx[curIndex + 2]);

/// <summary>Longest run of entries ≥ <paramref name="threshold"/>, tolerating gaps of up to <paramref name="maxGap"/> entries; returns (0, 0, 0) when none.</summary>
private static (int Start, int End, int Length) FindLongestRun(int[] counts, int threshold, int maxGap)
{
    int bestStart = 0, bestEnd = 0, bestLen = 0;
    int runStart = -1, lastActive = -1;
    for (int i = 0; i < counts.Length; i++)
    {
        if (counts[i] >= threshold)
        {
            if (runStart < 0) runStart = i;
            lastActive = i;
        }
        else if (runStart >= 0 && i - lastActive > maxGap)
        {
            int len = lastActive - runStart + 1;
            if (len > bestLen) { bestStart = runStart; bestEnd = lastActive; bestLen = len; }
            runStart = -1;
        }
    }
    if (runStart >= 0)
    {
        int len = lastActive - runStart + 1;
        if (len > bestLen) { bestStart = runStart; bestEnd = lastActive; bestLen = len; }
    }
    return (bestStart, bestEnd, bestLen);
}

/// <summary>Grows [lo, hi] outward by at most <paramref name="maxGap"/> entries per side while entries stay ≥ <paramref name="threshold"/>.</summary>
private static void ExtendRange(int[] counts, ref int lo, ref int hi, int threshold, int maxGap)
{
    int lowLimit = Math.Max(0, lo - maxGap);
    for (int i = lo - 1; i >= lowLimit; i--)
    {
        if (counts[i] >= threshold) lo = i;
        else break;
    }
    int highLimit = Math.Min(counts.Length - 1, hi + maxGap);
    for (int i = hi + 1; i <= highLimit; i++)
    {
        if (counts[i] >= threshold) hi = i;
        else break;
    }
}

/// <summary>Shrinks [lo, hi] from both ends while edge entries fall below <paramref name="trimCutoff"/> × the mean of the middle half; never below 51 entries.</summary>
private static void TrimLowDensityEdges(int[] counts, ref int lo, ref int hi, double trimCutoff)
{
    int span = hi - lo + 1;
    if (span <= MinTooltipSpan)
        return;
    int q1 = lo + span / 4;
    int q3 = lo + span * 3 / 4;
    long midSum = 0;
    int midCount = 0;
    for (int i = q1; i <= q3; i++) { midSum += counts[i]; midCount++; }
    double avgMidDensity = (double)midSum / Math.Max(1, midCount);
    double cutoff = avgMidDensity * trimCutoff;
    while (lo < hi - MinTooltipSpan && counts[lo] < cutoff)
        lo++;
    while (hi > lo + MinTooltipSpan && counts[hi] < cutoff)
        hi--;
}
/// <summary>True when both rectangles have the same origin and the same size.</summary>
private static bool RegionsEqual(RegionRect a, RegionRect b)
{
    if (a.X != b.X || a.Y != b.Y)
        return false;
    return a.Width == b.Width && a.Height == b.Height;
}
/// <summary>
/// Clones a rectangle out of <paramref name="src"/> as a 32bpp ARGB bitmap. Negative origin
/// coordinates are raised to 0 and the size is limited to what remains of the source.
/// </summary>
/// <returns>The cropped copy, or <c>null</c> when the resulting width or height is not positive.</returns>
private static Bitmap? CropBitmap(Bitmap src, RegionRect region)
{
    int left = region.X < 0 ? 0 : region.X;
    int top = region.Y < 0 ? 0 : region.Y;
    int width = Math.Min(region.Width, src.Width - left);
    int height = Math.Min(region.Height, src.Height - top);

    if (width > 0 && height > 0)
    {
        var area = new Rectangle(left, top, width, height);
        return src.Clone(area, PixelFormat.Format32bppArgb);
    }
    return null;
}
/// <summary>
/// Full diff-OCR pipeline: crop the changed area via <c>DiffCrop</c>, preprocess it, then run
/// Tesseract either line-by-line or on the whole block.
/// </summary>
/// <param name="req">Request; <c>Path</c> (optional) enables saving the raw crop plus
/// ".full" and ".pre" companion images, <c>Debug</c> enables extra stderr logging.</param>
/// <param name="p">Crop and OCR parameters.</param>
/// <returns>
/// <c>ErrorResponse</c> when no snapshot is stored; an empty <c>OcrResponse</c> when
/// <c>DiffCrop</c> finds nothing; otherwise a <c>DiffOcrResponse</c> with text, lines and region.
/// </returns>
public object HandleDiffOcr(Request req, DiffOcrParams p)
{
if (_referenceFrame == null)
return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
var cropResult = DiffCrop(req, p.Crop);
if (cropResult == null)
return new OcrResponse { Text = "", Lines = [] };
var (cropped, refCropped, current, region) = cropResult.Value;
// DiffCrop hands ownership of all three bitmaps to us; dispose them when this method exits.
using var _current = current;
using var _cropped = cropped;
using var _refCropped = refCropped;
bool debug = req.Debug;
// region already includes the capture-region origin added by DiffCrop.
int minX = region.X, minY = region.Y, rw = region.Width, rh = region.Height;
// Save raw crop if path is provided
if (!string.IsNullOrEmpty(req.Path))
{
var dir = Path.GetDirectoryName(req.Path);
if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
Directory.CreateDirectory(dir);
cropped.Save(req.Path, ImageUtils.GetImageFormat(req.Path));
if (debug) Console.Error.WriteLine($" diff-ocr: saved raw to {req.Path}");
}
// Pre-process for OCR — get Mat for per-line detection and padding
var ocr = p.Ocr;
Mat processedMat;
if (ocr.UseBackgroundSub)
{
processedMat = ImagePreprocessor.PreprocessWithBackgroundSubMat(cropped, refCropped, ocr.DimPercentile, ocr.TextThresh, ocr.Upscale, ocr.SoftThreshold);
}
else
{
using var topHatBmp = ImagePreprocessor.PreprocessForOcr(cropped, ocr.KernelSize, ocr.Upscale);
processedMat = BitmapConverter.ToMat(topHatBmp);
}
using var _processedMat = processedMat; // ensure disposal
// Save fullscreen and preprocessed versions alongside raw
if (!string.IsNullOrEmpty(req.Path))
{
// For "x.png" this yields "x.full.png" and "x.pre.png".
var ext = Path.GetExtension(req.Path);
var fullPath = Path.ChangeExtension(req.Path, ".full" + ext);
current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
if (debug) Console.Error.WriteLine($" diff-ocr: saved fullscreen to {fullPath}");
var prePath = Path.ChangeExtension(req.Path, ".pre" + ext);
using var preBmp = BitmapConverter.ToBitmap(processedMat);
preBmp.Save(prePath, ImageUtils.GetImageFormat(prePath));
if (debug) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
}
int pad = p.Crop.OcrPad;
// Guard against a zero/negative upscale so the divisions below are safe.
int upscale = ocr.Upscale > 0 ? ocr.Upscale : 1;
var lines = new List<OcrLineResult>();
// Per-line OCR: detect text lines via horizontal projection, OCR each individually
if (ocr.UsePerLineOcr)
{
// DetectTextLines needs binary input; if soft threshold produced grayscale, binarize a copy
int minRowPx = Math.Max(processedMat.Cols / 200, 3);
using var detectionMat = ocr.SoftThreshold ? new Mat() : null;
if (ocr.SoftThreshold)
Cv2.Threshold(processedMat, detectionMat!, 128, 255, ThresholdTypes.Binary);
var lineDetectInput = ocr.SoftThreshold ? detectionMat! : processedMat;
var textLines = ImagePreprocessor.DetectTextLines(lineDetectInput, minRowPixels: minRowPx, gapTolerance: ocr.LineGapTolerance * upscale);
if (debug) Console.Error.WriteLine($" diff-ocr: detected {textLines.Count} text lines");
if (textLines.Count > 0)
{
int linePadY = ocr.LinePadY;
foreach (var (yStart, yEnd) in textLines)
{
// Vertical padding around the detected line, clamped to the image.
int y0 = Math.Max(yStart - linePadY, 0);
int y1 = Math.Min(yEnd + linePadY, processedMat.Rows - 1);
int lineH = y1 - y0 + 1;
// Crop line strip (full width)
using var lineStrip = new Mat(processedMat, new OpenCvSharp.Rect(0, y0, processedMat.Cols, lineH));
// Add whitespace padding around the line
using var padded = new Mat();
Cv2.CopyMakeBorder(lineStrip, padded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);
using var lineBmp = BitmapConverter.ToBitmap(padded);
using var linePix = ImageUtils.BitmapToPix(lineBmp);
using var linePage = engine.Process(linePix, (PageSegMode)ocr.Psm);
// Extract words, adjusting coordinates back to screen space
// Word coords are in padded image space → subtract pad, add line offset, scale to original, add region offset
var lineWords = new List<OcrWordResult>();
using var iter = linePage.GetIterator();
if (iter != null)
{
iter.Begin();
do
{
var wordText = iter.GetText(PageIteratorLevel.Word);
if (string.IsNullOrWhiteSpace(wordText)) continue;
// Words with confidence below 50 are dropped.
float conf = iter.GetConfidence(PageIteratorLevel.Word);
if (conf < 50) continue;
if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var bounds))
{
lineWords.Add(new OcrWordResult
{
Text = wordText.Trim(),
X = (bounds.X1 - pad + 0) / upscale + minX,
Y = (bounds.Y1 - pad + y0) / upscale + minY,
Width = bounds.Width / upscale,
Height = bounds.Height / upscale,
});
}
// NOTE(review): this appears to advance word-by-word within the current Tesseract text line only — confirm
// that words on any further text lines in the strip are intentionally ignored.
} while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
}
if (lineWords.Count > 0)
{
var lineText = string.Join(" ", lineWords.Select(w => w.Text));
lines.Add(new OcrLineResult { Text = lineText, Words = lineWords });
}
}
// Per-line path returns here, even if no words survived the filters.
var text = string.Join("\n", lines.Select(l => l.Text)) + "\n";
return new DiffOcrResponse
{
Text = text,
Lines = lines,
Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
};
}
if (debug) Console.Error.WriteLine(" diff-ocr: no text lines detected, falling back to whole-block OCR");
}
// Whole-block fallback: add padding and use configurable PSM
{
using var padded = new Mat();
Cv2.CopyMakeBorder(processedMat, padded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);
using var bmp = BitmapConverter.ToBitmap(padded);
using var pix = ImageUtils.BitmapToPix(bmp);
using var page = engine.Process(pix, (PageSegMode)ocr.Psm);
var text = page.GetText();
// Adjust word coordinates: subtract padding offset
// NOTE(review): unlike the per-line path, no division of word coordinates by upscale is visible here — confirm ExtractLinesFromPage expects that.
lines = ImageUtils.ExtractLinesFromPage(page, offsetX: minX - pad / upscale, offsetY: minY - pad / upscale);
return new DiffOcrResponse
{
Text = text,
Lines = lines,
Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
};
}
}
/// <summary>
/// Runs Tesseract on an already-preprocessed bitmap: converts it to a Mat, surrounds it with a
/// white border of <paramref name="pad"/> pixels, and OCRs it with the given page segmentation mode.
/// </summary>
/// <param name="processedBmp">Preprocessed image to recognize.</param>
/// <param name="region">Screen-space rectangle the image came from; echoed in the response and used as the coordinate offset.</param>
/// <param name="pad">Border width in pixels added on every side.</param>
/// <param name="upscale">Upscale factor used during preprocessing; values ≤ 0 are treated as 1.</param>
/// <param name="psm">Tesseract page segmentation mode (default 6).</param>
/// <returns>A <c>DiffOcrResponse</c> with text, lines and the supplied region.</returns>
public DiffOcrResponse RunTesseractOnBitmap(Bitmap processedBmp, RegionRect region, int pad = 10, int upscale = 2, int psm = 6)
{
    using var source = BitmapConverter.ToMat(processedBmp);
    using var bordered = new Mat();
    Cv2.CopyMakeBorder(source, bordered, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);

    using var borderedBmp = BitmapConverter.ToBitmap(bordered);
    using var pixImage = ImageUtils.BitmapToPix(borderedBmp);
    using var ocrPage = engine.Process(pixImage, (PageSegMode)psm);

    var recognized = ocrPage.GetText();

    // The border shifts every word by 'pad' pixels; compensate in the offsets.
    int scale = upscale > 0 ? upscale : 1;
    int padOffset = pad / scale;
    var wordLines = ImageUtils.ExtractLinesFromPage(
        ocrPage,
        offsetX: region.X - padOffset,
        offsetY: region.Y - padOffset);

    return new DiffOcrResponse { Text = recognized, Lines = wordLines, Region = region };
}
/// <summary>Runs every test case with default parameters and verbose stderr logging.</summary>
/// <param name="req">Unused.</param>
public object HandleTest(Request req)
{
    var defaults = new DiffOcrParams();
    return RunTestCases(defaults, verbose: true);
}
/// <summary>Deep-copies a parameter set by round-tripping it through JSON.</summary>
private static DiffOcrParams CloneParams(DiffOcrParams p)
{
    string snapshot = JsonSerializer.Serialize(p);
    DiffOcrParams? copy = JsonSerializer.Deserialize<DiffOcrParams>(snapshot);
    return copy!;
}
/// <summary>
/// Three-phase parameter tuning: (A) crop parameters, then starting from that result
/// (B) OCR parameters with top-hat preprocessing and (C) OCR parameters with background
/// subtraction. The better of B and C wins; ties go to top-hat.
/// </summary>
/// <param name="req">Unused.</param>
/// <returns>A <c>TuneResponse</c> with the winning score, parameters and total evaluation count.</returns>
public object HandleTune(Request req)
{
    int totalEvals = 0;

    // --- Phase A: Tune crop params ---
    Console.Error.WriteLine("\n========== Phase A: Crop Params ==========");
    var best = new DiffOcrParams();
    // The crop-phase score is not needed: phases B and C re-score their own baseline.
    TuneCropParams(best, ref totalEvals);

    // --- Phase B: Tune OCR params (top-hat) ---
    Console.Error.WriteLine("\n========== Phase B: OCR — Top-Hat ==========");
    var topHat = CloneParams(best);
    topHat.Ocr.UseBackgroundSub = false;
    double topHatScore = TuneOcrParams(topHat, ref totalEvals, tuneTopHat: true, tuneBgSub: false);

    // --- Phase C: Tune OCR params (background-subtraction) ---
    Console.Error.WriteLine("\n========== Phase C: OCR — Background Subtraction ==========");
    var bgSub = CloneParams(best);
    bgSub.Ocr.UseBackgroundSub = true;
    double bgSubScore = TuneOcrParams(bgSub, ref totalEvals, tuneTopHat: false, tuneBgSub: true);

    // Pick the winner
    var winner = bgSubScore > topHatScore ? bgSub : topHat;
    double winnerScore = Math.Max(topHatScore, bgSubScore);
    Console.Error.WriteLine($"\n========== Result ==========");
    Console.Error.WriteLine($" Top-Hat: {topHatScore:F3} {topHat}");
    Console.Error.WriteLine($" BgSub: {bgSubScore:F3} {bgSub}");
    Console.Error.WriteLine($" Winner: {(winner.Ocr.UseBackgroundSub ? "BgSub" : "TopHat")} evals={totalEvals}\n");

    // Final verbose report with best params
    RunTestCases(winner, verbose: true);

    return new TuneResponse
    {
        BestScore = winnerScore,
        BestParams = winner,
        Iterations = totalEvals,
    };
}
/// <summary>
/// Coordinate-descent tuning of the crop parameters. Sweeps each parameter over a fixed list of
/// candidates, keeps a value only when it strictly improves the score, and repeats for up to
/// three rounds or until a round brings no improvement.
/// </summary>
/// <param name="best">Parameter set to tune; modified in place.</param>
/// <param name="totalEvals">Running evaluation counter, incremented once per candidate scored.</param>
/// <returns>The best score reached.</returns>
private double TuneCropParams(DiffOcrParams best, ref int totalEvals)
{
    double bestScore = ScoreParams(best);
    Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");

    var cropSweeps = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
    {
        ("diffThresh", [10, 15, 20, 25, 30, 40, 50, 60], (c, v) => c.DiffThresh = v),
        ("rowThreshDiv", [10, 15, 20, 25, 30, 40, 50, 60], (c, v) => c.RowThreshDiv = v),
        ("colThreshDiv", [5, 8, 10, 12, 15, 20, 25, 30], (c, v) => c.ColThreshDiv = v),
        ("maxGap", [5, 8, 10, 12, 15, 20, 25, 30], (c, v) => c.MaxGap = v),
        ("ocrPad", [0, 5, 10, 15, 20, 30], (c, v) => c.OcrPad = v),
    };
    double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];

    const int maxRounds = 3;
    for (int round = 0; round < maxRounds; round++)
    {
        bool improved = false;
        Console.Error.WriteLine($"--- Round {round + 1} ---");

        foreach (var (name, values, set) in cropSweeps)
        {
            // '|=' (not '||=') so the sweep always runs.
            improved |= SweepIntParam(best, name, values, (target, v) => set(target.Crop, v), ref bestScore, ref totalEvals);
        }

        // Sweep trimCutoff (the only non-integer crop parameter)
        {
            Console.Error.Write($" trimCutoff: ");
            double bestTrim = best.Crop.TrimCutoff;
            double bestTrimScore = bestScore;
            foreach (double v in trimValues)
            {
                var trial = CloneParams(best);
                trial.Crop.TrimCutoff = v;
                double score = ScoreParams(trial);
                totalEvals++;
                Console.Error.Write($"{v:F2}={score:F3} ");
                if (score > bestTrimScore) { bestTrimScore = score; bestTrim = v; }
            }
            Console.Error.WriteLine();
            if (bestTrimScore > bestScore)
            {
                best.Crop.TrimCutoff = bestTrim;
                bestScore = bestTrimScore;
                improved = true;
                Console.Error.WriteLine($" → trimCutoff={bestTrim:F2} score={bestScore:F3}");
            }
        }

        Console.Error.WriteLine($" End of round {round + 1}: score={bestScore:F3} {best}");
        if (!improved) break;
    }
    return bestScore;
}

/// <summary>
/// Coordinate-descent tuning of the OCR parameters, using the same round structure as
/// <see cref="TuneCropParams"/>. The shared sweeps (upscale, psm) always run; the top-hat
/// and background-subtraction sweeps are included according to the two flags.
/// </summary>
/// <param name="best">Parameter set to tune; modified in place.</param>
/// <param name="totalEvals">Running evaluation counter, incremented once per candidate scored.</param>
/// <param name="tuneTopHat">Include the top-hat specific sweep (kernelSize).</param>
/// <param name="tuneBgSub">Include the background-subtraction specific sweeps.</param>
/// <returns>The best score reached.</returns>
private double TuneOcrParams(DiffOcrParams best, ref int totalEvals, bool tuneTopHat, bool tuneBgSub)
{
    double bestScore = ScoreParams(best);
    Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");

    var sharedOcrSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
    {
        ("upscale", [1, 2, 3], (o, v) => o.Upscale = v),
        ("psm", [4, 6, 11, 13], (o, v) => o.Psm = v),
    };
    // Top-hat specific
    var topHatSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
    {
        ("kernelSize", [11, 15, 19, 21, 25, 31, 35, 41, 51], (o, v) => o.KernelSize = v),
    };
    // Background-subtraction specific
    var bgSubSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
    {
        ("dimPercentile", [5, 10, 15, 20, 25, 30, 40, 50], (o, v) => o.DimPercentile = v),
        ("textThresh", [10, 15, 20, 25, 30, 40, 50, 60, 80], (o, v) => o.TextThresh = v),
        ("lineGapTolerance", [3, 5, 8, 10, 15], (o, v) => o.LineGapTolerance = v),
        ("linePadY", [5, 10, 15, 20], (o, v) => o.LinePadY = v),
    };
    var allOcrSweeps = sharedOcrSweeps
        .Concat(tuneTopHat ? topHatSweeps : [])
        .Concat(tuneBgSub ? bgSubSweeps : [])
        .ToArray();

    const int maxRounds = 3;
    for (int round = 0; round < maxRounds; round++)
    {
        bool improved = false;
        Console.Error.WriteLine($"--- Round {round + 1} ---");

        foreach (var (name, values, set) in allOcrSweeps)
        {
            improved |= SweepIntParam(best, name, values, (target, v) => set(target.Ocr, v), ref bestScore, ref totalEvals);
        }

        Console.Error.WriteLine($" End of round {round + 1}: score={bestScore:F3} {best}");
        if (!improved) break;
    }
    return bestScore;
}

/// <summary>
/// Scores every candidate value for one integer parameter on a clone of <paramref name="best"/>
/// and, when the best candidate strictly beats <paramref name="bestScore"/>, applies it to
/// <paramref name="best"/> and updates the score. Progress is logged to stderr.
/// </summary>
/// <returns><c>true</c> when <paramref name="best"/> was improved.</returns>
private bool SweepIntParam(DiffOcrParams best, string name, int[] values, Action<DiffOcrParams, int> apply, ref double bestScore, ref int totalEvals)
{
    Console.Error.Write($" {name}: ");
    int bestVal = 0;
    double bestValScore = -1;
    foreach (int v in values)
    {
        var trial = CloneParams(best);
        apply(trial, v);
        double score = ScoreParams(trial);
        totalEvals++;
        Console.Error.Write($"{v}={score:F3} ");
        if (score > bestValScore) { bestValScore = score; bestVal = v; }
    }
    Console.Error.WriteLine();

    if (bestValScore <= bestScore)
        return false;

    apply(best, bestVal);
    bestScore = bestValScore;
    Console.Error.WriteLine($" → {name}={bestVal} score={bestScore:F3}");
    return true;
}
/// <summary>
/// Scores a parameter set as the mean per-case score over all test cases (non-verbose run).
/// Returns 0 when the run did not yield a <c>TestResponse</c> with at least one case.
/// </summary>
private double ScoreParams(DiffOcrParams p)
{
    if (RunTestCases(p, verbose: false) is not TestResponse { Total: > 0 } report)
        return 0;
    return report.Results.Average(r => r.Score);
}
/// <summary>
/// Runs every case in <c>tessdata/cases.json</c> through the snapshot + diff-ocr pipeline and
/// fuzzy-matches the recognized lines against the expected ones.
/// </summary>
/// <param name="p">Parameters passed to diff-ocr for every case.</param>
/// <param name="verbose">When true, per-case details and a summary are written to stderr.</param>
/// <returns>
/// <c>ErrorResponse</c> when cases.json is missing or empty; otherwise a <c>TestResponse</c>.
/// Note: each case replaces the stored reference snapshot via <c>HandleSnapshot</c>.
/// </returns>
private object RunTestCases(DiffOcrParams p, bool verbose)
{
var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
var casesPath = Path.Combine(tessdataDir, "cases.json");
if (!File.Exists(casesPath))
return new ErrorResponse($"cases.json not found at {casesPath}");
var json = File.ReadAllText(casesPath);
var cases = JsonSerializer.Deserialize<List<TestCase>>(json);
if (cases == null || cases.Count == 0)
return new ErrorResponse("No test cases found in cases.json");
var results = new List<TestCaseResult>();
int passCount = 0;
foreach (var tc in cases)
{
if (verbose) Console.Error.WriteLine($"\n=== Test: {tc.Id} ===");
// Image paths in cases.json are relative to the tessdata directory.
var fullPath = Path.Combine(tessdataDir, tc.FullImage);
var imagePath = Path.Combine(tessdataDir, tc.Image);
// A missing image counts as a failed case with score 0 (it is still included in the totals).
if (!File.Exists(fullPath))
{
if (verbose) Console.Error.WriteLine($" SKIP: full image not found: {fullPath}");
results.Add(new TestCaseResult { Id = tc.Id, Passed = false, Score = 0, Missed = tc.Expected });
continue;
}
if (!File.Exists(imagePath))
{
if (verbose) Console.Error.WriteLine($" SKIP: tooltip image not found: {imagePath}");
results.Add(new TestCaseResult { Id = tc.Id, Passed = false, Score = 0, Missed = tc.Expected });
continue;
}
// Run the same pipeline: snapshot (reference) then diff-ocr (with tooltip)
HandleSnapshot(new Request { File = fullPath });
var diffResult = HandleDiffOcr(new Request { File = imagePath, Debug = verbose }, p);
// Extract actual lines from the response
List<string> actualLines;
if (diffResult is DiffOcrResponse diffResp)
actualLines = diffResp.Lines.Select(l => l.Text.Trim()).Where(l => l.Length > 0).ToList();
else if (diffResult is OcrResponse ocrResp)
actualLines = ocrResp.Lines.Select(l => l.Text.Trim()).Where(l => l.Length > 0).ToList();
else
{
if (verbose) Console.Error.WriteLine($" ERROR: unexpected response type");
results.Add(new TestCaseResult { Id = tc.Id, Passed = false, Score = 0, Missed = tc.Expected });
continue;
}
// Fuzzy match expected vs actual
// Greedy: each expected line, in order, claims its most similar still-unused actual line.
var matched = new List<string>();
var missed = new List<string>();
var usedActual = new HashSet<int>();
foreach (var expected in tc.Expected)
{
int bestIdx = -1;
double bestSim = 0;
for (int i = 0; i < actualLines.Count; i++)
{
if (usedActual.Contains(i)) continue;
double sim = LevenshteinSimilarity(expected, actualLines[i]);
if (sim > bestSim) { bestSim = sim; bestIdx = i; }
}
// A similarity of at least 0.75 counts as a match.
if (bestIdx >= 0 && bestSim >= 0.75)
{
matched.Add(expected);
usedActual.Add(bestIdx);
if (verbose && bestSim < 1.0)
Console.Error.WriteLine($" ~ {expected} → {actualLines[bestIdx]} (sim={bestSim:F2})");
}
else
{
missed.Add(expected);
if (verbose)
Console.Error.WriteLine($" MISS: {expected}" + (bestIdx >= 0 ? $" (best: {actualLines[bestIdx]}, sim={bestSim:F2})" : ""));
}
}
// Actual lines that no expected line claimed.
var extra = actualLines.Where((_, i) => !usedActual.Contains(i)).ToList();
if (verbose)
foreach (var e in extra)
Console.Error.WriteLine($" EXTRA: {e}");
// Score is the fraction of expected lines matched; extra lines do not lower it.
double score = tc.Expected.Count > 0 ? (double)matched.Count / tc.Expected.Count : 1.0;
bool passed = missed.Count == 0;
if (passed) passCount++;
if (verbose)
Console.Error.WriteLine($" Result: {(passed ? "PASS" : "FAIL")} matched={matched.Count}/{tc.Expected.Count} extra={extra.Count} score={score:F2}");
results.Add(new TestCaseResult
{
Id = tc.Id,
Passed = passed,
Score = score,
Matched = matched,
Missed = missed,
Extra = extra,
});
}
if (verbose)
Console.Error.WriteLine($"\n=== Summary: {passCount}/{cases.Count} passed ===\n");
return new TestResponse
{
Passed = passCount,
Failed = cases.Count - passCount,
Total = cases.Count,
Results = results,
};
}
/// <summary>
/// Builds a new 32bpp ARGB bitmap from a rectangle of a raw pixel buffer, copying one row at
/// a time — avoids the slower GDI+ <c>Bitmap.Clone()</c>.
/// </summary>
/// <param name="px">Source pixels, 4 bytes per pixel, rows <paramref name="srcStride"/> bytes apart.</param>
/// <param name="srcStride">Byte distance between consecutive source rows.</param>
/// <param name="cropX">Left edge of the rectangle, in pixels.</param>
/// <param name="cropY">Top edge of the rectangle, in pixels.</param>
/// <param name="cropW">Rectangle width in pixels.</param>
/// <param name="cropH">Rectangle height in pixels.</param>
/// <returns>A new bitmap owned by the caller.</returns>
private static Bitmap CropFromBytes(byte[] px, int srcStride, int cropX, int cropY, int cropW, int cropH)
{
    var result = new Bitmap(cropW, cropH, PixelFormat.Format32bppArgb);
    var bounds = new Rectangle(0, 0, cropW, cropH);
    var locked = result.LockBits(bounds, ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);

    int bytesPerRow = cropW * 4;
    int srcIndex = cropY * srcStride + cropX * 4;
    IntPtr dstRow = locked.Scan0;
    for (int row = 0; row < cropH; row++)
    {
        Marshal.Copy(px, srcIndex, dstRow, bytesPerRow);
        srcIndex += srcStride;
        dstRow += locked.Stride;
    }

    result.UnlockBits(locked);
    return result;
}
/// <summary>
/// Case-insensitive similarity in [0, 1]: 1 − (Levenshtein distance ÷ length of the longer string).
/// Equal strings (including two empty strings) score 1; exactly one empty string scores 0.
/// </summary>
private static double LevenshteinSimilarity(string a, string b)
{
    string left = a.ToLowerInvariant();
    string right = b.ToLowerInvariant();
    if (left == right)
        return 1.0;

    int rows = left.Length;
    int cols = right.Length;
    if (rows == 0 || cols == 0)
        return 0.0;

    // dist[r, c] = edit distance between the first r chars of left and the first c chars of right.
    var dist = new int[rows + 1, cols + 1];
    for (int r = 0; r <= rows; r++)
        dist[r, 0] = r;
    for (int c = 0; c <= cols; c++)
        dist[0, c] = c;

    for (int r = 1; r <= rows; r++)
    {
        char leftChar = left[r - 1];
        for (int c = 1; c <= cols; c++)
        {
            int deletion = dist[r - 1, c] + 1;
            int insertion = dist[r, c - 1] + 1;
            int substitution = dist[r - 1, c - 1] + (leftChar == right[c - 1] ? 0 : 1);
            dist[r, c] = Math.Min(Math.Min(deletion, insertion), substitution);
        }
    }

    int longest = Math.Max(rows, cols);
    return 1.0 - (double)dist[rows, cols] / longest;
}
}

View file

@ -1 +0,0 @@
// Program entry point (top-level statement): run the daemon and use its return value as the process exit code.
return OcrDaemon.Daemon.Run();

View file

@ -1,210 +0,0 @@
namespace OcrDaemon;
using System.Diagnostics;
using System.Drawing;
using System.Text.Json;
using System.Text.Json.Serialization;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
/// <summary>
/// Manages a persistent Python subprocess for EasyOCR / PaddleOCR.
/// Lazy-starts on first request; reuses the process for subsequent calls.
/// Same stdin/stdout JSON-per-line protocol as the C# daemon itself.
/// </summary>
class PythonOcrBridge : IDisposable
{
private static readonly JsonSerializerOptions JsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};
private Process? _proc;
private readonly string _daemonScript;
private readonly string _pythonExe;
private readonly object _lock = new();
/// <summary>
/// Resolves the Python daemon script and interpreter relative to the executable's directory.
/// The subprocess itself is not started here.
/// </summary>
public PythonOcrBridge()
{
    // Executable lives in tools/OcrDaemon/bin/Release/net8.0-.../ — four levels up is tools/.
    string baseDir = AppContext.BaseDirectory;
    string toolsRoot = Path.GetFullPath(Path.Combine(baseDir, "..", "..", "..", ".."));
    string pythonOcrDir = Path.Combine(toolsRoot, "python-ocr");

    _daemonScript = Path.GetFullPath(Path.Combine(pythonOcrDir, "daemon.py"));

    // Prefer the project's virtual-environment interpreter; otherwise rely on "python" from PATH.
    string venvInterpreter = Path.GetFullPath(Path.Combine(pythonOcrDir, ".venv", "Scripts", "python.exe"));
    if (File.Exists(venvInterpreter))
        _pythonExe = venvInterpreter;
    else
        _pythonExe = "python";
}
/// <summary>
/// OCRs a screen region (or file) with the given Python engine: the image is written to a
/// uniquely named temporary PNG, passed to <see cref="OcrFromFile"/>, and the temporary file
/// is removed afterwards on a best-effort basis.
/// </summary>
/// <param name="req">Request supplying <c>File</c> and <c>Region</c>.</param>
/// <param name="engine">Name of the Python OCR engine to use.</param>
/// <returns>The <c>OcrResponse</c> produced by the Python side.</returns>
public object HandleOcr(Request req, string engine)
{
    string tempFile = Path.Combine(Path.GetTempPath(), $"ocr_{Guid.NewGuid():N}.png");
    try
    {
        using (var shot = ScreenCapture.CaptureOrLoad(req.File, req.Region))
        {
            shot.Save(tempFile, SdImageFormat.Png);
            return OcrFromFile(tempFile, engine);
        }
    }
    finally
    {
        // Cleanup failure must not mask the OCR result or the original exception.
        try
        {
            File.Delete(tempFile);
        }
        catch
        {
            /* ignore */
        }
    }
}
/// <summary>
/// Run OCR on an already-saved image file via the Python engine.
/// </summary>
public OcrResponse OcrFromFile(string imagePath, string engine, OcrParams? ocrParams = null)
{
EnsureRunning();
var pyReq = BuildPythonRequest(engine, ocrParams);
pyReq["imagePath"] = imagePath;
return SendPythonRequest(pyReq);
}
/// <summary>
/// Run OCR on a bitmap via the Python engine (base64 PNG over pipe, no temp file).
/// </summary>
public OcrResponse OcrFromBitmap(Bitmap bitmap, string engine, OcrParams? ocrParams = null)
{
EnsureRunning();
using var ms = new MemoryStream();
bitmap.Save(ms, SdImageFormat.Png);
var imageBase64 = Convert.ToBase64String(ms.ToArray());
var pyReq = BuildPythonRequest(engine, ocrParams);
pyReq["imageBase64"] = imageBase64;
return SendPythonRequest(pyReq);
}
private static Dictionary<string, object?> BuildPythonRequest(string engine, OcrParams? ocrParams)
{
var req = new Dictionary<string, object?> { ["cmd"] = "ocr", ["engine"] = engine };
if (ocrParams == null) return req;
if (ocrParams.MergeGap > 0) req["mergeGap"] = ocrParams.MergeGap;
if (ocrParams.LinkThreshold.HasValue) req["linkThreshold"] = ocrParams.LinkThreshold.Value;
if (ocrParams.TextThreshold.HasValue) req["textThreshold"] = ocrParams.TextThreshold.Value;
if (ocrParams.LowText.HasValue) req["lowText"] = ocrParams.LowText.Value;
if (ocrParams.WidthThs.HasValue) req["widthThs"] = ocrParams.WidthThs.Value;
if (ocrParams.Paragraph.HasValue) req["paragraph"] = ocrParams.Paragraph.Value;
return req;
}
private OcrResponse SendPythonRequest(object pyReq)
{
var json = JsonSerializer.Serialize(pyReq, JsonOptions);
string responseLine;
lock (_lock)
{
_proc!.StandardInput.WriteLine(json);
_proc.StandardInput.Flush();
responseLine = _proc.StandardOutput.ReadLine()
?? throw new Exception("Python daemon returned null");
}
var resp = JsonSerializer.Deserialize<PythonResponse>(responseLine, JsonOptions);
if (resp == null)
throw new Exception("Failed to parse Python OCR response");
if (!resp.Ok)
throw new Exception(resp.Error ?? "Python OCR failed");
return new OcrResponse
{
Text = resp.Text ?? "",
Lines = resp.Lines ?? [],
};
}
private void EnsureRunning()
{
if (_proc != null && !_proc.HasExited)
return;
_proc?.Dispose();
_proc = null;
if (!File.Exists(_daemonScript))
throw new Exception($"Python OCR daemon not found at {_daemonScript}");
Console.Error.WriteLine($"Spawning Python OCR daemon: {_pythonExe} {_daemonScript}");
_proc = new Process
{
StartInfo = new ProcessStartInfo
{
FileName = _pythonExe,
Arguments = $"\"{_daemonScript}\"",
UseShellExecute = false,
RedirectStandardInput = true,
RedirectStandardOutput = true,
RedirectStandardError = true,
CreateNoWindow = true,
}
};
_proc.ErrorDataReceived += (_, e) =>
{
if (!string.IsNullOrEmpty(e.Data))
Console.Error.WriteLine($"[python-ocr] {e.Data}");
};
_proc.Start();
_proc.BeginErrorReadLine();
// Wait for ready signal (up to 30s for first model load)
var readyLine = _proc.StandardOutput.ReadLine();
if (readyLine == null)
throw new Exception("Python OCR daemon exited before ready signal");
var ready = JsonSerializer.Deserialize<PythonResponse>(readyLine, JsonOptions);
if (ready?.Ready != true)
throw new Exception($"Python OCR daemon did not send ready signal: {readyLine}");
Console.Error.WriteLine("Python OCR daemon ready");
}
public void Dispose()
{
if (_proc != null && !_proc.HasExited)
{
try
{
_proc.StandardInput.Close();
_proc.WaitForExit(3000);
if (!_proc.HasExited) _proc.Kill();
}
catch { /* ignore */ }
}
_proc?.Dispose();
_proc = null;
}
private class PythonResponse
{
[JsonPropertyName("ok")]
public bool Ok { get; set; }
[JsonPropertyName("ready")]
public bool? Ready { get; set; }
[JsonPropertyName("text")]
public string? Text { get; set; }
[JsonPropertyName("lines")]
public List<OcrLineResult>? Lines { get; set; }
[JsonPropertyName("error")]
public string? Error { get; set; }
}
}

View file

@ -1,65 +0,0 @@
namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
/// <summary>
/// Screen capture helpers built on GDI+ and user32.
/// </summary>
static class ScreenCapture
{
    [DllImport("user32.dll")]
    private static extern bool SetProcessDPIAware();

    [DllImport("user32.dll")]
    private static extern int GetSystemMetrics(int nIndex);

    /// <summary>Calls user32 <c>SetProcessDPIAware</c>; the result is ignored.</summary>
    public static void InitDpiAwareness() => SetProcessDPIAware();

    /// <summary>
    /// Capture from screen, or load from file if specified.
    /// When file is set, loads the image and crops to region.
    /// </summary>
    /// <param name="file">Image path; null or empty means "capture the screen".</param>
    /// <param name="region">Optional crop/capture rectangle.</param>
    /// <returns>A new bitmap owned by the caller.</returns>
    public static Bitmap CaptureOrLoad(string? file, RegionRect? region)
    {
        if (string.IsNullOrEmpty(file))
            return CaptureScreen(region);

        var fullBmp = new Bitmap(file);
        if (region == null)
            return fullBmp;

        // The full image is only the crop source. Dispose it on every path,
        // including when Clone throws because the region lies outside the image.
        using (fullBmp)
        {
            int cx = Math.Max(0, region.X);
            int cy = Math.Max(0, region.Y);
            int cw = Math.Min(region.Width, fullBmp.Width - cx);
            int ch = Math.Min(region.Height, fullBmp.Height - cy);
            return fullBmp.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
        }
    }

    /// <summary>
    /// Copies pixels from the screen into a new 32bpp ARGB bitmap.
    /// Without a region, captures the primary monitor.
    /// </summary>
    /// <param name="region">Screen rectangle to capture, or null for the primary monitor.</param>
    /// <returns>A new bitmap owned by the caller.</returns>
    public static Bitmap CaptureScreen(RegionRect? region)
    {
        int x, y, w, h;
        if (region != null)
        {
            x = region.X;
            y = region.Y;
            w = region.Width;
            h = region.Height;
        }
        else
        {
            // Primary monitor only (0,0 origin, SM_CXSCREEN / SM_CYSCREEN)
            x = 0;
            y = 0;
            w = GetSystemMetrics(0); // SM_CXSCREEN
            h = GetSystemMetrics(1); // SM_CYSCREEN
        }

        var bitmap = new Bitmap(w, h, PixelFormat.Format32bppArgb);
        try
        {
            using var g = Graphics.FromImage(bitmap);
            g.CopyFromScreen(x, y, 0, 0, new System.Drawing.Size(w, h), CopyPixelOperation.SourceCopy);
            return bitmap;
        }
        catch
        {
            // Ownership has not passed to the caller yet, so release the bitmap here.
            bitmap.Dispose();
            throw;
        }
    }
}

View file

@ -1,177 +0,0 @@
namespace OcrDaemon;
/// <summary>
/// 1D signal helpers (autocorrelation and run detection) operating on double[] profiles.
/// </summary>
static class SignalProcessing
{
    /// <summary>
    /// Find the dominant period in a signal using autocorrelation.
    /// Returns (period, score) where score is the autocorrelation strength.
    /// </summary>
    /// <param name="signal">Input samples.</param>
    /// <param name="minPeriod">Smallest lag considered.</param>
    /// <param name="maxPeriod">Largest lag considered (further capped at a third of the signal length).</param>
    /// <returns>
    /// The first local maximum of the normalised autocorrelation above 0.01, or (-1, 0)
    /// when the signal is shorter than three minimum periods, has variance sum below 1.0,
    /// or has no such peak.
    /// </returns>
    public static (int period, double score) FindPeriodWithScore(double[] signal, int minPeriod, int maxPeriod)
    {
        int n = signal.Length;
        if (n < minPeriod * 3) return (-1, 0);

        double mean = signal.Average();
        double variance = 0;
        for (int i = 0; i < n; i++)
            variance += (signal[i] - mean) * (signal[i] - mean);
        if (variance < 1.0) return (-1, 0);

        int maxLag = Math.Min(maxPeriod, n / 3);
        double[] ac = new double[maxLag + 1];
        for (int lag = minPeriod; lag <= maxLag; lag++)
        {
            double sum = 0;
            for (int i = 0; i < n - lag; i++)
                sum += (signal[i] - mean) * (signal[i + lag] - mean);
            ac[lag] = sum / variance;
        }

        // Find the first significant peak — this is the fundamental period.
        // Using "first" avoids picking harmonics (2x, 3x) or unrelated larger patterns.
        for (int lag = minPeriod + 1; lag < maxLag; lag++)
        {
            if (ac[lag] > 0.01 && ac[lag] >= ac[lag - 1] && ac[lag] >= ac[lag + 1])
                return (lag, ac[lag]);
        }
        return (-1, 0);
    }

    /// <summary>
    /// Find contiguous segments where values are at or above threshold.
    /// Used to find grid panel regions by density of very dark pixels.
    /// Allows brief gaps (up to 5 samples) to handle grid borders.
    /// </summary>
    /// <param name="profile">Per-position density values.</param>
    /// <param name="threshold">Minimum value for a position to count as inside a segment.</param>
    /// <param name="minLength">Minimum segment length (<c>end - start</c>) to be reported.</param>
    /// <returns>
    /// Segments as (start, end) with <c>start</c> inclusive and <c>end</c> exclusive, i.e.
    /// <c>end</c> is one past the last position that met the threshold.
    /// </returns>
    public static List<(int start, int end)> FindDarkDensitySegments(double[] profile, double threshold, int minLength)
    {
        var segments = new List<(int start, int end)>();
        int n = profile.Length;
        int curStart = -1;
        int maxGap = 5;
        int gapCount = 0;

        for (int i = 0; i < n; i++)
        {
            if (profile[i] >= threshold)
            {
                if (curStart < 0) curStart = i;
                gapCount = 0;
            }
            else if (curStart >= 0)
            {
                gapCount++;
                if (gapCount > maxGap)
                {
                    // i - gapCount is the last position that met the threshold; +1 makes the
                    // end exclusive, matching segments that run to the end of the profile and
                    // the (end - start) length check below.
                    int end = i - gapCount + 1;
                    if (end - curStart >= minLength)
                        segments.Add((curStart, end));
                    curStart = -1;
                    gapCount = 0;
                }
            }
        }

        if (curStart >= 0)
        {
            // Drop any trailing below-threshold samples from the open segment.
            int end = n - gapCount;
            if (end - curStart >= minLength)
                segments.Add((curStart, end));
        }
        return segments;
    }

    /// <summary>
    /// Find the extent of the grid in a 1D profile using local autocorrelation
    /// at the specific detected period. Only regions where the signal actually
    /// repeats at the given period will score high — much more precise than variance.
    /// </summary>
    /// <param name="signal">Input samples.</param>
    /// <param name="period">Lag (in samples) at which repetition is measured.</param>
    /// <returns>
    /// (start, end) of the longest run whose local autocorrelation exceeds 25% of the
    /// maximum, widened by a quarter period on each side and clamped to the signal;
    /// (-1, -1) when the signal is too short or no window correlates (max below 0.02).
    /// </returns>
    public static (int start, int end) FindGridExtent(double[] signal, int period)
    {
        int n = signal.Length;
        int halfWin = period * 2; // window radius: 2 periods each side
        if (n < halfWin * 2 + period) return (-1, -1);

        // Compute local AC at the specific lag=period in a sliding window
        double[] localAc = new double[n];
        for (int center = halfWin; center < n - halfWin; center++)
        {
            int wStart = center - halfWin;
            int wEnd = center + halfWin;
            int count = wEnd - wStart;

            // Local mean
            double sum = 0;
            for (int i = wStart; i < wEnd; i++)
                sum += signal[i];
            double mean = sum / count;

            // Local variance
            double varSum = 0;
            for (int i = wStart; i < wEnd; i++)
                varSum += (signal[i] - mean) * (signal[i] - mean);
            if (varSum < 1.0) continue; // flat window: leave localAc at 0

            // AC at the specific lag=period
            double acSum = 0;
            for (int i = wStart; i < wEnd - period; i++)
                acSum += (signal[i] - mean) * (signal[i + period] - mean);
            localAc[center] = Math.Max(0, acSum / varSum);
        }

        // Find the longest contiguous run above threshold
        double maxAc = 0;
        for (int i = 0; i < n; i++)
            if (localAc[i] > maxAc) maxAc = localAc[i];
        if (maxAc < 0.02) return (-1, -1);

        double threshold = maxAc * 0.25;
        int bestStart = -1, bestEnd = -1, bestLen = 0;
        int curStartPos = -1;
        for (int i = 0; i < n; i++)
        {
            if (localAc[i] > threshold)
            {
                if (curStartPos < 0) curStartPos = i;
            }
            else if (curStartPos >= 0)
            {
                int len = i - curStartPos;
                if (len > bestLen)
                {
                    bestLen = len;
                    bestStart = curStartPos;
                    bestEnd = i;
                }
                curStartPos = -1;
            }
        }

        // Handle run extending to end of signal
        if (curStartPos >= 0)
        {
            int len = n - curStartPos;
            if (len > bestLen)
            {
                bestStart = curStartPos;
                bestEnd = n;
            }
        }
        if (bestStart < 0) return (-1, -1);

        // Small extension to include cell borders at edges
        bestStart = Math.Max(0, bestStart - period / 4);
        bestEnd = Math.Min(n - 1, bestEnd + period / 4);
        return (bestStart, bestEnd);
    }
}

View file

@ -1,60 +0,0 @@
namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using OpenCvSharp;
using OpenCvSharp.Extensions;
/// <summary>
/// Handles the match-template command: locates a template image inside a
/// screenshot (or loaded image) using normalised cross-correlation.
/// </summary>
class TemplateMatchHandler
{
    /// <summary>
    /// Searches for the template at <c>req.Path</c> in the image selected by
    /// <c>req.File</c> / <c>req.Region</c>.
    /// </summary>
    /// <param name="req">
    /// Request carrying the template path, the optional image source and region, and an
    /// optional threshold expressed in percent (values &lt;= 0 fall back to 0.7).
    /// </param>
    /// <returns>
    /// An <c>ErrorResponse</c> when the template path is missing or unreadable; otherwise a
    /// <c>TemplateMatchResponse</c> whose X/Y are the centre of the best match, shifted by
    /// the region origin when a region was given.
    /// </returns>
    public object HandleTemplateMatch(Request req)
    {
        var templatePath = req.Path;
        if (string.IsNullOrEmpty(templatePath))
            return new ErrorResponse("match-template command requires 'path' (template image file)");
        if (!System.IO.File.Exists(templatePath))
            return new ErrorResponse($"Template file not found: {templatePath}");

        using var captured = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var capturedMat = BitmapConverter.ToMat(captured);
        using var needle = Cv2.ImRead(templatePath, ImreadModes.Color);
        if (needle.Empty())
            return new ErrorResponse($"Failed to load template image: {templatePath}");

        // The template is read as 3-channel colour, so drop the alpha channel of a
        // 4-channel capture before matching; anything else is copied through untouched.
        using var haystack = new Mat();
        bool hasAlpha = capturedMat.Channels() == 4;
        if (hasAlpha)
        {
            Cv2.CvtColor(capturedMat, haystack, ColorConversionCodes.BGRA2BGR);
        }
        else
        {
            capturedMat.CopyTo(haystack);
        }

        // A template larger than the searched image can never match.
        bool templateFits = needle.Rows <= haystack.Rows && needle.Cols <= haystack.Cols;
        if (!templateFits)
            return new TemplateMatchResponse { Found = false };

        using var scores = new Mat();
        Cv2.MatchTemplate(haystack, needle, scores, TemplateMatchModes.CCoeffNormed);
        Cv2.MinMaxLoc(scores, out _, out double bestScore, out _, out OpenCvSharp.Point bestLoc);

        double minScore = 0.7;
        if (req.Threshold > 0)
            minScore = req.Threshold / 100.0;

        if (bestScore < minScore)
            return new TemplateMatchResponse { Found = false, Confidence = bestScore };

        // Calculate center coordinates — offset by region origin if provided
        var origin = req.Region;
        int originX = origin?.X ?? 0;
        int originY = origin?.Y ?? 0;
        int centerX = originX + bestLoc.X + needle.Cols / 2;
        int centerY = originY + bestLoc.Y + needle.Rows / 2;

        return new TemplateMatchResponse
        {
            Found = true,
            X = centerX,
            Y = centerY,
            Width = needle.Cols,
            Height = needle.Rows,
            Confidence = bestScore,
        };
    }
}

View file

@ -1,230 +0,0 @@
namespace OcrDaemon;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text.Json;
using Tesseract;
/// <summary>
/// Runs the OCR regression cases listed in a cases.json file and reports, on stderr,
/// which expected lines were recognised for each case.
/// </summary>
static class TestRunner
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    /// <summary>
    /// Entry point of the test runner.
    /// </summary>
    /// <param name="args">
    /// Optional positional path to cases.json (defaults to tessdata/cases.json next to the
    /// executable) and optional <c>--save-pre [dir]</c> to save intermediate images
    /// (directory defaults to "processed", resolved relative to the cases file).
    /// The option may appear before or after the positional path.
    /// </param>
    /// <returns>0 when every case matched, 1 when the cases could not be loaded, 2 when any case failed.</returns>
    public static int Run(string[] args)
    {
        string baseDir = AppContext.BaseDirectory;
        var (requestedCasesPath, savePreDir) = ParseArgs(args);

        string casesPath = requestedCasesPath ?? Path.Combine(baseDir, "tessdata", "cases.json");
        if (!File.Exists(casesPath))
        {
            Console.Error.WriteLine($"cases.json not found: {casesPath}");
            return 1;
        }

        string json = File.ReadAllText(casesPath);
        var cases = JsonSerializer.Deserialize<List<TestCase>>(json, JsonOptions) ?? [];
        if (cases.Count == 0)
        {
            Console.Error.WriteLine("No test cases found.");
            return 1;
        }

        string tessdataPath = Path.Combine(baseDir, "tessdata");
        string tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";
        using var engine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
        engine.DefaultPageSegMode = PageSegMode.SingleBlock;
        engine.SetVariable("preserve_interword_spaces", "1");
        var ocrHandler = new OcrHandler(engine);

        int totalExpected = 0;
        int totalMatched = 0;
        int caseFailures = 0;
        string casesDir = Path.GetDirectoryName(casesPath) ?? baseDir;

        if (!string.IsNullOrEmpty(savePreDir))
        {
            if (!Path.IsPathRooted(savePreDir))
                savePreDir = Path.Combine(casesDir, savePreDir);
            if (!Directory.Exists(savePreDir))
                Directory.CreateDirectory(savePreDir);
        }

        foreach (var tc in cases)
        {
            if (string.IsNullOrWhiteSpace(tc.Image))
            {
                Console.Error.WriteLine($"[SKIP] {tc.Id}: missing image path");
                continue;
            }

            // Relative image paths are resolved against the directory of cases.json.
            string imagePath = Path.IsPathRooted(tc.Image)
                ? tc.Image
                : Path.Combine(casesDir, tc.Image);
            if (!File.Exists(imagePath))
            {
                Console.Error.WriteLine($"[SKIP] {tc.Id}: image not found: {imagePath}");
                continue;
            }

            List<string> actualSet;
            if (!string.IsNullOrWhiteSpace(tc.BeforeImage))
            {
                // Diff mode: snapshot the "before" image, then diff-OCR the case image against it.
                string beforePath = Path.IsPathRooted(tc.BeforeImage)
                    ? tc.BeforeImage
                    : Path.Combine(casesDir, tc.BeforeImage);
                if (!File.Exists(beforePath))
                {
                    Console.Error.WriteLine($"[SKIP] {tc.Id}: before image not found: {beforePath}");
                    continue;
                }

                ocrHandler.HandleSnapshot(new Request { File = beforePath });

                string? savePath = null;
                if (!string.IsNullOrEmpty(savePreDir))
                    savePath = Path.Combine(savePreDir, $"{tc.Id}.raw.png");

                var response = ocrHandler.HandleDiffOcr(new Request
                {
                    File = imagePath,
                    Path = savePath,
                });

                if (response is ErrorResponse err)
                {
                    Console.Error.WriteLine($"[FAIL] {tc.Id}: {err.Error}");
                    caseFailures++;
                    continue;
                }

                if (response is DiffOcrResponse diff)
                    actualSet = BuildActualSet(diff.Text, diff.Lines);
                else if (response is OcrResponse ocr)
                    actualSet = BuildActualSet(ocr.Text, ocr.Lines);
                else
                    actualSet = [];
            }
            else
            {
                // Direct mode: preprocess the image and run Tesseract on it.
                using var bitmap = new Bitmap(imagePath);
                using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);
                if (!string.IsNullOrEmpty(savePreDir))
                {
                    string outPath = Path.Combine(savePreDir, $"{tc.Id}.pre.png");
                    processed.Save(outPath, System.Drawing.Imaging.ImageFormat.Png);
                }

                using var pix = ImageUtils.BitmapToPix(processed);
                using var page = engine.Process(pix);
                var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
                var actualLines = lines.Select(l => Normalize(l.Text)).Where(s => s.Length > 0).ToList();
                var rawText = page.GetText() ?? string.Empty;
                var rawLines = rawText.Split('\n')
                    .Select(Normalize)
                    .Where(s => s.Length > 0)
                    .ToList();
                actualSet = actualLines.Concat(rawLines).Distinct().ToList();
            }

            // A case whose JSON has "expected": null deserialises to a null list.
            var expectedLines = (tc.Expected ?? [])
                .Select(Normalize)
                .Where(s => s.Length > 0)
                .ToList();
            totalExpected += expectedLines.Count;

            // The list keeps the print order; the set gives O(1) membership checks.
            var actualLookup = new HashSet<string>(actualSet);
            var missing = expectedLines.Where(e => !actualLookup.Contains(e)).ToList();
            int matched = expectedLines.Count - missing.Count;
            totalMatched += matched;

            if (matched < expectedLines.Count)
            {
                caseFailures++;
                Console.Error.WriteLine($"[FAIL] {tc.Id}: matched {matched}/{expectedLines.Count}");
                foreach (var line in missing)
                    Console.Error.WriteLine($" missing: {line}");
                Console.Error.WriteLine(" actual:");
                foreach (var line in actualSet)
                    Console.Error.WriteLine($" > {line}");
            }
            else
            {
                Console.Error.WriteLine($"[OK] {tc.Id}: matched {matched}/{expectedLines.Count}");
            }
        }

        Console.Error.WriteLine($"Summary: matched {totalMatched}/{totalExpected} lines, failed cases: {caseFailures}");
        return caseFailures == 0 ? 0 : 2;
    }

    /// <summary>
    /// Splits the command line into the positional cases path and the <c>--save-pre</c> option.
    /// Option tokens (anything starting with "--") and the value consumed by
    /// <c>--save-pre</c> are never mistaken for the cases path.
    /// </summary>
    private static (string? casesPath, string? savePreDir) ParseArgs(string[] args)
    {
        string? casesPath = null;
        string? savePreDir = null;

        for (int i = 0; i < args.Length; i++)
        {
            string arg = args[i];
            if (string.Equals(arg, "--save-pre", StringComparison.OrdinalIgnoreCase))
            {
                if (i + 1 < args.Length && !args[i + 1].StartsWith("--", StringComparison.Ordinal))
                {
                    savePreDir = args[i + 1];
                    i++;
                }
                else
                {
                    savePreDir = "processed";
                }
                continue;
            }

            // First non-blank, non-option token is the cases file.
            if (casesPath == null
                && !string.IsNullOrWhiteSpace(arg)
                && !arg.StartsWith("--", StringComparison.Ordinal))
            {
                casesPath = arg;
            }
        }

        return (casesPath, savePreDir);
    }

    /// <summary>
    /// Canonical form used for comparison: trimmed, lower-cased (invariant), with every
    /// run of whitespace collapsed to a single space.
    /// </summary>
    private static string Normalize(string input)
    {
        if (string.IsNullOrWhiteSpace(input)) return string.Empty;

        var chars = input.Trim().ToLowerInvariant().ToCharArray();
        var sb = new System.Text.StringBuilder(chars.Length);
        bool inSpace = false;
        foreach (char c in chars)
        {
            if (char.IsWhiteSpace(c))
            {
                if (!inSpace)
                {
                    sb.Append(' ');
                    inSpace = true;
                }
                continue;
            }
            inSpace = false;
            sb.Append(c);
        }
        return sb.ToString().Trim();
    }

    /// <summary>
    /// Combines the per-line results and the newline-split full text into one
    /// de-duplicated list of normalised, non-empty lines.
    /// </summary>
    private static List<string> BuildActualSet(string text, List<OcrLineResult> lines)
    {
        var lineTexts = lines.Select(l => Normalize(l.Text)).Where(s => s.Length > 0).ToList();
        var textLines = (text ?? string.Empty).Split('\n')
            .Select(Normalize)
            .Where(s => s.Length > 0)
            .ToList();
        return lineTexts.Concat(textLines).Distinct().ToList();
    }

    /// <summary>One entry of cases.json.</summary>
    private sealed class TestCase
    {
        public string Id { get; set; } = "";
        public string Image { get; set; } = "";
        public string? BeforeImage { get; set; }
        public List<string> Expected { get; set; } = [];
    }
}

View file

@ -1,79 +0,0 @@
[
{
"id": "vertex1",
"image": "images/vertex1.png",
"fullImage": "images/vertex-snapshot.png",
"expected": [
"The Vertex",
"Tribal Mask",
"Helmet",
"Quality: +20%",
"Evasion Rating: 79",
"Energy Shield: 34",
"Requires: Level 33",
"16% Increased Life Regeneration Rate",
"Has no Attribute Requirements",
"+15% to Chaos Resistance",
"Skill gems have no attribute requirements",
"+3 to level of all skills",
"15% increased mana cost efficiency",
"Twice Corrupted",
"\"A Queen should be seen, Admired, but never touched.\"",
"- Atziri, Queen of the Vaal",
"Asking Price:",
"7x Divine Orb"
]
},
{
"id": "vertex2",
"image": "images/vertex2.png",
"fullImage": "images/vertex-snapshot.png",
"expected": [
"The Vertex",
"Tribal Mask",
"Helmet",
"Quality: +20%",
"Evasion Rating: 182",
"Energy Shield: 77",
"Requires: Level 33",
"+29 To Spirit",
"+1 to Level of All Minion Skills",
"Has no Attribute Requirements",
"130% increased Evasion and Energy Shield",
"27% Increased Critical Hit Chance",
"+13% to Chaos Resistance",
"+2 to level of all skills",
"Twice Corrupted",
"\"A Queen should be seen, Admired, but never touched.\"",
"- Atziri, Queen of the Vaal",
"Asking Price:",
"35x Divine Orb"
]
},
{
"id": "raphpith1",
"image": "images/raphpith.png",
"fullImage": "images/raphpith-snapshot.png",
"expected": [
"RATHPITH GLOBE",
"SACRED Focus",
"Focus",
"Quality: +20%",
"Energy Shield: 104",
"Requires: Level 75",
"16% Increased Energy Shield",
"+24 To Maximum Mana",
"+5% to all Elemental Resistances",
"NON-CHANNELLING SPELLS HAVE 3% INCREASED MAGNITUDE OF AlLMENTS PER 100 MAXIMUM LIFE",
"NON-CHANNELLING SPELLS DEAL 6% INCREASED DAMAGE PER 100 MAXIMUM MANA",
"+72 TO MAXIMUM LIFE",
"NON-CHANNELLING SPELLS HAVE 3% INCREASED CRITICAL HIT CHANCE PER 100 MAXIMUM LIFE",
"NON-CHANNELLING SPELLS DEAL 6% INCREASED DAMACE PER 100 MAXIMUM LIFE",
"Twice Corrupted",
"THE VAAL EMPTIED THEIR SLAVES OF BEATING HEARTS",
"AND LEFT A MOUNTAIN OF TWITCHING DEAD",
"Asking Price:",
"120x Divine Orb"
]
}
]

View file

@ -1,93 +0,0 @@
[
{
"id": "1",
"image": "images/tooltip1.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 0,
"y": 84
},
"bottomRight": {
"x": 1185,
"y": 774
}
},
{
"id": "2",
"image": "images/tooltip2.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 304,
"y": 0
},
"bottomRight": {
"x": 983,
"y": 470
}
},
{
"id": "3",
"image": "images/tooltip3.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 473,
"y": 334
},
"bottomRight": {
"x": 1114,
"y": 914
}
},
{
"id": "4",
"image": "images/tooltip4.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 209,
"y": 264
},
"bottomRight": {
"x": 1097,
"y": 915
}
},
{
"id": "5",
"image": "images/tooltip5.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 763,
"y": 0
},
"bottomRight": {
"x": 1874,
"y": 560
}
},
{
"id": "6",
"image": "images/tooltip6.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 1541,
"y": 154
},
"bottomRight": {
"x": 2348,
"y": 614
}
},
{
"id": "7",
"image": "images/tooltip7.png",
"snapshotImage": "images/tooltip-snapshot.png",
"topLeft": {
"x": 1921,
"y": 40
},
"bottomRight": {
"x": 2558,
"y": 370
}
}
]

View file

@ -1,166 +0,0 @@
#!/usr/bin/env node
/**
* Fetches POE2 trade API data and generates Tesseract user-words and user-patterns
* files to improve OCR accuracy for tooltip text.
*
* Usage: node generate-words.mjs
* Output: poe2.user-words, poe2.user-patterns (in same directory)
*/
import { writeFileSync } from "fs";
import { dirname, join } from "path";
import { fileURLToPath } from "url";
const __dirname = dirname(fileURLToPath(import.meta.url));
const UA = "OAuth poe2trade/1.0 (contact: poe2trade@users.noreply.github.com)";
/**
 * Download one dataset from the PoE2 trade data API and parse it as JSON.
 *
 * @param {string} path Dataset name appended to the API base URL (e.g. "items").
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} When the HTTP response status is not OK; the message is "<url>: <status>".
 */
async function fetchJson(path) {
  const endpoint = "https://www.pathofexile.com/api/trade2/data/" + path;
  const response = await fetch(endpoint, { headers: { "User-Agent": UA } });
  if (response.ok) {
    return response.json();
  }
  throw new Error(endpoint + ": " + response.status);
}
/**
 * Fetch the four trade-API datasets, collect every individual word found in their
 * labels and texts (plus a fixed list of tooltip keywords), and write
 * `poe2.user-words` and `poe2.user-patterns` next to this script.
 */
async function main() {
  console.log("Fetching POE2 trade API data...");
  const datasets = ["items", "stats", "static", "filters"];
  const [items, stats, static_, filters] = await Promise.all(
    datasets.map((name) => fetchJson(name)),
  );

  const words = new Set();

  // Break a text into words and record each one that is at least two letters long.
  const addWords = (text) => {
    if (!text) return;
    // Strip '#' placeholders and brackets; treat '+' and '-' as word separators.
    const cleaned = text
      .replace(/#/g, "")
      .replace(/[{}()\[\]]/g, "")
      .replace(/[+\-]/g, " ");
    cleaned.split(/\s+/).forEach((token) => {
      // Trim leading/trailing non-letters so numbers and punctuation are dropped.
      const trimmed = token.replace(/^[^a-zA-Z]+|[^a-zA-Z]+$/g, "");
      if (trimmed.length >= 2) words.add(trimmed);
    });
  };

  // Phrases (multi-word names) are not stored whole: they are split into words too.
  const addPhrase = addWords;

  // Items: category labels plus each entry's base type, name and display text.
  for (const { label, entries } of items.result) {
    addPhrase(label);
    for (const { type, name, text } of entries) {
      addPhrase(type);
      addPhrase(name);
      addPhrase(text);
    }
  }

  // Stats: mod text like "+#% to Chaos Resistance", "# to maximum Life".
  for (const { entries } of stats.result) {
    for (const { text } of entries) {
      addPhrase(text);
    }
  }

  // Static: currency/fragment names like "Divine Orb", "Scroll of Wisdom".
  for (const { label, entries } of static_.result) {
    addPhrase(label);
    for (const { text } of entries) {
      addPhrase(text);
    }
  }

  // Filters: group titles, filter labels and the texts of their options.
  for (const group of filters.result) {
    addPhrase(group.title);
    for (const filter of group.filters || []) {
      addPhrase(filter.text);
      for (const option of filter.option?.options || []) {
        addPhrase(option.text);
      }
    }
  }

  // Common tooltip keywords that are added regardless of the API contents.
  const extraWords = [
    // Section headers
    "Quality", "Requires", "Level", "Asking", "Price",
    "Corrupted", "Mirrored", "Unmodifiable",
    "Twice", "Sockets",
    // Attributes
    "Strength", "Dexterity", "Intelligence", "Spirit",
    // Defense types
    "Armour", "Evasion", "Rating", "Energy", "Shield",
    // Damage types
    "Physical", "Elemental", "Lightning", "Cold", "Fire", "Chaos",
    // Common mod words
    "increased", "reduced", "more", "less",
    "added", "converted", "regeneration",
    "maximum", "minimum", "total",
    "Resistance", "Damage", "Speed", "Duration",
    "Critical", "Hit", "Chance", "Multiplier",
    "Attack", "Cast", "Spell", "Minion", "Skill",
    "Mana", "Life", "Rarity",
    // Item classes
    "Helmet", "Gloves", "Boots", "Body", "Belt",
    "Ring", "Amulet", "Shield", "Quiver",
    "Sword", "Axe", "Mace", "Dagger", "Wand", "Staff", "Bow",
    "Sceptre", "Crossbow", "Flail", "Spear",
    // Rarity
    "Normal", "Magic", "Rare", "Unique",
  ];
  extraWords.forEach((word) => words.add(word));

  // user-words: one word per line, ordered case-insensitively.
  const byLowercase = (a, b) => a.toLowerCase().localeCompare(b.toLowerCase());
  const sortedWords = Array.from(words).sort(byLowercase);
  const wordsPath = join(__dirname, "poe2.user-words");
  writeFileSync(wordsPath, sortedWords.join("\n") + "\n");
  console.log(`Wrote ${sortedWords.length} words to ${wordsPath}`);

  // user-patterns: fixed list of common tooltip formats, one per line.
  // NOTE(review): these are written in regex-like syntax; confirm they match the
  // pattern syntax Tesseract expects in a user-patterns file.
  const patterns = [
    // Stat values: "+12% to Chaos Resistance", "+3 to Level"
    "\\+\\d+%",
    "\\+\\d+",
    "\\-\\d+%",
    "\\-\\d+",
    // Ranges: "10-20"
    "\\d+-\\d+",
    // Currency amounts: "7x Divine Orb", "35x Divine Orb"
    "\\d+x",
    // Quality: "+20%"
    "\\d+%",
    // Level requirements: "Level \\d+"
    "Level \\d+",
    // Asking Price section
    "Asking Price:",
    // Item level
    "Item Level: \\d+",
    // Requires line
    "Requires:",
    // Rating values
    "Rating: \\d+",
    "Shield: \\d+",
    "Quality: \\+\\d+%",
  ];
  const patternsPath = join(__dirname, "poe2.user-patterns");
  writeFileSync(patternsPath, patterns.join("\n") + "\n");
  console.log(`Wrote ${patterns.length} patterns to ${patternsPath}`);
}
// Script entry: run main(); if it rejects, print the error and exit with status 1.
main().catch((e) => {
console.error(e);
process.exit(1);
});

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 6 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.6 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.1 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.9 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.1 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.8 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.9 MiB

View file

@ -1,14 +0,0 @@
\+\d+%
\+\d+
\-\d+%
\-\d+
\d+-\d+
\d+x
\d+%
Level \d+
Asking Price:
Item Level: \d+
Requires:
Rating: \d+
Shield: \d+
Quality: \+\d+%

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -2,7 +2,7 @@
Persistent Python OCR daemon (stdin/stdout JSON-per-line protocol).
Supports EasyOCR engine, lazy-loaded on first use.
Managed as a subprocess by the C# OcrDaemon.
Managed as a subprocess by PythonOcrBridge in Poe2Trade.Screen.
Request: {"cmd": "ocr", "engine": "easyocr", "imagePath": "C:\\temp\\screenshot.png"}
Response: {"ok": true, "text": "...", "lines": [{"text": "...", "words": [...]}]}
@ -12,7 +12,6 @@ import sys
import json
_easyocr_reader = None
_paddle_ocr = None
def _redirect_stdout_to_stderr():
@ -116,13 +115,6 @@ def items_to_response(items):
return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
def run_easyocr(image_path):
from PIL import Image
import numpy as np
img = np.array(Image.open(image_path))
return run_easyocr_array(img)
def run_easyocr_array(img, merge_gap=0, **easyocr_kwargs):
reader = get_easyocr()
@ -147,67 +139,6 @@ def run_easyocr_array(img, merge_gap=0, **easyocr_kwargs):
return items_to_response(items)
def get_paddleocr():
global _paddle_ocr
if _paddle_ocr is None:
sys.stderr.write("Loading PaddleOCR model...\n")
sys.stderr.flush()
import os
os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")
real_stdout = _redirect_stdout_to_stderr()
try:
from paddleocr import PaddleOCR
_paddle_ocr = PaddleOCR(
use_doc_orientation_classify=False,
use_doc_unwarping=False,
use_textline_orientation=False,
lang="en",
ocr_version="PP-OCRv4",
)
finally:
_restore_stdout(real_stdout)
sys.stderr.write("PaddleOCR model loaded.\n")
sys.stderr.flush()
return _paddle_ocr
def run_paddleocr_array(img, merge_gap=0):
ocr = get_paddleocr()
# Ensure RGB 3-channel
if len(img.shape) == 2:
import numpy as np
img = np.stack([img, img, img], axis=-1)
elif img.shape[2] == 4:
img = img[:, :, :3]
real_stdout = _redirect_stdout_to_stderr()
try:
results = ocr.predict(img)
finally:
_restore_stdout(real_stdout)
items = []
# PaddleOCR 3.4: results is list of OCRResult objects
for res in results:
texts = res.get("rec_texts", []) if hasattr(res, "get") else getattr(res, "rec_texts", [])
polys = res.get("dt_polys", []) if hasattr(res, "get") else getattr(res, "dt_polys", [])
for i, text in enumerate(texts):
if not text.strip():
continue
if i < len(polys):
bbox = polys[i]
x, y, w, h = bbox_to_rect(bbox)
else:
x, y, w, h = 0, 0, 0, 0
items.append({"text": text.strip(), "x": x, "y": y, "w": w, "h": h})
if merge_gap > 0:
items = merge_nearby_detections(items, merge_gap)
return items_to_response(items)
def load_image(req):
"""Load image from either imagePath (file) or imageBase64 (base64-encoded PNG)."""
from PIL import Image
@ -232,29 +163,23 @@ def handle_request(req):
if cmd != "ocr":
return {"ok": False, "error": f"Unknown command: {cmd}"}
engine = req.get("engine", "")
img = load_image(req)
if img is None:
return {"ok": False, "error": "Missing imagePath or imageBase64"}
merge_gap = req.get("mergeGap", 0)
if engine == "easyocr":
easyocr_kwargs = {}
for json_key, py_param in [
("linkThreshold", "link_threshold"),
("textThreshold", "text_threshold"),
("lowText", "low_text"),
("widthThs", "width_ths"),
("paragraph", "paragraph"),
]:
if json_key in req:
easyocr_kwargs[py_param] = req[json_key]
return run_easyocr_array(img, merge_gap=merge_gap, **easyocr_kwargs)
elif engine == "paddleocr":
return run_paddleocr_array(img, merge_gap=merge_gap)
else:
return {"ok": False, "error": f"Unknown engine: {engine}"}
easyocr_kwargs = {}
for json_key, py_param in [
("linkThreshold", "link_threshold"),
("textThreshold", "text_threshold"),
("lowText", "low_text"),
("widthThs", "width_ths"),
("paragraph", "paragraph"),
]:
if json_key in req:
easyocr_kwargs[py_param] = req[json_key]
return run_easyocr_array(img, merge_gap=merge_gap, **easyocr_kwargs)
def main():