switched to new way
|
|
@ -1,623 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Tesseract;
|
||||
|
||||
static class Daemon
|
||||
{
|
||||
// Serializer settings shared by every request that is parsed and every response that is
// written: camelCase property names, and null-valued properties are left out of the output.
private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
{
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
};
|
||||
|
||||
/// <summary>
/// Runs the daemon: creates the shared Tesseract engine, writes a ready message, then
/// answers one JSON request per stdin line with one JSON response per stdout line until
/// stdin reaches end-of-stream.
/// </summary>
/// <returns>
/// 0 once stdin is exhausted; 1 if the Tesseract engine could not be created (an error
/// response is written first).
/// </returns>
public static int Run()
{
    ScreenCapture.InitDpiAwareness();

    // Pre-create the Tesseract OCR engine (reused across all requests).
    // The "poe2" model is used when its traineddata file is present, otherwise "eng".
    var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";
    TesseractEngine? tessEngine = null;
    try
    {
        tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
        tessEngine.DefaultPageSegMode = PageSegMode.SingleBlock;
        tessEngine.SetVariable("preserve_interword_spaces", "1");

        // Optional per-language word list and pattern list, picked up only when present.
        var userWordsPath = Path.Combine(tessdataPath, $"{tessLang}.user-words");
        var userPatternsPath = Path.Combine(tessdataPath, $"{tessLang}.user-patterns");
        if (File.Exists(userWordsPath))
        {
            tessEngine.SetVariable("user_words_file", userWordsPath);
            var wordCount = File.ReadAllLines(userWordsPath).Length;
            Console.Error.WriteLine($"Loaded user-words: {wordCount} words from {userWordsPath}");
        }
        if (File.Exists(userPatternsPath))
        {
            tessEngine.SetVariable("user_patterns_file", userPatternsPath);
            var patternCount = File.ReadAllLines(userPatternsPath).Length;
            Console.Error.WriteLine($"Loaded user-patterns: {patternCount} patterns from {userPatternsPath}");
        }
        Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
    }
    catch (Exception ex)
    {
        // The engine may already exist if a later configuration step threw; release it.
        tessEngine?.Dispose();
        // Name the model that was actually selected, not always "eng".
        WriteResponse(new ErrorResponse($"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/{tessLang}.traineddata exists."));
        return 1;
    }

    try
    {
        // Signal ready
        WriteResponse(new ReadyResponse());

        var ocrHandler = new OcrHandler(tessEngine);
        var gridHandler = new GridHandler();
        var detectGridHandler = new DetectGridHandler();
        var templateMatchHandler = new TemplateMatchHandler();
        var edgeCropHandler = new EdgeCropHandler();
        var pythonBridge = new PythonOcrBridge();

        try
        {
            // Main loop: read one JSON line, handle, write one JSON line
            string? line;
            while ((line = Console.In.ReadLine()) != null)
            {
                line = line.Trim();
                if (line.Length == 0) continue;

                try
                {
                    var request = JsonSerializer.Deserialize<Request>(line, JsonOptions);
                    if (request == null)
                    {
                        WriteResponse(new ErrorResponse("Failed to parse request"));
                        continue;
                    }

                    // Command names are matched case-insensitively.
                    object response = request.Cmd?.ToLowerInvariant() switch
                    {
                        "ocr" => HandleOcrPipeline(ocrHandler, pythonBridge, request),
                        "screenshot" => ocrHandler.HandleScreenshot(request),
                        "capture" => ocrHandler.HandleCapture(request),
                        "snapshot" => ocrHandler.HandleSnapshot(request),
                        "diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
                        "edge-ocr" => HandleEdgeOcrPipeline(ocrHandler, edgeCropHandler, pythonBridge, request),
                        "test" => ocrHandler.HandleTest(request),
                        "tune" => ocrHandler.HandleTune(request),
                        "crop-test" => HandleCropTest(ocrHandler, edgeCropHandler, request),
                        "crop-tune" => HandleCropTune(ocrHandler, request),
                        "grid" => gridHandler.HandleGrid(request),
                        "detect-grid" => detectGridHandler.HandleDetectGrid(request),
                        "match-template" => templateMatchHandler.HandleTemplateMatch(request),
                        _ => new ErrorResponse($"Unknown command: {request.Cmd}"),
                    };
                    WriteResponse(response);
                }
                catch (Exception ex)
                {
                    // A failing request (bad JSON, handler exception) must not end the daemon;
                    // report it and keep reading.
                    WriteResponse(new ErrorResponse(ex.Message));
                }
            }
        }
        finally
        {
            // Runs even when reading stdin or writing stdout throws, so the bridge is
            // always released.
            pythonBridge.Dispose();
        }

        return 0;
    }
    finally
    {
        tessEngine.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// Unified OCR pipeline for full/region captures.
/// Capture → optional preprocess → route to engine (tesseract / easyocr / paddleocr).
/// </summary>
/// <param name="ocrHandler">Handles the unprocessed Tesseract fast path and Tesseract recognition on a bitmap.</param>
/// <param name="pythonBridge">Receives the bitmap for every engine other than "tesseract".</param>
/// <param name="request">
/// Request whose <c>Engine</c> (default "tesseract"), <c>Preprocess</c> (default "none"),
/// <c>File</c>, <c>Region</c> and <c>Params</c> select what is captured and how it is processed.
/// </param>
/// <returns>
/// The engine's response object, or an <c>ErrorResponse</c> when "bgsub" preprocessing is requested.
/// </returns>
private static object HandleOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var preprocess = request.Preprocess ?? "none";

    // No preprocess + tesseract = original fast path
    if (engine == "tesseract" && preprocess == "none")
        return ocrHandler.HandleOcr(request);

    // bgsub needs a reference frame this command does not have. Reject it before
    // capturing so no screen grab or file load is done for a request that cannot succeed.
    if (preprocess == "bgsub")
        return new ErrorResponse("bgsub preprocess requires a reference frame; use diff-ocr instead.");

    // Capture
    using var bitmap = ScreenCapture.CaptureOrLoad(request.File, request.Region);

    // Preprocess: "tophat" runs the preprocessor; any other value is treated as "none"
    // and the capture is passed on as an unmodified copy.
    Bitmap processed;
    if (preprocess == "tophat")
    {
        var kernelSize = request.Params?.Ocr.KernelSize ?? 41;
        processed = ImagePreprocessor.PreprocessForOcr(bitmap, kernelSize: kernelSize);
    }
    else
    {
        processed = (Bitmap)bitmap.Clone();
    }
    using var _processed = processed;

    // Route to engine
    if (engine == "tesseract")
    {
        // Without an explicit region the whole processed image is the region.
        var region = request.Region != null
            ? new RegionRect { X = request.Region.X, Y = request.Region.Y, Width = request.Region.Width, Height = request.Region.Height }
            : new RegionRect { X = 0, Y = 0, Width = processed.Width, Height = processed.Height };
        return ocrHandler.RunTesseractOnBitmap(processed, region);
    }

    // easyocr, paddleocr
    // NOTE(review): unlike the diff/edge pipelines, word coordinates are not offset by
    // request.Region here — confirm callers expect capture-relative coordinates.
    return pythonBridge.OcrFromBitmap(processed, engine);
}
|
||||
|
||||
/// <summary>
/// Unified diff-OCR pipeline for tooltip detection.
/// DiffCrop → preprocess (default=bgsub) → route to engine.
/// </summary>
/// <param name="ocrHandler">Provides the diff crop, the original diff-OCR path and Tesseract recognition.</param>
/// <param name="pythonBridge">Receives the processed crop for every engine other than "tesseract".</param>
/// <param name="request">
/// Request whose <c>Engine</c>, <c>Preprocess</c>, <c>Params</c>, <c>Threshold</c> and <c>Path</c>
/// control cropping, preprocessing and debug image output.
/// </param>
/// <returns>
/// The original handler's response on the fast path, an empty <c>OcrResponse</c> when the diff
/// crop finds nothing, the Tesseract result, or a <c>DiffOcrResponse</c> for the Python engines.
/// </returns>
private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var isPythonEngine = engine is "easyocr" or "paddleocr";
    var p = request.Params ?? new DiffOcrParams();
    var cropParams = p.Crop;
    var ocrParams = p.Ocr;
    // A positive request-level threshold overrides the crop parameter.
    if (request.Threshold > 0) cropParams.DiffThresh = request.Threshold;

    // Determine preprocess mode: explicit request.Preprocess > params.UseBackgroundSub > default "bgsub"
    string preprocess;
    if (request.Preprocess != null)
        preprocess = request.Preprocess;
    else if (request.Params != null)
        preprocess = ocrParams.UseBackgroundSub ? "bgsub" : "tophat";
    else
        preprocess = "bgsub";

    // No engine override + no preprocess override + no params = original Tesseract path
    if (engine == "tesseract" && request.Preprocess == null && request.Params == null)
        return ocrHandler.HandleDiffOcr(request);

    var sw = System.Diagnostics.Stopwatch.StartNew();

    var cropResult = ocrHandler.DiffCrop(request, cropParams);
    if (cropResult == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, refCropped, current, region) = cropResult.Value;
    using var _current = current;

    // Preprocess — only sees ocrParams
    Bitmap processed;
    try
    {
        if (preprocess == "bgsub")
        {
            // The Python engines always receive the crop at its original scale.
            int upscale = isPythonEngine ? 1 : ocrParams.Upscale;
            processed = ImagePreprocessor.PreprocessWithBackgroundSub(
                cropped, refCropped, dimPercentile: ocrParams.DimPercentile, textThresh: ocrParams.TextThresh,
                upscale: upscale, softThreshold: ocrParams.SoftThreshold);
        }
        else if (preprocess == "tophat")
        {
            processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize);
        }
        else // "none" (and any unrecognised value)
        {
            processed = (Bitmap)cropped.Clone();
        }
    }
    finally
    {
        // The crops are only needed as preprocessing input; release them even when
        // preprocessing throws so the bitmaps do not leak.
        cropped.Dispose();
        refCropped.Dispose();
    }

    var diffMs = sw.ElapsedMilliseconds;
    using var _processed = processed;

    // Save debug images if path provided
    if (!string.IsNullOrEmpty(request.Path))
    {
        var dir = Path.GetDirectoryName(request.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        // Save preprocessed crop
        processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));

        // The full frame is saved next to it as "<name>.full<ext>".
        var ext = Path.GetExtension(request.Path);
        var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
        current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
    }

    // Writes the shared timing line to stderr for either engine branch.
    void LogTiming(long ocrMs) =>
        Console.Error.WriteLine($" diff-ocr-pipeline: engine={engine} preprocess={preprocess} diff={diffMs}ms ocr={ocrMs}ms crop={region.Width}x{region.Height}");

    // Route to engine
    sw.Restart();
    if (engine == "tesseract")
    {
        var result = ocrHandler.RunTesseractOnBitmap(processed, region);
        LogTiming(sw.ElapsedMilliseconds);
        return result;
    }

    // easyocr, paddleocr
    var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
    LogTiming(sw.ElapsedMilliseconds);

    // Offset word coordinates to screen space
    foreach (var line in ocrResult.Lines)
        foreach (var word in line.Words)
        {
            word.X += region.X;
            word.Y += region.Y;
        }

    return new DiffOcrResponse
    {
        Text = ocrResult.Text,
        Lines = ocrResult.Lines,
        Region = region,
    };
}
|
||||
|
||||
/// <summary>
/// Edge-based tooltip detection pipeline.
/// EdgeCrop → preprocess (tophat only; bgsub falls back to tophat) → route to engine.
/// </summary>
/// <param name="ocrHandler">Runs Tesseract recognition on the processed crop.</param>
/// <param name="edgeCropHandler">Locates and crops the tooltip.</param>
/// <param name="pythonBridge">Receives the processed crop for every engine other than "tesseract".</param>
/// <param name="request">
/// Request whose <c>Engine</c>, <c>Preprocess</c>, <c>EdgeParams</c> and <c>Path</c> control
/// cropping, preprocessing and debug image output.
/// </param>
/// <returns>
/// An empty <c>OcrResponse</c> when the edge crop finds nothing, the Tesseract result, or a
/// <c>DiffOcrResponse</c> for the Python engines.
/// </returns>
private static object HandleEdgeOcrPipeline(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var ep = request.EdgeParams ?? new EdgeOcrParams();
    var cropParams = ep.Crop;
    var ocrParams = ep.Ocr;

    // Edge method only supports tophat (no reference frame for bgsub)
    string preprocess = request.Preprocess ?? "tophat";
    if (preprocess == "bgsub") preprocess = "tophat";

    var sw = System.Diagnostics.Stopwatch.StartNew();

    var cropResult = edgeCropHandler.EdgeCrop(request, cropParams);
    if (cropResult == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, fullCapture, region) = cropResult.Value;
    using var _fullCapture = fullCapture;

    // Preprocess
    Bitmap processed;
    try
    {
        if (preprocess == "tophat")
        {
            processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize, upscale: ocrParams.Upscale);
        }
        else // "none" (and any unrecognised value)
        {
            processed = (Bitmap)cropped.Clone();
        }
    }
    finally
    {
        // The crop is only needed as preprocessing input; release it even when
        // preprocessing throws so the bitmap does not leak.
        cropped.Dispose();
    }

    var cropMs = sw.ElapsedMilliseconds;
    using var _processed = processed;

    // Save debug images if path provided
    if (!string.IsNullOrEmpty(request.Path))
    {
        var dir = Path.GetDirectoryName(request.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));

        // The full capture is saved next to it as "<name>.full<ext>".
        var ext = Path.GetExtension(request.Path);
        var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
        fullCapture.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
    }

    // Writes the shared timing line to stderr for either engine branch.
    void LogTiming(long ocrMs) =>
        Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}");

    // Route to engine
    sw.Restart();
    if (engine == "tesseract")
    {
        var result = ocrHandler.RunTesseractOnBitmap(processed, region, pad: cropParams.OcrPad, upscale: ocrParams.Upscale);
        LogTiming(sw.ElapsedMilliseconds);
        return result;
    }

    // easyocr, paddleocr
    var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
    LogTiming(sw.ElapsedMilliseconds);

    // Offset word coordinates by the crop origin.
    foreach (var line in ocrResult.Lines)
        foreach (var word in line.Words)
        {
            word.X += region.X;
            word.Y += region.Y;
        }

    return new DiffOcrResponse
    {
        Text = ocrResult.Text,
        Lines = ocrResult.Lines,
        Region = region,
    };
}
|
||||
|
||||
/// <summary>
/// Tunes DiffCropParams one parameter at a time against the ground-truth boxes in
/// tessdata/crop.json, keeping any value that raises the average IoU, for at most three rounds.
/// </summary>
private static object HandleCropTune(OcrHandler ocrHandler, Request request)
{
    var dataRoot = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var manifestPath = Path.Combine(dataRoot, "crop.json");
    if (!File.Exists(manifestPath))
        return new ErrorResponse($"crop.json not found at {manifestPath}");

    var manifest = JsonSerializer.Deserialize<List<CropTestCase>>(File.ReadAllText(manifestPath), JsonOptions);
    if (manifest == null || manifest.Count == 0)
        return new ErrorResponse("No test cases in crop.json");

    // Keep only the cases whose image and snapshot files both exist on disk.
    var usable = new List<(CropTestCase tc, string imagePath, string snapshotPath)>();
    foreach (var entry in manifest)
    {
        var shot = Path.Combine(dataRoot, entry.Image);
        var reference = Path.Combine(dataRoot, entry.SnapshotImage);
        if (!File.Exists(shot) || !File.Exists(reference))
            continue;
        usable.Add((entry, shot, reference));
    }
    if (usable.Count == 0)
        return new ErrorResponse("No valid test cases found");

    // Average IoU over all usable cases for one parameter set. A case where the diff
    // crop finds nothing adds 0 but still counts in the divisor.
    double MeanIoU(DiffCropParams candidate)
    {
        double sum = 0;
        foreach (var (tc, imagePath, snapshotPath) in usable)
        {
            ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
            var crop = ocrHandler.DiffCrop(new Request { File = imagePath }, candidate);
            if (crop == null)
                continue;

            // Only the region is needed; the bitmaps are released straight away.
            var (cropped, refCropped, current, found) = crop.Value;
            cropped.Dispose();
            refCropped.Dispose();
            current.Dispose();

            int left = Math.Max(found.X, tc.TopLeft.X);
            int top = Math.Max(found.Y, tc.TopLeft.Y);
            int right = Math.Min(found.X + found.Width, tc.BottomRight.X);
            int bottom = Math.Min(found.Y + found.Height, tc.BottomRight.Y);
            double overlap = (double)Math.Max(0, right - left) * Math.Max(0, bottom - top);

            double truthW = tc.BottomRight.X - tc.TopLeft.X;
            double truthH = tc.BottomRight.Y - tc.TopLeft.Y;
            double combined = (double)found.Width * found.Height + truthW * truthH - overlap;
            if (combined > 0)
                sum += overlap / combined;
        }
        return sum / usable.Count;
    }

    // Independent copy so a trial value never touches the current best set.
    DiffCropParams Duplicate(DiffCropParams src) => new()
    {
        DiffThresh = src.DiffThresh,
        RowThreshDiv = src.RowThreshDiv,
        ColThreshDiv = src.ColThreshDiv,
        MaxGap = src.MaxGap,
        TrimCutoff = src.TrimCutoff,
        OcrPad = src.OcrPad,
    };

    // Start from provided params or defaults
    var best = request.Params?.Crop ?? new DiffCropParams();
    double bestScore = MeanIoU(best);
    int totalEvals = 1;
    Console.Error.WriteLine($" crop-tune: baseline avgIoU={bestScore:F4} {best}");

    var intSweeps = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
    {
        ("diffThresh", [5, 10, 15, 20, 25, 30, 40], (c, v) => c.DiffThresh = v),
        ("rowThreshDiv", [20, 30, 40, 50, 60, 80, 100], (c, v) => c.RowThreshDiv = v),
        ("colThreshDiv", [5, 8, 10, 12, 15, 20], (c, v) => c.ColThreshDiv = v),
        ("maxGap", [5, 10, 15, 20, 25, 30], (c, v) => c.MaxGap = v),
    };
    double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5];

    const int maxRounds = 3;
    int roundNo = 0;
    bool improved;
    do
    {
        roundNo++;
        improved = false;
        Console.Error.WriteLine($"--- Round {roundNo} ---");

        // Integer parameters: try every listed value with the others held at the current best.
        foreach (var (name, values, set) in intSweeps)
        {
            Console.Error.Write($" {name}: ");
            int winner = 0;
            double winnerScore = -1;

            foreach (int v in values)
            {
                var trial = Duplicate(best);
                set(trial, v);
                double score = MeanIoU(trial);
                totalEvals++;
                Console.Error.Write($"{v}={score:F4} ");
                if (score > winnerScore)
                {
                    winnerScore = score;
                    winner = v;
                }
            }
            Console.Error.WriteLine();

            if (winnerScore <= bestScore)
                continue;

            set(best, winner);
            bestScore = winnerScore;
            improved = true;
            Console.Error.WriteLine($" -> {name}={winner} avgIoU={bestScore:F4}");
        }

        // trimCutoff sweep: starts from the current value, so only a strict improvement replaces it.
        Console.Error.Write($" trimCutoff: ");
        double trimWinner = best.TrimCutoff;
        double trimWinnerScore = bestScore;

        foreach (double v in trimValues)
        {
            var trial = Duplicate(best);
            trial.TrimCutoff = v;
            double score = MeanIoU(trial);
            totalEvals++;
            Console.Error.Write($"{v:F2}={score:F4} ");
            if (score > trimWinnerScore)
            {
                trimWinnerScore = score;
                trimWinner = v;
            }
        }
        Console.Error.WriteLine();

        if (trimWinnerScore > bestScore)
        {
            best.TrimCutoff = trimWinner;
            bestScore = trimWinnerScore;
            improved = true;
            Console.Error.WriteLine($" -> trimCutoff={trimWinner:F2} avgIoU={bestScore:F4}");
        }

        Console.Error.WriteLine($" End of round {roundNo}: avgIoU={bestScore:F4} {best}");
    }
    while (improved && roundNo < maxRounds); // stop early once a full round changes nothing

    Console.Error.WriteLine($"\n crop-tune: BEST avgIoU={bestScore:F4} {best} evals={totalEvals}");

    return new CropTuneResponse
    {
        BestAvgIoU = bestScore,
        BestParams = best,
        Iterations = totalEvals,
    };
}
|
||||
|
||||
/// <summary>
/// Crop accuracy test: runs the diff or edge crop on the test cases from tessdata/crop.json
/// and computes IoU and per-edge deltas vs ground truth.
/// </summary>
/// <param name="ocrHandler">Loads each case's snapshot as reference and runs the diff crop.</param>
/// <param name="edgeCropHandler">Runs the edge crop.</param>
/// <param name="request">
/// <c>Engine</c> is reused as the method selector ("diff", "edge" or "both"; default "diff").
/// <c>Params.Crop</c> and <c>EdgeParams.Crop</c> supply the crop parameters, falling back to defaults.
/// </param>
/// <returns>
/// A <c>CropTestResponse</c> with per-case results and the average IoU, or an <c>ErrorResponse</c>
/// when the method is not recognised or crop.json is missing or empty.
/// </returns>
private static object HandleCropTest(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, Request request)
{
    var method = request.Engine ?? "diff"; // reuse engine field: "diff", "edge", or "both"

    // An unrecognised method would run no crop at all and report IoU 0 for every case,
    // which is indistinguishable from a genuinely bad crop. Fail loudly instead.
    if (method is not ("diff" or "edge" or "both"))
        return new ErrorResponse($"Unknown crop-test method: {method} (expected diff, edge or both)");

    var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var casesPath = Path.Combine(tessdataDir, "crop.json");
    if (!File.Exists(casesPath))
        return new ErrorResponse($"crop.json not found at {casesPath}");

    var json = File.ReadAllText(casesPath);
    var cases = JsonSerializer.Deserialize<List<CropTestCase>>(json, JsonOptions);
    if (cases == null || cases.Count == 0)
        return new ErrorResponse("No test cases in crop.json");

    var diffParams = request.Params?.Crop ?? new DiffCropParams();
    var edgeParams = request.EdgeParams?.Crop ?? new EdgeCropParams();

    var results = new List<CropTestResult>();

    foreach (var tc in cases)
    {
        var imagePath = Path.Combine(tessdataDir, tc.Image);
        var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage);

        if (!File.Exists(imagePath) || !File.Exists(snapshotPath))
        {
            // A skipped case is still recorded with IoU 0, so it lowers the average.
            Console.Error.WriteLine($" crop-test: SKIP {tc.Id} — missing files");
            results.Add(new CropTestResult { Id = tc.Id, IoU = 0 });
            continue;
        }

        // Expected region
        int expX = tc.TopLeft.X;
        int expY = tc.TopLeft.Y;
        int expW = tc.BottomRight.X - tc.TopLeft.X;
        int expH = tc.BottomRight.Y - tc.TopLeft.Y;
        var expected = new RegionRect { X = expX, Y = expY, Width = expW, Height = expH };

        RegionRect? actual = null;

        // NOTE(review): "both" currently runs only the diff crop (the edge branch below
        // requires method == "edge") — confirm whether that is intended.
        if (method is "diff" or "both")
        {
            // Load snapshot as reference
            ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
            var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, diffParams);
            if (cropResult != null)
            {
                // Only the region is scored; the bitmaps are released straight away.
                var (cropped, refCropped, current, region) = cropResult.Value;
                actual = region;
                cropped.Dispose();
                refCropped.Dispose();
                current.Dispose();
            }
        }

        if (method == "edge")
        {
            // Default cursor to center of ground-truth bbox if not specified
            int cx = tc.CursorX ?? (tc.TopLeft.X + tc.BottomRight.X) / 2;
            int cy = tc.CursorY ?? (tc.TopLeft.Y + tc.BottomRight.Y) / 2;
            var cropResult = edgeCropHandler.EdgeCrop(
                new Request { File = imagePath, CursorX = cx, CursorY = cy },
                edgeParams);
            if (cropResult != null)
            {
                var (cropped, fullCapture, region) = cropResult.Value;
                actual = region;
                cropped.Dispose();
                fullCapture.Dispose();
            }
        }

        // Compute IoU and deltas (all stay 0 when no crop was produced)
        double iou = 0;
        int dTop = 0, dLeft = 0, dRight = 0, dBottom = 0;
        if (actual != null)
        {
            int ax1 = actual.X, ay1 = actual.Y;
            int ax2 = actual.X + actual.Width, ay2 = actual.Y + actual.Height;
            int ex1 = expX, ey1 = expY, ex2 = tc.BottomRight.X, ey2 = tc.BottomRight.Y;

            int ix1 = Math.Max(ax1, ex1), iy1 = Math.Max(ay1, ey1);
            int ix2 = Math.Min(ax2, ex2), iy2 = Math.Min(ay2, ey2);
            int iw = Math.Max(0, ix2 - ix1), ih = Math.Max(0, iy2 - iy1);
            double intersection = (double)iw * ih;
            double union = (double)actual.Width * actual.Height + (double)expW * expH - intersection;
            iou = union > 0 ? intersection / union : 0;

            dTop = ay1 - ey1; // positive = crop starts too low
            dLeft = ax1 - ex1; // positive = crop starts too far right
            dRight = ax2 - ex2; // positive = crop ends too far right
            dBottom = ay2 - ey2; // positive = crop ends too low
        }

        Console.Error.WriteLine($" crop-test #{tc.Id}: IoU={iou:F3} expected=({expX},{expY})+{expW}x{expH} actual={FormatRegion(actual)} delta T={dTop:+0;-#} L={dLeft:+0;-#} R={dRight:+0;-#} B={dBottom:+0;-#}");

        results.Add(new CropTestResult
        {
            Id = tc.Id,
            IoU = iou,
            Expected = expected,
            Actual = actual,
            DeltaTop = dTop,
            DeltaLeft = dLeft,
            DeltaRight = dRight,
            DeltaBottom = dBottom,
        });
    }

    double avgIoU = results.Count > 0 ? results.Average(r => r.IoU) : 0;
    Console.Error.WriteLine($" crop-test: method={method} avgIoU={avgIoU:F3} ({results.Count} cases)");

    return new CropTestResponse
    {
        Method = method,
        AvgIoU = avgIoU,
        Results = results,
    };
}
|
||||
|
||||
// Renders a region as "(x,y)+WxH" for log lines; a missing region is rendered as "null".
private static string FormatRegion(RegionRect? r)
{
    if (r == null)
        return "null";

    return $"({r.X},{r.Y})+{r.Width}x{r.Height}";
}
|
||||
|
||||
// Serializes the response with the shared JSON options, writes it to stdout as a single
// line and flushes immediately.
private static void WriteResponse(object response)
{
    var stdout = Console.Out;
    stdout.WriteLine(JsonSerializer.Serialize(response, JsonOptions));
    stdout.Flush();
}
|
||||
}
|
||||
|
|
@ -1,190 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
/// <summary>
/// Detects a regular 2D grid inside a captured region by looking for periodic peaks in
/// the density of very dark pixels, first across columns and then across rows.
/// </summary>
class DetectGridHandler
{
    /// <summary>
    /// Handles the "detect-grid" command.
    /// </summary>
    /// <param name="req">
    /// Request; <c>Region</c> is required. <c>MinCellSize</c> / <c>MaxCellSize</c> bound the cell
    /// period searched (20 / 70 when not positive). <c>Debug</c> enables diagnostics on stderr.
    /// </param>
    /// <returns>
    /// An <c>ErrorResponse</c> when no region is given; otherwise a <c>DetectGridResponse</c> whose
    /// <c>Detected</c> flag says whether a grid was found, with the grid's position offset by
    /// the request region's origin.
    /// </returns>
    public object HandleDetectGrid(Request req)
    {
        if (req.Region == null)
            return new ErrorResponse("detect-grid requires region");

        int minCell = req.MinCellSize > 0 ? req.MinCellSize : 20;
        int maxCell = req.MaxCellSize > 0 ? req.MaxCellSize : 70;
        bool debug = req.Debug;

        byte[] gray = CaptureGrayscale(req, out int w, out int h);

        // ── Pass 1: Scan horizontal bands using "very dark pixel density" ──
        // Grid lines are nearly all very dark (density ~0.9), cell interiors are
        // partially dark (0.3-0.5), game world is mostly bright (density ~0.05).
        // This creates clear periodic peaks at grid line positions.
        int bandH = 200;
        int bandStep = 40;
        const int veryDarkPixelThresh = 12; // pixels below this brightness = "very dark"
        const double gridSegThresh = 0.25; // density above this = potential grid column

        var candidates = new List<(int bandY, int cellW, double hAc, int hLeft, int hRight)>();

        for (int by = 0; by + bandH <= h; by += bandStep)
        {
            // "Very dark pixel density" per column: fraction of pixels below threshold
            double[] darkDensity = new double[w];
            for (int x = 0; x < w; x++)
            {
                int count = 0;
                for (int y = by; y < by + bandH; y++)
                {
                    if (gray[y * w + x] < veryDarkPixelThresh) count++;
                }
                darkDensity[x] = (double)count / bandH;
            }

            // Find segments where density > gridSegThresh (grid panel regions)
            var gridSegs = SignalProcessing.FindDarkDensitySegments(darkDensity, gridSegThresh, 200);

            foreach (var (segLeft, segRight) in gridSegs)
            {
                // Extract segment and run AC
                int segLen = segRight - segLeft;
                double[] segment = new double[segLen];
                Array.Copy(darkDensity, segLeft, segment, 0, segLen);

                var (period, acScore) = SignalProcessing.FindPeriodWithScore(segment, minCell, maxCell);

                if (period <= 0) continue;

                // FindGridExtent within the segment
                var (extLeft, extRight) = SignalProcessing.FindGridExtent(segment, period);
                if (extLeft < 0) continue;

                // Map back to full image coordinates
                int absLeft = segLeft + extLeft;
                int absRight = segLeft + extRight;
                int extent = absRight - absLeft;

                // Require at least 8 cells wide AND 200px absolute minimum
                if (extent < period * 8 || extent < 200) continue;

                if (debug) Console.Error.WriteLine(
                    $" Band y={by}: seg=[{segLeft}-{segRight}] period={period}, AC={acScore:F3}, " +
                    $"extent={absLeft}-{absRight}={extent}px ({extent / period} cells)");

                candidates.Add((by, period, acScore, absLeft, absRight));
            }
        }

        if (debug) Console.Error.WriteLine($"Pass 1: {candidates.Count} candidates");

        // Sort by score = AC * extent (prefer large strongly-periodic areas)
        candidates.Sort((a, b) =>
        {
            double sa = a.hAc * (a.hRight - a.hLeft);
            double sb = b.hAc * (b.hRight - b.hLeft);
            return sb.CompareTo(sa);
        });

        // ── Pass 2: Verify vertical periodicity ──
        // Only the ten best-scoring horizontal candidates are checked; the first one that
        // also shows a vertical period is returned.
        foreach (var cand in candidates.Take(10))
        {
            int colSpan = cand.hRight - cand.hLeft;
            if (colSpan < cand.cellW * 3) continue;

            // Row "very dark pixel density" within the detected column range
            double[] rowDensity = new double[h];
            for (int y = 0; y < h; y++)
            {
                int count = 0;
                for (int x = cand.hLeft; x < cand.hRight; x++)
                {
                    if (gray[y * w + x] < veryDarkPixelThresh) count++;
                }
                rowDensity[y] = (double)count / colSpan;
            }

            // Find grid panel vertical segment
            var vGridSegs = SignalProcessing.FindDarkDensitySegments(rowDensity, gridSegThresh, 100);
            if (vGridSegs.Count == 0) continue;

            // Use the largest segment
            var (vSegTop, vSegBottom) = vGridSegs.OrderByDescending(s => s.end - s.start).First();
            int vSegLen = vSegBottom - vSegTop;
            double[] vSegment = new double[vSegLen];
            Array.Copy(rowDensity, vSegTop, vSegment, 0, vSegLen);

            var (cellH, vAc) = SignalProcessing.FindPeriodWithScore(vSegment, minCell, maxCell);
            if (cellH <= 0) continue;

            var (extTop, extBottom) = SignalProcessing.FindGridExtent(vSegment, cellH);
            if (extTop < 0) continue;

            int top = vSegTop + extTop;
            int bottom = vSegTop + extBottom;
            int vExtent = bottom - top;

            // Require at least 3 rows tall AND 100px absolute minimum
            if (vExtent < cellH * 3 || vExtent < 100) continue;

            if (debug) Console.Error.WriteLine(
                $" 2D candidate: cellW={cand.cellW}, cellH={cellH}, " +
                $"region=({cand.hLeft},{top})-({cand.hRight},{bottom}), " +
                $"vAC={vAc:F3}, extent={vExtent}px ({vExtent / cellH} rows)");

            // ── Found a valid 2D grid ──
            int gridW = cand.hRight - cand.hLeft;
            int gridH = bottom - top;
            int cols = Math.Max(2, (int)Math.Round((double)gridW / cand.cellW));
            int rows = Math.Max(2, (int)Math.Round((double)gridH / cellH));

            // Snap grid dimensions to exact multiples of cell size
            gridW = cols * cand.cellW;
            gridH = rows * cellH;

            if (debug) Console.Error.WriteLine(
                $" => cols={cols}, rows={rows}, gridW={gridW}, gridH={gridH}");

            return new DetectGridResponse
            {
                Detected = true,
                Region = new RegionRect
                {
                    // Positions are offset by the request region's origin.
                    X = req.Region.X + cand.hLeft,
                    Y = req.Region.Y + top,
                    Width = gridW,
                    Height = gridH,
                },
                Cols = cols,
                Rows = rows,
                CellWidth = Math.Round((double)gridW / cols, 1),
                CellHeight = Math.Round((double)gridH / rows, 1),
            };
        }

        if (debug) Console.Error.WriteLine(" No valid 2D grid found");
        return new DetectGridResponse { Detected = false };
    }

    // Captures (or loads) the requested region and returns it as a tightly packed
    // width*height array holding the average of each pixel's three colour bytes.
    // The bitmap is unlocked and disposed even if copying the pixel data throws.
    private static byte[] CaptureGrayscale(Request req, out int width, out int height)
    {
        using Bitmap bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        width = bitmap.Width;
        height = bitmap.Height;

        byte[] pixels;
        int stride;
        var bmpData = bitmap.LockBits(
            new Rectangle(0, 0, width, height),
            ImageLockMode.ReadOnly,
            PixelFormat.Format32bppArgb
        );
        try
        {
            stride = bmpData.Stride;
            pixels = new byte[stride * height];
            Marshal.Copy(bmpData.Scan0, pixels, 0, pixels.Length);
        }
        finally
        {
            bitmap.UnlockBits(bmpData);
        }

        byte[] gray = new byte[width * height];
        for (int y = 0; y < height; y++)
            for (int x = 0; x < width; x++)
            {
                // 4 bytes per pixel; the fourth (alpha) byte is ignored.
                int i = y * stride + x * 4;
                gray[y * width + x] = (byte)((pixels[i] + pixels[i + 1] + pixels[i + 2]) / 3);
            }

        return gray;
    }
}
|
||||
|
|
@ -1,244 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
class EdgeCropHandler
|
||||
{
|
||||
// Two sequential 32-bit integers; receives the cursor position from GetCursorPos.
[StructLayout(LayoutKind.Sequential)]
private struct POINT
{
    public int X;
    public int Y;
}

// P/Invoke binding for GetCursorPos in user32.dll; fills lpPoint with the cursor position.
[DllImport("user32.dll")]
private static extern bool GetCursorPos(out POINT lpPoint);
|
||||
|
||||
/// <summary>
/// Locates the dark rectangular area around the cursor in a full capture and crops it out.
/// </summary>
/// <remarks>
/// Phase 1 walks left/right from a dark seed pixel on each row of a band around the cursor;
/// phase 2 walks up/down on each column of a band inside the horizontal extent found in phase 1.
/// Each walk tolerates a limited run of bright pixels so it can bridge text. The final edges
/// are percentiles of the per-row / per-column extents rather than min/max.
/// </remarks>
/// <param name="req">Supplies the optional cursor position (CursorX/CursorY) and the capture source (File).</param>
/// <param name="p">
/// Tuning values: DarkThresh (brightness below which a pixel counts as dark), RunGapTolerance
/// (horizontal bright-gap allowance), MaxGap (vertical bright-gap allowance), MinDarkRun
/// (repurposed as the half-height of the horizontal scan band) and RowThreshDiv/ColThreshDiv
/// (percentile denominators; values below 1 are treated as 1).
/// </param>
/// <returns>
/// The cropped bitmap, the full capture and the crop rectangle, or <c>null</c> when too few
/// dark rows/columns were found or the detected area is smaller than 50px in either dimension.
/// The full capture is disposed here on every non-success path (including exceptions); on
/// success it is returned undisposed together with the crop.
/// </returns>
public (Bitmap cropped, Bitmap fullCapture, RegionRect region)? EdgeCrop(Request req, EdgeCropParams p)
{
    int cursorX, cursorY;
    if (req.CursorX.HasValue && req.CursorY.HasValue)
    {
        cursorX = req.CursorX.Value;
        cursorY = req.CursorY.Value;
    }
    else
    {
        GetCursorPos(out var pt);
        cursorX = pt.X;
        cursorY = pt.Y;
    }

    var fullCapture = ScreenCapture.CaptureOrLoad(req.File, null);
    bool handedOff = false; // set only when the capture is returned to the caller
    try
    {
        int w = fullCapture.Width;
        int h = fullCapture.Height;

        cursorX = Math.Clamp(cursorX, 0, w - 1);
        cursorY = Math.Clamp(cursorY, 0, h - 1);

        // Copy the pixels out so the scans below work on a managed array.
        byte[] px;
        int stride;
        var bmpData = fullCapture.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            stride = bmpData.Stride;
            px = new byte[stride * h];
            Marshal.Copy(bmpData.Scan0, px, 0, px.Length);
        }
        finally
        {
            fullCapture.UnlockBits(bmpData);
        }

        int darkThresh = p.DarkThresh;
        int colGap = p.RunGapTolerance;
        int maxGap = p.MaxGap;
        // Guard the percentile denominators: a zero would otherwise throw DivideByZeroException.
        int rowDiv = Math.Max(1, p.RowThreshDiv);
        int colDiv = Math.Max(1, p.ColThreshDiv);

        // ── Phase 1: Per-row horizontal extent ──
        // Scan left/right from cursorX per row. Gap tolerance bridges through text.
        int bandHalf = p.MinDarkRun; // repurpose: half-height of horizontal scan band
        int bandTop = Math.Max(0, cursorY - bandHalf);
        int bandBot = Math.Min(h - 1, cursorY + bandHalf);

        var leftExtents = new List<int>();
        var rightExtents = new List<int>();

        for (int y = bandTop; y <= bandBot; y++)
        {
            int rowOff = y * stride;
            int seedX = FindDarkSeedInRow(px, stride, w, rowOff, cursorX, darkThresh, seedRadius: 6);
            if (seedX < 0) continue;

            leftExtents.Add(WalkDarkRun(px, rowOff, 4, seedX, -1, -1, darkThresh, colGap));
            rightExtents.Add(WalkDarkRun(px, rowOff, 4, seedX, w, 1, darkThresh, colGap));
        }

        if (leftExtents.Count < 10)
        {
            Console.Error.WriteLine($" edge-crop: too few dark rows ({leftExtents.Count})");
            return null;
        }

        leftExtents.Sort();
        rightExtents.Sort();

        // Use RowThreshDiv/ColThreshDiv as percentile denominators
        // e.g., RowThreshDiv=4 → 25th percentile for left, ColThreshDiv=4 → 75th for right
        int leftPctIdx = Math.Clamp(leftExtents.Count / rowDiv, 0, leftExtents.Count - 1);
        int rightPctIdx = Math.Clamp(rightExtents.Count * (colDiv - 1) / colDiv, 0, rightExtents.Count - 1);

        int bestColStart = leftExtents[leftPctIdx];
        int bestColEnd = rightExtents[rightPctIdx];

        Console.Error.WriteLine($" edge-crop: horizontal: left={bestColStart} right={bestColEnd} ({bestColEnd - bestColStart + 1}px) samples={leftExtents.Count} pctL={leftPctIdx}/{leftExtents.Count} pctR={rightPctIdx}/{rightExtents.Count}");

        if (bestColEnd - bestColStart + 1 < 50)
        {
            Console.Error.WriteLine($" edge-crop: horizontal extent too small");
            return null;
        }

        // ── Phase 2: Per-column vertical extent ──
        int colBandHalf = (bestColEnd - bestColStart + 1) / 3;
        int colBandLeft = Math.Max(bestColStart, cursorX - colBandHalf);
        int colBandRight = Math.Min(bestColEnd, cursorX + colBandHalf);

        var topExtents = new List<int>();
        var bottomExtents = new List<int>();

        // Asymmetric gap: larger upward to bridge header decorations (~30-40px bright)
        int maxGapUp = maxGap * 3;

        for (int x = colBandLeft; x <= colBandRight; x++)
        {
            int seedY = FindDarkSeedInColumn(px, stride, h, x, cursorY, darkThresh, seedRadius: 6);
            if (seedY < 0) continue;

            topExtents.Add(WalkDarkRun(px, x * 4, stride, seedY, -1, -1, darkThresh, maxGapUp));
            bottomExtents.Add(WalkDarkRun(px, x * 4, stride, seedY, h, 1, darkThresh, maxGap));
        }

        if (topExtents.Count < 10)
        {
            Console.Error.WriteLine($" edge-crop: too few dark columns ({topExtents.Count})");
            return null;
        }

        topExtents.Sort();
        bottomExtents.Sort();

        int topPctIdx = Math.Clamp(topExtents.Count / rowDiv, 0, topExtents.Count - 1);
        int botPctIdx = Math.Clamp(bottomExtents.Count * (colDiv - 1) / colDiv, 0, bottomExtents.Count - 1);

        int bestRowStart = topExtents[topPctIdx];
        int bestRowEnd = bottomExtents[botPctIdx];

        Console.Error.WriteLine($" edge-crop: vertical: top={bestRowStart} bottom={bestRowEnd} ({bestRowEnd - bestRowStart + 1}px) samples={topExtents.Count}");

        if (bestRowEnd - bestRowStart + 1 < 50)
        {
            Console.Error.WriteLine($" edge-crop: vertical extent too small");
            return null;
        }

        int minX = bestColStart;
        int minY = bestRowStart;
        int maxX = bestColEnd;
        int maxY = bestRowEnd;

        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;

        Console.Error.WriteLine($" edge-crop: result ({minX},{minY}) {rw}x{rh}");

        if (rw < 50 || rh < 50)
        {
            Console.Error.WriteLine($" edge-crop: region too small ({rw}x{rh})");
            return null;
        }

        var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };
        var cropRect = new Rectangle(minX, minY, rw, rh);
        var cropped = fullCapture.Clone(cropRect, PixelFormat.Format32bppArgb);

        handedOff = true;
        return (cropped, fullCapture, region);
    }
    finally
    {
        // Covers every early "return null" above as well as any exception (e.g. from Clone).
        if (!handedOff) fullCapture.Dispose();
    }
}

/// <summary>
/// Walks away from <paramref name="seed"/> along one axis of a 32bpp pixel buffer and returns
/// the last coordinate whose brightness (mean of the first three channel bytes) is below
/// <paramref name="darkThresh"/>, stopping once more than <paramref name="gapTolerance"/>
/// consecutive bright pixels are seen or <paramref name="limit"/> is reached.
/// </summary>
/// <param name="px">Raw pixel bytes.</param>
/// <param name="baseOffset">Byte offset of coordinate 0 on the scanned line (row offset, or x * 4 for a column).</param>
/// <param name="pixelStep">Bytes between consecutive coordinates (4 along a row, stride along a column).</param>
/// <param name="seed">Starting coordinate; returned unchanged if no further dark pixel is found.</param>
/// <param name="limit">Exclusive end coordinate (-1 when walking backwards, width/height when walking forwards).</param>
/// <param name="direction">-1 or +1.</param>
private static int WalkDarkRun(byte[] px, int baseOffset, int pixelStep, int seed, int limit, int direction, int darkThresh, int gapTolerance)
{
    int edge = seed;
    int gap = 0;
    for (int pos = seed + direction; pos != limit; pos += direction)
    {
        int i = baseOffset + pos * pixelStep;
        int brightness = (px[i] + px[i + 1] + px[i + 2]) / 3;
        if (brightness < darkThresh)
        {
            edge = pos;
            gap = 0;
        }
        else if (++gap > gapTolerance)
        {
            break;
        }
    }
    return edge;
}
|
||||
|
||||
/// <summary>
/// Searches one row of a 32bpp pixel buffer for the dark pixel nearest to
/// <paramref name="cursorX"/>, looking at most <paramref name="seedRadius"/> pixels to
/// either side. At equal distance the left candidate wins.
/// </summary>
/// <remarks>
/// Each side is bounds-checked on its own, so a cursor close to the left or right image
/// edge still gets the full radius on the side that has room (previously the radius was
/// clamped symmetrically and collapsed to zero at the edge).
/// </remarks>
/// <param name="px">Raw pixel bytes, 4 bytes per pixel.</param>
/// <param name="stride">Bytes per row; unused here because <paramref name="rowOff"/> already addresses the row.</param>
/// <param name="w">Row width in pixels.</param>
/// <param name="rowOff">Byte offset of the row's first pixel.</param>
/// <param name="cursorX">Column to search around.</param>
/// <param name="darkThresh">A pixel is dark when the mean of its first three bytes is below this value.</param>
/// <param name="seedRadius">Maximum distance from <paramref name="cursorX"/> to probe.</param>
/// <returns>The x coordinate of the dark pixel found, or -1 if there is none within the radius.</returns>
private static int FindDarkSeedInRow(byte[] px, int stride, int w, int rowOff, int cursorX, int darkThresh, int seedRadius)
{
    bool IsDark(int x)
    {
        if (x < 0 || x >= w) return false;
        int i = rowOff + x * 4;
        return (px[i] + px[i + 1] + px[i + 2]) / 3 < darkThresh;
    }

    for (int r = 0; r <= seedRadius; r++)
    {
        if (IsDark(cursorX - r)) return cursorX - r;
        if (IsDark(cursorX + r)) return cursorX + r;
    }

    return -1;
}
|
||||
|
||||
/// <summary>
/// Searches one column of a 32bpp pixel buffer for the dark pixel nearest to
/// <paramref name="cursorY"/>, looking at most <paramref name="seedRadius"/> pixels up
/// and down. At equal distance the upper candidate wins.
/// </summary>
/// <remarks>
/// Each side is bounds-checked on its own, so a cursor close to the top or bottom image
/// edge still gets the full radius on the side that has room (previously the radius was
/// clamped symmetrically and collapsed to zero at the edge).
/// </remarks>
/// <param name="px">Raw pixel bytes, 4 bytes per pixel.</param>
/// <param name="stride">Bytes per row.</param>
/// <param name="h">Image height in pixels.</param>
/// <param name="x">Column to scan.</param>
/// <param name="cursorY">Row to search around.</param>
/// <param name="darkThresh">A pixel is dark when the mean of its first three bytes is below this value.</param>
/// <param name="seedRadius">Maximum distance from <paramref name="cursorY"/> to probe.</param>
/// <returns>The y coordinate of the dark pixel found, or -1 if there is none within the radius.</returns>
private static int FindDarkSeedInColumn(byte[] px, int stride, int h, int x, int cursorY, int darkThresh, int seedRadius)
{
    bool IsDark(int y)
    {
        if (y < 0 || y >= h) return false;
        int i = y * stride + x * 4;
        return (px[i] + px[i + 1] + px[i + 2]) / 3 < darkThresh;
    }

    for (int r = 0; r <= seedRadius; r++)
    {
        if (IsDark(cursorY - r)) return cursorY - r;
        if (IsDark(cursorY + r)) return cursorY + r;
    }

    return -1;
}
|
||||
}
|
||||
|
|
@ -1,357 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
class GridHandler
|
||||
{
|
||||
// Empty-cell reference images, populated by LoadTemplatesIfNeeded on the first grid scan.
// Each template keeps a grayscale copy, the raw ARGB bytes, and its dimensions/stride.

// 70px template
private byte[]? _emptyTemplate70Gray;
private byte[]? _emptyTemplate70Argb;
private int _emptyTemplate70W;
private int _emptyTemplate70H;
private int _emptyTemplate70Stride;

// 35px template
private byte[]? _emptyTemplate35Gray;
private byte[]? _emptyTemplate35Argb;
private int _emptyTemplate35W;
private int _emptyTemplate35H;
private int _emptyTemplate35Stride;
|
||||
|
||||
/// <summary>
/// Handles the grid command: classifies every cell of a cols × rows grid as empty or
/// occupied, groups adjacent occupied cells into items, and optionally finds cells that
/// look like a target cell.
/// </summary>
/// <remarks>
/// Occupancy: each cell's inner area is compared with the empty-cell template using the mean
/// absolute difference after removing the mean-brightness offset between cell and template.
/// Grouping: for two neighbouring occupied cells, the strip straddling their shared border is
/// compared with the template; a large difference means the item tint shows through the grid
/// line, so the cells are merged (union-find). Each group is reported as its bounding box.
/// Template coordinates are bounds-checked on both axes, so cells larger than the template
/// skip the out-of-range pixels instead of reading the wrong template row or past the array.
/// </remarks>
/// <param name="req">Request carrying Region, Cols, Rows, File, Threshold, Debug and TargetRow/TargetCol.</param>
/// <returns>
/// A <c>GridResponse</c> with the cell matrix, items and optional matches, or an
/// <c>ErrorResponse</c> when required arguments or the empty-cell templates are missing.
/// </returns>
public object HandleGrid(Request req)
{
    if (req.Region == null || req.Cols <= 0 || req.Rows <= 0)
        return new ErrorResponse("grid command requires region, cols, rows");

    LoadTemplatesIfNeeded();

    using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
    int cols = req.Cols;
    int rows = req.Rows;
    float cellW = (float)bitmap.Width / cols;
    float cellH = (float)bitmap.Height / rows;

    // Pick the right empty template based on cell size
    int nominalCell = (int)Math.Round(cellW);
    byte[] templateGray;
    int templateW, templateH;
    if (nominalCell <= 40 && _emptyTemplate35Gray != null)
    {
        templateGray = _emptyTemplate35Gray;
        templateW = _emptyTemplate35W;
        templateH = _emptyTemplate35H;
    }
    else if (_emptyTemplate70Gray != null)
    {
        templateGray = _emptyTemplate70Gray;
        templateW = _emptyTemplate70W;
        templateH = _emptyTemplate70H;
    }
    else
    {
        return new ErrorResponse("Empty cell templates not found in assets/");
    }

    // Only the grayscale plane of the capture is needed below.
    var (captureGray, _, _) = ImageUtils.BitmapToGrayAndArgb(bitmap);
    int captureW = bitmap.Width;
    int captureH = bitmap.Height;

    // Border to skip (outer pixels may differ between cells)
    int border = Math.Max(2, nominalCell / 10);

    // Pre-compute template average for the inner region
    long templateSum = 0;
    int innerCount = 0;
    for (int ty = border; ty < templateH - border; ty++)
    {
        for (int tx = border; tx < templateW - border; tx++)
        {
            templateSum += templateGray[ty * templateW + tx];
            innerCount++;
        }
    }
    double tmplMean = innerCount > 0 ? (double)templateSum / innerCount : 0;

    // Threshold for brightness-normalized MAD
    double diffThreshold = req.Threshold > 0 ? req.Threshold : 5;
    bool debug = req.Debug;

    if (debug) Console.Error.WriteLine($"Grid: {cols}x{rows}, cellW={cellW:F1}, cellH={cellH:F1}, border={border}, threshold={diffThreshold}, tmplMean={tmplMean:F1}");

    var cells = new List<List<bool>>();
    for (int row = 0; row < rows; row++)
    {
        var rowList = new List<bool>();
        var debugDiffs = new List<string>();
        for (int col = 0; col < cols; col++)
        {
            int cx0 = (int)(col * cellW);
            int cy0 = (int)(row * cellH);
            int cw = (int)Math.Min(cellW, captureW - cx0);
            int ch = (int)Math.Min(cellH, captureH - cy0);

            int innerW = Math.Min(cw, templateW) - border;
            int innerH = Math.Min(ch, templateH) - border;

            // First pass: compute cell region mean brightness
            long cellSum = 0;
            int compared = 0;
            for (int py = border; py < innerH; py++)
            {
                for (int px = border; px < innerW; px++)
                {
                    cellSum += captureGray[(cy0 + py) * captureW + (cx0 + px)];
                    compared++;
                }
            }
            double cellMean = compared > 0 ? (double)cellSum / compared : 0;
            double offset = cellMean - tmplMean;

            // Second pass: MAD on brightness-normalized values
            long diffSum = 0;
            for (int py = border; py < innerH; py++)
            {
                for (int px = border; px < innerW; px++)
                {
                    double cellVal = captureGray[(cy0 + py) * captureW + (cx0 + px)];
                    double tmplVal = templateGray[py * templateW + px];
                    diffSum += (long)Math.Abs(cellVal - tmplVal - offset);
                }
            }
            double meanDiff = compared > 0 ? (double)diffSum / compared : 0;
            bool occupied = meanDiff > diffThreshold;
            rowList.Add(occupied);
            if (debug) debugDiffs.Add($"{meanDiff,5:F1}{(occupied ? "*" : " ")}");
        }
        cells.Add(rowList);
        if (debug) Console.Error.WriteLine($" Row {row,2}: {string.Join(" ", debugDiffs)}");
    }

    // ── Item detection: compare border pixels to empty template (grayscale) ──
    // Items have a colored tint behind them that shows through grid lines.
    // Compare each cell's border strip against the template's border pixels.
    // If they differ → item tint present → cells belong to same item.
    int[] parent = new int[rows * cols];
    for (int i = 0; i < parent.Length; i++) parent[i] = i;

    int Find(int x) { while (parent[x] != x) { parent[x] = parent[parent[x]]; x = parent[x]; } return x; }
    void Union(int a, int b) { parent[Find(a)] = Find(b); }

    // Mean absolute capture-vs-template difference over a rectangle of capture pixels.
    // Template coordinates are relative to (originX, originY); samples that fall outside
    // either the capture or the template are skipped.
    double MeanStripDiff(int xBegin, int xEnd, int xStep, int yBegin, int yEnd, int yStep, int originX, int originY)
    {
        long sum = 0;
        int count = 0;
        for (int sy = yBegin; sy < yEnd; sy += yStep)
        {
            if (sy < 0 || sy >= captureH) continue;
            int tmplY = sy - originY;
            if (tmplY < 0 || tmplY >= templateH) continue;
            for (int sx = xBegin; sx < xEnd; sx += xStep)
            {
                if (sx < 0 || sx >= captureW) continue;
                int tmplX = sx - originX;
                if (tmplX < 0 || tmplX >= templateW) continue;
                sum += Math.Abs(captureGray[sy * captureW + sx] - templateGray[tmplY * templateW + tmplX]);
                count++;
            }
        }
        return count > 0 ? (double)sum / count : 0;
    }

    int stripWidth = Math.Max(2, border / 2);
    int stripInset = (int)(cellW * 0.15);
    double borderDiffThresh = 15.0;

    for (int row = 0; row < rows; row++)
    {
        for (int col = 0; col < cols; col++)
        {
            if (!cells[row][col]) continue;
            int cx0 = (int)(col * cellW);
            int cy0 = (int)(row * cellH);

            // Check right neighbor: vertical strip centred on the shared edge, every 2nd row.
            if (col + 1 < cols && cells[row][col + 1])
            {
                int xStart = (int)((col + 1) * cellW) - stripWidth;
                int yFrom = cy0 + stripInset;
                int yTo = (int)((row + 1) * cellH) - stripInset;
                double meanDiff = MeanStripDiff(xStart, xStart + stripWidth * 2, 1, yFrom, yTo, 2, cx0, cy0);
                if (debug) Console.Error.WriteLine($" H ({row},{col})->({row},{col+1}): {meanDiff:F1}{(meanDiff > borderDiffThresh ? " SAME" : "")}");
                if (meanDiff > borderDiffThresh)
                    Union(row * cols + col, row * cols + col + 1);
            }

            // Check bottom neighbor: horizontal strip centred on the shared edge, every 2nd column.
            if (row + 1 < rows && cells[row + 1][col])
            {
                int yStart = (int)((row + 1) * cellH) - stripWidth;
                int xFrom = cx0 + stripInset;
                int xTo = (int)((col + 1) * cellW) - stripInset;
                double meanDiff = MeanStripDiff(xFrom, xTo, 2, yStart, yStart + stripWidth * 2, 1, cx0, cy0);
                if (debug) Console.Error.WriteLine($" V ({row},{col})->({row+1},{col}): {meanDiff:F1}{(meanDiff > borderDiffThresh ? " SAME" : "")}");
                if (meanDiff > borderDiffThresh)
                    Union(row * cols + col, (row + 1) * cols + col);
            }
        }
    }

    // Extract items from union-find groups
    var groups = new Dictionary<int, List<(int row, int col)>>();
    for (int row = 0; row < rows; row++)
    {
        for (int col = 0; col < cols; col++)
        {
            if (!cells[row][col]) continue;
            int root = Find(row * cols + col);
            if (!groups.TryGetValue(root, out var members))
            {
                members = [];
                groups[root] = members;
            }
            members.Add((row, col));
        }
    }

    var items = new List<GridItem>();
    foreach (var group in groups.Values)
    {
        int minR = group.Min(c => c.row);
        int maxR = group.Max(c => c.row);
        int minC = group.Min(c => c.col);
        int maxC = group.Max(c => c.col);
        items.Add(new GridItem { Row = minR, Col = minC, W = maxC - minC + 1, H = maxR - minR + 1 });
    }

    if (debug)
    {
        Console.Error.WriteLine($" Items found: {items.Count}");
        foreach (var item in items)
            Console.Error.WriteLine($" ({item.Row},{item.Col}) {item.W}x{item.H}");
    }

    // ── Visual matching: find cells similar to target ──
    List<GridMatch>? matches = null;
    if (req.TargetRow >= 0 && req.TargetCol >= 0 &&
        req.TargetRow < rows && req.TargetCol < cols &&
        cells[req.TargetRow][req.TargetCol])
    {
        matches = FindMatchingCells(
            captureGray, captureW, captureH,
            cells, rows, cols, cellW, cellH, border,
            req.TargetRow, req.TargetCol, debug);
    }

    return new GridResponse { Cells = cells, Items = items, Matches = matches };
}
|
||||
|
||||
/// <summary>
/// Returns every occupied cell (other than the target) whose inner area correlates with
/// the target cell's inner area at NCC &gt;= 0.70, computed on full-resolution grayscale pixels.
/// </summary>
/// <remarks>
/// The inner area is the cell minus <paramref name="border"/> pixels on each side. An empty
/// list is returned when that area is 4px or smaller in either dimension, when the target's
/// inner area does not fit in the image, or when the target is nearly flat (std &lt; 3.0).
/// Candidate cells whose inner area does not fit in the image are skipped.
/// </remarks>
private List<GridMatch> FindMatchingCells(
    byte[] gray, int imgW, int imgH,
    List<List<bool>> cells, int rows, int cols,
    float cellW, float cellH, int border,
    int targetRow, int targetCol, bool debug)
{
    int patchW = (int)cellW - border * 2;
    int patchH = (int)cellH - border * 2;
    if (patchW <= 4 || patchH <= 4) return [];

    int refX = (int)(targetCol * cellW) + border;
    int refY = (int)(targetRow * cellH) + border;
    // The whole patch must lie inside the image.
    if (imgW - refX < patchW || imgH - refY < patchH) return [];

    int pixelCount = patchW * patchH;

    // Cache the target patch and accumulate its mean in the same pass.
    var reference = new double[pixelCount];
    double refMean = 0;
    int k = 0;
    for (int dy = 0; dy < patchH; dy++)
    {
        int rowBase = (refY + dy) * imgW + refX;
        for (int dx = 0; dx < patchW; dx++)
        {
            double v = gray[rowBase + dx];
            reference[k++] = v;
            refMean += v;
        }
    }
    refMean /= pixelCount;

    double refStd = 0;
    foreach (double v in reference)
    {
        double d = v - refMean;
        refStd += d * d;
    }
    refStd = Math.Sqrt(refStd / pixelCount);

    if (debug) Console.Error.WriteLine($" Match target ({targetRow},{targetCol}): {patchW}x{patchH} ({pixelCount}px), mean={refMean:F1}, std={refStd:F1}");
    if (refStd < 3.0) return [];

    const double MatchThreshold = 0.70;
    var found = new List<GridMatch>();

    for (int row = 0; row < rows; row++)
    {
        for (int col = 0; col < cols; col++)
        {
            if (!cells[row][col] || (row == targetRow && col == targetCol)) continue;

            int candX = (int)(col * cellW) + border;
            int candY = (int)(row * cellH) + border;
            if (imgW - candX < patchW || imgH - candY < patchH) continue;

            // Pass 1: candidate mean.
            double candMean = 0;
            for (int dy = 0; dy < patchH; dy++)
            {
                int rowBase = (candY + dy) * imgW + candX;
                for (int dx = 0; dx < patchW; dx++)
                    candMean += gray[rowBase + dx];
            }
            candMean /= pixelCount;

            // Pass 2: candidate variance and cross-correlation with the target.
            double candVar = 0;
            double cross = 0;
            k = 0;
            for (int dy = 0; dy < patchH; dy++)
            {
                int rowBase = (candY + dy) * imgW + candX;
                for (int dx = 0; dx < patchW; dx++)
                {
                    double cv = gray[rowBase + dx] - candMean;
                    double tv = reference[k++] - refMean;
                    candVar += cv * cv;
                    cross += tv * cv;
                }
            }
            double candStd = Math.Sqrt(candVar / pixelCount);

            double ncc = 0;
            if (refStd > 0 && candStd > 0)
                ncc = cross / (pixelCount * refStd * candStd);

            if (debug && ncc > 0.5)
                Console.Error.WriteLine($" ({row},{col}): NCC={ncc:F3}");

            if (ncc >= MatchThreshold)
                found.Add(new GridMatch { Row = row, Col = col, Similarity = Math.Round(ncc, 3) });
        }
    }

    if (debug) Console.Error.WriteLine($" Matches for ({targetRow},{targetCol}): {found.Count}");
    return found;
}
|
||||
|
||||
/// <summary>
/// Loads the empty-cell templates (assets/empty70.png and assets/empty35.png) into the
/// template fields, skipping any template that is already loaded.
/// </summary>
/// <remarks>
/// Each template is guarded on its own. Previously only the 70px template was checked, so
/// when just the 35px file existed it was decoded again on every call. A template whose file
/// is missing stays null and is simply looked for again next time.
/// </remarks>
private void LoadTemplatesIfNeeded()
{
    if (_emptyTemplate70Gray != null && _emptyTemplate35Gray != null) return;

    // Templates are in assets/ at project root — walk up from bin/Release/net8.0-.../
    var exeDir = AppContext.BaseDirectory;
    var projectRoot = Path.GetFullPath(Path.Combine(exeDir, "..", "..", "..", "..", ".."));
    var assetsDir = Path.Combine(projectRoot, "assets");

    if (_emptyTemplate70Gray == null)
    {
        var t70Path = Path.Combine(assetsDir, "empty70.png");
        if (File.Exists(t70Path))
        {
            using var bmp = new Bitmap(t70Path);
            _emptyTemplate70W = bmp.Width;
            _emptyTemplate70H = bmp.Height;
            (_emptyTemplate70Gray, _emptyTemplate70Argb, _emptyTemplate70Stride) = ImageUtils.BitmapToGrayAndArgb(bmp);
        }
    }

    if (_emptyTemplate35Gray == null)
    {
        var t35Path = Path.Combine(assetsDir, "empty35.png");
        if (File.Exists(t35Path))
        {
            using var bmp = new Bitmap(t35Path);
            _emptyTemplate35W = bmp.Width;
            _emptyTemplate35H = bmp.Height;
            (_emptyTemplate35Gray, _emptyTemplate35Argb, _emptyTemplate35Stride) = ImageUtils.BitmapToGrayAndArgb(bmp);
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,234 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using OpenCvSharp;
|
||||
using OpenCvSharp.Extensions;
|
||||
|
||||
static class ImagePreprocessor
|
||||
{
|
||||
/// <summary>
/// Prepares an image for OCR with a morphological white top-hat filter, which keeps bright
/// tooltip text and suppresses dim background text visible through the overlay.
/// Steps: grayscale → top-hat with a square kernel → inverted Otsu threshold → optional cubic upscale.
/// </summary>
/// <param name="src">Source bitmap.</param>
/// <param name="kernelSize">Side length of the square structuring element.</param>
/// <param name="upscale">Integer scale factor; values of 1 or less skip the resize.</param>
/// <returns>A new bitmap holding the binarized (and possibly upscaled) image.</returns>
public static Bitmap PreprocessForOcr(Bitmap src, int kernelSize = 41, int upscale = 2)
{
    using var color = BitmapConverter.ToMat(src);
    using var gray = new Mat();
    Cv2.CvtColor(color, gray, ColorConversionCodes.BGRA2GRAY);

    // White top-hat: what remains after subtracting the morphological opening,
    // i.e. bright details smaller than the kernel.
    var kernelDims = new OpenCvSharp.Size(kernelSize, kernelSize);
    using var element = Cv2.GetStructuringElement(MorphShapes.Rect, kernelDims);
    using var tophat = new Mat();
    Cv2.MorphologyEx(gray, tophat, MorphTypes.TopHat, element);

    // Otsu picks the threshold; BinaryInv yields black text on white.
    using var binary = new Mat();
    Cv2.Threshold(tophat, binary, 0, 255, ThresholdTypes.BinaryInv | ThresholdTypes.Otsu);

    if (upscale <= 1)
        return BitmapConverter.ToBitmap(binary);

    // Upscale for better LSTM recognition
    using var enlarged = new Mat();
    var target = new OpenCvSharp.Size(binary.Width * upscale, binary.Height * upscale);
    Cv2.Resize(binary, enlarged, target, interpolation: InterpolationFlags.Cubic);
    return BitmapConverter.ToBitmap(enlarged);
}
|
||||
|
||||
/// <summary>
/// Background-subtraction preprocessing: uses a reference frame to remove background
/// bleed-through from the semi-transparent tooltip overlay.
/// Steps: estimate dimming factor → subtract expected background → threshold → upscale.
/// </summary>
/// <remarks>
/// Falls back to <see cref="PreprocessForOcr"/> (kernel 41) when no usable dimming ratio can
/// be collected: either no reference pixel is brighter than 30, or the two crops differ in
/// size. The size check matters because the pixel loops index both images with the tooltip's
/// dimensions through raw pointers; a smaller reference would otherwise be read out of bounds.
/// </remarks>
/// <param name="tooltipCrop">Crop of the current frame containing the tooltip.</param>
/// <param name="referenceCrop">Crop of the reference frame; expected to be the same size as <paramref name="tooltipCrop"/>.</param>
/// <param name="dimPercentile">Percentile (0-100) of the current/reference ratios used as the dimming factor.</param>
/// <param name="textThresh">Signal level at or below which a pixel is treated as background.</param>
/// <param name="upscale">Integer scale factor applied to the result.</param>
/// <param name="softThreshold">
/// When true, produces inverted, contrast-stretched grayscale (anti-aliased dark text on white);
/// when false, a hard inverted binary threshold.
/// </param>
/// <returns>A new Mat; the caller must dispose it.</returns>
public static Mat PreprocessWithBackgroundSubMat(Bitmap tooltipCrop, Bitmap referenceCrop,
    int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
{
    using var curMat = BitmapConverter.ToMat(tooltipCrop);
    using var refMat = BitmapConverter.ToMat(referenceCrop);
    using var curGray = new Mat();
    using var refGray = new Mat();
    Cv2.CvtColor(curMat, curGray, ColorConversionCodes.BGRA2GRAY);
    Cv2.CvtColor(refMat, refGray, ColorConversionCodes.BGRA2GRAY);

    int rows = curGray.Rows, cols = curGray.Cols;
    bool sizesMatch = refGray.Rows == rows && refGray.Cols == cols;

    // Estimate the dimming factor of the tooltip overlay.
    // For non-text pixels: current ≈ reference × dim_factor
    // Collect ratios where reference is bright enough to be meaningful
    var ratios = new List<double>();
    if (sizesMatch)
    {
        unsafe
        {
            byte* curPtr = (byte*)curGray.Data;
            byte* refPtr = (byte*)refGray.Data;
            int curStep = (int)curGray.Step();
            int refStep = (int)refGray.Step();

            for (int y = 0; y < rows; y++)
            {
                for (int x = 0; x < cols; x++)
                {
                    byte r = refPtr[y * refStep + x];
                    byte c = curPtr[y * curStep + x];
                    if (r > 30) // skip very dark reference pixels (no signal)
                        ratios.Add((double)c / r);
                }
            }
        }
    }

    if (ratios.Count == 0)
    {
        // Fallback: use top-hat preprocessing, convert to Mat
        using var fallbackBmp = PreprocessForOcr(tooltipCrop, 41, upscale);
        return BitmapConverter.ToMat(fallbackBmp);
    }

    // Use a low percentile of ratios as the dimming factor.
    // Text pixels have high ratios (bright on dark), overlay pixels have low ratios.
    // A low percentile captures the overlay dimming, ignoring text.
    ratios.Sort();
    int idx = Math.Clamp(ratios.Count * dimPercentile / 100, 0, ratios.Count - 1);
    // Clamp to sane range
    double dimFactor = Math.Clamp(ratios[idx], 0.05, 0.95);

    // Subtract expected background: text_signal = current - reference × dimFactor
    using var textSignal = new Mat(rows, cols, MatType.CV_8UC1);
    unsafe
    {
        byte* curPtr = (byte*)curGray.Data;
        byte* refPtr = (byte*)refGray.Data;
        byte* outPtr = (byte*)textSignal.Data;
        int curStep = (int)curGray.Step();
        int refStep = (int)refGray.Step();
        int outStep = (int)textSignal.Step();

        for (int y = 0; y < rows; y++)
        {
            for (int x = 0; x < cols; x++)
            {
                double expected = refPtr[y * refStep + x] * dimFactor;
                double signal = curPtr[y * curStep + x] - expected;
                outPtr[y * outStep + x] = (byte)Math.Clamp(signal, 0, 255);
            }
        }
    }

    // "using" ensures the intermediate is released even if thresholding or upscaling throws;
    // UpscaleMat always returns a separate Mat.
    using var result = softThreshold ? new Mat(rows, cols, MatType.CV_8UC1) : new Mat();
    if (softThreshold)
    {
        // Soft threshold: clip below textThresh, contrast-stretch, invert.
        // Produces grayscale anti-aliased text on white background,
        // matching the training data format (text2image renders).
        unsafe
        {
            byte* srcPtr = (byte*)textSignal.Data;
            byte* dstPtr = (byte*)result.Data;
            int srcStep = (int)textSignal.Step();
            int dstStep = (int)result.Step();

            // Find max signal above threshold for contrast stretch
            // (starts at 1 so the division below can never be by zero).
            int maxClipped = 1;
            for (int y = 0; y < rows; y++)
            {
                for (int x = 0; x < cols; x++)
                {
                    int val = srcPtr[y * srcStep + x] - textThresh;
                    if (val > maxClipped) maxClipped = val;
                }
            }

            // Clip, stretch, invert: background → 255 (white), text → dark
            for (int y = 0; y < rows; y++)
            {
                for (int x = 0; x < cols; x++)
                {
                    int clipped = srcPtr[y * srcStep + x] - textThresh;
                    if (clipped <= 0)
                    {
                        dstPtr[y * dstStep + x] = 255; // background
                    }
                    else
                    {
                        int stretched = clipped * 255 / maxClipped;
                        dstPtr[y * dstStep + x] = (byte)(255 - stretched); // invert
                    }
                }
            }
        }
    }
    else
    {
        // Hard binary threshold (original behavior)
        Cv2.Threshold(textSignal, result, textThresh, 255, ThresholdTypes.BinaryInv);
    }

    return UpscaleMat(result, upscale);
}
|
||||
|
||||
/// <summary>
/// Convenience wrapper around <see cref="PreprocessWithBackgroundSubMat"/> that converts
/// the resulting Mat to a Bitmap and disposes the Mat.
/// </summary>
public static Bitmap PreprocessWithBackgroundSub(Bitmap tooltipCrop, Bitmap referenceCrop,
    int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
{
    using (Mat processed = PreprocessWithBackgroundSubMat(
        tooltipCrop, referenceCrop, dimPercentile, textThresh, upscale, softThreshold))
    {
        return BitmapConverter.ToBitmap(processed);
    }
}
|
||||
|
||||
/// <summary>
/// Detects text lines by horizontal projection: counts dark pixels per row and groups
/// rows with enough dark pixels into vertical runs.
/// The image is expected to be inverted: text dark (0), background white (255).
/// </summary>
/// <param name="binary">Single-channel 8-bit image — assumed, since each row is read one byte per column.</param>
/// <param name="minRowPixels">Minimum number of dark pixels for a row to count as text.</param>
/// <param name="gapTolerance">A run is closed once a non-text row lies more than this many rows past the run's last text row.</param>
/// <returns>Inclusive (yStart, yEnd) row ranges, top to bottom.</returns>
public static List<(int yStart, int yEnd)> DetectTextLines(
    Mat binary, int minRowPixels = 2, int gapTolerance = 5)
{
    int height = binary.Rows;
    int width = binary.Cols;

    // Dark means < 128 rather than == 0, because cubic upscaling
    // introduces anti-aliased intermediate values.
    var darkPerRow = new int[height];
    unsafe
    {
        byte* basePtr = (byte*)binary.Data;
        int rowBytes = (int)binary.Step();
        for (int y = 0; y < height; y++)
        {
            byte* rowPtr = basePtr + y * rowBytes;
            int dark = 0;
            for (int x = 0; x < width; x++)
            {
                if (rowPtr[x] < 128) dark++;
            }
            darkPerRow[y] = dark;
        }
    }

    // Merge text rows into runs, bridging short gaps.
    var result = new List<(int yStart, int yEnd)>();
    int runStart = -1;
    int runEnd = -1;
    for (int y = 0; y < height; y++)
    {
        if (darkPerRow[y] >= minRowPixels)
        {
            if (runStart < 0) runStart = y;
            runEnd = y;
            continue;
        }

        // Blank row: nothing to close, or the gap is still within tolerance.
        if (runStart < 0 || y - runEnd <= gapTolerance) continue;

        result.Add((runStart, runEnd));
        runStart = -1;
    }

    if (runStart >= 0)
        result.Add((runStart, runEnd));

    return result;
}
|
||||
|
||||
/// <summary>
/// Returns a new Mat (caller must dispose) and never disposes <paramref name="src"/>:
/// a cubic-interpolated enlargement when <paramref name="factor"/> is greater than 1,
/// otherwise a clone of the source.
/// </summary>
private static Mat UpscaleMat(Mat src, int factor)
{
    if (factor <= 1)
        return src.Clone();

    var enlarged = new Mat();
    var target = new OpenCvSharp.Size(src.Width * factor, src.Height * factor);
    Cv2.Resize(src, enlarged, target, interpolation: InterpolationFlags.Cubic);
    return enlarged;
}
|
||||
}
|
||||
|
|
@ -1,89 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
using Tesseract;
|
||||
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||
|
||||
static class ImageUtils
|
||||
{
|
||||
/// <summary>
/// Converts a bitmap to a Tesseract <c>Pix</c> by encoding it as PNG in memory
/// and loading the encoded bytes.
/// </summary>
public static Pix BitmapToPix(Bitmap bitmap)
{
    using (var buffer = new MemoryStream())
    {
        bitmap.Save(buffer, SdImageFormat.Png);
        byte[] pngBytes = buffer.ToArray();
        return Pix.LoadFromMemory(pngBytes);
    }
}
|
||||
|
||||
/// <summary>
/// Walks the OCR result line by line and word by word, collecting words with confidence
/// of at least 50 together with their bounding boxes.
/// </summary>
/// <remarks>
/// The outer loop advances with <c>Next(TextLine)</c>, which moves to the next text line
/// anywhere on the page. The earlier <c>Next(Block, TextLine)</c> form stops at the last
/// line of the current block, so with multi-block layouts every line after the first block
/// was dropped. Single-block results are unaffected.
/// </remarks>
/// <param name="page">Recognized page to read from.</param>
/// <param name="offsetX">Added to each word's X coordinate.</param>
/// <param name="offsetY">Added to each word's Y coordinate.</param>
/// <returns>
/// One entry per text line that has at least one accepted word; the line text is the
/// accepted words joined by single spaces.
/// </returns>
public static List<OcrLineResult> ExtractLinesFromPage(Page page, int offsetX, int offsetY)
{
    var lines = new List<OcrLineResult>();
    using var iter = page.GetIterator();
    if (iter == null) return lines;

    iter.Begin();

    do
    {
        var words = new List<OcrWordResult>();
        do
        {
            var wordText = iter.GetText(PageIteratorLevel.Word);
            if (string.IsNullOrWhiteSpace(wordText)) continue;

            float conf = iter.GetConfidence(PageIteratorLevel.Word);
            if (conf < 50) continue; // reject low-confidence garbage from background bleed

            if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var bounds))
            {
                words.Add(new OcrWordResult
                {
                    Text = wordText.Trim(),
                    X = bounds.X1 + offsetX,
                    Y = bounds.Y1 + offsetY,
                    Width = bounds.Width,
                    Height = bounds.Height,
                });
            }
        } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

        if (words.Count > 0)
        {
            var lineText = string.Join(" ", words.Select(w => w.Text));
            lines.Add(new OcrLineResult { Text = lineText, Words = words });
        }
    } while (iter.Next(PageIteratorLevel.TextLine));

    return lines;
}
|
||||
|
||||
public static (byte[] gray, byte[] argb, int stride) BitmapToGrayAndArgb(Bitmap bmp)
|
||||
{
|
||||
int w = bmp.Width, h = bmp.Height;
|
||||
var data = bmp.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
|
||||
byte[] argb = new byte[data.Stride * h];
|
||||
Marshal.Copy(data.Scan0, argb, 0, argb.Length);
|
||||
bmp.UnlockBits(data);
|
||||
int stride = data.Stride;
|
||||
|
||||
byte[] gray = new byte[w * h];
|
||||
for (int y = 0; y < h; y++)
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
int i = y * stride + x * 4;
|
||||
gray[y * w + x] = (byte)((argb[i] + argb[i + 1] + argb[i + 2]) / 3);
|
||||
}
|
||||
return (gray, argb, stride);
|
||||
}
|
||||
|
||||
public static SdImageFormat GetImageFormat(string path)
|
||||
{
|
||||
var ext = Path.GetExtension(path).ToLowerInvariant();
|
||||
return ext switch
|
||||
{
|
||||
".jpg" or ".jpeg" => SdImageFormat.Jpeg,
|
||||
".bmp" => SdImageFormat.Bmp,
|
||||
_ => SdImageFormat.Png,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -1,548 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
/// <summary>
/// Command message read from JSON. Every property is bound to the JSON key named
/// in its <c>JsonPropertyName</c> attribute; which keys a command uses depends on the handler.
/// </summary>
class Request
{
    [JsonPropertyName("cmd")] public string? Cmd { get; set; }
    [JsonPropertyName("region")] public RegionRect? Region { get; set; }
    [JsonPropertyName("path")] public string? Path { get; set; }
    [JsonPropertyName("cols")] public int Cols { get; set; }
    [JsonPropertyName("rows")] public int Rows { get; set; }
    [JsonPropertyName("threshold")] public int Threshold { get; set; }
    [JsonPropertyName("minCellSize")] public int MinCellSize { get; set; }
    [JsonPropertyName("maxCellSize")] public int MaxCellSize { get; set; }
    [JsonPropertyName("file")] public string? File { get; set; }
    [JsonPropertyName("debug")] public bool Debug { get; set; }

    // Both target indices default to -1 when the key is absent.
    [JsonPropertyName("targetRow")] public int TargetRow { get; set; } = -1;
    [JsonPropertyName("targetCol")] public int TargetCol { get; set; } = -1;

    [JsonPropertyName("engine")] public string? Engine { get; set; }
    [JsonPropertyName("preprocess")] public string? Preprocess { get; set; }
    [JsonPropertyName("params")] public DiffOcrParams? Params { get; set; }
    [JsonPropertyName("edgeParams")] public EdgeOcrParams? EdgeParams { get; set; }
    [JsonPropertyName("cursorX")] public int? CursorX { get; set; }
    [JsonPropertyName("cursorY")] public int? CursorY { get; set; }
}
|
||||
|
||||
/// <summary>Axis-aligned rectangle given by its origin and size (JSON keys x, y, width, height).</summary>
class RegionRect
{
    [JsonPropertyName("x")] public int X { get; set; }
    [JsonPropertyName("y")] public int Y { get; set; }
    [JsonPropertyName("width")] public int Width { get; set; }
    [JsonPropertyName("height")] public int Height { get; set; }
}
|
||||
|
||||
/// <summary>Response whose <c>ok</c> and <c>ready</c> fields are always true.</summary>
class ReadyResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("ready")] public bool Ready => true;
}
|
||||
|
||||
/// <summary>Minimal success response: serializes a single <c>ok</c> field that is always true.</summary>
class OkResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
}
|
||||
|
||||
/// <summary>Failure response: <c>ok</c> is always false and <c>error</c> carries the supplied message.</summary>
/// <param name="message">Text exposed through <see cref="Error"/>.</param>
class ErrorResponse(string message)
{
    [JsonPropertyName("ok")] public bool Ok => false;
    [JsonPropertyName("error")] public string Error { get; } = message;
}
|
||||
|
||||
/// <summary>OCR result: the full recognised text plus the per-line breakdown. <c>ok</c> is always true.</summary>
class OcrResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("text")] public string Text { get; set; } = string.Empty;
    [JsonPropertyName("lines")] public List<OcrLineResult> Lines { get; set; } = [];
}
|
||||
|
||||
/// <summary>OCR result that additionally carries an optional region. <c>ok</c> is always true.</summary>
class DiffOcrResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("text")] public string Text { get; set; } = string.Empty;
    [JsonPropertyName("lines")] public List<OcrLineResult> Lines { get; set; } = [];
    [JsonPropertyName("region")] public RegionRect? Region { get; set; }
}
|
||||
|
||||
/// <summary>One recognised text line: its text and the words it is made of.</summary>
class OcrLineResult
{
    [JsonPropertyName("text")] public string Text { get; set; } = string.Empty;
    [JsonPropertyName("words")] public List<OcrWordResult> Words { get; set; } = [];
}
|
||||
|
||||
/// <summary>One recognised word with its bounding box (origin and size).</summary>
class OcrWordResult
{
    [JsonPropertyName("text")] public string Text { get; set; } = string.Empty;
    [JsonPropertyName("x")] public int X { get; set; }
    [JsonPropertyName("y")] public int Y { get; set; }
    [JsonPropertyName("width")] public int Width { get; set; }
    [JsonPropertyName("height")] public int Height { get; set; }
}
|
||||
|
||||
/// <summary>Capture result: <c>image</c> holds the image payload as a string. <c>ok</c> is always true.</summary>
class CaptureResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("image")] public string Image { get; set; } = string.Empty;
}
|
||||
|
||||
/// <summary>
/// Grid scan result: a boolean matrix of cells plus optional item and match lists.
/// <c>ok</c> is always true.
/// </summary>
class GridResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("cells")] public List<List<bool>> Cells { get; set; } = [];
    [JsonPropertyName("items")] public List<GridItem>? Items { get; set; }
    [JsonPropertyName("matches")] public List<GridMatch>? Matches { get; set; }
}
|
||||
|
||||
/// <summary>Grid entry identified by row/column with a width and height (JSON keys row, col, w, h).</summary>
class GridItem
{
    [JsonPropertyName("row")] public int Row { get; set; }
    [JsonPropertyName("col")] public int Col { get; set; }
    [JsonPropertyName("w")] public int W { get; set; }
    [JsonPropertyName("h")] public int H { get; set; }
}
|
||||
|
||||
/// <summary>Grid position (row, column) paired with a similarity score.</summary>
class GridMatch
{
    [JsonPropertyName("row")] public int Row { get; set; }
    [JsonPropertyName("col")] public int Col { get; set; }
    [JsonPropertyName("similarity")] public double Similarity { get; set; }
}
|
||||
|
||||
/// <summary>
/// Grid detection result: a detected flag, the optional grid region, the column/row
/// counts and the cell dimensions. <c>ok</c> is always true.
/// </summary>
class DetectGridResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("detected")] public bool Detected { get; set; }
    [JsonPropertyName("region")] public RegionRect? Region { get; set; }
    [JsonPropertyName("cols")] public int Cols { get; set; }
    [JsonPropertyName("rows")] public int Rows { get; set; }
    [JsonPropertyName("cellWidth")] public double CellWidth { get; set; }
    [JsonPropertyName("cellHeight")] public double CellHeight { get; set; }
}
|
||||
|
||||
/// <summary>
/// Template match result: a found flag, the match rectangle and a confidence value.
/// <c>ok</c> is always true.
/// </summary>
class TemplateMatchResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("found")] public bool Found { get; set; }
    [JsonPropertyName("x")] public int X { get; set; }
    [JsonPropertyName("y")] public int Y { get; set; }
    [JsonPropertyName("width")] public int Width { get; set; }
    [JsonPropertyName("height")] public int Height { get; set; }
    [JsonPropertyName("confidence")] public double Confidence { get; set; }
}
|
||||
|
||||
/// <summary>Tunable settings for the diff-based crop; each property lists its default.</summary>
sealed class DiffCropParams
{
    [JsonPropertyName("diffThresh")] public int DiffThresh { get; set; } = 20;
    [JsonPropertyName("rowThreshDiv")] public int RowThreshDiv { get; set; } = 40;
    [JsonPropertyName("colThreshDiv")] public int ColThreshDiv { get; set; } = 8;
    [JsonPropertyName("maxGap")] public int MaxGap { get; set; } = 20;
    [JsonPropertyName("trimCutoff")] public double TrimCutoff { get; set; } = 0.4;
    [JsonPropertyName("ocrPad")] public int OcrPad { get; set; } = 10;

    /// <summary>Single-line <c>key=value</c> summary of every setting.</summary>
    public override string ToString()
    {
        return $"diffThresh={DiffThresh} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} rowThreshDiv={RowThreshDiv} colThreshDiv={ColThreshDiv} ocrPad={OcrPad}";
    }
}
|
||||
|
||||
/// <summary>
/// Tunable OCR settings, grouped into preprocessing, Tesseract-specific and
/// post-merge / Python engine options. Each property lists its default.
/// </summary>
sealed class OcrParams
{
    // preprocessing
    [JsonPropertyName("kernelSize")] public int KernelSize { get; set; } = 41;
    [JsonPropertyName("upscale")] public int Upscale { get; set; } = 2;
    [JsonPropertyName("useBackgroundSub")] public bool UseBackgroundSub { get; set; } = true;
    [JsonPropertyName("dimPercentile")] public int DimPercentile { get; set; } = 40;
    [JsonPropertyName("textThresh")] public int TextThresh { get; set; } = 60;
    [JsonPropertyName("softThreshold")] public bool SoftThreshold { get; set; }

    // Tesseract-specific
    [JsonPropertyName("usePerLineOcr")] public bool UsePerLineOcr { get; set; }
    [JsonPropertyName("lineGapTolerance")] public int LineGapTolerance { get; set; } = 10;
    [JsonPropertyName("linePadY")] public int LinePadY { get; set; } = 20;
    [JsonPropertyName("psm")] public int Psm { get; set; } = 6;

    // post-merge / Python engine tuning (nullable values mean "not specified")
    [JsonPropertyName("mergeGap")] public int MergeGap { get; set; }
    [JsonPropertyName("linkThreshold")] public double? LinkThreshold { get; set; }
    [JsonPropertyName("textThreshold")] public double? TextThreshold { get; set; }
    [JsonPropertyName("lowText")] public double? LowText { get; set; }
    [JsonPropertyName("widthThs")] public double? WidthThs { get; set; }
    [JsonPropertyName("paragraph")] public bool? Paragraph { get; set; }

    /// <summary>
    /// Short summary of the active preprocessing mode: the background-subtraction
    /// settings when <see cref="UseBackgroundSub"/> is set, the top-hat settings otherwise.
    /// </summary>
    public override string ToString()
    {
        if (UseBackgroundSub)
        {
            return $"bgSub dimPct={DimPercentile} textThresh={TextThresh} soft={SoftThreshold} upscale={Upscale} mergeGap={MergeGap}";
        }

        return $"topHat kernel={KernelSize} upscale={Upscale} mergeGap={MergeGap}";
    }
}
|
||||
|
||||
/// <summary>Pairs the diff-crop settings with the OCR settings; both default to fresh instances.</summary>
sealed class DiffOcrParams
{
    [JsonPropertyName("crop")] public DiffCropParams Crop { get; set; } = new();
    [JsonPropertyName("ocr")] public OcrParams Ocr { get; set; } = new();

    /// <summary>Both parameter summaries, each wrapped in square brackets.</summary>
    public override string ToString()
    {
        return $"[{Crop}] [{Ocr}]";
    }
}
|
||||
|
||||
/// <summary>Tunable settings for the edge-based crop; each property lists its default.</summary>
sealed class EdgeCropParams
{
    [JsonPropertyName("darkThresh")] public int DarkThresh { get; set; } = 40;
    [JsonPropertyName("minDarkRun")] public int MinDarkRun { get; set; } = 200;
    [JsonPropertyName("runGapTolerance")] public int RunGapTolerance { get; set; } = 15;
    [JsonPropertyName("rowThreshDiv")] public int RowThreshDiv { get; set; } = 40;
    [JsonPropertyName("colThreshDiv")] public int ColThreshDiv { get; set; } = 8;
    [JsonPropertyName("maxGap")] public int MaxGap { get; set; } = 15;
    [JsonPropertyName("trimCutoff")] public double TrimCutoff { get; set; } = 0.3;
    [JsonPropertyName("ocrPad")] public int OcrPad { get; set; } = 10;

    /// <summary>Single-line <c>key=value</c> summary; <see cref="OcrPad"/> is not part of it.</summary>
    public override string ToString()
    {
        return $"darkThresh={DarkThresh} minRun={MinDarkRun} runGap={RunGapTolerance} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} rowDiv={RowThreshDiv} colDiv={ColThreshDiv}";
    }
}
|
||||
|
||||
/// <summary>Pairs the edge-crop settings with the OCR settings; both default to fresh instances.</summary>
sealed class EdgeOcrParams
{
    [JsonPropertyName("crop")] public EdgeCropParams Crop { get; set; } = new();
    [JsonPropertyName("ocr")] public OcrParams Ocr { get; set; } = new();

    /// <summary>Both parameter summaries, each wrapped in square brackets.</summary>
    public override string ToString()
    {
        return $"[{Crop}] [{Ocr}]";
    }
}
|
||||
|
||||
/// <summary>OCR test case: an id, two image references and the list of expected strings.</summary>
class TestCase
{
    [JsonPropertyName("id")] public string Id { get; set; } = string.Empty;
    [JsonPropertyName("image")] public string Image { get; set; } = string.Empty;
    [JsonPropertyName("fullImage")] public string FullImage { get; set; } = string.Empty;
    [JsonPropertyName("expected")] public List<string> Expected { get; set; } = [];
}
|
||||
|
||||
/// <summary>Outcome of one OCR test case: pass flag, score, and the matched / missed / extra strings.</summary>
class TestCaseResult
{
    [JsonPropertyName("id")] public string Id { get; set; } = string.Empty;
    [JsonPropertyName("passed")] public bool Passed { get; set; }
    [JsonPropertyName("score")] public double Score { get; set; }
    [JsonPropertyName("matched")] public List<string> Matched { get; set; } = [];
    [JsonPropertyName("missed")] public List<string> Missed { get; set; } = [];
    [JsonPropertyName("extra")] public List<string> Extra { get; set; } = [];
}
|
||||
|
||||
/// <summary>Test run summary: passed / failed / total counters plus per-case results. <c>ok</c> is always true.</summary>
class TestResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("passed")] public int Passed { get; set; }
    [JsonPropertyName("failed")] public int Failed { get; set; }
    [JsonPropertyName("total")] public int Total { get; set; }
    [JsonPropertyName("results")] public List<TestCaseResult> Results { get; set; } = [];
}
|
||||
|
||||
/// <summary>Tuning result: best score, the parameters that produced it and an iteration count. <c>ok</c> is always true.</summary>
class TuneResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("bestScore")] public double BestScore { get; set; }
    [JsonPropertyName("bestParams")] public DiffOcrParams BestParams { get; set; } = new();
    [JsonPropertyName("iterations")] public int Iterations { get; set; }
}
|
||||
|
||||
// ── Crop test models ────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>Integer 2D point (JSON keys x, y).</summary>
class PointXY
{
    [JsonPropertyName("x")] public int X { get; set; }
    [JsonPropertyName("y")] public int Y { get; set; }
}
|
||||
|
||||
/// <summary>
/// Crop test case: an id, the image and snapshot image references, the two corner
/// points of the expected rectangle, and an optional cursor position.
/// </summary>
class CropTestCase
{
    [JsonPropertyName("id")] public string Id { get; set; } = string.Empty;
    [JsonPropertyName("image")] public string Image { get; set; } = string.Empty;
    [JsonPropertyName("snapshotImage")] public string SnapshotImage { get; set; } = string.Empty;
    [JsonPropertyName("topLeft")] public PointXY TopLeft { get; set; } = new();
    [JsonPropertyName("bottomRight")] public PointXY BottomRight { get; set; } = new();
    [JsonPropertyName("cursorX")] public int? CursorX { get; set; }
    [JsonPropertyName("cursorY")] public int? CursorY { get; set; }
}
|
||||
|
||||
/// <summary>
/// Outcome of one crop test case: the IoU value, the expected and (optional) actual
/// rectangles, and a delta for each of the four edges.
/// </summary>
class CropTestResult
{
    [JsonPropertyName("id")] public string Id { get; set; } = string.Empty;
    [JsonPropertyName("iou")] public double IoU { get; set; }
    [JsonPropertyName("expected")] public RegionRect Expected { get; set; } = new();
    [JsonPropertyName("actual")] public RegionRect? Actual { get; set; }
    [JsonPropertyName("deltaTop")] public int DeltaTop { get; set; }
    [JsonPropertyName("deltaLeft")] public int DeltaLeft { get; set; }
    [JsonPropertyName("deltaRight")] public int DeltaRight { get; set; }
    [JsonPropertyName("deltaBottom")] public int DeltaBottom { get; set; }
}
|
||||
|
||||
/// <summary>Crop test summary: method name, average IoU and per-case results. <c>ok</c> is always true.</summary>
class CropTestResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("method")] public string Method { get; set; } = string.Empty;
    [JsonPropertyName("avgIoU")] public double AvgIoU { get; set; }
    [JsonPropertyName("results")] public List<CropTestResult> Results { get; set; } = [];
}
|
||||
|
||||
/// <summary>Crop tuning result: best average IoU, the crop parameters that produced it and an iteration count. <c>ok</c> is always true.</summary>
class CropTuneResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("bestAvgIoU")] public double BestAvgIoU { get; set; }
    [JsonPropertyName("bestParams")] public DiffCropParams BestParams { get; set; } = new();
    [JsonPropertyName("iterations")] public int Iterations { get; set; }
}
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net8.0-windows10.0.19041.0</TargetFramework>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="OpenCvSharp4" Version="4.11.0.20250507" />
|
||||
<PackageReference Include="OpenCvSharp4.Extensions" Version="4.11.0.20250507" />
|
||||
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.11.0.20250507" />
|
||||
<PackageReference Include="System.Drawing.Common" Version="8.0.12" />
|
||||
<PackageReference Include="Tesseract" Version="5.2.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="tessdata\eng.traineddata">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="tessdata\poe2.traineddata" Condition="Exists('tessdata\poe2.traineddata')">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="tessdata\cases.json">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="tessdata\crop.json">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="tessdata\poe2.user-words" Condition="Exists('tessdata\poe2.user-words')">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="tessdata\poe2.user-patterns" Condition="Exists('tessdata\poe2.user-patterns')">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Include="tessdata\images\*">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
|
@ -1,916 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using System.Text.Json;
|
||||
using OpenCvSharp;
|
||||
using OpenCvSharp.Extensions;
|
||||
using Tesseract;
|
||||
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||
|
||||
class OcrHandler(TesseractEngine engine)
|
||||
{
|
||||
private Bitmap? _referenceFrame;
|
||||
private RegionRect? _referenceRegion;
|
||||
|
||||
/// <summary>
/// Captures (or loads) an image, runs it through the Tesseract engine, and returns
/// the page text together with the per-line word breakdown (no coordinate offset).
/// </summary>
public object HandleOcr(Request req)
{
    using var captured = ScreenCapture.CaptureOrLoad(req.File, req.Region);
    using var pixImage = ImageUtils.BitmapToPix(captured);
    using var ocrPage = engine.Process(pixImage);

    return new OcrResponse
    {
        Text = ocrPage.GetText(),
        Lines = ImageUtils.ExtractLinesFromPage(ocrPage, offsetX: 0, offsetY: 0),
    };
}
|
||||
|
||||
/// <summary>
/// Saves an image to <c>req.Path</c>, creating the target directory when needed.
/// If a reference frame exists, that frame is saved (same image used for diff-ocr);
/// otherwise a new frame is captured and saved.
/// </summary>
/// <returns>
/// An <see cref="ErrorResponse"/> when no path is supplied, otherwise an <see cref="OkResponse"/>.
/// </returns>
public object HandleScreenshot(Request req)
{
    if (string.IsNullOrEmpty(req.Path))
        return new ErrorResponse("screenshot command requires 'path'");

    var reference = _referenceFrame;
    var bitmap = reference ?? ScreenCapture.CaptureOrLoad(req.File, req.Region);
    try
    {
        var format = ImageUtils.GetImageFormat(req.Path);
        var dir = Path.GetDirectoryName(req.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        bitmap.Save(req.Path, format);
    }
    finally
    {
        // Only a freshly captured bitmap is ours to release; the stored reference
        // frame must stay alive. The finally block also covers a failing Save or
        // directory creation, which previously leaked the temporary capture.
        if (!ReferenceEquals(bitmap, reference))
            bitmap.Dispose();
    }
    return new OkResponse();
}
|
||||
|
||||
/// <summary>
/// Captures (or loads) an image and returns it PNG-encoded as a Base64 string.
/// </summary>
public object HandleCapture(Request req)
{
    using var frame = ScreenCapture.CaptureOrLoad(req.File, req.Region);
    using var buffer = new MemoryStream();
    frame.Save(buffer, SdImageFormat.Png);

    byte[] pngBytes = buffer.ToArray();
    return new CaptureResponse { Image = Convert.ToBase64String(pngBytes) };
}
|
||||
|
||||
/// <summary>
/// Captures (or loads) a new reference frame and remembers a copy of the region
/// it was taken with (null when the request has no region).
/// </summary>
/// <remarks>
/// The new frame is captured before the previous one is released, so a failing
/// capture leaves the existing reference frame and region untouched instead of
/// leaving a disposed bitmap in the field.
/// </remarks>
public object HandleSnapshot(Request req)
{
    var fresh = ScreenCapture.CaptureOrLoad(req.File, req.Region);

    var previous = _referenceFrame;
    _referenceFrame = fresh;
    _referenceRegion = req.Region == null
        ? null
        : new RegionRect { X = req.Region.X, Y = req.Region.Y, Width = req.Region.Width, Height = req.Region.Height };

    previous?.Dispose();
    return new OkResponse();
}
|
||||
|
||||
/// <summary>
/// Runs diff-OCR with default parameters, overriding the crop settings with the
/// request's diff threshold when that threshold is positive.
/// </summary>
public object HandleDiffOcr(Request req)
{
    var parameters = new DiffOcrParams();
    if (req.Threshold > 0)
    {
        parameters.Crop = new DiffCropParams { DiffThresh = req.Threshold };
    }

    return HandleDiffOcr(req, parameters);
}
|
||||
|
||||
/// <summary>
/// Diff detection + crop only. Compares a fresh capture against the stored reference
/// frame, counts pixels that became darker than the threshold, and crops the longest
/// run of changed rows and columns (the tooltip).
/// </summary>
/// <param name="req">Supplies the optional capture file and region; without a region the snapshot's region is used.</param>
/// <param name="c">Thresholds controlling the diff, run detection, edge extension and trimming.</param>
/// <returns>
/// <c>null</c> when there is no reference frame, the reference cannot be cropped to the
/// requested region, no pixel changed, or the best run is shorter than 50 rows or 50 columns.
/// Otherwise: the crop of the current frame, the same crop of the reference frame, the full
/// current frame, and the crop rectangle offset by the diff region's origin.
/// The caller is responsible for disposing all three returned bitmaps.
/// </returns>
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffCropParams c)
{
    const int MinTooltipSize = 50;       // smallest row/column run accepted as a tooltip
    const int SequentialRowLimit = 200;  // at or below this many rows the column pass runs single-threaded

    if (_referenceFrame == null)
        return null;

    var diffRegion = req.Region ?? _referenceRegion;
    int baseX = diffRegion?.X ?? 0;
    int baseY = diffRegion?.Y ?? 0;

    var current = ScreenCapture.CaptureOrLoad(req.File, diffRegion);
    Bitmap? ownedRef = null;   // temporary crop of the reference frame, released in finally
    bool handedOff = false;    // true once `current` is returned to the caller
    try
    {
        Bitmap refForDiff = _referenceFrame;

        if (diffRegion != null)
        {
            if (_referenceRegion == null)
            {
                // Reference has no stored region: crop it with the diff region directly.
                ownedRef = CropBitmap(_referenceFrame, diffRegion);
                if (ownedRef == null)
                    return null;
                refForDiff = ownedRef;
            }
            else if (!RegionsEqual(diffRegion, _referenceRegion))
            {
                // Reference covers a different region: translate into its local coordinates.
                int offX = diffRegion.X - _referenceRegion.X;
                int offY = diffRegion.Y - _referenceRegion.Y;
                if (offX < 0 || offY < 0 || offX + diffRegion.Width > _referenceFrame.Width || offY + diffRegion.Height > _referenceFrame.Height)
                    return null;

                ownedRef = CropBitmap(_referenceFrame, new RegionRect
                {
                    X = offX,
                    Y = offY,
                    Width = diffRegion.Width,
                    Height = diffRegion.Height,
                });
                if (ownedRef == null)
                    return null;
                refForDiff = ownedRef;
            }
        }

        int w = Math.Min(refForDiff.Width, current.Width);
        int h = Math.Min(refForDiff.Height, current.Height);

        // Each buffer keeps its own stride: the two bitmaps may differ in width,
        // in which case their locked row pitches differ as well.
        byte[] refPx = CopyPixels(refForDiff, w, h, out int refStride);
        byte[] curPx = CopyPixels(current, w, h, out int curStride);

        int diffThresh = c.DiffThresh;

        // Pass 1: parallel row diff — compute rowCounts[] directly, no changed[] array
        int[] rowCounts = new int[h];
        Parallel.For(0, h, y =>
        {
            int count = 0;
            int refRow = y * refStride;
            int curRow = y * curStride;
            for (int x = 0; x < w; x++)
            {
                int r = refRow + x * 4;
                int k = curRow + x * 4;
                int darker = (refPx[r] - curPx[k]) + (refPx[r + 1] - curPx[k + 1]) + (refPx[r + 2] - curPx[k + 2]);
                if (darker > diffThresh)
                    count++;
            }
            rowCounts[y] = count;
        });

        int totalChanged = 0;
        for (int y = 0; y < h; y++) totalChanged += rowCounts[y];

        if (totalChanged == 0)
            return null;

        int maxGap = c.MaxGap;
        // Guard the divisors: a zero supplied through the JSON parameters would otherwise throw.
        int rowThresh = w / Math.Max(1, c.RowThreshDiv);
        var (bestRowStart, bestRowEnd, bestRowLen) = LongestRun(rowCounts, rowThresh, maxGap);

        // Pass 2: parallel column diff — only within the row range, recompute from raw pixels
        int[] colCounts = new int[w];
        int rowRangeLen = bestRowEnd - bestRowStart + 1;
        if (rowRangeLen <= SequentialRowLimit)
        {
            for (int y = bestRowStart; y <= bestRowEnd; y++)
            {
                int refRow = y * refStride;
                int curRow = y * curStride;
                for (int x = 0; x < w; x++)
                {
                    int r = refRow + x * 4;
                    int k = curRow + x * 4;
                    int darker = (refPx[r] - curPx[k]) + (refPx[r + 1] - curPx[k + 1]) + (refPx[r + 2] - curPx[k + 2]);
                    if (darker > diffThresh)
                        colCounts[x]++;
                }
            }
        }
        else
        {
            // Each worker accumulates into its own array; the arrays are merged atomically at the end.
            Parallel.For(bestRowStart, bestRowEnd + 1,
                () => new int[w],
                (y, _, localCols) =>
                {
                    int refRow = y * refStride;
                    int curRow = y * curStride;
                    for (int x = 0; x < w; x++)
                    {
                        int r = refRow + x * 4;
                        int k = curRow + x * 4;
                        int darker = (refPx[r] - curPx[k]) + (refPx[r + 1] - curPx[k + 1]) + (refPx[r + 2] - curPx[k + 2]);
                        if (darker > diffThresh)
                            localCols[x]++;
                    }
                    return localCols;
                },
                localCols =>
                {
                    for (int x = 0; x < w; x++)
                        Interlocked.Add(ref colCounts[x], localCols[x]);
                });
        }

        int tooltipHeight = bestRowEnd - bestRowStart + 1;
        int colThresh = tooltipHeight / Math.Max(1, c.ColThreshDiv);
        var (bestColStart, bestColEnd, bestColLen) = LongestRun(colCounts, colThresh, maxGap);

        Console.Error.WriteLine($"  diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");

        if (bestRowLen < MinTooltipSize || bestColLen < MinTooltipSize)
        {
            Console.Error.WriteLine($"  diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
            return null;
        }

        int minX = bestColStart;
        int minY = bestRowStart;
        int maxX = Math.Min(bestColEnd, w - 1);
        int maxY = Math.Min(bestRowEnd, h - 1);

        // Boundary extension: scan outward from detected edges with a relaxed threshold
        // to capture low-signal regions (e.g. ornamental tooltip headers)
        Extend(rowCounts, ref minY, ref maxY, Math.Max(1, rowThresh / 4), maxGap);
        Extend(colCounts, ref minX, ref maxX, Math.Max(1, colThresh / 4), maxGap);

        // Trim low-density edges on both axes to avoid oversized crops.
        Trim(colCounts, ref minX, ref maxX, c.TrimCutoff);
        Trim(rowCounts, ref minY, ref maxY, c.TrimCutoff);

        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;

        var cropped = CropFromBytes(curPx, curStride, minX, minY, rw, rh);
        Bitmap refCropped;
        try
        {
            refCropped = CropFromBytes(refPx, refStride, minX, minY, rw, rh);
        }
        catch
        {
            cropped.Dispose();
            throw;
        }
        var region = new RegionRect { X = baseX + minX, Y = baseY + minY, Width = rw, Height = rh };

        Console.Error.WriteLine($"  diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");

        handedOff = true;
        return (cropped, refCropped, current, region);
    }
    finally
    {
        // The temporary reference crop is never returned; the current frame is only
        // ours to release when it was not handed to the caller.
        ownedRef?.Dispose();
        if (!handedOff)
            current.Dispose();
    }

    // Copies the top-left width x height area of a bitmap as 32bpp ARGB bytes and reports the row stride.
    static byte[] CopyPixels(Bitmap bmp, int width, int height, out int stride)
    {
        var data = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            stride = data.Stride;
            var pixels = new byte[stride * height];
            Marshal.Copy(data.Scan0, pixels, 0, pixels.Length);
            return pixels;
        }
        finally
        {
            bmp.UnlockBits(data);
        }
    }

    // Finds the longest run of entries >= threshold, tolerating gaps of up to maxGap inactive entries.
    // Returns (0, 0, 0) when no entry reaches the threshold.
    static (int start, int end, int length) LongestRun(int[] counts, int threshold, int maxGap)
    {
        int bestStart = 0, bestEnd = 0, bestLen = 0;
        int runStart = -1, lastActive = -1;
        for (int i = 0; i < counts.Length; i++)
        {
            if (counts[i] >= threshold)
            {
                if (runStart < 0) runStart = i;
                lastActive = i;
            }
            else if (runStart >= 0 && i - lastActive > maxGap)
            {
                int len = lastActive - runStart + 1;
                if (len > bestLen) { bestStart = runStart; bestEnd = lastActive; bestLen = len; }
                runStart = -1;
            }
        }
        if (runStart >= 0)
        {
            int len = lastActive - runStart + 1;
            if (len > bestLen) { bestStart = runStart; bestEnd = lastActive; bestLen = len; }
        }
        return (bestStart, bestEnd, bestLen);
    }

    // Grows [lo, hi] outward by at most maxGap entries per side while counts stay >= threshold.
    static void Extend(int[] counts, ref int lo, ref int hi, int threshold, int maxGap)
    {
        int lowLimit = Math.Max(0, lo - maxGap);
        for (int i = lo - 1; i >= lowLimit; i--)
        {
            if (counts[i] >= threshold) lo = i;
            else break;
        }
        int highLimit = Math.Min(counts.Length - 1, hi + maxGap);
        for (int i = hi + 1; i <= highLimit; i++)
        {
            if (counts[i] >= threshold) hi = i;
            else break;
        }
    }

    // Shrinks [lo, hi] from both ends while the edge count is below trimCutoff times the
    // average count of the middle half; never shrinks the span to 50 entries or fewer.
    static void Trim(int[] counts, ref int lo, ref int hi, double trimCutoff)
    {
        const int MinSpan = 50;
        int span = hi - lo + 1;
        if (span <= MinSpan)
            return;

        int q1 = lo + span / 4;
        int q3 = lo + span * 3 / 4;
        long midSum = 0;
        int midCount = 0;
        for (int i = q1; i <= q3; i++) { midSum += counts[i]; midCount++; }
        double avgMidDensity = (double)midSum / Math.Max(1, midCount);
        double cutoff = avgMidDensity * trimCutoff;

        while (lo < hi - MinSpan && counts[lo] < cutoff)
            lo++;
        while (hi > lo + MinSpan && counts[hi] < cutoff)
            hi--;
    }
}
|
||||
|
||||
/// <summary>True when both rectangles have the same origin and the same size.</summary>
private static bool RegionsEqual(RegionRect a, RegionRect b)
{
    return (a.X, a.Y, a.Width, a.Height) == (b.X, b.Y, b.Width, b.Height);
}
|
||||
|
||||
/// <summary>
/// Clones the part of <paramref name="src"/> selected by <paramref name="region"/> as a 32bpp ARGB bitmap.
/// </summary>
/// <remarks>
/// A negative X or Y is clamped to 0 (the requested width/height is not reduced to compensate),
/// and the size is limited to what remains of the source from that origin.
/// </remarks>
/// <returns>The cropped copy (caller disposes), or <c>null</c> when the clamped area is empty.</returns>
private static Bitmap? CropBitmap(Bitmap src, RegionRect region)
{
    int left = region.X < 0 ? 0 : region.X;
    int top = region.Y < 0 ? 0 : region.Y;
    int width = Math.Min(region.Width, src.Width - left);
    int height = Math.Min(region.Height, src.Height - top);

    if (width > 0 && height > 0)
    {
        var area = new Rectangle(left, top, width, height);
        return src.Clone(area, PixelFormat.Format32bppArgb);
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Crops the changed area between the stored reference frame and the current frame,
/// preprocesses it and runs Tesseract on it, either line by line or as one block.
/// </summary>
/// <param name="req">
/// Request. When <c>Path</c> is set, the raw crop, the full frame and the preprocessed image
/// are written next to it; <c>Debug</c> enables progress messages on stderr.
/// </param>
/// <param name="p">Crop parameters (<c>p.Crop</c>) and OCR parameters (<c>p.Ocr</c>).</param>
/// <returns>
/// An <c>ErrorResponse</c> when no reference frame is stored, an empty <c>OcrResponse</c> when
/// <c>DiffCrop</c> returns nothing, otherwise a <c>DiffOcrResponse</c> with text, lines and region.
/// </returns>
public object HandleDiffOcr(Request req, DiffOcrParams p)
{
    if (_referenceFrame == null)
        return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");

    var diff = DiffCrop(req, p.Crop);
    if (diff == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, refCropped, current, region) = diff.Value;
    using var currentOwner = current;
    using var croppedOwner = cropped;
    using var refCroppedOwner = refCropped;

    bool trace = req.Debug;
    int originX = region.X;
    int originY = region.Y;
    int regionW = region.Width;
    int regionH = region.Height;

    // Every successful exit reports the same region; a fresh RegionRect is built each time.
    DiffOcrResponse Respond(string text, List<OcrLineResult> found) => new DiffOcrResponse
    {
        Text = text,
        Lines = found,
        Region = new RegionRect { X = originX, Y = originY, Width = regionW, Height = regionH },
    };

    // Raw crop goes to disk first, before any preprocessing runs.
    if (req.Path is { Length: > 0 } rawPath)
    {
        var folder = Path.GetDirectoryName(rawPath);
        if (!string.IsNullOrEmpty(folder) && !Directory.Exists(folder))
            Directory.CreateDirectory(folder);
        cropped.Save(rawPath, ImageUtils.GetImageFormat(rawPath));
        if (trace) Console.Error.WriteLine($" diff-ocr: saved raw to {rawPath}");
    }

    // Preprocess into a Mat: background subtraction against the reference crop, or top-hat.
    var ocr = p.Ocr;
    Mat processed;
    if (!ocr.UseBackgroundSub)
    {
        using var topHat = ImagePreprocessor.PreprocessForOcr(cropped, ocr.KernelSize, ocr.Upscale);
        processed = BitmapConverter.ToMat(topHat);
    }
    else
    {
        processed = ImagePreprocessor.PreprocessWithBackgroundSubMat(cropped, refCropped, ocr.DimPercentile, ocr.TextThresh, ocr.Upscale, ocr.SoftThreshold);
    }
    using var processedOwner = processed;

    // Full frame and preprocessed image are written alongside the raw crop.
    if (req.Path is { Length: > 0 } basePath)
    {
        var ext = Path.GetExtension(basePath);

        var fullPath = Path.ChangeExtension(basePath, ".full" + ext);
        current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
        if (trace) Console.Error.WriteLine($" diff-ocr: saved fullscreen to {fullPath}");

        var prePath = Path.ChangeExtension(basePath, ".pre" + ext);
        using var preBmp = BitmapConverter.ToBitmap(processed);
        preBmp.Save(prePath, ImageUtils.GetImageFormat(prePath));
        if (trace) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
    }

    int pad = p.Crop.OcrPad;
    int scale = ocr.Upscale > 0 ? ocr.Upscale : 1;
    var lines = new List<OcrLineResult>();

    if (ocr.UsePerLineOcr)
    {
        // Line detection works on a binary image; a soft-thresholded result is binarized into a copy.
        int minRowPx = Math.Max(processed.Cols / 200, 3);
        bool soft = ocr.SoftThreshold;
        using var binarized = soft ? new Mat() : null;
        if (soft)
            Cv2.Threshold(processed, binarized!, 128, 255, ThresholdTypes.Binary);

        var bands = ImagePreprocessor.DetectTextLines(soft ? binarized! : processed, minRowPixels: minRowPx, gapTolerance: ocr.LineGapTolerance * scale);
        if (trace) Console.Error.WriteLine($" diff-ocr: detected {bands.Count} text lines");

        if (bands.Count > 0)
        {
            int padY = ocr.LinePadY;
            foreach (var (bandTop, bandBottom) in bands)
            {
                // Vertical extent of the strip, clamped to the image.
                int top = Math.Max(bandTop - padY, 0);
                int bottom = Math.Min(bandBottom + padY, processed.Rows - 1);

                using var strip = new Mat(processed, new OpenCvSharp.Rect(0, top, processed.Cols, bottom - top + 1));
                using var stripPadded = new Mat();
                Cv2.CopyMakeBorder(strip, stripPadded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);

                using var stripBmp = BitmapConverter.ToBitmap(stripPadded);
                using var stripPix = ImageUtils.BitmapToPix(stripBmp);
                using var stripPage = engine.Process(stripPix, (PageSegMode)ocr.Psm);

                // Word boxes are in padded-strip pixels: remove the border, add the strip offset,
                // divide by the upscale factor, then add the region origin.
                var words = new List<OcrWordResult>();
                using var iter = stripPage.GetIterator();
                if (iter != null)
                {
                    iter.Begin();
                    do
                    {
                        var raw = iter.GetText(PageIteratorLevel.Word);
                        if (!string.IsNullOrWhiteSpace(raw)
                            && !(iter.GetConfidence(PageIteratorLevel.Word) < 50)
                            && iter.TryGetBoundingBox(PageIteratorLevel.Word, out var box))
                        {
                            words.Add(new OcrWordResult
                            {
                                Text = raw.Trim(),
                                X = (box.X1 - pad) / scale + originX,
                                Y = (box.Y1 - pad + top) / scale + originY,
                                Width = box.Width / scale,
                                Height = box.Height / scale,
                            });
                        }
                    } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                }

                if (words.Count > 0)
                    lines.Add(new OcrLineResult { Text = string.Join(" ", words.Select(w => w.Text)), Words = words });
            }

            return Respond(string.Join("\n", lines.Select(l => l.Text)) + "\n", lines);
        }

        if (trace) Console.Error.WriteLine(" diff-ocr: no text lines detected, falling back to whole-block OCR");
    }

    // Whole-block path: pad the full preprocessed image and OCR it with the configured PSM.
    using var blockPadded = new Mat();
    Cv2.CopyMakeBorder(processed, blockPadded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);
    using var blockBmp = BitmapConverter.ToBitmap(blockPadded);
    using var blockPix = ImageUtils.BitmapToPix(blockBmp);
    using var blockPage = engine.Process(blockPix, (PageSegMode)ocr.Psm);

    var blockText = blockPage.GetText();
    int padInSource = pad / scale; // border width expressed in pre-upscale pixels
    lines = ImageUtils.ExtractLinesFromPage(blockPage, offsetX: originX - padInSource, offsetY: originY - padInSource);

    return Respond(blockText, lines);
}
|
||||
|
||||
/// <summary>
/// OCRs a bitmap that has already been preprocessed: converts it to a Mat, surrounds it with a
/// white border of <paramref name="pad"/> pixels, runs Tesseract with the given page
/// segmentation mode and extracts lines offset by the region origin.
/// </summary>
/// <param name="processedBmp">Preprocessed image to recognise.</param>
/// <param name="region">Region reported in the response; its X/Y feed the line offsets.</param>
/// <param name="pad">Border width in pixels added on every side.</param>
/// <param name="upscale">Upscale factor used to convert the border width; values &lt;= 0 count as 1.</param>
/// <param name="psm">Value cast to <c>PageSegMode</c>.</param>
public DiffOcrResponse RunTesseractOnBitmap(Bitmap processedBmp, RegionRect region, int pad = 10, int upscale = 2, int psm = 6)
{
    using var source = BitmapConverter.ToMat(processedBmp);
    using var bordered = new Mat();
    Cv2.CopyMakeBorder(source, bordered, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);

    using var borderedBmp = BitmapConverter.ToBitmap(bordered);
    using var pix = ImageUtils.BitmapToPix(borderedBmp);
    using var page = engine.Process(pix, (PageSegMode)psm);

    var recognized = page.GetText();

    int scale = upscale > 0 ? upscale : 1;
    int padInSource = pad / scale; // border width expressed in pre-upscale pixels
    var lines = ImageUtils.ExtractLinesFromPage(
        page,
        offsetX: region.X - padInSource,
        offsetY: region.Y - padInSource);

    return new DiffOcrResponse { Text = recognized, Lines = lines, Region = region };
}
|
||||
|
||||
/// <summary>Runs the test cases with default parameters and verbose logging; <paramref name="req"/> is not read.</summary>
public object HandleTest(Request req)
{
    var defaults = new DiffOcrParams();
    return RunTestCases(defaults, verbose: true);
}
|
||||
|
||||
/// <summary>
/// Creates an independent copy of <paramref name="p"/> by serializing it to JSON and
/// deserializing the result.
/// </summary>
/// <param name="p">Parameters to copy; must not be null.</param>
/// <returns>A new <c>DiffOcrParams</c> instance.</returns>
/// <exception cref="ArgumentNullException"><paramref name="p"/> is null.</exception>
/// <exception cref="InvalidOperationException">The round-trip produced null.</exception>
private static DiffOcrParams CloneParams(DiffOcrParams p)
{
    // A null input serializes to the JSON literal "null", which deserializes back to null;
    // reject it here instead of handing callers a null that fails later.
    ArgumentNullException.ThrowIfNull(p);

    var json = JsonSerializer.Serialize(p);
    return JsonSerializer.Deserialize<DiffOcrParams>(json)
        ?? throw new InvalidOperationException("DiffOcrParams did not survive a JSON round-trip.");
}
|
||||
|
||||
/// <summary>
/// Tunes parameters in three phases: crop parameters first, then OCR parameters once with
/// top-hat preprocessing and once with background subtraction (both starting from the tuned
/// crop parameters). The higher-scoring variant is reported; ties go to top-hat.
/// </summary>
/// <param name="req">Not read by this method.</param>
/// <returns>A <c>TuneResponse</c> with the winning score, parameters and evaluation count.</returns>
public object HandleTune(Request req)
{
    int totalEvals = 0;

    // Phase A mutates cropTuned in place; its returned score is not used afterwards.
    Console.Error.WriteLine("\n========== Phase A: Crop Params ==========");
    var cropTuned = new DiffOcrParams();
    _ = TuneCropParams(cropTuned, ref totalEvals);

    Console.Error.WriteLine("\n========== Phase B: OCR — Top-Hat ==========");
    var topHat = CloneParams(cropTuned);
    topHat.Ocr.UseBackgroundSub = false;
    double topHatScore = TuneOcrParams(topHat, ref totalEvals, tuneTopHat: true, tuneBgSub: false);

    Console.Error.WriteLine("\n========== Phase C: OCR — Background Subtraction ==========");
    var bgSub = CloneParams(cropTuned);
    bgSub.Ocr.UseBackgroundSub = true;
    double bgSubScore = TuneOcrParams(bgSub, ref totalEvals, tuneTopHat: false, tuneBgSub: true);

    bool bgSubWins = bgSubScore > topHatScore;
    var winner = bgSubWins ? bgSub : topHat;
    double winnerScore = Math.Max(topHatScore, bgSubScore);
    string winnerLabel = winner.Ocr.UseBackgroundSub ? "BgSub" : "TopHat";

    Console.Error.WriteLine($"\n========== Result ==========");
    Console.Error.WriteLine($" Top-Hat: {topHatScore:F3} {topHat}");
    Console.Error.WriteLine($" BgSub: {bgSubScore:F3} {bgSub}");
    Console.Error.WriteLine($" Winner: {winnerLabel} evals={totalEvals}\n");

    // One more verbose pass so the log shows per-case results for the winning parameters.
    RunTestCases(winner, verbose: true);

    return new TuneResponse
    {
        BestScore = winnerScore,
        BestParams = winner,
        Iterations = totalEvals,
    };
}
|
||||
|
||||
/// <summary>
/// Coordinate-wise search over the crop parameters. Each pass sweeps every integer parameter
/// and then <c>TrimCutoff</c>, scoring a clone of <paramref name="best"/> per candidate and
/// writing a value back into <paramref name="best"/> only when it beats the current score.
/// Stops after three passes or after the first pass without improvement.
/// </summary>
/// <param name="best">Starting parameters; updated in place.</param>
/// <param name="totalEvals">Incremented once per scored candidate.</param>
/// <returns>The best score reached.</returns>
private double TuneCropParams(DiffOcrParams best, ref int totalEvals)
{
    double bestScore = ScoreParams(best);
    Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");

    var intSweeps = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
    {
        ("diffThresh", [10, 15, 20, 25, 30, 40, 50, 60], (c, v) => c.DiffThresh = v),
        ("rowThreshDiv", [10, 15, 20, 25, 30, 40, 50, 60], (c, v) => c.RowThreshDiv = v),
        ("colThreshDiv", [5, 8, 10, 12, 15, 20, 25, 30], (c, v) => c.ColThreshDiv = v),
        ("maxGap", [5, 8, 10, 12, 15, 20, 25, 30], (c, v) => c.MaxGap = v),
        ("ocrPad", [0, 5, 10, 15, 20, 30], (c, v) => c.OcrPad = v),
    };
    double[] trimCandidates = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];

    const int MaxPasses = 3;
    int pass = 0;
    bool gained;
    do
    {
        pass++;
        gained = false;
        Console.Error.WriteLine($"--- Round {pass} ---");

        foreach (var sweep in intSweeps)
        {
            Console.Error.Write($" {sweep.Name}: ");
            int topValue = 0;
            double topScore = -1;

            foreach (int candidate in sweep.Values)
            {
                var trial = CloneParams(best);
                sweep.Set(trial.Crop, candidate);
                double trialScore = ScoreParams(trial);
                totalEvals++;
                Console.Error.Write($"{candidate}={trialScore:F3} ");
                if (trialScore > topScore)
                {
                    topScore = trialScore;
                    topValue = candidate;
                }
            }
            Console.Error.WriteLine();

            // Keep the current value unless the sweep found a strictly better score.
            if (!(topScore > bestScore))
                continue;

            sweep.Set(best.Crop, topValue);
            bestScore = topScore;
            gained = true;
            Console.Error.WriteLine($" → {sweep.Name}={topValue} score={bestScore:F3}");
        }

        // TrimCutoff is a double, so it is swept separately; it starts from the current value and score.
        Console.Error.Write($" trimCutoff: ");
        double topTrim = best.Crop.TrimCutoff;
        double topTrimScore = bestScore;
        foreach (double candidate in trimCandidates)
        {
            var trial = CloneParams(best);
            trial.Crop.TrimCutoff = candidate;
            double trialScore = ScoreParams(trial);
            totalEvals++;
            Console.Error.Write($"{candidate:F2}={trialScore:F3} ");
            if (trialScore > topTrimScore)
            {
                topTrimScore = trialScore;
                topTrim = candidate;
            }
        }
        Console.Error.WriteLine();

        if (topTrimScore > bestScore)
        {
            best.Crop.TrimCutoff = topTrim;
            bestScore = topTrimScore;
            gained = true;
            Console.Error.WriteLine($" → trimCutoff={topTrim:F2} score={bestScore:F3}");
        }

        Console.Error.WriteLine($" End of round {pass}: score={bestScore:F3} {best}");
    } while (gained && pass < MaxPasses);

    return bestScore;
}
|
||||
|
||||
/// <summary>
/// Coordinate-wise search over the OCR parameters. The shared sweeps (upscale, psm) always run;
/// the top-hat and background-subtraction sweeps are appended according to the two flags.
/// Each candidate is scored on a clone of <paramref name="best"/>, and a value is written back
/// only when it beats the current score. Stops after three passes or after the first pass
/// without improvement.
/// </summary>
/// <param name="best">Starting parameters; updated in place.</param>
/// <param name="totalEvals">Incremented once per scored candidate.</param>
/// <param name="tuneTopHat">Include the kernel-size sweep.</param>
/// <param name="tuneBgSub">Include the dimPercentile/textThresh/lineGapTolerance/linePadY sweeps.</param>
/// <returns>The best score reached.</returns>
private double TuneOcrParams(DiffOcrParams best, ref int totalEvals, bool tuneTopHat, bool tuneBgSub)
{
    double bestScore = ScoreParams(best);
    Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");

    var sharedSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
    {
        ("upscale", [1, 2, 3], (o, v) => o.Upscale = v),
        ("psm", [4, 6, 11, 13], (o, v) => o.Psm = v),
    };
    var topHatSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
    {
        ("kernelSize", [11, 15, 19, 21, 25, 31, 35, 41, 51], (o, v) => o.KernelSize = v),
    };
    var bgSubSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
    {
        ("dimPercentile", [5, 10, 15, 20, 25, 30, 40, 50], (o, v) => o.DimPercentile = v),
        ("textThresh", [10, 15, 20, 25, 30, 40, 50, 60, 80], (o, v) => o.TextThresh = v),
        ("lineGapTolerance", [3, 5, 8, 10, 15], (o, v) => o.LineGapTolerance = v),
        ("linePadY", [5, 10, 15, 20], (o, v) => o.LinePadY = v),
    };

    // Sweep order: shared, then top-hat, then background-subtraction.
    var plan = new List<(string Name, int[] Values, Action<OcrParams, int> Set)>(sharedSweeps);
    if (tuneTopHat) plan.AddRange(topHatSweeps);
    if (tuneBgSub) plan.AddRange(bgSubSweeps);

    const int MaxPasses = 3;
    int pass = 0;
    bool gained;
    do
    {
        pass++;
        gained = false;
        Console.Error.WriteLine($"--- Round {pass} ---");

        foreach (var sweep in plan)
        {
            Console.Error.Write($" {sweep.Name}: ");
            int topValue = 0;
            double topScore = -1;

            foreach (int candidate in sweep.Values)
            {
                var trial = CloneParams(best);
                sweep.Set(trial.Ocr, candidate);
                double trialScore = ScoreParams(trial);
                totalEvals++;
                Console.Error.Write($"{candidate}={trialScore:F3} ");
                if (trialScore > topScore)
                {
                    topScore = trialScore;
                    topValue = candidate;
                }
            }
            Console.Error.WriteLine();

            // Keep the current value unless the sweep found a strictly better score.
            if (!(topScore > bestScore))
                continue;

            sweep.Set(best.Ocr, topValue);
            bestScore = topScore;
            gained = true;
            Console.Error.WriteLine($" → {sweep.Name}={topValue} score={bestScore:F3}");
        }

        Console.Error.WriteLine($" End of round {pass}: score={bestScore:F3} {best}");
    } while (gained && pass < MaxPasses);

    return bestScore;
}
|
||||
|
||||
/// <summary>
/// Scores a parameter set as the mean per-case score of a non-verbose test run.
/// Returns 0 when the run does not yield a <c>TestResponse</c> with at least one case.
/// </summary>
private double ScoreParams(DiffOcrParams p)
{
    if (RunTestCases(p, verbose: false) is not TestResponse { Total: > 0 } report)
        return 0;

    return report.Results.Average(r => r.Score);
}
|
||||
|
||||
/// <summary>
/// Runs every case from <c>tessdata/cases.json</c> through snapshot + diff-ocr and fuzzy-matches
/// the recognised lines against the expected ones.
/// </summary>
/// <remarks>
/// For each expected line the most similar unused actual line is chosen
/// (<c>LevenshteinSimilarity</c>); it counts as matched at similarity &gt;= 0.75. A case passes
/// when no expected line is missed; its score is matched / expected (1.0 when nothing is expected).
/// Cases whose images are missing, or whose response type is unexpected, score 0.
/// </remarks>
/// <param name="p">Parameters passed to <c>HandleDiffOcr</c>.</param>
/// <param name="verbose">Write per-case details and a summary to stderr.</param>
/// <returns>A <c>TestResponse</c>, or an <c>ErrorResponse</c> when cases.json is missing or empty.</returns>
private object RunTestCases(DiffOcrParams p, bool verbose)
{
    var dataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var casesPath = Path.Combine(dataDir, "cases.json");
    if (!File.Exists(casesPath))
        return new ErrorResponse($"cases.json not found at {casesPath}");

    var cases = JsonSerializer.Deserialize<List<TestCase>>(File.ReadAllText(casesPath));
    if (cases is null || cases.Count == 0)
        return new ErrorResponse("No test cases found in cases.json");

    // Result for a case that could not be evaluated: everything expected counts as missed.
    static TestCaseResult Unscored(TestCase tc) =>
        new TestCaseResult { Id = tc.Id, Passed = false, Score = 0, Missed = tc.Expected };

    var results = new List<TestCaseResult>();
    int passCount = 0;

    foreach (var tc in cases)
    {
        if (verbose) Console.Error.WriteLine($"\n=== Test: {tc.Id} ===");

        var fullPath = Path.Combine(dataDir, tc.FullImage);
        var imagePath = Path.Combine(dataDir, tc.Image);

        if (!File.Exists(fullPath))
        {
            if (verbose) Console.Error.WriteLine($" SKIP: full image not found: {fullPath}");
            results.Add(Unscored(tc));
            continue;
        }
        if (!File.Exists(imagePath))
        {
            if (verbose) Console.Error.WriteLine($" SKIP: tooltip image not found: {imagePath}");
            results.Add(Unscored(tc));
            continue;
        }

        // Same pipeline as live use: store the reference frame, then diff the tooltip frame against it.
        HandleSnapshot(new Request { File = fullPath });
        var response = HandleDiffOcr(new Request { File = imagePath, Debug = verbose }, p);

        List<string> actual;
        if (response is DiffOcrResponse diffResp)
        {
            actual = diffResp.Lines.Select(l => l.Text.Trim()).Where(t => t.Length > 0).ToList();
        }
        else if (response is OcrResponse ocrResp)
        {
            actual = ocrResp.Lines.Select(l => l.Text.Trim()).Where(t => t.Length > 0).ToList();
        }
        else
        {
            if (verbose) Console.Error.WriteLine($" ERROR: unexpected response type");
            results.Add(Unscored(tc));
            continue;
        }

        // Greedy matching: each expected line claims its best still-unclaimed actual line.
        var matched = new List<string>();
        var missed = new List<string>();
        var claimed = new HashSet<int>();

        foreach (var want in tc.Expected)
        {
            int pick = -1;
            double pickSim = 0;
            int idx = 0;
            foreach (var got in actual)
            {
                if (!claimed.Contains(idx))
                {
                    double sim = LevenshteinSimilarity(want, got);
                    if (sim > pickSim)
                    {
                        pickSim = sim;
                        pick = idx;
                    }
                }
                idx++;
            }

            bool accepted = pick >= 0 && pickSim >= 0.75;
            if (!accepted)
            {
                missed.Add(want);
                if (verbose)
                    Console.Error.WriteLine($" MISS: {want}" + (pick >= 0 ? $" (best: {actual[pick]}, sim={pickSim:F2})" : ""));
                continue;
            }

            matched.Add(want);
            claimed.Add(pick);
            if (verbose && pickSim < 1.0)
                Console.Error.WriteLine($" ~ {want} → {actual[pick]} (sim={pickSim:F2})");
        }

        // Actual lines nobody claimed.
        var extra = new List<string>();
        for (int i = 0; i < actual.Count; i++)
        {
            if (!claimed.Contains(i))
                extra.Add(actual[i]);
        }
        if (verbose)
        {
            foreach (var line in extra)
                Console.Error.WriteLine($" EXTRA: {line}");
        }

        double score = tc.Expected.Count > 0 ? (double)matched.Count / tc.Expected.Count : 1.0;
        bool passed = missed.Count == 0;
        if (passed) passCount++;

        if (verbose)
            Console.Error.WriteLine($" Result: {(passed ? "PASS" : "FAIL")} matched={matched.Count}/{tc.Expected.Count} extra={extra.Count} score={score:F2}");

        results.Add(new TestCaseResult
        {
            Id = tc.Id,
            Passed = passed,
            Score = score,
            Matched = matched,
            Missed = missed,
            Extra = extra,
        });
    }

    if (verbose)
        Console.Error.WriteLine($"\n=== Summary: {passCount}/{cases.Count} passed ===\n");

    return new TestResponse
    {
        Passed = passCount,
        Failed = cases.Count - passCount,
        Total = cases.Count,
        Results = results,
    };
}
|
||||
|
||||
/// <summary>
/// Fast crop from raw pixel bytes — avoids slow GDI+ Bitmap.Clone().
/// Copies <paramref name="cropH"/> rows of <paramref name="cropW"/> * 4 bytes from
/// <paramref name="px"/> into a new 32bpp ARGB bitmap.
/// </summary>
/// <param name="px">Source pixel buffer, read at 4 bytes per pixel.</param>
/// <param name="srcStride">Bytes per source row.</param>
/// <param name="cropX">Left edge of the crop in source pixels.</param>
/// <param name="cropY">Top edge of the crop in source pixels.</param>
/// <param name="cropW">Crop width in pixels.</param>
/// <param name="cropH">Crop height in pixels.</param>
/// <returns>A new bitmap owned by the caller.</returns>
/// <remarks>
/// The crop rectangle is not validated here; an out-of-range rectangle surfaces as the
/// exception thrown by <c>Marshal.Copy</c> or the <c>Bitmap</c> constructor. In that case the
/// partially built bitmap is unlocked and disposed before the exception propagates.
/// </remarks>
private static Bitmap CropFromBytes(byte[] px, int srcStride, int cropX, int cropY, int cropW, int cropH)
{
    var bmp = new Bitmap(cropW, cropH, PixelFormat.Format32bppArgb);
    try
    {
        var data = bmp.LockBits(new Rectangle(0, 0, cropW, cropH), ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);
        try
        {
            int dstStride = data.Stride;
            int rowBytes = cropW * 4;
            for (int y = 0; y < cropH; y++)
            {
                int srcOffset = (cropY + y) * srcStride + cropX * 4;
                Marshal.Copy(px, srcOffset, data.Scan0 + y * dstStride, rowBytes);
            }
        }
        finally
        {
            // Always release the lock, even when a row copy throws.
            bmp.UnlockBits(data);
        }
        return bmp;
    }
    catch
    {
        // The caller never receives the bitmap on failure, so it has to be released here.
        bmp.Dispose();
        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Case-insensitive similarity in [0, 1]: 1 minus the Levenshtein edit distance divided by the
/// length of the longer string. Both inputs are lower-cased with the invariant culture first.
/// </summary>
/// <returns>1.0 for equal strings (including two empty ones), 0.0 when exactly one is empty.</returns>
private static double LevenshteinSimilarity(string a, string b)
{
    string left = a.ToLowerInvariant();
    string right = b.ToLowerInvariant();
    if (left == right) return 1.0;
    if (left.Length == 0 || right.Length == 0) return 0.0;

    // Two rolling rows of the edit-distance table: "above" is row r-1, "row" is row r.
    int width = right.Length + 1;
    int[] above = new int[width];
    int[] row = new int[width];
    for (int col = 0; col < width; col++)
        above[col] = col;

    for (int r = 1; r <= left.Length; r++)
    {
        row[0] = r;
        char leftChar = left[r - 1];
        for (int col = 1; col < width; col++)
        {
            int deletion = above[col] + 1;
            int insertion = row[col - 1] + 1;
            int substitution = above[col - 1] + (leftChar == right[col - 1] ? 0 : 1);
            row[col] = Math.Min(Math.Min(deletion, insertion), substitution);
        }
        (above, row) = (row, above);
    }

    int distance = above[width - 1];
    return 1.0 - (double)distance / Math.Max(left.Length, right.Length);
}
|
||||
}
|
||||
|
|
@ -1 +0,0 @@
|
|||
// Top-level entry point: runs the daemon and returns the int produced by Daemon.Run() from the program.
return OcrDaemon.Daemon.Run();
|
||||
|
|
@ -1,210 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Diagnostics;
|
||||
using System.Drawing;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||
|
||||
/// <summary>
/// Manages a persistent Python subprocess for EasyOCR / PaddleOCR.
/// Lazy-starts on first request; reuses the process for subsequent calls.
/// Same stdin/stdout JSON-per-line protocol as the C# daemon itself.
/// </summary>
/// <remarks>
/// Starting the subprocess and each request/response exchange happen under one lock, so
/// concurrent callers share a single daemon and never interleave their lines on the pipe.
/// </remarks>
class PythonOcrBridge : IDisposable
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    private Process? _proc;
    private readonly string _daemonScript;
    private readonly string _pythonExe;
    private readonly object _lock = new();

    /// <summary>
    /// Resolves the daemon script and the Python interpreter relative to the executable directory.
    /// </summary>
    public PythonOcrBridge()
    {
        // Resolve paths relative to this exe
        var exeDir = AppContext.BaseDirectory;
        // exeDir = tools/OcrDaemon/bin/Release/net8.0-.../
        // Walk up 4 levels to tools/
        var toolsDir = Path.GetFullPath(Path.Combine(exeDir, "..", "..", "..", ".."));
        _daemonScript = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", "daemon.py"));

        // Use the venv Python if it exists, otherwise fall back to system python
        var venvPython = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", ".venv", "Scripts", "python.exe"));
        _pythonExe = File.Exists(venvPython) ? venvPython : "python";
    }

    /// <summary>
    /// Run OCR on a screen region using the specified Python engine.
    /// Captures screenshot, saves to temp file, sends to Python, returns OcrResponse.
    /// The temp file is deleted afterwards on a best-effort basis.
    /// </summary>
    public object HandleOcr(Request req, string engine)
    {
        var tmpPath = Path.Combine(Path.GetTempPath(), $"ocr_{Guid.NewGuid():N}.png");
        try
        {
            using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
            bitmap.Save(tmpPath, SdImageFormat.Png);
            return OcrFromFile(tmpPath, engine);
        }
        finally
        {
            try { File.Delete(tmpPath); } catch { /* ignore */ }
        }
    }

    /// <summary>
    /// Run OCR on an already-saved image file via the Python engine.
    /// </summary>
    /// <param name="imagePath">Path sent to the daemon as <c>imagePath</c>.</param>
    /// <param name="engine">Engine name sent to the daemon as <c>engine</c>.</param>
    /// <param name="ocrParams">Optional tuning values forwarded when set.</param>
    public OcrResponse OcrFromFile(string imagePath, string engine, OcrParams? ocrParams = null)
    {
        var pyReq = BuildPythonRequest(engine, ocrParams);
        pyReq["imagePath"] = imagePath;
        return SendPythonRequest(pyReq);
    }

    /// <summary>
    /// Run OCR on a bitmap via the Python engine (base64 PNG over pipe, no temp file).
    /// </summary>
    /// <param name="bitmap">Image to encode as PNG and send as <c>imageBase64</c>.</param>
    /// <param name="engine">Engine name sent to the daemon as <c>engine</c>.</param>
    /// <param name="ocrParams">Optional tuning values forwarded when set.</param>
    public OcrResponse OcrFromBitmap(Bitmap bitmap, string engine, OcrParams? ocrParams = null)
    {
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        var imageBase64 = Convert.ToBase64String(ms.ToArray());

        var pyReq = BuildPythonRequest(engine, ocrParams);
        pyReq["imageBase64"] = imageBase64;
        return SendPythonRequest(pyReq);
    }

    /// <summary>
    /// Builds the request dictionary: <c>cmd</c> and <c>engine</c> always, plus each optional
    /// tuning value that is set (MergeGap only when greater than zero).
    /// </summary>
    private static Dictionary<string, object?> BuildPythonRequest(string engine, OcrParams? ocrParams)
    {
        var req = new Dictionary<string, object?> { ["cmd"] = "ocr", ["engine"] = engine };
        if (ocrParams == null) return req;

        if (ocrParams.MergeGap > 0) req["mergeGap"] = ocrParams.MergeGap;
        if (ocrParams.LinkThreshold.HasValue) req["linkThreshold"] = ocrParams.LinkThreshold.Value;
        if (ocrParams.TextThreshold.HasValue) req["textThreshold"] = ocrParams.TextThreshold.Value;
        if (ocrParams.LowText.HasValue) req["lowText"] = ocrParams.LowText.Value;
        if (ocrParams.WidthThs.HasValue) req["widthThs"] = ocrParams.WidthThs.Value;
        if (ocrParams.Paragraph.HasValue) req["paragraph"] = ocrParams.Paragraph.Value;

        return req;
    }

    /// <summary>
    /// Serializes the request, writes it as one line to the daemon and reads one line back.
    /// </summary>
    /// <exception cref="Exception">
    /// The daemon could not be started, closed its output, sent an unparsable reply, or reported failure.
    /// </exception>
    private OcrResponse SendPythonRequest(object pyReq)
    {
        var json = JsonSerializer.Serialize(pyReq, JsonOptions);

        string responseLine;
        lock (_lock)
        {
            // Start (or restart) inside the lock so two callers cannot both spawn a daemon
            // and the process checked here is the one that gets written to.
            var proc = EnsureRunning();
            proc.StandardInput.WriteLine(json);
            proc.StandardInput.Flush();
            responseLine = proc.StandardOutput.ReadLine()
                ?? throw new Exception("Python daemon returned null");
        }

        var resp = JsonSerializer.Deserialize<PythonResponse>(responseLine, JsonOptions);
        if (resp == null)
            throw new Exception("Failed to parse Python OCR response");
        if (!resp.Ok)
            throw new Exception(resp.Error ?? "Python OCR failed");

        return new OcrResponse
        {
            Text = resp.Text ?? "",
            Lines = resp.Lines ?? [],
        };
    }

    /// <summary>
    /// Returns the running daemon process, spawning it first when there is none or it has exited.
    /// Must be called while holding <c>_lock</c>.
    /// </summary>
    /// <remarks>
    /// <c>_proc</c> is only assigned after the ready handshake succeeds. When startup fails the
    /// new process is killed and disposed, so a later call starts from scratch instead of
    /// talking to a daemon that never became ready.
    /// </remarks>
    private Process EnsureRunning()
    {
        if (_proc != null && !_proc.HasExited)
            return _proc;

        _proc?.Dispose();
        _proc = null;

        if (!File.Exists(_daemonScript))
            throw new Exception($"Python OCR daemon not found at {_daemonScript}");

        Console.Error.WriteLine($"Spawning Python OCR daemon: {_pythonExe} {_daemonScript}");

        var proc = new Process
        {
            StartInfo = new ProcessStartInfo
            {
                FileName = _pythonExe,
                Arguments = $"\"{_daemonScript}\"",
                UseShellExecute = false,
                RedirectStandardInput = true,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                CreateNoWindow = true,
            }
        };

        // Forward the daemon's stderr to our stderr with a prefix.
        proc.ErrorDataReceived += (_, e) =>
        {
            if (!string.IsNullOrEmpty(e.Data))
                Console.Error.WriteLine($"[python-ocr] {e.Data}");
        };

        try
        {
            proc.Start();
            proc.BeginErrorReadLine();

            // Block until the daemon writes its ready line. There is no timeout here: a daemon
            // that never answers (e.g. a very slow first model load) keeps this call waiting.
            var readyLine = proc.StandardOutput.ReadLine();
            if (readyLine == null)
                throw new Exception("Python OCR daemon exited before ready signal");

            var ready = JsonSerializer.Deserialize<PythonResponse>(readyLine, JsonOptions);
            if (ready?.Ready != true)
                throw new Exception($"Python OCR daemon did not send ready signal: {readyLine}");
        }
        catch
        {
            // Best-effort teardown; HasExited/Kill throw if the process never started.
            try { if (!proc.HasExited) proc.Kill(); } catch { /* ignore */ }
            proc.Dispose();
            throw;
        }

        _proc = proc;
        Console.Error.WriteLine("Python OCR daemon ready");
        return proc;
    }

    /// <summary>
    /// Closes the daemon's stdin, waits up to 3 seconds for it to exit and kills it otherwise.
    /// Errors during shutdown are ignored.
    /// </summary>
    public void Dispose()
    {
        if (_proc != null && !_proc.HasExited)
        {
            try
            {
                _proc.StandardInput.Close();
                _proc.WaitForExit(3000);
                if (!_proc.HasExited) _proc.Kill();
            }
            catch { /* ignore */ }
        }
        _proc?.Dispose();
        _proc = null;
    }

    /// <summary>One JSON line received from the Python daemon (ready signal or OCR reply).</summary>
    private class PythonResponse
    {
        [JsonPropertyName("ok")]
        public bool Ok { get; set; }

        [JsonPropertyName("ready")]
        public bool? Ready { get; set; }

        [JsonPropertyName("text")]
        public string? Text { get; set; }

        [JsonPropertyName("lines")]
        public List<OcrLineResult>? Lines { get; set; }

        [JsonPropertyName("error")]
        public string? Error { get; set; }
    }
}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
/// <summary>
/// Screen and file capture helpers. Every returned bitmap is owned by the caller.
/// </summary>
static class ScreenCapture
{
    [DllImport("user32.dll")]
    private static extern bool SetProcessDPIAware();

    [DllImport("user32.dll")]
    private static extern int GetSystemMetrics(int nIndex);

    /// <summary>Calls <c>SetProcessDPIAware</c>; its result is ignored.</summary>
    public static void InitDpiAwareness() => SetProcessDPIAware();

    /// <summary>
    /// Capture from screen, or load from file if specified.
    /// When file is set, loads the image and crops to region.
    /// </summary>
    /// <param name="file">Image file to load; null or empty means capture the screen.</param>
    /// <param name="region">Optional region; null means the whole image or the primary screen.</param>
    /// <returns>
    /// The loaded image itself when no region is given, otherwise a 32bpp ARGB copy of the region
    /// (origin clamped to 0, size limited to the image bounds).
    /// </returns>
    /// <remarks>
    /// NOTE(review): a region with no overlap yields a non-positive size, which is passed to
    /// <c>Bitmap.Clone</c> unchanged and is expected to throw there — confirm callers never do this.
    /// </remarks>
    public static Bitmap CaptureOrLoad(string? file, RegionRect? region)
    {
        if (string.IsNullOrEmpty(file))
            return CaptureScreen(region);

        var fullBmp = new Bitmap(file);
        if (region == null)
            return fullBmp;

        // The full image is only needed for the copy; dispose it even when Clone throws.
        using (fullBmp)
        {
            int cx = Math.Max(0, region.X);
            int cy = Math.Max(0, region.Y);
            int cw = Math.Min(region.Width, fullBmp.Width - cx);
            int ch = Math.Min(region.Height, fullBmp.Height - cy);
            return fullBmp.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
        }
    }

    /// <summary>
    /// Copies the given screen region, or the primary monitor when <paramref name="region"/> is
    /// null, into a new 32bpp ARGB bitmap.
    /// </summary>
    public static Bitmap CaptureScreen(RegionRect? region)
    {
        int x, y, w, h;
        if (region != null)
        {
            x = region.X;
            y = region.Y;
            w = region.Width;
            h = region.Height;
        }
        else
        {
            // Primary monitor only (0,0 origin, SM_CXSCREEN / SM_CYSCREEN)
            x = 0;
            y = 0;
            w = GetSystemMetrics(0); // SM_CXSCREEN
            h = GetSystemMetrics(1); // SM_CYSCREEN
        }

        var bitmap = new Bitmap(w, h, PixelFormat.Format32bppArgb);
        try
        {
            using var g = Graphics.FromImage(bitmap);
            g.CopyFromScreen(x, y, 0, 0, new System.Drawing.Size(w, h), CopyPixelOperation.SourceCopy);
            return bitmap;
        }
        catch
        {
            // The caller never receives the bitmap on failure, so release it here.
            bitmap.Dispose();
            throw;
        }
    }
}
|
||||
|
|
@ -1,177 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
static class SignalProcessing
|
||||
{
|
||||
/// <summary>
/// Finds the dominant period of <paramref name="signal"/> via normalised autocorrelation.
/// </summary>
/// <param name="signal">Samples to analyse.</param>
/// <param name="minPeriod">Smallest lag evaluated; the signal must hold at least three times this many samples.</param>
/// <param name="maxPeriod">Largest lag evaluated, additionally capped at a third of the signal length.</param>
/// <returns>
/// (period, score) for the first lag strictly between <paramref name="minPeriod"/> and the lag cap
/// whose autocorrelation exceeds 0.01 and is not smaller than either neighbour; score is that
/// autocorrelation value. (-1, 0) when the signal is too short, its summed squared deviation is
/// below 1.0, or no such lag exists.
/// </returns>
public static (int period, double score) FindPeriodWithScore(double[] signal, int minPeriod, int maxPeriod)
{
    int length = signal.Length;
    if (length < minPeriod * 3) return (-1, 0);

    double mean = signal.Average();
    double energy = 0; // sum of squared deviations; normaliser for every lag
    for (int i = 0; i < length; i++)
    {
        double dev = signal[i] - mean;
        energy += dev * dev;
    }
    if (energy < 1.0) return (-1, 0);

    int lagCap = Math.Min(maxPeriod, length / 3);
    var correlation = new double[lagCap + 1];
    for (int lag = minPeriod; lag <= lagCap; lag++)
    {
        double acc = 0;
        int pairs = length - lag;
        for (int i = 0; i < pairs; i++)
            acc += (signal[i] - mean) * (signal[i + lag] - mean);
        correlation[lag] = acc / energy;
    }

    // Take the first local maximum rather than the global one, so harmonics (2x, 3x)
    // and unrelated larger patterns are not picked up.
    int candidate = minPeriod + 1;
    while (candidate < lagCap)
    {
        double here = correlation[candidate];
        bool isPeak = here > 0.01
            && here >= correlation[candidate - 1]
            && here >= correlation[candidate + 1];
        if (isPeak)
            return (candidate, here);
        candidate++;
    }

    return (-1, 0);
}
|
||||
|
||||
/// <summary>
/// Finds runs of <paramref name="profile"/> whose values are at or above
/// <paramref name="threshold"/>, tolerating up to 5 consecutive lower values inside a run.
/// Runs with <c>end - start</c> below <paramref name="minLength"/> are dropped.
/// </summary>
/// <remarks>
/// NOTE(review): the two exits report <c>end</c> differently. A run closed by a gap longer than
/// 5 ends at the index of its last at-or-above value, while a run still open at the end of the
/// profile ends one past that index. Confirm which convention callers expect before changing it.
/// </remarks>
/// <returns>Segments in ascending order.</returns>
public static List<(int start, int end)> FindDarkDensitySegments(double[] profile, double threshold, int minLength)
{
    const int GapLimit = 5;
    var found = new List<(int start, int end)>();
    int total = profile.Length;
    int runStart = -1; // -1 while no run is open
    int belowRun = 0;  // consecutive below-threshold samples since the last hit

    for (int idx = 0; idx < total; idx++)
    {
        if (profile[idx] >= threshold)
        {
            if (runStart < 0) runStart = idx;
            belowRun = 0;
            continue;
        }

        if (runStart < 0) continue;          // below threshold and nothing open
        belowRun++;
        if (belowRun <= GapLimit) continue;  // still within the tolerated gap

        // Gap too long: close the run at the sample before the gap began.
        int closedEnd = idx - belowRun;
        if (closedEnd - runStart >= minLength)
            found.Add((runStart, closedEnd));
        runStart = -1;
        belowRun = 0;
    }

    if (runStart >= 0)
    {
        // Run reaches the end of the profile; trailing below-threshold samples are cut off.
        int openEnd = total - belowRun;
        if (openEnd - runStart >= minLength)
            found.Add((runStart, openEnd));
    }

    return found;
}
|
||||
|
||||
/// <summary>
|
||||
/// Find the extent of the grid in a 1D profile using local autocorrelation
|
||||
/// at the specific detected period. Only regions where the signal actually
|
||||
/// repeats at the given period will score high — much more precise than variance.
|
||||
/// </summary>
|
||||
public static (int start, int end) FindGridExtent(double[] signal, int period)
|
||||
{
|
||||
int n = signal.Length;
|
||||
int halfWin = period * 2; // window radius: 2 periods each side
|
||||
if (n < halfWin * 2 + period) return (-1, -1);
|
||||
|
||||
// Compute local AC at the specific lag=period in a sliding window
|
||||
double[] localAc = new double[n];
|
||||
for (int center = halfWin; center < n - halfWin; center++)
|
||||
{
|
||||
int wStart = center - halfWin;
|
||||
int wEnd = center + halfWin;
|
||||
int count = wEnd - wStart;
|
||||
|
||||
// Local mean
|
||||
double sum = 0;
|
||||
for (int i = wStart; i < wEnd; i++)
|
||||
sum += signal[i];
|
||||
double mean = sum / count;
|
||||
|
||||
// Local variance
|
||||
double varSum = 0;
|
||||
for (int i = wStart; i < wEnd; i++)
|
||||
varSum += (signal[i] - mean) * (signal[i] - mean);
|
||||
|
||||
if (varSum < 1.0) continue;
|
||||
|
||||
// AC at the specific lag=period
|
||||
double acSum = 0;
|
||||
for (int i = wStart; i < wEnd - period; i++)
|
||||
acSum += (signal[i] - mean) * (signal[i + period] - mean);
|
||||
|
||||
localAc[center] = Math.Max(0, acSum / varSum);
|
||||
}
|
||||
|
||||
// Find the longest contiguous run above threshold
|
||||
double maxAc = 0;
|
||||
for (int i = 0; i < n; i++)
|
||||
if (localAc[i] > maxAc) maxAc = localAc[i];
|
||||
if (maxAc < 0.02) return (-1, -1);
|
||||
|
||||
double threshold = maxAc * 0.25;
|
||||
|
||||
int bestStart = -1, bestEnd = -1, bestLen = 0;
|
||||
int curStartPos = -1;
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
if (localAc[i] > threshold)
|
||||
{
|
||||
if (curStartPos < 0) curStartPos = i;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (curStartPos >= 0)
|
||||
{
|
||||
int len = i - curStartPos;
|
||||
if (len > bestLen)
|
||||
{
|
||||
bestLen = len;
|
||||
bestStart = curStartPos;
|
||||
bestEnd = i;
|
||||
}
|
||||
curStartPos = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Handle run extending to end of signal
|
||||
if (curStartPos >= 0)
|
||||
{
|
||||
int len = n - curStartPos;
|
||||
if (len > bestLen)
|
||||
{
|
||||
bestStart = curStartPos;
|
||||
bestEnd = n;
|
||||
}
|
||||
}
|
||||
|
||||
if (bestStart < 0) return (-1, -1);
|
||||
|
||||
// Small extension to include cell borders at edges
|
||||
bestStart = Math.Max(0, bestStart - period / 4);
|
||||
bestEnd = Math.Min(n - 1, bestEnd + period / 4);
|
||||
|
||||
return (bestStart, bestEnd);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using OpenCvSharp;
|
||||
using OpenCvSharp.Extensions;
|
||||
|
||||
/// <summary>
/// Handles the match-template command: searches a screenshot (or image file)
/// for a template image using normalised correlation-coefficient matching.
/// </summary>
class TemplateMatchHandler
{
    /// <summary>
    /// Locates the template at <c>req.Path</c> inside the image obtained from
    /// <c>ScreenCapture.CaptureOrLoad(req.File, req.Region)</c>.
    /// </summary>
    /// <param name="req">
    /// Request carrying the template path, optional source file/region, and an
    /// optional threshold expressed in percent (values &lt;= 0 fall back to 0.7).
    /// </param>
    /// <returns>
    /// An <c>ErrorResponse</c> when the template path is missing or unreadable;
    /// otherwise a <c>TemplateMatchResponse</c>. On success the X/Y values are the
    /// centre of the best match, shifted by the region origin when a region is given.
    /// </returns>
    public object HandleTemplateMatch(Request req)
    {
        var templatePath = req.Path;

        if (string.IsNullOrEmpty(templatePath))
        {
            return new ErrorResponse("match-template command requires 'path' (template image file)");
        }

        if (!System.IO.File.Exists(templatePath))
        {
            return new ErrorResponse($"Template file not found: {templatePath}");
        }

        using var capture = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var captureMat = BitmapConverter.ToMat(capture);
        using var needle = Cv2.ImRead(templatePath, ImreadModes.Color);

        if (needle.Empty())
        {
            return new ErrorResponse($"Failed to load template image: {templatePath}");
        }

        // The template is read as 3-channel colour, so drop the alpha channel
        // from a 4-channel capture; anything else is copied through unchanged.
        using var haystack = new Mat();
        if (captureMat.Channels() != 4)
        {
            captureMat.CopyTo(haystack);
        }
        else
        {
            Cv2.CvtColor(captureMat, haystack, ColorConversionCodes.BGRA2BGR);
        }

        // A template larger than the searched image can never match.
        bool templateFits = needle.Rows <= haystack.Rows && needle.Cols <= haystack.Cols;
        if (!templateFits)
        {
            return new TemplateMatchResponse { Found = false };
        }

        using var scores = new Mat();
        Cv2.MatchTemplate(haystack, needle, scores, TemplateMatchModes.CCoeffNormed);
        Cv2.MinMaxLoc(scores, out _, out double bestScore, out _, out OpenCvSharp.Point bestLoc);

        // Request threshold is given in percent; default to 0.7 when unset.
        double minScore = 0.7;
        if (req.Threshold > 0)
        {
            minScore = req.Threshold / 100.0;
        }

        if (bestScore < minScore)
        {
            return new TemplateMatchResponse { Found = false, Confidence = bestScore };
        }

        // Report the match centre, offset by the region origin if one was provided.
        int originX = req.Region?.X ?? 0;
        int originY = req.Region?.Y ?? 0;
        int centerX = originX + bestLoc.X + needle.Cols / 2;
        int centerY = originY + bestLoc.Y + needle.Rows / 2;

        return new TemplateMatchResponse
        {
            Found = true,
            X = centerX,
            Y = centerY,
            Width = needle.Cols,
            Height = needle.Rows,
            Confidence = bestScore,
        };
    }
}
|
||||
|
|
@ -1,230 +0,0 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Collections.Generic;
|
||||
using System.Drawing;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text.Json;
|
||||
using Tesseract;
|
||||
|
||||
/// <summary>
/// Command-line regression runner: OCRs the images listed in a cases JSON file
/// and checks that every expected line appears in the recognised output.
/// </summary>
static class TestRunner
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNameCaseInsensitive = true,
    };

    /// <summary>
    /// Runs all test cases.
    /// </summary>
    /// <param name="args">
    /// Optional cases-file path (first non-flag argument; defaults to
    /// <c>tessdata/cases.json</c> under the application base directory) and an
    /// optional <c>--save-pre [dir]</c> flag that saves intermediate images
    /// (directory defaults to <c>processed</c>, relative to the cases file).
    /// </param>
    /// <returns>0 when every case passes, 1 when the cases file is missing or empty, 2 when any case fails.</returns>
    public static int Run(string[] args)
    {
        string baseDir = AppContext.BaseDirectory;
        (string? requestedCasesPath, string? savePreDir) = ParseArgs(args);

        string casesPath = !string.IsNullOrWhiteSpace(requestedCasesPath)
            ? requestedCasesPath
            : Path.Combine(baseDir, "tessdata", "cases.json");

        if (!File.Exists(casesPath))
        {
            Console.Error.WriteLine($"cases.json not found: {casesPath}");
            return 1;
        }

        string json = File.ReadAllText(casesPath);
        var cases = JsonSerializer.Deserialize<List<TestCase>>(json, JsonOptions) ?? [];
        if (cases.Count == 0)
        {
            Console.Error.WriteLine("No test cases found.");
            return 1;
        }

        string tessdataPath = Path.Combine(baseDir, "tessdata");
        string tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";

        using var engine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
        engine.DefaultPageSegMode = PageSegMode.SingleBlock;
        engine.SetVariable("preserve_interword_spaces", "1");
        var ocrHandler = new OcrHandler(engine);

        int totalExpected = 0;
        int totalMatched = 0;
        int caseFailures = 0;

        // Relative image and output paths are resolved against the cases file's directory.
        string casesDir = Path.GetDirectoryName(casesPath) ?? baseDir;
        if (!string.IsNullOrEmpty(savePreDir))
        {
            if (!Path.IsPathRooted(savePreDir))
                savePreDir = Path.Combine(casesDir, savePreDir);
            if (!Directory.Exists(savePreDir))
                Directory.CreateDirectory(savePreDir);
        }
        foreach (var tc in cases)
        {
            if (string.IsNullOrWhiteSpace(tc.Image))
            {
                Console.Error.WriteLine($"[SKIP] {tc.Id}: missing image path");
                continue;
            }

            string imagePath = Path.IsPathRooted(tc.Image)
                ? tc.Image
                : Path.Combine(casesDir, tc.Image);

            if (!File.Exists(imagePath))
            {
                Console.Error.WriteLine($"[SKIP] {tc.Id}: image not found: {imagePath}");
                continue;
            }

            List<string> actualSet;

            if (!string.IsNullOrWhiteSpace(tc.BeforeImage))
            {
                // Diff mode: snapshot the "before" image, then diff-OCR the case image against it.
                string beforePath = Path.IsPathRooted(tc.BeforeImage)
                    ? tc.BeforeImage
                    : Path.Combine(casesDir, tc.BeforeImage);

                if (!File.Exists(beforePath))
                {
                    Console.Error.WriteLine($"[SKIP] {tc.Id}: before image not found: {beforePath}");
                    continue;
                }

                ocrHandler.HandleSnapshot(new Request { File = beforePath });

                string? savePath = null;
                if (!string.IsNullOrEmpty(savePreDir))
                    savePath = Path.Combine(savePreDir, $"{tc.Id}.raw.png");

                var response = ocrHandler.HandleDiffOcr(new Request
                {
                    File = imagePath,
                    Path = savePath,
                });

                if (response is ErrorResponse err)
                {
                    Console.Error.WriteLine($"[FAIL] {tc.Id}: {err.Error}");
                    caseFailures++;
                    continue;
                }

                if (response is DiffOcrResponse diff)
                    actualSet = BuildActualSet(diff.Text, diff.Lines);
                else if (response is OcrResponse ocr)
                    actualSet = BuildActualSet(ocr.Text, ocr.Lines);
                else
                    actualSet = [];
            }
            else
            {
                // Direct mode: preprocess the image and run it straight through Tesseract.
                using var bitmap = new Bitmap(imagePath);
                using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);

                if (!string.IsNullOrEmpty(savePreDir))
                {
                    string outPath = Path.Combine(savePreDir, $"{tc.Id}.pre.png");
                    processed.Save(outPath, System.Drawing.Imaging.ImageFormat.Png);
                }
                using var pix = ImageUtils.BitmapToPix(processed);
                using var page = engine.Process(pix);

                var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
                var actualLines = lines.Select(l => Normalize(l.Text)).Where(s => s.Length > 0).ToList();

                var rawText = page.GetText() ?? string.Empty;
                var rawLines = rawText.Split('\n')
                    .Select(Normalize)
                    .Where(s => s.Length > 0)
                    .ToList();

                actualSet = actualLines.Concat(rawLines).Distinct().ToList();
            }

            // An explicit JSON null for "expected" is treated as "no expectations".
            var expectedLines = (tc.Expected ?? [])
                .Select(Normalize)
                .Where(s => s.Length > 0)
                .ToList();

            totalExpected += expectedLines.Count;
            int matched = expectedLines.Count(e => actualSet.Contains(e));
            totalMatched += matched;

            if (matched < expectedLines.Count)
            {
                caseFailures++;
                Console.Error.WriteLine($"[FAIL] {tc.Id}: matched {matched}/{expectedLines.Count}");
                var missing = expectedLines.Where(e => !actualSet.Contains(e)).ToList();
                foreach (var line in missing)
                    Console.Error.WriteLine($"  missing: {line}");

                Console.Error.WriteLine("  actual:");
                foreach (var line in actualSet)
                    Console.Error.WriteLine($"    > {line}");
            }
            else
            {
                Console.Error.WriteLine($"[OK]   {tc.Id}: matched {matched}/{expectedLines.Count}");
            }
        }

        Console.Error.WriteLine($"Summary: matched {totalMatched}/{totalExpected} lines, failed cases: {caseFailures}");
        return caseFailures == 0 ? 0 : 2;
    }

    /// <summary>
    /// Splits the command line into the cases-file path and the <c>--save-pre</c> directory.
    /// The cases path is the first argument that is neither the <c>--save-pre</c> flag
    /// nor the directory value consumed by it, so the flag may appear before the path.
    /// </summary>
    private static (string? CasesPath, string? SavePreDir) ParseArgs(string[] args)
    {
        string? casesPath = null;
        bool sawPositional = false;
        string? savePreDir = null;

        for (int i = 0; i < args.Length; i++)
        {
            if (string.Equals(args[i], "--save-pre", StringComparison.OrdinalIgnoreCase))
            {
                // The flag takes an optional directory: the next argument unless it is another flag.
                if (i + 1 < args.Length && !args[i + 1].StartsWith("--", StringComparison.Ordinal))
                {
                    savePreDir = args[i + 1];
                    i++;
                }
                else
                {
                    savePreDir = "processed";
                }
                continue;
            }

            if (!sawPositional)
            {
                casesPath = args[i];
                sawPositional = true;
            }
        }

        return (casesPath, savePreDir);
    }

    /// <summary>
    /// Canonicalises a line for comparison: trims, lower-cases (invariant culture)
    /// and collapses every run of whitespace into a single space.
    /// </summary>
    private static string Normalize(string input)
    {
        if (string.IsNullOrWhiteSpace(input)) return string.Empty;
        var chars = input.Trim().ToLowerInvariant().ToCharArray();
        var sb = new System.Text.StringBuilder(chars.Length);
        bool inSpace = false;
        foreach (char c in chars)
        {
            if (char.IsWhiteSpace(c))
            {
                if (!inSpace)
                {
                    sb.Append(' ');
                    inSpace = true;
                }
                continue;
            }
            inSpace = false;
            sb.Append(c);
        }
        return sb.ToString().Trim();
    }

    /// <summary>
    /// Builds the list of distinct normalised candidate lines from both the
    /// structured line results and the raw text split on newlines.
    /// </summary>
    private static List<string> BuildActualSet(string text, List<OcrLineResult> lines)
    {
        var lineTexts = lines.Select(l => Normalize(l.Text)).Where(s => s.Length > 0).ToList();
        var textLines = (text ?? string.Empty).Split('\n')
            .Select(Normalize)
            .Where(s => s.Length > 0)
            .ToList();
        return lineTexts.Concat(textLines).Distinct().ToList();
    }

    /// <summary>One entry of the cases JSON file.</summary>
    private sealed class TestCase
    {
        public string Id { get; set; } = "";
        public string Image { get; set; } = "";
        public string? BeforeImage { get; set; }
        public List<string> Expected { get; set; } = [];
    }
}
|
||||
|
|
@ -1,79 +0,0 @@
|
|||
[
|
||||
{
|
||||
"id": "vertex1",
|
||||
"image": "images/vertex1.png",
|
||||
"fullImage": "images/vertex-snapshot.png",
|
||||
"expected": [
|
||||
"The Vertex",
|
||||
"Tribal Mask",
|
||||
"Helmet",
|
||||
"Quality: +20%",
|
||||
"Evasion Rating: 79",
|
||||
"Energy Shield: 34",
|
||||
"Requires: Level 33",
|
||||
"16% Increased Life Regeneration Rate",
|
||||
"Has no Attribute Requirements",
|
||||
"+15% to Chaos Resistance",
|
||||
"Skill gems have no attribute requirements",
|
||||
"+3 to level of all skills",
|
||||
"15% increased mana cost efficiency",
|
||||
"Twice Corrupted",
|
||||
"\"A Queen should be seen, Admired, but never touched.\"",
|
||||
"- Atziri, Queen of the Vaal",
|
||||
"Asking Price:",
|
||||
"7x Divine Orb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "vertex2",
|
||||
"image": "images/vertex2.png",
|
||||
"fullImage": "images/vertex-snapshot.png",
|
||||
"expected": [
|
||||
"The Vertex",
|
||||
"Tribal Mask",
|
||||
"Helmet",
|
||||
"Quality: +20%",
|
||||
"Evasion Rating: 182",
|
||||
"Energy Shield: 77",
|
||||
"Requires: Level 33",
|
||||
"+29 To Spirit",
|
||||
"+1 to Level of All Minion Skills",
|
||||
"Has no Attribute Requirements",
|
||||
"130% increased Evasion and Energy Shield",
|
||||
"27% Increased Critical Hit Chance",
|
||||
"+13% to Chaos Resistance",
|
||||
"+2 to level of all skills",
|
||||
"Twice Corrupted",
|
||||
"\"A Queen should be seen, Admired, but never touched.\"",
|
||||
"- Atziri, Queen of the Vaal",
|
||||
"Asking Price:",
|
||||
"35x Divine Orb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "raphpith1",
|
||||
"image": "images/raphpith.png",
|
||||
"fullImage": "images/raphpith-snapshot.png",
|
||||
"expected": [
|
||||
"RATHPITH GLOBE",
|
||||
"SACRED Focus",
|
||||
"Focus",
|
||||
"Quality: +20%",
|
||||
"Energy Shield: 104",
|
||||
"Requires: Level 75",
|
||||
"16% Increased Energy Shield",
|
||||
"+24 To Maximum Mana",
|
||||
"+5% to all Elemental Resistances",
|
||||
"NON-CHANNELLING SPELLS HAVE 3% INCREASED MAGNITUDE OF AlLMENTS PER 100 MAXIMUM LIFE",
|
||||
"NON-CHANNELLING SPELLS DEAL 6% INCREASED DAMAGE PER 100 MAXIMUM MANA",
|
||||
"+72 TO MAXIMUM LIFE",
|
||||
"NON-CHANNELLING SPELLS HAVE 3% INCREASED CRITICAL HIT CHANCE PER 100 MAXIMUM LIFE",
|
||||
"NON-CHANNELLING SPELLS DEAL 6% INCREASED DAMACE PER 100 MAXIMUM LIFE",
|
||||
"Twice Corrupted",
|
||||
"THE VAAL EMPTIED THEIR SLAVES OF BEATING HEARTS",
|
||||
"AND LEFT A MOUNTAIN OF TWITCHING DEAD",
|
||||
"Asking Price:",
|
||||
"120x Divine Orb"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
[
|
||||
{
|
||||
"id": "1",
|
||||
"image": "images/tooltip1.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 0,
|
||||
"y": 84
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 1185,
|
||||
"y": 774
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"image": "images/tooltip2.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 304,
|
||||
"y": 0
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 983,
|
||||
"y": 470
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"image": "images/tooltip3.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 473,
|
||||
"y": 334
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 1114,
|
||||
"y": 914
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"image": "images/tooltip4.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 209,
|
||||
"y": 264
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 1097,
|
||||
"y": 915
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "5",
|
||||
"image": "images/tooltip5.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 763,
|
||||
"y": 0
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 1874,
|
||||
"y": 560
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "6",
|
||||
"image": "images/tooltip6.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 1541,
|
||||
"y": 154
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 2348,
|
||||
"y": 614
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "7",
|
||||
"image": "images/tooltip7.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 1921,
|
||||
"y": 40
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 2558,
|
||||
"y": 370
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
@ -1,166 +0,0 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* Fetches POE2 trade API data and generates Tesseract user-words and user-patterns
|
||||
* files to improve OCR accuracy for tooltip text.
|
||||
*
|
||||
* Usage: node generate-words.mjs
|
||||
* Output: poe2.user-words, poe2.user-patterns (in same directory)
|
||||
*/
|
||||
|
||||
import { writeFileSync } from "fs";
|
||||
import { dirname, join } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
|
||||
// Directory containing this script; output files are written next to it.
const __dirname = dirname(fileURLToPath(import.meta.url));
// User-Agent header sent with every trade API request.
const UA = "OAuth poe2trade/1.0 (contact: poe2trade@users.noreply.github.com)";

/**
 * Fetch one dataset from the trade2 data API and parse it as JSON.
 *
 * @param {string} path - Dataset name appended to the data endpoint.
 * @returns {Promise<any>} Parsed JSON body.
 * @throws {Error} With "<url>: <status>" when the response is not OK.
 */
async function fetchJson(path) {
  const endpoint = "https://www.pathofexile.com/api/trade2/data/" + path;
  const response = await fetch(endpoint, { headers: { "User-Agent": UA } });

  if (response.ok) {
    return response.json();
  }

  throw new Error(`${endpoint}: ${response.status}`);
}
|
||||
|
||||
/**
 * Download the trade API datasets, harvest every individual word from them,
 * and write `poe2.user-words` and `poe2.user-patterns` next to this script.
 */
async function main() {
  console.log("Fetching POE2 trade API data...");
  const [items, stats, static_, filters] = await Promise.all(
    ["items", "stats", "static", "filters"].map((dataset) => fetchJson(dataset)),
  );

  // Insertion order matters: the final sort is case-insensitive and stable,
  // so words differing only in case keep the order in which they were added.
  const words = new Set();

  // Break a label/phrase into individual words and record each one.
  // Multi-word names are NOT stored as phrases; only their words are kept.
  const collect = (text) => {
    if (!text) return;

    // Drop "#" placeholders and brackets; treat "+" and "-" as separators.
    const cleaned = text
      .replace(/#/g, "")
      .replace(/[{}()\[\]]/g, "")
      .replace(/[+\-]/g, " ");

    cleaned
      .split(/\s+/)
      // Strip leading/trailing non-letters (digits, punctuation).
      .map((token) => token.replace(/^[^a-zA-Z]+|[^a-zA-Z]+$/g, ""))
      // Ignore empties and single characters.
      .filter((token) => token.length >= 2)
      .forEach((token) => words.add(token));
  };

  // Items: category labels plus each entry's type, name and text
  // (base types like "Tribal Mask", "Leather Vest").
  for (const { label, entries } of items.result) {
    collect(label);
    for (const { type, name, text } of entries) {
      collect(type);
      collect(name);
      collect(text);
    }
  }

  // Stats: mod text like "+#% to Chaos Resistance", "# to maximum Life".
  for (const { entries } of stats.result) {
    for (const { text } of entries) {
      collect(text);
    }
  }

  // Static: currency/fragment names like "Divine Orb", "Scroll of Wisdom".
  for (const { label, entries } of static_.result) {
    collect(label);
    for (const { text } of entries) {
      collect(text);
    }
  }

  // Filters: group titles, filter labels and option texts.
  for (const group of filters.result) {
    collect(group.title);
    for (const filter of group.filters || []) {
      collect(filter.text);
      for (const option of filter.option?.options || []) {
        collect(option.text);
      }
    }
  }

  // Common tooltip keywords not in the trade API.
  [
    // Section headers
    "Quality", "Requires", "Level", "Asking", "Price",
    "Corrupted", "Mirrored", "Unmodifiable",
    "Twice", "Sockets",
    // Attributes
    "Strength", "Dexterity", "Intelligence", "Spirit",
    // Defense types
    "Armour", "Evasion", "Rating", "Energy", "Shield",
    // Damage types
    "Physical", "Elemental", "Lightning", "Cold", "Fire", "Chaos",
    // Common mod words
    "increased", "reduced", "more", "less",
    "added", "converted", "regeneration",
    "maximum", "minimum", "total",
    "Resistance", "Damage", "Speed", "Duration",
    "Critical", "Hit", "Chance", "Multiplier",
    "Attack", "Cast", "Spell", "Minion", "Skill",
    "Mana", "Life", "Rarity",
    // Item classes
    "Helmet", "Gloves", "Boots", "Body", "Belt",
    "Ring", "Amulet", "Shield", "Quiver",
    "Sword", "Axe", "Mace", "Dagger", "Wand", "Staff", "Bow",
    "Sceptre", "Crossbow", "Flail", "Spear",
    // Rarity
    "Normal", "Magic", "Rare", "Unique",
  ].forEach((keyword) => words.add(keyword));

  // Sort case-insensitively and write user-words, one word per line.
  const byLowercase = (a, b) => a.toLowerCase().localeCompare(b.toLowerCase());
  const sortedWords = Array.from(words).sort(byLowercase);
  const wordsPath = join(__dirname, "poe2.user-words");
  writeFileSync(wordsPath, sortedWords.join("\n") + "\n");
  console.log(`Wrote ${sortedWords.length} words to ${wordsPath}`);

  // user-patterns for common tooltip formats.
  // NOTE(review): these are written in regex-like syntax (`\+`, `\d+`).
  // Tesseract's user-patterns format defines its own escapes (e.g. `\d`,
  // `\c`, `\*`) — confirm these entries are interpreted as intended.
  const patterns = [
    // Stat values: "+12% to Chaos Resistance", "+3 to Level"
    "\\+\\d+%",
    "\\+\\d+",
    "\\-\\d+%",
    "\\-\\d+",
    // Ranges: "10-20"
    "\\d+-\\d+",
    // Currency amounts: "7x Divine Orb", "35x Divine Orb"
    "\\d+x",
    // Quality: "+20%"
    "\\d+%",
    // Level requirements
    "Level \\d+",
    // Asking Price section
    "Asking Price:",
    // Item level
    "Item Level: \\d+",
    // Requires line
    "Requires:",
    // Rating values
    "Rating: \\d+",
    "Shield: \\d+",
    "Quality: \\+\\d+%",
  ];
  const patternsPath = join(__dirname, "poe2.user-patterns");
  writeFileSync(patternsPath, patterns.join("\n") + "\n");
  console.log(`Wrote ${patterns.length} patterns to ${patternsPath}`);
}

// Script entry point: report any failure and exit with a non-zero status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
||||
|
Before Width: | Height: | Size: 5.8 MiB |
|
Before Width: | Height: | Size: 5.8 MiB |
|
Before Width: | Height: | Size: 5.8 MiB |
|
Before Width: | Height: | Size: 5.8 MiB |
|
Before Width: | Height: | Size: 5.8 MiB |
|
Before Width: | Height: | Size: 5.8 MiB |
|
Before Width: | Height: | Size: 5.9 MiB |
|
Before Width: | Height: | Size: 6 MiB |
|
Before Width: | Height: | Size: 5.6 MiB |
|
Before Width: | Height: | Size: 5.3 MiB |
|
Before Width: | Height: | Size: 5.1 MiB |
|
Before Width: | Height: | Size: 5.3 MiB |
|
Before Width: | Height: | Size: 5.2 MiB |
|
Before Width: | Height: | Size: 5.2 MiB |
|
Before Width: | Height: | Size: 4.9 MiB |
|
Before Width: | Height: | Size: 5.1 MiB |
|
Before Width: | Height: | Size: 5.3 MiB |
|
Before Width: | Height: | Size: 5.9 MiB |
|
Before Width: | Height: | Size: 5.8 MiB |
|
Before Width: | Height: | Size: 5.9 MiB |
|
|
@ -1,14 +0,0 @@
|
|||
\+\d+%
|
||||
\+\d+
|
||||
\-\d+%
|
||||
\-\d+
|
||||
\d+-\d+
|
||||
\d+x
|
||||
\d+%
|
||||
Level \d+
|
||||
Asking Price:
|
||||
Item Level: \d+
|
||||
Requires:
|
||||
Rating: \d+
|
||||
Shield: \d+
|
||||
Quality: \+\d+%
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
Persistent Python OCR daemon (stdin/stdout JSON-per-line protocol).
|
||||
|
||||
Supports EasyOCR engine, lazy-loaded on first use.
|
||||
Managed as a subprocess by the C# OcrDaemon.
|
||||
Managed as a subprocess by PythonOcrBridge in Poe2Trade.Screen.
|
||||
|
||||
Request: {"cmd": "ocr", "engine": "easyocr", "imagePath": "C:\\temp\\screenshot.png"}
|
||||
Response: {"ok": true, "text": "...", "lines": [{"text": "...", "words": [...]}]}
|
||||
|
|
@ -12,7 +12,6 @@ import sys
|
|||
import json
|
||||
|
||||
_easyocr_reader = None
|
||||
_paddle_ocr = None
|
||||
|
||||
|
||||
def _redirect_stdout_to_stderr():
|
||||
|
|
@ -116,13 +115,6 @@ def items_to_response(items):
|
|||
return {"ok": True, "text": "\n".join(all_text_parts), "lines": lines}
|
||||
|
||||
|
||||
def run_easyocr(image_path):
    """Open the image at ``image_path`` and run EasyOCR on its pixel array.

    Returns whatever ``run_easyocr_array`` returns for that array.
    """
    import numpy as np
    from PIL import Image

    pixels = np.array(Image.open(image_path))
    return run_easyocr_array(pixels)
|
||||
|
||||
|
||||
def run_easyocr_array(img, merge_gap=0, **easyocr_kwargs):
|
||||
reader = get_easyocr()
|
||||
|
||||
|
|
@ -147,67 +139,6 @@ def run_easyocr_array(img, merge_gap=0, **easyocr_kwargs):
|
|||
return items_to_response(items)
|
||||
|
||||
|
||||
def get_paddleocr():
    """Return the shared PaddleOCR instance, creating it on first use.

    Progress messages go to stderr, and stdout is redirected to stderr while
    the model is imported and constructed.
    """
    global _paddle_ocr

    if _paddle_ocr is not None:
        return _paddle_ocr

    sys.stderr.write("Loading PaddleOCR model...\n")
    sys.stderr.flush()

    import os

    # Only set when the caller has not already chosen a value.
    os.environ.setdefault("PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK", "True")

    options = dict(
        use_doc_orientation_classify=False,
        use_doc_unwarping=False,
        use_textline_orientation=False,
        lang="en",
        ocr_version="PP-OCRv4",
    )

    saved_stdout = _redirect_stdout_to_stderr()
    try:
        from paddleocr import PaddleOCR

        _paddle_ocr = PaddleOCR(**options)
    finally:
        # Always hand stdout back, even if the import or construction fails.
        _restore_stdout(saved_stdout)

    sys.stderr.write("PaddleOCR model loaded.\n")
    sys.stderr.flush()
    return _paddle_ocr
|
||||
|
||||
|
||||
def run_paddleocr_array(img, merge_gap=0):
    """Run PaddleOCR on an image array and convert the result to the response dict.

    ``img`` is expanded to three channels when it is 2-D and cut to its first
    three channels when it has four. When ``merge_gap`` is positive, detections
    are passed through ``merge_nearby_detections`` before the response is built.
    """
    engine = get_paddleocr()

    # Ensure a 3-channel image.
    if len(img.shape) == 2:
        import numpy as np

        img = np.stack([img] * 3, axis=-1)
    elif img.shape[2] == 4:
        img = img[:, :, :3]

    saved_stdout = _redirect_stdout_to_stderr()
    try:
        results = engine.predict(img)
    finally:
        _restore_stdout(saved_stdout)

    def field(result, key):
        # Results may be mapping-like or plain objects; support both.
        if hasattr(result, "get"):
            return result.get(key, [])
        return getattr(result, key, [])

    detections = []
    # PaddleOCR 3.4: results is a list of OCRResult objects.
    for result in results:
        texts = field(result, "rec_texts")
        polys = field(result, "dt_polys")
        for index, raw_text in enumerate(texts):
            stripped = raw_text.strip()
            if not stripped:
                continue
            # A text without a matching polygon gets an all-zero rectangle.
            if index < len(polys):
                x, y, w, h = bbox_to_rect(polys[index])
            else:
                x, y, w, h = 0, 0, 0, 0
            detections.append({"text": stripped, "x": x, "y": y, "w": w, "h": h})

    if merge_gap > 0:
        detections = merge_nearby_detections(detections, merge_gap)

    return items_to_response(detections)
|
||||
|
||||
|
||||
def load_image(req):
|
||||
"""Load image from either imagePath (file) or imageBase64 (base64-encoded PNG)."""
|
||||
from PIL import Image
|
||||
|
|
@ -232,29 +163,23 @@ def handle_request(req):
|
|||
if cmd != "ocr":
|
||||
return {"ok": False, "error": f"Unknown command: {cmd}"}
|
||||
|
||||
engine = req.get("engine", "")
|
||||
img = load_image(req)
|
||||
if img is None:
|
||||
return {"ok": False, "error": "Missing imagePath or imageBase64"}
|
||||
|
||||
merge_gap = req.get("mergeGap", 0)
|
||||
|
||||
if engine == "easyocr":
|
||||
easyocr_kwargs = {}
|
||||
for json_key, py_param in [
|
||||
("linkThreshold", "link_threshold"),
|
||||
("textThreshold", "text_threshold"),
|
||||
("lowText", "low_text"),
|
||||
("widthThs", "width_ths"),
|
||||
("paragraph", "paragraph"),
|
||||
]:
|
||||
if json_key in req:
|
||||
easyocr_kwargs[py_param] = req[json_key]
|
||||
return run_easyocr_array(img, merge_gap=merge_gap, **easyocr_kwargs)
|
||||
elif engine == "paddleocr":
|
||||
return run_paddleocr_array(img, merge_gap=merge_gap)
|
||||
else:
|
||||
return {"ok": False, "error": f"Unknown engine: {engine}"}
|
||||
easyocr_kwargs = {}
|
||||
for json_key, py_param in [
|
||||
("linkThreshold", "link_threshold"),
|
||||
("textThreshold", "text_threshold"),
|
||||
("lowText", "low_text"),
|
||||
("widthThs", "width_ths"),
|
||||
("paragraph", "paragraph"),
|
||||
]:
|
||||
if json_key in req:
|
||||
easyocr_kwargs[py_param] = req[json_key]
|
||||
return run_easyocr_array(img, merge_gap=merge_gap, **easyocr_kwargs)
|
||||
|
||||
|
||||
def main():
|
||||
|
|
|
|||