740 lines
30 KiB
C#
740 lines
30 KiB
C#
namespace OcrDaemon;
|
|
|
|
using System.Drawing;
|
|
using System.Drawing.Imaging;
|
|
using System.Runtime.InteropServices;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
using System.Text.Json;
|
|
using OpenCvSharp;
|
|
using OpenCvSharp.Extensions;
|
|
using Tesseract;
|
|
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
|
|
|
class OcrHandler(TesseractEngine engine)
|
|
{
|
|
private Bitmap? _referenceFrame;
|
|
|
|
/// <summary>
/// Captures (or loads) the image described by the request and runs OCR over the whole of it.
/// </summary>
/// <param name="req">Request supplying the optional source file and capture region.</param>
/// <returns>An <see cref="OcrResponse"/> holding the recognised text and the lines extracted from the page.</returns>
public object HandleOcr(Request req)
{
    using var source = ScreenCapture.CaptureOrLoad(req.File, req.Region);
    using var sourcePix = ImageUtils.BitmapToPix(source);
    using var ocrPage = engine.Process(sourcePix);

    // Lines are extracted with zero offsets, i.e. no coordinate shift is requested.
    return new OcrResponse
    {
        Text = ocrPage.GetText(),
        Lines = ImageUtils.ExtractLinesFromPage(ocrPage, offsetX: 0, offsetY: 0),
    };
}
|
|
|
|
/// <summary>
/// Saves a screenshot to <c>req.Path</c>. When a reference frame is stored, that frame is
/// written (the same image diff-ocr compares against); otherwise a new frame is captured.
/// </summary>
/// <param name="req">Request; <c>Path</c> is required, <c>File</c>/<c>Region</c> are used only when capturing.</param>
/// <returns>An <see cref="OkResponse"/> on success, or an <see cref="ErrorResponse"/> when no path was given.</returns>
public object HandleScreenshot(Request req)
{
    if (string.IsNullOrEmpty(req.Path))
        return new ErrorResponse("screenshot command requires 'path'");

    // If a reference frame exists, save that (same image used for diff-ocr).
    // Otherwise capture a new frame.
    var reference = _referenceFrame;
    var bitmap = reference ?? ScreenCapture.CaptureOrLoad(req.File, req.Region);
    try
    {
        var format = ImageUtils.GetImageFormat(req.Path);
        var dir = Path.GetDirectoryName(req.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        bitmap.Save(req.Path, format);
    }
    finally
    {
        // A freshly captured frame is owned by this call and must be released even when
        // saving fails; the stored reference frame stays alive for later diff-ocr calls.
        if (!ReferenceEquals(bitmap, reference))
            bitmap.Dispose();
    }

    return new OkResponse();
}
|
|
|
|
/// <summary>
/// Captures (or loads) an image and returns it PNG-encoded as a base64 string.
/// </summary>
/// <param name="req">Request supplying the optional source file and capture region.</param>
/// <returns>A <see cref="CaptureResponse"/> whose <c>Image</c> is the base64 PNG payload.</returns>
public object HandleCapture(Request req)
{
    using var shot = ScreenCapture.CaptureOrLoad(req.File, req.Region);
    using var pngStream = new MemoryStream();
    shot.Save(pngStream, SdImageFormat.Png);

    byte[] pngBytes = pngStream.ToArray();
    return new CaptureResponse { Image = Convert.ToBase64String(pngBytes) };
}
|
|
|
|
/// <summary>
/// Captures (or loads) a frame and stores it as the reference frame used by diff-ocr,
/// replacing and disposing any previously stored reference.
/// </summary>
/// <param name="req">Request supplying the optional source file and capture region.</param>
/// <returns>An <see cref="OkResponse"/>.</returns>
public object HandleSnapshot(Request req)
{
    // Capture first: if capturing throws, the previous reference remains usable instead of
    // being left behind as an already-disposed bitmap.
    var fresh = ScreenCapture.CaptureOrLoad(req.File, req.Region);

    var previous = _referenceFrame;
    _referenceFrame = fresh;
    previous?.Dispose();

    return new OkResponse();
}
|
|
|
|
/// <summary>
/// Runs diff-ocr with default parameters, overriding only the diff threshold when the
/// request carries a positive <c>Threshold</c>.
/// </summary>
/// <param name="req">The diff-ocr request.</param>
/// <returns>Whatever the two-argument <c>HandleDiffOcr</c> overload returns.</returns>
public object HandleDiffOcr(Request req)
{
    var parameters = new DiffOcrParams();
    if (req.Threshold > 0)
        parameters.DiffThresh = req.Threshold;

    return HandleDiffOcr(req, parameters);
}
|
|
|
|
/// <summary>
/// Diff detection + crop only. Compares the stored reference frame with a newly captured
/// (or loaded) frame, finds the longest band of rows and then of columns in which enough
/// pixels became darker, and crops that rectangle out of both frames.
/// </summary>
/// <param name="req">Request; only <c>File</c> is read here (the capture is made with no region).</param>
/// <param name="p">Detection parameters: <c>DiffThresh</c>, <c>MaxGap</c>, <c>RowThreshDiv</c>, <c>ColThreshDiv</c> and <c>TrimCutoff</c>.</param>
/// <returns>
/// The cropped current frame, the matching crop of the reference frame, the full current
/// frame and the crop rectangle; or <c>null</c> when there is no reference frame, nothing
/// changed, or the detected band is shorter than 50 pixels in either direction.
/// The caller owns and must dispose all three returned bitmaps.
/// </returns>
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffOcrParams p)
{
    if (_referenceFrame == null)
        return null;

    var current = ScreenCapture.CaptureOrLoad(req.File, null);
    bool handedOff = false;
    try
    {
        // Only the overlapping top-left area of the two frames is compared.
        int w = Math.Min(_referenceFrame.Width, current.Width);
        int h = Math.Min(_referenceFrame.Height, current.Height);

        // Each buffer keeps its own stride: the two bitmaps may differ in width, in which
        // case indexing both with the reference stride would read the wrong rows.
        byte[] refPx = ReadArgbPixels(_referenceFrame, w, h, out int refStride);
        byte[] curPx = ReadArgbPixels(current, w, h, out int curStride);

        int diffThresh = p.DiffThresh;

        // Pass 1: parallel row diff — compute rowCounts[] directly, no changed[] array
        int[] rowCounts = new int[h];
        Parallel.For(0, h, y =>
        {
            int count = 0;
            int refRow = y * refStride;
            int curRow = y * curStride;
            for (int x = 0; x < w; x++)
            {
                if (DarkeningAt(refPx, refRow + x * 4, curPx, curRow + x * 4) > diffThresh)
                    count++;
            }
            rowCounts[y] = count;
        });

        int totalChanged = 0;
        for (int y = 0; y < h; y++) totalChanged += rowCounts[y];

        if (totalChanged == 0)
            return null; // the finally block releases 'current'

        int maxGap = p.MaxGap;
        int rowThresh = w / p.RowThreshDiv;
        var (bestRowStart, bestRowEnd, bestRowLen) = FindLongestRun(rowCounts, rowThresh, maxGap);

        // Pass 2: parallel column diff — only within the row range, recompute from raw pixels
        int[] colCounts = new int[w];
        int rowRangeLen = bestRowEnd - bestRowStart + 1;
        if (rowRangeLen <= 200)
        {
            // Small bands are processed sequentially.
            for (int y = bestRowStart; y <= bestRowEnd; y++)
                AccumulateDarkerColumns(refPx, refStride, curPx, curStride, y, w, diffThresh, colCounts);
        }
        else
        {
            // Each worker fills a private histogram which is merged atomically at the end.
            Parallel.For(bestRowStart, bestRowEnd + 1,
                () => new int[w],
                (y, _, localCols) =>
                {
                    AccumulateDarkerColumns(refPx, refStride, curPx, curStride, y, w, diffThresh, localCols);
                    return localCols;
                },
                localCols =>
                {
                    for (int x = 0; x < w; x++)
                        Interlocked.Add(ref colCounts[x], localCols[x]);
                });
        }

        int tooltipHeight = rowRangeLen;
        int colThresh = tooltipHeight / p.ColThreshDiv;
        var (bestColStart, bestColEnd, bestColLen) = FindLongestRun(colCounts, colThresh, maxGap);

        Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");

        if (bestRowLen < 50 || bestColLen < 50)
        {
            Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
            return null; // the finally block releases 'current'
        }

        int minX = bestColStart;
        int minY = bestRowStart;
        int maxX = Math.Min(bestColEnd, w - 1);
        int maxY = Math.Min(bestRowEnd, h - 1);

        // Trim sparse columns from the right edge: columns whose count falls below a
        // fraction (TrimCutoff) of the average count over the middle half of the band are
        // dropped, but the band is never trimmed to 100 columns or fewer.
        int colSpan = maxX - minX + 1;
        if (colSpan > 100)
        {
            int q1 = minX + colSpan / 4;
            int q3 = minX + colSpan * 3 / 4;
            long midSum = 0;
            int midCount = 0;
            for (int x = q1; x <= q3; x++) { midSum += colCounts[x]; midCount++; }
            double avgMidDensity = (double)midSum / midCount;
            double cutoff = avgMidDensity * p.TrimCutoff;
            while (maxX > minX + 100 && colCounts[maxX] < cutoff)
                maxX--;
        }

        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;

        var cropped = CropFromBytes(curPx, curStride, minX, minY, rw, rh);
        Bitmap refCropped;
        try
        {
            refCropped = CropFromBytes(refPx, refStride, minX, minY, rw, rh);
        }
        catch
        {
            cropped.Dispose();
            throw;
        }

        var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };

        Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");

        handedOff = true;
        return (cropped, refCropped, current, region);
    }
    finally
    {
        // Until the tuple is returned this method owns 'current'; release it on every
        // early exit and on exceptions.
        if (!handedOff)
            current.Dispose();
    }
}

/// <summary>Copies the top-left w×h area of a bitmap into a byte array as 32bpp ARGB and reports the row stride of that copy.</summary>
private static byte[] ReadArgbPixels(Bitmap bmp, int w, int h, out int stride)
{
    var data = bmp.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
    try
    {
        stride = data.Stride;
        byte[] px = new byte[stride * h];
        Marshal.Copy(data.Scan0, px, 0, px.Length);
        return px;
    }
    finally
    {
        bmp.UnlockBits(data);
    }
}

/// <summary>Sums (reference − current) over the first three bytes of a pixel; positive values mean the current pixel is darker.</summary>
private static int DarkeningAt(byte[] refPx, int refIdx, byte[] curPx, int curIdx)
{
    return (refPx[refIdx] - curPx[curIdx])
         + (refPx[refIdx + 1] - curPx[curIdx + 1])
         + (refPx[refIdx + 2] - curPx[curIdx + 2]);
}

/// <summary>Increments counts[x] for every column x of row y whose darkening exceeds the threshold.</summary>
private static void AccumulateDarkerColumns(byte[] refPx, int refStride, byte[] curPx, int curStride, int y, int w, int diffThresh, int[] counts)
{
    int refRow = y * refStride;
    int curRow = y * curStride;
    for (int x = 0; x < w; x++)
    {
        if (DarkeningAt(refPx, refRow + x * 4, curPx, curRow + x * 4) > diffThresh)
            counts[x]++;
    }
}

/// <summary>Finds the longest run of entries at or above the threshold, bridging gaps of up to maxGap inactive entries.</summary>
private static (int Start, int End, int Length) FindLongestRun(int[] counts, int threshold, int maxGap)
{
    int bestStart = 0, bestEnd = 0, bestLen = 0;
    int runStart = -1, lastActive = -1;

    for (int i = 0; i < counts.Length; i++)
    {
        if (counts[i] >= threshold)
        {
            if (runStart < 0) runStart = i;
            lastActive = i;
        }
        else if (runStart >= 0 && i - lastActive > maxGap)
        {
            // The gap grew too wide: close the run at its last active entry.
            int len = lastActive - runStart + 1;
            if (len > bestLen) { bestStart = runStart; bestEnd = lastActive; bestLen = len; }
            runStart = -1;
        }
    }

    // A run still open at the end of the array is a candidate too.
    if (runStart >= 0)
    {
        int len = lastActive - runStart + 1;
        if (len > bestLen) { bestStart = runStart; bestEnd = lastActive; bestLen = len; }
    }

    return (bestStart, bestEnd, bestLen);
}
|
|
|
|
/// <summary>
/// Full diff-ocr pipeline: locates the changed region via <c>DiffCrop</c>, optionally saves
/// the raw / fullscreen / preprocessed images next to <c>req.Path</c>, preprocesses the crop
/// and runs Tesseract either line by line or on the whole block.
/// </summary>
/// <param name="req">Request; <c>File</c>, <c>Path</c> and <c>Debug</c> are read here.</param>
/// <param name="p">Detection, preprocessing and OCR parameters.</param>
/// <returns>
/// An <see cref="ErrorResponse"/> when no reference frame is stored, an empty
/// <see cref="OcrResponse"/> when no region was detected, otherwise a <see cref="DiffOcrResponse"/>.
/// </returns>
public object HandleDiffOcr(Request req, DiffOcrParams p)
{
    if (_referenceFrame == null)
        return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");

    var diff = DiffCrop(req, p);
    if (!diff.HasValue)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, refCropped, current, region) = diff.Value;
    using var currentOwner = current;
    using var croppedOwner = cropped;
    using var refCroppedOwner = refCropped;

    bool debug = req.Debug;
    int minX = region.X, minY = region.Y;
    int rw = region.Width, rh = region.Height;

    // Null when the request carries no usable output path.
    string? savePath = string.IsNullOrEmpty(req.Path) ? null : req.Path;

    // Save raw crop if path is provided
    if (savePath != null)
    {
        var targetDir = Path.GetDirectoryName(savePath);
        if (!string.IsNullOrEmpty(targetDir) && !Directory.Exists(targetDir))
            Directory.CreateDirectory(targetDir);
        cropped.Save(savePath, ImageUtils.GetImageFormat(savePath));
        if (debug) Console.Error.WriteLine($" diff-ocr: saved raw to {savePath}");
    }

    // Pre-process for OCR — get Mat for per-line detection and padding
    Mat processedMat;
    if (!p.UseBackgroundSub)
    {
        using var topHatBmp = ImagePreprocessor.PreprocessForOcr(cropped, p.KernelSize, p.Upscale);
        processedMat = BitmapConverter.ToMat(topHatBmp);
    }
    else
    {
        processedMat = ImagePreprocessor.PreprocessWithBackgroundSubMat(cropped, refCropped, p.DimPercentile, p.TextThresh, p.Upscale, p.SoftThreshold);
    }
    using var processedOwner = processedMat; // ensure disposal

    // Save fullscreen and preprocessed versions alongside raw
    if (savePath != null)
    {
        var ext = Path.GetExtension(savePath);

        var fullPath = Path.ChangeExtension(savePath, ".full" + ext);
        current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
        if (debug) Console.Error.WriteLine($" diff-ocr: saved fullscreen to {fullPath}");

        var prePath = Path.ChangeExtension(savePath, ".pre" + ext);
        using var preBmp = BitmapConverter.ToBitmap(processedMat);
        preBmp.Save(prePath, ImageUtils.GetImageFormat(prePath));
        if (debug) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
    }

    int pad = p.OcrPad;
    int upscale = p.Upscale > 0 ? p.Upscale : 1;

    // Per-line OCR: detect text lines via horizontal projection, OCR each individually
    if (p.UsePerLineOcr)
    {
        // DetectTextLines needs binary input; if soft threshold produced grayscale, binarize a copy
        int minRowPx = Math.Max(processedMat.Cols / 200, 3);
        using var binarized = p.SoftThreshold ? new Mat() : null;
        if (p.SoftThreshold)
            Cv2.Threshold(processedMat, binarized!, 128, 255, ThresholdTypes.Binary);
        var detectionInput = p.SoftThreshold ? binarized! : processedMat;
        var textLines = ImagePreprocessor.DetectTextLines(detectionInput, minRowPixels: minRowPx, gapTolerance: p.LineGapTolerance * upscale);
        if (debug) Console.Error.WriteLine($" diff-ocr: detected {textLines.Count} text lines");

        if (textLines.Count > 0)
        {
            var lines = new List<OcrLineResult>();
            int linePadY = p.LinePadY;

            foreach (var (lineTop, lineBottom) in textLines)
            {
                // Grow the strip vertically, clamped to the image.
                int y0 = Math.Max(lineTop - linePadY, 0);
                int y1 = Math.Min(lineBottom + linePadY, processedMat.Rows - 1);
                int stripHeight = y1 - y0 + 1;

                // Crop line strip (full width)
                using var strip = new Mat(processedMat, new OpenCvSharp.Rect(0, y0, processedMat.Cols, stripHeight));

                // Add whitespace padding around the line
                using var paddedStrip = new Mat();
                Cv2.CopyMakeBorder(strip, paddedStrip, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);

                using var stripBmp = BitmapConverter.ToBitmap(paddedStrip);
                using var stripPix = ImageUtils.BitmapToPix(stripBmp);
                using var stripPage = engine.Process(stripPix, (PageSegMode)p.Psm);

                // Extract words, adjusting coordinates back to screen space
                // Word coords are in padded image space → subtract pad, add line offset, scale to original, add region offset
                var words = new List<OcrWordResult>();
                using var wordIter = stripPage.GetIterator();
                if (wordIter != null)
                {
                    wordIter.Begin();
                    do
                    {
                        var rawWord = wordIter.GetText(PageIteratorLevel.Word);
                        if (string.IsNullOrWhiteSpace(rawWord))
                            continue;

                        // Words with confidence below 50 are dropped.
                        float confidence = wordIter.GetConfidence(PageIteratorLevel.Word);
                        if (confidence < 50)
                            continue;

                        if (!wordIter.TryGetBoundingBox(PageIteratorLevel.Word, out var box))
                            continue;

                        words.Add(new OcrWordResult
                        {
                            Text = rawWord.Trim(),
                            X = (box.X1 - pad) / upscale + minX,
                            Y = (box.Y1 - pad + y0) / upscale + minY,
                            Width = box.Width / upscale,
                            Height = box.Height / upscale,
                        });
                    } while (wordIter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
                }

                if (words.Count > 0)
                {
                    lines.Add(new OcrLineResult
                    {
                        Text = string.Join(" ", words.Select(w => w.Text)),
                        Words = words,
                    });
                }
            }

            return new DiffOcrResponse
            {
                Text = string.Join("\n", lines.Select(l => l.Text)) + "\n",
                Lines = lines,
                Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
            };
        }

        if (debug) Console.Error.WriteLine(" diff-ocr: no text lines detected, falling back to whole-block OCR");
    }

    // Whole-block fallback: add padding and use configurable PSM
    using var blockPadded = new Mat();
    Cv2.CopyMakeBorder(processedMat, blockPadded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);
    using var blockBmp = BitmapConverter.ToBitmap(blockPadded);
    using var blockPix = ImageUtils.BitmapToPix(blockBmp);
    using var blockPage = engine.Process(blockPix, (PageSegMode)p.Psm);

    // Adjust word coordinates: subtract padding offset
    int padOffset = pad / upscale;
    return new DiffOcrResponse
    {
        Text = blockPage.GetText(),
        Lines = ImageUtils.ExtractLinesFromPage(blockPage, offsetX: minX - padOffset, offsetY: minY - padOffset),
        Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
    };
}
|
|
|
|
/// <summary>
/// Runs Tesseract on an already-preprocessed bitmap: converts it to a Mat, surrounds it with
/// a white border of <paramref name="pad"/> pixels, OCRs it with the requested page
/// segmentation mode and offsets the extracted lines using the supplied region.
/// </summary>
/// <param name="processedBmp">The preprocessed image to recognise.</param>
/// <param name="region">Region whose X/Y seed the line offsets; returned unchanged in the response.</param>
/// <param name="pad">Border width in pixels added on every side before OCR.</param>
/// <param name="upscale">Scale factor the padding is divided by; values of zero or less are treated as 1.</param>
/// <param name="psm">Tesseract page segmentation mode, cast to <c>PageSegMode</c>.</param>
/// <returns>A <see cref="DiffOcrResponse"/> with the text, lines and the given region.</returns>
public DiffOcrResponse RunTesseractOnBitmap(Bitmap processedBmp, RegionRect region, int pad = 10, int upscale = 2, int psm = 6)
{
    using var sourceMat = BitmapConverter.ToMat(processedBmp);
    using var bordered = new Mat();
    Cv2.CopyMakeBorder(sourceMat, bordered, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);

    using var borderedBmp = BitmapConverter.ToBitmap(bordered);
    using var borderedPix = ImageUtils.BitmapToPix(borderedBmp);
    using var ocrPage = engine.Process(borderedPix, (PageSegMode)psm);

    // Guard the division below against a non-positive scale factor.
    int scale = Math.Max(upscale, 1);
    int padOffset = pad / scale;

    return new DiffOcrResponse
    {
        Text = ocrPage.GetText(),
        Lines = ImageUtils.ExtractLinesFromPage(ocrPage,
            offsetX: region.X - padOffset,
            offsetY: region.Y - padOffset),
        Region = region,
    };
}
|
|
|
|
/// <summary>Runs the test cases with default parameters and verbose logging.</summary>
/// <param name="req">Unused.</param>
/// <returns>The result of <c>RunTestCases</c>.</returns>
public object HandleTest(Request req)
{
    var defaults = new DiffOcrParams();
    return RunTestCases(defaults, verbose: true);
}
|
|
|
|
/// <summary>
/// Tunes parameters in two phases — top-hat preprocessing first, then background
/// subtraction seeded from the phase-1 result — and reports whichever scored higher.
/// </summary>
/// <param name="req">Unused.</param>
/// <returns>A <see cref="TuneResponse"/> with the best score, the winning parameters and the number of evaluations.</returns>
public object HandleTune(Request req)
{
    int evalCount = 0;

    // --- Phase 1: Tune top-hat approach ---
    Console.Error.WriteLine("\n========== Phase 1: Top-Hat ==========");
    var topHatParams = new DiffOcrParams { UseBackgroundSub = false };
    double topHatScore = TuneParams(topHatParams, ref evalCount, tuneTopHat: true, tuneBgSub: false);

    // --- Phase 2: Tune background-subtraction approach ---
    Console.Error.WriteLine("\n========== Phase 2: Background Subtraction ==========");
    // Start bgSub from the best detection params found in phase 1
    var bgSubParams = topHatParams.Clone();
    bgSubParams.UseBackgroundSub = true;
    double bgSubScore = TuneParams(bgSubParams, ref evalCount, tuneTopHat: false, tuneBgSub: true);

    // Pick the winner; ties go to top-hat.
    var winner = bgSubScore > topHatScore ? bgSubParams : topHatParams;
    double winnerScore = Math.Max(topHatScore, bgSubScore);
    string winnerLabel = winner.UseBackgroundSub ? "BgSub" : "TopHat";

    Console.Error.WriteLine("\n========== Result ==========");
    Console.Error.WriteLine($" Top-Hat: {topHatScore:F3} {topHatParams}");
    Console.Error.WriteLine($" BgSub: {bgSubScore:F3} {bgSubParams}");
    Console.Error.WriteLine($" Winner: {winnerLabel} evals={evalCount}\n");

    // Final verbose report with best params
    RunTestCases(winner, verbose: true);

    return new TuneResponse
    {
        BestScore = winnerScore,
        BestParams = winner,
        Iterations = evalCount,
    };
}
|
|
|
|
/// <summary>
/// Coordinate-wise parameter search: for up to three rounds, sweeps each parameter over a
/// fixed list of candidate values while holding the others, and keeps a value only when it
/// strictly beats the best score so far. Stops early after a round with no improvement.
/// </summary>
/// <param name="best">Starting parameters; mutated in place to the best set found.</param>
/// <param name="totalEvals">Running count of scored trials, incremented once per trial.</param>
/// <param name="tuneTopHat">Include the top-hat specific sweep (kernel size).</param>
/// <param name="tuneBgSub">Include the background-subtraction specific sweeps.</param>
/// <returns>The best score reached.</returns>
private double TuneParams(DiffOcrParams best, ref int totalEvals, bool tuneTopHat, bool tuneBgSub)
{
    double bestScore = ScoreParams(best);
    Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");

    // Detection params (shared by both approaches)
    var sharedSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
    {
        ("diffThresh", [10, 15, 20, 25, 30, 40, 50, 60], (p, v) => p.DiffThresh = v),
        ("rowThreshDiv", [10, 15, 20, 25, 30, 40, 50, 60], (p, v) => p.RowThreshDiv = v),
        ("colThreshDiv", [5, 8, 10, 12, 15, 20, 25, 30], (p, v) => p.ColThreshDiv = v),
        ("maxGap", [5, 8, 10, 12, 15, 20, 25, 30], (p, v) => p.MaxGap = v),
        ("upscale", [1, 2, 3], (p, v) => p.Upscale = v),
        ("ocrPad", [0, 5, 10, 15, 20, 30], (p, v) => p.OcrPad = v),
        ("psm", [4, 6, 11, 13], (p, v) => p.Psm = v),
    };

    // Top-hat specific
    var topHatSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
    {
        ("kernelSize", [11, 15, 19, 21, 25, 31, 35, 41, 51], (p, v) => p.KernelSize = v),
    };

    // Background-subtraction specific
    var bgSubSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
    {
        ("dimPercentile", [5, 10, 15, 20, 25, 30, 40, 50], (p, v) => p.DimPercentile = v),
        ("textThresh", [10, 15, 20, 25, 30, 40, 50, 60, 80], (p, v) => p.TextThresh = v),
        ("lineGapTolerance", [3, 5, 8, 10, 15], (p, v) => p.LineGapTolerance = v),
        ("linePadY", [5, 10, 15, 20], (p, v) => p.LinePadY = v),
    };

    double[] trimCandidates = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];

    var activeSweeps = sharedSweeps
        .Concat(tuneTopHat ? topHatSweeps : [])
        .Concat(tuneBgSub ? bgSubSweeps : [])
        .ToArray();

    const int maxRounds = 3;
    for (int round = 1; round <= maxRounds; round++)
    {
        bool roundImproved = false;
        Console.Error.WriteLine($"--- Round {round} ---");

        foreach (var (label, candidates, apply) in activeSweeps)
        {
            Console.Error.Write($" {label}: ");
            int winningValue = 0;
            double winningScore = -1;

            foreach (int candidate in candidates)
            {
                // Score a copy so the current best is untouched until a winner is known.
                var trial = best.Clone();
                apply(trial, candidate);
                double trialScore = ScoreParams(trial);
                totalEvals++;
                Console.Error.Write($"{candidate}={trialScore:F3} ");

                if (trialScore > winningScore)
                {
                    winningScore = trialScore;
                    winningValue = candidate;
                }
            }
            Console.Error.WriteLine();

            // Adopt the sweep winner only on a strict improvement.
            if (winningScore > bestScore)
            {
                apply(best, winningValue);
                bestScore = winningScore;
                roundImproved = true;
                Console.Error.WriteLine($" → {label}={winningValue} score={bestScore:F3}");
            }
        }

        // Sweep trimCutoff
        Console.Error.Write($" trimCutoff: ");
        double winningTrim = best.TrimCutoff;
        double winningTrimScore = bestScore;

        foreach (double candidate in trimCandidates)
        {
            var trial = best.Clone();
            trial.TrimCutoff = candidate;
            double trialScore = ScoreParams(trial);
            totalEvals++;
            Console.Error.Write($"{candidate:F2}={trialScore:F3} ");

            if (trialScore > winningTrimScore)
            {
                winningTrimScore = trialScore;
                winningTrim = candidate;
            }
        }
        Console.Error.WriteLine();

        if (winningTrimScore > bestScore)
        {
            best.TrimCutoff = winningTrim;
            bestScore = winningTrimScore;
            roundImproved = true;
            Console.Error.WriteLine($" → trimCutoff={winningTrim:F2} score={bestScore:F3}");
        }

        Console.Error.WriteLine($" End of round {round}: score={bestScore:F3} {best}");
        if (!roundImproved)
            break;
    }

    return bestScore;
}
|
|
|
|
/// <summary>
/// Scores a parameter set as the mean per-case score over all test cases.
/// Returns 0 when the test run did not yield a <see cref="TestResponse"/> with at least one case.
/// </summary>
/// <param name="p">Parameters to evaluate.</param>
/// <returns>The average of the per-case scores, or 0.</returns>
private double ScoreParams(DiffOcrParams p)
{
    if (RunTestCases(p, verbose: false) is not TestResponse { Total: > 0 } report)
        return 0;

    return report.Results.Average(r => r.Score);
}
|
|
|
|
/// <summary>
/// Runs every case listed in <c>tessdata/cases.json</c> through the snapshot + diff-ocr
/// pipeline and fuzzy-matches the recognised lines against the expected ones.
/// </summary>
/// <param name="p">Parameters passed to diff-ocr for every case.</param>
/// <param name="verbose">When true, per-case details and a summary are written to stderr.</param>
/// <returns>
/// A <see cref="TestResponse"/> with pass/fail counts and per-case results, or an
/// <see cref="ErrorResponse"/> when cases.json is missing or contains no cases.
/// </returns>
private object RunTestCases(DiffOcrParams p, bool verbose)
{
    string dataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
    string manifestPath = Path.Combine(dataDir, "cases.json");
    if (!File.Exists(manifestPath))
        return new ErrorResponse($"cases.json not found at {manifestPath}");

    var cases = JsonSerializer.Deserialize<List<TestCase>>(File.ReadAllText(manifestPath));
    if (cases is not { Count: > 0 })
        return new ErrorResponse("No test cases found in cases.json");

    // Result recorded for a case that could not be evaluated: zero score, everything missed.
    static TestCaseResult Unscored(TestCase testCase) =>
        new TestCaseResult { Id = testCase.Id, Passed = false, Score = 0, Missed = testCase.Expected };

    var results = new List<TestCaseResult>();

    foreach (var tc in cases)
    {
        if (verbose) Console.Error.WriteLine($"\n=== Test: {tc.Id} ===");

        string referencePath = Path.Combine(dataDir, tc.FullImage);
        string tooltipPath = Path.Combine(dataDir, tc.Image);

        if (!File.Exists(referencePath))
        {
            if (verbose) Console.Error.WriteLine($" SKIP: full image not found: {referencePath}");
            results.Add(Unscored(tc));
            continue;
        }
        if (!File.Exists(tooltipPath))
        {
            if (verbose) Console.Error.WriteLine($" SKIP: tooltip image not found: {tooltipPath}");
            results.Add(Unscored(tc));
            continue;
        }

        // Run the same pipeline: snapshot (reference) then diff-ocr (with tooltip)
        HandleSnapshot(new Request { File = referencePath });
        var response = HandleDiffOcr(new Request { File = tooltipPath, Debug = verbose }, p);

        // Extract actual lines from the response
        List<string> actualLines;
        if (response is DiffOcrResponse diffResp)
        {
            actualLines = diffResp.Lines.Select(l => l.Text.Trim()).Where(l => l.Length > 0).ToList();
        }
        else if (response is OcrResponse ocrResp)
        {
            actualLines = ocrResp.Lines.Select(l => l.Text.Trim()).Where(l => l.Length > 0).ToList();
        }
        else
        {
            if (verbose) Console.Error.WriteLine($" ERROR: unexpected response type");
            results.Add(Unscored(tc));
            continue;
        }

        // Fuzzy match expected vs actual: each expected line greedily claims its most
        // similar not-yet-claimed actual line, accepted at similarity >= 0.75.
        var matched = new List<string>();
        var missed = new List<string>();
        var claimed = new HashSet<int>();

        foreach (var expected in tc.Expected)
        {
            int candidateIdx = -1;
            double candidateSim = 0;
            for (int i = 0; i < actualLines.Count; i++)
            {
                if (claimed.Contains(i))
                    continue;

                double sim = LevenshteinSimilarity(expected, actualLines[i]);
                if (sim > candidateSim)
                {
                    candidateSim = sim;
                    candidateIdx = i;
                }
            }

            bool accepted = candidateIdx >= 0 && candidateSim >= 0.75;
            if (accepted)
            {
                matched.Add(expected);
                claimed.Add(candidateIdx);
                if (verbose && candidateSim < 1.0)
                    Console.Error.WriteLine($" ~ {expected} → {actualLines[candidateIdx]} (sim={candidateSim:F2})");
            }
            else
            {
                missed.Add(expected);
                if (verbose)
                    Console.Error.WriteLine($" MISS: {expected}" + (candidateIdx >= 0 ? $" (best: {actualLines[candidateIdx]}, sim={candidateSim:F2})" : ""));
            }
        }

        // Anything recognised but never claimed is reported as extra.
        var extra = actualLines.Where((_, i) => !claimed.Contains(i)).ToList();
        if (verbose)
        {
            foreach (var line in extra)
                Console.Error.WriteLine($" EXTRA: {line}");
        }

        // A case with no expected lines scores 1.0.
        double score = tc.Expected.Count > 0 ? (double)matched.Count / tc.Expected.Count : 1.0;
        bool passed = missed.Count == 0;

        if (verbose)
            Console.Error.WriteLine($" Result: {(passed ? "PASS" : "FAIL")} matched={matched.Count}/{tc.Expected.Count} extra={extra.Count} score={score:F2}");

        results.Add(new TestCaseResult
        {
            Id = tc.Id,
            Passed = passed,
            Score = score,
            Matched = matched,
            Missed = missed,
            Extra = extra,
        });
    }

    int passCount = results.Count(r => r.Passed);

    if (verbose)
        Console.Error.WriteLine($"\n=== Summary: {passCount}/{cases.Count} passed ===\n");

    return new TestResponse
    {
        Passed = passCount,
        Failed = cases.Count - passCount,
        Total = cases.Count,
        Results = results,
    };
}
|
|
|
|
/// <summary>
/// Fast crop from raw pixel bytes — avoids slow GDI+ Bitmap.Clone().
/// Builds a new 32bpp ARGB bitmap by copying <paramref name="cropH"/> rows of
/// <paramref name="cropW"/> pixels out of <paramref name="px"/>.
/// </summary>
/// <param name="px">Source pixel buffer, 4 bytes per pixel.</param>
/// <param name="srcStride">Number of bytes between the starts of consecutive rows in <paramref name="px"/>.</param>
/// <param name="cropX">Left edge of the crop, in pixels.</param>
/// <param name="cropY">Top edge of the crop, in pixels.</param>
/// <param name="cropW">Crop width in pixels.</param>
/// <param name="cropH">Crop height in pixels.</param>
/// <returns>A new bitmap owned by the caller.</returns>
private static Bitmap CropFromBytes(byte[] px, int srcStride, int cropX, int cropY, int cropW, int cropH)
{
    var bmp = new Bitmap(cropW, cropH, PixelFormat.Format32bppArgb);
    try
    {
        var data = bmp.LockBits(new Rectangle(0, 0, cropW, cropH), ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);
        try
        {
            int dstStride = data.Stride;
            int rowBytes = cropW * 4;
            for (int y = 0; y < cropH; y++)
            {
                int srcOffset = (cropY + y) * srcStride + cropX * 4;
                Marshal.Copy(px, srcOffset, data.Scan0 + y * dstStride, rowBytes);
            }
        }
        finally
        {
            // Always unlock, even if a row copy throws (e.g. the crop exceeds the buffer).
            bmp.UnlockBits(data);
        }

        return bmp;
    }
    catch
    {
        // The caller never receives the bitmap on failure, so release it here.
        bmp.Dispose();
        throw;
    }
}
|
|
|
|
/// <summary>
/// Case-insensitive similarity of two strings in the range 0–1, computed as
/// 1 − (Levenshtein edit distance ÷ length of the longer string).
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>1.0 for strings equal ignoring case, 0.0 when exactly one is empty, otherwise the normalised similarity.</returns>
private static double LevenshteinSimilarity(string a, string b)
{
    string left = a.ToLowerInvariant();
    string right = b.ToLowerInvariant();

    if (left == right)
        return 1.0;
    if (left.Length == 0 || right.Length == 0)
        return 0.0;

    // Only the previous and current rows of the edit-distance table are kept.
    int[] previous = new int[right.Length + 1];
    int[] currentRow = new int[right.Length + 1];
    for (int j = 0; j <= right.Length; j++)
        previous[j] = j;

    for (int i = 1; i <= left.Length; i++)
    {
        currentRow[0] = i;
        for (int j = 1; j <= right.Length; j++)
        {
            int removal = previous[j] + 1;
            int insertion = currentRow[j - 1] + 1;
            int substitution = previous[j - 1] + (left[i - 1] == right[j - 1] ? 0 : 1);
            currentRow[j] = Math.Min(Math.Min(removal, insertion), substitution);
        }

        // The row just filled becomes the "previous" row of the next iteration.
        (previous, currentRow) = (currentRow, previous);
    }

    int distance = previous[right.Length];
    return 1.0 - (double)distance / Math.Max(left.Length, right.Length);
}
|
|
}
|