working on crop
This commit is contained in:
parent
93e2234c4e
commit
f74e3e1c85
12 changed files with 1135 additions and 220 deletions
|
|
@ -14,6 +14,7 @@ using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
|||
class OcrHandler(TesseractEngine engine)
|
||||
{
|
||||
private Bitmap? _referenceFrame;
|
||||
private RegionRect? _referenceRegion;
|
||||
|
||||
public object HandleOcr(Request req)
|
||||
{
|
||||
|
|
@ -56,31 +57,79 @@ class OcrHandler(TesseractEngine engine)
|
|||
{
|
||||
_referenceFrame?.Dispose();
|
||||
_referenceFrame = ScreenCapture.CaptureOrLoad(req.File, req.Region);
|
||||
_referenceRegion = req.Region == null
|
||||
? null
|
||||
: new RegionRect { X = req.Region.X, Y = req.Region.Y, Width = req.Region.Width, Height = req.Region.Height };
|
||||
return new OkResponse();
|
||||
}
|
||||
|
||||
/// <summary>
/// Convenience overload: runs diff-OCR with default parameters, overriding only the
/// crop diff threshold when the request supplies a positive <c>Threshold</c>.
/// </summary>
/// <param name="req">Incoming request; only <c>Threshold</c> is inspected here.</param>
/// <returns>Whatever the two-argument <c>HandleDiffOcr</c> overload returns.</returns>
public object HandleDiffOcr(Request req) => HandleDiffOcr(req, req.Threshold > 0
    // The threshold lives on the nested crop parameters, not on DiffOcrParams itself.
    ? new DiffOcrParams { Crop = new DiffCropParams { DiffThresh = req.Threshold } }
    : new DiffOcrParams());
|
||||
|
||||
/// <summary>
|
||||
/// Diff detection + crop only. Returns the raw tooltip crop bitmap and region,
|
||||
/// or null if no tooltip detected. Caller is responsible for disposing the bitmap.
|
||||
/// </summary>
|
||||
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffOcrParams p)
|
||||
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffCropParams c)
|
||||
{
|
||||
if (_referenceFrame == null)
|
||||
return null;
|
||||
|
||||
var current = ScreenCapture.CaptureOrLoad(req.File, null);
|
||||
var diffRegion = req.Region ?? _referenceRegion;
|
||||
int baseX = diffRegion?.X ?? 0;
|
||||
int baseY = diffRegion?.Y ?? 0;
|
||||
var current = ScreenCapture.CaptureOrLoad(req.File, diffRegion);
|
||||
|
||||
int w = Math.Min(_referenceFrame.Width, current.Width);
|
||||
int h = Math.Min(_referenceFrame.Height, current.Height);
|
||||
Bitmap refForDiff = _referenceFrame;
|
||||
bool disposeRef = false;
|
||||
|
||||
var refData = _referenceFrame.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
|
||||
if (diffRegion != null)
|
||||
{
|
||||
if (_referenceRegion == null)
|
||||
{
|
||||
var croppedRef = CropBitmap(_referenceFrame, diffRegion);
|
||||
if (croppedRef == null)
|
||||
{
|
||||
current.Dispose();
|
||||
return null;
|
||||
}
|
||||
refForDiff = croppedRef;
|
||||
disposeRef = true;
|
||||
}
|
||||
else if (!RegionsEqual(diffRegion, _referenceRegion))
|
||||
{
|
||||
int offX = diffRegion.X - _referenceRegion.X;
|
||||
int offY = diffRegion.Y - _referenceRegion.Y;
|
||||
if (offX < 0 || offY < 0 || offX + diffRegion.Width > _referenceFrame.Width || offY + diffRegion.Height > _referenceFrame.Height)
|
||||
{
|
||||
current.Dispose();
|
||||
return null;
|
||||
}
|
||||
var croppedRef = CropBitmap(_referenceFrame, new RegionRect
|
||||
{
|
||||
X = offX,
|
||||
Y = offY,
|
||||
Width = diffRegion.Width,
|
||||
Height = diffRegion.Height,
|
||||
});
|
||||
if (croppedRef == null)
|
||||
{
|
||||
current.Dispose();
|
||||
return null;
|
||||
}
|
||||
refForDiff = croppedRef;
|
||||
disposeRef = true;
|
||||
}
|
||||
}
|
||||
|
||||
int w = Math.Min(refForDiff.Width, current.Width);
|
||||
int h = Math.Min(refForDiff.Height, current.Height);
|
||||
|
||||
var refData = refForDiff.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
|
||||
byte[] refPx = new byte[refData.Stride * h];
|
||||
Marshal.Copy(refData.Scan0, refPx, 0, refPx.Length);
|
||||
_referenceFrame.UnlockBits(refData);
|
||||
refForDiff.UnlockBits(refData);
|
||||
int stride = refData.Stride;
|
||||
|
||||
var curData = current.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
|
||||
|
|
@ -88,7 +137,7 @@ class OcrHandler(TesseractEngine engine)
|
|||
Marshal.Copy(curData.Scan0, curPx, 0, curPx.Length);
|
||||
current.UnlockBits(curData);
|
||||
|
||||
int diffThresh = p.DiffThresh;
|
||||
int diffThresh = c.DiffThresh;
|
||||
|
||||
// Pass 1: parallel row diff — compute rowCounts[] directly, no changed[] array
|
||||
int[] rowCounts = new int[h];
|
||||
|
|
@ -112,11 +161,12 @@ class OcrHandler(TesseractEngine engine)
|
|||
if (totalChanged == 0)
|
||||
{
|
||||
current.Dispose();
|
||||
if (disposeRef) refForDiff.Dispose();
|
||||
return null;
|
||||
}
|
||||
|
||||
int maxGap = p.MaxGap;
|
||||
int rowThresh = w / p.RowThreshDiv;
|
||||
int maxGap = c.MaxGap;
|
||||
int rowThresh = w / c.RowThreshDiv;
|
||||
int bestRowStart = 0, bestRowEnd = 0, bestRowLen = 0;
|
||||
int curRowStart = -1, lastActiveRow = -1;
|
||||
for (int y = 0; y < h; y++)
|
||||
|
|
@ -180,7 +230,7 @@ class OcrHandler(TesseractEngine engine)
|
|||
}
|
||||
|
||||
int tooltipHeight = bestRowEnd - bestRowStart + 1;
|
||||
int colThresh = tooltipHeight / p.ColThreshDiv;
|
||||
int colThresh = tooltipHeight / c.ColThreshDiv;
|
||||
|
||||
int bestColStart = 0, bestColEnd = 0, bestColLen = 0;
|
||||
int curColStart = -1, lastActiveCol = -1;
|
||||
|
|
@ -210,6 +260,7 @@ class OcrHandler(TesseractEngine engine)
|
|||
{
|
||||
Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
|
||||
current.Dispose();
|
||||
if (disposeRef) refForDiff.Dispose();
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
@ -218,37 +269,73 @@ class OcrHandler(TesseractEngine engine)
|
|||
int maxX = Math.Min(bestColEnd, w - 1);
|
||||
int maxY = Math.Min(bestRowEnd, h - 1);
|
||||
|
||||
// Trim low-density edges on both axes to avoid oversized crops.
|
||||
int colSpan = maxX - minX + 1;
|
||||
if (colSpan > 100)
|
||||
if (colSpan > 50)
|
||||
{
|
||||
int q1 = minX + colSpan / 4;
|
||||
int q3 = minX + colSpan * 3 / 4;
|
||||
long midSum = 0;
|
||||
int midCount = 0;
|
||||
for (int x = q1; x <= q3; x++) { midSum += colCounts[x]; midCount++; }
|
||||
double avgMidDensity = (double)midSum / midCount;
|
||||
double cutoff = avgMidDensity * p.TrimCutoff;
|
||||
while (maxX > minX + 100 && colCounts[maxX] < cutoff)
|
||||
double avgMidDensity = (double)midSum / Math.Max(1, midCount);
|
||||
double cutoff = avgMidDensity * c.TrimCutoff;
|
||||
|
||||
while (minX < maxX - 50 && colCounts[minX] < cutoff)
|
||||
minX++;
|
||||
while (maxX > minX + 50 && colCounts[maxX] < cutoff)
|
||||
maxX--;
|
||||
}
|
||||
|
||||
int rowSpan = maxY - minY + 1;
|
||||
if (rowSpan > 50)
|
||||
{
|
||||
int q1 = minY + rowSpan / 4;
|
||||
int q3 = minY + rowSpan * 3 / 4;
|
||||
long midSum = 0;
|
||||
int midCount = 0;
|
||||
for (int y = q1; y <= q3; y++) { midSum += rowCounts[y]; midCount++; }
|
||||
double avgMidDensity = (double)midSum / Math.Max(1, midCount);
|
||||
double cutoff = avgMidDensity * c.TrimCutoff;
|
||||
|
||||
while (minY < maxY - 50 && rowCounts[minY] < cutoff)
|
||||
minY++;
|
||||
while (maxY > minY + 50 && rowCounts[maxY] < cutoff)
|
||||
maxY--;
|
||||
}
|
||||
int rw = maxX - minX + 1;
|
||||
int rh = maxY - minY + 1;
|
||||
|
||||
var cropped = CropFromBytes(curPx, stride, minX, minY, rw, rh);
|
||||
var refCropped = CropFromBytes(refPx, stride, minX, minY, rw, rh);
|
||||
var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };
|
||||
var region = new RegionRect { X = baseX + minX, Y = baseY + minY, Width = rw, Height = rh };
|
||||
|
||||
Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
|
||||
|
||||
if (disposeRef) refForDiff.Dispose();
|
||||
return (cropped, refCropped, current, region);
|
||||
}
|
||||
|
||||
/// <summary>
/// Compares two regions field by field (X, Y, Width, Height).
/// </summary>
/// <returns>True only when all four values match.</returns>
private static bool RegionsEqual(RegionRect a, RegionRect b)
{
    // Position first, then size — same comparison order as a single && chain.
    if (a.X != b.X || a.Y != b.Y)
        return false;

    return a.Width == b.Width && a.Height == b.Height;
}
|
||||
|
||||
/// <summary>
/// Clones a sub-rectangle of <paramref name="src"/> as a new 32bpp ARGB bitmap.
/// The requested origin is clamped to be non-negative, and the size is capped to
/// what remains of the source to the right of / below that origin.
/// </summary>
/// <param name="src">Bitmap to copy pixels from.</param>
/// <param name="region">Requested rectangle, in <paramref name="src"/> pixel coordinates.</param>
/// <returns>A new bitmap holding the crop, or null when the clamped rectangle is empty.</returns>
private static Bitmap? CropBitmap(Bitmap src, RegionRect region)
{
    int left = Math.Max(0, region.X);
    int top = Math.Max(0, region.Y);

    // NOTE(review): a negative origin is clamped to 0 without shrinking the size,
    // so the crop is shifted rather than intersected — confirm this is intended.
    var area = new Rectangle(
        left,
        top,
        Math.Min(region.Width, src.Width - left),
        Math.Min(region.Height, src.Height - top));

    if (area.Width > 0 && area.Height > 0)
        return src.Clone(area, PixelFormat.Format32bppArgb);

    return null;
}
|
||||
|
||||
public object HandleDiffOcr(Request req, DiffOcrParams p)
|
||||
{
|
||||
if (_referenceFrame == null)
|
||||
return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
|
||||
|
||||
var cropResult = DiffCrop(req, p);
|
||||
var cropResult = DiffCrop(req, p.Crop);
|
||||
if (cropResult == null)
|
||||
return new OcrResponse { Text = "", Lines = [] };
|
||||
|
||||
|
|
@ -270,14 +357,15 @@ class OcrHandler(TesseractEngine engine)
|
|||
}
|
||||
|
||||
// Pre-process for OCR — get Mat for per-line detection and padding
|
||||
var ocr = p.Ocr;
|
||||
Mat processedMat;
|
||||
if (p.UseBackgroundSub)
|
||||
if (ocr.UseBackgroundSub)
|
||||
{
|
||||
processedMat = ImagePreprocessor.PreprocessWithBackgroundSubMat(cropped, refCropped, p.DimPercentile, p.TextThresh, p.Upscale, p.SoftThreshold);
|
||||
processedMat = ImagePreprocessor.PreprocessWithBackgroundSubMat(cropped, refCropped, ocr.DimPercentile, ocr.TextThresh, ocr.Upscale, ocr.SoftThreshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
using var topHatBmp = ImagePreprocessor.PreprocessForOcr(cropped, p.KernelSize, p.Upscale);
|
||||
using var topHatBmp = ImagePreprocessor.PreprocessForOcr(cropped, ocr.KernelSize, ocr.Upscale);
|
||||
processedMat = BitmapConverter.ToMat(topHatBmp);
|
||||
}
|
||||
using var _processedMat = processedMat; // ensure disposal
|
||||
|
|
@ -296,25 +384,25 @@ class OcrHandler(TesseractEngine engine)
|
|||
if (debug) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
|
||||
}
|
||||
|
||||
int pad = p.OcrPad;
|
||||
int upscale = p.Upscale > 0 ? p.Upscale : 1;
|
||||
int pad = p.Crop.OcrPad;
|
||||
int upscale = ocr.Upscale > 0 ? ocr.Upscale : 1;
|
||||
var lines = new List<OcrLineResult>();
|
||||
|
||||
// Per-line OCR: detect text lines via horizontal projection, OCR each individually
|
||||
if (p.UsePerLineOcr)
|
||||
if (ocr.UsePerLineOcr)
|
||||
{
|
||||
// DetectTextLines needs binary input; if soft threshold produced grayscale, binarize a copy
|
||||
int minRowPx = Math.Max(processedMat.Cols / 200, 3);
|
||||
using var detectionMat = p.SoftThreshold ? new Mat() : null;
|
||||
if (p.SoftThreshold)
|
||||
using var detectionMat = ocr.SoftThreshold ? new Mat() : null;
|
||||
if (ocr.SoftThreshold)
|
||||
Cv2.Threshold(processedMat, detectionMat!, 128, 255, ThresholdTypes.Binary);
|
||||
var lineDetectInput = p.SoftThreshold ? detectionMat! : processedMat;
|
||||
var textLines = ImagePreprocessor.DetectTextLines(lineDetectInput, minRowPixels: minRowPx, gapTolerance: p.LineGapTolerance * upscale);
|
||||
var lineDetectInput = ocr.SoftThreshold ? detectionMat! : processedMat;
|
||||
var textLines = ImagePreprocessor.DetectTextLines(lineDetectInput, minRowPixels: minRowPx, gapTolerance: ocr.LineGapTolerance * upscale);
|
||||
if (debug) Console.Error.WriteLine($" diff-ocr: detected {textLines.Count} text lines");
|
||||
|
||||
if (textLines.Count > 0)
|
||||
{
|
||||
int linePadY = p.LinePadY;
|
||||
int linePadY = ocr.LinePadY;
|
||||
foreach (var (yStart, yEnd) in textLines)
|
||||
{
|
||||
int y0 = Math.Max(yStart - linePadY, 0);
|
||||
|
|
@ -330,7 +418,7 @@ class OcrHandler(TesseractEngine engine)
|
|||
|
||||
using var lineBmp = BitmapConverter.ToBitmap(padded);
|
||||
using var linePix = ImageUtils.BitmapToPix(lineBmp);
|
||||
using var linePage = engine.Process(linePix, (PageSegMode)p.Psm);
|
||||
using var linePage = engine.Process(linePix, (PageSegMode)ocr.Psm);
|
||||
|
||||
// Extract words, adjusting coordinates back to screen space
|
||||
// Word coords are in padded image space → subtract pad, add line offset, scale to original, add region offset
|
||||
|
|
@ -386,7 +474,7 @@ class OcrHandler(TesseractEngine engine)
|
|||
Cv2.CopyMakeBorder(processedMat, padded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);
|
||||
using var bmp = BitmapConverter.ToBitmap(padded);
|
||||
using var pix = ImageUtils.BitmapToPix(bmp);
|
||||
using var page = engine.Process(pix, (PageSegMode)p.Psm);
|
||||
using var page = engine.Process(pix, (PageSegMode)ocr.Psm);
|
||||
|
||||
var text = page.GetText();
|
||||
// Adjust word coordinates: subtract padding offset
|
||||
|
|
@ -430,77 +518,161 @@ class OcrHandler(TesseractEngine engine)
|
|||
|
||||
public object HandleTest(Request req) => RunTestCases(new DiffOcrParams(), verbose: true);
|
||||
|
||||
/// <summary>
/// Produces an independent copy of <paramref name="p"/> by serializing it to JSON
/// and deserializing the result, so nested parameter objects are copied as well.
/// </summary>
/// <param name="p">Parameter set to copy.</param>
/// <returns>A new <c>DiffOcrParams</c> instance rebuilt from the JSON form of <paramref name="p"/>.</returns>
private static DiffOcrParams CloneParams(DiffOcrParams p) =>
    JsonSerializer.Deserialize<DiffOcrParams>(JsonSerializer.Serialize(p))!;
|
||||
|
||||
/// <summary>
/// Runs the three-phase parameter search and reports the winning configuration.
/// Phase A tunes the crop parameters; phases B and C each start from a copy of the
/// phase-A result and tune the OCR parameters for the top-hat and the
/// background-subtraction preprocessing respectively. The higher-scoring of B and C wins
/// (top-hat wins ties).
/// </summary>
/// <param name="req">Incoming request; not read by this method.</param>
/// <returns>A <c>TuneResponse</c> carrying the winning score, parameters and evaluation count.</returns>
public object HandleTune(Request req)
{
    int totalEvals = 0;

    // --- Phase A: Tune crop params ---
    Console.Error.WriteLine("\n========== Phase A: Crop Params ==========");
    var best = new DiffOcrParams();
    // TuneCropParams mutates 'best' in place; its score is superseded by phases B/C.
    TuneCropParams(best, ref totalEvals);

    // --- Phase B: Tune OCR params (top-hat) ---
    Console.Error.WriteLine("\n========== Phase B: OCR — Top-Hat ==========");
    var topHat = CloneParams(best);
    topHat.Ocr.UseBackgroundSub = false;
    double topHatScore = TuneOcrParams(topHat, ref totalEvals, tuneTopHat: true, tuneBgSub: false);

    // --- Phase C: Tune OCR params (background-subtraction) ---
    Console.Error.WriteLine("\n========== Phase C: OCR — Background Subtraction ==========");
    var bgSub = CloneParams(best);
    bgSub.Ocr.UseBackgroundSub = true;
    double bgSubScore = TuneOcrParams(bgSub, ref totalEvals, tuneTopHat: false, tuneBgSub: true);

    // Pick the winner
    var winner = bgSubScore > topHatScore ? bgSub : topHat;
    double winnerScore = Math.Max(topHatScore, bgSubScore);

    Console.Error.WriteLine($"\n========== Result ==========");
    Console.Error.WriteLine($" Top-Hat: {topHatScore:F3} {topHat}");
    Console.Error.WriteLine($" BgSub: {bgSubScore:F3} {bgSub}");
    Console.Error.WriteLine($" Winner: {(winner.Ocr.UseBackgroundSub ? "BgSub" : "TopHat")} evals={totalEvals}\n");

    // Final verbose report with best params
    RunTestCases(winner, verbose: true);

    return new TuneResponse
    {
        BestScore = winnerScore,
        BestParams = winner,
        Iterations = totalEvals,
    };
}
|
||||
|
||||
private double TuneParams(DiffOcrParams best, ref int totalEvals, bool tuneTopHat, bool tuneBgSub)
|
||||
/// <summary>
/// Coordinate-descent search over the crop parameters of <paramref name="best"/>.
/// Each round sweeps every integer crop parameter and then <c>TrimCutoff</c>, one at a
/// time; a candidate value is adopted only when it strictly improves the current score.
/// Stops after three rounds or after the first round that yields no improvement.
/// </summary>
/// <param name="best">Starting parameters; its <c>Crop</c> member is updated in place.</param>
/// <param name="totalEvals">Running count of <c>ScoreParams</c> trial evaluations; incremented per trial.</param>
/// <returns>The best score reached.</returns>
private double TuneCropParams(DiffOcrParams best, ref int totalEvals)
{
    double bestScore = ScoreParams(best);
    Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");

    // Integer-valued crop/detection parameters and the candidate values to try for each.
    var cropSweeps = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
    {
        ("diffThresh", [10, 15, 20, 25, 30, 40, 50, 60], (c, v) => c.DiffThresh = v),
        ("rowThreshDiv", [10, 15, 20, 25, 30, 40, 50, 60], (c, v) => c.RowThreshDiv = v),
        ("colThreshDiv", [5, 8, 10, 12, 15, 20, 25, 30], (c, v) => c.ColThreshDiv = v),
        ("maxGap", [5, 8, 10, 12, 15, 20, 25, 30], (c, v) => c.MaxGap = v),
        ("ocrPad", [0, 5, 10, 15, 20, 30], (c, v) => c.OcrPad = v),
    };

    double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];

    const int maxRounds = 3;
    for (int round = 0; round < maxRounds; round++)
    {
        bool improved = false;
        Console.Error.WriteLine($"--- Round {round + 1} ---");

        foreach (var (name, values, set) in cropSweeps)
        {
            Console.Error.Write($" {name}: ");
            int bestVal = 0;
            double bestValScore = -1;

            foreach (int v in values)
            {
                // Score each candidate on a copy so 'best' is only touched on a real improvement.
                var trial = CloneParams(best);
                set(trial.Crop, v);
                double score = ScoreParams(trial);
                totalEvals++;
                Console.Error.Write($"{v}={score:F3} ");
                if (score > bestValScore) { bestValScore = score; bestVal = v; }
            }
            Console.Error.WriteLine();

            if (bestValScore > bestScore)
            {
                set(best.Crop, bestVal);
                bestScore = bestValScore;
                improved = true;
                Console.Error.WriteLine($" → {name}={bestVal} score={bestScore:F3}");
            }
        }

        // Sweep trimCutoff (the only double-valued crop parameter, hence handled separately)
        {
            Console.Error.Write($" trimCutoff: ");
            double bestTrim = best.Crop.TrimCutoff;
            double bestTrimScore = bestScore;

            foreach (double v in trimValues)
            {
                var trial = CloneParams(best);
                trial.Crop.TrimCutoff = v;
                double score = ScoreParams(trial);
                totalEvals++;
                Console.Error.Write($"{v:F2}={score:F3} ");
                if (score > bestTrimScore) { bestTrimScore = score; bestTrim = v; }
            }
            Console.Error.WriteLine();

            if (bestTrimScore > bestScore)
            {
                best.Crop.TrimCutoff = bestTrim;
                bestScore = bestTrimScore;
                improved = true;
                Console.Error.WriteLine($" → trimCutoff={bestTrim:F2} score={bestScore:F3}");
            }
        }

        Console.Error.WriteLine($" End of round {round + 1}: score={bestScore:F3} {best}");
        if (!improved) break;
    }

    return bestScore;
}
|
||||
|
||||
private double TuneOcrParams(DiffOcrParams best, ref int totalEvals, bool tuneTopHat, bool tuneBgSub)
|
||||
{
|
||||
double bestScore = ScoreParams(best);
|
||||
Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");
|
||||
|
||||
var sharedOcrSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
|
||||
{
|
||||
("upscale", [1, 2, 3], (o, v) => o.Upscale = v),
|
||||
("psm", [4, 6, 11, 13], (o, v) => o.Psm = v),
|
||||
};
|
||||
|
||||
// Top-hat specific
|
||||
var topHatSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
|
||||
{
|
||||
("kernelSize", [11, 15, 19, 21, 25, 31, 35, 41, 51], (o, v) => o.KernelSize = v),
|
||||
};
|
||||
|
||||
// Background-subtraction specific
|
||||
var bgSubSweeps = new (string Name, int[] Values, Action<OcrParams, int> Set)[]
|
||||
{
|
||||
("dimPercentile", [5, 10, 15, 20, 25, 30, 40, 50], (o, v) => o.DimPercentile = v),
|
||||
("textThresh", [10, 15, 20, 25, 30, 40, 50, 60, 80], (o, v) => o.TextThresh = v),
|
||||
("lineGapTolerance", [3, 5, 8, 10, 15], (o, v) => o.LineGapTolerance = v),
|
||||
("linePadY", [5, 10, 15, 20], (o, v) => o.LinePadY = v),
|
||||
};
|
||||
|
||||
var allOcrSweeps = sharedOcrSweeps
|
||||
.Concat(tuneTopHat ? topHatSweeps : [])
|
||||
.Concat(tuneBgSub ? bgSubSweeps : [])
|
||||
.ToArray();
|
||||
|
|
@ -511,7 +683,7 @@ class OcrHandler(TesseractEngine engine)
|
|||
bool improved = false;
|
||||
Console.Error.WriteLine($"--- Round {round + 1} ---");
|
||||
|
||||
foreach (var (name, values, set) in allIntSweeps)
|
||||
foreach (var (name, values, set) in allOcrSweeps)
|
||||
{
|
||||
Console.Error.Write($" {name}: ");
|
||||
int bestVal = 0;
|
||||
|
|
@ -519,8 +691,8 @@ class OcrHandler(TesseractEngine engine)
|
|||
|
||||
foreach (int v in values)
|
||||
{
|
||||
var trial = best.Clone();
|
||||
set(trial, v);
|
||||
var trial = CloneParams(best);
|
||||
set(trial.Ocr, v);
|
||||
double score = ScoreParams(trial);
|
||||
totalEvals++;
|
||||
Console.Error.Write($"{v}={score:F3} ");
|
||||
|
|
@ -530,39 +702,13 @@ class OcrHandler(TesseractEngine engine)
|
|||
|
||||
if (bestValScore > bestScore)
|
||||
{
|
||||
set(best, bestVal);
|
||||
set(best.Ocr, bestVal);
|
||||
bestScore = bestValScore;
|
||||
improved = true;
|
||||
Console.Error.WriteLine($" → {name}={bestVal} score={bestScore:F3}");
|
||||
}
|
||||
}
|
||||
|
||||
// Sweep trimCutoff
|
||||
{
|
||||
Console.Error.Write($" trimCutoff: ");
|
||||
double bestTrim = best.TrimCutoff;
|
||||
double bestTrimScore = bestScore;
|
||||
|
||||
foreach (double v in trimValues)
|
||||
{
|
||||
var trial = best.Clone();
|
||||
trial.TrimCutoff = v;
|
||||
double score = ScoreParams(trial);
|
||||
totalEvals++;
|
||||
Console.Error.Write($"{v:F2}={score:F3} ");
|
||||
if (score > bestTrimScore) { bestTrimScore = score; bestTrim = v; }
|
||||
}
|
||||
Console.Error.WriteLine();
|
||||
|
||||
if (bestTrimScore > bestScore)
|
||||
{
|
||||
best.TrimCutoff = bestTrim;
|
||||
bestScore = bestTrimScore;
|
||||
improved = true;
|
||||
Console.Error.WriteLine($" → trimCutoff={bestTrim:F2} score={bestScore:F3}");
|
||||
}
|
||||
}
|
||||
|
||||
Console.Error.WriteLine($" End of round {round + 1}: score={bestScore:F3} {best}");
|
||||
if (!improved) break;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue