More tuning; switched to background subtraction
This commit is contained in:
parent
641c87121a
commit
6600969947
4 changed files with 171 additions and 37 deletions
|
|
@ -55,10 +55,9 @@ class OcrHandler(TesseractEngine engine)
|
|||
return new OkResponse();
|
||||
}
|
||||
|
||||
public object HandleDiffOcr(Request req) => HandleDiffOcr(req, new DiffOcrParams
|
||||
{
|
||||
DiffThresh = req.Threshold > 0 ? req.Threshold : 30,
|
||||
});
|
||||
/// <summary>
/// Convenience overload that builds the <c>DiffOcrParams</c> from the request and
/// forwards to <c>HandleDiffOcr(Request, DiffOcrParams)</c>. A positive
/// <c>req.Threshold</c> overrides <c>DiffThresh</c> on an otherwise
/// default-constructed parameter set; a zero or negative threshold leaves every
/// parameter at whatever <c>new DiffOcrParams()</c> provides.
/// </summary>
public object HandleDiffOcr(Request req) => HandleDiffOcr(req, req.Threshold > 0
|
||||
? new DiffOcrParams { DiffThresh = req.Threshold }
|
||||
: new DiffOcrParams());
|
||||
|
||||
public object HandleDiffOcr(Request req, DiffOcrParams p)
|
||||
{
|
||||
|
|
@ -219,9 +218,9 @@ class OcrHandler(TesseractEngine engine)
|
|||
|
||||
if (debug) Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
|
||||
|
||||
// Simple crop of the tooltip region from the current frame (no per-pixel masking).
|
||||
// The top-hat preprocessing will handle suppressing background text.
|
||||
// Crop tooltip region from both current and reference frames
|
||||
using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
|
||||
using var refCropped = _referenceFrame.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
|
||||
|
||||
// Save before/after preprocessing images if path is provided
|
||||
if (!string.IsNullOrEmpty(req.Path))
|
||||
|
|
@ -233,8 +232,10 @@ class OcrHandler(TesseractEngine engine)
|
|||
if (debug) Console.Error.WriteLine($" diff-ocr: saved raw to {req.Path}");
|
||||
}
|
||||
|
||||
// Pre-process for OCR: top-hat + binarize + upscale
|
||||
using var processed = ImagePreprocessor.PreprocessForOcr(cropped, p.KernelSize, p.Upscale);
|
||||
// Pre-process for OCR
|
||||
using var processed = p.UseBackgroundSub
|
||||
? ImagePreprocessor.PreprocessWithBackgroundSub(cropped, refCropped, p.DimPercentile, p.TextThresh, p.Upscale)
|
||||
: ImagePreprocessor.PreprocessForOcr(cropped, p.KernelSize, p.Upscale);
|
||||
|
||||
// Save fullscreen and preprocessed versions alongside raw
|
||||
if (!string.IsNullOrEmpty(req.Path))
|
||||
|
|
@ -266,35 +267,82 @@ class OcrHandler(TesseractEngine engine)
|
|||
|
||||
public object HandleTune(Request req)
|
||||
{
|
||||
// Coordinate descent: optimize one parameter at a time, repeat until stable.
|
||||
var best = new DiffOcrParams();
|
||||
double bestScore = ScoreParams(best);
|
||||
Console.Error.WriteLine($"\n=== Tuning start === baseline score={bestScore:F3} {best}\n");
|
||||
int totalEvals = 0;
|
||||
|
||||
// Define search ranges for each parameter
|
||||
var sweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
|
||||
// --- Phase 1: Tune top-hat approach ---
|
||||
Console.Error.WriteLine("\n========== Phase 1: Top-Hat ==========");
|
||||
var topHat = new DiffOcrParams { UseBackgroundSub = false };
|
||||
double topHatScore = TuneParams(topHat, ref totalEvals, tuneTopHat: true, tuneBgSub: false);
|
||||
|
||||
// --- Phase 2: Tune background-subtraction approach ---
|
||||
Console.Error.WriteLine("\n========== Phase 2: Background Subtraction ==========");
|
||||
// Start bgSub from the best detection params found in phase 1
|
||||
var bgSub = topHat.Clone();
|
||||
bgSub.UseBackgroundSub = true;
|
||||
double bgSubScore = TuneParams(bgSub, ref totalEvals, tuneTopHat: false, tuneBgSub: true);
|
||||
|
||||
// Pick the winner
|
||||
var best = bgSubScore > topHatScore ? bgSub : topHat;
|
||||
double bestScore = Math.Max(topHatScore, bgSubScore);
|
||||
|
||||
Console.Error.WriteLine($"\n========== Result ==========");
|
||||
Console.Error.WriteLine($" Top-Hat: {topHatScore:F3} {topHat}");
|
||||
Console.Error.WriteLine($" BgSub: {bgSubScore:F3} {bgSub}");
|
||||
Console.Error.WriteLine($" Winner: {(best.UseBackgroundSub ? "BgSub" : "TopHat")} evals={totalEvals}\n");
|
||||
|
||||
// Final verbose report with best params
|
||||
RunTestCases(best, verbose: true);
|
||||
|
||||
return new TuneResponse
|
||||
{
|
||||
BestScore = bestScore,
|
||||
BestParams = best,
|
||||
Iterations = totalEvals,
|
||||
};
|
||||
}
|
||||
|
||||
private double TuneParams(DiffOcrParams best, ref int totalEvals, bool tuneTopHat, bool tuneBgSub)
|
||||
{
|
||||
double bestScore = ScoreParams(best);
|
||||
Console.Error.WriteLine($" baseline score={bestScore:F3} {best}\n");
|
||||
|
||||
// Detection params (shared by both approaches)
|
||||
var sharedSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
|
||||
{
|
||||
("diffThresh", [10, 15, 20, 25, 30, 40, 50, 60], (p, v) => p.DiffThresh = v),
|
||||
("rowThreshDiv", [10, 15, 20, 25, 30, 40, 50, 60], (p, v) => p.RowThreshDiv = v),
|
||||
("colThreshDiv", [5, 8, 10, 12, 15, 20, 25, 30], (p, v) => p.ColThreshDiv = v),
|
||||
("maxGap", [5, 8, 10, 12, 15, 20, 25, 30], (p, v) => p.MaxGap = v),
|
||||
("kernelSize", [11, 15, 19, 21, 25, 31, 35, 41], (p, v) => p.KernelSize = v),
|
||||
("upscale", [1, 2, 3], (p, v) => p.Upscale = v),
|
||||
};
|
||||
|
||||
// trimCutoff needs double values — handle separately
|
||||
// Top-hat specific
|
||||
var topHatSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
|
||||
{
|
||||
("kernelSize", [11, 15, 19, 21, 25, 31, 35, 41, 51], (p, v) => p.KernelSize = v),
|
||||
};
|
||||
|
||||
// Background-subtraction specific
|
||||
var bgSubSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
|
||||
{
|
||||
("dimPercentile", [5, 10, 15, 20, 25, 30, 40, 50], (p, v) => p.DimPercentile = v),
|
||||
("textThresh", [10, 15, 20, 25, 30, 40, 50, 60, 80], (p, v) => p.TextThresh = v),
|
||||
};
|
||||
|
||||
double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];
|
||||
|
||||
int totalEvals = 0;
|
||||
const int maxRounds = 3;
|
||||
var allIntSweeps = sharedSweeps
|
||||
.Concat(tuneTopHat ? topHatSweeps : [])
|
||||
.Concat(tuneBgSub ? bgSubSweeps : [])
|
||||
.ToArray();
|
||||
|
||||
const int maxRounds = 3;
|
||||
for (int round = 0; round < maxRounds; round++)
|
||||
{
|
||||
bool improved = false;
|
||||
Console.Error.WriteLine($"--- Round {round + 1} ---");
|
||||
|
||||
// Sweep integer params
|
||||
foreach (var (name, values, set) in sweeps)
|
||||
foreach (var (name, values, set) in allIntSweeps)
|
||||
{
|
||||
Console.Error.Write($" {name}: ");
|
||||
int bestVal = 0;
|
||||
|
|
@ -307,7 +355,6 @@ class OcrHandler(TesseractEngine engine)
|
|||
double score = ScoreParams(trial);
|
||||
totalEvals++;
|
||||
Console.Error.Write($"{v}={score:F3} ");
|
||||
|
||||
if (score > bestValScore) { bestValScore = score; bestVal = v; }
|
||||
}
|
||||
Console.Error.WriteLine();
|
||||
|
|
@ -334,7 +381,6 @@ class OcrHandler(TesseractEngine engine)
|
|||
double score = ScoreParams(trial);
|
||||
totalEvals++;
|
||||
Console.Error.Write($"{v:F2}={score:F3} ");
|
||||
|
||||
if (score > bestTrimScore) { bestTrimScore = score; bestTrim = v; }
|
||||
}
|
||||
Console.Error.WriteLine();
|
||||
|
|
@ -352,17 +398,7 @@ class OcrHandler(TesseractEngine engine)
|
|||
if (!improved) break;
|
||||
}
|
||||
|
||||
Console.Error.WriteLine($"\n=== Tuning done === evals={totalEvals} bestScore={bestScore:F3}\n {best}\n");
|
||||
|
||||
// Run verbose test with best params for final report
|
||||
var finalResult = RunTestCases(best, verbose: true);
|
||||
|
||||
return new TuneResponse
|
||||
{
|
||||
BestScore = bestScore,
|
||||
BestParams = best,
|
||||
Iterations = totalEvals,
|
||||
};
|
||||
return bestScore;
|
||||
}
|
||||
|
||||
/// <summary>Score a param set: average match ratio across all test cases (0-1).</summary>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue