work on OCR
This commit is contained in:
parent
6600969947
commit
854a474435
13 changed files with 4374 additions and 38 deletions
|
|
@ -42,9 +42,10 @@ static class ImagePreprocessor
|
|||
/// Background-subtraction preprocessing: uses the reference frame to remove
|
||||
/// background bleed-through from the semi-transparent tooltip overlay.
|
||||
/// Pipeline: estimate dimming factor → subtract expected background → threshold → upscale
|
||||
/// Returns the upscaled binary Mat directly (caller must dispose).
|
||||
/// </summary>
|
||||
public static Bitmap PreprocessWithBackgroundSub(Bitmap tooltipCrop, Bitmap referenceCrop,
|
||||
int dimPercentile = 25, int textThresh = 30, int upscale = 2)
|
||||
public static Mat PreprocessWithBackgroundSubMat(Bitmap tooltipCrop, Bitmap referenceCrop,
|
||||
int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
|
||||
{
|
||||
using var curMat = BitmapConverter.ToMat(tooltipCrop);
|
||||
using var refMat = BitmapConverter.ToMat(referenceCrop);
|
||||
|
|
@ -77,7 +78,11 @@ static class ImagePreprocessor
|
|||
}
|
||||
|
||||
if (ratios.Count == 0)
|
||||
return PreprocessForOcr(tooltipCrop, 41, upscale); // fallback
|
||||
{
|
||||
// Fallback: use top-hat preprocessing, convert to Mat
|
||||
using var fallbackBmp = PreprocessForOcr(tooltipCrop, 41, upscale);
|
||||
return BitmapConverter.ToMat(fallbackBmp);
|
||||
}
|
||||
|
||||
// Use a low percentile of ratios as the dimming factor.
|
||||
// Text pixels have high ratios (bright on dark), overlay pixels have low ratios.
|
||||
|
|
@ -108,19 +113,122 @@ static class ImagePreprocessor
|
|||
}
|
||||
}
|
||||
|
||||
// Threshold: pixels above textThresh are text
|
||||
using var binary = new Mat();
|
||||
Cv2.Threshold(textSignal, binary, textThresh, 255, ThresholdTypes.BinaryInv);
|
||||
|
||||
// Upscale for better LSTM recognition
|
||||
if (upscale > 1)
|
||||
Mat result;
|
||||
if (softThreshold)
|
||||
{
|
||||
using var upscaled = new Mat();
|
||||
Cv2.Resize(binary, upscaled, new OpenCvSharp.Size(binary.Width * upscale, binary.Height * upscale),
|
||||
interpolation: InterpolationFlags.Cubic);
|
||||
return BitmapConverter.ToBitmap(upscaled);
|
||||
// Soft threshold: clip below textThresh, contrast-stretch, invert.
|
||||
// Produces grayscale anti-aliased text on white background,
|
||||
// matching the training data format (text2image renders).
|
||||
result = new Mat(rows, cols, MatType.CV_8UC1);
|
||||
unsafe
|
||||
{
|
||||
byte* srcPtr = (byte*)textSignal.Data;
|
||||
byte* dstPtr = (byte*)result.Data;
|
||||
int srcStep = (int)textSignal.Step();
|
||||
int dstStep = (int)result.Step();
|
||||
|
||||
// Find max signal above threshold for contrast stretch
|
||||
int maxClipped = 1;
|
||||
for (int y = 0; y < rows; y++)
|
||||
for (int x = 0; x < cols; x++)
|
||||
{
|
||||
int val = srcPtr[y * srcStep + x] - textThresh;
|
||||
if (val > maxClipped) maxClipped = val;
|
||||
}
|
||||
|
||||
// Clip, stretch, invert: background → 255 (white), text → dark
|
||||
for (int y = 0; y < rows; y++)
|
||||
for (int x = 0; x < cols; x++)
|
||||
{
|
||||
int clipped = srcPtr[y * srcStep + x] - textThresh;
|
||||
if (clipped <= 0)
|
||||
{
|
||||
dstPtr[y * dstStep + x] = 255; // background
|
||||
}
|
||||
else
|
||||
{
|
||||
int stretched = clipped * 255 / maxClipped;
|
||||
dstPtr[y * dstStep + x] = (byte)(255 - stretched); // invert
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Hard binary threshold (original behavior)
|
||||
result = new Mat();
|
||||
Cv2.Threshold(textSignal, result, textThresh, 255, ThresholdTypes.BinaryInv);
|
||||
}
|
||||
|
||||
return BitmapConverter.ToBitmap(binary);
|
||||
using var _result = result;
|
||||
return UpscaleMat(result, upscale);
|
||||
}
|
||||
|
||||
/// <summary>
/// Convenience overload of the background-subtraction pipeline that returns a
/// <see cref="Bitmap"/> rather than a Mat. Every argument is forwarded unchanged to
/// <c>PreprocessWithBackgroundSubMat</c>; the Mat it produces is disposed when this
/// method exits.
/// </summary>
public static Bitmap PreprocessWithBackgroundSub(Bitmap tooltipCrop, Bitmap referenceCrop,
    int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
{
    using (Mat processed = PreprocessWithBackgroundSubMat(
        tooltipCrop, referenceCrop, dimPercentile, textThresh, upscale, softThreshold))
    {
        return BitmapConverter.ToBitmap(processed);
    }
}
|
||||
|
||||
/// <summary>
/// Detect text lines via horizontal projection on a binary image.
/// Binary should be inverted: text=black(0), background=white(255).
/// A pixel counts as text when its value is below 128, so anti-aliased
/// (e.g. cubic-upscaled) input is tolerated.
/// </summary>
/// <param name="binary">
/// 8-bit image to analyse. Single-channel input is scanned directly; 3- and
/// 4-channel input is converted to grayscale first. The Mat is not modified or disposed.
/// </param>
/// <param name="minRowPixels">Minimum number of text pixels a row needs to be considered part of a line.</param>
/// <param name="gapTolerance">
/// Maximum number of consecutive below-threshold rows that may appear inside a line
/// before the line is closed.
/// </param>
/// <returns>List of (yStart, yEnd) row ranges, both inclusive, for each detected text line, top to bottom.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="binary"/> is null.</exception>
/// <exception cref="System.ArgumentException">
/// <paramref name="binary"/> is not 8-bit, or has a channel count other than 1, 3 or 4.
/// </exception>
public static List<(int yStart, int yEnd)> DetectTextLines(
    Mat binary, int minRowPixels = 2, int gapTolerance = 5)
{
    if (binary is null)
    {
        throw new System.ArgumentNullException(nameof(binary));
    }

    // Nothing to scan; also avoids touching a null data pointer.
    if (binary.Empty())
    {
        return new List<(int yStart, int yEnd)>();
    }

    // The scan below reads one byte per pixel, so any other depth would be misinterpreted.
    if (binary.Depth() != MatType.CV_8U)
    {
        throw new System.ArgumentException("DetectTextLines requires an 8-bit image.", nameof(binary));
    }

    int channels = binary.Channels();
    if (channels == 1)
    {
        return ProjectRowsIntoLines(binary, minRowPixels, gapTolerance);
    }

    if (channels != 3 && channels != 4)
    {
        throw new System.ArgumentException("DetectTextLines requires a 1-, 3- or 4-channel image.", nameof(binary));
    }

    // Collapse colour input to one byte per pixel so each row is scanned across its full width.
    using var gray = new Mat();
    Cv2.CvtColor(binary, gray,
        channels == 4 ? ColorConversionCodes.BGRA2GRAY : ColorConversionCodes.BGR2GRAY);
    return ProjectRowsIntoLines(gray, minRowPixels, gapTolerance);
}

/// <summary>
/// Counts dark pixels per row of an 8-bit single-channel Mat and groups rows that
/// reach <paramref name="minRowPixels"/> into inclusive (yStart, yEnd) runs, bridging
/// gaps of up to <paramref name="gapTolerance"/> rows.
/// </summary>
private static List<(int yStart, int yEnd)> ProjectRowsIntoLines(
    Mat gray, int minRowPixels, int gapTolerance)
{
    int rows = gray.Rows, cols = gray.Cols;

    // Count dark (text) pixels per row — use < 128 threshold since
    // cubic upscaling introduces anti-aliased intermediate values
    var rowCounts = new int[rows];
    unsafe
    {
        byte* basePtr = (byte*)gray.Data;
        long step = gray.Step(); // keep the stride 64-bit so row offsets cannot overflow
        for (int y = 0; y < rows; y++)
        {
            byte* row = basePtr + y * step;
            int dark = 0;
            for (int x = 0; x < cols; x++)
            {
                if (row[x] < 128)
                {
                    dark++;
                }
            }
            rowCounts[y] = dark;
        }
    }

    // Group into contiguous runs with gap tolerance
    var lines = new List<(int yStart, int yEnd)>();
    int lineStart = -1, lastActive = -1;
    for (int y = 0; y < rows; y++)
    {
        if (rowCounts[y] >= minRowPixels)
        {
            if (lineStart < 0)
            {
                lineStart = y;
            }
            lastActive = y;
        }
        else if (lineStart >= 0 && y - lastActive > gapTolerance)
        {
            // Gap is too large to belong to the current line: close it at the last active row.
            lines.Add((lineStart, lastActive));
            lineStart = -1;
        }
    }

    // Flush a line that runs to (or within gapTolerance of) the bottom edge.
    if (lineStart >= 0)
    {
        lines.Add((lineStart, lastActive));
    }

    return lines;
}
|
||||
|
||||
/// <summary>
/// Builds a new Mat from <paramref name="src"/>: a cubic-interpolated enlargement by
/// <paramref name="factor"/> in both dimensions when the factor exceeds 1, otherwise a
/// plain clone. The caller owns (and must dispose) the returned Mat; <paramref name="src"/>
/// is left untouched and is NOT disposed here.
/// </summary>
private static Mat UpscaleMat(Mat src, int factor)
{
    // No enlargement requested: still hand back an independent copy.
    if (factor <= 1)
    {
        return src.Clone();
    }

    var enlarged = new Mat();
    var targetSize = new OpenCvSharp.Size(src.Width * factor, src.Height * factor);
    Cv2.Resize(src, enlarged, targetSize, interpolation: InterpolationFlags.Cubic);
    return enlarged;
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue