126 lines
5.2 KiB
C#
126 lines
5.2 KiB
C#
namespace OcrDaemon;
|
||
|
||
using System.Drawing;
|
||
using OpenCvSharp;
|
||
using OpenCvSharp.Extensions;
|
||
|
||
/// <summary>
/// Image preprocessing helpers that turn a tooltip screenshot into a high-contrast,
/// black-text-on-white image before it is handed to the OCR engine.
/// </summary>
static class ImagePreprocessor
{
    /// <summary>Default side length, in pixels, of the square top-hat structuring element.</summary>
    private const int DefaultKernelSize = 41;

    /// <summary>
    /// Reference pixels at or below this gray level are skipped when estimating the
    /// dimming factor: they carry no usable signal for the current/reference ratio.
    /// </summary>
    private const byte MinReferenceLevel = 30;

    /// <summary>Lower bound applied to the estimated dimming factor.</summary>
    private const double MinDimFactor = 0.05;

    /// <summary>Upper bound applied to the estimated dimming factor.</summary>
    private const double MaxDimFactor = 0.95;

    /// <summary>
    /// Pre-process an image for OCR using morphological white top-hat filtering.
    /// Isolates bright tooltip text, suppresses dim background text visible through overlay.
    /// Pipeline: grayscale → morphological top-hat → Otsu binary → upscale
    /// </summary>
    /// <param name="src">Image to pre-process. Not modified.</param>
    /// <param name="kernelSize">Side length of the square structuring element used for the top-hat.</param>
    /// <param name="upscale">Integer scale factor applied to the result; values of 1 or less skip resizing.</param>
    /// <returns>A new bitmap with black text on a white background. The caller owns and disposes it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="src"/> is null.</exception>
    public static Bitmap PreprocessForOcr(Bitmap src, int kernelSize = DefaultKernelSize, int upscale = 2)
    {
        ArgumentNullException.ThrowIfNull(src);

        using var gray = ToGray(src);

        // Morphological white top-hat: isolates bright text on dark background
        using var kernel = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(kernelSize, kernelSize));
        using var tophat = new Mat();
        Cv2.MorphologyEx(gray, tophat, MorphTypes.TopHat, kernel);

        // Otsu binarization: automatic threshold; BinaryInv gives black text on white
        using var binary = new Mat();
        Cv2.Threshold(tophat, binary, 0, 255, ThresholdTypes.BinaryInv | ThresholdTypes.Otsu);

        return ToOutputBitmap(binary, upscale);
    }

    /// <summary>
    /// Background-subtraction preprocessing: uses the reference frame to remove
    /// background bleed-through from the semi-transparent tooltip overlay.
    /// Pipeline: estimate dimming factor → subtract expected background → threshold → upscale
    /// </summary>
    /// <param name="tooltipCrop">Crop of the current frame containing the tooltip.</param>
    /// <param name="referenceCrop">Crop of the reference frame covering the same region, without the tooltip.</param>
    /// <param name="dimPercentile">Percentile (0-100) of the current/reference ratios used as the dimming factor.</param>
    /// <param name="textThresh">Gray level above which a background-subtracted pixel is treated as text.</param>
    /// <param name="upscale">Integer scale factor applied to the result; values of 1 or less skip resizing.</param>
    /// <returns>
    /// A new bitmap with black text on a white background. Falls back to
    /// <see cref="PreprocessForOcr"/> when the two crops differ in size or the reference
    /// has no pixel bright enough to estimate the dimming factor.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either bitmap is null.</exception>
    public static Bitmap PreprocessWithBackgroundSub(Bitmap tooltipCrop, Bitmap referenceCrop,
        int dimPercentile = 25, int textThresh = 30, int upscale = 2)
    {
        ArgumentNullException.ThrowIfNull(tooltipCrop);
        ArgumentNullException.ThrowIfNull(referenceCrop);

        using var curGray = ToGray(tooltipCrop);
        using var refGray = ToGray(referenceCrop);

        // The per-pixel loops below address both images with the same (x, y) through raw
        // pointers. A smaller reference would be read out of bounds, so the reference is
        // only usable when it matches the tooltip crop exactly.
        if (refGray.Rows != curGray.Rows || refGray.Cols != curGray.Cols)
        {
            return PreprocessForOcr(tooltipCrop, DefaultKernelSize, upscale); // fallback
        }

        double? dimFactor = EstimateDimFactor(curGray, refGray, dimPercentile);
        if (dimFactor is null)
        {
            return PreprocessForOcr(tooltipCrop, DefaultKernelSize, upscale); // fallback
        }

        // Subtract expected background: text_signal = current - reference × dimFactor
        using var textSignal = SubtractBackground(curGray, refGray, dimFactor.Value);

        // Threshold: pixels above textThresh are text (rendered black by BinaryInv)
        using var binary = new Mat();
        Cv2.Threshold(textSignal, binary, textThresh, 255, ThresholdTypes.BinaryInv);

        return ToOutputBitmap(binary, upscale);
    }

    /// <summary>Converts a bitmap to a new single-channel grayscale Mat owned by the caller.</summary>
    private static Mat ToGray(Bitmap bitmap)
    {
        using var source = BitmapConverter.ToMat(bitmap);
        int channels = source.Channels();

        // A Mat that is already single-channel cannot go through a BGR(A)→gray conversion.
        if (channels == 1)
        {
            return source.Clone();
        }

        var gray = new Mat();
        try
        {
            var code = channels == 3 ? ColorConversionCodes.BGR2GRAY : ColorConversionCodes.BGRA2GRAY;
            Cv2.CvtColor(source, gray, code);
            return gray;
        }
        catch
        {
            gray.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Estimates the overlay dimming factor as a low percentile of current/reference
    /// ratios, clamped to a sane range. Returns null when no reference pixel is bright
    /// enough to contribute. Both Mats must be single-channel 8-bit and the same size.
    /// </summary>
    private static double? EstimateDimFactor(Mat curGray, Mat refGray, int dimPercentile)
    {
        int rows = curGray.Rows, cols = curGray.Cols;

        // For non-text pixels: current ≈ reference × dim_factor
        // Collect ratios where reference is bright enough to be meaningful
        var ratios = new List<double>(rows * cols);
        unsafe
        {
            byte* curPtr = (byte*)curGray.Data;
            byte* refPtr = (byte*)refGray.Data;
            int curStep = (int)curGray.Step();
            int refStep = (int)refGray.Step();

            for (int y = 0; y < rows; y++)
            {
                byte* curRow = curPtr + y * curStep;
                byte* refRow = refPtr + y * refStep;
                for (int x = 0; x < cols; x++)
                {
                    byte r = refRow[x];
                    if (r > MinReferenceLevel) // skip very dark reference pixels (no signal)
                    {
                        ratios.Add((double)curRow[x] / r);
                    }
                }
            }
        }

        if (ratios.Count == 0)
        {
            return null;
        }

        // Use a low percentile of ratios as the dimming factor.
        // Text pixels have high ratios (bright on dark), overlay pixels have low ratios.
        // A low percentile captures the overlay dimming, ignoring text.
        ratios.Sort();

        // Index computed in long so Count × percentile cannot overflow int.
        int idx = (int)Math.Clamp((long)ratios.Count * dimPercentile / 100, 0, ratios.Count - 1);

        // Clamp to sane range
        return Math.Clamp(ratios[idx], MinDimFactor, MaxDimFactor);
    }

    /// <summary>
    /// Builds a new 8-bit Mat holding <c>current - reference × dimFactor</c> per pixel,
    /// clamped to 0-255 and truncated. Both inputs must be single-channel 8-bit and the same size.
    /// </summary>
    private static Mat SubtractBackground(Mat curGray, Mat refGray, double dimFactor)
    {
        int rows = curGray.Rows, cols = curGray.Cols;
        var textSignal = new Mat(rows, cols, MatType.CV_8UC1);

        unsafe
        {
            byte* curPtr = (byte*)curGray.Data;
            byte* refPtr = (byte*)refGray.Data;
            byte* outPtr = (byte*)textSignal.Data;
            int curStep = (int)curGray.Step();
            int refStep = (int)refGray.Step();
            int outStep = (int)textSignal.Step();

            for (int y = 0; y < rows; y++)
            {
                byte* curRow = curPtr + y * curStep;
                byte* refRow = refPtr + y * refStep;
                byte* outRow = outPtr + y * outStep;
                for (int x = 0; x < cols; x++)
                {
                    double expected = refRow[x] * dimFactor;
                    double signal = curRow[x] - expected;
                    outRow[x] = (byte)Math.Clamp(signal, 0, 255);
                }
            }
        }

        return textSignal;
    }

    /// <summary>
    /// Converts the binarized Mat to a bitmap, first enlarging it by <paramref name="upscale"/>
    /// with cubic interpolation when the factor is greater than 1.
    /// </summary>
    private static Bitmap ToOutputBitmap(Mat binary, int upscale)
    {
        if (upscale <= 1)
        {
            return BitmapConverter.ToBitmap(binary);
        }

        // Upscale for better LSTM recognition
        using var upscaled = new Mat();
        Cv2.Resize(binary, upscaled, new OpenCvSharp.Size(binary.Width * upscale, binary.Height * upscale),
            interpolation: InterpolationFlags.Cubic);
        return BitmapConverter.ToBitmap(upscaled);
    }
}
|