namespace OcrDaemon;

using System.Drawing;
using OpenCvSharp;
using OpenCvSharp.Extensions;

/// <summary>
/// Image clean-up routines applied to screen captures before they are handed to the OCR engine.
/// All routines produce dark text on a white background.
/// </summary>
internal static class ImagePreprocessor
{
    /// <summary>Kernel size used when background subtraction falls back to top-hat filtering.</summary>
    private const int FallbackKernelSize = 41;

    /// <summary>Reference pixels at or below this gray level are ignored when estimating the dimming factor.</summary>
    private const int MinReferenceLevel = 30;

    /// <summary>Gray levels strictly below this value count as text when projecting rows.</summary>
    private const int DarkPixelCutoff = 128;

    /// <summary>Lower bound applied to the estimated dimming factor.</summary>
    private const double MinDimFactor = 0.05;

    /// <summary>Upper bound applied to the estimated dimming factor.</summary>
    private const double MaxDimFactor = 0.95;

    /// <summary>
    /// Pre-process an image for OCR using morphological white top-hat filtering.
    /// Isolates bright tooltip text and suppresses dim background text visible through the overlay.
    /// Pipeline: grayscale → morphological top-hat → Otsu binary (inverted) → upscale.
    /// </summary>
    /// <param name="src">Image to process.</param>
    /// <param name="kernelSize">Side length, in pixels, of the square structuring element.</param>
    /// <param name="upscale">Integer scale factor; values of 1 or less leave the size unchanged.</param>
    /// <returns>A new bitmap with black text on white; the caller owns and must dispose it.</returns>
    public static Bitmap PreprocessForOcr(Bitmap src, int kernelSize = 41, int upscale = 2)
    {
        using Mat processed = TopHatMat(src, kernelSize, upscale);
        return BitmapConverter.ToBitmap(processed);
    }

    /// <summary>
    /// Background-subtraction preprocessing: uses the reference frame to remove background
    /// bleed-through from the semi-transparent tooltip overlay.
    /// Pipeline: estimate dimming factor → subtract expected background → threshold → upscale.
    /// </summary>
    /// <remarks>
    /// Falls back to top-hat preprocessing of <paramref name="tooltipCrop"/> when the reference
    /// cannot be used: either the two crops differ in size, or no reference pixel is bright
    /// enough to yield a ratio.
    /// </remarks>
    /// <param name="tooltipCrop">Current frame, cropped to the tooltip region.</param>
    /// <param name="referenceCrop">Reference frame cropped to the same region, captured without the tooltip.</param>
    /// <param name="dimPercentile">Percentile (0-100) of the current/reference ratios taken as the dimming factor.</param>
    /// <param name="textThresh">Signal level that a pixel must exceed to be treated as text.</param>
    /// <param name="upscale">Integer scale factor; values of 1 or less leave the size unchanged.</param>
    /// <param name="softThreshold">
    /// <c>true</c> for a contrast-stretched grayscale result, <c>false</c> for a hard binary threshold.
    /// </param>
    /// <returns>A new single-channel Mat with dark text on white; the caller must dispose it.</returns>
    public static Mat PreprocessWithBackgroundSubMat(Bitmap tooltipCrop, Bitmap referenceCrop,
        int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
    {
        ArgumentNullException.ThrowIfNull(tooltipCrop);
        ArgumentNullException.ThrowIfNull(referenceCrop);

        // The per-pixel loops below index both images with the same coordinates through raw
        // pointers, so a size mismatch would read outside the reference buffer.
        if (tooltipCrop.Width != referenceCrop.Width || tooltipCrop.Height != referenceCrop.Height)
        {
            return TopHatMat(tooltipCrop, FallbackKernelSize, upscale);
        }

        using Mat curMat = BitmapConverter.ToMat(tooltipCrop);
        using Mat refMat = BitmapConverter.ToMat(referenceCrop);
        using var curGray = new Mat();
        using var refGray = new Mat();
        Cv2.CvtColor(curMat, curGray, ColorConversionCodes.BGRA2GRAY);
        Cv2.CvtColor(refMat, refGray, ColorConversionCodes.BGRA2GRAY);

        double? dimFactor = EstimateDimFactor(curGray, refGray, dimPercentile);
        if (dimFactor is null)
        {
            // The reference carries no usable signal, so there is nothing to subtract.
            return TopHatMat(tooltipCrop, FallbackKernelSize, upscale);
        }

        using Mat textSignal = SubtractBackground(curGray, refGray, dimFactor.Value);
        using Mat thresholded = softThreshold
            ? SoftThreshold(textSignal, textThresh)
            : HardThreshold(textSignal, textThresh);
        return UpscaleMat(thresholded, upscale);
    }

    /// <summary>
    /// Background-subtraction preprocessing returning a Bitmap (convenience wrapper around
    /// <see cref="PreprocessWithBackgroundSubMat"/>).
    /// </summary>
    /// <param name="tooltipCrop">Current frame, cropped to the tooltip region.</param>
    /// <param name="referenceCrop">Reference frame cropped to the same region.</param>
    /// <param name="dimPercentile">Percentile (0-100) of the current/reference ratios taken as the dimming factor.</param>
    /// <param name="textThresh">Signal level that a pixel must exceed to be treated as text.</param>
    /// <param name="upscale">Integer scale factor; values of 1 or less leave the size unchanged.</param>
    /// <param name="softThreshold">
    /// <c>true</c> for a contrast-stretched grayscale result, <c>false</c> for a hard binary threshold.
    /// </param>
    /// <returns>A new bitmap; the caller owns and must dispose it.</returns>
    public static Bitmap PreprocessWithBackgroundSub(Bitmap tooltipCrop, Bitmap referenceCrop,
        int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
    {
        using Mat mat = PreprocessWithBackgroundSubMat(
            tooltipCrop, referenceCrop, dimPercentile, textThresh, upscale, softThreshold);
        return BitmapConverter.ToBitmap(mat);
    }

    /// <summary>
    /// Detect text lines via horizontal projection on a binary image.
    /// The image should be inverted: text = black (0), background = white (255).
    /// </summary>
    /// <remarks>
    /// One byte is read per pixel, so <paramref name="binary"/> is expected to be a
    /// single-channel 8-bit Mat.
    /// </remarks>
    /// <param name="binary">Image to scan.</param>
    /// <param name="minRowPixels">Minimum number of dark pixels for a row to count as part of a line.</param>
    /// <param name="gapTolerance">Number of consecutive inactive rows tolerated inside a single line.</param>
    /// <returns>Inclusive (yStart, yEnd) row ranges, top to bottom, one per detected text line.</returns>
    public static unsafe List<(int yStart, int yEnd)> DetectTextLines(
        Mat binary, int minRowPixels = 2, int gapTolerance = 5)
    {
        ArgumentNullException.ThrowIfNull(binary);

        int rows = binary.Rows, cols = binary.Cols;

        // Count dark (text) pixels per row. A mid-gray cutoff is used rather than == 0 because
        // cubic upscaling introduces anti-aliased intermediate values.
        var rowCounts = new int[rows];
        byte* basePtr = (byte*)binary.Data;
        long step = binary.Step();
        for (int y = 0; y < rows; y++)
        {
            byte* row = basePtr + y * step;
            int dark = 0;
            for (int x = 0; x < cols; x++)
            {
                if (row[x] < DarkPixelCutoff)
                {
                    dark++;
                }
            }

            rowCounts[y] = dark;
        }

        // Group active rows into runs, bridging gaps of up to gapTolerance inactive rows.
        var lines = new List<(int yStart, int yEnd)>();
        int lineStart = -1, lastActive = -1;
        for (int y = 0; y < rows; y++)
        {
            if (rowCounts[y] >= minRowPixels)
            {
                if (lineStart < 0)
                {
                    lineStart = y;
                }

                lastActive = y;
            }
            else if (lineStart >= 0 && y - lastActive > gapTolerance)
            {
                lines.Add((lineStart, lastActive));
                lineStart = -1;
            }
        }

        // Close a line that runs to the bottom edge of the image.
        if (lineStart >= 0)
        {
            lines.Add((lineStart, lastActive));
        }

        return lines;
    }

    /// <summary>
    /// Runs grayscale → white top-hat → inverted Otsu threshold → upscale and returns the result
    /// as a new Mat (caller must dispose).
    /// </summary>
    private static Mat TopHatMat(Bitmap src, int kernelSize, int upscale)
    {
        using Mat color = BitmapConverter.ToMat(src);
        using var gray = new Mat();
        Cv2.CvtColor(color, gray, ColorConversionCodes.BGRA2GRAY);

        // Morphological white top-hat: isolates bright text on a dark background.
        using Mat kernel = Cv2.GetStructuringElement(
            MorphShapes.Rect, new OpenCvSharp.Size(kernelSize, kernelSize));
        using var tophat = new Mat();
        Cv2.MorphologyEx(gray, tophat, MorphTypes.TopHat, kernel);

        // Otsu binarization: automatic threshold, inverted so text is black on white.
        using var binary = new Mat();
        Cv2.Threshold(tophat, binary, 0, 255, ThresholdTypes.BinaryInv | ThresholdTypes.Otsu);

        // Upscale for better LSTM recognition.
        return UpscaleMat(binary, upscale);
    }

    /// <summary>
    /// Estimates how much the overlay dims the background, as the given percentile of
    /// current/reference ratios. Returns <c>null</c> when no reference pixel is bright enough.
    /// Both Mats must be single-channel 8-bit images of identical size.
    /// </summary>
    private static unsafe double? EstimateDimFactor(Mat curGray, Mat refGray, int dimPercentile)
    {
        int rows = curGray.Rows, cols = curGray.Cols;
        byte* curBase = (byte*)curGray.Data;
        byte* refBase = (byte*)refGray.Data;
        long curStep = curGray.Step();
        long refStep = refGray.Step();

        // For non-text pixels: current ≈ reference × dimFactor.
        var ratios = new List<double>();
        for (int y = 0; y < rows; y++)
        {
            byte* curRow = curBase + y * curStep;
            byte* refRow = refBase + y * refStep;
            for (int x = 0; x < cols; x++)
            {
                byte reference = refRow[x];
                if (reference > MinReferenceLevel) // skip very dark reference pixels (no signal)
                {
                    ratios.Add((double)curRow[x] / reference);
                }
            }
        }

        if (ratios.Count == 0)
        {
            return null;
        }

        // Text pixels have high ratios (bright on dark) and overlay pixels have low ratios,
        // so a low percentile captures the overlay dimming while ignoring text.
        ratios.Sort();

        // Widen before multiplying so that very large images cannot overflow the rank.
        long rank = (long)ratios.Count * dimPercentile / 100;
        int idx = (int)Math.Clamp(rank, 0L, ratios.Count - 1L);

        return Math.Clamp(ratios[idx], MinDimFactor, MaxDimFactor);
    }

    /// <summary>
    /// Computes text_signal = current − reference × dimFactor per pixel, saturated to 0..255.
    /// Returns a new Mat (caller must dispose).
    /// </summary>
    private static unsafe Mat SubtractBackground(Mat curGray, Mat refGray, double dimFactor)
    {
        int rows = curGray.Rows, cols = curGray.Cols;
        var textSignal = new Mat(rows, cols, MatType.CV_8UC1);

        byte* curBase = (byte*)curGray.Data;
        byte* refBase = (byte*)refGray.Data;
        byte* outBase = (byte*)textSignal.Data;
        long curStep = curGray.Step();
        long refStep = refGray.Step();
        long outStep = textSignal.Step();

        for (int y = 0; y < rows; y++)
        {
            byte* curRow = curBase + y * curStep;
            byte* refRow = refBase + y * refStep;
            byte* outRow = outBase + y * outStep;
            for (int x = 0; x < cols; x++)
            {
                double expected = refRow[x] * dimFactor;
                double signal = curRow[x] - expected;
                outRow[x] = (byte)Math.Clamp(signal, 0.0, 255.0);
            }
        }

        return textSignal;
    }

    /// <summary>
    /// Soft threshold: clip below <paramref name="textThresh"/>, contrast-stretch, invert.
    /// Produces grayscale anti-aliased text on a white background, matching the training data
    /// format (text2image renders). Returns a new Mat (caller must dispose).
    /// </summary>
    private static unsafe Mat SoftThreshold(Mat textSignal, int textThresh)
    {
        int rows = textSignal.Rows, cols = textSignal.Cols;
        var result = new Mat(rows, cols, MatType.CV_8UC1);

        byte* srcBase = (byte*)textSignal.Data;
        byte* dstBase = (byte*)result.Data;
        long srcStep = textSignal.Step();
        long dstStep = result.Step();

        // Find the strongest signal above the threshold; starting at 1 avoids dividing by zero.
        int maxClipped = 1;
        for (int y = 0; y < rows; y++)
        {
            byte* srcRow = srcBase + y * srcStep;
            for (int x = 0; x < cols; x++)
            {
                int clipped = srcRow[x] - textThresh;
                if (clipped > maxClipped)
                {
                    maxClipped = clipped;
                }
            }
        }

        // Clip, stretch, invert: background → 255 (white), text → dark.
        for (int y = 0; y < rows; y++)
        {
            byte* srcRow = srcBase + y * srcStep;
            byte* dstRow = dstBase + y * dstStep;
            for (int x = 0; x < cols; x++)
            {
                int clipped = srcRow[x] - textThresh;
                dstRow[x] = clipped <= 0
                    ? byte.MaxValue
                    : (byte)(255 - clipped * 255 / maxClipped);
            }
        }

        return result;
    }

    /// <summary>
    /// Hard inverted binary threshold at <paramref name="textThresh"/>.
    /// Returns a new Mat (caller must dispose).
    /// </summary>
    private static Mat HardThreshold(Mat textSignal, int textThresh)
    {
        var result = new Mat();
        try
        {
            Cv2.Threshold(textSignal, result, textThresh, 255, ThresholdTypes.BinaryInv);
            return result;
        }
        catch
        {
            // Do not leak the native buffer when OpenCV rejects the input.
            result.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Returns a new Mat (caller must dispose) holding <paramref name="src"/> enlarged by
    /// <paramref name="factor"/> with cubic interpolation, or a plain copy when the factor
    /// is 1 or less. Does NOT dispose <paramref name="src"/>.
    /// </summary>
    private static Mat UpscaleMat(Mat src, int factor)
    {
        if (factor <= 1)
        {
            return src.Clone();
        }

        var upscaled = new Mat();
        try
        {
            Cv2.Resize(src, upscaled,
                new OpenCvSharp.Size(src.Width * factor, src.Height * factor),
                interpolation: InterpolationFlags.Cubic);
            return upscaled;
        }
        catch
        {
            upscaled.Dispose();
            throw;
        }
    }
}