poe2-bot/tools/OcrDaemon/ImagePreprocessor.cs

126 lines
5.2 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

namespace OcrDaemon;
using System.Drawing;
using OpenCvSharp;
using OpenCvSharp.Extensions;
/// <summary>
/// Image pre-processing helpers that turn a captured tooltip bitmap into a
/// binarized (black text on white) image, optionally upscaled, for OCR.
/// </summary>
internal static class ImagePreprocessor
{
    /// <summary>Side length of the top-hat structuring element used by default and by fallbacks.</summary>
    private const int DefaultTopHatKernelSize = 41;

    /// <summary>Reference pixels at or below this gray level are ignored when estimating the dimming factor.</summary>
    private const byte MinReferenceBrightness = 30;

    /// <summary>Lower bound applied to the estimated dimming factor.</summary>
    private const double MinDimFactor = 0.05;

    /// <summary>Upper bound applied to the estimated dimming factor.</summary>
    private const double MaxDimFactor = 0.95;

    /// <summary>
    /// Pre-process an image for OCR using morphological white top-hat filtering.
    /// Isolates bright tooltip text, suppresses dim background text visible through overlay.
    /// Pipeline: grayscale → morphological top-hat → Otsu binary → upscale
    /// </summary>
    /// <param name="src">Bitmap to pre-process.</param>
    /// <param name="kernelSize">Side length, in pixels, of the square top-hat structuring element.</param>
    /// <param name="upscale">Integer scale factor applied to the result; values of 1 or less skip resizing.</param>
    /// <returns>A new bitmap holding the binarized (and possibly upscaled) image.</returns>
    public static Bitmap PreprocessForOcr(Bitmap src, int kernelSize = DefaultTopHatKernelSize, int upscale = 2)
    {
        using var mat = BitmapConverter.ToMat(src);
        using var gray = new Mat();
        Cv2.CvtColor(mat, gray, ColorConversionCodes.BGRA2GRAY);

        // Morphological white top-hat: isolates bright text on dark background.
        using var kernel = Cv2.GetStructuringElement(
            MorphShapes.Rect, new OpenCvSharp.Size(kernelSize, kernelSize));
        using var tophat = new Mat();
        Cv2.MorphologyEx(gray, tophat, MorphTypes.TopHat, kernel);

        // Otsu binarization: automatic threshold, inverted so text ends up black on white.
        using var binary = new Mat();
        Cv2.Threshold(tophat, binary, 0, 255, ThresholdTypes.BinaryInv | ThresholdTypes.Otsu);

        return ToUpscaledBitmap(binary, upscale);
    }

    /// <summary>
    /// Background-subtraction preprocessing: uses the reference frame to remove
    /// background bleed-through from the semi-transparent tooltip overlay.
    /// Pipeline: estimate dimming factor → subtract expected background → threshold → upscale
    /// </summary>
    /// <remarks>
    /// Falls back to <see cref="PreprocessForOcr"/> when the two crops differ in size
    /// (a per-pixel comparison is not possible) or when no reference pixel is bright
    /// enough to estimate the dimming factor.
    /// </remarks>
    /// <param name="tooltipCrop">Crop of the current frame containing the tooltip.</param>
    /// <param name="referenceCrop">Crop of the reference frame covering the same region.</param>
    /// <param name="dimPercentile">Percentile (0-100) of the current/reference ratios used as the dimming factor.</param>
    /// <param name="textThresh">Residual gray level above which a pixel is treated as text.</param>
    /// <param name="upscale">Integer scale factor applied to the result; values of 1 or less skip resizing.</param>
    /// <returns>A new bitmap holding the binarized (and possibly upscaled) image.</returns>
    public static Bitmap PreprocessWithBackgroundSub(Bitmap tooltipCrop, Bitmap referenceCrop,
        int dimPercentile = 25, int textThresh = 30, int upscale = 2)
    {
        using var curMat = BitmapConverter.ToMat(tooltipCrop);
        using var refMat = BitmapConverter.ToMat(referenceCrop);
        using var curGray = new Mat();
        using var refGray = new Mat();
        Cv2.CvtColor(curMat, curGray, ColorConversionCodes.BGRA2GRAY);
        Cv2.CvtColor(refMat, refGray, ColorConversionCodes.BGRA2GRAY);

        // The loops below walk both buffers with raw pointers using the tooltip's
        // dimensions; a differently sized reference would be read out of bounds.
        if (refGray.Rows != curGray.Rows || refGray.Cols != curGray.Cols)
        {
            return PreprocessForOcr(tooltipCrop, DefaultTopHatKernelSize, upscale);
        }

        double? dimFactor = EstimateDimFactor(curGray, refGray, dimPercentile);
        if (dimFactor is null)
        {
            return PreprocessForOcr(tooltipCrop, DefaultTopHatKernelSize, upscale); // fallback
        }

        using var textSignal = SubtractBackground(curGray, refGray, dimFactor.Value);

        // Threshold: pixels above textThresh are text (rendered black on white).
        using var binary = new Mat();
        Cv2.Threshold(textSignal, binary, textThresh, 255, ThresholdTypes.BinaryInv);

        return ToUpscaledBitmap(binary, upscale);
    }

    /// <summary>
    /// Estimates the overlay dimming factor from two equally sized 8-bit grayscale images.
    /// For non-text pixels: current ≈ reference × dim_factor.
    /// </summary>
    /// <returns>The clamped dimming factor, or <c>null</c> when no reference pixel is bright enough.</returns>
    private static double? EstimateDimFactor(Mat curGray, Mat refGray, int dimPercentile)
    {
        int rows = curGray.Rows;
        int cols = curGray.Cols;
        var ratios = new List<double>();

        unsafe
        {
            byte* curBase = (byte*)curGray.Data;
            byte* refBase = (byte*)refGray.Data;
            long curStep = curGray.Step();
            long refStep = refGray.Step();

            for (int y = 0; y < rows; y++)
            {
                // Row pointers are computed with the 64-bit step so large images cannot overflow.
                byte* curRow = curBase + y * curStep;
                byte* refRow = refBase + y * refStep;

                for (int x = 0; x < cols; x++)
                {
                    byte r = refRow[x];

                    // Skip very dark reference pixels (no signal).
                    if (r > MinReferenceBrightness)
                    {
                        ratios.Add((double)curRow[x] / r);
                    }
                }
            }
        }

        if (ratios.Count == 0)
        {
            return null;
        }

        // Use a low percentile of ratios as the dimming factor.
        // Text pixels have high ratios (bright on dark), overlay pixels have low ratios.
        // A low percentile captures the overlay dimming, ignoring text.
        ratios.Sort();
        long lastIndex = ratios.Count - 1;
        long rawIndex = (long)ratios.Count * dimPercentile / 100; // 64-bit: no overflow on big crops
        int idx = (int)Math.Clamp(rawIndex, 0L, lastIndex);

        // Clamp to sane range.
        return Math.Clamp(ratios[idx], MinDimFactor, MaxDimFactor);
    }

    /// <summary>
    /// Computes text_signal = current - reference × dimFactor per pixel, saturated to 0..255.
    /// Both inputs must be 8-bit single-channel images of identical size.
    /// </summary>
    /// <returns>A new single-channel 8-bit <see cref="Mat"/>; the caller owns and disposes it.</returns>
    private static Mat SubtractBackground(Mat curGray, Mat refGray, double dimFactor)
    {
        int rows = curGray.Rows;
        int cols = curGray.Cols;
        var textSignal = new Mat(rows, cols, MatType.CV_8UC1);

        unsafe
        {
            byte* curBase = (byte*)curGray.Data;
            byte* refBase = (byte*)refGray.Data;
            byte* outBase = (byte*)textSignal.Data;
            long curStep = curGray.Step();
            long refStep = refGray.Step();
            long outStep = textSignal.Step();

            for (int y = 0; y < rows; y++)
            {
                byte* curRow = curBase + y * curStep;
                byte* refRow = refBase + y * refStep;
                byte* outRow = outBase + y * outStep;

                for (int x = 0; x < cols; x++)
                {
                    double expected = refRow[x] * dimFactor;
                    double signal = curRow[x] - expected;
                    outRow[x] = (byte)Math.Clamp(signal, 0, 255);
                }
            }
        }

        return textSignal;
    }

    /// <summary>
    /// Converts a binarized image to a bitmap, first enlarging it by <paramref name="upscale"/>
    /// with cubic interpolation when the factor is greater than 1.
    /// </summary>
    private static Bitmap ToUpscaledBitmap(Mat binary, int upscale)
    {
        if (upscale <= 1)
        {
            return BitmapConverter.ToBitmap(binary);
        }

        // Upscale for better LSTM recognition.
        using var upscaled = new Mat();
        Cv2.Resize(binary, upscaled,
            new OpenCvSharp.Size(binary.Width * upscale, binary.Height * upscale),
            interpolation: InterpolationFlags.Cubic);
        return BitmapConverter.ToBitmap(upscaled);
    }
}