poe2-bot/tools/OcrDaemon/ImageUtils.cs

91 lines
3 KiB
C#

namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using Tesseract;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
/// <summary>
/// Helpers for moving image data between System.Drawing bitmaps, Tesseract
/// <see cref="Pix"/> images and raw byte buffers, plus extraction of OCR
/// results from a recognised Tesseract <see cref="Page"/>.
/// </summary>
static class ImageUtils
{
    /// <summary>
    /// Converts a <see cref="Bitmap"/> to a Tesseract <see cref="Pix"/> by
    /// encoding it as PNG in memory and letting Tesseract decode that buffer.
    /// </summary>
    /// <param name="bitmap">The bitmap to convert.</param>
    /// <returns>A new <see cref="Pix"/>; the caller is responsible for disposing it.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bitmap"/> is null.</exception>
    public static Pix BitmapToPix(Bitmap bitmap)
    {
        ArgumentNullException.ThrowIfNull(bitmap);

        using var pngStream = new MemoryStream();
        bitmap.Save(pngStream, SdImageFormat.Png);
        byte[] pngBytes = pngStream.ToArray();
        return Pix.LoadFromMemory(pngBytes);
    }

    /// <summary>
    /// Walks every text line of a recognised page and collects the words whose
    /// confidence is at least <paramref name="minConfidence"/>.
    /// </summary>
    /// <param name="page">The recognised Tesseract page.</param>
    /// <param name="offsetX">Value added to each word's X coordinate.</param>
    /// <param name="offsetY">Value added to each word's Y coordinate.</param>
    /// <param name="minConfidence">
    /// Minimum word confidence to keep a word; clamped to the range 0..100.
    /// </param>
    /// <returns>
    /// One <see cref="OcrLineResult"/> per text line that has at least one
    /// accepted word. The line text is the accepted words joined by single spaces.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="page"/> is null.</exception>
    public static List<OcrLineResult> ExtractLinesFromPage(Page page, int offsetX, int offsetY, int minConfidence = 50)
    {
        ArgumentNullException.ThrowIfNull(page);

        var lines = new List<OcrLineResult>();
        using var iter = page.GetIterator();
        if (iter == null)
        {
            return lines;
        }

        int minConf = Math.Clamp(minConfidence, 0, 100);
        iter.Begin();
        do
        {
            var words = new List<OcrWordResult>();
            do
            {
                var wordText = iter.GetText(PageIteratorLevel.Word);
                if (string.IsNullOrWhiteSpace(wordText))
                {
                    continue;
                }

                // Reject low-confidence garbage from background bleed.
                float conf = iter.GetConfidence(PageIteratorLevel.Word);
                if (conf < minConf)
                {
                    continue;
                }

                if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var bounds))
                {
                    words.Add(new OcrWordResult
                    {
                        Text = wordText.Trim(),
                        X = bounds.X1 + offsetX,
                        Y = bounds.Y1 + offsetY,
                        Width = bounds.Width,
                        Height = bounds.Height,
                    });
                }
            } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

            if (words.Count > 0)
            {
                var lineText = string.Join(" ", words.Select(w => w.Text));
                lines.Add(new OcrLineResult { Text = lineText, Words = words });
            }

            // Advance by text line across the whole page. The two-argument
            // Next(Block, TextLine) overload stops at the last line of the
            // current block, which would drop every line in later blocks.
        } while (iter.Next(PageIteratorLevel.TextLine));

        return lines;
    }

    /// <summary>
    /// Copies a bitmap's pixels out as 32bpp ARGB bytes and derives an 8-bit
    /// grayscale buffer from them.
    /// </summary>
    /// <param name="bmp">The source bitmap.</param>
    /// <returns>
    /// <c>gray</c>: one byte per pixel, tightly packed (index <c>y * width + x</c>),
    /// holding the integer average of the three colour bytes of that pixel.
    /// <c>argb</c>: the raw locked pixel data, <c>stride</c> bytes per row.
    /// <c>stride</c>: the row stride, in bytes, reported by <c>LockBits</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="bmp"/> is null.</exception>
    public static (byte[] gray, byte[] argb, int stride) BitmapToGrayAndArgb(Bitmap bmp)
    {
        ArgumentNullException.ThrowIfNull(bmp);

        int w = bmp.Width, h = bmp.Height;
        var data = bmp.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);

        int stride;
        byte[] argb;
        try
        {
            stride = data.Stride;
            argb = new byte[stride * h];
            Marshal.Copy(data.Scan0, argb, 0, argb.Length);
        }
        finally
        {
            // Always release the lock, even if the allocation or copy throws.
            bmp.UnlockBits(data);
        }

        byte[] gray = new byte[w * h];
        for (int y = 0; y < h; y++)
        {
            int srcRow = y * stride;
            int dstRow = y * w;
            for (int x = 0; x < w; x++)
            {
                // Average the first three bytes of the 4-byte pixel; the
                // fourth byte (alpha in Format32bppArgb) is ignored.
                int i = srcRow + x * 4;
                gray[dstRow + x] = (byte)((argb[i] + argb[i + 1] + argb[i + 2]) / 3);
            }
        }

        return (gray, argb, stride);
    }

    /// <summary>
    /// Picks an image format from a file path's extension (case-insensitive).
    /// </summary>
    /// <param name="path">File path or name whose extension is inspected.</param>
    /// <returns>
    /// JPEG for <c>.jpg</c>/<c>.jpeg</c>, BMP for <c>.bmp</c>, and PNG for
    /// anything else (including no extension).
    /// </returns>
    public static SdImageFormat GetImageFormat(string path)
    {
        var ext = Path.GetExtension(path).ToLowerInvariant();
        return ext switch
        {
            ".jpg" or ".jpeg" => SdImageFormat.Jpeg,
            ".bmp" => SdImageFormat.Bmp,
            _ => SdImageFormat.Png,
        };
    }
}