poe2-bot/src/Automata.Screen/Ocr/WinOcrEngine.cs
2026-02-28 15:13:31 -05:00

67 lines
2.1 KiB
C#

using System.Drawing;
using System.Drawing.Imaging;
using Serilog;
using Windows.Graphics.Imaging;
using Windows.Media.Ocr;
using Windows.Storage.Streams;
using BitmapDecoder = Windows.Graphics.Imaging.BitmapDecoder;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
namespace Automata.Screen.Ocr;
/// <summary>
/// <see cref="IOcrEngine"/> implementation backed by the built-in Windows OCR
/// engine (<c>Windows.Media.Ocr</c>).
/// </summary>
public sealed class WinOcrEngine : IOcrEngine
{
    private readonly OcrEngine _engine;

    /// <summary>Short identifier of this OCR backend.</summary>
    public string Name => "WinOCR";

    /// <summary>
    /// Creates the engine from the current user's profile languages.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when Windows cannot create an OCR engine for the user profile languages.
    /// </exception>
    public WinOcrEngine()
    {
        _engine = OcrEngine.TryCreateFromUserProfileLanguages()
            ?? throw new InvalidOperationException("Windows OCR engine not available");
        Log.Information("WinOcrEngine initialized (language: {Lang})", _engine.RecognizerLanguage.DisplayName);
    }

    /// <summary>
    /// Runs OCR on <paramref name="bitmap"/> and returns the recognized text
    /// together with per-line and per-word results.
    /// </summary>
    /// <param name="bitmap">Image to recognize. Not modified or disposed by this method.</param>
    /// <returns>
    /// An <see cref="OcrResponse"/> whose <c>Text</c> is the recognized lines joined
    /// with <c>\n</c>, and whose <c>Lines</c> carry each word's text and bounding
    /// rectangle (truncated to integer coordinates).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="bitmap"/> is <c>null</c>.</exception>
    public OcrResponse Recognize(Bitmap bitmap)
    {
        ArgumentNullException.ThrowIfNull(bitmap);

        // Convert System.Drawing.Bitmap → PNG stream → WinRT SoftwareBitmap
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        ms.Position = 0;

        // This method is synchronous, so the WinRT async operations are blocked on.
        // The stream adapter and the decoded SoftwareBitmap are disposed on exit
        // instead of waiting for finalization.
        using var stream = ms.AsRandomAccessStream();
        var decoder = BitmapDecoder.CreateAsync(stream).AsTask().GetAwaiter().GetResult();
        using var softwareBitmap = decoder.GetSoftwareBitmapAsync().AsTask().GetAwaiter().GetResult();
        var ocrResult = _engine.RecognizeAsync(softwareBitmap).AsTask().GetAwaiter().GetResult();

        var lines = new List<OcrLine>();
        foreach (var winLine in ocrResult.Lines)
        {
            var words = new List<OcrWord>();
            foreach (var winWord in winLine.Words)
            {
                // BoundingRect uses floating-point coordinates; the casts truncate them.
                var r = winWord.BoundingRect;
                words.Add(new OcrWord
                {
                    Text = winWord.Text,
                    X = (int)r.X,
                    Y = (int)r.Y,
                    Width = (int)r.Width,
                    Height = (int)r.Height,
                });
            }

            lines.Add(new OcrLine
            {
                Text = winLine.Text,
                Words = words,
            });
        }

        var fullText = string.Join("\n", lines.Select(l => l.Text));
        return new OcrResponse { Text = fullText, Lines = lines };
    }

    /// <summary>
    /// No-op: the only field is the WinRT <see cref="OcrEngine"/>, which this class
    /// does not dispose.
    /// </summary>
    public void Dispose() { }
}