using System.Drawing;
using System.Drawing.Imaging;
using Serilog;
using Windows.Graphics.Imaging;
using Windows.Media.Ocr;
using Windows.Storage.Streams;
using BitmapDecoder = Windows.Graphics.Imaging.BitmapDecoder;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;

namespace Automata.Screen.Ocr;

/// <summary>
/// <see cref="IOcrEngine"/> implementation backed by the Windows Runtime
/// <see cref="OcrEngine"/>, created from the user's profile languages.
/// </summary>
public sealed class WinOcrEngine : IOcrEngine
{
    private readonly OcrEngine _engine;

    /// <summary>Display name of this engine.</summary>
    public string Name => "WinOCR";

    /// <summary>
    /// Creates the underlying Windows OCR engine and logs the recognizer language.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when <see cref="OcrEngine.TryCreateFromUserProfileLanguages"/> returns null.
    /// </exception>
    public WinOcrEngine()
    {
        _engine = OcrEngine.TryCreateFromUserProfileLanguages()
            ?? throw new InvalidOperationException("Windows OCR engine not available");

        Log.Information(
            "WinOcrEngine initialized (language: {Lang})",
            _engine.RecognizerLanguage.DisplayName);
    }

    /// <summary>
    /// Runs OCR over <paramref name="bitmap"/> and maps the WinRT result into
    /// an <see cref="OcrResponse"/>.
    /// </summary>
    /// <param name="bitmap">Image to recognize. Must not be null.</param>
    /// <returns>
    /// A response whose <c>Lines</c> mirror the recognized lines (each with its words and
    /// their bounding rectangles truncated to integers) and whose <c>Text</c> is the line
    /// texts joined with <c>"\n"</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="bitmap"/> is null.</exception>
    public OcrResponse Recognize(Bitmap bitmap)
    {
        ArgumentNullException.ThrowIfNull(bitmap);

        // Convert System.Drawing.Bitmap → PNG stream → WinRT SoftwareBitmap
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        ms.Position = 0;

        // The stream adapter and the decoded SoftwareBitmap are both disposable;
        // release them deterministically instead of waiting for finalization.
        using var stream = ms.AsRandomAccessStream();

        // The method signature is synchronous, so the WinRT async operations are
        // awaited by blocking.
        // NOTE(review): blocking like this can deadlock if called from a thread with a
        // single-threaded synchronization context — confirm callers run off the UI thread.
        var decoder = BitmapDecoder.CreateAsync(stream).AsTask().GetAwaiter().GetResult();
        using var softwareBitmap = decoder.GetSoftwareBitmapAsync().AsTask().GetAwaiter().GetResult();
        var ocrResult = _engine.RecognizeAsync(softwareBitmap).AsTask().GetAwaiter().GetResult();

        // NOTE(review): OcrLine/OcrWord here are presumably the project's own DTOs
        // (the object initializers set X/Y/Width/Height), not the WinRT types of the
        // same name from Windows.Media.Ocr — confirm.
        var lines = new List<OcrLine>();
        foreach (var winLine in ocrResult.Lines)
        {
            var words = new List<OcrWord>();
            foreach (var winWord in winLine.Words)
            {
                var r = winWord.BoundingRect;
                words.Add(new OcrWord
                {
                    Text = winWord.Text,
                    // BoundingRect coordinates are floating point; casts truncate toward zero.
                    X = (int)r.X,
                    Y = (int)r.Y,
                    Width = (int)r.Width,
                    Height = (int)r.Height,
                });
            }

            lines.Add(new OcrLine
            {
                Text = winLine.Text,
                Words = words,
            });
        }

        var fullText = string.Join("\n", lines.Select(l => l.Text));
        return new OcrResponse { Text = fullText, Lines = lines };
    }

    /// <summary>
    /// No-op: this class holds no disposable state of its own.
    /// </summary>
    public void Dispose()
    {
    }
}