using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Windows.Graphics.Imaging;
using Windows.Media.Ocr;
using Windows.Storage.Streams;

// Screen-capture / OCR helper process.
//
// Protocol: after start-up the process writes one JSON line ({"ok":true,"ready":true})
// to stdout, then reads one JSON request per line from stdin and answers each with
// exactly one JSON line on stdout. Supported values of "cmd" (case-insensitive):
//   "ocr"        - capture the screen (or "region") and return recognized text.
//   "screenshot" - capture the screen (or "region") and save it to "path".
//   "capture"    - capture the screen (or "region") and return it as base64 PNG.
// Failures are reported as {"ok":false,"error":"..."}; the loop keeps running.
// Exit code: 1 if the OCR engine cannot be created, 0 when stdin reaches end of stream.

// JSON options. Declared before the first WriteResponse call because WriteResponse
// captures this variable and must never run while it is still unassigned.
var jsonOptions = new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};

// Make GDI capture DPI-aware so coordinates match physical pixels.
SetProcessDPIAware();

// Pre-create the OCR engine (reused across all requests).
var ocrEngine = OcrEngine.TryCreateFromUserProfileLanguages();
if (ocrEngine is null)
{
    WriteResponse(new ErrorResponse("Failed to create OCR engine. Ensure a language pack is installed."));
    return 1;
}

// Signal ready.
WriteResponse(new ReadyResponse());

// Main loop: read one JSON line, handle, write one JSON line.
var stdin = Console.In;
string? line;
while ((line = stdin.ReadLine()) != null)
{
    line = line.Trim();
    if (line.Length == 0)
    {
        continue;
    }

    try
    {
        var request = JsonSerializer.Deserialize<Request>(line, jsonOptions);
        if (request is null)
        {
            // Deserialize yields null for the JSON literal `null`.
            WriteResponse(new ErrorResponse("Failed to parse request"));
            continue;
        }

        switch (request.Cmd?.ToLowerInvariant())
        {
            case "ocr":
                HandleOcr(request, ocrEngine);
                break;
            case "screenshot":
                HandleScreenshot(request);
                break;
            case "capture":
                HandleCapture(request);
                break;
            default:
                WriteResponse(new ErrorResponse($"Unknown command: {request.Cmd}"));
                break;
        }
    }
    catch (Exception ex)
    {
        // Any failure (malformed JSON, capture error, OCR error, file error) becomes
        // an error response so the caller always receives one line per request.
        WriteResponse(new ErrorResponse(ex.Message));
    }
}

return 0;

// ── Handlers ────────────────────────────────────────────────────────────────

// Captures the requested region (or the primary screen), runs OCR on it and writes
// an OcrResponse. Word rectangles are the ones reported by the OCR engine for the
// captured bitmap, rounded to integers; they are not offset by the region origin.
void HandleOcr(Request req, OcrEngine engine)
{
    using var bitmap = CaptureScreen(req.Region);

    // Report an oversized capture explicitly instead of passing it to the engine.
    var maxDimension = OcrEngine.MaxImageDimension;
    if (bitmap.Width > maxDimension || bitmap.Height > maxDimension)
    {
        WriteResponse(new ErrorResponse(
            $"Image is too large for OCR ({bitmap.Width}x{bitmap.Height}); maximum dimension is {maxDimension}."));
        return;
    }

    // SoftwareBitmap wraps native memory; dispose it once recognition has finished.
    using var softwareBitmap = BitmapToSoftwareBitmap(bitmap);
    var result = engine.RecognizeAsync(softwareBitmap).AsTask().GetAwaiter().GetResult();

    var lines = new List<OcrLineResult>();
    foreach (var ocrLine in result.Lines)
    {
        var words = new List<OcrWordResult>();
        foreach (var word in ocrLine.Words)
        {
            var rect = word.BoundingRect;
            words.Add(new OcrWordResult
            {
                Text = word.Text,
                X = (int)Math.Round(rect.X),
                Y = (int)Math.Round(rect.Y),
                Width = (int)Math.Round(rect.Width),
                Height = (int)Math.Round(rect.Height),
            });
        }

        lines.Add(new OcrLineResult { Text = ocrLine.Text, Words = words });
    }

    WriteResponse(new OcrResponse { Text = result.Text, Lines = lines });
}

// Captures the requested region (or the primary screen) and saves it to req.Path.
// The image format is chosen from the file extension (see GetImageFormat).
void HandleScreenshot(Request req)
{
    if (string.IsNullOrEmpty(req.Path))
    {
        WriteResponse(new ErrorResponse("screenshot command requires 'path'"));
        return;
    }

    using var bitmap = CaptureScreen(req.Region);
    var format = GetImageFormat(req.Path);
    bitmap.Save(req.Path, format);
    WriteResponse(new OkResponse());
}

// Captures the requested region (or the primary screen) and returns it as a
// base64-encoded PNG in a CaptureResponse.
void HandleCapture(Request req)
{
    using var bitmap = CaptureScreen(req.Region);
    using var ms = new MemoryStream();
    bitmap.Save(ms, ImageFormat.Png);

    // Encode straight from the stream's buffer to avoid copying the whole PNG.
    var base64 = Convert.ToBase64String(ms.GetBuffer(), 0, (int)ms.Length);
    WriteResponse(new CaptureResponse { Image = base64 });
}

// ── Screen Capture ──────────────────────────────────────────────────────────

// Copies pixels from the screen into a new 32bpp ARGB bitmap.
// region: rectangle to capture in screen coordinates; when null, the primary
//         monitor is captured (origin 0,0, size SM_CXSCREEN x SM_CYSCREEN).
// Returns a bitmap that the caller owns and must dispose.
// Throws ArgumentException when the region's width or height is not positive.
Bitmap CaptureScreen(RegionRect? region)
{
    const int SmCxScreen = 0; // SM_CXSCREEN
    const int SmCyScreen = 1; // SM_CYSCREEN

    int x, y, w, h;
    if (region is not null)
    {
        x = region.X;
        y = region.Y;
        w = region.Width;
        h = region.Height;

        // Fail with a descriptive message rather than GDI+'s generic one.
        if (w <= 0 || h <= 0)
        {
            throw new ArgumentException($"Region width and height must be positive (got {w}x{h}).");
        }
    }
    else
    {
        // Primary monitor only.
        x = 0;
        y = 0;
        w = GetSystemMetrics(SmCxScreen);
        h = GetSystemMetrics(SmCyScreen);
    }

    var bitmap = new Bitmap(w, h, PixelFormat.Format32bppArgb);
    try
    {
        using var g = Graphics.FromImage(bitmap);
        g.CopyFromScreen(x, y, 0, 0, new Size(w, h), CopyPixelOperation.SourceCopy);
        return bitmap;
    }
    catch
    {
        // Ownership never reached the caller, so release the bitmap here.
        bitmap.Dispose();
        throw;
    }
}

// ── Bitmap → SoftwareBitmap conversion (in-memory) ─────────────────────────

// Converts a GDI+ bitmap to a WinRT SoftwareBitmap by encoding it as BMP into
// memory and decoding it with BitmapDecoder. The caller owns the returned
// SoftwareBitmap and should dispose it.
SoftwareBitmap BitmapToSoftwareBitmap(Bitmap bitmap)
{
    using var ms = new MemoryStream();
    bitmap.Save(ms, ImageFormat.Bmp);
    ms.Position = 0;

    // The decode completes synchronously below, so the stream can be released on return.
    using var stream = ms.AsRandomAccessStream();
    var decoder = BitmapDecoder.CreateAsync(stream).AsTask().GetAwaiter().GetResult();
    return decoder.GetSoftwareBitmapAsync().AsTask().GetAwaiter().GetResult();
}

// ── Response writing ────────────────────────────────────────────────────────

// Serializes the response with the shared JSON options and writes it as a single
// line to stdout, flushing immediately so the reader sees it without delay.
void WriteResponse(object response)
{
    var json = JsonSerializer.Serialize(response, jsonOptions);
    Console.Out.WriteLine(json);
    Console.Out.Flush();
}

// Maps a file extension to an image format: .jpg/.jpeg → JPEG, .bmp → BMP,
// anything else (including no extension) → PNG.
ImageFormat GetImageFormat(string path)
{
    var ext = Path.GetExtension(path).ToLowerInvariant();
    return ext switch
    {
        ".jpg" or ".jpeg" => ImageFormat.Jpeg,
        ".bmp" => ImageFormat.Bmp,
        _ => ImageFormat.Png,
    };
}

// ── P/Invoke ────────────────────────────────────────────────────────────────

[DllImport("user32.dll")]
static extern bool SetProcessDPIAware();

[DllImport("user32.dll")]
static extern int GetSystemMetrics(int nIndex);

// ── Request / Response Models ───────────────────────────────────────────────

// One request line: the command name plus its optional arguments.
sealed class Request
{
    [JsonPropertyName("cmd")]
    public string? Cmd { get; set; }

    [JsonPropertyName("region")]
    public RegionRect? Region { get; set; }

    [JsonPropertyName("path")]
    public string? Path { get; set; }
}

// Rectangle to capture; passed unchanged to Graphics.CopyFromScreen.
sealed class RegionRect
{
    [JsonPropertyName("x")]
    public int X { get; set; }

    [JsonPropertyName("y")]
    public int Y { get; set; }

    [JsonPropertyName("width")]
    public int Width { get; set; }

    [JsonPropertyName("height")]
    public int Height { get; set; }
}

// Written once at start-up, after the OCR engine has been created.
sealed class ReadyResponse
{
    [JsonPropertyName("ok")]
    public bool Ok => true;

    [JsonPropertyName("ready")]
    public bool Ready => true;
}

// Success response with no payload (used by "screenshot").
sealed class OkResponse
{
    [JsonPropertyName("ok")]
    public bool Ok => true;
}

// Failure response carrying a human-readable message.
sealed class ErrorResponse(string message)
{
    [JsonPropertyName("ok")]
    public bool Ok => false;

    [JsonPropertyName("error")]
    public string Error => message;
}

// Result of "ocr": the full recognized text plus a per-line / per-word breakdown.
sealed class OcrResponse
{
    [JsonPropertyName("ok")]
    public bool Ok => true;

    [JsonPropertyName("text")]
    public string Text { get; set; } = "";

    [JsonPropertyName("lines")]
    public List<OcrLineResult> Lines { get; set; } = [];
}

// One recognized line of text and the words it contains.
sealed class OcrLineResult
{
    [JsonPropertyName("text")]
    public string Text { get; set; } = "";

    [JsonPropertyName("words")]
    public List<OcrWordResult> Words { get; set; } = [];
}

// One recognized word with its bounding rectangle rounded to integers.
sealed class OcrWordResult
{
    [JsonPropertyName("text")]
    public string Text { get; set; } = "";

    [JsonPropertyName("x")]
    public int X { get; set; }

    [JsonPropertyName("y")]
    public int Y { get; set; }

    [JsonPropertyName("width")]
    public int Width { get; set; }

    [JsonPropertyName("height")]
    public int Height { get; set; }
}

// Result of "capture": the captured image as a base64-encoded PNG.
sealed class CaptureResponse
{
    [JsonPropertyName("ok")]
    public bool Ok => true;

    [JsonPropertyName("image")]
    public string Image { get; set; } = "";
}