diff --git a/src/dashboard/DashboardServer.ts b/src/dashboard/DashboardServer.ts index 2ebcffa..1e1f94b 100644 --- a/src/dashboard/DashboardServer.ts +++ b/src/dashboard/DashboardServer.ts @@ -2,6 +2,7 @@ import express from 'express'; import http from 'http'; import { WebSocketServer, WebSocket } from 'ws'; import path from 'path'; +import { mkdir } from 'fs/promises'; import { fileURLToPath } from 'url'; import { logger } from '../util/logger.js'; import { sleep } from '../util/sleep.js'; @@ -275,21 +276,47 @@ export class DashboardServer { ...matches.map(m => ({ row: m.row, col: m.col, label: `MATCH ${(m.similarity * 100).toFixed(0)}%` })), ]; - // Focus game and hover each cell + // Focus game, take one snapshot with mouse on empty space await this.debug.gameController.focusGame(); + await mkdir('items', { recursive: true }); + const tooltips: Array<{ row: number; col: number; label: string; text: string }> = []; + const ts = Date.now(); + const reg = result.layout.region; + const cellW = reg.width / result.layout.cols; + const cellH = reg.height / result.layout.rows; + + // Move mouse to empty space and take a single reference snapshot + this.debug.gameController.moveMouseInstant(reg.x + reg.width + 50, reg.y + reg.height / 2); + await sleep(50); + await this.debug.screenReader.snapshot(); + await this.debug.screenReader.saveScreenshot(`items/${ts}_snapshot.png`); + await sleep(200); // Let game settle before first hover + for (const cell of hoverCells) { - const center = result.layout.region; - const cellW = center.width / result.layout.cols; - const cellH = center.height / result.layout.rows; - const x = Math.round(center.x + cell.col * cellW + cellW / 2); - const y = Math.round(center.y + cell.row * cellH + cellH / 2); - this.broadcastLog('info', `Hovering ${cell.label} (${cell.row},${cell.col}) at (${x},${y})...`); - await this.debug.gameController.moveMouseTo(x, y); - await sleep(1000); + const cellStart = performance.now(); + const x = Math.round(reg.x + 
cell.col * cellW + cellW / 2); + const y = Math.round(reg.y + cell.row * cellH + cellH / 2); + + // Quick Bézier move to the cell — tooltip appears on hover + await this.debug.gameController.moveMouseFast(x, y); + await sleep(50); + const afterMove = performance.now(); + + // Diff-OCR: finds tooltip by row/column density of darkened pixels + const imgPath = `items/${ts}_${cell.row}-${cell.col}.png`; + const diff = await this.debug.screenReader.diffOcr(imgPath); + const afterOcr = performance.now(); + const text = diff.text.trim(); + + const regionInfo = diff.region ? ` at (${diff.region.x},${diff.region.y}) ${diff.region.width}x${diff.region.height}` : ''; + tooltips.push({ row: cell.row, col: cell.col, label: cell.label, text }); + + this.broadcastLog('info', + `${cell.label} (${cell.row},${cell.col}) [move: ${(afterMove - cellStart).toFixed(0)}ms, ocr: ${(afterOcr - afterMove).toFixed(0)}ms, total: ${(afterOcr - cellStart).toFixed(0)}ms]${regionInfo}: ${text.substring(0, 150)}${text.length > 150 ? '...' 
: ''}`); } - this.broadcastLog('info', `Done — hovered ${hoverCells.length} cells`); - res.json({ ok: true, itemSize, matchCount: matches.length, hoveredCount: hoverCells.length }); + this.broadcastLog('info', `Done — hovered ${hoverCells.length} cells, read ${tooltips.filter(t => t.text).length} tooltips`); + res.json({ ok: true, itemSize, matchCount: matches.length, hoveredCount: hoverCells.length, tooltips }); } catch (err) { logger.error({ err }, 'Debug test-match-hover failed'); res.status(500).json({ error: 'Test match hover failed' }); diff --git a/src/game/GameController.ts b/src/game/GameController.ts index c2da7ff..d785612 100644 --- a/src/game/GameController.ts +++ b/src/game/GameController.ts @@ -91,6 +91,14 @@ export class GameController { await this.inputSender.moveMouse(x, y); } + moveMouseInstant(x: number, y: number): void { + this.inputSender.moveMouseInstant(x, y); + } + + async moveMouseFast(x: number, y: number): Promise { + await this.inputSender.moveMouseFast(x, y); + } + async leftClickAt(x: number, y: number): Promise { await this.inputSender.leftClick(x, y); } @@ -99,6 +107,14 @@ export class GameController { await this.inputSender.rightClick(x, y); } + async holdAlt(): Promise { + await this.inputSender.keyDown(VK.MENU); + } + + async releaseAlt(): Promise { + await this.inputSender.keyUp(VK.MENU); + } + async pressEscape(): Promise { await this.inputSender.pressKey(VK.ESCAPE); } diff --git a/src/game/InputSender.ts b/src/game/InputSender.ts index 410ef26..abb1dd0 100644 --- a/src/game/InputSender.ts +++ b/src/game/InputSender.ts @@ -222,6 +222,46 @@ export class InputSender { await randomDelay(5, 15); } + moveMouseInstant(x: number, y: number): void { + this.moveMouseRaw(x, y); + } + + /** Quick Bézier move — ~10-15ms, 5 steps, no jitter. Fast but not a raw teleport. 
*/ + async moveMouseFast(x: number, y: number): Promise { + const start = this.getCursorPos(); + const end: Point = { x, y }; + const dx = end.x - start.x; + const dy = end.y - start.y; + const distance = Math.sqrt(dx * dx + dy * dy); + + if (distance < 10) { + this.moveMouseRaw(x, y); + return; + } + + const perpX = -dy / distance; + const perpY = dx / distance; + const spread = distance * 0.15; + + const cp1: Point = { + x: start.x + dx * 0.3 + perpX * (Math.random() - 0.5) * spread, + y: start.y + dy * 0.3 + perpY * (Math.random() - 0.5) * spread, + }; + const cp2: Point = { + x: start.x + dx * 0.7 + perpX * (Math.random() - 0.5) * spread, + y: start.y + dy * 0.7 + perpY * (Math.random() - 0.5) * spread, + }; + + const steps = 5; + for (let i = 1; i <= steps; i++) { + const t = easeInOutQuad(i / steps); + const pt = cubicBezier(t, start, cp1, cp2, end); + this.moveMouseRaw(Math.round(pt.x), Math.round(pt.y)); + await sleep(2); + } + this.moveMouseRaw(x, y); + } + async leftClick(x: number, y: number): Promise { await this.moveMouse(x, y); await randomDelay(20, 50); diff --git a/src/game/OcrDaemon.ts b/src/game/OcrDaemon.ts index a1ff2d9..8e8ec10 100644 --- a/src/game/OcrDaemon.ts +++ b/src/game/OcrDaemon.ts @@ -43,6 +43,12 @@ export interface GridScanResult { matches?: GridMatch[]; } +export interface DiffOcrResponse { + text: string; + lines: OcrLine[]; + region?: Region; +} + export interface DetectGridResult { detected: boolean; region?: Region; @@ -151,6 +157,22 @@ export class OcrDaemon { }; } + async snapshot(): Promise { + await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT); + } + + async diffOcr(savePath?: string, region?: Region): Promise { + const req: DaemonRequest = { cmd: 'diff-ocr' }; + if (savePath) req.path = savePath; + if (region) req.region = region; + const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT); + return { + text: resp.text ?? '', + lines: resp.lines ?? 
[], + region: resp.region, + }; + } + async saveScreenshot(path: string, region?: Region): Promise { const req: DaemonRequest = { cmd: 'screenshot', path }; if (region) req.region = region; diff --git a/src/game/ScreenReader.ts b/src/game/ScreenReader.ts index be451e4..db57e2b 100644 --- a/src/game/ScreenReader.ts +++ b/src/game/ScreenReader.ts @@ -1,7 +1,7 @@ import { mkdir } from 'fs/promises'; import { join } from 'path'; import { logger } from '../util/logger.js'; -import { OcrDaemon, type OcrResponse } from './OcrDaemon.js'; +import { OcrDaemon, type OcrResponse, type DiffOcrResponse } from './OcrDaemon.js'; import { GridReader, type GridLayout, type CellCoord } from './GridReader.js'; import type { Region } from '../types.js'; @@ -102,6 +102,16 @@ export class ScreenReader { return pos !== null; } + // ── Snapshot / Diff-OCR (for tooltip reading) ────────────────────── + + async snapshot(): Promise { + await this.daemon.snapshot(); + } + + async diffOcr(savePath?: string, region?: Region): Promise { + return this.daemon.diffOcr(savePath, region); + } + // ── Save utilities ────────────────────────────────────────────────── async saveScreenshot(path: string): Promise { diff --git a/tools/OcrDaemon/OcrDaemon.csproj b/tools/OcrDaemon/OcrDaemon.csproj index 84087a0..a21aa76 100644 --- a/tools/OcrDaemon/OcrDaemon.csproj +++ b/tools/OcrDaemon/OcrDaemon.csproj @@ -9,6 +9,13 @@ + + + + + + PreserveNewest + diff --git a/tools/OcrDaemon/Program.cs b/tools/OcrDaemon/Program.cs index 281f382..cb8cc2d 100644 --- a/tools/OcrDaemon/Program.cs +++ b/tools/OcrDaemon/Program.cs @@ -4,18 +4,23 @@ using System.Runtime.InteropServices; using System.Text; using System.Text.Json; using System.Text.Json.Serialization; -using Windows.Graphics.Imaging; -using Windows.Media.Ocr; -using Windows.Storage.Streams; +using Tesseract; +using SdImageFormat = System.Drawing.Imaging.ImageFormat; // Make GDI capture DPI-aware so coordinates match physical pixels SetProcessDPIAware(); -// 
Pre-create the OCR engine (reused across all requests) -var ocrEngine = OcrEngine.TryCreateFromUserProfileLanguages(); -if (ocrEngine == null) +// Pre-create the Tesseract OCR engine (reused across all requests) +var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata"); +TesseractEngine tessEngine; +try { - WriteResponse(new ErrorResponse("Failed to create OCR engine. Ensure a language pack is installed.")); + tessEngine = new TesseractEngine(tessdataPath, "eng", EngineMode.LstmOnly); + tessEngine.DefaultPageSegMode = PageSegMode.Auto; +} +catch (Exception ex) +{ + WriteResponse(new ErrorResponse($"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/eng.traineddata exists.")); return 1; } @@ -49,7 +54,7 @@ while ((line = stdin.ReadLine()) != null) switch (request.Cmd?.ToLowerInvariant()) { case "ocr": - HandleOcr(request, ocrEngine); + HandleOcr(request, tessEngine); break; case "screenshot": HandleScreenshot(request); @@ -63,6 +68,12 @@ while ((line = stdin.ReadLine()) != null) case "detect-grid": HandleDetectGrid(request); break; + case "snapshot": + HandleSnapshot(request); + break; + case "diff-ocr": + HandleDiffOcr(request, tessEngine); + break; default: WriteResponse(new ErrorResponse($"Unknown command: {request.Cmd}")); break; @@ -78,31 +89,17 @@ return 0; // ── Handlers ──────────────────────────────────────────────────────────────── -void HandleOcr(Request req, OcrEngine engine) +Bitmap? 
referenceFrame = null; + +void HandleOcr(Request req, TesseractEngine engine) { using var bitmap = CaptureOrLoad(req.File, req.Region); - var softwareBitmap = BitmapToSoftwareBitmap(bitmap); - var result = engine.RecognizeAsync(softwareBitmap).AsTask().GetAwaiter().GetResult(); + using var pix = BitmapToPix(bitmap); + using var page = engine.Process(pix); - var lines = new List(); - foreach (var ocrLine in result.Lines) - { - var words = new List(); - foreach (var word in ocrLine.Words) - { - words.Add(new OcrWordResult - { - Text = word.Text, - X = (int)Math.Round(word.BoundingRect.X), - Y = (int)Math.Round(word.BoundingRect.Y), - Width = (int)Math.Round(word.BoundingRect.Width), - Height = (int)Math.Round(word.BoundingRect.Height), - }); - } - lines.Add(new OcrLineResult { Text = ocrLine.Text, Words = words }); - } - - WriteResponse(new OcrResponse { Text = result.Text, Lines = lines }); + var text = page.GetText(); + var lines = ExtractLinesFromPage(page, offsetX: 0, offsetY: 0); + WriteResponse(new OcrResponse { Text = text, Lines = lines }); } void HandleScreenshot(Request req) @@ -113,9 +110,15 @@ void HandleScreenshot(Request req) return; } - using var bitmap = CaptureOrLoad(req.File, req.Region); + // If a reference frame exists, save that (same image used for diff-ocr). + // Otherwise capture a new frame. + var bitmap = referenceFrame ?? 
CaptureOrLoad(req.File, req.Region); var format = GetImageFormat(req.Path); + var dir = System.IO.Path.GetDirectoryName(req.Path); + if (!string.IsNullOrEmpty(dir) && !System.IO.Directory.Exists(dir)) + System.IO.Directory.CreateDirectory(dir); bitmap.Save(req.Path, format); + if (bitmap != referenceFrame) bitmap.Dispose(); WriteResponse(new OkResponse()); } @@ -123,11 +126,253 @@ void HandleCapture(Request req) { using var bitmap = CaptureOrLoad(req.File, req.Region); using var ms = new MemoryStream(); - bitmap.Save(ms, ImageFormat.Png); + bitmap.Save(ms, SdImageFormat.Png); var base64 = Convert.ToBase64String(ms.ToArray()); WriteResponse(new CaptureResponse { Image = base64 }); } +// ── Snapshot / Diff-OCR ───────────────────────────────────────────────────── + +void HandleSnapshot(Request req) +{ + referenceFrame?.Dispose(); + referenceFrame = CaptureOrLoad(req.File, req.Region); + WriteResponse(new OkResponse()); +} + +void HandleDiffOcr(Request req, TesseractEngine engine) +{ + if (referenceFrame == null) + { + WriteResponse(new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.")); + return; + } + + using var current = CaptureOrLoad(req.File, null); + + int w = Math.Min(referenceFrame.Width, current.Width); + int h = Math.Min(referenceFrame.Height, current.Height); + + // Get raw pixels for both frames + var refData = referenceFrame.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb); + byte[] refPx = new byte[refData.Stride * h]; + Marshal.Copy(refData.Scan0, refPx, 0, refPx.Length); + referenceFrame.UnlockBits(refData); + int stride = refData.Stride; + + var curData = current.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb); + byte[] curPx = new byte[curData.Stride * h]; + Marshal.Copy(curData.Scan0, curPx, 0, curPx.Length); + current.UnlockBits(curData); + + // Detect pixels that got DARKER (tooltip = dark overlay). 
+ // This filters out item highlight glow (brighter) and cursor changes. + int diffThresh = req.Threshold > 0 ? req.Threshold : 30; + bool[] changed = new bool[w * h]; + int totalChanged = 0; + + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + int i = y * stride + x * 4; + int darkerB = refPx[i] - curPx[i]; + int darkerG = refPx[i + 1] - curPx[i + 1]; + int darkerR = refPx[i + 2] - curPx[i + 2]; + if (darkerB + darkerG + darkerR > diffThresh) + { + changed[y * w + x] = true; + totalChanged++; + } + } + } + + bool debug = req.Debug; + + if (totalChanged == 0) + { + if (debug) Console.Error.WriteLine(" diff-ocr: no changes detected"); + WriteResponse(new OcrResponse { Text = "", Lines = [] }); + return; + } + + // Two-pass density detection: + // Pass 1: Find row range using full-width row counts + // Pass 2: Find column range using only pixels within detected row range + // This makes the column threshold relative to tooltip height, not screen height. + int maxGap = 15; + + // Pass 1: count changed pixels per row, find longest active run + int[] rowCounts = new int[h]; + for (int y = 0; y < h; y++) + for (int x = 0; x < w; x++) + if (changed[y * w + x]) + rowCounts[y]++; + + int rowThresh = w / 30; // ~3% of width + int bestRowStart = 0, bestRowEnd = 0, bestRowLen = 0; + int curRowStart = -1, lastActiveRow = -1; + for (int y = 0; y < h; y++) + { + if (rowCounts[y] >= rowThresh) + { + if (curRowStart < 0) curRowStart = y; + lastActiveRow = y; + } + else if (curRowStart >= 0 && y - lastActiveRow > maxGap) + { + int len = lastActiveRow - curRowStart + 1; + if (len > bestRowLen) { bestRowStart = curRowStart; bestRowEnd = lastActiveRow; bestRowLen = len; } + curRowStart = -1; + } + } + if (curRowStart >= 0) + { + int len = lastActiveRow - curRowStart + 1; + if (len > bestRowLen) { bestRowStart = curRowStart; bestRowEnd = lastActiveRow; bestRowLen = len; } + } + + // Pass 2: count changed pixels per column, but only within the detected row range + 
int[] colCounts = new int[w]; + for (int y = bestRowStart; y <= bestRowEnd; y++) + for (int x = 0; x < w; x++) + if (changed[y * w + x]) + colCounts[x]++; + + int tooltipHeight = bestRowEnd - bestRowStart + 1; + int colThresh = tooltipHeight / 15; // ~7% of tooltip height + + int bestColStart = 0, bestColEnd = 0, bestColLen = 0; + int curColStart = -1, lastActiveCol = -1; + for (int x = 0; x < w; x++) + { + if (colCounts[x] >= colThresh) + { + if (curColStart < 0) curColStart = x; + lastActiveCol = x; + } + else if (curColStart >= 0 && x - lastActiveCol > maxGap) + { + int len = lastActiveCol - curColStart + 1; + if (len > bestColLen) { bestColStart = curColStart; bestColEnd = lastActiveCol; bestColLen = len; } + curColStart = -1; + } + } + if (curColStart >= 0) + { + int len = lastActiveCol - curColStart + 1; + if (len > bestColLen) { bestColStart = curColStart; bestColEnd = lastActiveCol; bestColLen = len; } + } + + // Log density detection results + Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}"); + + if (bestRowLen < 50 || bestColLen < 50) + { + Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})"); + WriteResponse(new OcrResponse { Text = "", Lines = [] }); + return; + } + + int pad = 0; + int minX = Math.Max(bestColStart - pad, 0); + int minY = Math.Max(bestRowStart - pad, 0); + int maxX = Math.Min(bestColEnd + pad, w - 1); + int maxY = Math.Min(bestRowEnd + pad, h - 1); + + // Trim 5px from left/right/bottom to remove tooltip border/shadow artifacts + int trim = 5; + minX = Math.Min(minX + trim, maxX); + maxX = Math.Max(maxX - trim, minX); + maxY = Math.Max(maxY - trim, minY); + int rw = maxX - minX + 1; + int rh = maxY - minY + 1; + + if (debug) Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}"); + + // Crop the current frame 
to the diff bounding box + using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb); + + // Save raw tooltip image if path is provided + if (!string.IsNullOrEmpty(req.Path)) + { + var dir = System.IO.Path.GetDirectoryName(req.Path); + if (!string.IsNullOrEmpty(dir) && !System.IO.Directory.Exists(dir)) + System.IO.Directory.CreateDirectory(dir); + cropped.Save(req.Path, GetImageFormat(req.Path)); + if (debug) Console.Error.WriteLine($" diff-ocr: saved tooltip to {req.Path}"); + } + + // Pre-process for OCR at native resolution: boost contrast, invert colors (no upscaling) + using var processed = PreprocessForOcr(cropped); + using var pix = BitmapToPix(processed); + using var page = engine.Process(pix); + + var text = page.GetText(); + var lines = ExtractLinesFromPage(page, offsetX: minX, offsetY: minY); + + WriteResponse(new DiffOcrResponse + { + Text = text, + Lines = lines, + Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh }, + }); +} + +/// Pre-process an image for better OCR: boost contrast and invert colors. +/// No upscaling — tooltip text is large enough at native resolution. 
+Bitmap PreprocessForOcr(Bitmap src) +{ + int dw = src.Width, dh = src.Height; + var scaled = (Bitmap)src.Clone(); + + // Boost contrast: lock the pixels, then stretch the 5th-95th percentile brightness range to full 0-255 + var data = scaled.LockBits(new Rectangle(0, 0, dw, dh), ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb); + byte[] px = new byte[data.Stride * dh]; + Marshal.Copy(data.Scan0, px, 0, px.Length); + int stride = data.Stride; + + // Find 5th and 95th percentile brightness (brightest of the three color bytes per pixel) for robust stretching + int[] histogram = new int[256]; + for (int y = 0; y < dh; y++) + for (int x = 0; x < dw; x++) + { + int i = y * stride + x * 4; + int bright = Math.Max(px[i], Math.Max(px[i + 1], px[i + 2])); + histogram[bright]++; + } + + int totalPixels = dw * dh; + int lo = 0, hi = 255; + int cumLo = 0, cumHi = 0; + for (int b = 0; b < 256; b++) + { + cumLo += histogram[b]; + if (cumLo >= totalPixels * 0.05) { lo = b; break; } + } + for (int b = 255; b >= 0; b--) + { + cumHi += histogram[b]; + if (cumHi >= totalPixels * 0.05) { hi = b; break; } + } + if (hi <= lo) hi = lo + 1; + double scale = 255.0 / (hi - lo); + + // Stretch contrast and invert colors (light text on dark → dark text on light for Tesseract); the fourth byte of each pixel is not modified + for (int y = 0; y < dh; y++) + for (int x = 0; x < dw; x++) + { + int i = y * stride + x * 4; + px[i] = (byte)(255 - Math.Clamp((int)((px[i] - lo) * scale), 0, 255)); + px[i + 1] = (byte)(255 - Math.Clamp((int)((px[i + 1] - lo) * scale), 0, 255)); + px[i + 2] = (byte)(255 - Math.Clamp((int)((px[i + 2] - lo) * scale), 0, 255)); + } + + Marshal.Copy(px, 0, data.Scan0, px.Length); + scaled.UnlockBits(data); + return scaled; +} + // Pre-loaded empty cell templates (loaded lazily on first grid scan) // Stored as both grayscale (for occupied detection) and ARGB (for item border detection) byte[]? emptyTemplate70Gray = null; @@ -929,18 +1174,54 @@ Bitmap CaptureScreen(RegionRect? 
region) return bitmap; } -// ── Bitmap → SoftwareBitmap conversion (in-memory) ───────────────────────── +// ── Bitmap → Tesseract Pix conversion (in-memory) ────────────────────────── -SoftwareBitmap BitmapToSoftwareBitmap(Bitmap bitmap) +Pix BitmapToPix(Bitmap bitmap) { using var ms = new MemoryStream(); - bitmap.Save(ms, ImageFormat.Bmp); - ms.Position = 0; + bitmap.Save(ms, SdImageFormat.Png); + return Pix.LoadFromMemory(ms.ToArray()); +} - var stream = ms.AsRandomAccessStream(); - var decoder = BitmapDecoder.CreateAsync(stream).AsTask().GetAwaiter().GetResult(); - var softwareBitmap = decoder.GetSoftwareBitmapAsync().AsTask().GetAwaiter().GetResult(); - return softwareBitmap; +// ── Extract lines/words from Tesseract page result ────────────────────────── + +List ExtractLinesFromPage(Page page, int offsetX, int offsetY) +{ + var lines = new List(); + using var iter = page.GetIterator(); + if (iter == null) return lines; + + iter.Begin(); + + do + { + var words = new List(); + do + { + var wordText = iter.GetText(PageIteratorLevel.Word); + if (string.IsNullOrWhiteSpace(wordText)) continue; + + if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var bounds)) + { + words.Add(new OcrWordResult + { + Text = wordText.Trim(), + X = bounds.X1 + offsetX, + Y = bounds.Y1 + offsetY, + Width = bounds.Width, + Height = bounds.Height, + }); + } + } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word)); + + if (words.Count > 0) + { + var lineText = string.Join(" ", words.Select(w => w.Text)); + lines.Add(new OcrLineResult { Text = lineText, Words = words }); + } + } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.TextLine)); + + return lines; } // ── Response writing ──────────────────────────────────────────────────────── @@ -952,14 +1233,14 @@ void WriteResponse(object response) Console.Out.Flush(); } -ImageFormat GetImageFormat(string path) +SdImageFormat GetImageFormat(string path) { var ext = Path.GetExtension(path).ToLowerInvariant(); 
return ext switch { - ".jpg" or ".jpeg" => ImageFormat.Jpeg, - ".bmp" => ImageFormat.Bmp, - _ => ImageFormat.Png, + ".jpg" or ".jpeg" => SdImageFormat.Jpeg, + ".bmp" => SdImageFormat.Bmp, + _ => SdImageFormat.Png, }; } @@ -1063,6 +1344,21 @@ class OcrResponse public List Lines { get; set; } = []; } +class DiffOcrResponse +{ + [JsonPropertyName("ok")] + public bool Ok => true; + + [JsonPropertyName("text")] + public string Text { get; set; } = ""; + + [JsonPropertyName("lines")] + public List Lines { get; set; } = []; + + [JsonPropertyName("region")] + public RegionRect? Region { get; set; } +} + class OcrLineResult { [JsonPropertyName("text")]