using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Tesseract;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;

// Make GDI capture DPI-aware so coordinates match physical pixels
SetProcessDPIAware();

// JSON options. Declared before the first WriteResponse call because WriteResponse
// is a local function that reads this variable; it has to be assigned by then.
var jsonOptions = new JsonSerializerOptions
{
    PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
    DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
};

// Frame stored by 'snapshot' and compared against by 'diff-ocr'. Declared ahead of
// the request loop so the handlers that capture it see an assigned variable.
Bitmap? referenceFrame = null;

// Pre-create the Tesseract OCR engine (reused across all requests)
var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";
TesseractEngine tessEngine;
try
{
    tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
    tessEngine.DefaultPageSegMode = PageSegMode.Auto;
    Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
}
catch (Exception ex)
{
    WriteResponse(new ErrorResponse($"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/eng.traineddata exists."));
    return 1;
}

// Signal ready
WriteResponse(new ReadyResponse());

// Main loop: read one JSON line, handle, write one JSON line
var stdin = Console.In;
string? line;
while ((line = stdin.ReadLine()) != null)
{
    line = line.Trim();
    if (line.Length == 0)
    {
        continue;
    }

    try
    {
        var request = JsonSerializer.Deserialize<Request>(line, jsonOptions);
        if (request == null)
        {
            WriteResponse(new ErrorResponse("Failed to parse request"));
            continue;
        }

        switch (request.Cmd?.ToLowerInvariant())
        {
            case "ocr":
                HandleOcr(request, tessEngine);
                break;
            case "screenshot":
                HandleScreenshot(request);
                break;
            case "capture":
                HandleCapture(request);
                break;
            case "grid":
                HandleGrid(request);
                break;
            case "detect-grid":
                HandleDetectGrid(request);
                break;
            case "snapshot":
                HandleSnapshot(request);
                break;
            case "diff-ocr":
                HandleDiffOcr(request, tessEngine);
                break;
            default:
                WriteResponse(new ErrorResponse($"Unknown command: {request.Cmd}"));
                break;
        }
    }
    catch (Exception ex)
    {
        // Any handler failure becomes a single error line; the loop keeps serving.
        WriteResponse(new ErrorResponse(ex.Message));
    }
}

return 0;

// ── Handlers ────────────────────────────────────────────────────────────────

/// 'ocr': capture (or load) an image, run Tesseract on it and reply with the full
/// text plus per-line/per-word boxes. Boxes are relative to the captured image.
void HandleOcr(Request req, TesseractEngine engine)
{
    using var bitmap = CaptureOrLoad(req.File, req.Region);
    using var pix = BitmapToPix(bitmap);
    using var page = engine.Process(pix);

    var text = page.GetText();
    var lines = ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
    WriteResponse(new OcrResponse { Text = text, Lines = lines });
}

/// 'screenshot': save an image to req.Path. Requires 'path'.
void HandleScreenshot(Request req)
{
    if (string.IsNullOrEmpty(req.Path))
    {
        WriteResponse(new ErrorResponse("screenshot command requires 'path'"));
        return;
    }

    // If a reference frame exists, save that (same image used for diff-ocr).
    // Otherwise capture a new frame, which this handler then owns and must dispose.
    bool ownsBitmap = referenceFrame == null;
    var bitmap = referenceFrame ?? CaptureOrLoad(req.File, req.Region);
    try
    {
        var format = GetImageFormat(req.Path);
        EnsureParentDirectory(req.Path);
        bitmap.Save(req.Path, format);
    }
    finally
    {
        // Dispose even when Save throws; never dispose the shared reference frame.
        if (ownsBitmap)
        {
            bitmap.Dispose();
        }
    }

    WriteResponse(new OkResponse());
}

/// 'capture': reply with the captured image as a base64-encoded PNG.
void HandleCapture(Request req)
{
    using var bitmap = CaptureOrLoad(req.File, req.Region);
    using var ms = new MemoryStream();
    bitmap.Save(ms, SdImageFormat.Png);
    var base64 = Convert.ToBase64String(ms.ToArray());
    WriteResponse(new CaptureResponse { Image = base64 });
}

// ── Snapshot / Diff-OCR ─────────────────────────────────────────────────────

/// 'snapshot': replace the stored reference frame with a fresh capture.
void HandleSnapshot(Request req)
{
    referenceFrame?.Dispose();
    referenceFrame = CaptureOrLoad(req.File, req.Region);
    WriteResponse(new OkResponse());
}

/// Create the directory that will contain `path`, if the path has a directory part.
void EnsureParentDirectory(string path)
{
    var dir = System.IO.Path.GetDirectoryName(path);
    if (!string.IsNullOrEmpty(dir))
    {
        // CreateDirectory is a no-op when the directory already exists.
        System.IO.Directory.CreateDirectory(dir);
    }
}

/// Copy the top-left width x height area of a bitmap into a managed 32bpp buffer.
/// Returns the buffer together with ITS OWN row stride; two bitmaps of different
/// widths have different strides, so callers must not share one stride between them.
(byte[] pixels, int stride) ReadArgbPixels(Bitmap bmp, int width, int height)
{
    var data = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
    try
    {
        byte[] pixels = new byte[data.Stride * height];
        Marshal.Copy(data.Scan0, pixels, 0, pixels.Length);
        return (pixels, data.Stride);
    }
    finally
    {
        bmp.UnlockBits(data);
    }
}

/// Find the longest run of indices whose count is >= threshold, tolerating gaps of
/// up to maxGap inactive indices inside a run. Returns (0, 0, 0) when nothing is
/// active; `end` is the last active index (inclusive).
(int start, int end, int length) FindLongestActiveRun(int[] counts, int threshold, int maxGap)
{
    int bestStart = 0, bestEnd = 0, bestLen = 0;
    int runStart = -1, lastActive = -1;

    void CloseRun()
    {
        int len = lastActive - runStart + 1;
        if (len > bestLen)
        {
            bestStart = runStart;
            bestEnd = lastActive;
            bestLen = len;
        }
        runStart = -1;
    }

    for (int i = 0; i < counts.Length; i++)
    {
        if (counts[i] >= threshold)
        {
            if (runStart < 0)
            {
                runStart = i;
            }
            lastActive = i;
        }
        else if (runStart >= 0 && i - lastActive > maxGap)
        {
            CloseRun();
        }
    }

    if (runStart >= 0)
    {
        CloseRun();
    }

    return (bestStart, bestEnd, bestLen);
}

/// 'diff-ocr': compare a fresh capture with the stored reference frame, locate the
/// largest dense region of pixels that got darker, crop it, optionally save it to
/// req.Path, and OCR it. Word boxes are reported in full-capture coordinates.
void HandleDiffOcr(Request req, TesseractEngine engine)
{
    var reference = referenceFrame;
    if (reference == null)
    {
        WriteResponse(new ErrorResponse("No reference snapshot stored. Send 'snapshot' first."));
        return;
    }

    using var current = CaptureOrLoad(req.File, null);
    int w = Math.Min(reference.Width, current.Width);
    int h = Math.Min(reference.Height, current.Height);

    // Get raw pixels for both frames. Each buffer keeps its own stride: the frames
    // may differ in width, in which case their row strides differ as well.
    var (refPx, refStride) = ReadArgbPixels(reference, w, h);
    var (curPx, curStride) = ReadArgbPixels(current, w, h);

    // Detect pixels that got DARKER (tooltip = dark overlay).
    // This filters out item highlight glow (brighter) and cursor changes.
    int diffThresh = req.Threshold > 0 ? req.Threshold : 30;
    bool[] changed = new bool[w * h];
    int totalChanged = 0;
    for (int y = 0; y < h; y++)
    {
        int refRow = y * refStride;
        int curRow = y * curStride;
        for (int x = 0; x < w; x++)
        {
            int ri = refRow + x * 4;
            int ci = curRow + x * 4;
            int darkerB = refPx[ri] - curPx[ci];
            int darkerG = refPx[ri + 1] - curPx[ci + 1];
            int darkerR = refPx[ri + 2] - curPx[ci + 2];
            if (darkerB + darkerG + darkerR > diffThresh)
            {
                changed[y * w + x] = true;
                totalChanged++;
            }
        }
    }

    bool debug = req.Debug;
    if (totalChanged == 0)
    {
        if (debug)
        {
            Console.Error.WriteLine(" diff-ocr: no changes detected");
        }
        WriteResponse(new OcrResponse { Text = "", Lines = [] });
        return;
    }

    // Two-pass density detection:
    // Pass 1: Find row range using full-width row counts
    // Pass 2: Find column range using only pixels within detected row range
    // This makes the column threshold relative to tooltip height, not screen height.
    const int maxGap = 15;

    // Pass 1: count changed pixels per row, find longest active run
    int[] rowCounts = new int[h];
    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            if (changed[y * w + x])
            {
                rowCounts[y]++;
            }
        }
    }

    int rowThresh = w / 30; // ~3% of width
    var (bestRowStart, bestRowEnd, bestRowLen) = FindLongestActiveRun(rowCounts, rowThresh, maxGap);

    // Pass 2: count changed pixels per column, but only within the detected row range
    int[] colCounts = new int[w];
    for (int y = bestRowStart; y <= bestRowEnd; y++)
    {
        for (int x = 0; x < w; x++)
        {
            if (changed[y * w + x])
            {
                colCounts[x]++;
            }
        }
    }

    int tooltipHeight = bestRowEnd - bestRowStart + 1;
    int colThresh = tooltipHeight / 15; // ~7% of tooltip height
    var (bestColStart, bestColEnd, bestColLen) = FindLongestActiveRun(colCounts, colThresh, maxGap);

    // Log density detection results
    Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");

    if (bestRowLen < 50 || bestColLen < 50)
    {
        Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
        WriteResponse(new OcrResponse { Text = "", Lines = [] });
        return;
    }

    int pad = 0;
    int minX = Math.Max(bestColStart - pad, 0);
    int minY = Math.Max(bestRowStart - pad, 0);
    int maxX = Math.Min(bestColEnd + pad, w - 1);
    int maxY = Math.Min(bestRowEnd + pad, h - 1);

    // Dynamic right-edge trim: if the rightmost columns are much sparser than
    // the tooltip body, trim them. This handles the ~5% of cases where ambient
    // noise extends the detected region slightly on the right.
    int colSpan = maxX - minX + 1;
    if (colSpan > 100)
    {
        // Average column density over the middle 50% of the range
        int q1 = minX + colSpan / 4;
        int q3 = minX + colSpan * 3 / 4;
        long midSum = 0;
        int midCount = 0;
        for (int x = q1; x <= q3; x++)
        {
            midSum += colCounts[x];
            midCount++;
        }
        double avgMidDensity = (double)midSum / midCount;
        double cutoff = avgMidDensity * 0.3; // column must have >=30% of avg density

        // Trim from right while below cutoff (never below a 100px-wide region)
        while (maxX > minX + 100 && colCounts[maxX] < cutoff)
        {
            maxX--;
        }
    }

    int rw = maxX - minX + 1;
    int rh = maxY - minY + 1;
    if (debug)
    {
        Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
    }

    // Crop the current frame to the diff bounding box
    using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);

    // Save raw tooltip image if path is provided
    if (!string.IsNullOrEmpty(req.Path))
    {
        EnsureParentDirectory(req.Path);
        cropped.Save(req.Path, GetImageFormat(req.Path));
        if (debug)
        {
            Console.Error.WriteLine($" diff-ocr: saved tooltip to {req.Path}");
        }
    }

    // Pre-process for OCR: boost contrast and invert colors. The image is not
    // resized, so OCR boxes only need the crop origin added back.
    using var processed = PreprocessForOcr(cropped);
    using var pix = BitmapToPix(processed);
    using var page = engine.Process(pix);

    var text = page.GetText();
    var lines = ExtractLinesFromPage(page, offsetX: minX, offsetY: minY);
    WriteResponse(new DiffOcrResponse
    {
        Text = text,
        Lines = lines,
        Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
    });
}

/// Pre-process an image for better OCR: boost contrast and invert colors.
/// No upscaling — tooltip text is large enough at native resolution.
Bitmap PreprocessForOcr(Bitmap src)
{
    // Returns a new bitmap of the same size as src (the caller disposes it) with
    // contrast stretched between the 5th and 95th brightness percentiles and the
    // B, G and R channels inverted. Alpha bytes are left as they were.
    int dw = src.Width, dh = src.Height;
    var result = (Bitmap)src.Clone();
    try
    {
        var data = result.LockBits(new Rectangle(0, 0, dw, dh), ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);
        try
        {
            int stride = data.Stride;
            byte[] px = new byte[stride * dh];
            Marshal.Copy(data.Scan0, px, 0, px.Length);

            // Find 5th and 95th percentile brightness for robust stretching.
            // Brightness of a pixel = its largest colour channel.
            int[] histogram = new int[256];
            for (int y = 0; y < dh; y++)
            {
                for (int x = 0; x < dw; x++)
                {
                    int i = y * stride + x * 4;
                    int bright = Math.Max(px[i], Math.Max(px[i + 1], px[i + 2]));
                    histogram[bright]++;
                }
            }

            int totalPixels = dw * dh;
            int lo = 0, hi = 255;
            int cumLo = 0, cumHi = 0;
            for (int b = 0; b < 256; b++)
            {
                cumLo += histogram[b];
                if (cumLo >= totalPixels * 0.05)
                {
                    lo = b;
                    break;
                }
            }
            for (int b = 255; b >= 0; b--)
            {
                cumHi += histogram[b];
                if (cumHi >= totalPixels * 0.05)
                {
                    hi = b;
                    break;
                }
            }
            if (hi <= lo)
            {
                hi = lo + 1; // flat image: avoid dividing by zero
            }
            double scale = 255.0 / (hi - lo);

            // Stretch contrast and invert colors (light text on dark → dark text on light for Tesseract)
            for (int y = 0; y < dh; y++)
            {
                for (int x = 0; x < dw; x++)
                {
                    int i = y * stride + x * 4;
                    px[i] = (byte)(255 - Math.Clamp((int)((px[i] - lo) * scale), 0, 255));
                    px[i + 1] = (byte)(255 - Math.Clamp((int)((px[i + 1] - lo) * scale), 0, 255));
                    px[i + 2] = (byte)(255 - Math.Clamp((int)((px[i + 2] - lo) * scale), 0, 255));
                }
            }

            Marshal.Copy(px, 0, data.Scan0, px.Length);
        }
        finally
        {
            result.UnlockBits(data);
        }

        return result;
    }
    catch
    {
        // Do not leak the clone when processing fails.
        result.Dispose();
        throw;
    }
}

// Pre-loaded empty cell templates (loaded lazily on first grid scan)
// Stored as both grayscale and ARGB; grid scanning below reads the grayscale copy.
// NOTE(review): these locals are declared after the top-level `return 0;` while the
// handlers that read them are called from the request loop above it — confirm the
// declaration order compiles as intended (captured locals must be assigned at the
// call site of a local function that reads them).
byte[]? emptyTemplate70Gray = null;
byte[]? emptyTemplate70Argb = null;
int emptyTemplate70W = 0, emptyTemplate70H = 0, emptyTemplate70Stride = 0;
byte[]? emptyTemplate35Gray = null;
byte[]? emptyTemplate35Argb = null;
int emptyTemplate35W = 0, emptyTemplate35H = 0, emptyTemplate35Stride = 0;

/// Load assets/empty70.png and assets/empty35.png (when present) into the template
/// fields. Returns immediately once the 70px template has been loaded.
void LoadTemplatesIfNeeded()
{
    if (emptyTemplate70Gray != null)
    {
        return;
    }

    // Look for templates relative to exe directory
    var exeDir = AppContext.BaseDirectory;
    // Templates are in assets/ at project root — walk up from bin/Release/net8.0-.../
    var projectRoot = System.IO.Path.GetFullPath(System.IO.Path.Combine(exeDir, "..", "..", "..", "..", ".."));
    var t70Path = System.IO.Path.Combine(projectRoot, "assets", "empty70.png");
    var t35Path = System.IO.Path.Combine(projectRoot, "assets", "empty35.png");

    if (System.IO.File.Exists(t70Path))
    {
        using var bmp = new Bitmap(t70Path);
        emptyTemplate70W = bmp.Width;
        emptyTemplate70H = bmp.Height;
        (emptyTemplate70Gray, emptyTemplate70Argb, emptyTemplate70Stride) = BitmapToGrayAndArgb(bmp);
    }

    if (System.IO.File.Exists(t35Path))
    {
        using var bmp = new Bitmap(t35Path);
        emptyTemplate35W = bmp.Width;
        emptyTemplate35H = bmp.Height;
        (emptyTemplate35Gray, emptyTemplate35Argb, emptyTemplate35Stride) = BitmapToGrayAndArgb(bmp);
    }
}

/// Read a bitmap as 32bpp pixels and derive a tightly packed grayscale copy.
/// Returns: gray (width*height bytes, each the integer mean of the B, G and R
/// bytes), argb (the raw locked buffer, `stride` bytes per row) and that stride.
(byte[] gray, byte[] argb, int stride) BitmapToGrayAndArgb(Bitmap bmp)
{
    int w = bmp.Width, h = bmp.Height;
    byte[] argb;
    int stride;

    var data = bmp.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
    try
    {
        stride = data.Stride;
        argb = new byte[stride * h];
        Marshal.Copy(data.Scan0, argb, 0, argb.Length);
    }
    finally
    {
        bmp.UnlockBits(data);
    }

    byte[] gray = new byte[w * h];
    for (int y = 0; y < h; y++)
    {
        for (int x = 0; x < w; x++)
        {
            int i = y * stride + x * 4;
            gray[y * w + x] = (byte)((argb[i] + argb[i + 1] + argb[i + 2]) / 3);
        }
    }

    return (gray, argb, stride);
}

/// Grayscale-only convenience wrapper around BitmapToGrayAndArgb.
byte[] BitmapToGray(Bitmap bmp)
{
    var (gray, _, _) = BitmapToGrayAndArgb(bmp);
    return gray;
}

/// 'grid': split the captured region into req.Cols x req.Rows cells, mark each cell
/// occupied or empty by comparing it with the empty-cell template, merge adjacent
/// occupied cells whose shared border differs from the template into items, and
/// optionally list cells that look like the target cell.
void HandleGrid(Request req)
{
    if (req.Region == null || req.Cols <= 0 || req.Rows <= 0)
    {
        WriteResponse(new ErrorResponse("grid command requires region, cols, rows"));
        return;
    }

    LoadTemplatesIfNeeded();

    using var bitmap = CaptureOrLoad(req.File, req.Region);
    int cols = req.Cols;
    int rows = req.Rows;
    float cellW = (float)bitmap.Width / cols;
    float cellH = (float)bitmap.Height / rows;

    // Pick the right empty template based on cell size
    int nominalCell = (int)Math.Round(cellW);
    byte[] templateGray;
    int templateW, templateH;
    if (nominalCell <= 40 && emptyTemplate35Gray != null)
    {
        templateGray = emptyTemplate35Gray;
        templateW = emptyTemplate35W;
        templateH = emptyTemplate35H;
    }
    else if (emptyTemplate70Gray != null)
    {
        templateGray = emptyTemplate70Gray;
        templateW = emptyTemplate70W;
        templateH = emptyTemplate70H;
    }
    else
    {
        WriteResponse(new ErrorResponse("Empty cell templates not found in assets/"));
        return;
    }

    // Convert captured bitmap to grayscale for all comparisons below
    byte[] captureGray = BitmapToGray(bitmap);
    int captureW = bitmap.Width;
    int captureH = bitmap.Height;

    // Border to skip (outer pixels may differ between cells)
    int border = Math.Max(2, nominalCell / 10);

    // Threshold for mean absolute difference — default 2
    double diffThreshold = req.Threshold > 0 ? req.Threshold : 2;
    bool debug = req.Debug;

    if (debug)
    {
        Console.Error.WriteLine($"Grid: {cols}x{rows}, cellW={cellW:F1}, cellH={cellH:F1}, border={border}, threshold={diffThreshold}");
    }

    var cells = new List<List<bool>>();
    for (int row = 0; row < rows; row++)
    {
        var rowList = new List<bool>();
        var debugDiffs = new List<string>();
        for (int col = 0; col < cols; col++)
        {
            int cx0 = (int)(col * cellW);
            int cy0 = (int)(row * cellH);
            int cw = (int)Math.Min(cellW, captureW - cx0);
            int ch = (int)Math.Min(cellH, captureH - cy0);

            // Compare inner pixels of cell vs template
            long diffSum = 0;
            int compared = 0;
            int innerW = Math.Min(cw, templateW) - border;
            int innerH = Math.Min(ch, templateH) - border;
            for (int py = border; py < innerH; py++)
            {
                for (int px = border; px < innerW; px++)
                {
                    int cellVal = captureGray[(cy0 + py) * captureW + (cx0 + px)];
                    int tmplVal = templateGray[py * templateW + px];
                    diffSum += Math.Abs(cellVal - tmplVal);
                    compared++;
                }
            }

            double meanDiff = compared > 0 ? (double)diffSum / compared : 0;
            bool occupied = meanDiff > diffThreshold;
            rowList.Add(occupied);
            if (debug)
            {
                debugDiffs.Add($"{meanDiff,5:F1}{(occupied ? "*" : " ")}");
            }
        }

        cells.Add(rowList);
        if (debug)
        {
            Console.Error.WriteLine($" Row {row,2}: {string.Join(" ", debugDiffs)}");
        }
    }

    // ── Item detection: compare border pixels to empty template (grayscale) ──
    // Items have a colored tint behind them that shows through grid lines.
    // Compare each cell's border strip against the template's border pixels.
    // If they differ → item tint present → cells belong to same item.
    int[] parent = new int[rows * cols];
    for (int i = 0; i < parent.Length; i++)
    {
        parent[i] = i;
    }

    int Find(int x)
    {
        while (parent[x] != x)
        {
            parent[x] = parent[parent[x]]; // path halving
            x = parent[x];
        }
        return x;
    }

    void Union(int a, int b)
    {
        parent[Find(a)] = Find(b);
    }

    // Mean absolute difference between a strip of the capture and the template,
    // where the template is positioned at (cellX, cellY). Samples falling outside
    // the capture or outside the template are skipped on BOTH axes, so a cell that
    // is larger than the template can no longer index past the template buffer.
    double MeanBorderDiff(int xFrom, int xTo, int xStep, int yFrom, int yTo, int yStep, int cellX, int cellY)
    {
        long diffSum = 0;
        int cnt = 0;
        for (int sy = yFrom; sy < yTo; sy += yStep)
        {
            int tmplY = sy - cellY;
            if (sy < 0 || sy >= captureH || tmplY < 0 || tmplY >= templateH)
            {
                continue;
            }

            for (int sx = xFrom; sx < xTo; sx += xStep)
            {
                int tmplX = sx - cellX;
                if (sx < 0 || sx >= captureW || tmplX < 0 || tmplX >= templateW)
                {
                    continue;
                }

                diffSum += Math.Abs(captureGray[sy * captureW + sx] - templateGray[tmplY * templateW + tmplX]);
                cnt++;
            }
        }

        return cnt > 0 ? (double)diffSum / cnt : 0;
    }

    int stripWidth = Math.Max(2, border / 2);
    int stripInset = (int)(cellW * 0.15);
    double borderDiffThresh = 15.0;

    for (int row = 0; row < rows; row++)
    {
        for (int col = 0; col < cols; col++)
        {
            if (!cells[row][col])
            {
                continue;
            }

            int cx0 = (int)(col * cellW);
            int cy0 = (int)(row * cellH);
            int cellRight = (int)((col + 1) * cellW);
            int cellBottom = (int)((row + 1) * cellH);

            // Check right neighbor: vertical strip straddling the shared edge,
            // sampled every second row.
            if (col + 1 < cols && cells[row][col + 1])
            {
                int xStart = cellRight - stripWidth;
                double meanDiff = MeanBorderDiff(
                    xStart, xStart + stripWidth * 2, 1,
                    cy0 + stripInset, cellBottom - stripInset, 2,
                    cx0, cy0);
                if (debug)
                {
                    Console.Error.WriteLine($" H ({row},{col})->({row},{col+1}): {meanDiff:F1}{(meanDiff > borderDiffThresh ? " SAME" : "")}");
                }
                if (meanDiff > borderDiffThresh)
                {
                    Union(row * cols + col, row * cols + col + 1);
                }
            }

            // Check bottom neighbor: horizontal strip straddling the shared edge,
            // sampled every second column.
            if (row + 1 < rows && cells[row + 1][col])
            {
                int yStart = cellBottom - stripWidth;
                double meanDiff = MeanBorderDiff(
                    cx0 + stripInset, cellRight - stripInset, 2,
                    yStart, yStart + stripWidth * 2, 1,
                    cx0, cy0);
                if (debug)
                {
                    Console.Error.WriteLine($" V ({row},{col})->({row+1},{col}): {meanDiff:F1}{(meanDiff > borderDiffThresh ? " SAME" : "")}");
                }
                if (meanDiff > borderDiffThresh)
                {
                    Union(row * cols + col, (row + 1) * cols + col);
                }
            }
        }
    }

    // Extract items from union-find groups
    var groups = new Dictionary<int, List<(int row, int col)>>();
    for (int row = 0; row < rows; row++)
    {
        for (int col = 0; col < cols; col++)
        {
            if (!cells[row][col])
            {
                continue;
            }

            int root = Find(row * cols + col);
            if (!groups.TryGetValue(root, out var members))
            {
                members = [];
                groups[root] = members;
            }
            members.Add((row, col));
        }
    }

    // Each group is reported as its bounding box in cell units.
    var items = new List<GridItem>();
    foreach (var group in groups.Values)
    {
        int minR = group.Min(c => c.row);
        int maxR = group.Max(c => c.row);
        int minC = group.Min(c => c.col);
        int maxC = group.Max(c => c.col);
        items.Add(new GridItem { Row = minR, Col = minC, W = maxC - minC + 1, H = maxR - minR + 1 });
    }

    if (debug)
    {
        Console.Error.WriteLine($" Items found: {items.Count}");
        foreach (var item in items)
        {
            Console.Error.WriteLine($" ({item.Row},{item.Col}) {item.W}x{item.H}");
        }
    }

    // ── Visual matching: find cells similar to target ──
    List<GridMatch>? matches = null;
    if (req.TargetRow >= 0 && req.TargetCol >= 0 &&
        req.TargetRow < rows && req.TargetCol < cols &&
        cells[req.TargetRow][req.TargetCol])
    {
        matches = FindMatchingCells(
            captureGray, captureW, captureH,
            cells, rows, cols, cellW, cellH, border,
            req.TargetRow, req.TargetCol, debug);
    }

    WriteResponse(new GridResponse { Cells = cells, Items = items, Matches = matches });
}

/// Find all occupied cells visually similar to the target cell using full-resolution NCC.
/// Full resolution gives better discrimination — sockets are a small fraction of total pixels.
List<GridMatch> FindMatchingCells(
    byte[] gray, int imgW, int imgH,
    List<List<bool>> cells, int rows, int cols,
    float cellW, float cellH, int border,
    int targetRow, int targetCol, bool debug)
{
    // gray is a tightly packed imgW x imgH grayscale image. Each cell is compared
    // on its inner area (cell minus `border` pixels on every side) by normalised
    // cross-correlation against the target cell. Returns an empty list when the
    // inner area is too small, runs off the image, or the target is nearly flat.
    int innerW = (int)cellW - border * 2;
    int innerH = (int)cellH - border * 2;
    if (innerW <= 4 || innerH <= 4)
    {
        return [];
    }

    int tCx0 = (int)(targetCol * cellW) + border;
    int tCy0 = (int)(targetRow * cellH) + border;
    int tInnerW = Math.Min(innerW, imgW - tCx0);
    int tInnerH = Math.Min(innerH, imgH - tCy0);
    if (tInnerW < innerW || tInnerH < innerH)
    {
        return [];
    }

    int n = innerW * innerH;

    // Pre-compute target cell pixels and stats
    double[] targetPixels = new double[n];
    double tMean = 0;
    for (int py = 0; py < innerH; py++)
    {
        for (int px = 0; px < innerW; px++)
        {
            double v = gray[(tCy0 + py) * imgW + (tCx0 + px)];
            targetPixels[py * innerW + px] = v;
            tMean += v;
        }
    }
    tMean /= n;

    double tStd = 0;
    for (int i = 0; i < n; i++)
    {
        tStd += (targetPixels[i] - tMean) * (targetPixels[i] - tMean);
    }
    tStd = Math.Sqrt(tStd / n);

    if (debug)
    {
        Console.Error.WriteLine($" Match target ({targetRow},{targetCol}): {innerW}x{innerH} ({n}px), mean={tMean:F1}, std={tStd:F1}");
    }

    // A nearly uniform target correlates with anything; refuse to match on it.
    if (tStd < 3.0)
    {
        return [];
    }

    double matchThreshold = 0.70;
    var matches = new List<GridMatch>();

    for (int row = 0; row < rows; row++)
    {
        for (int col = 0; col < cols; col++)
        {
            if (!cells[row][col])
            {
                continue;
            }
            if (row == targetRow && col == targetCol)
            {
                continue;
            }

            int cx0 = (int)(col * cellW) + border;
            int cy0 = (int)(row * cellH) + border;
            int cInnerW = Math.Min(innerW, imgW - cx0);
            int cInnerH = Math.Min(innerH, imgH - cy0);
            if (cInnerW < innerW || cInnerH < innerH)
            {
                continue;
            }

            // Compute NCC at full resolution
            double cMean = 0;
            for (int py = 0; py < innerH; py++)
            {
                for (int px = 0; px < innerW; px++)
                {
                    cMean += gray[(cy0 + py) * imgW + (cx0 + px)];
                }
            }
            cMean /= n;

            double cStd = 0, cross = 0;
            for (int py = 0; py < innerH; py++)
            {
                for (int px = 0; px < innerW; px++)
                {
                    double cv = gray[(cy0 + py) * imgW + (cx0 + px)] - cMean;
                    double tv = targetPixels[py * innerW + px] - tMean;
                    cStd += cv * cv;
                    cross += tv * cv;
                }
            }
            cStd = Math.Sqrt(cStd / n);

            double ncc = (tStd > 0 && cStd > 0) ? cross / (n * tStd * cStd) : 0;

            if (debug && ncc > 0.5)
            {
                Console.Error.WriteLine($" ({row},{col}): NCC={ncc:F3}");
            }

            if (ncc >= matchThreshold)
            {
                matches.Add(new GridMatch { Row = row, Col = col, Similarity = Math.Round(ncc, 3) });
            }
        }
    }

    if (debug)
    {
        Console.Error.WriteLine($" Matches for ({targetRow},{targetCol}): {matches.Count}");
    }

    return matches;
}

/// 'detect-grid': search the captured region for a periodic grid of dark lines.
/// Replies with the grid's region (in screen coordinates, offset by req.Region),
/// column/row counts and cell size, or Detected = false when none is found.
/// Cell size search range defaults to 20–70 px (req.MinCellSize / req.MaxCellSize).
void HandleDetectGrid(Request req)
{
    if (req.Region == null)
    {
        WriteResponse(new ErrorResponse("detect-grid requires region"));
        return;
    }

    int minCell = req.MinCellSize > 0 ? req.MinCellSize : 20;
    int maxCell = req.MaxCellSize > 0 ? req.MaxCellSize : 70;
    bool debug = req.Debug;

    // Only the grayscale copy is needed; release the bitmap as soon as it has been
    // converted, and release it on failure as well.
    int w, h;
    byte[] gray;
    using (var bitmap = CaptureOrLoad(req.File, req.Region))
    {
        w = bitmap.Width;
        h = bitmap.Height;
        gray = BitmapToGray(bitmap);
    }

    // ── Pass 1: Scan horizontal bands using "very dark pixel density" ──
    // Grid lines are nearly all very dark (density ~0.9), cell interiors are
    // partially dark (0.3-0.5), game world is mostly bright (density ~0.05).
    // This creates clear periodic peaks at grid line positions.
    int bandH = 200;
    int bandStep = 40;
    const int veryDarkPixelThresh = 12; // pixels below this brightness = "very dark"
    const double gridSegThresh = 0.25;  // density above this = potential grid column

    var candidates = new List<(int bandY, int cellW, double hAc, int hLeft, int hRight)>();

    for (int by = 0; by + bandH <= h; by += bandStep)
    {
        // "Very dark pixel density" per column: fraction of pixels below threshold
        double[] darkDensity = new double[w];
        for (int x = 0; x < w; x++)
        {
            int count = 0;
            for (int y = by; y < by + bandH; y++)
            {
                if (gray[y * w + x] < veryDarkPixelThresh)
                {
                    count++;
                }
            }
            darkDensity[x] = (double)count / bandH;
        }

        // Find segments where density > gridSegThresh (grid panel regions)
        var gridSegs = FindDarkDensitySegments(darkDensity, gridSegThresh, 200);

        foreach (var (segLeft, segRight) in gridSegs)
        {
            // Extract segment and run AC
            int segLen = segRight - segLeft;
            double[] segment = new double[segLen];
            Array.Copy(darkDensity, segLeft, segment, 0, segLen);

            var (period, acScore) = FindPeriodWithScore(segment, minCell, maxCell);
            if (period <= 0)
            {
                continue;
            }

            // FindGridExtent within the segment
            var (extLeft, extRight) = FindGridExtent(segment, period);
            if (extLeft < 0)
            {
                continue;
            }

            // Map back to full image coordinates
            int absLeft = segLeft + extLeft;
            int absRight = segLeft + extRight;
            int extent = absRight - absLeft;

            // Require at least 8 cells wide AND 200px absolute minimum
            if (extent < period * 8 || extent < 200)
            {
                continue;
            }

            if (debug)
            {
                Console.Error.WriteLine(
                    $" Band y={by}: seg=[{segLeft}-{segRight}] period={period}, AC={acScore:F3}, " +
                    $"extent={absLeft}-{absRight}={extent}px ({extent / period} cells)");
            }

            candidates.Add((by, period, acScore, absLeft, absRight));
        }
    }

    if (debug)
    {
        Console.Error.WriteLine($"Pass 1: {candidates.Count} candidates");
    }

    // Sort by score = AC * extent (prefer large strongly-periodic areas)
    candidates.Sort((a, b) =>
    {
        double sa = a.hAc * (a.hRight - a.hLeft);
        double sb = b.hAc * (b.hRight - b.hLeft);
        return sb.CompareTo(sa);
    });

    // ── Pass 2: Verify vertical periodicity ──
    foreach (var cand in candidates.Take(10))
    {
        int colSpan = cand.hRight - cand.hLeft;
        if (colSpan < cand.cellW * 3)
        {
            continue;
        }

        // Row "very dark pixel density" within the detected column range
        double[] rowDensity = new double[h];
        for (int y = 0; y < h; y++)
        {
            int count = 0;
            for (int x = cand.hLeft; x < cand.hRight; x++)
            {
                if (gray[y * w + x] < veryDarkPixelThresh)
                {
                    count++;
                }
            }
            rowDensity[y] = (double)count / colSpan;
        }

        // Find grid panel vertical segment
        var vGridSegs = FindDarkDensitySegments(rowDensity, gridSegThresh, 100);
        if (vGridSegs.Count == 0)
        {
            continue;
        }

        // Use the largest segment
        var (vSegTop, vSegBottom) = vGridSegs.OrderByDescending(s => s.end - s.start).First();
        int vSegLen = vSegBottom - vSegTop;
        double[] vSegment = new double[vSegLen];
        Array.Copy(rowDensity, vSegTop, vSegment, 0, vSegLen);

        var (cellH, vAc) = FindPeriodWithScore(vSegment, minCell, maxCell);
        if (cellH <= 0)
        {
            continue;
        }

        var (extTop, extBottom) = FindGridExtent(vSegment, cellH);
        if (extTop < 0)
        {
            continue;
        }

        int top = vSegTop + extTop;
        int bottom = vSegTop + extBottom;
        int vExtent = bottom - top;

        // Require at least 3 rows tall AND 100px absolute minimum
        if (vExtent < cellH * 3 || vExtent < 100)
        {
            continue;
        }

        if (debug)
        {
            Console.Error.WriteLine(
                $" 2D candidate: cellW={cand.cellW}, cellH={cellH}, " +
                $"region=({cand.hLeft},{top})-({cand.hRight},{bottom}), " +
                $"vAC={vAc:F3}, extent={vExtent}px ({vExtent / cellH} rows)");
        }

        // ── Found a valid 2D grid ──
        int gridW = cand.hRight - cand.hLeft;
        int gridH = bottom - top;
        int cols = Math.Max(2, (int)Math.Round((double)gridW / cand.cellW));
        int rows = Math.Max(2, (int)Math.Round((double)gridH / cellH));

        // Snap grid dimensions to exact multiples of cell size
        gridW = cols * cand.cellW;
        gridH = rows * cellH;

        if (debug)
        {
            Console.Error.WriteLine(
                $" => cols={cols}, rows={rows}, gridW={gridW}, gridH={gridH}");
        }

        WriteResponse(new DetectGridResponse
        {
            Detected = true,
            Region = new RegionRect
            {
                X = req.Region.X + cand.hLeft,
                Y = req.Region.Y + top,
                Width = gridW,
                Height = gridH,
            },
            Cols = cols,
            Rows = rows,
            CellWidth = Math.Round((double)gridW / cols, 1),
            CellHeight = Math.Round((double)gridH / rows, 1),
        });
        return;
    }

    if (debug)
    {
        Console.Error.WriteLine(" No valid 2D grid found");
    }
    WriteResponse(new DetectGridResponse { Detected = false });
}

/// Find the dominant period in a signal using autocorrelation.
/// Returns (period, score) where score is the autocorrelation strength,
/// or (-1, 0) when the signal is shorter than three minimum periods, is nearly
/// constant, or has no local autocorrelation peak in the searched lag range.
(int period, double score) FindPeriodWithScore(double[] signal, int minPeriod, int maxPeriod)
{
    int n = signal.Length;
    if (n < minPeriod * 3)
    {
        return (-1, 0);
    }

    double mean = signal.Average();
    // Sum of squared deviations (not divided by n); used as the normaliser below.
    double variance = 0;
    for (int i = 0; i < n; i++)
    {
        variance += (signal[i] - mean) * (signal[i] - mean);
    }
    if (variance < 1.0)
    {
        return (-1, 0);
    }

    int maxLag = Math.Min(maxPeriod, n / 3);
    double[] ac = new double[maxLag + 1];
    for (int lag = minPeriod; lag <= maxLag; lag++)
    {
        double sum = 0;
        for (int i = 0; i < n - lag; i++)
        {
            sum += (signal[i] - mean) * (signal[i + lag] - mean);
        }
        ac[lag] = sum / variance;
    }

    // Find the first significant peak — this is the fundamental period.
    // Using "first" avoids picking harmonics (2x, 3x) or unrelated larger patterns.
    for (int lag = minPeriod + 1; lag < maxLag; lag++)
    {
        if (ac[lag] > 0.01 && ac[lag] >= ac[lag - 1] && ac[lag] >= ac[lag + 1])
        {
            return (lag, ac[lag]);
        }
    }

    return (-1, 0);
}

/// Find contiguous segments where values are ABOVE threshold.
/// Used to find grid panel regions by density of very dark pixels.
/// Allows brief gaps (up to 5px) to handle grid borders.
List<(int start, int end)> FindDarkDensitySegments(double[] profile, double threshold, int minLength)
{
    // Returns half-open [start, end) index ranges: `end` is one past the last
    // sample that met the threshold, so `end - start` is the segment length.
    // Segments shorter than minLength are dropped. Trailing gap samples are never
    // part of a segment.
    var segments = new List<(int start, int end)>();
    int n = profile.Length;
    int curStart = -1;
    const int maxGap = 5;
    int gapCount = 0;

    for (int i = 0; i < n; i++)
    {
        if (profile[i] >= threshold)
        {
            if (curStart < 0)
            {
                curStart = i;
            }
            gapCount = 0;
            continue;
        }

        if (curStart < 0)
        {
            continue;
        }

        gapCount++;
        if (gapCount > maxGap)
        {
            // The gap covers indices (i - gapCount + 1)..i, so the last sample that
            // met the threshold is at i - gapCount and the exclusive end is one
            // past it — the same convention as the end-of-array case below.
            int end = i - gapCount + 1;
            if (end - curStart >= minLength)
            {
                segments.Add((curStart, end));
            }
            curStart = -1;
            gapCount = 0;
        }
    }

    if (curStart >= 0)
    {
        int end = n - gapCount;
        if (end - curStart >= minLength)
        {
            segments.Add((curStart, end));
        }
    }

    return segments;
}

/// Debug: find the top N AC peaks in a signal
/// Returns up to topN (lag, autocorrelation) pairs, strongest first; empty when
/// the signal is shorter than three minimum periods or is nearly constant.
List<(int lag, double ac)> FindTopAcPeaks(double[] signal, int minPeriod, int maxPeriod, int topN)
{
    int n = signal.Length;
    if (n < minPeriod * 3)
    {
        return [];
    }

    double mean = signal.Average();
    // Sum of squared deviations; normalises the autocorrelation below.
    double variance = 0;
    for (int i = 0; i < n; i++)
    {
        variance += (signal[i] - mean) * (signal[i] - mean);
    }
    if (variance < 1.0)
    {
        return [];
    }

    int maxLag = Math.Min(maxPeriod, n / 3);
    double[] ac = new double[maxLag + 1];
    for (int lag = minPeriod; lag <= maxLag; lag++)
    {
        double sum = 0;
        for (int i = 0; i < n - lag; i++)
        {
            sum += (signal[i] - mean) * (signal[i + lag] - mean);
        }
        ac[lag] = sum / variance;
    }

    // A peak is a lag at least as strong as both neighbours and above a small floor.
    var peaks = new List<(int lag, double ac)>();
    for (int lag = minPeriod + 1; lag < maxLag; lag++)
    {
        if (ac[lag] >= ac[lag - 1] && ac[lag] >= ac[lag + 1] && ac[lag] > 0.005)
        {
            peaks.Add((lag, ac[lag]));
        }
    }

    peaks.Sort((a, b) => b.ac.CompareTo(a.ac));
    return peaks.Take(topN).ToList();
}

/// Find the extent of the grid in a 1D profile using local autocorrelation
/// at the specific detected period. Only regions where the signal actually
/// repeats at the given period will score high — much more precise than variance.
(int start, int end) FindGridExtent(double[] signal, int period)
{
    // Scores every position by how strongly the surrounding window (two periods
    // on each side) repeats at lag = period, then returns the longest stretch whose
    // score exceeds a quarter of the best score, widened by period / 4 on each side.
    // Returns (-1, -1) when the signal is too short or nothing repeats.
    int length = signal.Length;
    int radius = period * 2;
    if (length < radius * 2 + period)
    {
        return (-1, -1);
    }

    // Local autocorrelation at the given lag; positions without a full window stay 0.
    double[] periodicity = new double[length];
    for (int mid = radius; mid < length - radius; mid++)
    {
        int from = mid - radius;
        int to = mid + radius;

        double total = 0;
        for (int k = from; k < to; k++)
        {
            total += signal[k];
        }
        double avg = total / (to - from);

        double spread = 0;
        for (int k = from; k < to; k++)
        {
            spread += (signal[k] - avg) * (signal[k] - avg);
        }

        // Windows that are almost flat carry no periodicity information.
        if (!(spread < 1.0))
        {
            double lagged = 0;
            for (int k = from; k < to - period; k++)
            {
                lagged += (signal[k] - avg) * (signal[k + period] - avg);
            }
            periodicity[mid] = Math.Max(0, lagged / spread);
        }
    }

    double peak = 0;
    foreach (double value in periodicity)
    {
        if (value > peak)
        {
            peak = value;
        }
    }
    if (peak < 0.02)
    {
        return (-1, -1);
    }

    // Longest contiguous run above the cutoff. Index `length` acts as a sentinel
    // that closes a run still open at the end of the signal.
    double cutoff = peak * 0.25;
    int longestFrom = -1, longestTo = -1, longest = 0;
    int openedAt = -1;
    for (int k = 0; k <= length; k++)
    {
        bool above = k < length && periodicity[k] > cutoff;
        if (above)
        {
            if (openedAt < 0)
            {
                openedAt = k;
            }
        }
        else if (openedAt >= 0)
        {
            int span = k - openedAt;
            if (span > longest)
            {
                longest = span;
                longestFrom = openedAt;
                longestTo = k;
            }
            openedAt = -1;
        }
    }

    if (longestFrom < 0)
    {
        return (-1, -1);
    }

    // Small extension to include cell borders at edges
    int margin = period / 4;
    return (Math.Max(0, longestFrom - margin), Math.Min(length - 1, longestTo + margin));
}

// ── Screen Capture ──────────────────────────────────────────────────────────

/// Capture from screen, or load from file if specified.
/// When file is set, loads the image and crops to region.
Bitmap CaptureOrLoad(string? file, RegionRect? region)
{
    // Without a file the image comes straight from the screen.
    if (string.IsNullOrEmpty(file))
    {
        return CaptureScreen(region);
    }

    var fullBmp = new Bitmap(file);
    if (region is null)
    {
        // Ownership of the loaded image passes to the caller.
        return fullBmp;
    }

    try
    {
        // Negative origins are clamped to 0; the size is limited to what is
        // left of the image to the right of / below the origin.
        int cx = Math.Max(0, region.X);
        int cy = Math.Max(0, region.Y);
        int cw = Math.Min(region.Width, fullBmp.Width - cx);
        int ch = Math.Min(region.Height, fullBmp.Height - cy);

        // Fail with a readable message instead of handing GDI+ an empty or
        // out-of-bounds rectangle.
        if (cw <= 0 || ch <= 0)
        {
            throw new ArgumentException(
                $"Region ({region.X},{region.Y} {region.Width}x{region.Height}) does not overlap the {fullBmp.Width}x{fullBmp.Height} image '{file}'");
        }

        return fullBmp.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
    }
    finally
    {
        // The crop is returned as a separate Bitmap; release the source image
        // on success and on failure alike.
        fullBmp.Dispose();
    }
}

/// Capture the given screen rectangle, or the whole primary monitor when
/// region is null, into a new 32bpp ARGB bitmap owned by the caller.
Bitmap CaptureScreen(RegionRect? region)
{
    const int smCxScreen = 0; // SM_CXSCREEN
    const int smCyScreen = 1; // SM_CYSCREEN

    int x, y, w, h;
    if (region is not null)
    {
        x = region.X;
        y = region.Y;
        w = region.Width;
        h = region.Height;
    }
    else
    {
        // Primary monitor only (0,0 origin, SM_CXSCREEN / SM_CYSCREEN)
        x = 0;
        y = 0;
        w = GetSystemMetrics(smCxScreen);
        h = GetSystemMetrics(smCyScreen);
    }

    var bitmap = new Bitmap(w, h, PixelFormat.Format32bppArgb);
    try
    {
        using var g = Graphics.FromImage(bitmap);
        g.CopyFromScreen(x, y, 0, 0, new Size(w, h), CopyPixelOperation.SourceCopy);
        return bitmap;
    }
    catch
    {
        // Do not leak the GDI bitmap when the copy fails.
        bitmap.Dispose();
        throw;
    }
}

// ── Bitmap → Tesseract Pix conversion (in-memory) ──────────────────────────

/// Encode the bitmap as PNG in memory and let Tesseract decode it into a Pix.
/// The caller owns (and must dispose) the returned Pix.
Pix BitmapToPix(Bitmap bitmap)
{
    using var ms = new MemoryStream();
    bitmap.Save(ms, SdImageFormat.Png);
    return Pix.LoadFromMemory(ms.ToArray());
}

// ── Extract lines/words from Tesseract page result ──────────────────────────

/// Walk the page's result iterator and collect one OcrLineResult per text line
/// that has at least one non-blank word with a bounding box. offsetX / offsetY
/// are added to every word's X / Y.
List<OcrLineResult> ExtractLinesFromPage(Page page, int offsetX, int offsetY)
{
    var lines = new List<OcrLineResult>();

    using var iter = page.GetIterator();
    if (iter == null)
    {
        return lines;
    }

    iter.Begin();

    // NOTE(review): the outer Next(Block, TextLine) presumably stops at the last
    // line of the current block, in which case lines of later blocks are never
    // visited — confirm against the Tesseract wrapper before relying on this
    // for multi-block pages.
    do
    {
        var words = new List<OcrWordResult>();

        do
        {
            var wordText = iter.GetText(PageIteratorLevel.Word);
            if (string.IsNullOrWhiteSpace(wordText))
            {
                continue; // jumps to the loop condition, i.e. advances to the next word
            }

            if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var bounds))
            {
                words.Add(new OcrWordResult
                {
                    Text = wordText.Trim(),
                    X = bounds.X1 + offsetX,
                    Y = bounds.Y1 + offsetY,
                    Width = bounds.Width,
                    Height = bounds.Height,
                });
            }
        } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

        if (words.Count > 0)
        {
            var lineText = string.Join(" ", words.Select(w => w.Text));
            lines.Add(new OcrLineResult { Text = lineText, Words = words });
        }
    } while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.TextLine));

    return lines;
}

// ── Response writing ────────────────────────────────────────────────────────

/// Serialize the response with the shared jsonOptions and emit it as a single
/// line on stdout, flushing immediately.
void WriteResponse(object response)
{
    var json = JsonSerializer.Serialize(response, jsonOptions);
    Console.Out.WriteLine(json);
    Console.Out.Flush();
}

/// Pick the image format from the file extension: .jpg/.jpeg → JPEG,
/// .bmp → BMP, anything else → PNG.
SdImageFormat GetImageFormat(string path)
{
    var ext = Path.GetExtension(path).ToLowerInvariant();
    return ext switch
    {
        ".jpg" or ".jpeg" => SdImageFormat.Jpeg,
        ".bmp" => SdImageFormat.Bmp,
        _ => SdImageFormat.Png,
    };
}

// ── P/Invoke ────────────────────────────────────────────────────────────────

[DllImport("user32.dll")]
static extern bool SetProcessDPIAware();

[DllImport("user32.dll")]
static extern int GetSystemMetrics(int nIndex);

// ── Request / Response Models ───────────────────────────────────────────────

/// One request line read from stdin. Which fields matter depends on Cmd.
class Request
{
    [JsonPropertyName("cmd")] public string? Cmd { get; set; }
    [JsonPropertyName("region")] public RegionRect? Region { get; set; }
    [JsonPropertyName("path")] public string? Path { get; set; }
    [JsonPropertyName("cols")] public int Cols { get; set; }
    [JsonPropertyName("rows")] public int Rows { get; set; }
    [JsonPropertyName("threshold")] public int Threshold { get; set; }
    [JsonPropertyName("minCellSize")] public int MinCellSize { get; set; }
    [JsonPropertyName("maxCellSize")] public int MaxCellSize { get; set; }
    [JsonPropertyName("file")] public string? File { get; set; }
    [JsonPropertyName("debug")] public bool Debug { get; set; }
    // -1 is the default when the request does not supply a target cell.
    [JsonPropertyName("targetRow")] public int TargetRow { get; set; } = -1;
    [JsonPropertyName("targetCol")] public int TargetCol { get; set; } = -1;
}

/// Axis-aligned rectangle in pixels.
class RegionRect
{
    [JsonPropertyName("x")] public int X { get; set; }
    [JsonPropertyName("y")] public int Y { get; set; }
    [JsonPropertyName("width")] public int Width { get; set; }
    [JsonPropertyName("height")] public int Height { get; set; }
}

/// Sent once after the OCR engine has been created.
class ReadyResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("ready")] public bool Ready => true;
}

/// Success response without a payload.
class OkResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
}

/// Failure response carrying a human-readable message.
class ErrorResponse(string message)
{
    [JsonPropertyName("ok")] public bool Ok => false;
    [JsonPropertyName("error")] public string Error => message;
}

/// OCR result: full text plus per-line / per-word details.
class OcrResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("text")] public string Text { get; set; } = "";
    [JsonPropertyName("lines")] public List<OcrLineResult> Lines { get; set; } = [];
}

/// Same payload as OcrResponse plus an optional region.
class DiffOcrResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("text")] public string Text { get; set; } = "";
    [JsonPropertyName("lines")] public List<OcrLineResult> Lines { get; set; } = [];
    [JsonPropertyName("region")] public RegionRect? Region { get; set; }
}

/// One recognised text line and the words it is made of.
class OcrLineResult
{
    [JsonPropertyName("text")] public string Text { get; set; } = "";
    [JsonPropertyName("words")] public List<OcrWordResult> Words { get; set; } = [];
}

/// One recognised word with its bounding box.
class OcrWordResult
{
    [JsonPropertyName("text")] public string Text { get; set; } = "";
    [JsonPropertyName("x")] public int X { get; set; }
    [JsonPropertyName("y")] public int Y { get; set; }
    [JsonPropertyName("width")] public int Width { get; set; }
    [JsonPropertyName("height")] public int Height { get; set; }
}

class CaptureResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("image")] public string Image { get; set; } = "";
}

class GridResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    // NOTE(review): inner element type assumed to be bool (one flag per cell) —
    // confirm against the code that fills Cells.
    [JsonPropertyName("cells")] public List<List<bool>> Cells { get; set; } = [];
    [JsonPropertyName("items")] public List<GridItem>? Items { get; set; }
    [JsonPropertyName("matches")] public List<GridMatch>? Matches { get; set; }
}

class GridItem
{
    [JsonPropertyName("row")] public int Row { get; set; }
    [JsonPropertyName("col")] public int Col { get; set; }
    [JsonPropertyName("w")] public int W { get; set; }
    [JsonPropertyName("h")] public int H { get; set; }
}

class GridMatch
{
    [JsonPropertyName("row")] public int Row { get; set; }
    [JsonPropertyName("col")] public int Col { get; set; }
    [JsonPropertyName("similarity")] public double Similarity { get; set; }
}

class DetectGridResponse
{
    [JsonPropertyName("ok")] public bool Ok => true;
    [JsonPropertyName("detected")] public bool Detected { get; set; }
    [JsonPropertyName("region")] public RegionRect? Region { get; set; }
    [JsonPropertyName("cols")] public int Cols { get; set; }
    [JsonPropertyName("rows")] public int Rows { get; set; }
    [JsonPropertyName("cellWidth")] public double CellWidth { get; set; }
    [JsonPropertyName("cellHeight")] public double CellHeight { get; set; }
}