using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using Poe2Trade.Core;
using OpenCvSharp.Extensions;
using Serilog;
using Region = Poe2Trade.Core.Region;

namespace Poe2Trade.Screen;

/// <summary>
/// Facade over the screen-capture, diff-crop, template-matching and OCR helpers.
/// All work is performed synchronously; the <see cref="Task"/>-returning members
/// hand back already-completed tasks.
/// </summary>
public class ScreenReader : IScreenReader
{
    private readonly DiffCropHandler _diffCrop = new();
    private readonly GridHandler _gridHandler = new();
    private readonly TemplateMatchHandler _templateMatch = new();
    private readonly EdgeCropHandler _edgeCrop = new();
    private readonly PythonOcrBridge _pythonBridge = new();
    private bool _initialized;

    /// <summary>Grid reader built on this instance's grid handler.</summary>
    public GridReader Grid { get; }

    /// <summary>Creates the reader and wires <see cref="Grid"/> to the internal grid handler.</summary>
    public ScreenReader()
    {
        Grid = new GridReader(_gridHandler);
    }

    /// <summary>
    /// Initializes DPI awareness the first time it is called; later calls do nothing.
    /// </summary>
    public Task Warmup()
    {
        if (!_initialized)
        {
            ScreenCapture.InitDpiAwareness();
            _initialized = true;
        }

        return Task.CompletedTask;
    }

    // -- Capture --

    /// <summary>Returns the diff-crop handler's capture result wrapped in a completed task.</summary>
    // NOTE(review): the result type of HandleCapture is not visible here, so the declared
    // return type is left as written — confirm it against IScreenReader.
    public Task CaptureScreen()
    {
        return Task.FromResult(_diffCrop.HandleCapture());
    }

    /// <summary>Same as <see cref="CaptureScreen"/> but restricted to <paramref name="region"/>.</summary>
    // NOTE(review): declared return type left as written — confirm against IScreenReader.
    public Task CaptureRegion(Region region)
    {
        return Task.FromResult(_diffCrop.HandleCapture(region));
    }

    // -- OCR --

    /// <summary>
    /// Captures the screen (or <paramref name="region"/>) and runs OCR on it.
    /// </summary>
    /// <param name="region">Area to capture; <c>null</c> is passed straight through to the capture call.</param>
    /// <param name="preprocess">
    /// <c>"tophat"</c> or <c>"clahe"</c> selects the matching preprocessing step;
    /// any other value (including <c>null</c>) sends the raw capture to OCR.
    /// </param>
    /// <returns>The OCR bridge's response.</returns>
    public Task<OcrResponse> Ocr(Region? region = null, string? preprocess = null)
    {
        using var bitmap = ScreenCapture.CaptureOrLoad(null, region);

        if (preprocess == "tophat")
        {
            using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);
            return Task.FromResult(_pythonBridge.OcrFromBitmap(processed));
        }

        if (preprocess == "clahe")
        {
            using var processed = ImagePreprocessor.PreprocessClahe(bitmap);
            return Task.FromResult(_pythonBridge.OcrFromBitmap(processed));
        }

        return Task.FromResult(_pythonBridge.OcrFromBitmap(bitmap));
    }

    /// <summary>
    /// Runs OCR on a fresh capture and looks for <paramref name="searchText"/>.
    /// </summary>
    /// <param name="searchText">Text to locate (case-insensitive).</param>
    /// <param name="fuzzy">When true, bigram-similarity matching is tried after exact matching.</param>
    /// <returns>Center of the match, or <c>null</c> when nothing matched.</returns>
    public async Task<(int X, int Y)?> FindTextOnScreen(string searchText, bool fuzzy = false)
    {
        var result = await Ocr();
        var pos = FindWordInOcrResult(result, searchText, fuzzy);

        if (pos.HasValue)
        {
            Log.Information("Found text '{Text}' at ({X},{Y})", searchText, pos.Value.X, pos.Value.Y);
        }
        else
        {
            Log.Information("Text '{Text}' not found on screen", searchText);
        }

        return pos;
    }

    /// <summary>Runs OCR on a fresh capture and returns the recognized text.</summary>
    public async Task<string> ReadFullScreen()
    {
        var result = await Ocr();
        return result.Text;
    }

    /// <summary>
    /// Runs OCR on <paramref name="region"/> and looks for <paramref name="searchText"/> (exact matching only).
    /// </summary>
    /// <returns>
    /// Center of the match with the region's X/Y added to it, or <c>null</c> when nothing matched.
    /// </returns>
    public async Task<(int X, int Y)?> FindTextInRegion(Region region, string searchText)
    {
        var result = await Ocr(region);
        var pos = FindWordInOcrResult(result, searchText);

        if (pos.HasValue)
        {
            return (region.X + pos.Value.X, region.Y + pos.Value.Y);
        }

        return null;
    }

    /// <summary>Runs OCR on <paramref name="region"/> and returns the recognized text.</summary>
    public async Task<string> ReadRegionText(Region region)
    {
        var result = await Ocr(region);
        return result.Text;
    }

    /// <summary>Returns whether <see cref="FindTextInRegion"/> finds <paramref name="searchText"/>.</summary>
    public async Task<bool> CheckForText(Region region, string searchText)
    {
        var pos = await FindTextInRegion(region, searchText);
        return pos.HasValue;
    }

    // -- Snapshot / Diff OCR --

    /// <summary>Asks the diff-crop handler to take a snapshot.</summary>
    public Task Snapshot()
    {
        _diffCrop.HandleSnapshot();
        return Task.CompletedTask;
    }

    /// <summary>
    /// Diff-crops the screen via the diff-crop handler, preprocesses the crop and runs OCR on it.
    /// </summary>
    /// <param name="savePath">When non-empty, the raw crop is saved here (the directory is created if missing).</param>
    /// <param name="region">Optional region forwarded to the diff-crop step.</param>
    /// <returns>
    /// OCR text and lines with word coordinates shifted by the crop region's X/Y, plus the crop region.
    /// An empty response is returned when the diff-crop step yields nothing.
    /// </returns>
    public Task<DiffOcrResponse> DiffOcr(string? savePath = null, Region? region = null)
    {
        var p = new DiffOcrParams();
        var cropResult = _diffCrop.DiffCrop(p.Crop, region: region);
        if (cropResult == null)
        {
            return Task.FromResult(new DiffOcrResponse { Text = "", Lines = [] });
        }

        var (cropped, refCropped, current, cropRegion) = cropResult.Value;
        using var _current = current;
        using var _cropped = cropped;
        using var _refCropped = refCropped;

        // Save raw crop if path is provided
        if (!string.IsNullOrEmpty(savePath))
        {
            var dir = Path.GetDirectoryName(savePath);
            if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            {
                Directory.CreateDirectory(dir);
            }

            cropped.Save(savePath, ImageUtils.GetImageFormat(savePath));
        }

        // Preprocess with background subtraction
        var ocr = p.Ocr;
        using var processedBmp = ocr.UseBackgroundSub
            ? ImagePreprocessor.PreprocessWithBackgroundSub(
                cropped, refCropped, ocr.DimPercentile, ocr.TextThresh, 1, ocr.SoftThreshold)
            : ImagePreprocessor.PreprocessForOcr(cropped, ocr.KernelSize, 1);

        var ocrResult = _pythonBridge.OcrFromBitmap(processedBmp, ocr);

        // Offset coordinates to screen space
        foreach (var line in ocrResult.Lines)
        {
            foreach (var word in line.Words)
            {
                word.X += cropRegion.X;
                word.Y += cropRegion.Y;
            }
        }

        return Task.FromResult(new DiffOcrResponse
        {
            Text = ocrResult.Text,
            Lines = ocrResult.Lines,
            Region = cropRegion,
        });
    }

    // -- Template matching --

    /// <summary>
    /// Runs the template matcher for <paramref name="templatePath"/> and logs a hit when one is returned.
    /// </summary>
    // NOTE(review): the result type of Match is not visible here, so the declared return
    // type is left as written — confirm it against IScreenReader.
    public Task TemplateMatch(string templatePath, Region? region = null)
    {
        var result = _templateMatch.Match(templatePath, region);
        if (result != null)
        {
            Log.Information(
                "Template match found: ({X},{Y}) confidence={Conf:F3}",
                result.X, result.Y, result.Confidence);
        }

        return Task.FromResult(result);
    }

    // -- Save --

    /// <summary>Asks the diff-crop handler to write a screenshot to <paramref name="path"/>.</summary>
    public Task SaveScreenshot(string path)
    {
        _diffCrop.HandleScreenshot(path);
        return Task.CompletedTask;
    }

    /// <summary>Asks the diff-crop handler to write a screenshot of <paramref name="region"/> to <paramref name="path"/>.</summary>
    public Task SaveRegion(Region region, string path)
    {
        _diffCrop.HandleScreenshot(path, region);
        return Task.CompletedTask;
    }

    // -- Nameplate diff OCR --

    /// <summary>Captures a bitmap with no path and no region. The caller owns the returned bitmap.</summary>
    public Bitmap CaptureRawBitmap() => ScreenCapture.CaptureOrLoad(null, null);

    /// <summary>
    /// Finds areas that became brighter between <paramref name="reference"/> and
    /// <paramref name="current"/>, then runs OCR on each such area of <paramref name="current"/>.
    /// </summary>
    /// <param name="reference">Earlier frame.</param>
    /// <param name="current">Later frame; crops for OCR are taken from this bitmap.</param>
    /// <returns>
    /// All recognized lines (word coordinates shifted by each crop's origin) and their text joined
    /// with newlines. Empty when no sufficiently large bright cluster is found.
    /// </returns>
    public Task<OcrResponse> NameplateDiffOcr(Bitmap reference, Bitmap current)
    {
        // Only the area common to both frames is compared.
        int w = Math.Min(reference.Width, current.Width);
        int h = Math.Min(reference.Height, current.Height);
        var common = new Rectangle(0, 0, w, h);

        // Each bitmap keeps its own stride: locking a sub-rectangle can report the stride of the
        // underlying image, so the two values are not guaranteed to be equal.
        byte[] refPx = CopyPixels(reference, common, out int refStride);
        byte[] curPx = CopyPixels(current, common, out int curStride);

        // Build a binary mask of pixels that got significantly brighter (nameplates are bright text)
        const int brightThresh = 30;
        bool[] mask = new bool[w * h];
        Parallel.For(0, h, y =>
        {
            int refRow = y * refStride;
            int curRow = y * curStride;
            for (int x = 0; x < w; x++)
            {
                int r = refRow + x * 4;
                int c = curRow + x * 4;

                // Sum of the per-channel increase over the first three bytes of the 4-byte pixel.
                int brighter = (curPx[c] - refPx[r])
                             + (curPx[c + 1] - refPx[r + 1])
                             + (curPx[c + 2] - refPx[r + 2]);
                if (brighter > brightThresh)
                {
                    mask[y * w + x] = true;
                }
            }
        });

        // Find connected clusters via row-scan: collect bounding boxes of bright regions
        var boxes = FindBrightClusters(mask, w, h, minWidth: 40, minHeight: 10, maxGap: 8);
        Log.Information("NameplateDiff: found {Count} bright clusters", boxes.Count);

        if (boxes.Count == 0)
        {
            return Task.FromResult(new OcrResponse { Text = "", Lines = [] });
        }

        // OCR each cluster crop, accumulate results with screen-space coordinates
        var allLines = new List<OcrLine>();
        var allText = new List<string>();
        foreach (var box in boxes)
        {
            // Pad the crop slightly, clamped to the compared area.
            const int pad = 4;
            int cx = Math.Max(0, box.X - pad);
            int cy = Math.Max(0, box.Y - pad);
            int cw = Math.Min(w - cx, box.Width + pad * 2);
            int ch = Math.Min(h - cy, box.Height + pad * 2);

            using var crop = current.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
            var ocrResult = _pythonBridge.OcrFromBitmap(crop);

            // Offset word coordinates to screen space
            foreach (var line in ocrResult.Lines)
            {
                foreach (var word in line.Words)
                {
                    word.X += cx;
                    word.Y += cy;
                }

                allLines.Add(line);
                allText.Add(line.Text);
            }
        }

        return Task.FromResult(new OcrResponse
        {
            Text = string.Join("\n", allText),
            Lines = allLines,
        });
    }

    /// <summary>
    /// Copies <c>stride * rect.Height</c> bytes of 32bpp ARGB pixel data starting at the locked
    /// area's first scan line. The bitmap is always unlocked, even if the copy fails.
    /// </summary>
    private static byte[] CopyPixels(Bitmap bitmap, Rectangle rect, out int stride)
    {
        var data = bitmap.LockBits(rect, ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            stride = data.Stride;
            var pixels = new byte[stride * rect.Height];
            Marshal.Copy(data.Scan0, pixels, 0, pixels.Length);
            return pixels;
        }
        finally
        {
            bitmap.UnlockBits(data);
        }
    }

    /// <summary>
    /// Derives bounding boxes from a brightness mask: first horizontal bands of rows holding at
    /// least 3 set pixels, then, inside each band, runs of columns holding at least 1 set pixel.
    /// </summary>
    /// <param name="mask">Row-major mask of size <paramref name="w"/> × <paramref name="h"/>.</param>
    /// <param name="w">Mask width.</param>
    /// <param name="h">Mask height.</param>
    /// <param name="minWidth">Minimum box width.</param>
    /// <param name="minHeight">Minimum band height.</param>
    /// <param name="maxGap">Largest run of inactive rows/columns that does not split a band/box.</param>
    private static List<Rectangle> FindBrightClusters(bool[] mask, int w, int h, int minWidth, int minHeight, int maxGap)
    {
        // Row density
        int[] rowCounts = new int[h];
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                if (mask[y * w + x])
                {
                    rowCounts[y]++;
                }
            }
        }

        // Find horizontal bands of bright rows
        const int rowThresh = 3;
        var bands = FindRuns(rowCounts, rowThresh, minHeight, maxGap);

        // For each band, find column extents to get individual nameplate boxes
        const int colThresh = 1;
        var boxes = new List<Rectangle>();
        foreach (var (top, bottom) in bands)
        {
            int[] colCounts = new int[w];
            for (int y = top; y <= bottom; y++)
            {
                for (int x = 0; x < w; x++)
                {
                    if (mask[y * w + x])
                    {
                        colCounts[x]++;
                    }
                }
            }

            foreach (var (left, right) in FindRuns(colCounts, colThresh, minWidth, maxGap))
            {
                boxes.Add(new Rectangle(left, top, right - left + 1, bottom - top + 1));
            }
        }

        return boxes;
    }

    /// <summary>
    /// Finds inclusive index ranges that start and end on an entry &gt;= <paramref name="threshold"/>,
    /// tolerate up to <paramref name="maxGap"/> consecutive lower entries inside, and span at least
    /// <paramref name="minLength"/> indices.
    /// </summary>
    private static List<(int Start, int End)> FindRuns(int[] counts, int threshold, int minLength, int maxGap)
    {
        var runs = new List<(int Start, int End)>();
        int runStart = -1;
        int lastActive = -1;

        for (int i = 0; i < counts.Length; i++)
        {
            if (counts[i] >= threshold)
            {
                if (runStart < 0)
                {
                    runStart = i;
                }

                lastActive = i;
            }
            else if (runStart >= 0 && i - lastActive > maxGap)
            {
                // The gap grew too large: close the run at its last active index.
                if (lastActive - runStart + 1 >= minLength)
                {
                    runs.Add((runStart, lastActive));
                }

                runStart = -1;
            }
        }

        // A run still open at the end of the array is closed here.
        if (runStart >= 0 && lastActive - runStart + 1 >= minLength)
        {
            runs.Add((runStart, lastActive));
        }

        return runs;
    }

    /// <summary>Disposes the Python OCR bridge.</summary>
    public void Dispose() => _pythonBridge.Dispose();

    // -- OCR text matching --

    /// <summary>
    /// Locates <paramref name="needle"/> in an OCR result.
    /// A needle containing a space is matched against whole lines; otherwise against single words.
    /// </summary>
    /// <param name="result">OCR result to search.</param>
    /// <param name="needle">Text to find (case-insensitive substring match).</param>
    /// <param name="fuzzy">
    /// When true, bigram similarity (threshold 0.55) on letter/digit-only text is tried when the
    /// exact check fails.
    /// </param>
    /// <returns>Center of the first matching line or word, or <c>null</c>.</returns>
    private static (int X, int Y)? FindWordInOcrResult(OcrResponse result, string needle, bool fuzzy = false)
    {
        const double fuzzyThreshold = 0.55;
        var lower = needle.ToLowerInvariant();
        var needleNorm = Normalize(needle);

        if (lower.Contains(' '))
        {
            // Bigram similarity needs at least one bigram in the needle. With a shorter needle the
            // window start below could pass the end of the line and the slice would throw.
            var canFuzzy = fuzzy && needleNorm.Length >= 2;

            foreach (var line in result.Lines)
            {
                if (line.Words.Count == 0)
                {
                    continue;
                }

                if (line.Text.ToLowerInvariant().Contains(lower))
                {
                    return LineBoundsCenter(line);
                }

                if (!canFuzzy)
                {
                    continue;
                }

                // Slide a window of up to (needle length + 2) characters over the normalized line.
                var lineNorm = Normalize(line.Text);
                var windowLen = needleNorm.Length;
                for (var i = 0; i <= lineNorm.Length - windowLen + 2; i++)
                {
                    var end = Math.Min(i + windowLen + 2, lineNorm.Length);
                    var window = lineNorm[i..end];
                    if (BigramSimilarity(needleNorm, window) >= fuzzyThreshold)
                    {
                        return LineBoundsCenter(line);
                    }
                }
            }

            return null;
        }

        foreach (var line in result.Lines)
        {
            foreach (var word in line.Words)
            {
                var isMatch = word.Text.ToLowerInvariant().Contains(lower)
                    || (fuzzy && BigramSimilarity(needleNorm, Normalize(word.Text)) >= fuzzyThreshold);
                if (isMatch)
                {
                    return (word.X + word.Width / 2, word.Y + word.Height / 2);
                }
            }
        }

        return null;
    }

    /// <summary>
    /// Center of a line's bounds: left/top from the first word, right from the last word,
    /// bottom from the lowest word edge. The line must contain at least one word.
    /// </summary>
    private static (int X, int Y) LineBoundsCenter(OcrLine line)
    {
        var first = line.Words[0];
        var last = line.Words[^1];

        var x1 = first.X;
        var y1 = first.Y;
        var x2 = last.X + last.Width;
        var y2 = line.Words.Max(w => w.Y + w.Height);

        return ((x1 + x2) / 2, (y1 + y2) / 2);
    }

    /// <summary>Lower-cases <paramref name="s"/> and keeps only letters and digits.</summary>
    private static string Normalize(string s) =>
        new(s.ToLowerInvariant().Where(char.IsLetterOrDigit).ToArray());

    /// <summary>
    /// Similarity of two strings as 2 × (shared bigrams) / (total bigrams), counting repeated
    /// bigrams at most as often as they occur in <paramref name="a"/>.
    /// When either string has fewer than two characters the result is 1 for equal strings, else 0.
    /// </summary>
    private static double BigramSimilarity(string a, string b)
    {
        if (a.Length < 2 || b.Length < 2)
        {
            return a == b ? 1 : 0;
        }

        // Multiset of bigrams in a.
        var bigramsA = new Dictionary<(char, char), int>();
        for (var i = 0; i < a.Length - 1; i++)
        {
            var bg = (a[i], a[i + 1]);
            bigramsA[bg] = bigramsA.GetValueOrDefault(bg) + 1;
        }

        // Each bigram of b consumes one matching occurrence from a.
        var matches = 0;
        for (var i = 0; i < b.Length - 1; i++)
        {
            var bg = (b[i], b[i + 1]);
            if (bigramsA.TryGetValue(bg, out var count) && count > 0)
            {
                matches++;
                bigramsA[bg] = count - 1;
            }
        }

        return 2.0 * matches / (a.Length - 1 + b.Length - 1);
    }
}