407 lines
14 KiB
C#
407 lines
14 KiB
C#
using System.Drawing;
|
|
using System.Drawing.Imaging;
|
|
using System.Runtime.InteropServices;
|
|
using Poe2Trade.Core;
|
|
using OpenCvSharp.Extensions;
|
|
using Serilog;
|
|
using Region = Poe2Trade.Core.Region;
|
|
|
|
namespace Poe2Trade.Screen;
|
|
|
|
public class ScreenReader : IScreenReader
|
|
{
|
|
private readonly DiffCropHandler _diffCrop = new();
|
|
private readonly GridHandler _gridHandler = new();
|
|
private readonly TemplateMatchHandler _templateMatch = new();
|
|
private readonly EdgeCropHandler _edgeCrop = new();
|
|
private readonly PythonOcrBridge _pythonBridge = new();
|
|
private bool _initialized;
|
|
|
|
public GridReader Grid { get; }
|
|
|
|
/// <summary>
/// Creates the reader and exposes a <see cref="GridReader"/> built on the shared grid handler.
/// </summary>
public ScreenReader() => Grid = new GridReader(_gridHandler);
|
|
|
|
/// <summary>
/// Initializes DPI awareness the first time it is called; later calls do nothing.
/// </summary>
/// <returns>An already-completed task.</returns>
public Task Warmup()
{
    if (_initialized)
    {
        return Task.CompletedTask;
    }

    ScreenCapture.InitDpiAwareness();
    _initialized = true;
    return Task.CompletedTask;
}
|
|
|
|
// -- Capture --
|
|
|
|
/// <summary>
/// Captures via the diff-crop handler without a region and returns the resulting bytes.
/// </summary>
/// <returns>A completed task holding the bytes produced by the handler.</returns>
public Task<byte[]> CaptureScreen()
{
    var captured = _diffCrop.HandleCapture();
    return Task.FromResult(captured);
}
|
|
|
|
/// <summary>
/// Captures the given region via the diff-crop handler and returns the resulting bytes.
/// </summary>
/// <param name="region">Region passed through to the capture handler.</param>
/// <returns>A completed task holding the bytes produced by the handler.</returns>
public Task<byte[]> CaptureRegion(Region region)
{
    var captured = _diffCrop.HandleCapture(region);
    return Task.FromResult(captured);
}
|
|
|
|
// -- OCR --
|
|
|
|
/// <summary>
/// Captures the screen (or the given region) and runs OCR on it through the Python bridge.
/// </summary>
/// <param name="region">Optional region to capture; <c>null</c> is passed through to the capture call.</param>
/// <param name="preprocess">
/// <c>"tophat"</c> runs <c>ImagePreprocessor.PreprocessForOcr</c> first, <c>"clahe"</c> runs
/// <c>ImagePreprocessor.PreprocessClahe</c> first; any other value OCRs the raw capture.
/// </param>
/// <returns>A completed task holding the OCR response.</returns>
public Task<OcrResponse> Ocr(Region? region = null, string? preprocess = null)
{
    using var capture = ScreenCapture.CaptureOrLoad(null, region);

    switch (preprocess)
    {
        case "tophat":
        {
            using var topHat = ImagePreprocessor.PreprocessForOcr(capture);
            return Task.FromResult(_pythonBridge.OcrFromBitmap(topHat));
        }

        case "clahe":
        {
            using var equalized = ImagePreprocessor.PreprocessClahe(capture);
            return Task.FromResult(_pythonBridge.OcrFromBitmap(equalized));
        }

        default:
            return Task.FromResult(_pythonBridge.OcrFromBitmap(capture));
    }
}
|
|
|
|
/// <summary>
/// OCRs the whole capture and looks for <paramref name="searchText"/> in the result,
/// logging whether it was found.
/// </summary>
/// <param name="searchText">Text to look for.</param>
/// <param name="fuzzy">When true, approximate matches are also accepted.</param>
/// <returns>The matched position, or <c>null</c> when nothing matched.</returns>
public async Task<(int X, int Y)?> FindTextOnScreen(string searchText, bool fuzzy = false)
{
    var ocr = await Ocr();
    var location = FindWordInOcrResult(ocr, searchText, fuzzy);

    if (location is { } hit)
    {
        Log.Information("Found text '{Text}' at ({X},{Y})", searchText, hit.X, hit.Y);
    }
    else
    {
        Log.Information("Text '{Text}' not found on screen", searchText);
    }

    return location;
}
|
|
|
|
/// <summary>
/// OCRs the whole capture and returns the recognized text.
/// </summary>
public async Task<string> ReadFullScreen()
{
    return (await Ocr()).Text;
}
|
|
|
|
/// <summary>
/// OCRs <paramref name="region"/> and looks for <paramref name="searchText"/> (exact match only).
/// </summary>
/// <param name="region">Region to OCR.</param>
/// <param name="searchText">Text to look for.</param>
/// <returns>
/// The match position with the region's X/Y added to it, or <c>null</c> when nothing matched.
/// </returns>
public async Task<(int X, int Y)?> FindTextInRegion(Region region, string searchText)
{
    var ocr = await Ocr(region);

    if (FindWordInOcrResult(ocr, searchText) is not { } local)
    {
        return null;
    }

    // OCR coordinates are relative to the region; shift them by the region origin.
    return (region.X + local.X, region.Y + local.Y);
}
|
|
|
|
/// <summary>
/// OCRs <paramref name="region"/> and returns the recognized text.
/// </summary>
/// <param name="region">Region to OCR.</param>
public async Task<string> ReadRegionText(Region region)
{
    return (await Ocr(region)).Text;
}
|
|
|
|
/// <summary>
/// Reports whether <paramref name="searchText"/> is found by OCR inside <paramref name="region"/>.
/// </summary>
/// <param name="region">Region to OCR.</param>
/// <param name="searchText">Text to look for.</param>
/// <returns><c>true</c> when <see cref="FindTextInRegion"/> returns a position.</returns>
public async Task<bool> CheckForText(Region region, string searchText)
{
    var location = await FindTextInRegion(region, searchText);
    return location is not null;
}
|
|
|
|
// -- Snapshot / Diff OCR --
|
|
|
|
/// <summary>
/// Asks the diff-crop handler to take a snapshot.
/// </summary>
/// <returns>An already-completed task; the handler call runs synchronously.</returns>
public Task Snapshot()
{
    _diffCrop.HandleSnapshot();

    return Task.CompletedTask;
}
|
|
|
|
/// <summary>
/// Runs the diff-crop handler, preprocesses the cropped image and OCRs it, returning
/// word coordinates shifted by the crop region's origin.
/// </summary>
/// <param name="savePath">
/// Optional file path; when non-empty the raw (unprocessed) crop is saved there,
/// creating the directory if it does not exist.
/// </param>
/// <param name="region">Optional region forwarded to the diff-crop handler.</param>
/// <returns>
/// A completed task with the OCR text, lines and crop region, or an empty response
/// when the diff-crop handler returns <c>null</c>.
/// </returns>
public Task<DiffOcrResponse> DiffOcr(string? savePath = null, Region? region = null)
{
    var settings = new DiffOcrParams();

    var cropResult = _diffCrop.DiffCrop(settings.Crop, region: region);
    if (cropResult is null)
    {
        return Task.FromResult(new DiffOcrResponse { Text = "", Lines = [] });
    }

    var (cropped, refCropped, current, cropRegion) = cropResult.Value;

    // Take ownership of the returned images so they are released on every exit path.
    using var currentOwner = current;
    using var croppedOwner = cropped;
    using var refCroppedOwner = refCropped;

    // Optionally persist the raw crop before any preprocessing.
    if (!string.IsNullOrEmpty(savePath))
    {
        var folder = Path.GetDirectoryName(savePath);
        if (!string.IsNullOrEmpty(folder) && !Directory.Exists(folder))
        {
            Directory.CreateDirectory(folder);
        }

        cropped.Save(savePath, ImageUtils.GetImageFormat(savePath));
    }

    // Either background subtraction against the reference crop, or the plain OCR preprocessing.
    var ocrSettings = settings.Ocr;
    using var prepared = ocrSettings.UseBackgroundSub
        ? ImagePreprocessor.PreprocessWithBackgroundSub(cropped, refCropped, ocrSettings.DimPercentile, ocrSettings.TextThresh, 1, ocrSettings.SoftThreshold)
        : ImagePreprocessor.PreprocessForOcr(cropped, ocrSettings.KernelSize, 1);

    var recognized = _pythonBridge.OcrFromBitmap(prepared, ocrSettings);

    // Word positions are relative to the crop; shift them by the crop origin.
    foreach (var ocrLine in recognized.Lines)
    {
        foreach (var ocrWord in ocrLine.Words)
        {
            ocrWord.X += cropRegion.X;
            ocrWord.Y += cropRegion.Y;
        }
    }

    var response = new DiffOcrResponse
    {
        Text = recognized.Text,
        Lines = recognized.Lines,
        Region = cropRegion,
    };
    return Task.FromResult(response);
}
|
|
|
|
// -- Template matching --
|
|
|
|
/// <summary>
/// Runs the template-match handler and logs the position and confidence when a match is returned.
/// </summary>
/// <param name="templatePath">Template path forwarded to the handler.</param>
/// <param name="region">Optional region forwarded to the handler.</param>
/// <returns>A completed task holding the handler's result, which may be <c>null</c>.</returns>
public Task<TemplateMatchResult?> TemplateMatch(string templatePath, Region? region = null)
{
    var match = _templateMatch.Match(templatePath, region);

    if (match is not null)
    {
        Log.Information("Template match found: ({X},{Y}) confidence={Conf:F3}", match.X, match.Y, match.Confidence);
    }

    return Task.FromResult(match);
}
|
|
|
|
// -- Save --
|
|
|
|
/// <summary>
/// Asks the diff-crop handler to write a screenshot to <paramref name="path"/>.
/// </summary>
/// <param name="path">Destination path forwarded to the handler.</param>
/// <returns>An already-completed task; the handler call runs synchronously.</returns>
public Task SaveScreenshot(string path)
{
    _diffCrop.HandleScreenshot(path);

    return Task.CompletedTask;
}
|
|
|
|
/// <summary>
/// Asks the diff-crop handler to write a screenshot of <paramref name="region"/> to <paramref name="path"/>.
/// </summary>
/// <param name="region">Region forwarded to the handler.</param>
/// <param name="path">Destination path forwarded to the handler.</param>
/// <returns>An already-completed task; the handler call runs synchronously.</returns>
public Task SaveRegion(Region region, string path)
{
    _diffCrop.HandleScreenshot(path, region);

    return Task.CompletedTask;
}
|
|
|
|
// -- Nameplate diff OCR --
|
|
|
|
/// <summary>
/// Returns the bitmap produced by <c>ScreenCapture.CaptureOrLoad</c> with no source and no region.
/// The caller receives the bitmap directly; nothing here disposes it.
/// </summary>
public Bitmap CaptureRawBitmap()
{
    return ScreenCapture.CaptureOrLoad(null, null);
}
|
|
|
|
/// <summary>
/// Finds areas that became noticeably brighter between <paramref name="reference"/> and
/// <paramref name="current"/>, OCRs each such area of <paramref name="current"/>, and merges the results.
/// </summary>
/// <param name="reference">Earlier image to compare against.</param>
/// <param name="current">Later image; crops for OCR are taken from this bitmap.</param>
/// <returns>
/// A completed task with all recognized lines (word coordinates shifted into the coordinate
/// space of <paramref name="current"/>) and their text joined with newlines. The response is
/// empty when no bright cluster is found.
/// </returns>
/// <remarks>
/// Only the overlapping top-left area of the two bitmaps is compared. Each bitmap is copied
/// into its own tightly packed buffer, so bitmaps with different strides are indexed correctly,
/// and each lock is released even if the copy throws.
/// </remarks>
public Task<OcrResponse> NameplateDiffOcr(Bitmap reference, Bitmap current)
{
    // Copies the top-left width x height area into a buffer with exactly width * 4 bytes per row.
    // Copying row by row makes the result independent of the stride reported by LockBits.
    static byte[] CopyPackedPixels(Bitmap source, int width, int height)
    {
        int packedStride = width * 4;
        var packed = new byte[packedStride * height];
        var data = source.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            for (int row = 0; row < height; row++)
            {
                Marshal.Copy(IntPtr.Add(data.Scan0, row * data.Stride), packed, row * packedStride, packedStride);
            }
        }
        finally
        {
            source.UnlockBits(data);
        }

        return packed;
    }

    int w = Math.Min(reference.Width, current.Width);
    int h = Math.Min(reference.Height, current.Height);

    byte[] refPx = CopyPackedPixels(reference, w, h);
    byte[] curPx = CopyPackedPixels(current, w, h);
    int rowBytes = w * 4;

    // Build a binary mask of pixels that got significantly brighter (nameplates are bright text)
    const int brightThresh = 30;
    bool[] mask = new bool[w * h];
    Parallel.For(0, h, y =>
    {
        int rowOff = y * rowBytes;
        for (int x = 0; x < w; x++)
        {
            int i = rowOff + x * 4;
            // Sum of the per-channel increase over the first three bytes of the pixel (alpha is ignored).
            int brighter = (curPx[i] - refPx[i]) + (curPx[i + 1] - refPx[i + 1]) + (curPx[i + 2] - refPx[i + 2]);
            if (brighter > brightThresh)
            {
                mask[y * w + x] = true;
            }
        }
    });

    // Collect bounding boxes of bright regions
    var boxes = FindBrightClusters(mask, w, h, minWidth: 40, minHeight: 10, maxGap: 8);
    Log.Information("NameplateDiff: found {Count} bright clusters", boxes.Count);

    if (boxes.Count == 0)
    {
        return Task.FromResult(new OcrResponse { Text = "", Lines = [] });
    }

    // OCR each cluster crop, accumulate results with screen-space coordinates
    var allLines = new List<OcrLine>();
    var allText = new List<string>();

    foreach (var box in boxes)
    {
        // Pad the crop slightly, clamped to the compared area
        int pad = 4;
        int cx = Math.Max(0, box.X - pad);
        int cy = Math.Max(0, box.Y - pad);
        int cw = Math.Min(w - cx, box.Width + pad * 2);
        int ch = Math.Min(h - cy, box.Height + pad * 2);

        using var crop = current.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
        var ocrResult = _pythonBridge.OcrFromBitmap(crop);

        // Offset word coordinates from crop space back to the full image
        foreach (var line in ocrResult.Lines)
        {
            foreach (var word in line.Words)
            {
                word.X += cx;
                word.Y += cy;
            }

            allLines.Add(line);
            allText.Add(line.Text);
        }
    }

    return Task.FromResult(new OcrResponse
    {
        Text = string.Join("\n", allText),
        Lines = allLines,
    });
}
|
|
|
|
/// <summary>
/// Turns a row-major brightness mask into bounding boxes: rows are grouped into horizontal
/// bands first, then each band is split into boxes along its columns.
/// </summary>
/// <param name="mask">Row-major mask of size <paramref name="w"/> * <paramref name="h"/>.</param>
/// <param name="w">Mask width in pixels.</param>
/// <param name="h">Mask height in pixels.</param>
/// <param name="minWidth">Boxes narrower than this are dropped.</param>
/// <param name="minHeight">Bands shorter than this are dropped.</param>
/// <param name="maxGap">Number of consecutive inactive rows/columns tolerated inside one band/box.</param>
/// <returns>One rectangle per box; every box spans the full height of its band.</returns>
private static List<Rectangle> FindBrightClusters(bool[] mask, int w, int h, int minWidth, int minHeight, int maxGap)
{
    // Groups indices whose count reaches `threshold` into runs, bridging up to `gapLimit`
    // inactive entries, and keeps only runs at least `minLength` long (inclusive bounds).
    static List<(int Start, int End)> CollectRuns(int[] counts, int threshold, int minLength, int gapLimit)
    {
        var runs = new List<(int Start, int End)>();
        int runStart = -1;
        int lastHit = -1;

        for (int i = 0; i < counts.Length; i++)
        {
            if (counts[i] >= threshold)
            {
                if (runStart < 0)
                {
                    runStart = i;
                }

                lastHit = i;
                continue;
            }

            // Inactive entry: only close the run once the gap has grown past the limit.
            if (runStart < 0 || i - lastHit <= gapLimit)
            {
                continue;
            }

            if (lastHit - runStart + 1 >= minLength)
            {
                runs.Add((runStart, lastHit));
            }

            runStart = -1;
        }

        // A run still open at the end is closed at its last active entry.
        if (runStart >= 0 && lastHit - runStart + 1 >= minLength)
        {
            runs.Add((runStart, lastHit));
        }

        return runs;
    }

    // Number of set mask pixels in each row.
    var brightPerRow = new int[h];
    for (int row = 0; row < h; row++)
    {
        int offset = row * w;
        for (int col = 0; col < w; col++)
        {
            if (mask[offset + col])
            {
                brightPerRow[row]++;
            }
        }
    }

    var clusters = new List<Rectangle>();

    // A row counts as active with at least 3 set pixels; a column with at least 1.
    foreach (var (top, bottom) in CollectRuns(brightPerRow, 3, minHeight, maxGap))
    {
        var brightPerColumn = new int[w];
        for (int row = top; row <= bottom; row++)
        {
            int offset = row * w;
            for (int col = 0; col < w; col++)
            {
                if (mask[offset + col])
                {
                    brightPerColumn[col]++;
                }
            }
        }

        foreach (var (left, right) in CollectRuns(brightPerColumn, 1, minWidth, maxGap))
        {
            clusters.Add(new Rectangle(left, top, right - left + 1, bottom - top + 1));
        }
    }

    return clusters;
}
|
|
|
|
/// <summary>
/// Disposes the Python OCR bridge held by this reader.
/// </summary>
public void Dispose()
{
    _pythonBridge.Dispose();
}
|
|
|
|
// -- OCR text matching --
|
|
|
|
/// <summary>
/// Locates <paramref name="needle"/> in an OCR result.
/// </summary>
/// <param name="result">OCR result whose lines and words are searched in order.</param>
/// <param name="needle">
/// Text to find. A needle containing a space is matched against whole lines; otherwise it is
/// matched against individual words.
/// </param>
/// <param name="fuzzy">
/// When true, a bigram-similarity match on the normalized text (letters and digits only,
/// lower-cased) is accepted in addition to a case-insensitive substring match.
/// </param>
/// <returns>
/// The center of the first matching line (multi-word needle) or word (single-word needle),
/// or <c>null</c> when nothing matches.
/// </returns>
/// <remarks>
/// For multi-word needles the fuzzy pass is skipped when the normalized needle has fewer than
/// two characters: bigram similarity is undefined for such a needle, and the sliding window
/// would otherwise start past the end of the line and throw.
/// </remarks>
private static (int X, int Y)? FindWordInOcrResult(OcrResponse result, string needle, bool fuzzy = false)
{
    const double fuzzyThreshold = 0.55;
    // Fuzzy windows are this many characters longer than the needle to tolerate extra OCR characters.
    const int windowSlack = 2;

    var lower = needle.ToLowerInvariant();
    var needleNorm = Normalize(needle);

    if (lower.Contains(' '))
    {
        var fuzzyLines = fuzzy && needleNorm.Length >= 2;

        foreach (var line in result.Lines)
        {
            // LineBoundsCenter needs at least one word.
            if (line.Words.Count == 0) continue;

            if (line.Text.ToLowerInvariant().Contains(lower))
                return LineBoundsCenter(line);

            if (!fuzzyLines) continue;

            var lineNorm = Normalize(line.Text);
            var windowLen = needleNorm.Length + windowSlack;
            // With needleNorm.Length >= 2 this never exceeds lineNorm.Length, so the slice below is valid.
            var lastStart = lineNorm.Length - needleNorm.Length + windowSlack;
            for (var i = 0; i <= lastStart; i++)
            {
                var end = Math.Min(i + windowLen, lineNorm.Length);
                var window = lineNorm[i..end];
                if (BigramSimilarity(needleNorm, window) >= fuzzyThreshold)
                    return LineBoundsCenter(line);
            }
        }

        return null;
    }

    foreach (var line in result.Lines)
    {
        foreach (var word in line.Words)
        {
            if (word.Text.ToLowerInvariant().Contains(lower))
                return (word.X + word.Width / 2, word.Y + word.Height / 2);

            if (fuzzy && BigramSimilarity(needleNorm, Normalize(word.Text)) >= fuzzyThreshold)
                return (word.X + word.Width / 2, word.Y + word.Height / 2);
        }
    }

    return null;
}
|
|
|
|
/// <summary>
/// Returns the center of a line's bounds. The bounds run from the first word's X/Y to the
/// last word's right edge and the lowest bottom edge among all words.
/// </summary>
/// <param name="line">Line with at least one word; indexing the first word fails otherwise.</param>
private static (int X, int Y) LineBoundsCenter(OcrLine line)
{
    var words = line.Words;
    var leading = words[0];
    var trailing = words[^1];

    var left = leading.X;
    var right = trailing.X + trailing.Width;
    var top = leading.Y;
    var bottom = words.Max(word => word.Y + word.Height);

    var centerX = (left + right) / 2;
    var centerY = (top + bottom) / 2;
    return (centerX, centerY);
}
|
|
|
|
/// <summary>
/// Lower-cases <paramref name="s"/> (invariant culture) and keeps only letters and digits.
/// </summary>
private static string Normalize(string s)
{
    var lowered = s.ToLowerInvariant();
    var kept = new char[lowered.Length];
    var count = 0;

    foreach (var ch in lowered)
    {
        if (char.IsLetterOrDigit(ch))
        {
            kept[count++] = ch;
        }
    }

    return new string(kept, 0, count);
}
|
|
|
|
/// <summary>
/// Similarity of two strings based on shared adjacent character pairs:
/// twice the number of shared pairs divided by the total number of pairs in both strings.
/// Repeated pairs are counted as many times as they occur in both strings.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>
/// A value from 0 to 1. When either string has fewer than two characters the result is
/// 1 if the strings are equal and 0 otherwise.
/// </returns>
private static double BigramSimilarity(string a, string b)
{
    if (a.Length < 2 || b.Length < 2)
    {
        return a == b ? 1 : 0;
    }

    // How many times each pair of a is still available for matching.
    var available = new Dictionary<(char, char), int>();
    for (var i = 1; i < a.Length; i++)
    {
        var pair = (a[i - 1], a[i]);
        available.TryGetValue(pair, out var seen);
        available[pair] = seen + 1;
    }

    var shared = 0;
    for (var j = 1; j < b.Length; j++)
    {
        var pair = (b[j - 1], b[j]);
        if (!available.TryGetValue(pair, out var left) || left <= 0)
        {
            continue;
        }

        // Consume one occurrence so a pair of a is never matched twice.
        available[pair] = left - 1;
        shared++;
    }

    return 2.0 * shared / (a.Length - 1 + b.Length - 1);
}
|
|
}
|