diff --git a/debug_loot_capture.png b/debug_loot_capture.png index dfca7f9..f988200 100644 Binary files a/debug_loot_capture.png and b/debug_loot_capture.png differ diff --git a/debug_loot_detected.png b/debug_loot_detected.png index 62c371c..8ca895e 100644 Binary files a/debug_loot_detected.png and b/debug_loot_detected.png differ diff --git a/debug_loot_edges.png b/debug_loot_edges.png index 2593477..18aeb04 100644 Binary files a/debug_loot_edges.png and b/debug_loot_edges.png differ diff --git a/src/Poe2Trade.Bot/BossRunExecutor.cs b/src/Poe2Trade.Bot/BossRunExecutor.cs index ed01db6..9c818d4 100644 --- a/src/Poe2Trade.Bot/BossRunExecutor.cs +++ b/src/Poe2Trade.Bot/BossRunExecutor.cs @@ -389,13 +389,18 @@ public class BossRunExecutor : GameExecutor Log.Information("Fight area updated to ({X:F0},{Y:F0})", fightWorldX, fightWorldY); } - // Wait for death animation before looking for well - await Sleep(3000); + // Wait for death animation + loot settle, keep updating fight position from YOLO + var deathPos = await PollYoloDuringWait(3000); + if (deathPos != null) + { + fightWorldX = deathPos.Value.X; + fightWorldY = deathPos.Value.Y; + } // Walk to well and click the closest match to screen center Log.Information("Phase {Phase} done, walking to well", phase); await WalkToWorldPosition(wellWorldX, wellWorldY); - await Sleep(500); + await Sleep(1500); await ClickClosestTemplateToCenter(CathedralWellTemplate); await Sleep(200); @@ -666,6 +671,7 @@ public class BossRunExecutor : GameExecutor lastBossWorldPos = ( wp.X + (boss.Cx - screenCx) * screenToWorld, wp.Y + (boss.Cy - screenCy) * screenToWorld); + FightPosition = lastBossWorldPos; yoloLogCount++; if (yoloLogCount % 5 == 1) // log every 5th detection @@ -731,6 +737,37 @@ public class BossRunExecutor : GameExecutor } } + /// + /// Sleep for the given duration while polling YOLO to keep FightPosition updated + /// (e.g., during boss death animation when YOLO still detects the corpse/model). + /// Returns last detected position, or null if no detections. + /// + private async Task<(double X, double Y)?> PollYoloDuringWait(int durationMs) + { + const int screenCx = 1280; + const int screenCy = 660; + const double screenToWorld = 97.0 / 835.0; + (double X, double Y)? lastPos = null; + + var sw = Stopwatch.StartNew(); + while (sw.ElapsedMilliseconds < durationMs) + { + if (_stopped) break; + var snapshot = _bossDetector.Latest; + if (snapshot.Bosses.Count > 0) + { + var boss = snapshot.Bosses[0]; + var wp = _nav.WorldPosition; + lastPos = ( + wp.X + (boss.Cx - screenCx) * screenToWorld, + wp.Y + (boss.Cy - screenCy) * screenToWorld); + FightPosition = lastPos; + } + await Sleep(100); + } + return lastPos; + } + private async Task AttackAtPosition(int x, int y, int durationMs) { var (combatTask, cts) = StartCombatLoop(x, y, jitter: 20); diff --git a/src/Poe2Trade.Bot/BotOrchestrator.cs b/src/Poe2Trade.Bot/BotOrchestrator.cs index 138918f..b100ca1 100644 --- a/src/Poe2Trade.Bot/BotOrchestrator.cs +++ b/src/Poe2Trade.Bot/BotOrchestrator.cs @@ -48,6 +48,8 @@ public class BotOrchestrator : IAsyncDisposable public FrameSaver FrameSaver { get; } public LootDebugDetector LootDebugDetector { get; } public BossRunExecutor BossRunExecutor { get; } + public volatile bool ShowYoloOverlay = true; + public volatile bool ShowFightPositionOverlay = true; private readonly Dictionary _scrapExecutors = new(); // Events diff --git a/src/Poe2Trade.Core/ConfigStore.cs b/src/Poe2Trade.Core/ConfigStore.cs index 6d27a86..933c68c 100644 --- a/src/Poe2Trade.Core/ConfigStore.cs +++ b/src/Poe2Trade.Core/ConfigStore.cs @@ -35,6 +35,7 @@ public class SavedSettings public StashCalibration? StashCalibration { get; set; } public StashCalibration? ShopCalibration { get; set; } public bool ShowHudDebug { get; set; } + public string OcrEngine { get; set; } = "WinOCR"; public KulemakSettings Kulemak { get; set; } = new(); } diff --git a/src/Poe2Trade.Inventory/InventoryManager.cs b/src/Poe2Trade.Inventory/InventoryManager.cs index aa97111..744a80a 100644 --- a/src/Poe2Trade.Inventory/InventoryManager.cs +++ b/src/Poe2Trade.Inventory/InventoryManager.cs @@ -309,16 +309,37 @@ public class InventoryManager : IInventoryManager return null; } - // Single word + // Single word — prefer exact line match ("STASH") over substring ("Guild Stash") + (int X, int Y)? containsMatch = null; + (int X, int Y)? fuzzyMatch = null; + foreach (var line in result.Lines) - foreach (var word in line.Words) { - if (word.Text.Contains(needle, StringComparison.OrdinalIgnoreCase)) - return (word.X + word.Width / 2, word.Y + word.Height / 2); - if (fuzzy && BigramSimilarity(Normalize(needle), Normalize(word.Text)) >= 0.55) - return (word.X + word.Width / 2, word.Y + word.Height / 2); + // Exact line match — the entire line is just this word + if (line.Text.Equals(needle, StringComparison.OrdinalIgnoreCase) && line.Words.Count > 0) + { + var first = line.Words[0]; + var last = line.Words[^1]; + return ((first.X + last.X + last.Width) / 2, (first.Y + last.Y + last.Height) / 2); + } + + foreach (var word in line.Words) + { + if (word.Text.Equals(needle, StringComparison.OrdinalIgnoreCase)) + return (word.X + word.Width / 2, word.Y + word.Height / 2); + + containsMatch ??= word.Text.Contains(needle, StringComparison.OrdinalIgnoreCase) + ? (word.X + word.Width / 2, word.Y + word.Height / 2) + : null; + + if (fuzzy) + fuzzyMatch ??= BigramSimilarity(Normalize(needle), Normalize(word.Text)) >= 0.55 + ? (word.X + word.Width / 2, word.Y + word.Height / 2) + : null; + } } - return null; + + return containsMatch ?? fuzzyMatch; } private static string Normalize(string s) => diff --git a/src/Poe2Trade.Screen/DaemonTypes.cs b/src/Poe2Trade.Screen/DaemonTypes.cs index adc94c4..8aee843 100644 --- a/src/Poe2Trade.Screen/DaemonTypes.cs +++ b/src/Poe2Trade.Screen/DaemonTypes.cs @@ -104,25 +104,6 @@ public sealed class OcrParams [JsonPropertyName("softThreshold")] public bool SoftThreshold { get; set; } = false; - - // EasyOCR tuning - [JsonPropertyName("mergeGap")] - public int MergeGap { get; set; } = 0; - - [JsonPropertyName("linkThreshold")] - public double? LinkThreshold { get; set; } - - [JsonPropertyName("textThreshold")] - public double? TextThreshold { get; set; } - - [JsonPropertyName("lowText")] - public double? LowText { get; set; } - - [JsonPropertyName("widthThs")] - public double? WidthThs { get; set; } - - [JsonPropertyName("paragraph")] - public bool? Paragraph { get; set; } } public sealed class DiffOcrParams diff --git a/src/Poe2Trade.Screen/IOcrEngine.cs b/src/Poe2Trade.Screen/IOcrEngine.cs new file mode 100644 index 0000000..87c8397 --- /dev/null +++ b/src/Poe2Trade.Screen/IOcrEngine.cs @@ -0,0 +1,9 @@ +using System.Drawing; + +namespace Poe2Trade.Screen; + +public interface IOcrEngine : IDisposable +{ + string Name { get; } + OcrResponse Recognize(Bitmap bitmap); +} diff --git a/src/Poe2Trade.Screen/Ocr/EasyOcrEngine.cs b/src/Poe2Trade.Screen/Ocr/EasyOcrEngine.cs new file mode 100644 index 0000000..4ea9900 --- /dev/null +++ b/src/Poe2Trade.Screen/Ocr/EasyOcrEngine.cs @@ -0,0 +1,35 @@ +using System.Drawing; + +namespace Poe2Trade.Screen.Ocr; + +/// +/// OCR engine wrapping the Python EasyOCR daemon. +/// EasyOCR-specific tuning params live here, not in shared OcrParams. +/// +public sealed class EasyOcrEngine : IOcrEngine +{ + private readonly PythonOcrBridge _bridge = new(); + + public string Name => "EasyOCR"; + + // EasyOCR-specific tuning (formerly in OcrParams) + public int MergeGap { get; set; } + public double? LinkThreshold { get; set; } + public double? TextThreshold { get; set; } + public double? LowText { get; set; } + public double? WidthThs { get; set; } + public bool? Paragraph { get; set; } + + public OcrResponse Recognize(Bitmap bitmap) + { + return _bridge.OcrFromBitmap(bitmap, + mergeGap: MergeGap, + linkThreshold: LinkThreshold, + textThreshold: TextThreshold, + lowText: LowText, + widthThs: WidthThs, + paragraph: Paragraph); + } + + public void Dispose() => _bridge.Dispose(); +} diff --git a/src/Poe2Trade.Screen/Ocr/OcrEngineFactory.cs b/src/Poe2Trade.Screen/Ocr/OcrEngineFactory.cs new file mode 100644 index 0000000..774a419 --- /dev/null +++ b/src/Poe2Trade.Screen/Ocr/OcrEngineFactory.cs @@ -0,0 +1,18 @@ +using Serilog; + +namespace Poe2Trade.Screen.Ocr; + +public static class OcrEngineFactory +{ + public static IOcrEngine Create(string engineName) + { + Log.Information("Creating OCR engine: {Engine}", engineName); + + return engineName switch + { + "OneOCR" => new OneOcrEngine(Path.GetFullPath(Path.Combine("tools", "oneocr"))), + "EasyOCR" => new EasyOcrEngine(), + _ => new WinOcrEngine(), + }; + } +} diff --git a/src/Poe2Trade.Screen/Ocr/OneOcrEngine.cs b/src/Poe2Trade.Screen/Ocr/OneOcrEngine.cs new file mode 100644 index 0000000..c54e952 --- /dev/null +++ b/src/Poe2Trade.Screen/Ocr/OneOcrEngine.cs @@ -0,0 +1,268 @@ +using System.Drawing; +using System.Drawing.Imaging; +using System.Runtime.InteropServices; +using Serilog; + +namespace Poe2Trade.Screen.Ocr; + +/// +/// OCR engine using OneOCR (Windows 11 Snipping Tool's built-in engine). +/// Requires oneocr.dll, oneocr.onemodel, and onnxruntime.dll in the model directory. +/// +public sealed class OneOcrEngine : IOcrEngine +{ + public string Name => "OneOCR"; + + // Native handles (int64) — created once, reused per call + private long _pipeline; + private long _initOptions; + private long _processOptions; + + private static readonly byte[] ModelKey = "kj)TGtrK>f]b[Piow.gU+nC@s\"\"\"\"\"\"4"u8.ToArray(); + + public OneOcrEngine(string modelDir) + { + if (!Directory.Exists(modelDir)) + throw new DirectoryNotFoundException($"OneOCR model directory not found: {modelDir}"); + + var modelPath = Path.Combine(modelDir, "oneocr.onemodel"); + if (!File.Exists(modelPath)) + throw new FileNotFoundException($"OneOCR model not found: {modelPath}"); + + var dllPath = Path.Combine(modelDir, "oneocr.dll"); + if (!File.Exists(dllPath)) + throw new FileNotFoundException($"oneocr.dll not found: {dllPath}"); + + // Set DLL search directory so oneocr.dll can find onnxruntime.dll + SetDllDirectoryW(modelDir); + + // Load the DLL explicitly from modelDir + var hDll = NativeLibrary.Load(dllPath); + NativeLibrary.SetDllImportResolver(typeof(OneOcrEngine).Assembly, (name, _, _) => + name == Dll ? hDll : IntPtr.Zero); + + // Init options + CheckResult(Native.CreateOcrInitOptions(out _initOptions), "CreateOcrInitOptions"); + CheckResult(Native.OcrInitOptionsSetUseModelDelayLoad(_initOptions, 0), "SetUseModelDelayLoad"); + + // Pipeline (pass full model path as byte string) + CheckResult(Native.CreateOcrPipeline(modelPath, ModelKey, _initOptions, out _pipeline), "CreateOcrPipeline"); + + // Process options + CheckResult(Native.CreateOcrProcessOptions(out _processOptions), "CreateOcrProcessOptions"); + CheckResult(Native.OcrProcessOptionsSetMaxRecognitionLineCount(_processOptions, 1000), "SetMaxLineCount"); + + Log.Information("OneOcrEngine initialized (modelDir: {Dir})", modelDir); + } + + public OcrResponse Recognize(Bitmap bitmap) + { + // Convert bitmap to BGRA pixel data + var rect = new Rectangle(0, 0, bitmap.Width, bitmap.Height); + var bmpData = bitmap.LockBits(rect, ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb); + + try + { + var imageStruct = new ImageStructure + { + Type = 3, // CV_8UC4 / BGRA + Width = bitmap.Width, + Height = bitmap.Height, + Reserved = 0, + Step = bmpData.Stride, + Data = bmpData.Scan0, + }; + + long result; + long rc = Native.RunOcrPipeline(_pipeline, ref imageStruct, _processOptions, out result); + if (rc != 0) + { + Log.Warning("OneOCR: RunOcrPipeline failed (code {Code})", rc); + return new OcrResponse { Text = "", Lines = [] }; + } + + try + { + return ParseResult(result); + } + finally + { + Native.ReleaseOcrResult(result); + } + } + finally + { + bitmap.UnlockBits(bmpData); + } + } + + private static OcrResponse ParseResult(long result) + { + long lineCount; + if (Native.GetOcrLineCount(result, out lineCount) != 0) + return new OcrResponse { Text = "", Lines = [] }; + + var lines = new List(); + + for (long i = 0; i < lineCount; i++) + { + long line; + if (Native.GetOcrLine(result, i, out line) != 0 || line == 0) continue; + + long wordCount; + if (Native.GetOcrLineWordCount(line, out wordCount) != 0) continue; + + var words = new List(); + + for (long j = 0; j < wordCount; j++) + { + long word; + if (Native.GetOcrWord(line, j, out word) != 0 || word == 0) continue; + + IntPtr contentPtr; + if (Native.GetOcrWordContent(word, out contentPtr) != 0) continue; + var text = Marshal.PtrToStringUTF8(contentPtr); + if (string.IsNullOrEmpty(text)) continue; + + // BoundingBox: 4 corners as floats → axis-aligned rect + IntPtr bboxPtr; + int x = 0, y = 0, w = 0, h = 0; + if (Native.GetOcrWordBoundingBox(word, out bboxPtr) == 0 && bboxPtr != IntPtr.Zero) + { + var bbox = Marshal.PtrToStructure(bboxPtr); + int x1 = (int)MathF.Min(MathF.Min(bbox.X1, bbox.X2), MathF.Min(bbox.X3, bbox.X4)); + int y1 = (int)MathF.Min(MathF.Min(bbox.Y1, bbox.Y2), MathF.Min(bbox.Y3, bbox.Y4)); + int x2 = (int)MathF.Max(MathF.Max(bbox.X1, bbox.X2), MathF.Max(bbox.X3, bbox.X4)); + int y2 = (int)MathF.Max(MathF.Max(bbox.Y1, bbox.Y2), MathF.Max(bbox.Y3, bbox.Y4)); + x = x1; y = y1; w = x2 - x1; h = y2 - y1; + } + + words.Add(new OcrWord { Text = text, X = x, Y = y, Width = w, Height = h }); + } + + if (words.Count > 0) + { + lines.Add(new OcrLine + { + Text = string.Join(" ", words.Select(wd => wd.Text)), + Words = words, + }); + } + } + + var fullText = string.Join("\n", lines.Select(l => l.Text)); + return new OcrResponse { Text = fullText, Lines = lines }; + } + + public void Dispose() + { + if (_processOptions != 0) { Native.ReleaseOcrProcessOptions(_processOptions); _processOptions = 0; } + if (_pipeline != 0) { Native.ReleaseOcrPipeline(_pipeline); _pipeline = 0; } + if (_initOptions != 0) { Native.ReleaseOcrInitOptions(_initOptions); _initOptions = 0; } + } + + private static void CheckResult(long rc, string func) + { + if (rc != 0) + throw new InvalidOperationException($"OneOCR {func} failed (code {rc})"); + } + + // -- Native structs -- + + // Matches C struct: { int32 t, int32 col, int32 row, int32 _unk, int64 step, int64 data_ptr } = 0x20 bytes + [StructLayout(LayoutKind.Sequential)] + private struct ImageStructure + { + public int Type; // 3 = CV_8UC4 (BGRA) + public int Width; + public int Height; + public int Reserved; + public long Step; // stride in bytes per row + public IntPtr Data; // pointer to BGRA pixel data + } + + [StructLayout(LayoutKind.Sequential)] + private struct BoundingBox + { + public float X1, Y1; + public float X2, Y2; + public float X3, Y3; + public float X4, Y4; + } + + // -- P/Invoke -- + + private const string Dll = "oneocr.dll"; + + [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)] + [return: MarshalAs(UnmanagedType.Bool)] + private static extern bool SetDllDirectoryW(string lpPathName); + + // All OneOCR functions return int64 error code (0 = success) and use out-pointer params for handles. + private static class Native + { + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long CreateOcrInitOptions(out long options); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long OcrInitOptionsSetUseModelDelayLoad(long options, byte flag); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long CreateOcrPipeline( + [MarshalAs(UnmanagedType.LPUTF8Str)] string modelPath, + byte[] key, + long initOptions, + out long pipeline); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long CreateOcrProcessOptions(out long options); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long OcrProcessOptionsSetMaxRecognitionLineCount(long options, long maxLines); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long RunOcrPipeline(long pipeline, ref ImageStructure image, long processOptions, out long result); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetImageAngle(long result, out float angle); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetOcrLineCount(long result, out long count); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetOcrLine(long result, long index, out long line); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetOcrLineContent(long line, out IntPtr content); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetOcrLineBoundingBox(long line, out IntPtr bbox); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetOcrLineWordCount(long line, out long count); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetOcrWord(long line, long index, out long word); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetOcrWordContent(long word, out IntPtr content); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetOcrWordBoundingBox(long word, out IntPtr bbox); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern long GetOcrWordConfidence(long word, out float confidence); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern void ReleaseOcrResult(long result); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern void ReleaseOcrPipeline(long pipeline); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern void ReleaseOcrInitOptions(long options); + + [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)] + public static extern void ReleaseOcrProcessOptions(long options); + } +} diff --git a/src/Poe2Trade.Screen/Ocr/WinOcrEngine.cs b/src/Poe2Trade.Screen/Ocr/WinOcrEngine.cs new file mode 100644 index 0000000..269e433 --- /dev/null +++ b/src/Poe2Trade.Screen/Ocr/WinOcrEngine.cs @@ -0,0 +1,67 @@ +using System.Drawing; +using System.Drawing.Imaging; +using Serilog; +using Windows.Graphics.Imaging; +using Windows.Media.Ocr; +using Windows.Storage.Streams; +using BitmapDecoder = Windows.Graphics.Imaging.BitmapDecoder; +using SdImageFormat = System.Drawing.Imaging.ImageFormat; + +namespace Poe2Trade.Screen.Ocr; + +public sealed class WinOcrEngine : IOcrEngine +{ + private readonly OcrEngine _engine; + + public string Name => "WinOCR"; + + public WinOcrEngine() + { + _engine = OcrEngine.TryCreateFromUserProfileLanguages() + ?? throw new InvalidOperationException("Windows OCR engine not available"); + Log.Information("WinOcrEngine initialized (language: {Lang})", _engine.RecognizerLanguage.DisplayName); + } + + public OcrResponse Recognize(Bitmap bitmap) + { + // Convert System.Drawing.Bitmap → PNG stream → WinRT SoftwareBitmap + using var ms = new MemoryStream(); + bitmap.Save(ms, SdImageFormat.Png); + ms.Position = 0; + + var stream = ms.AsRandomAccessStream(); + var decoder = BitmapDecoder.CreateAsync(stream).AsTask().GetAwaiter().GetResult(); + var softwareBitmap = decoder.GetSoftwareBitmapAsync().AsTask().GetAwaiter().GetResult(); + + var ocrResult = _engine.RecognizeAsync(softwareBitmap).AsTask().GetAwaiter().GetResult(); + + var lines = new List(); + foreach (var winLine in ocrResult.Lines) + { + var words = new List(); + foreach (var winWord in winLine.Words) + { + var r = winWord.BoundingRect; + words.Add(new OcrWord + { + Text = winWord.Text, + X = (int)r.X, + Y = (int)r.Y, + Width = (int)r.Width, + Height = (int)r.Height, + }); + } + + lines.Add(new OcrLine + { + Text = winLine.Text, + Words = words, + }); + } + + var fullText = string.Join("\n", lines.Select(l => l.Text)); + return new OcrResponse { Text = fullText, Lines = lines }; + } + + public void Dispose() { } +} diff --git a/src/Poe2Trade.Screen/PythonOcrBridge.cs b/src/Poe2Trade.Screen/PythonOcrBridge.cs index 6cd040e..de55714 100644 --- a/src/Poe2Trade.Screen/PythonOcrBridge.cs +++ b/src/Poe2Trade.Screen/PythonOcrBridge.cs @@ -37,7 +37,14 @@ class PythonOcrBridge : IDisposable /// /// Run OCR on a bitmap via the Python EasyOCR engine (base64 PNG over pipe). /// - public OcrResponse OcrFromBitmap(Bitmap bitmap, OcrParams? ocrParams = null) + public OcrResponse OcrFromBitmap( + Bitmap bitmap, + int mergeGap = 0, + double? linkThreshold = null, + double? textThreshold = null, + double? lowText = null, + double? widthThs = null, + bool? paragraph = null) { EnsureRunning(); @@ -45,26 +52,18 @@ class PythonOcrBridge : IDisposable bitmap.Save(ms, SdImageFormat.Png); var imageBase64 = Convert.ToBase64String(ms.ToArray()); - var pyReq = BuildPythonRequest(ocrParams); + var pyReq = new Dictionary { ["cmd"] = "ocr", ["engine"] = "easyocr" }; + if (mergeGap > 0) pyReq["mergeGap"] = mergeGap; + if (linkThreshold.HasValue) pyReq["linkThreshold"] = linkThreshold.Value; + if (textThreshold.HasValue) pyReq["textThreshold"] = textThreshold.Value; + if (lowText.HasValue) pyReq["lowText"] = lowText.Value; + if (widthThs.HasValue) pyReq["widthThs"] = widthThs.Value; + if (paragraph.HasValue) pyReq["paragraph"] = paragraph.Value; + pyReq["imageBase64"] = imageBase64; return SendPythonRequest(pyReq); } - private static Dictionary BuildPythonRequest(OcrParams? ocrParams) - { - var req = new Dictionary { ["cmd"] = "ocr", ["engine"] = "easyocr" }; - if (ocrParams == null) return req; - - if (ocrParams.MergeGap > 0) req["mergeGap"] = ocrParams.MergeGap; - if (ocrParams.LinkThreshold.HasValue) req["linkThreshold"] = ocrParams.LinkThreshold.Value; - if (ocrParams.TextThreshold.HasValue) req["textThreshold"] = ocrParams.TextThreshold.Value; - if (ocrParams.LowText.HasValue) req["lowText"] = ocrParams.LowText.Value; - if (ocrParams.WidthThs.HasValue) req["widthThs"] = ocrParams.WidthThs.Value; - if (ocrParams.Paragraph.HasValue) req["paragraph"] = ocrParams.Paragraph.Value; - - return req; - } - private OcrResponse SendPythonRequest(object pyReq) { var json = JsonSerializer.Serialize(pyReq, JsonOptions); diff --git a/src/Poe2Trade.Screen/ScreenReader.cs b/src/Poe2Trade.Screen/ScreenReader.cs index 8eeb2d9..0565479 100644 --- a/src/Poe2Trade.Screen/ScreenReader.cs +++ b/src/Poe2Trade.Screen/ScreenReader.cs @@ -16,13 +16,14 @@ public class ScreenReader : IScreenReader private readonly GridHandler _gridHandler = new(); private readonly TemplateMatchHandler _templateMatch = new(); private readonly EdgeCropHandler _edgeCrop = new(); - private readonly PythonOcrBridge _pythonBridge = new(); + private readonly IOcrEngine _ocrEngine; private bool _initialized; public GridReader Grid { get; } - public ScreenReader() + public ScreenReader(IOcrEngine ocrEngine) { + _ocrEngine = ocrEngine; Grid = new GridReader(_gridHandler); } @@ -59,16 +60,16 @@ public class ScreenReader : IScreenReader if (preprocess == "tophat") { using var processed = ImagePreprocessor.PreprocessForOcr(bitmap); - result = _pythonBridge.OcrFromBitmap(processed); + result = _ocrEngine.Recognize(processed); } else if (preprocess == "clahe") { using var processed = ImagePreprocessor.PreprocessClahe(bitmap); - result = _pythonBridge.OcrFromBitmap(processed); + result = _ocrEngine.Recognize(processed); } else { - result = _pythonBridge.OcrFromBitmap(bitmap); + result = _ocrEngine.Recognize(bitmap); } var allText = string.Join(" | ", result.Lines.Select(l => l.Text)); @@ -149,7 +150,7 @@ public class ScreenReader : IScreenReader ? ImagePreprocessor.PreprocessWithBackgroundSub(cropped, refCropped, ocr.DimPercentile, ocr.TextThresh, 1, ocr.SoftThreshold) : ImagePreprocessor.PreprocessForOcr(cropped, ocr.KernelSize, 1); - var ocrResult = _pythonBridge.OcrFromBitmap(processedBmp, ocr); + var ocrResult = _ocrEngine.Recognize(processedBmp); // Offset coordinates to screen space foreach (var line in ocrResult.Lines) @@ -299,7 +300,7 @@ public class ScreenReader : IScreenReader var ocrSw2 = System.Diagnostics.Stopwatch.StartNew(); OcrResponse ocrResult2; - try { ocrResult2 = _pythonBridge.OcrFromBitmap(crop); } + try { ocrResult2 = _ocrEngine.Recognize(crop); } catch (TimeoutException) { Log.Warning("NameplateDiffOcr: crop OCR timed out"); @@ -386,7 +387,7 @@ public class ScreenReader : IScreenReader OcrResponse ocrResult; try { - ocrResult = _pythonBridge.OcrFromBitmap(stitched); + ocrResult = _ocrEngine.Recognize(stitched); } catch (TimeoutException) { @@ -975,7 +976,7 @@ public class ScreenReader : IScreenReader return keep; } - public void Dispose() => _pythonBridge.Dispose(); + public void Dispose() => _ocrEngine.Dispose(); // -- OCR text matching -- diff --git a/src/Poe2Trade.Ui/App.axaml.cs b/src/Poe2Trade.Ui/App.axaml.cs index 96149e5..dd38da0 100644 --- a/src/Poe2Trade.Ui/App.axaml.cs +++ b/src/Poe2Trade.Ui/App.axaml.cs @@ -8,6 +8,7 @@ using Poe2Trade.Game; using Poe2Trade.GameLog; using Poe2Trade.Inventory; using Poe2Trade.Screen; +using Poe2Trade.Screen.Ocr; using Poe2Trade.Trade; using Poe2Trade.Ui.Overlay; using Poe2Trade.Ui.ViewModels; @@ -33,6 +34,11 @@ public partial class App : Application services.AddSingleton(sp => sp.GetRequiredService().Settings); // Services + services.AddSingleton(sp => + { + var settings = sp.GetRequiredService(); + return OcrEngineFactory.Create(settings.OcrEngine); + }); services.AddSingleton(); services.AddSingleton(); services.AddSingleton(sp => diff --git a/src/Poe2Trade.Ui/Overlay/D2dOverlay.cs b/src/Poe2Trade.Ui/Overlay/D2dOverlay.cs index f73716a..be6bc87 100644 --- a/src/Poe2Trade.Ui/Overlay/D2dOverlay.cs +++ b/src/Poe2Trade.Ui/Overlay/D2dOverlay.cs @@ -197,6 +197,8 @@ public sealed class D2dOverlay IsExploring: _bot.Navigation.IsExploring, ShowHudDebug: _bot.Store.Settings.ShowHudDebug, ShowLootDebug: showLoot, + ShowYolo: _bot.ShowYoloOverlay, + ShowFightPosition: _bot.ShowFightPositionOverlay, LootLabels: _bot.LootDebugDetector.Latest, FightPosition: _bot.BossRunExecutor.FightPosition, Fps: fps, diff --git a/src/Poe2Trade.Ui/Overlay/IOverlayLayer.cs b/src/Poe2Trade.Ui/Overlay/IOverlayLayer.cs index 415f5db..05c2a08 100644 --- a/src/Poe2Trade.Ui/Overlay/IOverlayLayer.cs +++ b/src/Poe2Trade.Ui/Overlay/IOverlayLayer.cs @@ -14,6 +14,8 @@ public record OverlayState( bool IsExploring, bool ShowHudDebug, bool ShowLootDebug, + bool ShowYolo, + bool ShowFightPosition, IReadOnlyList LootLabels, (double X, double Y)? FightPosition, double Fps, diff --git a/src/Poe2Trade.Ui/Overlay/Layers/D2dEnemyBoxLayer.cs b/src/Poe2Trade.Ui/Overlay/Layers/D2dEnemyBoxLayer.cs index 15f1424..a0c823c 100644 --- a/src/Poe2Trade.Ui/Overlay/Layers/D2dEnemyBoxLayer.cs +++ b/src/Poe2Trade.Ui/Overlay/Layers/D2dEnemyBoxLayer.cs @@ -28,63 +28,68 @@ internal sealed class D2dEnemyBoxLayer : ID2dOverlayLayer, IDisposable public void Draw(D2dRenderContext ctx, OverlayState state) { + if (!state.ShowYolo && !state.ShowFightPosition) return; + var rt = ctx.RenderTarget; - foreach (var enemy in state.Enemies) + if (state.ShowYolo) { - var confirmed = enemy.HealthBarConfirmed; - var boxBrush = confirmed ? ctx.Red : ctx.Yellow; - var rect = new RectangleF(enemy.X, enemy.Y, enemy.Width, enemy.Height); - rt.DrawRectangle(rect, boxBrush, 2f); - - // Confidence label above the box - var pctIndex = Math.Clamp((int)(enemy.Confidence * 100), 0, 100); - var layout = confirmed ? _confirmedLabels[pctIndex] : _unconfirmedLabels[pctIndex]; - var textBrush = confirmed ? ctx.Red : ctx.Yellow; - - var m = layout.Metrics; - var labelX = enemy.X; - var labelY = enemy.Y - m.Height - 2; - - rt.FillRectangle( - new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2), - ctx.LabelBgBrush); - - rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, textBrush); - } - - // Boss bounding boxes (cyan) — extrapolate position to compensate for inference delay - var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(); - var ageMs = (float)Math.Clamp(now - state.BossTimestampMs, 0, 60); - - foreach (var boss in state.Bosses) - { - float dx = boss.VxPerMs * ageMs; - float dy = boss.VyPerMs * ageMs; - var rect = new RectangleF(boss.X + dx, boss.Y + dy, boss.Width, boss.Height); - rt.DrawRectangle(rect, ctx.Cyan, 3f); - - var pct = Math.Clamp((int)(boss.Confidence * 100), 0, 100); - var key = $"{boss.ClassName} {pct}%"; - if (!_bossLabels.TryGetValue(key, out var layout)) + foreach (var enemy in state.Enemies) { - layout = _ctx.CreateTextLayout(key, _ctx.LabelFormat); - _bossLabels[key] = layout; + var confirmed = enemy.HealthBarConfirmed; + var boxBrush = confirmed ? ctx.Red : ctx.Yellow; + var rect = new RectangleF(enemy.X, enemy.Y, enemy.Width, enemy.Height); + rt.DrawRectangle(rect, boxBrush, 2f); + + // Confidence label above the box + var pctIndex = Math.Clamp((int)(enemy.Confidence * 100), 0, 100); + var layout = confirmed ? _confirmedLabels[pctIndex] : _unconfirmedLabels[pctIndex]; + var textBrush = confirmed ? ctx.Red : ctx.Yellow; + + var m = layout.Metrics; + var labelX = enemy.X; + var labelY = enemy.Y - m.Height - 2; + + rt.FillRectangle( + new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2), + ctx.LabelBgBrush); + + rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, textBrush); } - var m = layout.Metrics; - var labelX = boss.X + dx; - var labelY = boss.Y + dy - m.Height - 2; + // Boss bounding boxes (cyan) — extrapolate position to compensate for inference delay + var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(); + var ageMs = (float)Math.Clamp(now - state.BossTimestampMs, 0, 60); - rt.FillRectangle( - new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2), - ctx.LabelBgBrush); + foreach (var boss in state.Bosses) + { + float dx = boss.VxPerMs * ageMs; + float dy = boss.VyPerMs * ageMs; + var rect = new RectangleF(boss.X + dx, boss.Y + dy, boss.Width, boss.Height); + rt.DrawRectangle(rect, ctx.Cyan, 3f); - rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, ctx.Cyan); + var pct = Math.Clamp((int)(boss.Confidence * 100), 0, 100); + var key = $"{boss.ClassName} {pct}%"; + if (!_bossLabels.TryGetValue(key, out var layout)) + { + layout = _ctx.CreateTextLayout(key, _ctx.LabelFormat); + _bossLabels[key] = layout; + } + + var m = layout.Metrics; + var labelX = boss.X + dx; + var labelY = boss.Y + dy - m.Height - 2; + + rt.FillRectangle( + new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2), + ctx.LabelBgBrush); + + rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, ctx.Cyan); + } } // Fight position — red circle on screen where the fight area is - if (state.FightPosition is var (fx, fy)) + if (state.ShowFightPosition && state.FightPosition is var (fx, fy)) { const double worldToScreen = 835.0 / 97.0; // inverse of screenToWorld const int screenCx = 1280, screenCy = 660; // player character screen position diff --git a/src/Poe2Trade.Ui/ViewModels/DebugViewModel.cs b/src/Poe2Trade.Ui/ViewModels/DebugViewModel.cs index 8d4f5c1..ee40af9 100644 --- a/src/Poe2Trade.Ui/ViewModels/DebugViewModel.cs +++ b/src/Poe2Trade.Ui/ViewModels/DebugViewModel.cs @@ -23,12 +23,24 @@ public partial class DebugViewModel : ObservableObject [ObservableProperty] private decimal? _clickX; [ObservableProperty] private decimal? _clickY; [ObservableProperty] private bool _showLootDebug; + [ObservableProperty] private bool _showYolo = true; + [ObservableProperty] private bool _showFightPosition = true; partial void OnShowLootDebugChanged(bool value) { _bot.LootDebugDetector.Enabled = value; } + partial void OnShowYoloChanged(bool value) + { + _bot.ShowYoloOverlay = value; + } + + partial void OnShowFightPositionChanged(bool value) + { + _bot.ShowFightPositionOverlay = value; + } + public string[] GridLayoutNames { get; } = [ "inventory", "stash12", "stash12_folder", "stash24", diff --git a/src/Poe2Trade.Ui/ViewModels/SettingsViewModel.cs b/src/Poe2Trade.Ui/ViewModels/SettingsViewModel.cs index 680bf9b..571ae8b 100644 --- a/src/Poe2Trade.Ui/ViewModels/SettingsViewModel.cs +++ b/src/Poe2Trade.Ui/ViewModels/SettingsViewModel.cs @@ -20,11 +20,14 @@ public partial class SettingsViewModel : ObservableObject [ObservableProperty] private decimal? _betweenTradesDelayMs = 5000; [ObservableProperty] private bool _headless = true; [ObservableProperty] private bool _showHudDebug; + [ObservableProperty] private string _ocrEngine = "WinOCR"; [ObservableProperty] private bool _isSaved; [ObservableProperty] private string _calibrationStatus = ""; [ObservableProperty] private string _stashCalibratedAt = ""; [ObservableProperty] private string _shopCalibratedAt = ""; + public static string[] OcrEngineOptions { get; } = ["WinOCR", "OneOCR", "EasyOCR"]; + public ObservableCollection StashTabs { get; } = []; public ObservableCollection ShopTabs { get; } = []; @@ -46,6 +49,7 @@ public partial class SettingsViewModel : ObservableObject BetweenTradesDelayMs = s.BetweenTradesDelayMs; Headless = s.Headless; ShowHudDebug = s.ShowHudDebug; + OcrEngine = s.OcrEngine; } private void LoadTabs() @@ -97,6 +101,7 @@ public partial class SettingsViewModel : ObservableObject s.BetweenTradesDelayMs = (int)(BetweenTradesDelayMs ?? 5000); s.Headless = Headless; s.ShowHudDebug = ShowHudDebug; + s.OcrEngine = OcrEngine; }); IsSaved = true; @@ -210,4 +215,5 @@ public partial class SettingsViewModel : ObservableObject partial void OnBetweenTradesDelayMsChanged(decimal? value) => IsSaved = false; partial void OnHeadlessChanged(bool value) => IsSaved = false; partial void OnShowHudDebugChanged(bool value) => IsSaved = false; + partial void OnOcrEngineChanged(string value) => IsSaved = false; } diff --git a/src/Poe2Trade.Ui/Views/MainWindow.axaml b/src/Poe2Trade.Ui/Views/MainWindow.axaml index fe976d1..f8ec98d 100644 --- a/src/Poe2Trade.Ui/Views/MainWindow.axaml +++ b/src/Poe2Trade.Ui/Views/MainWindow.axaml @@ -341,6 +341,10 @@ + + @@ -455,6 +459,12 @@ + + + + +