diff --git a/debug_loot_capture.png b/debug_loot_capture.png
index dfca7f9..f988200 100644
Binary files a/debug_loot_capture.png and b/debug_loot_capture.png differ
diff --git a/debug_loot_detected.png b/debug_loot_detected.png
index 62c371c..8ca895e 100644
Binary files a/debug_loot_detected.png and b/debug_loot_detected.png differ
diff --git a/debug_loot_edges.png b/debug_loot_edges.png
index 2593477..18aeb04 100644
Binary files a/debug_loot_edges.png and b/debug_loot_edges.png differ
diff --git a/src/Poe2Trade.Bot/BossRunExecutor.cs b/src/Poe2Trade.Bot/BossRunExecutor.cs
index ed01db6..9c818d4 100644
--- a/src/Poe2Trade.Bot/BossRunExecutor.cs
+++ b/src/Poe2Trade.Bot/BossRunExecutor.cs
@@ -389,13 +389,18 @@ public class BossRunExecutor : GameExecutor
Log.Information("Fight area updated to ({X:F0},{Y:F0})", fightWorldX, fightWorldY);
}
- // Wait for death animation before looking for well
- await Sleep(3000);
+ // Wait for death animation + loot settle, keep updating fight position from YOLO
+ var deathPos = await PollYoloDuringWait(3000);
+ if (deathPos != null)
+ {
+ fightWorldX = deathPos.Value.X;
+ fightWorldY = deathPos.Value.Y;
+ }
// Walk to well and click the closest match to screen center
Log.Information("Phase {Phase} done, walking to well", phase);
await WalkToWorldPosition(wellWorldX, wellWorldY);
- await Sleep(500);
+ await Sleep(1500);
await ClickClosestTemplateToCenter(CathedralWellTemplate);
await Sleep(200);
@@ -666,6 +671,7 @@ public class BossRunExecutor : GameExecutor
lastBossWorldPos = (
wp.X + (boss.Cx - screenCx) * screenToWorld,
wp.Y + (boss.Cy - screenCy) * screenToWorld);
+ FightPosition = lastBossWorldPos;
yoloLogCount++;
if (yoloLogCount % 5 == 1) // log every 5th detection
@@ -731,6 +737,37 @@ public class BossRunExecutor : GameExecutor
}
}
+    /// <summary>
+    /// Waits for up to <paramref name="durationMs"/> while polling the latest YOLO boss
+    /// detection, keeping FightPosition updated
+    /// (e.g., during boss death animation when YOLO still detects the corpse/model).
+    /// Stops early when the executor has been stopped.
+    /// </summary>
+    /// <param name="durationMs">Total time to wait, in milliseconds.</param>
+    /// <returns>Last detected boss position in world coordinates, or null if no detections.</returns>
+    private async Task<(double X, double Y)?> PollYoloDuringWait(int durationMs)
+    {
+        // Player character screen position (pixels) and pixel-to-world scale;
+        // the overlay layer uses the inverse factor (835 / 97) with the same centre.
+        const int screenCx = 1280;
+        const int screenCy = 660;
+        const double screenToWorld = 97.0 / 835.0;
+        const int pollIntervalMs = 100;
+        (double X, double Y)? lastPos = null;
+
+        var sw = Stopwatch.StartNew();
+        while (!_stopped)
+        {
+            var remainingMs = durationMs - sw.ElapsedMilliseconds;
+            if (remainingMs <= 0) break;
+
+            var snapshot = _bossDetector.Latest;
+            if (snapshot.Bosses.Count > 0)
+            {
+                // Convert the first detection's screen offset from the player into world space.
+                var boss = snapshot.Bosses[0];
+                var wp = _nav.WorldPosition;
+                lastPos = (
+                    wp.X + (boss.Cx - screenCx) * screenToWorld,
+                    wp.Y + (boss.Cy - screenCy) * screenToWorld);
+                FightPosition = lastPos;
+            }
+
+            // Clamp the final sleep so the total wait does not overshoot durationMs
+            // by up to a full poll interval.
+            await Sleep((int)Math.Min(pollIntervalMs, remainingMs));
+        }
+        return lastPos;
+    }
+
private async Task AttackAtPosition(int x, int y, int durationMs)
{
var (combatTask, cts) = StartCombatLoop(x, y, jitter: 20);
diff --git a/src/Poe2Trade.Bot/BotOrchestrator.cs b/src/Poe2Trade.Bot/BotOrchestrator.cs
index 138918f..b100ca1 100644
--- a/src/Poe2Trade.Bot/BotOrchestrator.cs
+++ b/src/Poe2Trade.Bot/BotOrchestrator.cs
@@ -48,6 +48,8 @@ public class BotOrchestrator : IAsyncDisposable
public FrameSaver FrameSaver { get; }
public LootDebugDetector LootDebugDetector { get; }
public BossRunExecutor BossRunExecutor { get; }
+ public volatile bool ShowYoloOverlay = true;
+ public volatile bool ShowFightPositionOverlay = true;
private readonly Dictionary _scrapExecutors = new();
// Events
diff --git a/src/Poe2Trade.Core/ConfigStore.cs b/src/Poe2Trade.Core/ConfigStore.cs
index 6d27a86..933c68c 100644
--- a/src/Poe2Trade.Core/ConfigStore.cs
+++ b/src/Poe2Trade.Core/ConfigStore.cs
@@ -35,6 +35,7 @@ public class SavedSettings
public StashCalibration? StashCalibration { get; set; }
public StashCalibration? ShopCalibration { get; set; }
public bool ShowHudDebug { get; set; }
+ public string OcrEngine { get; set; } = "WinOCR";
public KulemakSettings Kulemak { get; set; } = new();
}
diff --git a/src/Poe2Trade.Inventory/InventoryManager.cs b/src/Poe2Trade.Inventory/InventoryManager.cs
index aa97111..744a80a 100644
--- a/src/Poe2Trade.Inventory/InventoryManager.cs
+++ b/src/Poe2Trade.Inventory/InventoryManager.cs
@@ -309,16 +309,37 @@ public class InventoryManager : IInventoryManager
return null;
}
- // Single word
+ // Single word — prefer exact line match ("STASH") over substring ("Guild Stash")
+ (int X, int Y)? containsMatch = null;
+ (int X, int Y)? fuzzyMatch = null;
+
foreach (var line in result.Lines)
- foreach (var word in line.Words)
{
- if (word.Text.Contains(needle, StringComparison.OrdinalIgnoreCase))
- return (word.X + word.Width / 2, word.Y + word.Height / 2);
- if (fuzzy && BigramSimilarity(Normalize(needle), Normalize(word.Text)) >= 0.55)
- return (word.X + word.Width / 2, word.Y + word.Height / 2);
+ // Exact line match — the entire line is just this word
+ if (line.Text.Equals(needle, StringComparison.OrdinalIgnoreCase) && line.Words.Count > 0)
+ {
+ var first = line.Words[0];
+ var last = line.Words[^1];
+ return ((first.X + last.X + last.Width) / 2, (first.Y + last.Y + last.Height) / 2);
+ }
+
+ foreach (var word in line.Words)
+ {
+ if (word.Text.Equals(needle, StringComparison.OrdinalIgnoreCase))
+ return (word.X + word.Width / 2, word.Y + word.Height / 2);
+
+ containsMatch ??= word.Text.Contains(needle, StringComparison.OrdinalIgnoreCase)
+ ? (word.X + word.Width / 2, word.Y + word.Height / 2)
+ : null;
+
+ if (fuzzy)
+ fuzzyMatch ??= BigramSimilarity(Normalize(needle), Normalize(word.Text)) >= 0.55
+ ? (word.X + word.Width / 2, word.Y + word.Height / 2)
+ : null;
+ }
}
- return null;
+
+ return containsMatch ?? fuzzyMatch;
}
private static string Normalize(string s) =>
diff --git a/src/Poe2Trade.Screen/DaemonTypes.cs b/src/Poe2Trade.Screen/DaemonTypes.cs
index adc94c4..8aee843 100644
--- a/src/Poe2Trade.Screen/DaemonTypes.cs
+++ b/src/Poe2Trade.Screen/DaemonTypes.cs
@@ -104,25 +104,6 @@ public sealed class OcrParams
[JsonPropertyName("softThreshold")]
public bool SoftThreshold { get; set; } = false;
-
- // EasyOCR tuning
- [JsonPropertyName("mergeGap")]
- public int MergeGap { get; set; } = 0;
-
- [JsonPropertyName("linkThreshold")]
- public double? LinkThreshold { get; set; }
-
- [JsonPropertyName("textThreshold")]
- public double? TextThreshold { get; set; }
-
- [JsonPropertyName("lowText")]
- public double? LowText { get; set; }
-
- [JsonPropertyName("widthThs")]
- public double? WidthThs { get; set; }
-
- [JsonPropertyName("paragraph")]
- public bool? Paragraph { get; set; }
}
public sealed class DiffOcrParams
diff --git a/src/Poe2Trade.Screen/IOcrEngine.cs b/src/Poe2Trade.Screen/IOcrEngine.cs
new file mode 100644
index 0000000..87c8397
--- /dev/null
+++ b/src/Poe2Trade.Screen/IOcrEngine.cs
@@ -0,0 +1,9 @@
+using System.Drawing;
+
+namespace Poe2Trade.Screen;
+
+/// <summary>
+/// Common contract for the interchangeable OCR backends (the implementations added
+/// alongside it are WinOcrEngine, OneOcrEngine and EasyOcrEngine).
+/// Disposing the engine releases whatever backend resources the implementation holds.
+/// </summary>
+public interface IOcrEngine : IDisposable
+{
+ /// <summary>Engine name; the implementations return "WinOCR", "OneOCR" or "EasyOCR".</summary>
+ string Name { get; }
+ /// <summary>
+ /// Runs OCR on <paramref name="bitmap"/> and returns the recognized text together with
+ /// its lines and words.
+ /// NOTE(review): word boxes appear to be in bitmap pixel coordinates (ScreenReader offsets
+ /// them to screen space afterwards) — confirm for every backend.
+ /// </summary>
+ OcrResponse Recognize(Bitmap bitmap);
+}
diff --git a/src/Poe2Trade.Screen/Ocr/EasyOcrEngine.cs b/src/Poe2Trade.Screen/Ocr/EasyOcrEngine.cs
new file mode 100644
index 0000000..4ea9900
--- /dev/null
+++ b/src/Poe2Trade.Screen/Ocr/EasyOcrEngine.cs
@@ -0,0 +1,35 @@
+using System.Drawing;
+
+namespace Poe2Trade.Screen.Ocr;
+
+/// <summary>
+/// <see cref="IOcrEngine"/> adapter for the Python EasyOCR daemon.
+/// The EasyOCR-only tuning knobs are kept on this class rather than in the shared OcrParams.
+/// </summary>
+public sealed class EasyOcrEngine : IOcrEngine
+{
+    // Pipe to the Python process; owned by this engine and released in Dispose.
+    private readonly PythonOcrBridge _bridge = new();
+
+    /// <summary>Engine name.</summary>
+    public string Name => "EasyOCR";
+
+    // EasyOCR tuning (previously part of OcrParams). The bridge only forwards
+    // MergeGap when it is positive and the nullable values when they are set.
+    public int MergeGap { get; set; }
+    public double? LinkThreshold { get; set; }
+    public double? TextThreshold { get; set; }
+    public double? LowText { get; set; }
+    public double? WidthThs { get; set; }
+    public bool? Paragraph { get; set; }
+
+    /// <summary>
+    /// Sends <paramref name="bitmap"/> to the EasyOCR daemon using the current tuning values.
+    /// </summary>
+    public OcrResponse Recognize(Bitmap bitmap) =>
+        _bridge.OcrFromBitmap(
+            bitmap,
+            mergeGap: MergeGap,
+            linkThreshold: LinkThreshold,
+            textThreshold: TextThreshold,
+            lowText: LowText,
+            widthThs: WidthThs,
+            paragraph: Paragraph);
+
+    /// <summary>Disposes the underlying Python bridge.</summary>
+    public void Dispose()
+    {
+        _bridge.Dispose();
+    }
+}
diff --git a/src/Poe2Trade.Screen/Ocr/OcrEngineFactory.cs b/src/Poe2Trade.Screen/Ocr/OcrEngineFactory.cs
new file mode 100644
index 0000000..774a419
--- /dev/null
+++ b/src/Poe2Trade.Screen/Ocr/OcrEngineFactory.cs
@@ -0,0 +1,18 @@
+using Serilog;
+
+namespace Poe2Trade.Screen.Ocr;
+
+/// <summary>
+/// Creates the <see cref="IOcrEngine"/> implementation selected by name.
+/// </summary>
+public static class OcrEngineFactory
+{
+    /// <summary>
+    /// Builds the engine called <paramref name="engineName"/> ("OneOCR", "EasyOCR" or "WinOCR").
+    /// Matching ignores case and surrounding whitespace. Any other value, including null,
+    /// falls back to WinOCR and is logged as a warning so a misconfigured setting is visible.
+    /// </summary>
+    /// <param name="engineName">Engine name, typically taken from the saved settings.</param>
+    /// <returns>A new engine instance; the caller owns it and must dispose it.</returns>
+    public static IOcrEngine Create(string engineName)
+    {
+        Log.Information("Creating OCR engine: {Engine}", engineName);
+
+        // NOTE(review): the relative "tools/oneocr" path is resolved against the current
+        // working directory — confirm the app is always started from its install root.
+        if (IsEngine(engineName, "OneOCR"))
+            return new OneOcrEngine(Path.GetFullPath(Path.Combine("tools", "oneocr")));
+
+        if (IsEngine(engineName, "EasyOCR"))
+            return new EasyOcrEngine();
+
+        if (!IsEngine(engineName, "WinOCR"))
+            Log.Warning("Unknown OCR engine {Engine}; falling back to WinOCR", engineName);
+
+        return new WinOcrEngine();
+    }
+
+    // Case-insensitive, whitespace-tolerant name comparison; a null candidate never matches.
+    private static bool IsEngine(string? candidate, string expected) =>
+        string.Equals(candidate?.Trim(), expected, StringComparison.OrdinalIgnoreCase);
+}
diff --git a/src/Poe2Trade.Screen/Ocr/OneOcrEngine.cs b/src/Poe2Trade.Screen/Ocr/OneOcrEngine.cs
new file mode 100644
index 0000000..c54e952
--- /dev/null
+++ b/src/Poe2Trade.Screen/Ocr/OneOcrEngine.cs
@@ -0,0 +1,268 @@
+using System.Drawing;
+using System.Drawing.Imaging;
+using System.Runtime.InteropServices;
+using Serilog;
+
+namespace Poe2Trade.Screen.Ocr;
+
+/// <summary>
+/// OCR engine using OneOCR (Windows 11 Snipping Tool's built-in engine).
+/// Requires oneocr.dll, oneocr.onemodel, and onnxruntime.dll in the model directory.
+/// </summary>
+public sealed class OneOcrEngine : IOcrEngine
+{
+    /// <summary>Engine name.</summary>
+    public string Name => "OneOCR";
+
+    // Native handles (int64) — created once, reused per call. 0 means "not created / released".
+    private long _pipeline;
+    private long _initOptions;
+    private long _processOptions;
+
+    private static readonly byte[] ModelKey = BuildModelKey();
+
+    // Guards the one-time native library setup: a DllImport resolver may only be
+    // registered once per assembly, so a second engine instance must not register again.
+    private static readonly object NativeLoadGate = new();
+    private static bool _nativeLibraryLoaded;
+
+    /// <summary>
+    /// Loads oneocr.dll from <paramref name="modelDir"/> and creates the native OCR pipeline.
+    /// </summary>
+    /// <param name="modelDir">Directory containing oneocr.dll and oneocr.onemodel.</param>
+    /// <exception cref="DirectoryNotFoundException">The model directory does not exist.</exception>
+    /// <exception cref="FileNotFoundException">The model file or oneocr.dll is missing.</exception>
+    /// <exception cref="InvalidOperationException">A native initialization call returned a non-zero code.</exception>
+    public OneOcrEngine(string modelDir)
+    {
+        if (!Directory.Exists(modelDir))
+            throw new DirectoryNotFoundException($"OneOCR model directory not found: {modelDir}");
+
+        var modelPath = Path.Combine(modelDir, "oneocr.onemodel");
+        if (!File.Exists(modelPath))
+            throw new FileNotFoundException($"OneOCR model not found: {modelPath}");
+
+        var dllPath = Path.Combine(modelDir, "oneocr.dll");
+        if (!File.Exists(dllPath))
+            throw new FileNotFoundException($"oneocr.dll not found: {dllPath}");
+
+        EnsureNativeLibraryLoaded(modelDir, dllPath);
+
+        try
+        {
+            // Init options
+            CheckResult(Native.CreateOcrInitOptions(out _initOptions), "CreateOcrInitOptions");
+            CheckResult(Native.OcrInitOptionsSetUseModelDelayLoad(_initOptions, 0), "SetUseModelDelayLoad");
+
+            // Pipeline (pass full model path as byte string)
+            CheckResult(Native.CreateOcrPipeline(modelPath, ModelKey, _initOptions, out _pipeline), "CreateOcrPipeline");
+
+            // Process options
+            CheckResult(Native.CreateOcrProcessOptions(out _processOptions), "CreateOcrProcessOptions");
+            CheckResult(Native.OcrProcessOptionsSetMaxRecognitionLineCount(_processOptions, 1000), "SetMaxLineCount");
+        }
+        catch
+        {
+            // Nobody can call Dispose on a half-constructed instance, so release
+            // whatever handles were created before the failure, then rethrow.
+            Dispose();
+            throw;
+        }
+
+        Log.Information("OneOcrEngine initialized (modelDir: {Dir})", modelDir);
+    }
+
+    /// <summary>
+    /// Runs the OneOCR pipeline on <paramref name="bitmap"/>.
+    /// </summary>
+    /// <returns>
+    /// The recognized lines and words, or an empty response when the native pipeline
+    /// reports a failure (which is logged as a warning).
+    /// </returns>
+    /// <exception cref="ObjectDisposedException">The engine has already been disposed.</exception>
+    public OcrResponse Recognize(Bitmap bitmap)
+    {
+        ArgumentNullException.ThrowIfNull(bitmap);
+        if (_pipeline == 0)
+            throw new ObjectDisposedException(nameof(OneOcrEngine));
+
+        // Convert bitmap to BGRA pixel data
+        var rect = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
+        var bmpData = bitmap.LockBits(rect, ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
+
+        try
+        {
+            var imageStruct = new ImageStructure
+            {
+                Type = 3, // CV_8UC4 / BGRA
+                Width = bitmap.Width,
+                Height = bitmap.Height,
+                Reserved = 0,
+                Step = bmpData.Stride,
+                Data = bmpData.Scan0,
+            };
+
+            long rc = Native.RunOcrPipeline(_pipeline, ref imageStruct, _processOptions, out long result);
+            if (rc != 0)
+            {
+                Log.Warning("OneOCR: RunOcrPipeline failed (code {Code})", rc);
+                return new OcrResponse { Text = "", Lines = [] };
+            }
+
+            try
+            {
+                return ParseResult(result);
+            }
+            finally
+            {
+                Native.ReleaseOcrResult(result);
+            }
+        }
+        finally
+        {
+            // The pixel buffer must stay locked until the native call has finished reading it.
+            bitmap.UnlockBits(bmpData);
+        }
+    }
+
+    // Walks the native result (lines, then words) and copies text and bounding boxes
+    // into managed objects. Lines or words whose native accessors fail are skipped.
+    private static OcrResponse ParseResult(long result)
+    {
+        if (Native.GetOcrLineCount(result, out long lineCount) != 0)
+            return new OcrResponse { Text = "", Lines = [] };
+
+        var lines = new List<OcrLine>();
+
+        for (long i = 0; i < lineCount; i++)
+        {
+            if (Native.GetOcrLine(result, i, out long line) != 0 || line == 0) continue;
+            if (Native.GetOcrLineWordCount(line, out long wordCount) != 0) continue;
+
+            var words = new List<OcrWord>();
+
+            for (long j = 0; j < wordCount; j++)
+            {
+                if (Native.GetOcrWord(line, j, out long word) != 0 || word == 0) continue;
+
+                if (Native.GetOcrWordContent(word, out IntPtr contentPtr) != 0) continue;
+                var text = Marshal.PtrToStringUTF8(contentPtr);
+                if (string.IsNullOrEmpty(text)) continue;
+
+                // BoundingBox: 4 corners as floats → axis-aligned rect.
+                // A word without a usable box keeps an all-zero rectangle.
+                int x = 0, y = 0, w = 0, h = 0;
+                if (Native.GetOcrWordBoundingBox(word, out IntPtr bboxPtr) == 0 && bboxPtr != IntPtr.Zero)
+                {
+                    var bbox = Marshal.PtrToStructure<BoundingBox>(bboxPtr);
+                    int x1 = (int)MathF.Min(MathF.Min(bbox.X1, bbox.X2), MathF.Min(bbox.X3, bbox.X4));
+                    int y1 = (int)MathF.Min(MathF.Min(bbox.Y1, bbox.Y2), MathF.Min(bbox.Y3, bbox.Y4));
+                    int x2 = (int)MathF.Max(MathF.Max(bbox.X1, bbox.X2), MathF.Max(bbox.X3, bbox.X4));
+                    int y2 = (int)MathF.Max(MathF.Max(bbox.Y1, bbox.Y2), MathF.Max(bbox.Y3, bbox.Y4));
+                    x = x1; y = y1; w = x2 - x1; h = y2 - y1;
+                }
+
+                words.Add(new OcrWord { Text = text, X = x, Y = y, Width = w, Height = h });
+            }
+
+            if (words.Count > 0)
+            {
+                lines.Add(new OcrLine
+                {
+                    Text = string.Join(" ", words.Select(wd => wd.Text)),
+                    Words = words,
+                });
+            }
+        }
+
+        var fullText = string.Join("\n", lines.Select(l => l.Text));
+        return new OcrResponse { Text = fullText, Lines = lines };
+    }
+
+    /// <summary>
+    /// Releases the native process options, pipeline and init options. Safe to call more
+    /// than once: each handle is zeroed after release.
+    /// </summary>
+    public void Dispose()
+    {
+        if (_processOptions != 0) { Native.ReleaseOcrProcessOptions(_processOptions); _processOptions = 0; }
+        if (_pipeline != 0) { Native.ReleaseOcrPipeline(_pipeline); _pipeline = 0; }
+        if (_initOptions != 0) { Native.ReleaseOcrInitOptions(_initOptions); _initOptions = 0; }
+    }
+
+    // Copies the model key into a buffer with a trailing 0 byte.
+    // NOTE(review): CreateOcrPipeline takes the key without a length, so it is presumably
+    // read as a C string; the terminator is harmless if it is read as a fixed-size blob.
+    private static byte[] BuildModelKey()
+    {
+        ReadOnlySpan<byte> key = "kj)TGtrK>f]b[Piow.gU+nC@s\"\"\"\"\"\"4"u8;
+        var buffer = new byte[key.Length + 1]; // last byte stays 0
+        key.CopyTo(buffer);
+        return buffer;
+    }
+
+    // Loads oneocr.dll and registers the DllImport resolver exactly once per process.
+    private static void EnsureNativeLibraryLoaded(string modelDir, string dllPath)
+    {
+        lock (NativeLoadGate)
+        {
+            if (_nativeLibraryLoaded) return;
+
+            // Set DLL search directory so oneocr.dll can find onnxruntime.dll
+            if (!SetDllDirectoryW(modelDir))
+                Log.Warning("OneOCR: SetDllDirectory failed (error {Error})", Marshal.GetLastWin32Error());
+
+            // Load the DLL explicitly from modelDir
+            var hDll = NativeLibrary.Load(dllPath);
+            NativeLibrary.SetDllImportResolver(typeof(OneOcrEngine).Assembly, (name, _, _) =>
+                name == Dll ? hDll : IntPtr.Zero);
+
+            _nativeLibraryLoaded = true;
+        }
+    }
+
+    // Throws when a native call reports a non-zero (failure) code.
+    private static void CheckResult(long rc, string func)
+    {
+        if (rc != 0)
+            throw new InvalidOperationException($"OneOCR {func} failed (code {rc})");
+    }
+
+    // -- Native structs --
+
+    // Matches C struct: { int32 t, int32 col, int32 row, int32 _unk, int64 step, int64 data_ptr } = 0x20 bytes
+    [StructLayout(LayoutKind.Sequential)]
+    private struct ImageStructure
+    {
+        public int Type;       // 3 = CV_8UC4 (BGRA)
+        public int Width;
+        public int Height;
+        public int Reserved;
+        public long Step;      // stride in bytes per row
+        public IntPtr Data;    // pointer to BGRA pixel data
+    }
+
+    // Four corner points of a word box, as returned by the native bounding-box accessors.
+    [StructLayout(LayoutKind.Sequential)]
+    private struct BoundingBox
+    {
+        public float X1, Y1;
+        public float X2, Y2;
+        public float X3, Y3;
+        public float X4, Y4;
+    }
+
+    // -- P/Invoke --
+
+    private const string Dll = "oneocr.dll";
+
+    [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
+    [return: MarshalAs(UnmanagedType.Bool)]
+    private static extern bool SetDllDirectoryW(string lpPathName);
+
+    // All OneOCR functions return int64 error code (0 = success) and use out-pointer params for handles.
+    private static class Native
+    {
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long CreateOcrInitOptions(out long options);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long OcrInitOptionsSetUseModelDelayLoad(long options, byte flag);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long CreateOcrPipeline(
+            [MarshalAs(UnmanagedType.LPUTF8Str)] string modelPath,
+            byte[] key,
+            long initOptions,
+            out long pipeline);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long CreateOcrProcessOptions(out long options);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long OcrProcessOptionsSetMaxRecognitionLineCount(long options, long maxLines);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long RunOcrPipeline(long pipeline, ref ImageStructure image, long processOptions, out long result);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetImageAngle(long result, out float angle);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetOcrLineCount(long result, out long count);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetOcrLine(long result, long index, out long line);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetOcrLineContent(long line, out IntPtr content);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetOcrLineBoundingBox(long line, out IntPtr bbox);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetOcrLineWordCount(long line, out long count);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetOcrWord(long line, long index, out long word);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetOcrWordContent(long word, out IntPtr content);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetOcrWordBoundingBox(long word, out IntPtr bbox);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern long GetOcrWordConfidence(long word, out float confidence);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern void ReleaseOcrResult(long result);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern void ReleaseOcrPipeline(long pipeline);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern void ReleaseOcrInitOptions(long options);
+
+        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
+        public static extern void ReleaseOcrProcessOptions(long options);
+    }
+}
diff --git a/src/Poe2Trade.Screen/Ocr/WinOcrEngine.cs b/src/Poe2Trade.Screen/Ocr/WinOcrEngine.cs
new file mode 100644
index 0000000..269e433
--- /dev/null
+++ b/src/Poe2Trade.Screen/Ocr/WinOcrEngine.cs
@@ -0,0 +1,67 @@
+using System.Drawing;
+using System.Drawing.Imaging;
+using Serilog;
+using Windows.Graphics.Imaging;
+using Windows.Media.Ocr;
+using Windows.Storage.Streams;
+using BitmapDecoder = Windows.Graphics.Imaging.BitmapDecoder;
+using SdImageFormat = System.Drawing.Imaging.ImageFormat;
+
+namespace Poe2Trade.Screen.Ocr;
+
+/// <summary>
+/// OCR engine backed by the Windows.Media.Ocr API, using the recognizer for the
+/// user's profile languages.
+/// </summary>
+public sealed class WinOcrEngine : IOcrEngine
+{
+    private readonly OcrEngine _engine;
+
+    /// <summary>Engine name.</summary>
+    public string Name => "WinOCR";
+
+    /// <summary>Creates the Windows OCR recognizer.</summary>
+    /// <exception cref="InvalidOperationException">
+    /// No recognizer could be created from the user profile languages.
+    /// </exception>
+    public WinOcrEngine()
+    {
+        _engine = OcrEngine.TryCreateFromUserProfileLanguages()
+            ?? throw new InvalidOperationException("Windows OCR engine not available");
+        Log.Information("WinOcrEngine initialized (language: {Lang})", _engine.RecognizerLanguage.DisplayName);
+    }
+
+    /// <summary>
+    /// Recognizes text in <paramref name="bitmap"/>. The WinRT async calls are awaited
+    /// synchronously because <see cref="IOcrEngine.Recognize"/> is a synchronous contract.
+    /// </summary>
+    /// <returns>Recognized lines and words with the word bounding rectangles truncated to ints.</returns>
+    public OcrResponse Recognize(Bitmap bitmap)
+    {
+        // Convert System.Drawing.Bitmap → PNG stream → WinRT SoftwareBitmap
+        using var ms = new MemoryStream();
+        bitmap.Save(ms, SdImageFormat.Png);
+        ms.Position = 0;
+
+        // Dispose the WinRT wrappers deterministically: the SoftwareBitmap holds a native
+        // pixel buffer that would otherwise live until finalization on every OCR call.
+        using var stream = ms.AsRandomAccessStream();
+        var decoder = BitmapDecoder.CreateAsync(stream).AsTask().GetAwaiter().GetResult();
+        using var softwareBitmap = decoder.GetSoftwareBitmapAsync().AsTask().GetAwaiter().GetResult();
+
+        var ocrResult = _engine.RecognizeAsync(softwareBitmap).AsTask().GetAwaiter().GetResult();
+
+        var lines = new List<OcrLine>();
+        foreach (var winLine in ocrResult.Lines)
+        {
+            var words = new List<OcrWord>();
+            foreach (var winWord in winLine.Words)
+            {
+                var r = winWord.BoundingRect;
+                words.Add(new OcrWord
+                {
+                    Text = winWord.Text,
+                    X = (int)r.X,
+                    Y = (int)r.Y,
+                    Width = (int)r.Width,
+                    Height = (int)r.Height,
+                });
+            }
+
+            lines.Add(new OcrLine
+            {
+                Text = winLine.Text,
+                Words = words,
+            });
+        }
+
+        var fullText = string.Join("\n", lines.Select(l => l.Text));
+        return new OcrResponse { Text = fullText, Lines = lines };
+    }
+
+    /// <summary>Nothing to release: this class keeps no disposable state between calls.</summary>
+    public void Dispose() { }
+}
diff --git a/src/Poe2Trade.Screen/PythonOcrBridge.cs b/src/Poe2Trade.Screen/PythonOcrBridge.cs
index 6cd040e..de55714 100644
--- a/src/Poe2Trade.Screen/PythonOcrBridge.cs
+++ b/src/Poe2Trade.Screen/PythonOcrBridge.cs
@@ -37,7 +37,14 @@ class PythonOcrBridge : IDisposable
///
/// Run OCR on a bitmap via the Python EasyOCR engine (base64 PNG over pipe).
///
- public OcrResponse OcrFromBitmap(Bitmap bitmap, OcrParams? ocrParams = null)
+ public OcrResponse OcrFromBitmap(
+ Bitmap bitmap,
+ int mergeGap = 0,
+ double? linkThreshold = null,
+ double? textThreshold = null,
+ double? lowText = null,
+ double? widthThs = null,
+ bool? paragraph = null)
{
EnsureRunning();
@@ -45,26 +52,18 @@ class PythonOcrBridge : IDisposable
bitmap.Save(ms, SdImageFormat.Png);
var imageBase64 = Convert.ToBase64String(ms.ToArray());
- var pyReq = BuildPythonRequest(ocrParams);
+ var pyReq = new Dictionary { ["cmd"] = "ocr", ["engine"] = "easyocr" };
+ if (mergeGap > 0) pyReq["mergeGap"] = mergeGap;
+ if (linkThreshold.HasValue) pyReq["linkThreshold"] = linkThreshold.Value;
+ if (textThreshold.HasValue) pyReq["textThreshold"] = textThreshold.Value;
+ if (lowText.HasValue) pyReq["lowText"] = lowText.Value;
+ if (widthThs.HasValue) pyReq["widthThs"] = widthThs.Value;
+ if (paragraph.HasValue) pyReq["paragraph"] = paragraph.Value;
+
pyReq["imageBase64"] = imageBase64;
return SendPythonRequest(pyReq);
}
- private static Dictionary BuildPythonRequest(OcrParams? ocrParams)
- {
- var req = new Dictionary { ["cmd"] = "ocr", ["engine"] = "easyocr" };
- if (ocrParams == null) return req;
-
- if (ocrParams.MergeGap > 0) req["mergeGap"] = ocrParams.MergeGap;
- if (ocrParams.LinkThreshold.HasValue) req["linkThreshold"] = ocrParams.LinkThreshold.Value;
- if (ocrParams.TextThreshold.HasValue) req["textThreshold"] = ocrParams.TextThreshold.Value;
- if (ocrParams.LowText.HasValue) req["lowText"] = ocrParams.LowText.Value;
- if (ocrParams.WidthThs.HasValue) req["widthThs"] = ocrParams.WidthThs.Value;
- if (ocrParams.Paragraph.HasValue) req["paragraph"] = ocrParams.Paragraph.Value;
-
- return req;
- }
-
private OcrResponse SendPythonRequest(object pyReq)
{
var json = JsonSerializer.Serialize(pyReq, JsonOptions);
diff --git a/src/Poe2Trade.Screen/ScreenReader.cs b/src/Poe2Trade.Screen/ScreenReader.cs
index 8eeb2d9..0565479 100644
--- a/src/Poe2Trade.Screen/ScreenReader.cs
+++ b/src/Poe2Trade.Screen/ScreenReader.cs
@@ -16,13 +16,14 @@ public class ScreenReader : IScreenReader
private readonly GridHandler _gridHandler = new();
private readonly TemplateMatchHandler _templateMatch = new();
private readonly EdgeCropHandler _edgeCrop = new();
- private readonly PythonOcrBridge _pythonBridge = new();
+ private readonly IOcrEngine _ocrEngine;
private bool _initialized;
public GridReader Grid { get; }
- public ScreenReader()
+ public ScreenReader(IOcrEngine ocrEngine)
{
+ _ocrEngine = ocrEngine;
Grid = new GridReader(_gridHandler);
}
@@ -59,16 +60,16 @@ public class ScreenReader : IScreenReader
if (preprocess == "tophat")
{
using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);
- result = _pythonBridge.OcrFromBitmap(processed);
+ result = _ocrEngine.Recognize(processed);
}
else if (preprocess == "clahe")
{
using var processed = ImagePreprocessor.PreprocessClahe(bitmap);
- result = _pythonBridge.OcrFromBitmap(processed);
+ result = _ocrEngine.Recognize(processed);
}
else
{
- result = _pythonBridge.OcrFromBitmap(bitmap);
+ result = _ocrEngine.Recognize(bitmap);
}
var allText = string.Join(" | ", result.Lines.Select(l => l.Text));
@@ -149,7 +150,7 @@ public class ScreenReader : IScreenReader
? ImagePreprocessor.PreprocessWithBackgroundSub(cropped, refCropped, ocr.DimPercentile, ocr.TextThresh, 1, ocr.SoftThreshold)
: ImagePreprocessor.PreprocessForOcr(cropped, ocr.KernelSize, 1);
- var ocrResult = _pythonBridge.OcrFromBitmap(processedBmp, ocr);
+ var ocrResult = _ocrEngine.Recognize(processedBmp);
// Offset coordinates to screen space
foreach (var line in ocrResult.Lines)
@@ -299,7 +300,7 @@ public class ScreenReader : IScreenReader
var ocrSw2 = System.Diagnostics.Stopwatch.StartNew();
OcrResponse ocrResult2;
- try { ocrResult2 = _pythonBridge.OcrFromBitmap(crop); }
+ try { ocrResult2 = _ocrEngine.Recognize(crop); }
catch (TimeoutException)
{
Log.Warning("NameplateDiffOcr: crop OCR timed out");
@@ -386,7 +387,7 @@ public class ScreenReader : IScreenReader
OcrResponse ocrResult;
try
{
- ocrResult = _pythonBridge.OcrFromBitmap(stitched);
+ ocrResult = _ocrEngine.Recognize(stitched);
}
catch (TimeoutException)
{
@@ -975,7 +976,7 @@ public class ScreenReader : IScreenReader
return keep;
}
- public void Dispose() => _pythonBridge.Dispose();
+ public void Dispose() => _ocrEngine.Dispose();
// -- OCR text matching --
diff --git a/src/Poe2Trade.Ui/App.axaml.cs b/src/Poe2Trade.Ui/App.axaml.cs
index 96149e5..dd38da0 100644
--- a/src/Poe2Trade.Ui/App.axaml.cs
+++ b/src/Poe2Trade.Ui/App.axaml.cs
@@ -8,6 +8,7 @@ using Poe2Trade.Game;
using Poe2Trade.GameLog;
using Poe2Trade.Inventory;
using Poe2Trade.Screen;
+using Poe2Trade.Screen.Ocr;
using Poe2Trade.Trade;
using Poe2Trade.Ui.Overlay;
using Poe2Trade.Ui.ViewModels;
@@ -33,6 +34,11 @@ public partial class App : Application
services.AddSingleton(sp => sp.GetRequiredService().Settings);
// Services
+ services.AddSingleton(sp =>
+ {
+ var settings = sp.GetRequiredService();
+ return OcrEngineFactory.Create(settings.OcrEngine);
+ });
services.AddSingleton();
services.AddSingleton();
services.AddSingleton(sp =>
diff --git a/src/Poe2Trade.Ui/Overlay/D2dOverlay.cs b/src/Poe2Trade.Ui/Overlay/D2dOverlay.cs
index f73716a..be6bc87 100644
--- a/src/Poe2Trade.Ui/Overlay/D2dOverlay.cs
+++ b/src/Poe2Trade.Ui/Overlay/D2dOverlay.cs
@@ -197,6 +197,8 @@ public sealed class D2dOverlay
IsExploring: _bot.Navigation.IsExploring,
ShowHudDebug: _bot.Store.Settings.ShowHudDebug,
ShowLootDebug: showLoot,
+ ShowYolo: _bot.ShowYoloOverlay,
+ ShowFightPosition: _bot.ShowFightPositionOverlay,
LootLabels: _bot.LootDebugDetector.Latest,
FightPosition: _bot.BossRunExecutor.FightPosition,
Fps: fps,
diff --git a/src/Poe2Trade.Ui/Overlay/IOverlayLayer.cs b/src/Poe2Trade.Ui/Overlay/IOverlayLayer.cs
index 415f5db..05c2a08 100644
--- a/src/Poe2Trade.Ui/Overlay/IOverlayLayer.cs
+++ b/src/Poe2Trade.Ui/Overlay/IOverlayLayer.cs
@@ -14,6 +14,8 @@ public record OverlayState(
bool IsExploring,
bool ShowHudDebug,
bool ShowLootDebug,
+ bool ShowYolo,
+ bool ShowFightPosition,
IReadOnlyList LootLabels,
(double X, double Y)? FightPosition,
double Fps,
diff --git a/src/Poe2Trade.Ui/Overlay/Layers/D2dEnemyBoxLayer.cs b/src/Poe2Trade.Ui/Overlay/Layers/D2dEnemyBoxLayer.cs
index 15f1424..a0c823c 100644
--- a/src/Poe2Trade.Ui/Overlay/Layers/D2dEnemyBoxLayer.cs
+++ b/src/Poe2Trade.Ui/Overlay/Layers/D2dEnemyBoxLayer.cs
@@ -28,63 +28,68 @@ internal sealed class D2dEnemyBoxLayer : ID2dOverlayLayer, IDisposable
public void Draw(D2dRenderContext ctx, OverlayState state)
{
+ if (!state.ShowYolo && !state.ShowFightPosition) return;
+
var rt = ctx.RenderTarget;
- foreach (var enemy in state.Enemies)
+ if (state.ShowYolo)
{
- var confirmed = enemy.HealthBarConfirmed;
- var boxBrush = confirmed ? ctx.Red : ctx.Yellow;
- var rect = new RectangleF(enemy.X, enemy.Y, enemy.Width, enemy.Height);
- rt.DrawRectangle(rect, boxBrush, 2f);
-
- // Confidence label above the box
- var pctIndex = Math.Clamp((int)(enemy.Confidence * 100), 0, 100);
- var layout = confirmed ? _confirmedLabels[pctIndex] : _unconfirmedLabels[pctIndex];
- var textBrush = confirmed ? ctx.Red : ctx.Yellow;
-
- var m = layout.Metrics;
- var labelX = enemy.X;
- var labelY = enemy.Y - m.Height - 2;
-
- rt.FillRectangle(
- new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
- ctx.LabelBgBrush);
-
- rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, textBrush);
- }
-
- // Boss bounding boxes (cyan) — extrapolate position to compensate for inference delay
- var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
- var ageMs = (float)Math.Clamp(now - state.BossTimestampMs, 0, 60);
-
- foreach (var boss in state.Bosses)
- {
- float dx = boss.VxPerMs * ageMs;
- float dy = boss.VyPerMs * ageMs;
- var rect = new RectangleF(boss.X + dx, boss.Y + dy, boss.Width, boss.Height);
- rt.DrawRectangle(rect, ctx.Cyan, 3f);
-
- var pct = Math.Clamp((int)(boss.Confidence * 100), 0, 100);
- var key = $"{boss.ClassName} {pct}%";
- if (!_bossLabels.TryGetValue(key, out var layout))
+ foreach (var enemy in state.Enemies)
{
- layout = _ctx.CreateTextLayout(key, _ctx.LabelFormat);
- _bossLabels[key] = layout;
+ var confirmed = enemy.HealthBarConfirmed;
+ var boxBrush = confirmed ? ctx.Red : ctx.Yellow;
+ var rect = new RectangleF(enemy.X, enemy.Y, enemy.Width, enemy.Height);
+ rt.DrawRectangle(rect, boxBrush, 2f);
+
+ // Confidence label above the box
+ var pctIndex = Math.Clamp((int)(enemy.Confidence * 100), 0, 100);
+ var layout = confirmed ? _confirmedLabels[pctIndex] : _unconfirmedLabels[pctIndex];
+ var textBrush = confirmed ? ctx.Red : ctx.Yellow;
+
+ var m = layout.Metrics;
+ var labelX = enemy.X;
+ var labelY = enemy.Y - m.Height - 2;
+
+ rt.FillRectangle(
+ new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
+ ctx.LabelBgBrush);
+
+ rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, textBrush);
}
- var m = layout.Metrics;
- var labelX = boss.X + dx;
- var labelY = boss.Y + dy - m.Height - 2;
+ // Boss bounding boxes (cyan) — extrapolate position to compensate for inference delay
+ var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
+ var ageMs = (float)Math.Clamp(now - state.BossTimestampMs, 0, 60);
- rt.FillRectangle(
- new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
- ctx.LabelBgBrush);
+ foreach (var boss in state.Bosses)
+ {
+ float dx = boss.VxPerMs * ageMs;
+ float dy = boss.VyPerMs * ageMs;
+ var rect = new RectangleF(boss.X + dx, boss.Y + dy, boss.Width, boss.Height);
+ rt.DrawRectangle(rect, ctx.Cyan, 3f);
- rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, ctx.Cyan);
+ var pct = Math.Clamp((int)(boss.Confidence * 100), 0, 100);
+ var key = $"{boss.ClassName} {pct}%";
+ if (!_bossLabels.TryGetValue(key, out var layout))
+ {
+ layout = _ctx.CreateTextLayout(key, _ctx.LabelFormat);
+ _bossLabels[key] = layout;
+ }
+
+ var m = layout.Metrics;
+ var labelX = boss.X + dx;
+ var labelY = boss.Y + dy - m.Height - 2;
+
+ rt.FillRectangle(
+ new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
+ ctx.LabelBgBrush);
+
+ rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, ctx.Cyan);
+ }
}
// Fight position — red circle on screen where the fight area is
- if (state.FightPosition is var (fx, fy))
+ if (state.ShowFightPosition && state.FightPosition is var (fx, fy))
{
const double worldToScreen = 835.0 / 97.0; // inverse of screenToWorld
const int screenCx = 1280, screenCy = 660; // player character screen position
diff --git a/src/Poe2Trade.Ui/ViewModels/DebugViewModel.cs b/src/Poe2Trade.Ui/ViewModels/DebugViewModel.cs
index 8d4f5c1..ee40af9 100644
--- a/src/Poe2Trade.Ui/ViewModels/DebugViewModel.cs
+++ b/src/Poe2Trade.Ui/ViewModels/DebugViewModel.cs
@@ -23,12 +23,24 @@ public partial class DebugViewModel : ObservableObject
[ObservableProperty] private decimal? _clickX;
[ObservableProperty] private decimal? _clickY;
[ObservableProperty] private bool _showLootDebug;
+ [ObservableProperty] private bool _showYolo = true;
+ [ObservableProperty] private bool _showFightPosition = true;
partial void OnShowLootDebugChanged(bool value)
{
_bot.LootDebugDetector.Enabled = value;
}
+    // Mirrors the ShowYolo toggle onto the bot flag that the overlay reads.
+    partial void OnShowYoloChanged(bool value) => _bot.ShowYoloOverlay = value;
+
+    // Mirrors the ShowFightPosition toggle onto the bot flag that the overlay reads.
+    partial void OnShowFightPositionChanged(bool value) => _bot.ShowFightPositionOverlay = value;
+
public string[] GridLayoutNames { get; } =
[
"inventory", "stash12", "stash12_folder", "stash24",
diff --git a/src/Poe2Trade.Ui/ViewModels/SettingsViewModel.cs b/src/Poe2Trade.Ui/ViewModels/SettingsViewModel.cs
index 680bf9b..571ae8b 100644
--- a/src/Poe2Trade.Ui/ViewModels/SettingsViewModel.cs
+++ b/src/Poe2Trade.Ui/ViewModels/SettingsViewModel.cs
@@ -20,11 +20,14 @@ public partial class SettingsViewModel : ObservableObject
[ObservableProperty] private decimal? _betweenTradesDelayMs = 5000;
[ObservableProperty] private bool _headless = true;
[ObservableProperty] private bool _showHudDebug;
+ [ObservableProperty] private string _ocrEngine = "WinOCR";
[ObservableProperty] private bool _isSaved;
[ObservableProperty] private string _calibrationStatus = "";
[ObservableProperty] private string _stashCalibratedAt = "";
[ObservableProperty] private string _shopCalibratedAt = "";
+ public static string[] OcrEngineOptions { get; } = ["WinOCR", "OneOCR", "EasyOCR"];
+
public ObservableCollection StashTabs { get; } = [];
public ObservableCollection ShopTabs { get; } = [];
@@ -46,6 +49,7 @@ public partial class SettingsViewModel : ObservableObject
BetweenTradesDelayMs = s.BetweenTradesDelayMs;
Headless = s.Headless;
ShowHudDebug = s.ShowHudDebug;
+ OcrEngine = s.OcrEngine;
}
private void LoadTabs()
@@ -97,6 +101,7 @@ public partial class SettingsViewModel : ObservableObject
s.BetweenTradesDelayMs = (int)(BetweenTradesDelayMs ?? 5000);
s.Headless = Headless;
s.ShowHudDebug = ShowHudDebug;
+ s.OcrEngine = OcrEngine;
});
IsSaved = true;
@@ -210,4 +215,5 @@ public partial class SettingsViewModel : ObservableObject
partial void OnBetweenTradesDelayMsChanged(decimal? value) => IsSaved = false;
partial void OnHeadlessChanged(bool value) => IsSaved = false;
partial void OnShowHudDebugChanged(bool value) => IsSaved = false;
+ partial void OnOcrEngineChanged(string value) => IsSaved = false;
}
diff --git a/src/Poe2Trade.Ui/Views/MainWindow.axaml b/src/Poe2Trade.Ui/Views/MainWindow.axaml
index fe976d1..f8ec98d 100644
--- a/src/Poe2Trade.Ui/Views/MainWindow.axaml
+++ b/src/Poe2Trade.Ui/Views/MainWindow.axaml
@@ -341,6 +341,10 @@
+
+
@@ -455,6 +459,12 @@
+
+
+
+
+