much better bot and ocr
This commit is contained in:
parent
bb8f50116a
commit
6257bcf122
25 changed files with 583 additions and 101 deletions
Binary file not shown.
|
Before Width: | Height: | Size: 7.2 MiB After Width: | Height: | Size: 7.1 MiB |
Binary file not shown.
|
Before Width: | Height: | Size: 6.2 MiB After Width: | Height: | Size: 5.7 MiB |
Binary file not shown.
|
Before Width: | Height: | Size: 332 KiB After Width: | Height: | Size: 331 KiB |
|
|
@ -389,13 +389,18 @@ public class BossRunExecutor : GameExecutor
|
|||
Log.Information("Fight area updated to ({X:F0},{Y:F0})", fightWorldX, fightWorldY);
|
||||
}
|
||||
|
||||
// Wait for death animation before looking for well
|
||||
await Sleep(3000);
|
||||
// Wait for death animation + loot settle, keep updating fight position from YOLO
|
||||
var deathPos = await PollYoloDuringWait(3000);
|
||||
if (deathPos != null)
|
||||
{
|
||||
fightWorldX = deathPos.Value.X;
|
||||
fightWorldY = deathPos.Value.Y;
|
||||
}
|
||||
|
||||
// Walk to well and click the closest match to screen center
|
||||
Log.Information("Phase {Phase} done, walking to well", phase);
|
||||
await WalkToWorldPosition(wellWorldX, wellWorldY);
|
||||
await Sleep(500);
|
||||
await Sleep(1500);
|
||||
await ClickClosestTemplateToCenter(CathedralWellTemplate);
|
||||
await Sleep(200);
|
||||
|
||||
|
|
@ -666,6 +671,7 @@ public class BossRunExecutor : GameExecutor
|
|||
lastBossWorldPos = (
|
||||
wp.X + (boss.Cx - screenCx) * screenToWorld,
|
||||
wp.Y + (boss.Cy - screenCy) * screenToWorld);
|
||||
FightPosition = lastBossWorldPos;
|
||||
|
||||
yoloLogCount++;
|
||||
if (yoloLogCount % 5 == 1) // log every 5th detection
|
||||
|
|
@ -731,6 +737,37 @@ public class BossRunExecutor : GameExecutor
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Waits for roughly <paramref name="durationMs"/> milliseconds while sampling the boss
/// detector about every 100 ms, so <c>FightPosition</c> keeps following the boss during the wait
/// (e.g., during the death animation, when YOLO may still detect the corpse/model).
/// Stops early once the executor has been stopped.
/// </summary>
/// <param name="durationMs">How long to keep polling, in milliseconds.</param>
/// <returns>The world position computed from the last detection, or null if nothing was detected.</returns>
private async Task<(double X, double Y)?> PollYoloDuringWait(int durationMs)
{
    // Reference screen point subtracted from the detection centre, and the screen-to-world scale.
    const int screenCx = 1280;
    const int screenCy = 660;
    const double screenToWorld = 97.0 / 835.0;

    (double X, double Y)? latest = null;
    var timer = Stopwatch.StartNew();

    while (!_stopped && timer.ElapsedMilliseconds < durationMs)
    {
        var detections = _bossDetector.Latest.Bosses;
        if (detections.Count > 0)
        {
            // Only the first detection is used; convert its screen offset into world coordinates.
            var target = detections[0];
            var origin = _nav.WorldPosition;
            var worldX = origin.X + (target.Cx - screenCx) * screenToWorld;
            var worldY = origin.Y + (target.Cy - screenCy) * screenToWorld;

            latest = (worldX, worldY);
            FightPosition = latest;
        }

        await Sleep(100);
    }

    return latest;
}
|
||||
|
||||
private async Task AttackAtPosition(int x, int y, int durationMs)
|
||||
{
|
||||
var (combatTask, cts) = StartCombatLoop(x, y, jitter: 20);
|
||||
|
|
|
|||
|
|
@ -48,6 +48,8 @@ public class BotOrchestrator : IAsyncDisposable
|
|||
public FrameSaver FrameSaver { get; }
|
||||
public LootDebugDetector LootDebugDetector { get; }
|
||||
public BossRunExecutor BossRunExecutor { get; }
|
||||
public volatile bool ShowYoloOverlay = true;
|
||||
public volatile bool ShowFightPositionOverlay = true;
|
||||
private readonly Dictionary<string, ScrapExecutor> _scrapExecutors = new();
|
||||
|
||||
// Events
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ public class SavedSettings
|
|||
public StashCalibration? StashCalibration { get; set; }
|
||||
public StashCalibration? ShopCalibration { get; set; }
|
||||
public bool ShowHudDebug { get; set; }
|
||||
public string OcrEngine { get; set; } = "WinOCR";
|
||||
public KulemakSettings Kulemak { get; set; } = new();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -309,16 +309,37 @@ public class InventoryManager : IInventoryManager
|
|||
return null;
|
||||
}
|
||||
|
||||
// Single word
|
||||
// Single word — prefer exact line match ("STASH") over substring ("Guild Stash")
|
||||
(int X, int Y)? containsMatch = null;
|
||||
(int X, int Y)? fuzzyMatch = null;
|
||||
|
||||
foreach (var line in result.Lines)
|
||||
foreach (var word in line.Words)
|
||||
{
|
||||
if (word.Text.Contains(needle, StringComparison.OrdinalIgnoreCase))
|
||||
return (word.X + word.Width / 2, word.Y + word.Height / 2);
|
||||
if (fuzzy && BigramSimilarity(Normalize(needle), Normalize(word.Text)) >= 0.55)
|
||||
return (word.X + word.Width / 2, word.Y + word.Height / 2);
|
||||
// Exact line match — the entire line is just this word
|
||||
if (line.Text.Equals(needle, StringComparison.OrdinalIgnoreCase) && line.Words.Count > 0)
|
||||
{
|
||||
var first = line.Words[0];
|
||||
var last = line.Words[^1];
|
||||
return ((first.X + last.X + last.Width) / 2, (first.Y + last.Y + last.Height) / 2);
|
||||
}
|
||||
|
||||
foreach (var word in line.Words)
|
||||
{
|
||||
if (word.Text.Equals(needle, StringComparison.OrdinalIgnoreCase))
|
||||
return (word.X + word.Width / 2, word.Y + word.Height / 2);
|
||||
|
||||
containsMatch ??= word.Text.Contains(needle, StringComparison.OrdinalIgnoreCase)
|
||||
? (word.X + word.Width / 2, word.Y + word.Height / 2)
|
||||
: null;
|
||||
|
||||
if (fuzzy)
|
||||
fuzzyMatch ??= BigramSimilarity(Normalize(needle), Normalize(word.Text)) >= 0.55
|
||||
? (word.X + word.Width / 2, word.Y + word.Height / 2)
|
||||
: null;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
||||
return containsMatch ?? fuzzyMatch;
|
||||
}
|
||||
|
||||
private static string Normalize(string s) =>
|
||||
|
|
|
|||
|
|
@ -104,25 +104,6 @@ public sealed class OcrParams
|
|||
|
||||
[JsonPropertyName("softThreshold")]
|
||||
public bool SoftThreshold { get; set; } = false;
|
||||
|
||||
// EasyOCR tuning
|
||||
[JsonPropertyName("mergeGap")]
|
||||
public int MergeGap { get; set; } = 0;
|
||||
|
||||
[JsonPropertyName("linkThreshold")]
|
||||
public double? LinkThreshold { get; set; }
|
||||
|
||||
[JsonPropertyName("textThreshold")]
|
||||
public double? TextThreshold { get; set; }
|
||||
|
||||
[JsonPropertyName("lowText")]
|
||||
public double? LowText { get; set; }
|
||||
|
||||
[JsonPropertyName("widthThs")]
|
||||
public double? WidthThs { get; set; }
|
||||
|
||||
[JsonPropertyName("paragraph")]
|
||||
public bool? Paragraph { get; set; }
|
||||
}
|
||||
|
||||
public sealed class DiffOcrParams
|
||||
|
|
|
|||
9
src/Poe2Trade.Screen/IOcrEngine.cs
Normal file
9
src/Poe2Trade.Screen/IOcrEngine.cs
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
using System.Drawing;
|
||||
|
||||
namespace Poe2Trade.Screen;
|
||||
|
||||
/// <summary>
/// Common contract for OCR backends, so callers can run recognition without knowing
/// which engine is behind it. Implementations are disposable.
/// </summary>
public interface IOcrEngine : IDisposable
{
    /// <summary>Runs text recognition on <paramref name="bitmap"/> and returns the recognized text.</summary>
    /// <param name="bitmap">Image to recognize.</param>
    OcrResponse Recognize(Bitmap bitmap);

    /// <summary>Short identifier of the engine implementation.</summary>
    string Name { get; }
}
|
||||
35
src/Poe2Trade.Screen/Ocr/EasyOcrEngine.cs
Normal file
35
src/Poe2Trade.Screen/Ocr/EasyOcrEngine.cs
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
using System.Drawing;
|
||||
|
||||
namespace Poe2Trade.Screen.Ocr;
|
||||
|
||||
/// <summary>
/// <see cref="IOcrEngine"/> implementation that forwards recognition to the Python EasyOCR daemon
/// through <c>PythonOcrBridge</c>. The EasyOCR-only tuning knobs are kept on this class
/// instead of the shared OcrParams.
/// </summary>
public sealed class EasyOcrEngine : IOcrEngine
{
    private readonly PythonOcrBridge _bridge = new();

    /// <summary>Engine identifier.</summary>
    public string Name
    {
        get { return "EasyOCR"; }
    }

    // EasyOCR tuning values (previously part of OcrParams); each is passed to the bridge on every call.
    public int MergeGap { get; set; }
    public double? LinkThreshold { get; set; }
    public double? TextThreshold { get; set; }
    public double? LowText { get; set; }
    public double? WidthThs { get; set; }
    public bool? Paragraph { get; set; }

    /// <summary>Sends <paramref name="bitmap"/> to the bridge together with the current tuning values.</summary>
    public OcrResponse Recognize(Bitmap bitmap) =>
        _bridge.OcrFromBitmap(
            bitmap,
            mergeGap: MergeGap,
            linkThreshold: LinkThreshold,
            textThreshold: TextThreshold,
            lowText: LowText,
            widthThs: WidthThs,
            paragraph: Paragraph);

    /// <summary>Disposes the underlying bridge.</summary>
    public void Dispose()
    {
        _bridge.Dispose();
    }
}
|
||||
18
src/Poe2Trade.Screen/Ocr/OcrEngineFactory.cs
Normal file
18
src/Poe2Trade.Screen/Ocr/OcrEngineFactory.cs
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
using Serilog;
|
||||
|
||||
namespace Poe2Trade.Screen.Ocr;
|
||||
|
||||
/// <summary>
/// Creates the <see cref="IOcrEngine"/> implementation selected by name.
/// </summary>
public static class OcrEngineFactory
{
    /// <summary>
    /// Builds the engine matching <paramref name="engineName"/>.
    /// Matching ignores case and surrounding whitespace. A null, empty or unrecognized
    /// name falls back to WinOCR and logs a warning so a mistyped setting is visible.
    /// </summary>
    /// <param name="engineName">"OneOCR", "EasyOCR" or "WinOCR".</param>
    /// <returns>A newly constructed engine; the caller owns and disposes it.</returns>
    public static IOcrEngine Create(string engineName)
    {
        Log.Information("Creating OCR engine: {Engine}", engineName);

        var requested = engineName?.Trim() ?? string.Empty;

        if (IsEngine(requested, "OneOCR"))
        {
            // NOTE(review): this relative path resolves against the current working directory.
            return new OneOcrEngine(Path.GetFullPath(Path.Combine("tools", "oneocr")));
        }

        if (IsEngine(requested, "EasyOCR"))
        {
            return new EasyOcrEngine();
        }

        if (!IsEngine(requested, "WinOCR"))
        {
            Log.Warning("Unknown OCR engine '{Engine}', falling back to WinOCR", engineName);
        }

        return new WinOcrEngine();
    }

    // Case-insensitive, culture-independent comparison of a requested name against a known one.
    private static bool IsEngine(string requested, string known) =>
        string.Equals(requested, known, StringComparison.OrdinalIgnoreCase);
}
|
||||
268
src/Poe2Trade.Screen/Ocr/OneOcrEngine.cs
Normal file
268
src/Poe2Trade.Screen/Ocr/OneOcrEngine.cs
Normal file
|
|
@ -0,0 +1,268 @@
|
|||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
using Serilog;
|
||||
|
||||
namespace Poe2Trade.Screen.Ocr;
|
||||
|
||||
/// <summary>
/// OCR engine using OneOCR (Windows 11 Snipping Tool's built-in engine).
/// Requires oneocr.dll, oneocr.onemodel, and onnxruntime.dll in the model directory.
/// </summary>
/// <remarks>
/// The native library is loaded and its import resolver registered only once per process,
/// from the model directory of the first instance; later instances reuse that loaded DLL.
/// </remarks>
public sealed class OneOcrEngine : IOcrEngine
{
    public string Name => "OneOCR";

    // Native handles (int64) — created once, reused per call. 0 means "not created" or "released".
    private long _pipeline;
    private long _initOptions;
    private long _processOptions;

    private static readonly byte[] ModelKey = "kj)TGtrK>f]b[Piow.gU+nC@s\"\"\"\"\"\"4"u8.ToArray();

    // Guards the one-time native library setup (a DllImport resolver can be set only once per assembly).
    private static readonly object NativeLoadGate = new();
    private static bool s_nativeLoaded;

    /// <summary>
    /// Validates the model directory, loads the native library and creates the OCR pipeline.
    /// </summary>
    /// <param name="modelDir">Directory containing oneocr.dll and oneocr.onemodel.</param>
    /// <exception cref="DirectoryNotFoundException">The model directory does not exist.</exception>
    /// <exception cref="FileNotFoundException">The model file or oneocr.dll is missing.</exception>
    /// <exception cref="InvalidOperationException">A native initialization call returned a non-zero code.</exception>
    public OneOcrEngine(string modelDir)
    {
        if (!Directory.Exists(modelDir))
            throw new DirectoryNotFoundException($"OneOCR model directory not found: {modelDir}");

        var modelPath = Path.Combine(modelDir, "oneocr.onemodel");
        if (!File.Exists(modelPath))
            throw new FileNotFoundException($"OneOCR model not found: {modelPath}");

        var dllPath = Path.Combine(modelDir, "oneocr.dll");
        if (!File.Exists(dllPath))
            throw new FileNotFoundException($"oneocr.dll not found: {dllPath}");

        EnsureNativeLibraryLoaded(modelDir, dllPath);

        try
        {
            // Init options
            CheckResult(Native.CreateOcrInitOptions(out _initOptions), "CreateOcrInitOptions");
            CheckResult(Native.OcrInitOptionsSetUseModelDelayLoad(_initOptions, 0), "SetUseModelDelayLoad");

            // Pipeline (pass full model path as byte string)
            CheckResult(Native.CreateOcrPipeline(modelPath, ModelKey, _initOptions, out _pipeline), "CreateOcrPipeline");

            // Process options
            CheckResult(Native.CreateOcrProcessOptions(out _processOptions), "CreateOcrProcessOptions");
            CheckResult(Native.OcrProcessOptionsSetMaxRecognitionLineCount(_processOptions, 1000), "SetMaxLineCount");
        }
        catch
        {
            // Release whatever handles were created before the failing step, then surface the error.
            Dispose();
            throw;
        }

        Log.Information("OneOcrEngine initialized (modelDir: {Dir})", modelDir);
    }

    // Loads oneocr.dll from modelDir and registers the import resolver exactly once per process.
    private static void EnsureNativeLibraryLoaded(string modelDir, string dllPath)
    {
        lock (NativeLoadGate)
        {
            if (s_nativeLoaded) return;

            // Set DLL search directory so oneocr.dll can find onnxruntime.dll
            if (!SetDllDirectoryW(modelDir))
                Log.Warning("OneOCR: SetDllDirectory failed (error {Error})", Marshal.GetLastWin32Error());

            // Load the DLL explicitly from modelDir and route every "oneocr.dll" import to it
            var hDll = NativeLibrary.Load(dllPath);
            NativeLibrary.SetDllImportResolver(typeof(OneOcrEngine).Assembly, (name, _, _) =>
                name == Dll ? hDll : IntPtr.Zero);

            s_nativeLoaded = true;
        }
    }

    /// <summary>
    /// Runs the OCR pipeline over <paramref name="bitmap"/>.
    /// </summary>
    /// <returns>
    /// The recognized lines and words; an empty response when the native pipeline reports an error.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="bitmap"/> is null.</exception>
    /// <exception cref="ObjectDisposedException">The engine has been disposed.</exception>
    public OcrResponse Recognize(Bitmap bitmap)
    {
        ArgumentNullException.ThrowIfNull(bitmap);
        if (_pipeline == 0)
            throw new ObjectDisposedException(nameof(OneOcrEngine));

        // Convert bitmap to BGRA pixel data
        var rect = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
        var bmpData = bitmap.LockBits(rect, ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);

        try
        {
            var imageStruct = new ImageStructure
            {
                Type = 3, // CV_8UC4 / BGRA
                Width = bitmap.Width,
                Height = bitmap.Height,
                Reserved = 0,
                Step = bmpData.Stride,
                Data = bmpData.Scan0,
            };

            long rc = Native.RunOcrPipeline(_pipeline, ref imageStruct, _processOptions, out long result);
            if (rc != 0)
            {
                Log.Warning("OneOCR: RunOcrPipeline failed (code {Code})", rc);
                return new OcrResponse { Text = "", Lines = [] };
            }

            try
            {
                return ParseResult(result);
            }
            finally
            {
                Native.ReleaseOcrResult(result);
            }
        }
        finally
        {
            // Pixel data is only needed while the native call runs.
            bitmap.UnlockBits(bmpData);
        }
    }

    // Walks the native result (lines -> words) and copies text and bounding boxes into managed objects.
    private static OcrResponse ParseResult(long result)
    {
        if (Native.GetOcrLineCount(result, out long lineCount) != 0)
            return new OcrResponse { Text = "", Lines = [] };

        var lines = new List<OcrLine>();

        for (long i = 0; i < lineCount; i++)
        {
            if (Native.GetOcrLine(result, i, out long line) != 0 || line == 0) continue;
            if (Native.GetOcrLineWordCount(line, out long wordCount) != 0) continue;

            var words = new List<OcrWord>();
            for (long j = 0; j < wordCount; j++)
            {
                if (ReadWord(line, j) is { } parsed)
                    words.Add(parsed);
            }

            // Lines whose words were all skipped are dropped entirely.
            if (words.Count > 0)
            {
                lines.Add(new OcrLine
                {
                    Text = string.Join(" ", words.Select(wd => wd.Text)),
                    Words = words,
                });
            }
        }

        var fullText = string.Join("\n", lines.Select(l => l.Text));
        return new OcrResponse { Text = fullText, Lines = lines };
    }

    // Reads one word of a line; returns null when the word handle or its text cannot be obtained.
    private static OcrWord? ReadWord(long line, long index)
    {
        if (Native.GetOcrWord(line, index, out long word) != 0 || word == 0) return null;
        if (Native.GetOcrWordContent(word, out IntPtr contentPtr) != 0) return null;

        var text = Marshal.PtrToStringUTF8(contentPtr);
        if (string.IsNullOrEmpty(text)) return null;

        // BoundingBox: 4 corners as floats → axis-aligned rect (all zero when unavailable)
        int x = 0, y = 0, w = 0, h = 0;
        if (Native.GetOcrWordBoundingBox(word, out IntPtr bboxPtr) == 0 && bboxPtr != IntPtr.Zero)
        {
            var bbox = Marshal.PtrToStructure<BoundingBox>(bboxPtr);
            int x1 = (int)MathF.Min(MathF.Min(bbox.X1, bbox.X2), MathF.Min(bbox.X3, bbox.X4));
            int y1 = (int)MathF.Min(MathF.Min(bbox.Y1, bbox.Y2), MathF.Min(bbox.Y3, bbox.Y4));
            int x2 = (int)MathF.Max(MathF.Max(bbox.X1, bbox.X2), MathF.Max(bbox.X3, bbox.X4));
            int y2 = (int)MathF.Max(MathF.Max(bbox.Y1, bbox.Y2), MathF.Max(bbox.Y3, bbox.Y4));
            x = x1; y = y1; w = x2 - x1; h = y2 - y1;
        }

        return new OcrWord { Text = text, X = x, Y = y, Width = w, Height = h };
    }

    /// <summary>
    /// Releases the native handles. Safe to call more than once: each handle is zeroed after release.
    /// </summary>
    public void Dispose()
    {
        if (_processOptions != 0) { Native.ReleaseOcrProcessOptions(_processOptions); _processOptions = 0; }
        if (_pipeline != 0) { Native.ReleaseOcrPipeline(_pipeline); _pipeline = 0; }
        if (_initOptions != 0) { Native.ReleaseOcrInitOptions(_initOptions); _initOptions = 0; }
    }

    // Throws when a native call returned a non-zero (failure) code.
    private static void CheckResult(long rc, string func)
    {
        if (rc != 0)
            throw new InvalidOperationException($"OneOCR {func} failed (code {rc})");
    }

    // -- Native structs --

    // Matches C struct: { int32 t, int32 col, int32 row, int32 _unk, int64 step, int64 data_ptr } = 0x20 bytes
    [StructLayout(LayoutKind.Sequential)]
    private struct ImageStructure
    {
        public int Type;       // 3 = CV_8UC4 (BGRA)
        public int Width;
        public int Height;
        public int Reserved;
        public long Step;      // stride in bytes per row
        public IntPtr Data;    // pointer to BGRA pixel data
    }

    // Four corner points of a word/line box, as returned by the native API.
    [StructLayout(LayoutKind.Sequential)]
    private struct BoundingBox
    {
        public float X1, Y1;
        public float X2, Y2;
        public float X3, Y3;
        public float X4, Y4;
    }

    // -- P/Invoke --

    private const string Dll = "oneocr.dll";

    [DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    private static extern bool SetDllDirectoryW(string lpPathName);

    // All OneOCR functions return int64 error code (0 = success) and use out-pointer params for handles.
    private static class Native
    {
        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long CreateOcrInitOptions(out long options);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long OcrInitOptionsSetUseModelDelayLoad(long options, byte flag);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long CreateOcrPipeline(
            [MarshalAs(UnmanagedType.LPUTF8Str)] string modelPath,
            byte[] key,
            long initOptions,
            out long pipeline);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long CreateOcrProcessOptions(out long options);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long OcrProcessOptionsSetMaxRecognitionLineCount(long options, long maxLines);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long RunOcrPipeline(long pipeline, ref ImageStructure image, long processOptions, out long result);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetImageAngle(long result, out float angle);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetOcrLineCount(long result, out long count);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetOcrLine(long result, long index, out long line);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetOcrLineContent(long line, out IntPtr content);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetOcrLineBoundingBox(long line, out IntPtr bbox);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetOcrLineWordCount(long line, out long count);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetOcrWord(long line, long index, out long word);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetOcrWordContent(long word, out IntPtr content);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetOcrWordBoundingBox(long word, out IntPtr bbox);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern long GetOcrWordConfidence(long word, out float confidence);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern void ReleaseOcrResult(long result);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern void ReleaseOcrPipeline(long pipeline);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern void ReleaseOcrInitOptions(long options);

        [DllImport(Dll, CallingConvention = CallingConvention.Cdecl)]
        public static extern void ReleaseOcrProcessOptions(long options);
    }
}
|
||||
67
src/Poe2Trade.Screen/Ocr/WinOcrEngine.cs
Normal file
67
src/Poe2Trade.Screen/Ocr/WinOcrEngine.cs
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using Serilog;
|
||||
using Windows.Graphics.Imaging;
|
||||
using Windows.Media.Ocr;
|
||||
using Windows.Storage.Streams;
|
||||
using BitmapDecoder = Windows.Graphics.Imaging.BitmapDecoder;
|
||||
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||
|
||||
namespace Poe2Trade.Screen.Ocr;
|
||||
|
||||
/// <summary>
/// OCR engine backed by the Windows.Media.Ocr API, using the recognizer for the
/// user's profile languages.
/// </summary>
public sealed class WinOcrEngine : IOcrEngine
{
    private readonly OcrEngine _engine;

    public string Name => "WinOCR";

    /// <summary>Creates the Windows OCR recognizer.</summary>
    /// <exception cref="InvalidOperationException">No recognizer could be created for the user profile languages.</exception>
    public WinOcrEngine()
    {
        _engine = OcrEngine.TryCreateFromUserProfileLanguages()
            ?? throw new InvalidOperationException("Windows OCR engine not available");
        Log.Information("WinOcrEngine initialized (language: {Lang})", _engine.RecognizerLanguage.DisplayName);
    }

    /// <summary>
    /// Recognizes text in <paramref name="bitmap"/>. The WinRT async calls are awaited
    /// synchronously because the engine interface is synchronous.
    /// </summary>
    /// <returns>Recognized lines and words with their bounding rectangles truncated to integers.</returns>
    public OcrResponse Recognize(Bitmap bitmap)
    {
        // Convert System.Drawing.Bitmap → PNG stream → WinRT SoftwareBitmap
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        ms.Position = 0;

        // Both the stream wrapper and the decoded bitmap hold native resources; dispose them
        // per call instead of leaving them to the finalizer.
        using var stream = ms.AsRandomAccessStream();
        var decoder = BitmapDecoder.CreateAsync(stream).AsTask().GetAwaiter().GetResult();
        using var softwareBitmap = decoder.GetSoftwareBitmapAsync().AsTask().GetAwaiter().GetResult();

        var ocrResult = _engine.RecognizeAsync(softwareBitmap).AsTask().GetAwaiter().GetResult();

        var lines = new List<OcrLine>();
        foreach (var winLine in ocrResult.Lines)
        {
            var words = new List<OcrWord>();
            foreach (var winWord in winLine.Words)
            {
                var r = winWord.BoundingRect;
                words.Add(new OcrWord
                {
                    Text = winWord.Text,
                    X = (int)r.X,
                    Y = (int)r.Y,
                    Width = (int)r.Width,
                    Height = (int)r.Height,
                });
            }

            lines.Add(new OcrLine
            {
                Text = winLine.Text,
                Words = words,
            });
        }

        var fullText = string.Join("\n", lines.Select(l => l.Text));
        return new OcrResponse { Text = fullText, Lines = lines };
    }

    // Nothing to release: per-call resources are disposed inside Recognize.
    public void Dispose() { }
}
|
||||
|
|
@ -37,7 +37,14 @@ class PythonOcrBridge : IDisposable
|
|||
/// <summary>
|
||||
/// Run OCR on a bitmap via the Python EasyOCR engine (base64 PNG over pipe).
|
||||
/// </summary>
|
||||
public OcrResponse OcrFromBitmap(Bitmap bitmap, OcrParams? ocrParams = null)
|
||||
public OcrResponse OcrFromBitmap(
|
||||
Bitmap bitmap,
|
||||
int mergeGap = 0,
|
||||
double? linkThreshold = null,
|
||||
double? textThreshold = null,
|
||||
double? lowText = null,
|
||||
double? widthThs = null,
|
||||
bool? paragraph = null)
|
||||
{
|
||||
EnsureRunning();
|
||||
|
||||
|
|
@ -45,26 +52,18 @@ class PythonOcrBridge : IDisposable
|
|||
bitmap.Save(ms, SdImageFormat.Png);
|
||||
var imageBase64 = Convert.ToBase64String(ms.ToArray());
|
||||
|
||||
var pyReq = BuildPythonRequest(ocrParams);
|
||||
var pyReq = new Dictionary<string, object?> { ["cmd"] = "ocr", ["engine"] = "easyocr" };
|
||||
if (mergeGap > 0) pyReq["mergeGap"] = mergeGap;
|
||||
if (linkThreshold.HasValue) pyReq["linkThreshold"] = linkThreshold.Value;
|
||||
if (textThreshold.HasValue) pyReq["textThreshold"] = textThreshold.Value;
|
||||
if (lowText.HasValue) pyReq["lowText"] = lowText.Value;
|
||||
if (widthThs.HasValue) pyReq["widthThs"] = widthThs.Value;
|
||||
if (paragraph.HasValue) pyReq["paragraph"] = paragraph.Value;
|
||||
|
||||
pyReq["imageBase64"] = imageBase64;
|
||||
return SendPythonRequest(pyReq);
|
||||
}
|
||||
|
||||
private static Dictionary<string, object?> BuildPythonRequest(OcrParams? ocrParams)
|
||||
{
|
||||
var req = new Dictionary<string, object?> { ["cmd"] = "ocr", ["engine"] = "easyocr" };
|
||||
if (ocrParams == null) return req;
|
||||
|
||||
if (ocrParams.MergeGap > 0) req["mergeGap"] = ocrParams.MergeGap;
|
||||
if (ocrParams.LinkThreshold.HasValue) req["linkThreshold"] = ocrParams.LinkThreshold.Value;
|
||||
if (ocrParams.TextThreshold.HasValue) req["textThreshold"] = ocrParams.TextThreshold.Value;
|
||||
if (ocrParams.LowText.HasValue) req["lowText"] = ocrParams.LowText.Value;
|
||||
if (ocrParams.WidthThs.HasValue) req["widthThs"] = ocrParams.WidthThs.Value;
|
||||
if (ocrParams.Paragraph.HasValue) req["paragraph"] = ocrParams.Paragraph.Value;
|
||||
|
||||
return req;
|
||||
}
|
||||
|
||||
private OcrResponse SendPythonRequest(object pyReq)
|
||||
{
|
||||
var json = JsonSerializer.Serialize(pyReq, JsonOptions);
|
||||
|
|
|
|||
|
|
@ -16,13 +16,14 @@ public class ScreenReader : IScreenReader
|
|||
private readonly GridHandler _gridHandler = new();
|
||||
private readonly TemplateMatchHandler _templateMatch = new();
|
||||
private readonly EdgeCropHandler _edgeCrop = new();
|
||||
private readonly PythonOcrBridge _pythonBridge = new();
|
||||
private readonly IOcrEngine _ocrEngine;
|
||||
private bool _initialized;
|
||||
|
||||
public GridReader Grid { get; }
|
||||
|
||||
public ScreenReader()
|
||||
public ScreenReader(IOcrEngine ocrEngine)
|
||||
{
|
||||
_ocrEngine = ocrEngine;
|
||||
Grid = new GridReader(_gridHandler);
|
||||
}
|
||||
|
||||
|
|
@ -59,16 +60,16 @@ public class ScreenReader : IScreenReader
|
|||
if (preprocess == "tophat")
|
||||
{
|
||||
using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);
|
||||
result = _pythonBridge.OcrFromBitmap(processed);
|
||||
result = _ocrEngine.Recognize(processed);
|
||||
}
|
||||
else if (preprocess == "clahe")
|
||||
{
|
||||
using var processed = ImagePreprocessor.PreprocessClahe(bitmap);
|
||||
result = _pythonBridge.OcrFromBitmap(processed);
|
||||
result = _ocrEngine.Recognize(processed);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = _pythonBridge.OcrFromBitmap(bitmap);
|
||||
result = _ocrEngine.Recognize(bitmap);
|
||||
}
|
||||
|
||||
var allText = string.Join(" | ", result.Lines.Select(l => l.Text));
|
||||
|
|
@ -149,7 +150,7 @@ public class ScreenReader : IScreenReader
|
|||
? ImagePreprocessor.PreprocessWithBackgroundSub(cropped, refCropped, ocr.DimPercentile, ocr.TextThresh, 1, ocr.SoftThreshold)
|
||||
: ImagePreprocessor.PreprocessForOcr(cropped, ocr.KernelSize, 1);
|
||||
|
||||
var ocrResult = _pythonBridge.OcrFromBitmap(processedBmp, ocr);
|
||||
var ocrResult = _ocrEngine.Recognize(processedBmp);
|
||||
|
||||
// Offset coordinates to screen space
|
||||
foreach (var line in ocrResult.Lines)
|
||||
|
|
@ -299,7 +300,7 @@ public class ScreenReader : IScreenReader
|
|||
|
||||
var ocrSw2 = System.Diagnostics.Stopwatch.StartNew();
|
||||
OcrResponse ocrResult2;
|
||||
try { ocrResult2 = _pythonBridge.OcrFromBitmap(crop); }
|
||||
try { ocrResult2 = _ocrEngine.Recognize(crop); }
|
||||
catch (TimeoutException)
|
||||
{
|
||||
Log.Warning("NameplateDiffOcr: crop OCR timed out");
|
||||
|
|
@ -386,7 +387,7 @@ public class ScreenReader : IScreenReader
|
|||
OcrResponse ocrResult;
|
||||
try
|
||||
{
|
||||
ocrResult = _pythonBridge.OcrFromBitmap(stitched);
|
||||
ocrResult = _ocrEngine.Recognize(stitched);
|
||||
}
|
||||
catch (TimeoutException)
|
||||
{
|
||||
|
|
@ -975,7 +976,7 @@ public class ScreenReader : IScreenReader
|
|||
return keep;
|
||||
}
|
||||
|
||||
public void Dispose() => _pythonBridge.Dispose();
|
||||
public void Dispose() => _ocrEngine.Dispose();
|
||||
|
||||
// -- OCR text matching --
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ using Poe2Trade.Game;
|
|||
using Poe2Trade.GameLog;
|
||||
using Poe2Trade.Inventory;
|
||||
using Poe2Trade.Screen;
|
||||
using Poe2Trade.Screen.Ocr;
|
||||
using Poe2Trade.Trade;
|
||||
using Poe2Trade.Ui.Overlay;
|
||||
using Poe2Trade.Ui.ViewModels;
|
||||
|
|
@ -33,6 +34,11 @@ public partial class App : Application
|
|||
services.AddSingleton(sp => sp.GetRequiredService<ConfigStore>().Settings);
|
||||
|
||||
// Services
|
||||
services.AddSingleton<IOcrEngine>(sp =>
|
||||
{
|
||||
var settings = sp.GetRequiredService<SavedSettings>();
|
||||
return OcrEngineFactory.Create(settings.OcrEngine);
|
||||
});
|
||||
services.AddSingleton<IGameController, GameController>();
|
||||
services.AddSingleton<IScreenReader, ScreenReader>();
|
||||
services.AddSingleton<IClientLogWatcher>(sp =>
|
||||
|
|
|
|||
|
|
@ -197,6 +197,8 @@ public sealed class D2dOverlay
|
|||
IsExploring: _bot.Navigation.IsExploring,
|
||||
ShowHudDebug: _bot.Store.Settings.ShowHudDebug,
|
||||
ShowLootDebug: showLoot,
|
||||
ShowYolo: _bot.ShowYoloOverlay,
|
||||
ShowFightPosition: _bot.ShowFightPositionOverlay,
|
||||
LootLabels: _bot.LootDebugDetector.Latest,
|
||||
FightPosition: _bot.BossRunExecutor.FightPosition,
|
||||
Fps: fps,
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ public record OverlayState(
|
|||
bool IsExploring,
|
||||
bool ShowHudDebug,
|
||||
bool ShowLootDebug,
|
||||
bool ShowYolo,
|
||||
bool ShowFightPosition,
|
||||
IReadOnlyList<LootLabel> LootLabels,
|
||||
(double X, double Y)? FightPosition,
|
||||
double Fps,
|
||||
|
|
|
|||
|
|
@ -28,63 +28,68 @@ internal sealed class D2dEnemyBoxLayer : ID2dOverlayLayer, IDisposable
|
|||
|
||||
public void Draw(D2dRenderContext ctx, OverlayState state)
|
||||
{
|
||||
if (!state.ShowYolo && !state.ShowFightPosition) return;
|
||||
|
||||
var rt = ctx.RenderTarget;
|
||||
|
||||
foreach (var enemy in state.Enemies)
|
||||
if (state.ShowYolo)
|
||||
{
|
||||
var confirmed = enemy.HealthBarConfirmed;
|
||||
var boxBrush = confirmed ? ctx.Red : ctx.Yellow;
|
||||
var rect = new RectangleF(enemy.X, enemy.Y, enemy.Width, enemy.Height);
|
||||
rt.DrawRectangle(rect, boxBrush, 2f);
|
||||
|
||||
// Confidence label above the box
|
||||
var pctIndex = Math.Clamp((int)(enemy.Confidence * 100), 0, 100);
|
||||
var layout = confirmed ? _confirmedLabels[pctIndex] : _unconfirmedLabels[pctIndex];
|
||||
var textBrush = confirmed ? ctx.Red : ctx.Yellow;
|
||||
|
||||
var m = layout.Metrics;
|
||||
var labelX = enemy.X;
|
||||
var labelY = enemy.Y - m.Height - 2;
|
||||
|
||||
rt.FillRectangle(
|
||||
new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
|
||||
ctx.LabelBgBrush);
|
||||
|
||||
rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, textBrush);
|
||||
}
|
||||
|
||||
// Boss bounding boxes (cyan) — extrapolate position to compensate for inference delay
|
||||
var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
|
||||
var ageMs = (float)Math.Clamp(now - state.BossTimestampMs, 0, 60);
|
||||
|
||||
foreach (var boss in state.Bosses)
|
||||
{
|
||||
float dx = boss.VxPerMs * ageMs;
|
||||
float dy = boss.VyPerMs * ageMs;
|
||||
var rect = new RectangleF(boss.X + dx, boss.Y + dy, boss.Width, boss.Height);
|
||||
rt.DrawRectangle(rect, ctx.Cyan, 3f);
|
||||
|
||||
var pct = Math.Clamp((int)(boss.Confidence * 100), 0, 100);
|
||||
var key = $"{boss.ClassName} {pct}%";
|
||||
if (!_bossLabels.TryGetValue(key, out var layout))
|
||||
foreach (var enemy in state.Enemies)
|
||||
{
|
||||
layout = _ctx.CreateTextLayout(key, _ctx.LabelFormat);
|
||||
_bossLabels[key] = layout;
|
||||
var confirmed = enemy.HealthBarConfirmed;
|
||||
var boxBrush = confirmed ? ctx.Red : ctx.Yellow;
|
||||
var rect = new RectangleF(enemy.X, enemy.Y, enemy.Width, enemy.Height);
|
||||
rt.DrawRectangle(rect, boxBrush, 2f);
|
||||
|
||||
// Confidence label above the box
|
||||
var pctIndex = Math.Clamp((int)(enemy.Confidence * 100), 0, 100);
|
||||
var layout = confirmed ? _confirmedLabels[pctIndex] : _unconfirmedLabels[pctIndex];
|
||||
var textBrush = confirmed ? ctx.Red : ctx.Yellow;
|
||||
|
||||
var m = layout.Metrics;
|
||||
var labelX = enemy.X;
|
||||
var labelY = enemy.Y - m.Height - 2;
|
||||
|
||||
rt.FillRectangle(
|
||||
new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
|
||||
ctx.LabelBgBrush);
|
||||
|
||||
rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, textBrush);
|
||||
}
|
||||
|
||||
var m = layout.Metrics;
|
||||
var labelX = boss.X + dx;
|
||||
var labelY = boss.Y + dy - m.Height - 2;
|
||||
// Boss bounding boxes (cyan) — extrapolate position to compensate for inference delay
|
||||
var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
|
||||
var ageMs = (float)Math.Clamp(now - state.BossTimestampMs, 0, 60);
|
||||
|
||||
rt.FillRectangle(
|
||||
new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
|
||||
ctx.LabelBgBrush);
|
||||
foreach (var boss in state.Bosses)
|
||||
{
|
||||
float dx = boss.VxPerMs * ageMs;
|
||||
float dy = boss.VyPerMs * ageMs;
|
||||
var rect = new RectangleF(boss.X + dx, boss.Y + dy, boss.Width, boss.Height);
|
||||
rt.DrawRectangle(rect, ctx.Cyan, 3f);
|
||||
|
||||
rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, ctx.Cyan);
|
||||
var pct = Math.Clamp((int)(boss.Confidence * 100), 0, 100);
|
||||
var key = $"{boss.ClassName} {pct}%";
|
||||
if (!_bossLabels.TryGetValue(key, out var layout))
|
||||
{
|
||||
layout = _ctx.CreateTextLayout(key, _ctx.LabelFormat);
|
||||
_bossLabels[key] = layout;
|
||||
}
|
||||
|
||||
var m = layout.Metrics;
|
||||
var labelX = boss.X + dx;
|
||||
var labelY = boss.Y + dy - m.Height - 2;
|
||||
|
||||
rt.FillRectangle(
|
||||
new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
|
||||
ctx.LabelBgBrush);
|
||||
|
||||
rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, ctx.Cyan);
|
||||
}
|
||||
}
|
||||
|
||||
// Fight position — red circle on screen where the fight area is
|
||||
if (state.FightPosition is var (fx, fy))
|
||||
if (state.ShowFightPosition && state.FightPosition is var (fx, fy))
|
||||
{
|
||||
const double worldToScreen = 835.0 / 97.0; // inverse of screenToWorld
|
||||
const int screenCx = 1280, screenCy = 660; // player character screen position
|
||||
|
|
|
|||
|
|
@ -23,12 +23,24 @@ public partial class DebugViewModel : ObservableObject
|
|||
[ObservableProperty] private decimal? _clickX;
|
||||
[ObservableProperty] private decimal? _clickY;
|
||||
[ObservableProperty] private bool _showLootDebug;
|
||||
[ObservableProperty] private bool _showYolo = true;
|
||||
[ObservableProperty] private bool _showFightPosition = true;
|
||||
|
||||
// Change hook for the _showLootDebug [ObservableProperty] field: copies the new
// toggle value onto the bot's loot debug detector (Enabled flag).
// Presumably invoked by the source-generated ShowLootDebug setter
// (CommunityToolkit.Mvvm convention) — confirm against the generated code.
partial void OnShowLootDebugChanged(bool value)
|
||||
{
|
||||
_bot.LootDebugDetector.Enabled = value;
|
||||
}
|
||||
|
||||
// Change hook for the _showYolo [ObservableProperty] field: forwards the new
// toggle value to _bot.ShowYoloOverlay.
// NOTE(review): _showYolo is initialized to true directly on the field, which
// does not go through the property setter, so this hook presumably does not run
// for the initial value — confirm _bot.ShowYoloOverlay also defaults to true.
partial void OnShowYoloChanged(bool value)
|
||||
{
|
||||
_bot.ShowYoloOverlay = value;
|
||||
}
|
||||
|
||||
// Change hook for the _showFightPosition [ObservableProperty] field: forwards
// the new toggle value to _bot.ShowFightPositionOverlay.
// NOTE(review): _showFightPosition is initialized to true directly on the field,
// which does not go through the property setter, so this hook presumably does
// not run for the initial value — confirm the bot-side default matches.
partial void OnShowFightPositionChanged(bool value)
|
||||
{
|
||||
_bot.ShowFightPositionOverlay = value;
|
||||
}
|
||||
|
||||
public string[] GridLayoutNames { get; } =
|
||||
[
|
||||
"inventory", "stash12", "stash12_folder", "stash24",
|
||||
|
|
|
|||
|
|
@ -20,11 +20,14 @@ public partial class SettingsViewModel : ObservableObject
|
|||
[ObservableProperty] private decimal? _betweenTradesDelayMs = 5000;
|
||||
[ObservableProperty] private bool _headless = true;
|
||||
[ObservableProperty] private bool _showHudDebug;
|
||||
[ObservableProperty] private string _ocrEngine = "WinOCR";
|
||||
[ObservableProperty] private bool _isSaved;
|
||||
[ObservableProperty] private string _calibrationStatus = "";
|
||||
[ObservableProperty] private string _stashCalibratedAt = "";
|
||||
[ObservableProperty] private string _shopCalibratedAt = "";
|
||||
|
||||
public static string[] OcrEngineOptions { get; } = ["WinOCR", "OneOCR", "EasyOCR"];
|
||||
|
||||
public ObservableCollection<StashTabViewModel> StashTabs { get; } = [];
|
||||
public ObservableCollection<StashTabViewModel> ShopTabs { get; } = [];
|
||||
|
||||
|
|
@ -46,6 +49,7 @@ public partial class SettingsViewModel : ObservableObject
|
|||
BetweenTradesDelayMs = s.BetweenTradesDelayMs;
|
||||
Headless = s.Headless;
|
||||
ShowHudDebug = s.ShowHudDebug;
|
||||
OcrEngine = s.OcrEngine;
|
||||
}
|
||||
|
||||
private void LoadTabs()
|
||||
|
|
@ -97,6 +101,7 @@ public partial class SettingsViewModel : ObservableObject
|
|||
s.BetweenTradesDelayMs = (int)(BetweenTradesDelayMs ?? 5000);
|
||||
s.Headless = Headless;
|
||||
s.ShowHudDebug = ShowHudDebug;
|
||||
s.OcrEngine = OcrEngine;
|
||||
});
|
||||
|
||||
IsSaved = true;
|
||||
|
|
@ -210,4 +215,5 @@ public partial class SettingsViewModel : ObservableObject
|
|||
// Any change to the between-trades delay clears the IsSaved flag.
partial void OnBetweenTradesDelayMsChanged(decimal? value) => IsSaved = false;
|
||||
// Any change to the Headless setting clears the IsSaved flag.
partial void OnHeadlessChanged(bool value) => IsSaved = false;
|
||||
// Any change to the HUD debug overlay setting clears the IsSaved flag.
partial void OnShowHudDebugChanged(bool value) => IsSaved = false;
|
||||
// Any change to the selected OCR engine clears the IsSaved flag.
partial void OnOcrEngineChanged(string value) => IsSaved = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -341,6 +341,10 @@
|
|||
<StackPanel Spacing="6">
|
||||
<TextBlock Text="DEBUG OVERLAYS" FontSize="11" FontWeight="SemiBold"
|
||||
Foreground="#8b949e" />
|
||||
<ToggleSwitch IsChecked="{Binding ShowYolo}"
|
||||
Content="YOLO Boxes" Foreground="#e6edf3" />
|
||||
<ToggleSwitch IsChecked="{Binding ShowFightPosition}"
|
||||
Content="Fight Position" Foreground="#e6edf3" />
|
||||
<ToggleSwitch IsChecked="{Binding ShowLootDebug}"
|
||||
Content="Loot Labels" Foreground="#e6edf3" />
|
||||
</StackPanel>
|
||||
|
|
@ -455,6 +459,12 @@
|
|||
<CheckBox IsChecked="{Binding ShowHudDebug}" Content="Show HUD debug overlay"
|
||||
Foreground="#e6edf3" />
|
||||
|
||||
<StackPanel Spacing="4" Margin="0,4,0,0">
|
||||
<TextBlock Text="OCR Engine (restart required)" FontSize="11" Foreground="#8b949e" />
|
||||
<ComboBox ItemsSource="{Binding OcrEngineOptions}"
|
||||
SelectedItem="{Binding OcrEngine}" MinWidth="200" />
|
||||
</StackPanel>
|
||||
|
||||
<StackPanel Orientation="Horizontal" Spacing="8" Margin="0,2,0,0">
|
||||
<Button Content="Save Settings" Command="{Binding SaveSettingsCommand}" />
|
||||
<TextBlock Text="Saved!" Foreground="#3fb950" VerticalAlignment="Center"
|
||||
|
|
|
|||
BIN
tools/oneocr/oneocr.dll
Normal file
BIN
tools/oneocr/oneocr.dll
Normal file
Binary file not shown.
BIN
tools/oneocr/oneocr.onemodel
Normal file
BIN
tools/oneocr/oneocr.onemodel
Normal file
Binary file not shown.
BIN
tools/oneocr/onnxruntime.dll
Normal file
BIN
tools/oneocr/onnxruntime.dll
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue