work on well of souls and yolo detection

This commit is contained in:
Boki 2026-02-20 16:40:50 -05:00
parent 3456e0d62a
commit 40d30115bf
41 changed files with 3031 additions and 148 deletions

View file

@ -0,0 +1,78 @@
using Poe2Trade.Core;
using Serilog;
using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen;
/// <summary>
/// Frame consumer that periodically runs the YOLO detect bridge over the whole frame with a
/// per-boss model, keeps the most recent result in <see cref="Latest"/> and raises
/// <see cref="BossDetected"/> once detections have repeated often enough.
/// </summary>
public class BossDetector : IFrameConsumer, IDisposable
{
    // Inference runs on every Nth frame that arrives while Enabled is true.
    private const int DetectEveryNFrames = 6;

    // Number of back-to-back non-empty results required before BossDetected fires.
    private const int MinConsecutiveFrames = 2;

    private readonly PythonDetectBridge _bridge = new();
    private volatile BossSnapshot _latest = new([], 0, 0);
    private int _frameCounter;
    private int _consecutiveDetections;
    private string _modelName = "boss-kulemak";

    /// <summary>When false, <see cref="Process"/> returns immediately without counting the frame.</summary>
    public bool Enabled { get; set; }

    /// <summary>Snapshot produced by the most recent successful detection pass.</summary>
    public BossSnapshot Latest => _latest;

    /// <summary>
    /// Raised with the current snapshot on every detection pass from the
    /// <c>MinConsecutiveFrames</c>-th consecutive non-empty pass onwards.
    /// </summary>
    public event Action<BossSnapshot>? BossDetected;

    /// <summary>
    /// Switches the model to <c>boss-{bossName}</c> and restarts the consecutive-detection streak.
    /// </summary>
    public void SetBoss(string bossName)
    {
        _modelName = $"boss-{bossName}";
        _consecutiveDetections = 0;
    }

    /// <summary>
    /// Runs detection on every <c>DetectEveryNFrames</c>-th enabled frame. Any exception thrown
    /// while detecting (or by an event subscriber) is logged at debug level and swallowed.
    /// </summary>
    public void Process(ScreenFrame frame)
    {
        if (!Enabled || ++_frameCounter % DetectEveryNFrames != 0)
        {
            return;
        }

        try
        {
            // Use full frame — model was trained on full 2560x1440 screenshots
            using var fullFrameBgr = frame.CropBgr(new Region(0, 0, frame.Width, frame.Height));
            var response = _bridge.Detect(fullFrameBgr, conf: 0.60f, imgsz: 1280, model: _modelName);

            var found = new List<DetectedBoss>(response.Count);
            foreach (var d in response.Detections)
            {
                var boss = new DetectedBoss(d.ClassName, d.Confidence, d.X, d.Y, d.Width, d.Height, d.Cx, d.Cy);
                found.Add(boss);
            }

            var snapshot = new BossSnapshot(
                found.AsReadOnly(),
                DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
                response.InferenceMs);
            _latest = snapshot;

            // An empty pass breaks the streak; nothing is raised for it.
            if (found.Count == 0)
            {
                _consecutiveDetections = 0;
                return;
            }

            _consecutiveDetections++;
            if (_consecutiveDetections >= MinConsecutiveFrames)
            {
                BossDetected?.Invoke(snapshot);
            }
        }
        catch (Exception ex)
        {
            Log.Debug(ex, "BossDetector YOLO failed");
        }
    }

    /// <summary>Disposes the underlying detect bridge.</summary>
    public void Dispose()
    {
        _bridge.Dispose();
    }
}

View file

@ -10,3 +10,14 @@ public record DetectionSnapshot(
IReadOnlyList<DetectedEnemy> Enemies,
long Timestamp,
float InferenceMs);
/// <summary>
/// A single detection reported by a boss model: class label, confidence and box geometry.
/// </summary>
/// <param name="ClassName">Class label of the detection.</param>
/// <param name="Confidence">Confidence score of the detection.</param>
/// <param name="X">Box X coordinate (presumably the left edge in frame pixels — confirm against the detect bridge).</param>
/// <param name="Y">Box Y coordinate (presumably the top edge in frame pixels — confirm against the detect bridge).</param>
/// <param name="Width">Box width.</param>
/// <param name="Height">Box height.</param>
/// <param name="Cx">Presumably the box center X — confirm against the detect bridge.</param>
/// <param name="Cy">Presumably the box center Y — confirm against the detect bridge.</param>
public record DetectedBoss(
    string ClassName,
    float Confidence,
    int X,
    int Y,
    int Width,
    int Height,
    int Cx,
    int Cy);
/// <summary>
/// Result of one boss detection pass.
/// </summary>
/// <param name="Bosses">Detections found in the pass; empty when nothing was detected.</param>
/// <param name="Timestamp">Time the snapshot was created (presumably Unix milliseconds, UTC — confirm against the producer).</param>
/// <param name="InferenceMs">Inference duration reported for the pass, in milliseconds.</param>
public record BossSnapshot(IReadOnlyList<DetectedBoss> Bosses, long Timestamp, float InferenceMs);

View file

@ -16,6 +16,7 @@ public class FrameSaver : IFrameConsumer
private const int JpegQuality = 95;
private const int MinSaveIntervalMs = 1000;
private const int BurstIntervalMs = 200;
private const int MinRedPixels = 50;
private const int ThumbSize = 64;
private const double MovementThreshold = 8.0; // mean absolute diff on 64x64 grayscale
@ -26,6 +27,7 @@ public class FrameSaver : IFrameConsumer
private Mat? _prevThumb;
public bool Enabled { get; set; }
public bool BurstMode { get; set; }
public int SavedCount => _savedCount;
public FrameSaver(string outputDir = "training-data/raw")
@ -35,10 +37,11 @@ public class FrameSaver : IFrameConsumer
public void Process(ScreenFrame frame)
{
if (!Enabled) return;
if (!Enabled && !BurstMode) return;
var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
if (now - _lastSaveTime < MinSaveIntervalMs) return;
var interval = BurstMode ? BurstIntervalMs : MinSaveIntervalMs;
if (now - _lastSaveTime < interval) return;
if (GameplayRegion.X + GameplayRegion.Width > frame.Width ||
GameplayRegion.Y + GameplayRegion.Height > frame.Height)
@ -46,10 +49,12 @@ public class FrameSaver : IFrameConsumer
try
{
using var bgr = frame.CropBgr(GameplayRegion);
if (!HasHealthBars(bgr)) return;
if (!HasSceneChanged(bgr)) return;
if (!BurstMode)
{
using var bgr = frame.CropBgr(GameplayRegion);
if (!HasHealthBars(bgr)) return;
if (!HasSceneChanged(bgr)) return;
}
if (!Directory.Exists(_outputDir))
Directory.CreateDirectory(_outputDir);

View file

@ -1,5 +1,3 @@
using System.Drawing;
using System.Text.RegularExpressions;
using OpenCvSharp;
using Poe2Trade.Core;
using Serilog;
@ -7,38 +5,41 @@ using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen;
public record HudValues(int Current, int Max);
public record HudSnapshot
{
public HudValues? Life { get; init; }
public HudValues? Mana { get; init; }
public HudValues? EnergyShield { get; init; }
public HudValues? Spirit { get; init; }
public float LifePct { get; init; }
public float ShieldPct { get; init; }
public float ManaPct { get; init; }
public long Timestamp { get; init; }
public float LifePct => Life is { Max: > 0 } l ? (float)l.Current / l.Max : 1f;
public float ManaPct => Mana is { Max: > 0 } m ? (float)m.Current / m.Max : 1f;
}
/// <summary>
/// Reads life/mana/ES/spirit values from HUD globe text via OCR.
/// Throttled to ~1 read per second (every 30 frames at 30fps).
/// Reads life/mana/shield fill levels by sampling pixel colors on the globes.
/// Finds the highest Y where the fill color appears — the fill drains from top down.
/// Samples a horizontal band (±SampleHalfWidth) at each Y for robustness against the frame ornaments.
/// </summary>
public class HudReader : IFrameConsumer
{
private static readonly Regex ValuePattern = new(@"(\d+)\s*/\s*(\d+)", RegexOptions.Compiled);
// Globe centers at 2560x1440
private const int LifeX = 167;
private const int ManaX = 2394;
private const int GlobeTop = 1185;
private const int GlobeBottom = 1411;
// Crop regions for HUD text at 2560x1440 — placeholders, need calibration
private static readonly Region LifeRegion = new(100, 1340, 200, 40);
private static readonly Region ManaRegion = new(2260, 1340, 200, 40);
private static readonly Region EsRegion = new(100, 1300, 200, 40);
private static readonly Region SpiritRegion = new(2260, 1300, 200, 40);
// Shield ring: circle centered at (ShieldCX, ShieldCY) = (170, 1298), radius 130
private const int ShieldCX = 170;
private const int ShieldCY = 1298;
private const int ShieldRadius = 130;
private const int OcrEveryNFrames = 30;
// Sample a horizontal band of pixels at each Y level
private const int SampleHalfWidth = 8;
// Minimum pixels in the band that must match to count as "filled"
private const int MinHits = 2;
private readonly PythonOcrBridge _ocr = new();
private volatile HudSnapshot _current = new() { Timestamp = 0 };
private const int MinChannel = 60;
private const float DominanceRatio = 1.2f;
private volatile HudSnapshot _current = new();
private int _frameCounter;
public HudSnapshot Current => _current;
@ -47,64 +48,128 @@ public class HudReader : IFrameConsumer
public void Process(ScreenFrame frame)
{
if (++_frameCounter % OcrEveryNFrames != 0) return;
if (++_frameCounter % 2 != 0) return;
try
{
var life = ReadValue(frame, LifeRegion);
var mana = ReadValue(frame, ManaRegion);
var es = ReadValue(frame, EsRegion);
var spirit = ReadValue(frame, SpiritRegion);
var manaPct = SampleFillLevel(frame, ManaX, IsManaPixel);
var shieldPct = SampleShieldRing(frame);
// If life globe is cyan (1-life build), life = 0
var redFill = SampleFillLevel(frame, LifeX, IsLifePixel);
var cyanFill = SampleFillLevel(frame, LifeX, IsCyanPixel);
var lifePct = cyanFill > redFill ? 0f : redFill;
var snapshot = new HudSnapshot
{
Life = life,
Mana = mana,
EnergyShield = es,
Spirit = spirit,
LifePct = lifePct,
ManaPct = manaPct,
ShieldPct = shieldPct,
Timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
};
_current = snapshot;
Updated?.Invoke(snapshot);
if (snapshot.LifePct < 0.3f)
if (lifePct < 0.3f)
LowLife?.Invoke(snapshot);
}
catch (Exception ex)
{
Log.Debug(ex, "HudReader OCR failed");
Log.Debug(ex, "HudReader sample failed");
}
}
private HudValues? ReadValue(ScreenFrame frame, Region region)
/// <summary>
/// Scan from top to bottom to find the first Y row where the fill color appears.
/// Fill % = 1 - (firstFilledY - GlobeTop) / (GlobeBottom - GlobeTop).
/// At each Y, sample a horizontal band of pixels and require MinHits matches.
/// </summary>
private static float SampleFillLevel(ScreenFrame frame, int centerX, Func<Vec4b, bool> colorTest)
{
// Bounds check
if (region.X + region.Width > frame.Width || region.Y + region.Height > frame.Height)
return null;
if (centerX >= frame.Width || GlobeBottom >= frame.Height) return 0f;
using var bgr = frame.CropBgr(region);
using var gray = new Mat();
Cv2.CvtColor(bgr, gray, ColorConversionCodes.BGR2GRAY);
int height = GlobeBottom - GlobeTop;
if (height <= 0) return 0f;
// Threshold for white text on dark background
using var thresh = new Mat();
Cv2.Threshold(gray, thresh, 180, 255, ThresholdTypes.Binary);
int xMin = Math.Max(0, centerX - SampleHalfWidth);
int xMax = Math.Min(frame.Width - 1, centerX + SampleHalfWidth);
// Convert to Bitmap for OCR bridge
var bytes = thresh.ToBytes(".png");
using var ms = new System.IO.MemoryStream(bytes);
using var bitmap = new Bitmap(ms);
// Scan from top down — find first row with enough matching pixels
for (int y = GlobeTop; y <= GlobeBottom; y++)
{
int hits = 0;
for (int x = xMin; x <= xMax; x++)
{
if (colorTest(frame.PixelAt(x, y)))
hits++;
if (hits >= MinHits) break;
}
var result = _ocr.OcrFromBitmap(bitmap);
if (string.IsNullOrWhiteSpace(result.Text)) return null;
if (hits >= MinHits)
{
// Fill level = how far down from top this first row is
// If found at GlobeTop → 100%, at GlobeBottom → 0%
return 1f - (float)(y - GlobeTop) / height;
}
}
var match = ValuePattern.Match(result.Text);
if (!match.Success) return null;
return new HudValues(
int.Parse(match.Groups[1].Value),
int.Parse(match.Groups[2].Value)
);
return 0f; // no fill found
}
/// <summary>
/// Estimates shield fill from the right-hand arc of the ring around the life globe.
/// Rows are walked from the top of the ring (12 o'clock) down to the bottom (6 o'clock);
/// on each row a ±3 px band around the arc's X is probed for cyan pixels. The first row
/// with at least <c>MinHits</c> cyan pixels is converted to an angle on the semicircle and
/// the result is 1 − (arc fraction from the top).
/// Returns 0 when the ring's bottom row lies outside the frame or no cyan row is found.
/// </summary>
private static float SampleShieldRing(ScreenFrame frame)
{
    const int probeHalfWidth = 3;

    int top = ShieldCY - ShieldRadius;
    int bottom = ShieldCY + ShieldRadius;
    if (bottom >= frame.Height) return 0f;

    int radiusSq = ShieldRadius * ShieldRadius;

    for (int row = top; row <= bottom; row++)
    {
        // X of the right arc on this row: cx + sqrt(r² − dy²)
        int offsetY = row - ShieldCY;
        int arcX = ShieldCX + (int)Math.Sqrt(radiusSq - offsetY * offsetY);
        if (arcX >= frame.Width) continue;

        int from = Math.Max(0, arcX - probeHalfWidth);
        int to = Math.Min(frame.Width - 1, arcX + probeHalfWidth);

        // Stop probing as soon as enough cyan pixels have been seen.
        int cyanCount = 0;
        for (int col = from; col <= to && cyanCount < MinHits; col++)
        {
            if (IsCyanPixel(frame.PixelAt(col, row)))
                cyanCount++;
        }

        if (cyanCount < MinHits) continue;

        // θ = arcsin(dy / r) runs from −π/2 (top) to +π/2 (bottom);
        // (θ + π/2) / π is the fraction of the arc above this row.
        double sine = Math.Clamp((double)offsetY / ShieldRadius, -1, 1);
        double fractionFromTop = (Math.Asin(sine) + Math.PI / 2) / Math.PI;
        return (float)(1.0 - fractionFromTop);
    }

    return 0f;
}
// Vec4b channel order: [0]=B, [1]=G, [2]=R, [3]=A
/// <summary>
/// True when red exceeds <c>MinChannel</c> and is more than <c>DominanceRatio</c> times
/// both the green and the blue channel.
/// </summary>
private static bool IsLifePixel(Vec4b px)
{
    var blue = px[0];
    var green = px[1];
    var red = px[2];

    if (red <= MinChannel) return false;
    return red > green * DominanceRatio
        && red > blue * DominanceRatio;
}
/// <summary>
/// True when blue (index 0) exceeds <c>MinChannel</c> and is more than
/// <c>DominanceRatio</c> times both the green (index 1) and the red (index 2) channel.
/// </summary>
private static bool IsManaPixel(Vec4b px)
{
    var blue = px[0];
    var green = px[1];
    var red = px[2];

    if (blue <= MinChannel) return false;
    return blue > green * DominanceRatio
        && blue > red * DominanceRatio;
}
/// <summary>
/// True when blue (index 0) and green (index 1) both exceed <c>MinChannel</c> and each is
/// more than <c>DominanceRatio</c> times the red channel (index 2).
/// </summary>
private static bool IsCyanPixel(Vec4b px)
{
    var blue = px[0];
    var green = px[1];

    if (blue <= MinChannel || green <= MinChannel) return false;

    var redScaled = px[2] * DominanceRatio;
    return blue > redScaled && green > redScaled;
}
}

View file

@ -17,6 +17,8 @@ public interface IScreenReader : IDisposable
Task Snapshot();
Task<DiffOcrResponse> DiffOcr(string? savePath = null, Region? region = null);
Task<TemplateMatchResult?> TemplateMatch(string templatePath, Region? region = null);
Task<OcrResponse> NameplateDiffOcr(System.Drawing.Bitmap reference, System.Drawing.Bitmap current);
System.Drawing.Bitmap CaptureRawBitmap();
Task SaveScreenshot(string path);
Task SaveRegion(Region region, string path);
}

View file

@ -35,7 +35,7 @@ class PythonDetectBridge : IDisposable
/// <summary>
/// Run YOLO detection on a BGR Mat. Returns parsed detection results.
/// </summary>
public DetectResponse Detect(Mat bgrMat, float conf = 0.3f, float iou = 0.45f, int imgsz = 640)
public DetectResponse Detect(Mat bgrMat, float conf = 0.3f, float iou = 0.45f, int imgsz = 640, string? model = null)
{
EnsureRunning();
@ -49,6 +49,7 @@ class PythonDetectBridge : IDisposable
["conf"] = conf,
["iou"] = iou,
["imgsz"] = imgsz,
["model"] = model,
};
return SendRequest(req);

View file

@ -1,6 +1,10 @@
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using Poe2Trade.Core;
using OpenCvSharp.Extensions;
using Serilog;
using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen;
@ -178,6 +182,144 @@ public class ScreenReader : IScreenReader
return Task.CompletedTask;
}
// -- Nameplate diff OCR --
/// <summary>
/// Returns the bitmap produced by <c>ScreenCapture.CaptureOrLoad</c> when called with both
/// arguments null.
/// NOTE(review): the caller presumably owns and must dispose the returned bitmap — confirm.
/// </summary>
public Bitmap CaptureRawBitmap()
{
    return ScreenCapture.CaptureOrLoad(null, null);
}
/// <summary>
/// OCRs the screen regions that became noticeably brighter between
/// <paramref name="reference"/> and <paramref name="current"/> (nameplates are bright text).
/// </summary>
/// <remarks>
/// Only the overlapping top-left area (minimum width/height of the two bitmaps) is compared.
/// A pixel is marked when the summed B+G+R increase exceeds a fixed threshold; marked pixels
/// are grouped into boxes by <c>FindBrightClusters</c>, and each box (padded by 4 px) is
/// cropped from <paramref name="current"/> and sent to the OCR bridge. The work is done
/// synchronously; the returned task is already completed.
/// </remarks>
/// <param name="reference">Baseline capture.</param>
/// <param name="current">Later capture; OCR crops are taken from this bitmap.</param>
/// <returns>
/// All OCR lines from all clusters, with word coordinates shifted by each crop's origin so
/// they are relative to <paramref name="current"/>. Empty text and no lines when no cluster
/// is found.
/// </returns>
/// <exception cref="ArgumentNullException">Either bitmap is null.</exception>
public Task<OcrResponse> NameplateDiffOcr(Bitmap reference, Bitmap current)
{
    ArgumentNullException.ThrowIfNull(reference);
    ArgumentNullException.ThrowIfNull(current);

    int w = Math.Min(reference.Width, current.Width);
    int h = Math.Min(reference.Height, current.Height);

    // Each bitmap is locked, copied and unlocked on its own. This guarantees the lock is
    // released on failure and keeps each buffer paired with its own stride: two bitmaps of
    // different widths generally have different strides, so one stride cannot index both.
    var (refPx, refStride) = CopyArgbPixels(reference, w, h);
    var (curPx, curStride) = CopyArgbPixels(current, w, h);

    // Build a binary mask of pixels that got significantly brighter (nameplates are bright text)
    const int brightThresh = 30;
    bool[] mask = new bool[w * h];
    Parallel.For(0, h, y =>
    {
        int refRow = y * refStride;
        int curRow = y * curStride;
        for (int x = 0; x < w; x++)
        {
            int r = refRow + x * 4;
            int c = curRow + x * 4;
            // Sum of the B, G and R deltas; the alpha byte (offset 3) is ignored.
            int brighter = (curPx[c] - refPx[r]) + (curPx[c + 1] - refPx[r + 1]) + (curPx[c + 2] - refPx[r + 2]);
            if (brighter > brightThresh)
                mask[y * w + x] = true;
        }
    });

    // Find connected clusters via row-scan: collect bounding boxes of bright regions
    var boxes = FindBrightClusters(mask, w, h, minWidth: 40, minHeight: 10, maxGap: 8);
    Log.Information("NameplateDiff: found {Count} bright clusters", boxes.Count);

    if (boxes.Count == 0)
        return Task.FromResult(new OcrResponse { Text = "", Lines = [] });

    // OCR each cluster crop, accumulate results with screen-space coordinates
    var allLines = new List<OcrLine>();
    var allText = new List<string>();
    const int pad = 4;

    foreach (var box in boxes)
    {
        // Pad the crop slightly, clamped to the compared area
        int cx = Math.Max(0, box.X - pad);
        int cy = Math.Max(0, box.Y - pad);
        int cw = Math.Min(w - cx, box.Width + pad * 2);
        int ch = Math.Min(h - cy, box.Height + pad * 2);

        using var crop = current.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
        var ocrResult = _pythonBridge.OcrFromBitmap(crop);

        // Offset word coordinates from crop space to screen space
        foreach (var line in ocrResult.Lines)
        {
            foreach (var word in line.Words)
            {
                word.X += cx;
                word.Y += cy;
            }
            allLines.Add(line);
            allText.Add(line.Text);
        }
    }

    return Task.FromResult(new OcrResponse
    {
        Text = string.Join("\n", allText),
        Lines = allLines,
    });

    // Copies the top-left width×height area of a bitmap as 32bpp ARGB bytes and returns the
    // buffer together with the stride (bytes per row) that applies to that buffer.
    static (byte[] Pixels, int Stride) CopyArgbPixels(Bitmap bitmap, int width, int height)
    {
        var data = bitmap.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            var pixels = new byte[data.Stride * height];
            Marshal.Copy(data.Scan0, pixels, 0, pixels.Length);
            return (pixels, data.Stride);
        }
        finally
        {
            bitmap.UnlockBits(data);
        }
    }
}
/// <summary>
/// Converts a row-major <paramref name="w"/>×<paramref name="h"/> boolean mask into bounding
/// boxes of bright regions using two projection passes.
/// </summary>
/// <remarks>
/// First, rows containing at least 3 set pixels are grouped into vertical bands; inactive
/// stretches of up to <paramref name="maxGap"/> rows do not split a band, and bands shorter
/// than <paramref name="minHeight"/> are dropped. Then, inside each band, columns containing
/// at least 1 set pixel are grouped the same way (gap tolerance <paramref name="maxGap"/>,
/// minimum extent <paramref name="minWidth"/>). Every surviving column run yields one
/// rectangle spanning the full band height.
/// </remarks>
/// <returns>Boxes ordered by band (top to bottom), then left to right within a band.</returns>
private static List<Rectangle> FindBrightClusters(bool[] mask, int w, int h, int minWidth, int minHeight, int maxGap)
{
    const int rowThreshold = 3;
    const int colThreshold = 1;

    // Row density: number of set pixels per row
    var rowHits = new int[h];
    for (int y = 0, rowOff = 0; y < h; y++, rowOff += w)
    {
        for (int x = 0; x < w; x++)
        {
            if (mask[rowOff + x]) rowHits[y]++;
        }
    }

    var boxes = new List<Rectangle>();
    foreach (var (top, bottom) in CollectRuns(rowHits, rowThreshold, minHeight, maxGap))
    {
        // Column density restricted to this band
        var colHits = new int[w];
        for (int y = top; y <= bottom; y++)
        {
            int rowOff = y * w;
            for (int x = 0; x < w; x++)
            {
                if (mask[rowOff + x]) colHits[x]++;
            }
        }

        int bandHeight = bottom - top + 1;
        foreach (var (left, right) in CollectRuns(colHits, colThreshold, minWidth, maxGap))
        {
            boxes.Add(new Rectangle(left, top, right - left + 1, bandHeight));
        }
    }

    return boxes;

    // Returns inclusive [first, last] index ranges of entries >= threshold. A run is closed
    // only once more than gapLimit inactive entries follow its last active entry (or at the
    // end of the array), and is kept only if it spans at least minLength entries.
    static List<(int First, int Last)> CollectRuns(int[] counts, int threshold, int minLength, int gapLimit)
    {
        var runs = new List<(int First, int Last)>();
        int first = -1, last = -1;

        for (int i = 0; i < counts.Length; i++)
        {
            if (counts[i] >= threshold)
            {
                if (first < 0) first = i;
                last = i;
                continue;
            }

            // Inactive entry: ignore unless a run is open and the gap is now too large.
            if (first < 0 || i - last <= gapLimit) continue;

            if (last - first + 1 >= minLength)
                runs.Add((first, last));
            first = -1;
        }

        if (first >= 0 && last - first + 1 >= minLength)
            runs.Add((first, last));

        return runs;
    }
}
/// <summary>Disposes the Python bridge held by this reader.</summary>
public void Dispose()
{
    _pythonBridge.Dispose();
}
// -- OCR text matching --

View file

@ -27,27 +27,56 @@ class TemplateMatchHandler
else
screenMat.CopyTo(screenBgr);
// Template must fit within screenshot
if (template.Rows > screenBgr.Rows || template.Cols > screenBgr.Cols)
// Try exact size first (fast path)
var exact = MatchAtScale(screenBgr, template, region, 1.0, threshold);
if (exact is { Confidence: > 0.95 })
return exact;
// Multi-scale: resize template from 50% to 150% in steps of 10%
TemplateMatchResult? best = exact;
for (var pct = 50; pct <= 150; pct += 10)
{
var scale = pct / 100.0;
if (pct == 100) continue; // already tried
var match = MatchAtScale(screenBgr, template, region, scale, threshold);
if (match != null && (best == null || match.Confidence > best.Confidence))
{
best = match;
if (best.Confidence > 0.95) break;
}
}
return best;
}
private static TemplateMatchResult? MatchAtScale(Mat screen, Mat template,
Region? region, double scale, double threshold)
{
using var scaled = scale == 1.0 ? template.Clone()
: template.Resize(new OpenCvSharp.Size(
Math.Max(1, (int)(template.Cols * scale)),
Math.Max(1, (int)(template.Rows * scale))));
if (scaled.Rows > screen.Rows || scaled.Cols > screen.Cols)
return null;
using var result = new Mat();
Cv2.MatchTemplate(screenBgr, template, result, TemplateMatchModes.CCoeffNormed);
Cv2.MatchTemplate(screen, scaled, result, TemplateMatchModes.CCoeffNormed);
Cv2.MinMaxLoc(result, out _, out double maxVal, out _, out OpenCvSharp.Point maxLoc);
if (maxVal < threshold)
return null;
int offsetX = region?.X ?? 0;
int offsetY = region?.Y ?? 0;
var offsetX = region?.X ?? 0;
var offsetY = region?.Y ?? 0;
return new TemplateMatchResult
{
X = offsetX + maxLoc.X + template.Cols / 2,
Y = offsetY + maxLoc.Y + template.Rows / 2,
Width = template.Cols,
Height = template.Rows,
X = offsetX + maxLoc.X + scaled.Cols / 2,
Y = offsetY + maxLoc.Y + scaled.Rows / 2,
Width = scaled.Cols,
Height = scaled.Rows,
Confidence = maxVal,
};
}