work on first boss

This commit is contained in:
Boki 2026-02-21 12:30:41 -05:00
parent 053a016c8b
commit 89c3a0a077
16 changed files with 702 additions and 150 deletions

View file

@ -17,6 +17,7 @@ public interface IScreenReader : IDisposable
Task Snapshot();
Task<DiffOcrResponse> DiffOcr(string? savePath = null, Region? region = null);
Task<TemplateMatchResult?> TemplateMatch(string templatePath, Region? region = null);
Task<List<TemplateMatchResult>> TemplateMatchAll(string templatePath, Region? region = null, double threshold = 0.7);
Task<OcrResponse> NameplateDiffOcr(System.Drawing.Bitmap reference, System.Drawing.Bitmap current);
void SetLootBaseline(System.Drawing.Bitmap frame);
List<LootLabel> DetectLootLabels(System.Drawing.Bitmap reference, System.Drawing.Bitmap current);

View file

@ -74,7 +74,17 @@ class PythonOcrBridge : IDisposable
{
_proc!.StandardInput.WriteLine(json);
_proc.StandardInput.Flush();
responseLine = _proc.StandardOutput.ReadLine()
// Read with timeout to prevent indefinite hang
var readTask = Task.Run(() => _proc.StandardOutput.ReadLine());
if (!readTask.Wait(TimeSpan.FromSeconds(15)))
{
Log.Warning("Python OCR daemon timed out after 15s, restarting");
try { _proc.Kill(); } catch { /* best effort */ }
_proc = null;
throw new TimeoutException("Python OCR daemon timed out");
}
responseLine = readTask.Result
?? throw new Exception("Python daemon returned null");
}

View file

@ -52,21 +52,28 @@ public class ScreenReader : IScreenReader
/// <summary>
/// Captures the screen (optionally limited to <paramref name="region"/>), runs it
/// through the Python OCR bridge and logs the elapsed time and recognised text.
/// </summary>
/// <param name="region">Region handed to <c>ScreenCapture.CaptureOrLoad</c>; <c>null</c> is passed through unchanged.</param>
/// <param name="preprocess">
/// <c>"tophat"</c> selects <c>ImagePreprocessor.PreprocessForOcr</c>, <c>"clahe"</c> selects
/// <c>ImagePreprocessor.PreprocessClahe</c>; any other value (including <c>null</c>) OCRs the raw capture.
/// </param>
/// <returns>An already-completed task holding the OCR response (the work runs synchronously).</returns>
public Task<OcrResponse> Ocr(Region? region = null, string? preprocess = null)
{
    var sw = System.Diagnostics.Stopwatch.StartNew();
    using var bitmap = ScreenCapture.CaptureOrLoad(null, region);

    // Every branch assigns `result` instead of returning early, so the
    // timing/log statement below is reached for all preprocess modes.
    OcrResponse result;
    if (preprocess == "tophat")
    {
        using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);
        result = _pythonBridge.OcrFromBitmap(processed);
    }
    else if (preprocess == "clahe")
    {
        using var processed = ImagePreprocessor.PreprocessClahe(bitmap);
        result = _pythonBridge.OcrFromBitmap(processed);
    }
    else
    {
        result = _pythonBridge.OcrFromBitmap(bitmap);
    }

    var allText = string.Join(" | ", result.Lines.Select(l => l.Text));
    Log.Information("OCR completed in {Ms}ms ({Lines} lines): {Text}", sw.ElapsedMilliseconds, result.Lines.Count, allText);
    return Task.FromResult(result);
}
public async Task<(int X, int Y)?> FindTextOnScreen(string searchText, bool fuzzy = false)
@ -170,6 +177,13 @@ public class ScreenReader : IScreenReader
return Task.FromResult(result);
}
/// <summary>
/// Delegates to the template-match handler's <c>MatchAll</c>, logs how many hits
/// were found for the template file, and wraps the list in a completed task.
/// </summary>
public Task<List<TemplateMatchResult>> TemplateMatchAll(string templatePath, Region? region = null, double threshold = 0.7)
{
    var matches = _templateMatch.MatchAll(templatePath, region, threshold);

    // Only the file name (not the full path) goes into the log line.
    var templateName = Path.GetFileName(templatePath);
    Log.Information("TemplateMatchAll: {Count} matches for {Template}", matches.Count, templateName);

    return Task.FromResult(matches);
}
// -- Save --
public Task SaveScreenshot(string path)
@ -229,8 +243,9 @@ public class ScreenReader : IScreenReader
var allLines = new List<OcrLine>();
var allText = new List<string>();
foreach (var box in boxes)
for (int bi = 0; bi < boxes.Count; bi++)
{
var box = boxes[bi];
// Pad the crop slightly
int pad = 4;
int cx = Math.Max(0, box.X - pad);
@ -239,7 +254,19 @@ public class ScreenReader : IScreenReader
int ch = Math.Min(h - cy, box.Height + pad * 2);
using var crop = current.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
var ocrResult = _pythonBridge.OcrFromBitmap(crop);
var clusterSw = System.Diagnostics.Stopwatch.StartNew();
OcrResponse ocrResult;
try
{
ocrResult = _pythonBridge.OcrFromBitmap(crop);
}
catch (TimeoutException)
{
Log.Warning("NameplateDiffOcr: cluster {I}/{Count} OCR timed out, skipping", bi + 1, boxes.Count);
continue;
}
Log.Debug("NameplateDiffOcr: cluster {I}/{Count} ({W}x{H}) OCR took {Ms}ms",
bi + 1, boxes.Count, cw, ch, clusterSw.ElapsedMilliseconds);
// Offset word coordinates to screen space
foreach (var line in ocrResult.Lines)
@ -322,78 +349,301 @@ public class ScreenReader : IScreenReader
return boxes;
}
// -- Loot label detection (magenta background) --
// -- Loot label detection (Canny edge + contour) --
//
// All loot labels: white border, magenta (255,0,255) background, black text.
// Magenta never appears in the game world → detect directly, no diff needed.
// Finds rectangular contours from Canny edges, filters by shape
// (aspect ratio, size, rectangularity) and content (label interior
// must have visible color/brightness, unlike the dark game world).
// Single frame — no diff needed, no custom filter colors required.
/// <summary>
/// Intentionally empty: <paramref name="frame"/> is not stored or read.
/// The member exists because <c>IScreenReader</c> declares <c>SetLootBaseline</c>.
/// </summary>
public void SetLootBaseline(Bitmap frame) { }
// Detection parameters
// -- Loot detection constants --
private const int CannyLow = 20, CannyHigh = 80;
// Shape constraints
private const int LabelMinW = 80, LabelMaxW = 500;
private const int LabelMinH = 15, LabelMaxH = 100;
private const double LabelMinAspect = 1.3, LabelMaxAspect = 10.0;
// Strict pass: well-formed rectangle contours
private const double MinRectangularity = 0.5;
private const float StrictMinBS = 200f;
private const float StrictMinEdgeDensity = 25f;
// Relaxed pass: any contour bbox in play area (catches VFX-broken borders)
private const int RelaxedMinW = 100;
private const float RelaxedMinBS = 250f;
private const float RelaxedMinEdgeDensity = 25f;
private const double UiMarginTop = 0.08;
private const double UiMarginBottom = 0.82;
// Post-processing
private const int MergeGap = 30;
private const int MergeYTolerance = 15;
private const double NmsIouThresh = 0.4;
/// <summary>
/// Two-pass loot label detection:
/// 1. Strict: polygon-approximated rectangle contours (high precision)
/// 2. Relaxed: any contour bbox in play area (catches VFX-broken borders)
/// Results merged, horizontal fragments joined, then NMS.
/// </summary>
public List<LootLabel> DetectLootLabels(Bitmap reference, Bitmap current)
{
// Detects loot labels in `current` and returns them sorted by CenterY.
// `reference` is not read anywhere in this body.
//
// NOTE(review): this listing appears to interleave two versions of the method
// (it comes from a diff view). Evidence: `contours` is declared by two
// FindContours calls (on `dilated` and on `closed`), `labels` is declared twice,
// and both a magenta-mask pipeline (`mask`/`closed`) and a Canny pipeline
// (`edges`/`dilated`) are present. Confirm against the real file which lines are live.
using var mat = BitmapConverter.ToMat(current);
// Drop the alpha channel so the colour conversions below see 3-channel BGR.
if (mat.Channels() == 4)
Cv2.CvtColor(mat, mat, ColorConversionCodes.BGRA2BGR);
// Mask magenta background pixels (BGR: B≈255, G≈0, R≈255)
using var mask = new Mat();
Cv2.InRange(mat, new Scalar(200, 0, 200), new Scalar(255, 60, 255), mask);
// Vertical band (fractions of image height) used only by the relaxed pass below.
int imgH = mat.Height, imgW = mat.Width;
int playTop = (int)(imgH * UiMarginTop);
int playBot = (int)(imgH * UiMarginBottom);
// Morph close fills text gaps within a label
// Height=2 bridges line gaps within multi-line labels but not between separate labels
using var kernel = Cv2.GetStructuringElement(MorphShapes.Rect, new Size(12, 2));
using var closed = new Mat();
Cv2.MorphologyEx(mask, closed, MorphTypes.Close, kernel);
using var gray = new Mat();
Cv2.CvtColor(mat, gray, ColorConversionCodes.BGR2GRAY);
// Save debug images
using var hsv = new Mat();
Cv2.CvtColor(mat, hsv, ColorConversionCodes.BGR2HSV);
// Edge detection
using var edges = new Mat();
Cv2.Canny(gray, edges, CannyLow, CannyHigh);
// One 3x3 dilation pass over the edge map before contour extraction.
using var dilateKernel = Cv2.GetStructuringElement(MorphShapes.Rect, new Size(3, 3));
using var dilated = new Mat();
Cv2.Dilate(edges, dilated, dilateKernel, iterations: 1);
Cv2.FindContours(dilated, out var contours, out _, RetrievalModes.Tree, ContourApproximationModes.ApproxSimple);
var strict = new List<LabelCandidate>();
var relaxed = new List<LabelCandidate>();
foreach (var contour in contours)
{
var box = Cv2.BoundingRect(contour);
// Common shape gate
if (box.Width <= LabelMinW || box.Width >= LabelMaxW) continue;
if (box.Height <= LabelMinH || box.Height >= LabelMaxH) continue;
double aspect = (double)box.Width / Math.Max(box.Height, 1);
if (aspect <= LabelMinAspect || aspect >= LabelMaxAspect) continue;
// Content metrics
// bs = mean of HSV channel 2 + mean of HSV channel 1 over the bounding box.
using var roiHsv = new Mat(hsv, box);
var meanHsv = Cv2.Mean(roiHsv);
float meanVal = (float)meanHsv[2];
float meanSat = (float)meanHsv[1];
float bs = meanVal + meanSat;
// ed = mean of a second Canny pass (50/150) restricted to the grayscale ROI.
using var roiGray = new Mat(gray, box);
using var roiEdges = new Mat();
Cv2.Canny(roiGray, roiEdges, 50, 150);
float ed = (float)Cv2.Mean(roiEdges)[0];
// Strict pass: well-formed polygon (4-8 vertices)
var peri = Cv2.ArcLength(contour, true);
var approx = Cv2.ApproxPolyDP(contour, 0.02 * peri, true);
if (approx.Length >= 4 && approx.Length <= 8)
{
// rect = area of the approximated polygon relative to its bounding-box area.
double contourArea = Cv2.ContourArea(approx);
double rect = contourArea / Math.Max(box.Width * box.Height, 1);
if (rect >= MinRectangularity && bs >= StrictMinBS && ed >= StrictMinEdgeDensity)
strict.Add(new LabelCandidate(box.X, box.Y, box.Width, box.Height, meanVal, meanSat));
}
// Relaxed pass: any contour bbox in play area
// A contour can be added to both lists; the merge step below drops relaxed
// entries that overlap a strict one.
bool inPlay = box.Y > playTop && box.Y + box.Height < playBot;
if (inPlay && box.Width >= RelaxedMinW && bs >= RelaxedMinBS && ed >= RelaxedMinEdgeDensity)
relaxed.Add(new LabelCandidate(box.X, box.Y, box.Width, box.Height, meanVal, meanSat));
}
// Merge strict + relaxed (strict wins on overlap)
var merged = new List<LabelCandidate>(strict);
foreach (var rlb in relaxed)
{
if (!OverlapsAny(rlb, strict, 0.3))
merged.Add(rlb);
}
// Join horizontal fragments
merged = MergeHorizontal(merged, MergeGap, MergeYTolerance);
// Build LootLabels with color classification
// The NMS score for each label is its mean brightness + mean saturation.
var scored = new List<(LootLabel Label, float Score)>();
foreach (var c in merged)
{
var (avgR, avgG, avgB) = SampleLabelColor(mat, c.X, c.Y, c.W, c.H);
var tier = LootColorClassifier.Classify(avgR, avgG, avgB);
int cx = c.X + c.W / 2;
int cy = c.Y + c.H / 2;
scored.Add((new LootLabel(cx, cy, c.W, c.H, tier, avgR, avgG, avgB),
c.MeanBrightness + c.MeanSaturation));
}
// Final NMS
var labels = NmsLootLabels(scored, NmsIouThresh);
labels.Sort((a, b) => a.CenterY.CompareTo(b.CenterY));
// Debug images
// Written with relative file names on every call; failures are logged and ignored.
try
{
Cv2.ImWrite("debug_loot_mask.png", mask);
Cv2.ImWrite("debug_loot_closed.png", closed);
current.Save("debug_loot_capture.png", System.Drawing.Imaging.ImageFormat.Png);
Log.Information("Saved debug images: debug_loot_mask.png, debug_loot_closed.png, debug_loot_capture.png");
Cv2.ImWrite("debug_loot_edges.png", edges);
Cv2.ImWrite("debug_loot_dilated.png", dilated);
using var debugMat = mat.Clone();
foreach (var label in labels)
Cv2.Rectangle(debugMat,
new Rect(label.CenterX - label.Width / 2, label.CenterY - label.Height / 2, label.Width, label.Height),
new Scalar(0, 255, 0), 2);
Cv2.ImWrite("debug_loot_detected.png", debugMat);
}
catch (Exception ex)
{
Log.Warning(ex, "Failed to save debug images");
}
// NOTE(review): from here to the closing brace of the second foreach the code
// re-declares `contours` and `labels` and works on the magenta `closed` mask;
// it looks like the previous implementation left in the listing — confirm.
Cv2.FindContours(closed, out var contours, out _,
RetrievalModes.External, ContourApproximationModes.ApproxSimple);
Log.Information("DetectLootLabels: {N} magenta contours", contours.Length);
const int minW = 40, maxW = 600;
const int minH = 8, maxH = 100;
const double minAspect = 1.5;
int yMax = mat.Height - 210;
var labels = new List<LootLabel>();
foreach (var contour in contours)
{
var box = Cv2.BoundingRect(contour);
double aspect = box.Height > 0 ? (double)box.Width / box.Height : 0;
if (box.Width < minW || box.Width > maxW ||
box.Height < minH || box.Height > maxH ||
aspect < minAspect ||
box.Y < 65 || box.Y + box.Height > yMax)
{
Log.Information("Rejected contour: ({X},{Y}) {W}x{H} aspect={Aspect:F1} yMax={YMax}",
box.X, box.Y, box.Width, box.Height, aspect, yMax);
continue;
}
int cx = box.X + box.Width / 2;
int cy = box.Y + box.Height / 2;
Log.Information("Label at ({X},{Y}) {W}x{H}", box.X, box.Y, box.Width, box.Height);
labels.Add(new LootLabel(cx, cy, box.Width, box.Height, "loot", 255, 0, 255));
}
// Summary of candidate counts per stage, then one line per surviving label
// (logged with its top-left corner, derived from the stored centre).
Log.Information("DetectLootLabels: strict={Strict} relaxed={Relaxed} merged={Merged} final={Final}",
strict.Count, relaxed.Count, merged.Count, labels.Count);
foreach (var label in labels)
Log.Information(" Label ({X},{Y}) {W}x{H} color=({R},{G},{B}) tier={Tier}",
label.CenterX - label.Width / 2, label.CenterY - label.Height / 2,
label.Width, label.Height, label.AvgR, label.AvgG, label.AvgB, label.Tier);
return labels;
}
// -- Loot detection helpers --
/// <summary>
/// Intermediate detection box: top-left corner (<c>X</c>, <c>Y</c>), size (<c>W</c>, <c>H</c>)
/// and the mean brightness / mean saturation measured over the box. DetectLootLabels
/// fills the last two from the HSV mean of the bounding rectangle.
/// </summary>
private record struct LabelCandidate(int X, int Y, int W, int H, float MeanBrightness, float MeanSaturation);
/// <summary>
/// Averages the colour of the central part of a box: the sample window starts a
/// quarter of the way in from the top-left and spans half the width and height,
/// clipped to the right/bottom edges of <paramref name="mat"/>.
/// </summary>
/// <returns>
/// Channel means as (R, G, B), taken from mean channels 2, 1, 0 respectively
/// (assumes <paramref name="mat"/> is BGR-ordered, as DetectLootLabels passes it);
/// (0, 0, 0) when the clipped window is empty.
/// </returns>
private static (byte R, byte G, byte B) SampleLabelColor(Mat mat, int x, int y, int w, int h)
{
    int left = x + w / 4;
    int top = y + h / 4;
    int width = Math.Min(mat.Cols - left, w / 2);
    int height = Math.Min(mat.Rows - top, h / 2);

    if (width <= 0 || height <= 0)
    {
        return (0, 0, 0);
    }

    var window = new Rect(left, top, width, height);
    using var inner = new Mat(mat, window);
    var channelMeans = Cv2.Mean(inner);

    byte r = (byte)channelMeans[2];
    byte g = (byte)channelMeans[1];
    byte b = (byte)channelMeans[0];
    return (r, g, b);
}
/// <summary>
/// Returns true as soon as <paramref name="label"/> has an intersection-over-union
/// strictly greater than <paramref name="iouThresh"/> with any box in <paramref name="others"/>.
/// </summary>
private static bool OverlapsAny(LabelCandidate label, List<LabelCandidate> others, double iouThresh)
{
    int labelRight = label.X + label.W;
    int labelBottom = label.Y + label.H;
    int labelArea = label.W * label.H;

    for (int k = 0; k < others.Count; k++)
    {
        var other = others[k];

        // Intersection rectangle; a non-positive extent on either axis means no overlap.
        int overlapW = Math.Min(labelRight, other.X + other.W) - Math.Max(label.X, other.X);
        int overlapH = Math.Min(labelBottom, other.Y + other.H) - Math.Max(label.Y, other.Y);
        int intersection = Math.Max(0, overlapW) * Math.Max(0, overlapH);

        // Union is floored at 1 so degenerate (zero-area) boxes cannot divide by zero.
        int unionArea = labelArea + other.W * other.H - intersection;
        double iou = intersection / (double)Math.Max(unionArea, 1);
        if (iou > iouThresh)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Greedily fuses boxes that lie on the same text line into one bounding box.
/// Boxes are visited in (Y, X) order; each unvisited box seeds a group that keeps
/// absorbing any remaining box whose vertical centre is within <paramref name="yTol"/>
/// of the group's centre and whose horizontal gap to the group is at most
/// <paramref name="gap"/>, until a full sweep adds nothing. Brightness and
/// saturation of the group are area-weighted averages of its members.
/// </summary>
/// <returns>The input list itself when it has fewer than two items; otherwise a new list of groups.</returns>
private static List<LabelCandidate> MergeHorizontal(List<LabelCandidate> labels, int gap, int yTol)
{
    if (labels.Count < 2)
    {
        return labels;
    }

    var order = Enumerable.Range(0, labels.Count)
        .OrderBy(i => labels[i].Y)
        .ThenBy(i => labels[i].X)
        .ToList();
    var consumed = new bool[labels.Count];
    var groups = new List<LabelCandidate>();

    foreach (int seedIdx in order)
    {
        if (consumed[seedIdx])
        {
            continue;
        }
        consumed[seedIdx] = true;

        var seed = labels[seedIdx];
        int left = seed.X, top = seed.Y;
        int right = seed.X + seed.W, bottom = seed.Y + seed.H;
        double weightedBri = seed.MeanBrightness * seed.W * seed.H;
        double weightedSat = seed.MeanSaturation * seed.W * seed.H;
        int areaSum = seed.W * seed.H;

        // Re-sweep until stable: absorbing a box enlarges the group, which can
        // bring boxes skipped earlier in the sweep within range.
        bool grew;
        do
        {
            grew = false;
            foreach (int otherIdx in order)
            {
                if (consumed[otherIdx])
                {
                    continue;
                }

                var other = labels[otherIdx];

                double groupMidY = (top + bottom) / 2.0;
                double otherMidY = other.Y + other.H / 2.0;
                if (Math.Abs(groupMidY - otherMidY) > yTol)
                {
                    continue;
                }

                // Negative when the boxes overlap horizontally.
                int horizontalGap = Math.Max(other.X - right, left - (other.X + other.W));
                if (horizontalGap > gap)
                {
                    continue;
                }

                int otherArea = other.W * other.H;
                left = Math.Min(left, other.X);
                top = Math.Min(top, other.Y);
                right = Math.Max(right, other.X + other.W);
                bottom = Math.Max(bottom, other.Y + other.H);
                weightedBri += other.MeanBrightness * otherArea;
                weightedSat += other.MeanSaturation * otherArea;
                areaSum += otherArea;

                consumed[otherIdx] = true;
                grew = true;
            }
        }
        while (grew);

        float groupBri = (float)(weightedBri / Math.Max(areaSum, 1));
        float groupSat = (float)(weightedSat / Math.Max(areaSum, 1));
        groups.Add(new LabelCandidate(left, top, right - left, bottom - top, groupBri, groupSat));
    }

    return groups;
}
/// <summary>
/// Non-maximum suppression over scored labels: sorts <paramref name="candidates"/>
/// in place by descending score, then keeps each label that has not been suppressed
/// and suppresses every lower-ranked label whose IoU with it is at least
/// <paramref name="iouThresh"/>.
/// </summary>
/// <returns>The kept labels in descending score order; an empty list for empty input.</returns>
private static List<LootLabel> NmsLootLabels(List<(LootLabel Label, float Score)> candidates, double iouThresh)
{
    var survivors = new List<LootLabel>();
    if (candidates.Count == 0)
    {
        return survivors;
    }

    candidates.Sort((p, q) => q.Score.CompareTo(p.Score));
    var dropped = new bool[candidates.Count];

    for (int winner = 0; winner < candidates.Count; winner++)
    {
        if (dropped[winner])
        {
            continue;
        }

        var kept = candidates[winner].Label;
        survivors.Add(kept);

        // Labels store centre + size; convert to corner coordinates for the overlap test.
        int keptLeft = kept.CenterX - kept.Width / 2;
        int keptTop = kept.CenterY - kept.Height / 2;
        int keptRight = keptLeft + kept.Width;
        int keptBottom = keptTop + kept.Height;
        int keptArea = kept.Width * kept.Height;

        for (int rival = winner + 1; rival < candidates.Count; rival++)
        {
            if (dropped[rival])
            {
                continue;
            }

            var other = candidates[rival].Label;
            int otherLeft = other.CenterX - other.Width / 2;
            int otherTop = other.CenterY - other.Height / 2;
            int otherRight = otherLeft + other.Width;
            int otherBottom = otherTop + other.Height;
            int otherArea = other.Width * other.Height;

            int overlapW = Math.Max(0, Math.Min(keptRight, otherRight) - Math.Max(keptLeft, otherLeft));
            int overlapH = Math.Max(0, Math.Min(keptBottom, otherBottom) - Math.Max(keptTop, otherTop));
            int intersection = overlapW * overlapH;

            // The +1 in the denominator guards against division by zero.
            double iou = intersection / (double)(keptArea + otherArea - intersection + 1);
            if (iou >= iouThresh)
            {
                dropped[rival] = true;
            }
        }
    }

    return survivors;
}
/// <summary>Disposes the Python OCR bridge held by this reader.</summary>
public void Dispose()
{
    _pythonBridge.Dispose();
}
// -- OCR text matching --

View file

@ -50,6 +50,65 @@ class TemplateMatchHandler
return best;
}
/// <summary>
/// Find all matches above threshold, suppressing overlapping detections.
/// </summary>
/// <param name="templatePath">Path of the template image; loaded as a 3-channel colour image.</param>
/// <param name="region">Optional capture region; its X/Y are added to the returned coordinates.</param>
/// <param name="threshold">Minimum TM_CCOEFF_NORMED score for a peak to be reported.</param>
/// <returns>
/// One result per accepted peak, in descending score order, with X/Y at the centre of the
/// matched window. Empty when the template is larger than the captured image.
/// </returns>
/// <exception cref="FileNotFoundException">The template file does not exist.</exception>
/// <exception cref="InvalidOperationException">The template file could not be decoded.</exception>
public List<TemplateMatchResult> MatchAll(string templatePath, Region? region = null,
    double threshold = 0.7)
{
    if (!System.IO.File.Exists(templatePath))
        throw new FileNotFoundException($"Template file not found: {templatePath}");

    using var screenshot = ScreenCapture.CaptureOrLoad(null, region);
    using var screenMat = BitmapConverter.ToMat(screenshot);
    using var template = Cv2.ImRead(templatePath, ImreadModes.Color);
    if (template.Empty())
        throw new InvalidOperationException($"Failed to load template image: {templatePath}");

    // MatchTemplate needs both images in the same channel layout; drop alpha if present.
    using var screenBgr = new Mat();
    if (screenMat.Channels() == 4)
        Cv2.CvtColor(screenMat, screenBgr, ColorConversionCodes.BGRA2BGR);
    else
        screenMat.CopyTo(screenBgr);

    if (template.Rows > screenBgr.Rows || template.Cols > screenBgr.Cols)
        return [];

    using var result = new Mat();
    Cv2.MatchTemplate(screenBgr, template, result, TemplateMatchModes.CCoeffNormed);

    var offsetX = region?.X ?? 0;
    var offsetY = region?.Y ?? 0;
    var matches = new List<TemplateMatchResult>();

    // Sentinel written over already-reported peaks. It lies below the [-1, 1] range
    // of TM_CCOEFF_NORMED, so a suppressed cell can never be mistaken for a match.
    // (Suppressing with 0 made the loop endless for any threshold <= 0.)
    const double SuppressedScore = -2.0;

    // Find all peaks above threshold using non-maximum suppression
    while (true)
    {
        Cv2.MinMaxLoc(result, out _, out double maxVal, out _, out OpenCvSharp.Point maxLoc);

        // `!(maxVal >= threshold)` also stops on NaN; the sentinel check stops once
        // every cell has been suppressed, whatever the threshold.
        if (!(maxVal >= threshold) || maxVal <= SuppressedScore) break;

        matches.Add(new TemplateMatchResult
        {
            X = offsetX + maxLoc.X + template.Cols / 2,
            Y = offsetY + maxLoc.Y + template.Rows / 2,
            Width = template.Cols,
            Height = template.Rows,
            Confidence = maxVal,
        });

        // Suppress a template-sized window centred on the peak (clipped to the score
        // map) so the next iteration finds the next distinct match. The window always
        // contains the peak itself, so every iteration makes progress.
        var suppressX = Math.Max(0, maxLoc.X - template.Cols / 2);
        var suppressY = Math.Max(0, maxLoc.Y - template.Rows / 2);
        var suppressW = Math.Min(result.Cols - suppressX, template.Cols);
        var suppressH = Math.Min(result.Rows - suppressY, template.Rows);
        using var roi = new Mat(result, new Rect(suppressX, suppressY, suppressW, suppressH));
        roi.SetTo(new Scalar(SuppressedScore));
    }

    return matches;
}
private static TemplateMatchResult? MatchAtScale(Mat screen, Mat template,
Region? region, double scale, double threshold)
{