boss ready

This commit is contained in:
Boki 2026-02-21 15:44:22 -05:00
parent 89c3a0a077
commit 64a6ab694b
21 changed files with 857 additions and 249 deletions

View file

@ -0,0 +1,71 @@
using Serilog;
namespace Poe2Trade.Screen;
/// <summary>
/// Debug-only: periodically captures the screen, runs loot label detection,
/// and exposes the latest results for overlay rendering.
/// </summary>
/// <remarks>
/// While <see cref="Enabled"/> is true a timer invokes <see cref="Tick"/> every
/// <see cref="TickIntervalMs"/> milliseconds. Ticks never overlap: a tick that fires while the
/// previous one is still running returns immediately.
/// </remarks>
public class LootDebugDetector : IDisposable
{
    /// <summary>Delay between detection ticks, in milliseconds.</summary>
    private const int TickIntervalMs = 500;

    private readonly IScreenReader _screen;

    // Serializes start/stop and result publication, so a tick that is still in flight when
    // detection is switched off cannot overwrite the results that were just cleared.
    private readonly object _gate = new();

    private volatile List<LootLabel> _latest = [];
    private Timer? _timer;
    private volatile bool _enabled;
    private int _running; // guard against overlapping ticks

    /// <summary>Creates a detector that captures and analyses frames through <paramref name="screen"/>.</summary>
    /// <param name="screen">Screen reader used for both capture and loot label detection.</param>
    public LootDebugDetector(IScreenReader screen)
    {
        _screen = screen;
    }

    /// <summary>
    /// Labels found by the most recent successful tick; empty while disabled,
    /// after a failed tick, and after disposal.
    /// </summary>
    public IReadOnlyList<LootLabel> Latest => _latest;

    /// <summary>
    /// Starts (true) or stops (false) periodic detection. Stopping also clears <see cref="Latest"/>.
    /// Setting the current value again is a no-op.
    /// </summary>
    public bool Enabled
    {
        get => _enabled;
        set
        {
            lock (_gate)
            {
                if (_enabled == value) return;
                _enabled = value;
                if (value)
                    _timer = new Timer(_ => Tick(), null, 0, TickIntervalMs);
                else
                    StopTimerAndClear();
            }
        }
    }

    /// <summary>Timer callback: capture one frame, detect labels, publish the result.</summary>
    private void Tick()
    {
        if (!_enabled) return;
        if (Interlocked.CompareExchange(ref _running, 1, 0) != 0) return;
        try
        {
            using var frame = _screen.CaptureRawBitmap();
            // The same frame is passed as both reference and current image.
            var labels = _screen.DetectLootLabels(frame, frame);
            Publish(labels);
            if (labels.Count > 0)
                Log.Information("[LootDebug] Detected {Count} labels", labels.Count);
        }
        catch (Exception ex)
        {
            // Pass the exception itself so the stack trace is logged, not only the message.
            Log.Warning(ex, "[LootDebug] Detection failed: {Error}", ex.Message);
            Publish([]);
        }
        finally
        {
            Interlocked.Exchange(ref _running, 0);
        }
    }

    /// <summary>
    /// Stores <paramref name="labels"/> as the latest result unless detection was disabled while
    /// the tick was running; in that case the already-cleared result is left untouched.
    /// </summary>
    private void Publish(List<LootLabel> labels)
    {
        lock (_gate)
        {
            if (_enabled)
                _latest = labels;
        }
    }

    /// <summary>Disposes the timer and clears the published results. Caller must hold <c>_gate</c>.</summary>
    private void StopTimerAndClear()
    {
        // Timer.Dispose() does not wait for a callback that is already executing;
        // Publish() re-checks _enabled to cover that case.
        _timer?.Dispose();
        _timer = null;
        _latest = [];
    }

    /// <summary>Stops detection, releases the timer and clears <see cref="Latest"/>.</summary>
    public void Dispose()
    {
        lock (_gate)
        {
            _enabled = false;
            StopTimerAndClear();
        }
    }
}

View file

@ -202,11 +202,20 @@ public class ScreenReader : IScreenReader
/// <summary>
/// Returns a bitmap obtained from <c>ScreenCapture.CaptureOrLoad</c>, called with both arguments null.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the two null arguments mean "no file to load, no sub-region",
/// i.e. a live full-screen capture — confirm against ScreenCapture.CaptureOrLoad.
/// </remarks>
public Bitmap CaptureRawBitmap() => ScreenCapture.CaptureOrLoad(null, null);
// Nameplate search region — skip top HUD, bottom bar, and side margins
private const int NpTop = 120, NpBottom = 1080, NpMargin = 300;
public Task<OcrResponse> NameplateDiffOcr(Bitmap reference, Bitmap current)
{
int w = Math.Min(reference.Width, current.Width);
int h = Math.Min(reference.Height, current.Height);
// Clamp search region to image bounds
int scanY0 = Math.Min(NpTop, h);
int scanY1 = Math.Min(NpBottom, h);
int scanX0 = Math.Min(NpMargin, w);
int scanX1 = Math.Max(scanX0, w - NpMargin);
var refData = reference.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
var curData = current.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
byte[] refPx = new byte[refData.Stride * h];
@ -218,74 +227,100 @@ public class ScreenReader : IScreenReader
current.UnlockBits(curData);
// Build a binary mask of pixels that got significantly brighter (nameplates are bright text)
// Only scan within the play-area region to skip UI and reduce work
const int brightThresh = 30;
bool[] mask = new bool[w * h];
Parallel.For(0, h, y =>
int scanW = scanX1 - scanX0;
int scanH = scanY1 - scanY0;
bool[] mask = new bool[scanW * scanH];
Parallel.For(0, scanH, sy =>
{
int y = sy + scanY0;
int rowOff = y * stride;
for (int x = 0; x < w; x++)
for (int sx = 0; sx < scanW; sx++)
{
int x = sx + scanX0;
int i = rowOff + x * 4;
int brighter = (curPx[i] - refPx[i]) + (curPx[i + 1] - refPx[i + 1]) + (curPx[i + 2] - refPx[i + 2]);
if (brighter > brightThresh)
mask[y * w + x] = true;
mask[sy * scanW + sx] = true;
}
});
// Find connected clusters via row-scan: collect bounding boxes of bright regions
var boxes = FindBrightClusters(mask, w, h, minWidth: 40, minHeight: 10, maxGap: 8);
var boxes = FindBrightClusters(mask, scanW, scanH, minWidth: 40, minHeight: 10, maxGap: 8);
// Offset cluster boxes back to full-image coordinates
for (int i = 0; i < boxes.Count; i++)
{
var b = boxes[i];
boxes[i] = new Rectangle(b.X + scanX0, b.Y + scanY0, b.Width, b.Height);
}
Log.Information("NameplateDiff: found {Count} bright clusters", boxes.Count);
if (boxes.Count == 0)
return Task.FromResult(new OcrResponse { Text = "", Lines = [] });
// OCR each cluster crop, accumulate results with screen-space coordinates
var allLines = new List<OcrLine>();
var allText = new List<string>();
// Collect valid cluster crops and stitch into a single image for one OCR call
const int pad = 4;
const int sep = 20; // black separator between crops to prevent cross-detection
var crops = new List<(int screenX, int screenY, int cropW, int cropH, int stitchY)>();
for (int bi = 0; bi < boxes.Count; bi++)
int maxCropW = 0;
int totalH = 0;
foreach (var box in boxes)
{
var box = boxes[bi];
// Pad the crop slightly
int pad = 4;
int cx = Math.Max(0, box.X - pad);
int cy = Math.Max(0, box.Y - pad);
int cw = Math.Min(w - cx, box.Width + pad * 2);
int ch = Math.Min(h - cy, box.Height + pad * 2);
using var crop = current.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
var clusterSw = System.Diagnostics.Stopwatch.StartNew();
OcrResponse ocrResult;
try
{
ocrResult = _pythonBridge.OcrFromBitmap(crop);
}
catch (TimeoutException)
{
Log.Warning("NameplateDiffOcr: cluster {I}/{Count} OCR timed out, skipping", bi + 1, boxes.Count);
continue;
}
Log.Debug("NameplateDiffOcr: cluster {I}/{Count} ({W}x{H}) OCR took {Ms}ms",
bi + 1, boxes.Count, cw, ch, clusterSw.ElapsedMilliseconds);
crops.Add((cx, cy, cw, ch, totalH));
maxCropW = Math.Max(maxCropW, cw);
totalH += ch + sep;
}
// Offset word coordinates to screen space
foreach (var line in ocrResult.Lines)
if (crops.Count == 0)
return Task.FromResult(new OcrResponse { Text = "", Lines = [] });
totalH -= sep; // no separator after last crop
// Stitch all crops vertically into one image
using var stitched = new Bitmap(maxCropW, totalH, PixelFormat.Format32bppArgb);
using (var g = System.Drawing.Graphics.FromImage(stitched))
{
g.Clear(System.Drawing.Color.Black);
foreach (var (sx, sy, cw, ch, sY) in crops)
g.DrawImage(current, new Rectangle(0, sY, cw, ch), new Rectangle(sx, sy, cw, ch), GraphicsUnit.Pixel);
}
// Single OCR call for all clusters
var ocrSw = System.Diagnostics.Stopwatch.StartNew();
OcrResponse ocrResult;
try
{
ocrResult = _pythonBridge.OcrFromBitmap(stitched);
}
catch (TimeoutException)
{
Log.Warning("NameplateDiffOcr: batch OCR timed out ({Count} clusters)", crops.Count);
return Task.FromResult(new OcrResponse { Text = "", Lines = [] });
}
Log.Information("NameplateDiffOcr: batch OCR {Count} clusters in {Ms}ms",
crops.Count, ocrSw.ElapsedMilliseconds);
// Map OCR results back to screen coordinates
foreach (var line in ocrResult.Lines)
{
foreach (var word in line.Words)
{
foreach (var word in line.Words)
{
word.X += cx;
word.Y += cy;
}
allLines.Add(line);
allText.Add(line.Text);
// Find which crop this word belongs to by Y position
var crop = crops.Last(c => word.Y >= c.stitchY);
word.X += crop.screenX;
word.Y = word.Y - crop.stitchY + crop.screenY;
}
}
return Task.FromResult(new OcrResponse
{
Text = string.Join("\n", allText),
Lines = allLines,
});
return Task.FromResult(ocrResult);
}
private static List<Rectangle> FindBrightClusters(bool[] mask, int w, int h, int minWidth, int minHeight, int maxGap)
@ -358,21 +393,29 @@ public class ScreenReader : IScreenReader
/// <summary>No-op: the supplied frame is ignored and no baseline is stored.</summary>
public void SetLootBaseline(Bitmap frame) { }
// Detection parameters
// -- Loot detection constants --
private const int CannyLow = 20, CannyHigh = 80;
// Shape constraints
private const int LabelMinW = 80, LabelMaxW = 500;
// Shape constraints (passes 1 & 2)
private const int LabelMinW = 100, LabelMaxW = 500;
private const int LabelMinH = 15, LabelMaxH = 100;
private const double LabelMinAspect = 1.3, LabelMaxAspect = 10.0;
// Strict pass: well-formed rectangle contours
private const double MinRectangularity = 0.5;
private const float StrictMinBS = 200f;
private const float StrictMinEdgeDensity = 25f;
// Relaxed pass: any contour bbox in play area (catches VFX-broken borders)
// Pass 1: strict (well-formed bordered rectangles)
private const double MinRectangularity = 0.7;
private const double StrictMinBS = 255;
// Pass 2: relaxed (play-area contours, VFX-tolerant)
private const int RelaxedMinW = 100;
private const float RelaxedMinBS = 250f;
private const float RelaxedMinEdgeDensity = 25f;
private const double RelaxedMinBS = 265;
private const double RelaxedBrightPctThreshold = 8;
private const double RelaxedBgDarkPctThreshold = 50;
private const double MaxGreenPct = 5;
// Pass 3: yellow text clusters (borderless labels)
private const int YellowHueMin = 10, YellowHueMax = 35;
private const int YellowMinSat = 120, YellowMinVal = 120;
private const double YellowTextPctThreshold = 25;
private const int TextClusterMinWidth = 100;
private const double TextClusterMinAspect = 1.5;
private const double TextClusterContainmentThreshold = 0.5;
// Play area bounds
private const double UiMarginTop = 0.08;
private const double UiMarginBottom = 0.82;
// Post-processing
@ -381,9 +424,10 @@ public class ScreenReader : IScreenReader
private const double NmsIouThresh = 0.4;
/// <summary>
/// Two-pass loot label detection:
/// 1. Strict: polygon-approximated rectangle contours (high precision)
/// 2. Relaxed: any contour bbox in play area (catches VFX-broken borders)
/// Three-pass loot label detection:
/// 1. Strict: polygon-approximated rectangle contours (bordered labels)
/// 2. Relaxed: contour bbox with label-like content OR bright text on dark background
/// 3. Yellow text clusters: morphological detection of gold/yellow text without background box
/// Results merged, horizontal fragments joined, then NMS.
/// </summary>
public List<LootLabel> DetectLootLabels(Bitmap reference, Bitmap current)
@ -402,57 +446,59 @@ public class ScreenReader : IScreenReader
using var hsv = new Mat();
Cv2.CvtColor(mat, hsv, ColorConversionCodes.BGR2HSV);
// Edge detection
// Split HSV channels once for reuse
Cv2.Split(hsv, out Mat[] hsvChannels);
using var hChan = hsvChannels[0];
using var sChan = hsvChannels[1];
using var vChan = hsvChannels[2];
// ── Passes 1 & 2: Edge-based detection ──
using var edges = new Mat();
Cv2.Canny(gray, edges, CannyLow, CannyHigh);
using var dilateKernel = Cv2.GetStructuringElement(MorphShapes.Rect, new Size(3, 3));
using var dilated = new Mat();
Cv2.Dilate(edges, dilated, dilateKernel, iterations: 1);
Cv2.Dilate(edges, edges, dilateKernel, iterations: 1);
Cv2.FindContours(dilated, out var contours, out _, RetrievalModes.Tree, ContourApproximationModes.ApproxSimple);
Cv2.FindContours(edges, out var contours, out _, RetrievalModes.Tree, ContourApproximationModes.ApproxSimple);
var strict = new List<LabelCandidate>();
var relaxed = new List<LabelCandidate>();
foreach (var contour in contours)
{
var box = Cv2.BoundingRect(contour);
var bbox = Cv2.BoundingRect(contour);
int x = bbox.X, y = bbox.Y, w = bbox.Width, h = bbox.Height;
// Common shape gate
if (box.Width <= LabelMinW || box.Width >= LabelMaxW) continue;
if (box.Height <= LabelMinH || box.Height >= LabelMaxH) continue;
double aspect = (double)box.Width / Math.Max(box.Height, 1);
if (aspect <= LabelMinAspect || aspect >= LabelMaxAspect) continue;
if (w < LabelMinW || w > LabelMaxW || h < LabelMinH || h > LabelMaxH) continue;
double aspect = (double)w / Math.Max(h, 1);
if (aspect < LabelMinAspect || aspect > LabelMaxAspect) continue;
// Content metrics
using var roiHsv = new Mat(hsv, box);
var meanHsv = Cv2.Mean(roiHsv);
float meanVal = (float)meanHsv[2];
float meanSat = (float)meanHsv[1];
float bs = meanVal + meanSat;
using var roiGray = new Mat(gray, box);
using var roiEdges = new Mat();
Cv2.Canny(roiGray, roiEdges, 50, 150);
float ed = (float)Cv2.Mean(roiEdges)[0];
// Strict pass: well-formed polygon (4-8 vertices)
var peri = Cv2.ArcLength(contour, true);
var approx = Cv2.ApproxPolyDP(contour, 0.02 * peri, true);
// Content metrics (mean brightness + saturation)
using var roiV = new Mat(vChan, bbox);
using var roiS = new Mat(sChan, bbox);
double meanVal = Cv2.Mean(roiV).Val0;
double meanSat = Cv2.Mean(roiS).Val0;
double bs = meanVal + meanSat;
// Pass 1: strict well-formed polygon (4-8 vertices)
var approx = Cv2.ApproxPolyDP(contour, Cv2.ArcLength(contour, true) * 0.02, true);
if (approx.Length >= 4 && approx.Length <= 8)
{
double contourArea = Cv2.ContourArea(approx);
double rect = contourArea / Math.Max(box.Width * box.Height, 1);
if (rect >= MinRectangularity && bs >= StrictMinBS && ed >= StrictMinEdgeDensity)
strict.Add(new LabelCandidate(box.X, box.Y, box.Width, box.Height, meanVal, meanSat));
double rectangularity = contourArea / Math.Max(w * h, 1);
if (rectangularity >= MinRectangularity && bs >= StrictMinBS)
strict.Add(new LabelCandidate(x, y, w, h, (float)meanVal, (float)meanSat));
}
// Relaxed pass: any contour bbox in play area
bool inPlay = box.Y > playTop && box.Y + box.Height < playBot;
if (inPlay && box.Width >= RelaxedMinW && bs >= RelaxedMinBS && ed >= RelaxedMinEdgeDensity)
relaxed.Add(new LabelCandidate(box.X, box.Y, box.Width, box.Height, meanVal, meanSat));
// Pass 2: relaxed play area, bs OR bright-on-dark
bool inPlay = y > playTop && (y + h) < playBot;
if (!inPlay || w < RelaxedMinW) continue;
bool passesBs = bs >= RelaxedMinBS;
bool passesTextOnDark = !passesBs && CheckBrightTextOnDark(mat, vChan, sChan, bbox);
if (passesBs || passesTextOnDark)
relaxed.Add(new LabelCandidate(x, y, w, h, (float)meanVal, (float)meanSat));
}
// Merge strict + relaxed (strict wins on overlap)
@ -466,6 +512,14 @@ public class ScreenReader : IScreenReader
// Join horizontal fragments
merged = MergeHorizontal(merged, MergeGap, MergeYTolerance);
// ── Pass 3: Yellow text cluster detection (borderless labels) ──
var textClusters = DetectYellowTextClusters(mat, hChan, sChan, vChan, playTop, playBot);
foreach (var tc in textClusters)
{
if (!ContainedByAny(tc, merged, TextClusterContainmentThreshold))
merged.Add(tc);
}
// Build LootLabels with color classification
var scored = new List<(LootLabel Label, float Score)>();
foreach (var c in merged)
@ -487,7 +541,6 @@ public class ScreenReader : IScreenReader
{
current.Save("debug_loot_capture.png", System.Drawing.Imaging.ImageFormat.Png);
Cv2.ImWrite("debug_loot_edges.png", edges);
Cv2.ImWrite("debug_loot_dilated.png", dilated);
using var debugMat = mat.Clone();
foreach (var label in labels)
Cv2.Rectangle(debugMat,
@ -500,8 +553,8 @@ public class ScreenReader : IScreenReader
Log.Warning(ex, "Failed to save debug images");
}
Log.Information("DetectLootLabels: strict={Strict} relaxed={Relaxed} merged={Merged} final={Final}",
strict.Count, relaxed.Count, merged.Count, labels.Count);
Log.Information("DetectLootLabels: strict={Strict} relaxed={Relaxed} yellow={Yellow} final={Final}",
strict.Count, relaxed.Count, textClusters.Count, labels.Count);
foreach (var label in labels)
Log.Information(" Label ({X},{Y}) {W}x{H} color=({R},{G},{B}) tier={Tier}",
label.CenterX - label.Width / 2, label.CenterY - label.Height / 2,
@ -526,81 +579,271 @@ public class ScreenReader : IScreenReader
return ((byte)mean[2], (byte)mean[1], (byte)mean[0]);
}
/// <summary>
/// Pass 2 helper: accepts a candidate box only if it looks like bright, saturated text on a
/// dark backdrop and is not dominated by green fire.
/// </summary>
/// <param name="bgrImage">Full BGR frame; only used for the green-fire rejection.</param>
/// <param name="vChan">Full-frame HSV value channel.</param>
/// <param name="sChan">Full-frame HSV saturation channel.</param>
/// <param name="bbox">Candidate box, in full-frame coordinates.</param>
/// <returns>
/// True when all three checks pass: enough bright+saturated pixels, a mostly dark background,
/// and no green-fire dominance. False for an empty box.
/// </returns>
private bool CheckBrightTextOnDark(Mat bgrImage, Mat vChan, Mat sChan, Rect bbox)
{
    int pixelCount = bbox.Width * bbox.Height;
    if (pixelCount == 0)
    {
        return false;
    }

    using var valueRoi = new Mat(vChan, bbox);
    using var satRoi = new Mat(sChan, bbox);

    // Check 1: share of pixels that are both bright (V > 150) and saturated (S > 100).
    using var isBright = new Mat();
    Cv2.Threshold(valueRoi, isBright, 150, 255, ThresholdTypes.Binary);
    using var isSaturated = new Mat();
    Cv2.Threshold(satRoi, isSaturated, 100, 255, ThresholdTypes.Binary);
    using var brightAndSaturated = new Mat();
    Cv2.BitwiseAnd(isBright, isSaturated, brightAndSaturated);

    double brightShare = (double)Cv2.CountNonZero(brightAndSaturated) / pixelCount * 100;
    if (brightShare < RelaxedBrightPctThreshold)
    {
        return false;
    }

    // Check 2: everything that is not text (V > 120 and S > 100) counts as background,
    // and at least the configured share of that background must be dark (V <= 40).
    using var likelyText = new Mat();
    Cv2.Threshold(valueRoi, likelyText, 120, 255, ThresholdTypes.Binary);
    using var textSaturation = new Mat();
    Cv2.Threshold(satRoi, textSaturation, 100, 255, ThresholdTypes.Binary);
    Cv2.BitwiseAnd(likelyText, textSaturation, likelyText);

    using var background = new Mat();
    Cv2.BitwiseNot(likelyText, background);
    int backgroundCount = Cv2.CountNonZero(background);
    if (backgroundCount == 0)
    {
        return false;
    }

    using var backgroundValue = new Mat();
    valueRoi.CopyTo(backgroundValue, background);
    using var darkBackground = new Mat();
    Cv2.Threshold(backgroundValue, darkBackground, 40, 255, ThresholdTypes.BinaryInv);
    // Text pixels were left at 0 by the masked copy and so pass the inverse threshold;
    // restrict the result to real background pixels again.
    Cv2.BitwiseAnd(darkBackground, background, darkBackground);

    double darkShare = (double)Cv2.CountNonZero(darkBackground) / backgroundCount * 100;
    if (darkShare < RelaxedBgDarkPctThreshold)
    {
        return false;
    }

    // Check 3: green fire rejection.
    bool greenFire = IsGreenDominant(bgrImage, bbox, pixelCount);
    return !greenFire;
}
/// <summary>
/// Pass 3: finds clusters of gold/yellow text that have no background box.
/// Builds an HSV-gated yellow mask, removes green-fire pixels, groups the remaining pixels
/// morphologically, and keeps wide, sufficiently yellow clusters inside the play area.
/// </summary>
/// <param name="bgrImage">Full BGR frame, used for green-fire subtraction.</param>
/// <param name="hChan">Full-frame hue channel.</param>
/// <param name="sChan">Full-frame saturation channel.</param>
/// <param name="vChan">Full-frame value channel.</param>
/// <param name="playTop">Clusters must start strictly below this row.</param>
/// <param name="playBot">Clusters must end strictly above this row.</param>
/// <returns>
/// One candidate per accepted cluster. The brightness slot carries the yellow percentage
/// times 10 and the saturation slot is 0.
/// </returns>
private List<LabelCandidate> DetectYellowTextClusters(
    Mat bgrImage, Mat hChan, Mat sChan, Mat vChan,
    int playTop, int playBot)
{
    // Per-channel gates: YellowHueMin <= H <= YellowHueMax, S >= YellowMinSat, V >= YellowMinVal.
    using var hueAtLeastMin = new Mat();
    Cv2.Threshold(hChan, hueAtLeastMin, YellowHueMin - 1, 255, ThresholdTypes.Binary);
    using var hueAtMostMax = new Mat();
    Cv2.Threshold(hChan, hueAtMostMax, YellowHueMax, 255, ThresholdTypes.BinaryInv);
    using var saturatedEnough = new Mat();
    Cv2.Threshold(sChan, saturatedEnough, YellowMinSat - 1, 255, ThresholdTypes.Binary);
    using var brightEnough = new Mat();
    Cv2.Threshold(vChan, brightEnough, YellowMinVal - 1, 255, ThresholdTypes.Binary);

    // Combine the four gates into one yellow-text mask.
    using var yellowMask = new Mat();
    Cv2.BitwiseAnd(hueAtLeastMin, hueAtMostMax, yellowMask);
    Cv2.BitwiseAnd(yellowMask, saturatedEnough, yellowMask);
    Cv2.BitwiseAnd(yellowMask, brightEnough, yellowMask);

    // Drop pixels that belong to green fire.
    SubtractGreenFire(bgrImage, yellowMask);

    // Group the mask: horizontal dilation, vertical close, then an open to remove small remnants.
    using var horizontalKernel = Cv2.GetStructuringElement(MorphShapes.Rect, new Size(25, 1));
    using var smeared = new Mat();
    Cv2.Dilate(yellowMask, smeared, horizontalKernel, iterations: 1);

    using var verticalKernel = Cv2.GetStructuringElement(MorphShapes.Rect, new Size(1, 8));
    using var bridged = new Mat();
    Cv2.MorphologyEx(smeared, bridged, MorphTypes.Close, verticalKernel);

    using var openKernel = Cv2.GetStructuringElement(MorphShapes.Rect, new Size(40, 5));
    using var cleaned = new Mat();
    Cv2.MorphologyEx(bridged, cleaned, MorphTypes.Open, openKernel);

    Cv2.FindContours(cleaned, out var clusterContours, out _, RetrievalModes.External, ContourApproximationModes.ApproxSimple);

    var candidates = new List<LabelCandidate>();
    foreach (var outline in clusterContours)
    {
        var box = Cv2.BoundingRect(outline);

        bool sizeOk = box.Width >= TextClusterMinWidth
            && box.Height >= LabelMinH
            && box.Height <= 120;
        if (!sizeOk)
        {
            continue;
        }

        double widthToHeight = (double)box.Width / Math.Max(box.Height, 1);
        if (widthToHeight < TextClusterMinAspect)
        {
            continue;
        }

        bool insidePlayArea = box.Y > playTop && (box.Y + box.Height) < playBot;
        if (!insidePlayArea)
        {
            continue;
        }

        // Density is measured on the un-dilated yellow mask, not on the grouped one.
        using var yellowRoi = new Mat(yellowMask, box);
        double yellowPct = (double)Cv2.CountNonZero(yellowRoi) / (box.Width * box.Height) * 100;
        if (yellowPct < YellowTextPctThreshold)
        {
            continue;
        }

        candidates.Add(new LabelCandidate(box.X, box.Y, box.Width, box.Height, (float)(yellowPct * 10), 0));
    }

    return candidates;
}
/// <summary>Check if region is green-fire dominant (G > R+15, G > B+15, G > 80).</summary>
/// <param name="bgrImage">Full BGR frame.</param>
/// <param name="bbox">Region to inspect, in frame coordinates.</param>
/// <param name="area">Pixel count of <paramref name="bbox"/>, used as the percentage denominator.</param>
/// <returns>
/// True when the share of green-fire pixels in the region reaches <c>MaxGreenPct</c> percent;
/// false for a non-positive <paramref name="area"/>.
/// </returns>
private static bool IsGreenDominant(Mat bgrImage, Rect bbox, int area)
{
    // An empty region cannot be dominated by anything; this also avoids a 0/0 percentage.
    if (area <= 0) return false;

    using var roiBgr = new Mat(bgrImage, bbox);
    using var greenMask = BuildGreenFireMask(roiBgr);
    double greenPct = (double)Cv2.CountNonZero(greenMask) / area * 100;
    return greenPct >= MaxGreenPct;
}

/// <summary>Zero out green-fire pixels from a mask in-place.</summary>
/// <param name="bgrImage">BGR image the green-fire pixels are derived from.</param>
/// <param name="mask">Mask to clear at green-fire positions; modified in place.</param>
private static void SubtractGreenFire(Mat bgrImage, Mat mask)
{
    using var greenFire = BuildGreenFireMask(bgrImage);
    using var notGreen = new Mat();
    Cv2.BitwiseNot(greenFire, notGreen);
    Cv2.BitwiseAnd(mask, notGreen, mask);
}

/// <summary>
/// Builds a binary mask (255 = green fire) of pixels where G exceeds both R and B by more than
/// 15 and G itself is above 80. Shared by <see cref="IsGreenDominant"/> and
/// <see cref="SubtractGreenFire"/> so both use exactly the same definition of "green fire".
/// </summary>
/// <param name="bgr">BGR image or region.</param>
/// <returns>A new mask the caller owns and must dispose.</returns>
private static Mat BuildGreenFireMask(Mat bgr)
{
    const int greenMargin = 15;   // G must exceed R and B by more than this
    const int greenMinLevel = 80; // G must itself be above this

    Cv2.Split(bgr, out Mat[] channels); // [0] = B, [1] = G, [2] = R
    var greenFire = new Mat();
    try
    {
        using var gMinusR = new Mat();
        using var gMinusB = new Mat();
        Cv2.Subtract(channels[1], channels[2], gMinusR);
        Cv2.Subtract(channels[1], channels[0], gMinusB);

        using var thR = new Mat();
        using var thB = new Mat();
        using var thG = new Mat();
        Cv2.Threshold(gMinusR, thR, greenMargin, 255, ThresholdTypes.Binary);
        Cv2.Threshold(gMinusB, thB, greenMargin, 255, ThresholdTypes.Binary);
        Cv2.Threshold(channels[1], thG, greenMinLevel, 255, ThresholdTypes.Binary);

        Cv2.BitwiseAnd(thR, thB, greenFire);
        Cv2.BitwiseAnd(greenFire, thG, greenFire);
        return greenFire;
    }
    catch
    {
        // The result is not handed to the caller on failure, so release it here.
        greenFire.Dispose();
        throw;
    }
    finally
    {
        foreach (var ch in channels) ch.Dispose();
    }
}
/// <summary>
/// Checks whether <paramref name="label"/> overlaps any box in <paramref name="others"/>
/// with an intersection-over-union strictly greater than <paramref name="iouThresh"/>.
/// </summary>
/// <param name="label">Candidate to test.</param>
/// <param name="others">Boxes to compare against.</param>
/// <param name="iouThresh">IoU value that must be exceeded for a match.</param>
/// <returns>True on the first box whose IoU exceeds the threshold; false if none does.</returns>
private static bool OverlapsAny(LabelCandidate label, List<LabelCandidate> others, double iouThresh)
{
    foreach (var o in others)
    {
        // Single source of truth for the overlap measure; no inline re-computation.
        if (ComputeIoU(label, o) > iouThresh)
            return true;
    }
    return false;
}
/// <summary>Check if label is mostly contained inside any existing detection.</summary>
/// <param name="label">Candidate to test.</param>
/// <param name="others">Existing detections to compare against.</param>
/// <param name="containThresh">
/// Fraction of <paramref name="label"/>'s own area that must be exceeded by the intersection
/// with a single existing box.
/// </param>
/// <returns>
/// True when some box in <paramref name="others"/> covers more than
/// <paramref name="containThresh"/> of the label, or when the label has zero area;
/// false otherwise.
/// </returns>
private static bool ContainedByAny(LabelCandidate label, List<LabelCandidate> others, double containThresh)
{
    int labelArea = label.W * label.H;
    // A zero-area label is reported as contained, so callers never add it.
    if (labelArea == 0) return true;

    foreach (var o in others)
    {
        int overlapW = Math.Min(label.X + label.W, o.X + o.W) - Math.Max(label.X, o.X);
        int overlapH = Math.Min(label.Y + label.H, o.Y + o.H) - Math.Max(label.Y, o.Y);
        int inter = Math.Max(0, overlapW) * Math.Max(0, overlapH);

        // Unlike IoU, the denominator is the label's own area only.
        if ((double)inter / labelArea > containThresh)
            return true;
    }
    return false;
}
/// <summary>Intersection-over-union of two axis-aligned candidate boxes.</summary>
/// <param name="a">First box.</param>
/// <param name="b">Second box.</param>
/// <returns>
/// Intersection area divided by union area; 0 when the boxes do not overlap.
/// The divisor is clamped to at least 1.
/// </returns>
private static double ComputeIoU(LabelCandidate a, LabelCandidate b)
{
    int overlapW = Math.Min(a.X + a.W, b.X + b.W) - Math.Max(a.X, b.X);
    int overlapH = Math.Min(a.Y + a.H, b.Y + b.H) - Math.Max(a.Y, b.Y);
    int intersection = Math.Max(0, overlapW) * Math.Max(0, overlapH);

    int combined = a.W * a.H + b.W * b.H - intersection;
    return (double)intersection / Math.Max(combined, 1);
}
/// <summary>
/// Merge labels that sit side-by-side on the same line.
/// </summary>
/// <param name="labels">Candidates to merge. Returned unchanged (same instance) when it holds fewer than two items.</param>
/// <param name="gap">Largest horizontal gap, in pixels, across which two boxes are still joined.</param>
/// <param name="yTol">Largest allowed difference between the vertical centres of a group and a box.</param>
/// <returns>
/// One candidate per merged group: the bounding box of its members, with brightness and
/// saturation averaged over the members weighted by their areas.
/// </returns>
private static List<LabelCandidate> MergeHorizontal(List<LabelCandidate> labels, int gap, int yTol)
{
    if (labels.Count < 2) return labels;

    var used = new bool[labels.Count];
    // Visit boxes top-to-bottom, then left-to-right.
    var sorted = Enumerable.Range(0, labels.Count)
        .OrderBy(i => labels[i].Y).ThenBy(i => labels[i].X).ToList();
    var result = new List<LabelCandidate>();

    foreach (int i in sorted)
    {
        if (used[i]) continue;
        used[i] = true;

        // Seed a group with this box.
        int gx1 = labels[i].X, gy1 = labels[i].Y;
        int gx2 = gx1 + labels[i].W, gy2 = gy1 + labels[i].H;
        double wBri = labels[i].MeanBrightness * labels[i].W * labels[i].H;
        double wSat = labels[i].MeanSaturation * labels[i].W * labels[i].H;
        double area = labels[i].W * labels[i].H;

        // Absorbing a box grows the group, which can bring further boxes within reach,
        // so rescan until a full pass adds nothing.
        bool changed = true;
        while (changed)
        {
            changed = false;
            foreach (int j in sorted)
            {
                if (used[j]) continue;
                var b = labels[j];

                double cyA = (gy1 + gy2) / 2.0;
                double cyB = b.Y + b.H / 2.0;
                if (Math.Abs(cyA - cyB) > yTol) continue;

                // Negative when the boxes overlap horizontally.
                int hGap = Math.Max(b.X - gx2, gx1 - (b.X + b.W));
                if (hGap > gap) continue;

                double bArea = b.W * b.H;
                gx1 = Math.Min(gx1, b.X);
                gy1 = Math.Min(gy1, b.Y);
                gx2 = Math.Max(gx2, b.X + b.W);
                gy2 = Math.Max(gy2, b.Y + b.H);
                wBri += b.MeanBrightness * bArea;
                wSat += b.MeanSaturation * bArea;
                area += bArea;
                used[j] = true;
                changed = true;
            }
        }

        int w = gx2 - gx1, h = gy2 - gy1;
        result.Add(new LabelCandidate(gx1, gy1, w, h,
            (float)(wBri / Math.Max(area, 1)), (float)(wSat / Math.Max(area, 1))));
    }

    return result;
}