rename
This commit is contained in:
parent
bef61f841d
commit
c3de5fdb63
107 changed files with 0 additions and 0 deletions
|
|
@ -1,227 +0,0 @@
|
|||
using System.Runtime.InteropServices;
|
||||
using Microsoft.ML.OnnxRuntime;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
using OpenCvSharp;
|
||||
using OpenCvSharp.Dnn;
|
||||
using Serilog;
|
||||
|
||||
namespace Poe2Trade.Screen;
|
||||
|
||||
/// <summary>
/// YOLO11 object detection via ONNX Runtime (CPU execution provider).
/// Handles letterbox preprocessing, inference, and NMS postprocessing.
/// Buffers are pooled to avoid LOH allocations that trigger Gen2 GC pauses.
/// </summary>
/// <remarks>
/// The scratch buffers and Mats are shared between calls, so one instance must not run
/// <see cref="Detect"/> from several threads at the same time.
/// </remarks>
public class OnnxYoloDetector : IDisposable
{
    /// <summary>Per-channel fill value of the letterbox border.</summary>
    private const int LetterboxFill = 114;

    /// <summary>Number of leading output rows holding the box (cx, cy, w, h); class scores follow.</summary>
    private const int BoxRows = 4;

    private readonly InferenceSession _session;
    private readonly string[] _classNames;
    private readonly int _imgSize;
    private readonly float _confThreshold;
    private readonly float _iouThreshold;
    private readonly string _inputName;
    private bool _warmedUp;
    private bool _disposed;

    // Pooled buffers — allocated once, reused every inference (avoids LOH/GC pressure)
    private readonly float[] _tensorBuffer;   // 3 * imgSize * imgSize floats (~1.2M floats ≈ 4.7 MB for 640)
    private float[]? _outputBuffer;            // rowSize * numDetections, sized on first use

    // Pre-allocated Mats for preprocessing (reused every inference — avoids alloc/GC per frame)
    private readonly Mat _resized = new();
    private readonly Mat _padded;
    private readonly Mat _rgb = new();
    private readonly Mat _floatMat = new();

    /// <summary>
    /// Loads the ONNX model and allocates the reusable preprocessing buffers.
    /// </summary>
    /// <param name="modelPath">Path of the ONNX model file.</param>
    /// <param name="classNames">Class labels, indexed by the model's class id.</param>
    /// <param name="confThreshold">Minimum class score for a candidate to be kept.</param>
    /// <param name="iouThreshold">IoU threshold passed to non-maximum suppression.</param>
    /// <exception cref="ArgumentNullException"><paramref name="modelPath"/> or <paramref name="classNames"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The model input is not a fixed-size square NCHW tensor.</exception>
    public OnnxYoloDetector(string modelPath, string[] classNames,
        float confThreshold = 0.40f, float iouThreshold = 0.45f)
    {
        ArgumentNullException.ThrowIfNull(modelPath);
        ArgumentNullException.ThrowIfNull(classNames);

        _classNames = classNames;
        _confThreshold = confThreshold;
        _iouThreshold = iouThreshold;

        // The options are only needed while the session is being created, so release them afterwards.
        using var opts = new SessionOptions();
        opts.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
        opts.InterOpNumThreads = 1;  // single model, no inter-op parallelism needed
        // Use half the cores (leave room for game + pipeline), but never request fewer than one thread.
        opts.IntraOpNumThreads = Math.Max(1, Environment.ProcessorCount / 2);
        opts.ExecutionMode = ExecutionMode.ORT_SEQUENTIAL;  // sequential is faster for single inference
        // CPU EP — avoids GPU contention with DXGI screen capture
        Log.Information("OnnxYolo: using CPU EP, intra threads={Threads}", opts.IntraOpNumThreads);

        _session = new InferenceSession(modelPath, opts);
        try
        {
            _inputName = _session.InputNames[0];

            // Read imgSize from the model's input shape (NCHW: [1, 3, H, W])
            var dims = _session.InputMetadata[_inputName].Dimensions;
            if (dims.Length != 4 || dims[2] <= 0 || (dims[3] > 0 && dims[3] != dims[2]))
            {
                throw new InvalidOperationException(
                    $"Model input '{_inputName}' must be a fixed-size square NCHW tensor, got [{string.Join(", ", dims)}].");
            }

            _imgSize = dims[2];  // H == W for square YOLO input

            _tensorBuffer = new float[3 * _imgSize * _imgSize];
            _padded = new Mat(_imgSize, _imgSize, MatType.CV_8UC3,
                new Scalar(LetterboxFill, LetterboxFill, LetterboxFill));
        }
        catch
        {
            // Do not leak native resources when construction fails half-way.
            _session.Dispose();
            _resized.Dispose();
            _rgb.Dispose();
            _floatMat.Dispose();
            throw;
        }

        Log.Information("OnnxYolo: loaded {Path} (input: {Input}, imgSize: {ImgSize})",
            modelPath, _inputName, _imgSize);
    }

    /// <summary>
    /// Run detection on a BGR Mat. Returns detected bosses in original image coordinates.
    /// </summary>
    /// <param name="bgrMat">Non-empty 8-bit, 3-channel (CV_8UC3) image.</param>
    /// <returns>
    /// The detections that survived NMS, plus timings in milliseconds: whole call,
    /// preprocessing only, and inference only.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="bgrMat"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="bgrMat"/> is empty or not CV_8UC3.</exception>
    /// <exception cref="ObjectDisposedException">The detector has been disposed.</exception>
    public (List<DetectedBoss> Detections, float TotalMs, float PreMs, float InfMs) Detect(Mat bgrMat)
    {
        if (_disposed)
            throw new ObjectDisposedException(nameof(OnnxYoloDetector));
        ArgumentNullException.ThrowIfNull(bgrMat);
        if (bgrMat.Empty())
            throw new ArgumentException("Input image is empty.", nameof(bgrMat));
        // Any other type would make CopyTo reallocate the ROI instead of writing into the
        // letterbox canvas, so the model would silently see an all-gray image.
        if (bgrMat.Type() != MatType.CV_8UC3)
            throw new ArgumentException("Input image must be 8-bit, 3-channel BGR (CV_8UC3).", nameof(bgrMat));

        var swTotal = System.Diagnostics.Stopwatch.StartNew();

        // 1. Letterbox preprocess (reuses _tensorBuffer)
        var swPre = System.Diagnostics.Stopwatch.StartNew();
        var (tensor, scale, padX, padY) = Preprocess(bgrMat);
        swPre.Stop();

        // 2. Run inference
        var swInf = System.Diagnostics.Stopwatch.StartNew();
        var inputs = new List<NamedOnnxValue>(1)
        {
            NamedOnnxValue.CreateFromTensor(_inputName, tensor)
        };
        using var results = _session.Run(inputs);
        swInf.Stop();

        // 3. Parse output (reuses _outputBuffer)
        var outputTensor = results.First().AsTensor<float>();
        var detections = Postprocess(outputTensor, scale, padX, padY, bgrMat.Width, bgrMat.Height);

        swTotal.Stop();
        var totalMs = (float)swTotal.Elapsed.TotalMilliseconds;

        // The first call is logged separately because it includes one-time warmup cost.
        if (!_warmedUp)
        {
            _warmedUp = true;
            Log.Information("OnnxYolo warmup: pre={Pre:F0}ms inf={Inf:F0}ms total={Total:F0}ms",
                swPre.Elapsed.TotalMilliseconds, swInf.Elapsed.TotalMilliseconds, totalMs);
        }

        return (detections, totalMs, (float)swPre.Elapsed.TotalMilliseconds, (float)swInf.Elapsed.TotalMilliseconds);
    }

    /// <summary>
    /// Letterboxes the image to imgSize x imgSize (aspect-preserving resize centred on a gray
    /// canvas), converts BGR to RGB, scales to [0, 1] and lays the pixels out as NCHW in the
    /// pooled tensor buffer.
    /// </summary>
    /// <returns>
    /// The input tensor (backed by the pooled buffer), the resize factor, and the left/top
    /// padding in pixels, which are needed to map boxes back to the original image.
    /// </returns>
    private (DenseTensor<float> tensor, float scale, int padX, int padY) Preprocess(Mat bgrMat)
    {
        int origW = bgrMat.Width, origH = bgrMat.Height;

        float scale = Math.Min((float)_imgSize / origW, (float)_imgSize / origH);
        // Clamp so extreme aspect ratios never round down to a zero-sized resize target.
        int newW = Math.Clamp((int)Math.Round(origW * scale), 1, _imgSize);
        int newH = Math.Clamp((int)Math.Round(origH * scale), 1, _imgSize);

        int padX = (_imgSize - newW) / 2;
        int padY = (_imgSize - newH) / 2;

        Cv2.Resize(bgrMat, _resized, new Size(newW, newH), interpolation: InterpolationFlags.Linear);

        // Reset the border, then paste the resized frame into the centre of the canvas.
        _padded.SetTo(new Scalar(LetterboxFill, LetterboxFill, LetterboxFill));
        using (var roi = _padded[new Rect(padX, padY, newW, newH)])
        {
            // The ROI is only a view header on _padded; dispose it so it does not pile up per frame.
            _resized.CopyTo(roi);
        }

        Cv2.CvtColor(_padded, _rgb, ColorConversionCodes.BGR2RGB);

        _rgb.ConvertTo(_floatMat, MatType.CV_32FC3, 1.0 / 255.0);

        // HWC → NCHW via channel split + Marshal.Copy into pooled buffer
        int pixels = _imgSize * _imgSize;
        Cv2.Split(_floatMat, out Mat[] channels);
        try
        {
            for (int c = 0; c < 3; c++)
                Marshal.Copy(channels[c].Data, _tensorBuffer, c * pixels, pixels);
        }
        finally
        {
            foreach (var ch in channels) ch.Dispose();
        }

        // Wrap pooled buffer in tensor (no copy — DenseTensor references the array)
        var tensor = new DenseTensor<float>(_tensorBuffer, [1, 3, _imgSize, _imgSize]);
        return (tensor, scale, padX, padY);
    }

    /// <summary>
    /// Decodes the raw model output into detections in original-image pixel coordinates.
    /// </summary>
    /// <remarks>
    /// The output is read as [batch, 4 + nc, N]: rows 0-3 are cx, cy, w, h in letterboxed
    /// pixels, the remaining rows are per-class scores; only batch item 0 is used.
    /// Candidates below the confidence threshold are dropped, boxes are clipped to the image,
    /// and overlapping boxes are removed with class-agnostic NMS.
    /// </remarks>
    /// <exception cref="InvalidOperationException">The output tensor is not rank 3.</exception>
    private List<DetectedBoss> Postprocess(Tensor<float> output, float scale,
        int padX, int padY, int origW, int origH)
    {
        if (output.Dimensions.Length != 3)
        {
            throw new InvalidOperationException(
                $"Expected a rank-3 YOLO output [1, 4 + nc, N], got rank {output.Dimensions.Length}.");
        }

        int rowSize = output.Dimensions[1];  // 4 + nc
        int numDetections = output.Dimensions[2];
        // Never read score rows the model does not provide, even if more class names were supplied.
        int numClasses = Math.Min(_classNames.Length, rowSize - BoxRows);
        if (numClasses <= 0 || numDetections <= 0)
            return [];

        int flatSize = rowSize * numDetections;

        // Reuse output buffer (resize only if model output shape changed)
        if (_outputBuffer == null || _outputBuffer.Length < flatSize)
            _outputBuffer = new float[flatSize];

        if (output is DenseTensor<float> dense)
        {
            // Copy only the first batch item so a larger backing buffer cannot overflow the pool.
            dense.Buffer.Span.Slice(0, flatSize).CopyTo(_outputBuffer);
        }
        else
        {
            for (int r = 0; r < rowSize; r++)
                for (int i = 0; i < numDetections; i++)
                    _outputBuffer[r * numDetections + i] = output[0, r, i];
        }

        var boxes = new List<Rect>();
        var confidences = new List<float>();
        var classIds = new List<int>();

        for (int i = 0; i < numDetections; i++)
        {
            // Pick the highest-scoring class for this candidate.
            float bestConf = 0;
            int bestClass = 0;
            for (int c = 0; c < numClasses; c++)
            {
                float conf = _outputBuffer[(BoxRows + c) * numDetections + i];
                if (conf > bestConf)
                {
                    bestConf = conf;
                    bestClass = c;
                }
            }

            if (bestConf < _confThreshold) continue;

            float cx = _outputBuffer[0 * numDetections + i];
            float cy = _outputBuffer[1 * numDetections + i];
            float w = _outputBuffer[2 * numDetections + i];
            float h = _outputBuffer[3 * numDetections + i];

            // Undo the letterbox: remove the padding, then divide by the resize factor.
            float left = (cx - w / 2 - padX) / scale;
            float top = (cy - h / 2 - padY) / scale;
            float right = (cx + w / 2 - padX) / scale;
            float bottom = (cy + h / 2 - padY) / scale;

            // Clip both edges to the image so a box cut on the left/top also shrinks,
            // instead of being shifted while keeping its full size.
            left = Math.Clamp(left, 0f, origW);
            top = Math.Clamp(top, 0f, origH);
            right = Math.Clamp(right, 0f, origW);
            bottom = Math.Clamp(bottom, 0f, origH);

            int boxW = (int)(right - left);
            int boxH = (int)(bottom - top);
            // Boxes entirely outside the image collapse to nothing; skip them.
            if (boxW <= 0 || boxH <= 0) continue;

            boxes.Add(new Rect((int)left, (int)top, boxW, boxH));
            confidences.Add(bestConf);
            classIds.Add(bestClass);
        }

        if (boxes.Count == 0)
            return [];

        CvDnn.NMSBoxes(boxes, confidences, _confThreshold, _iouThreshold, out int[] indices);

        var detections = new List<DetectedBoss>(indices.Length);
        foreach (var idx in indices)
        {
            var box = boxes[idx];
            detections.Add(new DetectedBoss(
                _classNames[classIds[idx]],
                confidences[idx],
                box.X, box.Y, box.Width, box.Height,
                box.X + box.Width / 2,
                box.Y + box.Height / 2));
        }

        return detections;
    }

    /// <summary>
    /// Releases the inference session and the pooled Mats. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        _session.Dispose();
        _resized.Dispose();
        _padded.Dispose();
        _rgb.Dispose();
        _floatMat.Dispose();
        GC.SuppressFinalize(this);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue