got detection somewhat working

This commit is contained in:
Boki 2026-02-20 21:57:51 -05:00
parent 40d30115bf
commit c75b2b27f0
10 changed files with 500 additions and 56 deletions

View file

@ -91,11 +91,36 @@ public class BotOrchestrator : IAsyncDisposable
BossRunExecutor = new BossRunExecutor(game, screen, inventory, logWatcher, store.Settings, BossDetector);
logWatcher.AreaEntered += _ => Navigation.Reset();
logWatcher.AreaEntered += area =>
{
Navigation.Reset();
OnAreaEntered(area);
};
logWatcher.Start(); // start early so area events fire even before Bot.Start()
_paused = store.Settings.Paused;
}
// Boss zones → boss name mapping
private static readonly Dictionary<string, string> BossZones = new(StringComparer.OrdinalIgnoreCase)
{
["The Black Cathedral"] = "kulemak",
};
// Arms/disarms the boss detector based on the area the player just entered.
private void OnAreaEntered(string area)
{
    // Entering a mapped boss arena: point the detector at that boss and arm it.
    if (BossZones.TryGetValue(area, out var boss))
    {
        BossDetector.SetBoss(boss);
        BossDetector.Enabled = true;
        Log.Information("Boss zone detected: {Area} → enabling {Boss} detector", area, boss);
        return;
    }
    // Any non-boss area: disarm a previously-armed detector, otherwise do nothing.
    if (!BossDetector.Enabled) return;
    BossDetector.Enabled = false;
    Log.Information("Left boss zone → disabling boss detector");
}
public bool IsReady => _started;
public bool IsPaused => _paused;

View file

@ -1,78 +1,228 @@
using Poe2Trade.Core;
using OpenCvSharp;
using Serilog;
using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen;
/// <summary>
/// Detects bosses using YOLO running on a background thread.
/// Process() feeds frames, YOLO updates Latest with authoritative positions.
/// At ~26ms inference, YOLO runs fast enough to not need template tracking.
/// </summary>
public class BossDetector : IFrameConsumer, IDisposable
{
private const int DetectEveryNFrames = 6;
private const int MinConsecutiveFrames = 2;
private const string ModelsDir = "tools/python-detect/models";
private readonly PythonDetectBridge _bridge = new();
private volatile BossSnapshot _latest = new([], 0, 0);
private int _frameCounter;
private int _consecutiveDetections;
private OnnxYoloDetector? _detector;
private string _modelName = "boss-kulemak";
private string _bossName = "kulemak";
private volatile BossSnapshot _latest = new([], 0, 0);
private BossSnapshot _previous = new([], 0, 0);
private int _consecutiveDetections;
private int _inferenceCount;
// Async frame-slot: Process() drops frame here, background loop runs YOLO
private volatile Mat? _pendingFrame;
private readonly ManualResetEventSlim _frameReady = new(false);
private Task? _inferenceLoop;
private CancellationTokenSource? _cts;
private bool _enabled;
public bool Enabled
{
get => _enabled;
set
{
if (_enabled == value) return;
_enabled = value;
if (value)
StartLoop();
else
StopLoop();
}
}
public bool Enabled { get; set; }
public BossSnapshot Latest => _latest;
public event Action<BossSnapshot>? BossDetected;
/// <summary>
/// Switches the detector to a different boss/model and resets detection state.
/// If the detector is currently running, the inference loop is restarted so it
/// picks up the new model.
/// </summary>
public void SetBoss(string bossName)
{
    _bossName = bossName;
    _modelName = $"boss-{bossName}";
    _consecutiveDetections = 0;
    _inferenceCount = 0;
    if (!_enabled) return;
    // Already running — bounce the loop to reload with the new model.
    StopLoop();
    StartLoop();
}
/// <summary>
/// Called by FramePipeline every frame.
/// Feeds the latest frame to YOLO (drops older pending frames).
/// </summary>
public void Process(ScreenFrame frame)
{
// NOTE(review): the two enabled-guards below appear to test the same flag
// (the Enabled property is backed by _enabled) — looks like a merge leftover;
// confirm and drop one.
if (!Enabled) return;
// NOTE(review): this every-Nth-frame throttle predates the async frame-slot
// below, which already drops stale frames — confirm it is still wanted.
if (++_frameCounter % DetectEveryNFrames != 0) return;
if (!_enabled) return;
// Copy the full frame out of the capture buffer; the pipeline may recycle it
// as soon as Process() returns.
var fullRegion = new Region(0, 0, frame.Width, frame.Height);
using var bgr = frame.CropBgr(fullRegion);
var clone = bgr.Clone();
// Single-slot handoff: replace any not-yet-consumed frame (and free it) so
// the inference loop always sees the newest capture, then wake the loop.
var old = Interlocked.Exchange(ref _pendingFrame, clone);
old?.Dispose();
_frameReady.Set();
}
// Clips a rectangle to the [0,0]–[maxW,maxH] frame: the origin is pushed inside
// the frame, then the extent is shrunk (never below zero) so the rectangle
// cannot reach past the right/bottom edge.
private static Rect ClampRect(Rect r, int maxW, int maxH)
{
    int left = Math.Max(0, r.X);
    int top = Math.Max(0, r.Y);
    int width = Math.Max(0, Math.Min(r.Width, maxW - left));
    int height = Math.Max(0, Math.Min(r.Height, maxH - top));
    return new Rect(left, top, width, height);
}
// ── YOLO inference (background thread) ──────────────────────
// Loads the ONNX model for the current boss and starts the background
// inference loop. No-op (with an error log) if the model file is missing
// or fails to load.
private void StartLoop()
{
    // Tear down any previous loop/detector before swapping models.
    StopLoop();
    var modelPath = Path.GetFullPath(Path.Combine(ModelsDir, $"{_modelName}.onnx"));
    if (!File.Exists(modelPath))
    {
        Log.Error("BossDetector: ONNX model not found at {Path}", modelPath);
        return;
    }
    try
    {
        // Single-class model: the class list is just the boss name.
        // (The previous synchronous bridge-based detection path was removed;
        // this block had stale interleaved code referencing an undefined frame.)
        _detector = new OnnxYoloDetector(modelPath, [_bossName], confThreshold: 0.40f);
    }
    catch (Exception ex)
    {
        Log.Error(ex, "BossDetector: failed to load ONNX model {Path}", modelPath);
        return;
    }
    _cts = new CancellationTokenSource();
    // LongRunning: dedicated thread so YOLO inference never starves the pool.
    _inferenceLoop = Task.Factory.StartNew(
        () => InferenceLoop(_cts.Token),
        _cts.Token,
        TaskCreationOptions.LongRunning,
        TaskScheduler.Default).Unwrap();
    Log.Information("BossDetector: started inference loop with {Model}", _modelName);
}
public void Dispose() => _bridge.Dispose();
// Cancels and joins the inference loop (bounded wait), then releases the
// detector, any pending frame, and resets the gating/detection state.
private void StopLoop()
{
    if (_cts is not null)
    {
        _cts.Cancel();
        _frameReady.Set(); // wake the loop so it can observe cancellation
        try { _inferenceLoop?.Wait(TimeSpan.FromSeconds(3)); } catch { /* timeout ok */ }
        _cts.Dispose();
        _cts = null;
        _inferenceLoop = null;
    }
    _detector?.Dispose();
    _detector = null;
    // Drop (and free) any frame still parked in the handoff slot.
    Interlocked.Exchange(ref _pendingFrame, null)?.Dispose();
    _frameReady.Reset();
    _consecutiveDetections = 0;
}
// Background consumer: waits for Process() to publish a frame into the
// single-slot handoff, runs YOLO on it, and publishes an enriched snapshot
// (with per-boss velocity estimates). Runs until the token from StartLoop fires.
private async Task InferenceLoop(CancellationToken ct)
{
Log.Information("BossDetector: inference loop started");
while (!ct.IsCancellationRequested)
{
try
{
// Block until a frame is published (StopLoop also Sets to unblock us).
_frameReady.Wait(ct);
_frameReady.Reset();
// Claim the pending frame; null means the Set raced a frame we already took.
var frame = Interlocked.Exchange(ref _pendingFrame, null);
if (frame == null) continue;
try
{
var (detections, totalMs, preMs, infMs) = _detector!.Detect(frame);
_inferenceCount++;
// Periodic health/timing log (every 15th inference).
if (_inferenceCount % 15 == 0)
Log.Information("BossDetect: {Count} hits, total={Total:F0}ms (pre={Pre:F0} inf={Inf:F0}), consecutive={Consec}",
detections.Count, totalMs, preMs, infMs, _consecutiveDetections);
if (detections.Count > 0)
{
_consecutiveDetections++;
var timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
var deltaMs = (float)(timestamp - _previous.Timestamp);
// Estimate px/ms velocity by matching each detection to the previous
// snapshot's detection of the same class (first match wins).
var enriched = new List<DetectedBoss>(detections.Count);
foreach (var det in detections)
{
float vx = 0, vy = 0;
if (deltaMs > 0 && _previous.Bosses.Count > 0)
{
var prev = _previous.Bosses.FirstOrDefault(b => b.ClassName == det.ClassName);
if (prev != null)
{
vx = (det.Cx - prev.Cx) / deltaMs;
vy = (det.Cy - prev.Cy) / deltaMs;
}
}
enriched.Add(det with { VxPerMs = vx, VyPerMs = vy });
}
var snapshot = new BossSnapshot(
enriched.AsReadOnly(),
timestamp,
totalMs);
_latest = snapshot;
_previous = snapshot;
// Debounce: only raise the event after N consecutive positive frames.
if (_consecutiveDetections >= MinConsecutiveFrames)
BossDetected?.Invoke(snapshot);
}
else
{
// Miss: reset the debounce counter and publish an empty snapshot.
// Note: _previous intentionally keeps the last positive snapshot.
_consecutiveDetections = 0;
_latest = new BossSnapshot([], DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), 0);
}
}
finally
{
// The loop owns the claimed frame; always free it.
frame.Dispose();
}
}
catch (OperationCanceledException)
{
break;
}
catch (Exception ex)
{
// Transient failure: log and back off briefly instead of killing the loop.
Log.Warning(ex, "BossDetector inference failed");
await Task.Delay(100, ct);
}
}
Log.Information("BossDetector: inference loop stopped");
}
// Stops the inference loop (releasing the detector and pending frame) and
// disposes the wake-up event.
// NOTE(review): _bridge (PythonDetectBridge, still declared above) is
// IDisposable but is no longer disposed here — confirm whether it is still
// used, and either dispose it or remove the field.
public void Dispose()
{
StopLoop();
_frameReady.Dispose();
}
}

View file

@ -15,7 +15,8 @@ public record DetectedBoss(
string ClassName,
float Confidence,
int X, int Y, int Width, int Height,
int Cx, int Cy);
int Cx, int Cy,
float VxPerMs = 0, float VyPerMs = 0);
public record BossSnapshot(
IReadOnlyList<DetectedBoss> Bosses,

View file

@ -0,0 +1,227 @@
using System.Runtime.InteropServices;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;
using OpenCvSharp.Dnn;
using Serilog;
namespace Poe2Trade.Screen;
/// <summary>
/// YOLO11 object detection via ONNX Runtime with CUDA GPU acceleration.
/// Handles letterbox preprocessing, inference, and NMS postprocessing.
/// Buffers are pooled to avoid LOH allocations that trigger Gen2 GC pauses.
/// </summary>
public class OnnxYoloDetector : IDisposable
{
    private readonly InferenceSession _session;
    private readonly string[] _classNames;
    private readonly int _imgSize;           // square model input side, read from the model itself
    private readonly float _confThreshold;
    private readonly float _iouThreshold;
    private readonly string _inputName;
    private bool _warmedUp;

    // Pooled buffers — allocated once, reused every inference (avoids LOH/GC pressure)
    private readonly float[] _tensorBuffer;  // 3 * imgSize * imgSize (~1.2MB for 640)
    private float[]? _outputBuffer;          // rowSize * numDetections, sized on first use

    // Pre-allocated Mats for preprocessing (reused every inference — avoids alloc/GC per frame)
    private readonly Mat _resized = new();
    private readonly Mat _padded;
    private readonly Mat _rgb = new();
    private readonly Mat _floatMat = new();

    /// <summary>
    /// Loads the ONNX model and pre-allocates every reusable buffer.
    /// </summary>
    /// <param name="modelPath">Path to a YOLO11 .onnx file with a static NCHW input.</param>
    /// <param name="classNames">Class names in model output order.</param>
    /// <param name="confThreshold">Minimum per-class confidence for a box to be kept.</param>
    /// <param name="iouThreshold">IoU threshold used by non-maximum suppression.</param>
    /// <exception cref="InvalidOperationException">The model's input shape is dynamic.</exception>
    public OnnxYoloDetector(string modelPath, string[] classNames,
        float confThreshold = 0.40f, float iouThreshold = 0.45f)
    {
        _classNames = classNames;
        _confThreshold = confThreshold;
        _iouThreshold = iouThreshold;

        // SessionOptions is IDisposable and only needed while the session is built;
        // ORT copies the options at session creation, so disposing afterwards is safe.
        using var opts = new SessionOptions();
        opts.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
        opts.InterOpNumThreads = 1; // single model, no inter-op parallelism needed
        // Use half the cores (leave room for game + pipeline), but never pass 0:
        // on a single-core box ProcessorCount/2 == 0, which ORT treats as
        // "pick a default" rather than the intended single thread.
        opts.IntraOpNumThreads = Math.Max(1, Environment.ProcessorCount / 2);
        opts.ExecutionMode = ExecutionMode.ORT_SEQUENTIAL; // sequential is faster for single inference
        // CPU EP — avoids GPU contention with DXGI screen capture
        Log.Information("OnnxYolo: using CPU EP, intra threads={Threads}", opts.IntraOpNumThreads);
        _session = new InferenceSession(modelPath, opts);
        _inputName = _session.InputNames[0];

        // Read imgSize from the model's input shape (NCHW: [1, 3, H, W]).
        var inputMeta = _session.InputMetadata[_inputName];
        _imgSize = inputMeta.Dimensions[2]; // H == W for square YOLO input
        // Dynamic axes report -1; fail fast with a clear message instead of
        // crashing later on a negative buffer size.
        if (_imgSize <= 0)
            throw new InvalidOperationException(
                $"Model has a dynamic input shape; re-export with a fixed imgsz: {modelPath}");

        _tensorBuffer = new float[3 * _imgSize * _imgSize];
        _padded = new Mat(_imgSize, _imgSize, MatType.CV_8UC3, new Scalar(114, 114, 114));
        Log.Information("OnnxYolo: loaded {Path} (input: {Input}, imgSize: {ImgSize})",
            modelPath, _inputName, _imgSize);
    }

    /// <summary>
    /// Run detection on a BGR Mat. Returns detected bosses in original image
    /// coordinates plus total/preprocess/inference timings in milliseconds.
    /// Not thread-safe: the pooled buffers and Mats are shared across calls.
    /// </summary>
    public (List<DetectedBoss> Detections, float TotalMs, float PreMs, float InfMs) Detect(Mat bgrMat)
    {
        var swTotal = System.Diagnostics.Stopwatch.StartNew();

        // 1. Letterbox preprocess (reuses _tensorBuffer)
        var swPre = System.Diagnostics.Stopwatch.StartNew();
        var (tensor, scale, padX, padY) = Preprocess(bgrMat);
        swPre.Stop();

        // 2. Run inference
        var swInf = System.Diagnostics.Stopwatch.StartNew();
        var inputs = new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor(_inputName, tensor)
        };
        using var results = _session.Run(inputs);
        swInf.Stop();

        // 3. Parse output (reuses _outputBuffer)
        var outputTensor = results.First().AsTensor<float>();
        var detections = Postprocess(outputTensor, scale, padX, padY, bgrMat.Width, bgrMat.Height);

        swTotal.Stop();
        var totalMs = (float)swTotal.Elapsed.TotalMilliseconds;
        if (!_warmedUp)
        {
            // First call carries one-time JIT/allocation cost — log it once.
            _warmedUp = true;
            Log.Information("OnnxYolo warmup: pre={Pre:F0}ms inf={Inf:F0}ms total={Total:F0}ms",
                swPre.Elapsed.TotalMilliseconds, swInf.Elapsed.TotalMilliseconds, totalMs);
        }
        return (detections, totalMs, (float)swPre.Elapsed.TotalMilliseconds, (float)swInf.Elapsed.TotalMilliseconds);
    }

    // Letterbox the frame into the square model input (gray 114 padding),
    // convert BGR→RGB, scale to [0,1], and repack HWC → NCHW into the pooled
    // tensor buffer. Returns the tensor plus the transform needed to map
    // detections back to original coordinates.
    private (DenseTensor<float> tensor, float scale, int padX, int padY) Preprocess(Mat bgrMat)
    {
        int origW = bgrMat.Width, origH = bgrMat.Height;
        float scale = Math.Min((float)_imgSize / origW, (float)_imgSize / origH);
        int newW = (int)Math.Round(origW * scale);
        int newH = (int)Math.Round(origH * scale);
        int padX = (_imgSize - newW) / 2;
        int padY = (_imgSize - newH) / 2;

        Cv2.Resize(bgrMat, _resized, new Size(newW, newH), interpolation: InterpolationFlags.Linear);
        _padded.SetTo(new Scalar(114, 114, 114));
        // The submat indexer allocates a Mat header; dispose it explicitly
        // (previously leaked one native header per frame).
        using (var roi = _padded[new Rect(padX, padY, newW, newH)])
            _resized.CopyTo(roi);
        Cv2.CvtColor(_padded, _rgb, ColorConversionCodes.BGR2RGB);
        _rgb.ConvertTo(_floatMat, MatType.CV_32FC3, 1.0 / 255.0);

        // HWC → NCHW via channel split + Marshal.Copy into pooled buffer
        int pixels = _imgSize * _imgSize;
        Cv2.Split(_floatMat, out Mat[] channels);
        try
        {
            for (int c = 0; c < 3; c++)
                Marshal.Copy(channels[c].Data, _tensorBuffer, c * pixels, pixels);
        }
        finally
        {
            foreach (var ch in channels) ch.Dispose();
        }

        // Wrap pooled buffer in tensor (no copy — DenseTensor references the array)
        var tensor = new DenseTensor<float>(_tensorBuffer, [1, 3, _imgSize, _imgSize]);
        return (tensor, scale, padX, padY);
    }

    // Decode the raw [1, 4+nc, N] YOLO output: pick the best class per candidate,
    // undo the letterbox transform back to original image coordinates, clamp to
    // the image bounds, then run NMS to drop overlapping boxes.
    private List<DetectedBoss> Postprocess(Tensor<float> output, float scale,
        int padX, int padY, int origW, int origH)
    {
        int numClasses = _classNames.Length;
        int numDetections = output.Dimensions[2];
        int rowSize = output.Dimensions[1]; // 4 + nc
        int flatSize = rowSize * numDetections;

        // Reuse output buffer (grow only when the model output is larger than before)
        if (_outputBuffer == null || _outputBuffer.Length < flatSize)
            _outputBuffer = new float[flatSize];
        if (output is DenseTensor<float> dense)
            dense.Buffer.Span.CopyTo(_outputBuffer);
        else
            for (int r = 0; r < rowSize; r++)
                for (int i = 0; i < numDetections; i++)
                    _outputBuffer[r * numDetections + i] = output[0, r, i];

        var boxes = new List<Rect>();
        var confidences = new List<float>();
        var classIds = new List<int>();
        for (int i = 0; i < numDetections; i++)
        {
            // Best class score for this candidate
            float bestConf = 0;
            int bestClass = 0;
            for (int c = 0; c < numClasses; c++)
            {
                float conf = _outputBuffer[(4 + c) * numDetections + i];
                if (conf > bestConf)
                {
                    bestConf = conf;
                    bestClass = c;
                }
            }
            if (bestConf < _confThreshold) continue;

            // cx/cy/w/h are in letterboxed model space: undo padding, then scale.
            float cx = _outputBuffer[0 * numDetections + i];
            float cy = _outputBuffer[1 * numDetections + i];
            float w = _outputBuffer[2 * numDetections + i];
            float h = _outputBuffer[3 * numDetections + i];
            float x1 = (cx - w / 2 - padX) / scale;
            float y1 = (cy - h / 2 - padY) / scale;
            float bw = w / scale;
            float bh = h / scale;
            // Clamp to the original image bounds
            x1 = Math.Max(0, x1);
            y1 = Math.Max(0, y1);
            bw = Math.Min(bw, origW - x1);
            bh = Math.Min(bh, origH - y1);
            boxes.Add(new Rect((int)x1, (int)y1, (int)bw, (int)bh));
            confidences.Add(bestConf);
            classIds.Add(bestClass);
        }
        if (boxes.Count == 0)
            return [];

        CvDnn.NMSBoxes(boxes, confidences, _confThreshold, _iouThreshold, out int[] indices);
        var detections = new List<DetectedBoss>(indices.Length);
        foreach (var idx in indices)
        {
            var box = boxes[idx];
            detections.Add(new DetectedBoss(
                _classNames[classIds[idx]],
                confidences[idx],
                box.X, box.Y, box.Width, box.Height,
                box.X + box.Width / 2,
                box.Y + box.Height / 2));
        }
        return detections;
    }

    /// <summary>Releases the ONNX session and all pre-allocated Mats.</summary>
    public void Dispose()
    {
        _session.Dispose();
        _resized.Dispose();
        _padded.Dispose();
        _rgb.Dispose();
        _floatMat.Dispose();
    }
}

View file

@ -11,6 +11,7 @@
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.11.0.*" />
<PackageReference Include="System.Drawing.Common" Version="8.0.12" />
<PackageReference Include="Vortice.Direct3D11" Version="3.8.2" />
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.24.*" />
<PackageReference Include="Vortice.DXGI" Version="3.8.2" />
</ItemGroup>
<ItemGroup>

View file

@ -39,7 +39,7 @@ class PythonDetectBridge : IDisposable
{
EnsureRunning();
var imageBytes = bgrMat.ToBytes(".png");
var imageBytes = bgrMat.ToBytes(".jpg", [(int)ImwriteFlags.JpegQuality, 95]);
var imageBase64 = Convert.ToBase64String(imageBytes);
var req = new Dictionary<string, object?>

View file

@ -185,6 +185,7 @@ public sealed class D2dOverlay
return new OverlayState(
Enemies: detection.Enemies,
Bosses: bossDetection.Bosses,
BossTimestampMs: bossDetection.Timestamp,
InferenceMs: detection.InferenceMs,
Hud: _bot.HudReader.Current,
NavState: _bot.Navigation.State,

View file

@ -6,6 +6,7 @@ namespace Poe2Trade.Ui.Overlay;
public record OverlayState(
IReadOnlyList<DetectedEnemy> Enemies,
IReadOnlyList<DetectedBoss> Bosses,
long BossTimestampMs,
float InferenceMs,
HudSnapshot? Hud,
NavigationState NavState,

View file

@ -53,10 +53,15 @@ internal sealed class D2dEnemyBoxLayer : ID2dOverlayLayer, IDisposable
rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, textBrush);
}
// Boss bounding boxes (cyan)
// Boss bounding boxes (cyan) — extrapolate position to compensate for inference delay
var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
var ageMs = (float)Math.Clamp(now - state.BossTimestampMs, 0, 60);
foreach (var boss in state.Bosses)
{
var rect = new RectangleF(boss.X, boss.Y, boss.Width, boss.Height);
float dx = boss.VxPerMs * ageMs;
float dy = boss.VyPerMs * ageMs;
var rect = new RectangleF(boss.X + dx, boss.Y + dy, boss.Width, boss.Height);
rt.DrawRectangle(rect, ctx.Cyan, 3f);
var pct = Math.Clamp((int)(boss.Confidence * 100), 0, 100);
@ -68,8 +73,8 @@ internal sealed class D2dEnemyBoxLayer : ID2dOverlayLayer, IDisposable
}
var m = layout.Metrics;
var labelX = boss.X;
var labelY = boss.Y - m.Height - 2;
var labelX = boss.X + dx;
var labelY = boss.Y + dy - m.Height - 2;
rt.FillRectangle(
new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),

View file

@ -7,6 +7,7 @@ Subcommands (all take a positional boss name):
runs kulemak List training runs + metrics table
annotate kulemak [dir] Launch annotation GUI
prelabel kulemak [dir] [--model boss-kulemak] Auto-label unlabeled images
export kulemak [--imgsz 640] Export .pt to ONNX format
"""
import argparse
@ -291,6 +292,32 @@ def cmd_prelabel(args):
_PRELABEL_MODEL_DEFAULT = "__auto__"
# ── export ──────────────────────────────────────────────────────
def cmd_export(args):
    """Export a trained .pt model to .onnx for ONNX Runtime inference.

    Looks up models/boss-{boss}.pt, exports it with a fixed input size
    (dynamic=False, so the C# side can read imgsz from the model), and
    prints the resulting file path and size.
    """
    boss = args.boss
    model_name = f"boss-{boss}"
    pt_path = os.path.join(MODELS_DIR, f"{model_name}.pt")
    if not os.path.exists(pt_path):
        print(f"Model not found: {pt_path}")
        return
    # Imported lazily: ultralytics is heavy and only needed for this subcommand.
    from ultralytics import YOLO
    model = YOLO(pt_path)
    print(f"Exporting {pt_path} -> ONNX (imgsz={args.imgsz})...")
    # export() returns the path of the produced artifact — use it instead of
    # guessing where ultralytics wrote the file (it writes next to the .pt).
    exported = model.export(format="onnx", imgsz=args.imgsz, opset=17, simplify=True, dynamic=False)
    onnx_src = exported or os.path.join(MODELS_DIR, f"{model_name}.onnx")
    if os.path.exists(onnx_src):
        size_mb = os.path.getsize(onnx_src) / (1024 * 1024)
        print(f"\nExported: {onnx_src} ({size_mb:.1f} MB)")
    else:
        print(f"\nWarning: expected output not found at {onnx_src}")
# ── CLI ──────────────────────────────────────────────────────────
def main():
parser = argparse.ArgumentParser(
@ -315,10 +342,10 @@ def main():
p = sub.add_parser("train", help="Train YOLO model")
p.add_argument("boss", help="Boss name (e.g. kulemak)")
p.add_argument("--data", default=None, help="Path to data.yaml")
p.add_argument("--model", default="yolo11s", help="YOLO model variant")
p.add_argument("--model", default="yolo11n", help="YOLO model variant")
p.add_argument("--epochs", type=int, default=200, help="Training epochs")
p.add_argument("--imgsz", type=int, default=1280, help="Image size")
p.add_argument("--batch", type=int, default=8, help="Batch size")
p.add_argument("--imgsz", type=int, default=640, help="Image size")
p.add_argument("--batch", type=int, default=16, help="Batch size")
p.add_argument("--device", default="0", help="CUDA device")
p.add_argument("--name", default=None, help="Run name (auto-increments if omitted)")
@ -333,6 +360,11 @@ def main():
p.add_argument("--model", default=_PRELABEL_MODEL_DEFAULT, help="Model name in models/ (default: boss-{boss})")
p.add_argument("--conf", type=float, default=0.20, help="Confidence threshold")
# export
p = sub.add_parser("export", help="Export .pt model to ONNX format")
p.add_argument("boss", help="Boss name (e.g. kulemak)")
p.add_argument("--imgsz", type=int, default=640, help="Image size for export")
args = parser.parse_args()
if args.command is None:
@ -345,6 +377,7 @@ def main():
"train": cmd_train,
"runs": cmd_runs,
"prelabel": cmd_prelabel,
"export": cmd_export,
}
commands[args.command](args)