work on well of souls and yolo detection

This commit is contained in:
Boki 2026-02-20 16:40:50 -05:00
parent 3456e0d62a
commit 40d30115bf
41 changed files with 3031 additions and 148 deletions

8
.gitignore vendored
View file

@ -18,8 +18,16 @@ config.json
browser-data/ browser-data/
*.log *.log
debug-screenshots/ debug-screenshots/
debug/
items/ items/
# YOLO / ML
runs/
training-data/
tools/python-detect/models/
*.pt
*.engine
# IDE / tools # IDE / tools
.claude/ .claude/
nul nul

BIN
assets/black-cathedral.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

BIN
assets/invitation.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

BIN
assets/well-of-souls.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.9 KiB

View file

@ -0,0 +1,529 @@
using System.Diagnostics;
using Poe2Trade.Core;
using Poe2Trade.Game;
using Poe2Trade.GameLog;
using Poe2Trade.Inventory;
using Poe2Trade.Screen;
using Serilog;
namespace Poe2Trade.Bot;
/// <summary>
/// Runs the Kulemak boss-run cycle: prepare at the stash, travel to Well of Souls, walk to the
/// Black Cathedral entrance, use an invitation, (placeholder) fight and loot, return to the
/// hideout and store the loot. The cycle repeats once per configured invitation.
/// </summary>
public class BossRunExecutor
{
    // Template images used for on-screen matching, relative to the working directory.
    private static readonly string WellOfSoulsTemplate = Path.Combine("assets", "well-of-souls.png");
    private static readonly string BlackCathedralTemplate = Path.Combine("assets", "black-cathedral.png");
    private static readonly string InvitationTemplate = Path.Combine("assets", "invitation.png");

    private BossRunState _state = BossRunState.Idle;

    // Written by Stop() and polled by the run loop between phases. Volatile because Stop() is
    // presumably called from a different thread than the one running the loop.
    private volatile bool _stopped;

    private readonly IGameController _game;
    private readonly IScreenReader _screen;
    private readonly IInventoryManager _inventory;
    private readonly IClientLogWatcher _logWatcher;
    private readonly SavedSettings _config;
    private readonly BossDetector _bossDetector;

    /// <summary>Raised every time the executor moves to a new <see cref="BossRunState"/>.</summary>
    public event Action<BossRunState>? StateChanged;

    public BossRunExecutor(IGameController game, IScreenReader screen,
        IInventoryManager inventory, IClientLogWatcher logWatcher, SavedSettings config,
        BossDetector bossDetector)
    {
        _game = game;
        _screen = screen;
        _inventory = inventory;
        _logWatcher = logWatcher;
        _config = config;
        _bossDetector = bossDetector;
    }

    /// <summary>The phase the executor is currently in.</summary>
    public BossRunState State => _state;

    /// <summary>Outcome of one travel/fight/return cycle.</summary>
    private enum RunOutcome
    {
        Completed,
        Stopped,
        Failed
    }

    private void SetState(BossRunState s)
    {
        _state = s;
        StateChanged?.Invoke(s);
    }

    /// <summary>
    /// Requests a cooperative stop. The loop checks the flag between phases, so the phase
    /// currently executing finishes (or times out) before the loop exits.
    /// </summary>
    public void Stop()
    {
        _stopped = true;
        Log.Information("Boss run executor stop requested");
    }

    /// <summary>
    /// Prepares at the stash and then performs up to <c>Kulemak.InvitationCount</c> boss runs.
    /// A failing phase reports <see cref="BossRunState.Failed"/>, triggers a recovery to the
    /// hideout and ends the loop. The state is always reset to <see cref="BossRunState.Idle"/>
    /// on exit, including when an exception escapes, so observers never see a stale phase.
    /// </summary>
    public async Task RunBossLoop()
    {
        _stopped = false;
        _bossDetector.SetBoss("kulemak");
        Log.Information("Starting boss run loop ({Count} invitations)", _config.Kulemak.InvitationCount);

        try
        {
            if (!await Prepare())
            {
                await FailAndRecover();
                return;
            }

            var completed = 0;
            var failed = false;
            for (var i = 0; i < _config.Kulemak.InvitationCount; i++)
            {
                if (_stopped) break;
                Log.Information("=== Boss run {N}/{Total} ===", i + 1, _config.Kulemak.InvitationCount);

                var outcome = await RunSingle();
                if (outcome == RunOutcome.Failed)
                {
                    failed = true;
                    await FailAndRecover();
                    break;
                }
                if (outcome == RunOutcome.Stopped) break;

                completed++;
            }

            Log.Information("Boss run loop finished: {Completed}/{Total} runs completed", completed, _config.Kulemak.InvitationCount);

            // Only advertise Complete when nothing failed; a failed loop goes Failed -> Idle.
            if (!failed)
            {
                SetState(BossRunState.Complete);
                await Helpers.Sleep(1000);
            }
        }
        finally
        {
            SetState(BossRunState.Idle);
        }
    }

    /// <summary>Marks the run as failed and attempts to get the character back to the hideout.</summary>
    private async Task FailAndRecover()
    {
        SetState(BossRunState.Failed);
        await RecoverToHideout();
    }

    /// <summary>
    /// Executes one full cycle (travel, entrance, invitation, fight, loot, return, store).
    /// Returns <see cref="RunOutcome.Stopped"/> as soon as a stop request is observed between
    /// phases and <see cref="RunOutcome.Failed"/> when a phase reports failure.
    /// </summary>
    private async Task<RunOutcome> RunSingle()
    {
        if (!await TravelToZone())
        {
            Log.Error("Failed to travel to zone");
            return RunOutcome.Failed;
        }
        if (_stopped) return RunOutcome.Stopped;

        var entrance = await WalkToEntrance();
        if (entrance == null)
        {
            // WalkAndMatch also returns null when a stop was requested; that is not a failure.
            if (_stopped) return RunOutcome.Stopped;
            Log.Error("Failed to find Black Cathedral entrance");
            return RunOutcome.Failed;
        }
        if (_stopped) return RunOutcome.Stopped;

        if (!await UseInvitation(entrance.X, entrance.Y))
        {
            Log.Error("Failed to use invitation");
            return RunOutcome.Failed;
        }
        if (_stopped) return RunOutcome.Stopped;

        await Fight();
        if (_stopped) return RunOutcome.Stopped;

        await Loot();
        if (_stopped) return RunOutcome.Stopped;

        if (!await ReturnHome())
        {
            Log.Error("Failed to return home");
            return RunOutcome.Failed;
        }
        if (_stopped) return RunOutcome.Stopped;

        await StoreLoot();
        return RunOutcome.Completed;
    }

    /// <summary>
    /// Opens the stash, deposits the current inventory into the loot tab and grabs invitations
    /// from the invitation tab. Returns false only when the stash nameplate cannot be found;
    /// unconfigured tabs are skipped with a warning.
    /// </summary>
    private async Task<bool> Prepare()
    {
        SetState(BossRunState.Preparing);
        Log.Information("Preparing: depositing inventory and grabbing invitations");
        await _game.FocusGame();
        await Helpers.Sleep(Delays.PostFocus);

        // Open stash
        var stashPos = await _inventory.FindAndClickNameplate("Stash");
        if (stashPos == null)
        {
            Log.Error("Could not find Stash nameplate");
            return false;
        }
        await Helpers.Sleep(Delays.PostStashOpen);

        // Click loot tab and deposit all inventory items
        var (lootTab, lootFolder) = ResolveTabPath(_config.Kulemak.LootTabPath);
        if (lootTab != null)
        {
            await _inventory.ClickStashTab(lootTab, lootFolder);
            await DepositInventory("Depositing {Count} inventory items to loot tab");
        }
        else
        {
            Log.Warning("Loot tab path not configured or not found, skipping deposit");
        }

        // Click invitation tab and grab invitations
        var (invTab, invFolder) = ResolveTabPath(_config.Kulemak.InvitationTabPath);
        if (invTab != null)
        {
            await _inventory.ClickStashTab(invTab, invFolder);

            // Layout name depends on the tab's grid size and whether it lives inside a folder.
            var layoutName = (invTab.GridCols == 24, invFolder != null) switch
            {
                (true, true) => "stash24_folder",
                (true, false) => "stash24",
                (false, true) => "stash12_folder",
                (false, false) => "stash12",
            };
            await _inventory.GrabItemsFromStash(layoutName, _config.Kulemak.InvitationCount, InvitationTemplate);
        }
        else
        {
            Log.Warning("Invitation tab path not configured or not found, skipping grab");
        }

        // Close stash
        await _game.PressEscape();
        await Helpers.Sleep(Delays.PostEscape);
        Log.Information("Preparation complete");
        return true;
    }

    /// <summary>
    /// Scans the inventory grid and Ctrl+Shift-clicks every occupied cell so the items move to
    /// the currently open stash tab. The modifier keys are released in a finally block so a
    /// failing click cannot leave Ctrl/Shift held down.
    /// </summary>
    /// <param name="messageTemplate">Log template taking a single <c>{Count}</c> argument.</param>
    private async Task DepositInventory(string messageTemplate)
    {
        var scan = await _screen.Grid.Scan("inventory");
        if (scan.Occupied.Count == 0) return;

        Log.Information(messageTemplate, scan.Occupied.Count);
        try
        {
            await _game.KeyDown(InputSender.VK.SHIFT);
            await _game.HoldCtrl();
            foreach (var cell in scan.Occupied)
            {
                var center = _screen.Grid.GetCellCenter(GridLayouts.Inventory, cell.Row, cell.Col);
                await _game.LeftClickAt(center.X, center.Y);
                await Helpers.Sleep(Delays.ClickInterval);
            }
        }
        finally
        {
            await _game.ReleaseCtrl();
            await _game.KeyUp(InputSender.VK.SHIFT);
        }
        await Helpers.Sleep(Delays.PostEscape);
    }

    /// <summary>
    /// Clicks the waypoint, selects Well of Souls on the waypoint map via template match and
    /// waits for the area transition. Returns false on any miss or timeout.
    /// </summary>
    private async Task<bool> TravelToZone()
    {
        SetState(BossRunState.TravelingToZone);
        Log.Information("Traveling to Well of Souls via waypoint");
        await _game.FocusGame();
        await Helpers.Sleep(Delays.PostFocus);

        // Find and click Waypoint
        var wpPos = await _inventory.FindAndClickNameplate("Waypoint");
        if (wpPos == null)
        {
            Log.Error("Could not find Waypoint nameplate");
            return false;
        }
        await Helpers.Sleep(1000);

        // Template match well-of-souls.png and click
        var match = await _screen.TemplateMatch(WellOfSoulsTemplate);
        if (match == null)
        {
            Log.Error("Could not find Well of Souls on waypoint map");
            await _game.PressEscape();
            return false;
        }
        Log.Information("Found Well of Souls at ({X},{Y}), clicking", match.X, match.Y);
        await _game.LeftClickAt(match.X, match.Y);

        // Wait for area transition
        var arrived = await _inventory.WaitForAreaTransition(_config.TravelTimeoutMs);
        if (!arrived)
        {
            Log.Error("Timed out waiting for Well of Souls transition");
            return false;
        }
        await Helpers.Sleep(Delays.PostTravel);
        Log.Information("Arrived at Well of Souls");
        return true;
    }

    /// <summary>Walks (W+D held) until the Black Cathedral template is close to screen center.</summary>
    private async Task<TemplateMatchResult?> WalkToEntrance()
    {
        SetState(BossRunState.WalkingToEntrance);
        Log.Information("Walking to Black Cathedral entrance (W+D)");
        return await WalkAndMatch(BlackCathedralTemplate, InputSender.VK.W, InputSender.VK.D, 15000);
    }

    /// <summary>
    /// Ctrl-clicks the entrance at the given screen position, picks the leftmost "NEW" instance
    /// found by OCR and waits for the transition into the boss arena.
    /// </summary>
    private async Task<bool> UseInvitation(int x, int y)
    {
        SetState(BossRunState.UsingInvitation);
        Log.Information("Using invitation at ({X},{Y})", x, y);

        // Hover first so the game registers the target, then use invitation
        await _game.MoveMouseTo(x, y);
        await Helpers.Sleep(500);
        await _game.CtrlLeftClickAt(x, y);
        await Helpers.Sleep(1000);

        // Find "NEW" text — pick the leftmost instance. The case-insensitive comparison already
        // covers "New", so no separate case-sensitive check is needed.
        var ocr = await _screen.Ocr();
        var newWords = ocr.Lines
            .SelectMany(l => l.Words)
            .Where(w => w.Text.Equals("NEW", StringComparison.OrdinalIgnoreCase))
            .OrderBy(w => w.X)
            .ToList();
        if (newWords.Count == 0)
        {
            Log.Error("Could not find 'NEW' text for instance selection");
            return false;
        }

        var target = newWords[0];
        var clickX = target.X + target.Width / 2;
        var clickY = target.Y + target.Height / 2;
        Log.Information("Found {Count} 'NEW' matches, clicking leftmost at ({X},{Y})", newWords.Count, clickX, clickY);
        await _game.MoveMouseTo(clickX, clickY);
        await Helpers.Sleep(500);
        await _game.LeftClickAt(clickX, clickY);

        // Wait for area transition into boss arena
        var arrived = await _inventory.WaitForAreaTransition(_config.TravelTimeoutMs);
        if (!arrived)
        {
            Log.Error("Timed out waiting for boss arena transition");
            return false;
        }
        await Helpers.Sleep(Delays.PostTravel);
        Log.Information("Entered boss arena");
        return true;
    }

    /// <summary>Placeholder fight phase: only sets the state and waits one second.</summary>
    private async Task Fight()
    {
        SetState(BossRunState.Fighting);
        Log.Information("[PLACEHOLDER] Fight phase - waiting for manual combat");
        // Placeholder: user handles combat manually for now
        await Helpers.Sleep(1000);
    }

    /// <summary>Placeholder loot phase: only sets the state and waits one second.</summary>
    private async Task Loot()
    {
        SetState(BossRunState.Looting);
        Log.Information("[PLACEHOLDER] Loot phase - waiting for manual looting");
        // Placeholder: user handles looting manually for now
        await Helpers.Sleep(1000);
    }

    /// <summary>
    /// Steps back, opens a portal, takes "The Ardura Caravan" portal and then issues the hideout
    /// command. Returns false when the portal is not found or either transition times out.
    /// </summary>
    private async Task<bool> ReturnHome()
    {
        SetState(BossRunState.Returning);
        Log.Information("Returning home");
        await _game.FocusGame();
        await Helpers.Sleep(Delays.PostFocus);

        // Walk away from loot (hold S briefly); always release S even if the sleep is interrupted.
        try
        {
            await _game.KeyDown(InputSender.VK.S);
            await Helpers.Sleep(1000);
        }
        finally
        {
            await _game.KeyUp(InputSender.VK.S);
        }
        await Helpers.Sleep(300);

        // Press + to open portal
        await _game.PressPlus();
        await Helpers.Sleep(1500);

        // Find "The Ardura Caravan" and click it
        var caravanPos = await _inventory.FindAndClickNameplate("The Ardura Caravan", maxRetries: 5, retryDelayMs: 1500);
        if (caravanPos == null)
        {
            Log.Error("Could not find 'The Ardura Caravan' portal");
            return false;
        }

        // Wait for area transition to caravan
        var arrivedCaravan = await _inventory.WaitForAreaTransition(_config.TravelTimeoutMs);
        if (!arrivedCaravan)
        {
            Log.Error("Timed out waiting for caravan transition");
            return false;
        }
        await Helpers.Sleep(Delays.PostTravel);

        // /hideout to go home
        var arrivedHome = await _inventory.WaitForAreaTransition(
            _config.TravelTimeoutMs, () => _game.GoToHideout());
        if (!arrivedHome)
        {
            Log.Error("Timed out going to hideout");
            return false;
        }
        await Helpers.Sleep(Delays.PostTravel);
        _inventory.SetLocation(true);
        Log.Information("Arrived at hideout");
        return true;
    }

    /// <summary>
    /// Opens the stash, selects the loot tab when one is configured and deposits the whole
    /// inventory. A missing stash nameplate is logged and the step is skipped.
    /// </summary>
    private async Task StoreLoot()
    {
        SetState(BossRunState.StoringLoot);
        Log.Information("Storing loot");
        await _game.FocusGame();
        await Helpers.Sleep(Delays.PostFocus);

        // Open stash
        var stashPos = await _inventory.FindAndClickNameplate("Stash");
        if (stashPos == null)
        {
            Log.Warning("Could not find Stash, skipping loot storage");
            return;
        }
        await Helpers.Sleep(Delays.PostStashOpen);

        // Click loot tab; without one the items go to whichever tab the stash opened on.
        var (lootTab, lootFolder) = ResolveTabPath(_config.Kulemak.LootTabPath);
        if (lootTab != null)
            await _inventory.ClickStashTab(lootTab, lootFolder);
        else
            Log.Warning("Loot tab path not configured or not found, depositing into the currently open tab");

        // Deposit all inventory items
        await DepositInventory("Depositing {Count} items to loot tab");

        // Close stash
        await _game.PressEscape();
        await Helpers.Sleep(Delays.PostEscape);
        Log.Information("Loot stored");
    }

    /// <summary>
    /// Holds two movement keys while polling for a template; once the match is within
    /// <paramref name="closeRadius"/> pixels of the screen center it stops, lets the view settle
    /// and re-matches for a final position.
    /// </summary>
    /// <param name="templatePath">Template image to look for.</param>
    /// <param name="vk1">First virtual-key code to hold.</param>
    /// <param name="vk2">Second virtual-key code to hold.</param>
    /// <param name="timeoutMs">Give up after this many milliseconds.</param>
    /// <param name="closeRadius">Distance from screen center, in pixels, that counts as arrived.</param>
    /// <returns>The final match, or null on timeout or when a stop was requested.</returns>
    private async Task<TemplateMatchResult?> WalkAndMatch(string templatePath, int vk1, int vk2,
        int timeoutMs = 15000, int closeRadius = 350)
    {
        // NOTE(review): screen center is hard-coded for a 2560x1440 capture.
        const int screenCx = 2560 / 2;
        const int screenCy = 1440 / 2;

        try
        {
            // Pressed inside the try so a failure on the second key still releases the first.
            await _game.KeyDown(vk1);
            await _game.KeyDown(vk2);

            var sw = Stopwatch.StartNew();
            bool spotted = false;
            while (sw.ElapsedMilliseconds < timeoutMs)
            {
                if (_stopped) return null;

                var match = await _screen.TemplateMatch(templatePath);
                if (match == null)
                {
                    await Helpers.Sleep(500);
                    continue;
                }

                var dx = match.X - screenCx;
                var dy = match.Y - screenCy;
                var dist = Math.Sqrt(dx * dx + dy * dy);
                if (!spotted)
                {
                    Log.Information("Template spotted at ({X},{Y}), dist={Dist:F0}px from center, approaching...",
                        match.X, match.Y, dist);
                    spotted = true;
                }

                if (dist <= closeRadius)
                {
                    Log.Information("Close enough at ({X},{Y}), dist={Dist:F0}px, stopping", match.X, match.Y, dist);

                    // Stop, settle, re-match for accurate position
                    await _game.KeyUp(vk2);
                    await _game.KeyUp(vk1);
                    await Helpers.Sleep(300);
                    var fresh = await _screen.TemplateMatch(templatePath);
                    if (fresh != null)
                    {
                        Log.Information("Final position at ({X},{Y})", fresh.X, fresh.Y);
                        return fresh;
                    }
                    Log.Warning("Re-match failed, using last known position");
                    return match;
                }

                await Helpers.Sleep(200);
            }

            Log.Error("WalkAndMatch timed out after {Ms}ms (spotted={Spotted})", timeoutMs, spotted);
            return null;
        }
        finally
        {
            // Runs on every exit path (including exceptions) so the keys never stay held.
            await _game.KeyUp(vk2);
            await _game.KeyUp(vk1);
        }
    }

    /// <summary>
    /// Resolves a configured tab path against the stash calibration. Accepts either "Tab" or
    /// "Folder/SubTab"; anything else, an empty path, or missing calibration yields (null, null).
    /// </summary>
    private (StashTabInfo? Tab, StashTabInfo? Folder) ResolveTabPath(string tabPath)
    {
        if (string.IsNullOrEmpty(tabPath) || _config.StashCalibration == null)
            return (null, null);

        var parts = tabPath.Split('/');
        if (parts.Length == 1)
        {
            // Simple tab name
            var tab = _config.StashCalibration.Tabs.FirstOrDefault(t => t.Name == parts[0]);
            return (tab, null);
        }

        if (parts.Length == 2)
        {
            // Folder/SubTab
            var folder = _config.StashCalibration.Tabs.FirstOrDefault(t => t.Name == parts[0] && t.IsFolder);
            if (folder == null) return (null, null);
            var subTab = folder.SubTabs.FirstOrDefault(t => t.Name == parts[1]);
            return (subTab, folder);
        }

        return (null, null);
    }

    /// <summary>
    /// Best-effort recovery: closes any open UI with two Escape presses and issues the hideout
    /// command. Never throws; failures are logged.
    /// </summary>
    private async Task RecoverToHideout()
    {
        try
        {
            Log.Information("Recovering: escaping and going to hideout");
            await _game.FocusGame();
            await _game.PressEscape();
            await Helpers.Sleep(Delays.PostEscape);
            await _game.PressEscape();
            await Helpers.Sleep(Delays.PostEscape);

            var arrived = await _inventory.WaitForAreaTransition(
                _config.TravelTimeoutMs, () => _game.GoToHideout());
            if (arrived)
            {
                _inventory.SetLocation(true);
                Log.Information("Recovery: arrived at hideout");
            }
            else
            {
                Log.Warning("Recovery: timed out going to hideout");
            }
        }
        catch (Exception ex)
        {
            Log.Error(ex, "Recovery failed");
        }
    }
}

View file

@ -44,7 +44,9 @@ public class BotOrchestrator : IAsyncDisposable
public GameStateDetector GameState { get; } public GameStateDetector GameState { get; }
public HudReader HudReader { get; } public HudReader HudReader { get; }
public EnemyDetector EnemyDetector { get; } public EnemyDetector EnemyDetector { get; }
public BossDetector BossDetector { get; }
public FrameSaver FrameSaver { get; } public FrameSaver FrameSaver { get; }
public BossRunExecutor BossRunExecutor { get; }
private readonly Dictionary<string, ScrapExecutor> _scrapExecutors = new(); private readonly Dictionary<string, ScrapExecutor> _scrapExecutors = new();
// Events // Events
@ -72,6 +74,7 @@ public class BotOrchestrator : IAsyncDisposable
GameState = new GameStateDetector(); GameState = new GameStateDetector();
HudReader = new HudReader(); HudReader = new HudReader();
EnemyDetector = new EnemyDetector(); EnemyDetector = new EnemyDetector();
BossDetector = new BossDetector();
FrameSaver = new FrameSaver(); FrameSaver = new FrameSaver();
// Register on shared pipeline // Register on shared pipeline
@ -79,12 +82,15 @@ public class BotOrchestrator : IAsyncDisposable
pipelineService.Pipeline.AddConsumer(GameState); pipelineService.Pipeline.AddConsumer(GameState);
pipelineService.Pipeline.AddConsumer(HudReader); pipelineService.Pipeline.AddConsumer(HudReader);
pipelineService.Pipeline.AddConsumer(EnemyDetector); pipelineService.Pipeline.AddConsumer(EnemyDetector);
pipelineService.Pipeline.AddConsumer(BossDetector);
pipelineService.Pipeline.AddConsumer(FrameSaver); pipelineService.Pipeline.AddConsumer(FrameSaver);
// Pass shared pipeline to NavigationExecutor // Pass shared pipeline to NavigationExecutor
Navigation = new NavigationExecutor(game, pipelineService.Pipeline, minimapCapture, Navigation = new NavigationExecutor(game, pipelineService.Pipeline, minimapCapture,
enemyDetector: EnemyDetector); enemyDetector: EnemyDetector);
BossRunExecutor = new BossRunExecutor(game, screen, inventory, logWatcher, store.Settings, BossDetector);
logWatcher.AreaEntered += _ => Navigation.Reset(); logWatcher.AreaEntered += _ => Navigation.Reset();
logWatcher.Start(); // start early so area events fire even before Bot.Start() logWatcher.Start(); // start early so area events fire even before Bot.Start()
_paused = store.Settings.Paused; _paused = store.Settings.Paused;
@ -182,6 +188,11 @@ public class BotOrchestrator : IAsyncDisposable
return; return;
} }
} }
if (BossRunExecutor.State != BossRunState.Idle)
{
State = BossRunExecutor.State.ToString();
return;
}
if (Navigation.State != NavigationState.Idle) if (Navigation.State != NavigationState.Idle)
{ {
State = Navigation.State.ToString(); State = Navigation.State.ToString();
@ -264,26 +275,61 @@ public class BotOrchestrator : IAsyncDisposable
{ {
LogWatcher.Start(); LogWatcher.Start();
await Game.FocusGame(); await Game.FocusGame();
await Screen.Warmup();
BossRunExecutor.StateChanged += _ => UpdateExecutorState();
Navigation.StateChanged += _ => UpdateExecutorState(); Navigation.StateChanged += _ => UpdateExecutorState();
_started = true; _started = true;
Emit("info", "Starting map exploration..."); if (Config.MapType == MapType.Kulemak)
State = "Exploring";
_ = Navigation.RunExploreLoop().ContinueWith(t =>
{ {
if (t.IsFaulted) // Boss run needs hideout first
var inHideout = LogWatcher.CurrentArea.Contains("hideout", StringComparison.OrdinalIgnoreCase);
if (!inHideout)
{ {
Log.Error(t.Exception!, "Explore loop failed"); Emit("info", "Sending /hideout command...");
Emit("error", $"Explore loop failed: {t.Exception?.InnerException?.Message}"); var arrivedHome = await Inventory.WaitForAreaTransition(Config.TravelTimeoutMs, () => Game.GoToHideout());
if (!arrivedHome)
Log.Warning("Timed out waiting for hideout transition on startup");
} }
else Inventory.SetLocation(true);
Emit("info", "Starting boss run loop...");
State = "Preparing";
_ = BossRunExecutor.RunBossLoop().ContinueWith(t =>
{ {
Emit("info", "Exploration finished"); if (t.IsFaulted)
} {
State = "Idle"; Log.Error(t.Exception!, "Boss run loop failed");
StatusUpdated?.Invoke(); Emit("error", $"Boss run failed: {t.Exception?.InnerException?.Message}");
}); }
else
{
Emit("info", "Boss run loop finished");
}
State = "Idle";
StatusUpdated?.Invoke();
});
}
else
{
Emit("info", "Starting map exploration...");
State = "Exploring";
_ = Navigation.RunExploreLoop().ContinueWith(t =>
{
if (t.IsFaulted)
{
Log.Error(t.Exception!, "Explore loop failed");
Emit("error", $"Explore loop failed: {t.Exception?.InnerException?.Message}");
}
else
{
Emit("info", "Exploration finished");
}
State = "Idle";
StatusUpdated?.Invoke();
});
}
} }
public async ValueTask DisposeAsync() public async ValueTask DisposeAsync()

View file

@ -34,6 +34,16 @@ public class SavedSettings
public MapType MapType { get; set; } = MapType.TrialOfChaos; public MapType MapType { get; set; } = MapType.TrialOfChaos;
public StashCalibration? StashCalibration { get; set; } public StashCalibration? StashCalibration { get; set; }
public StashCalibration? ShopCalibration { get; set; } public StashCalibration? ShopCalibration { get; set; }
public bool ShowHudDebug { get; set; }
public KulemakSettings Kulemak { get; set; } = new();
}
/// <summary>
/// Persisted settings for the Kulemak boss run; exposed as <c>SavedSettings.Kulemak</c>.
/// </summary>
public class KulemakSettings
{
// NOTE(review): presumably toggles the Kulemak feature; no consumer is visible here — confirm.
public bool Enabled { get; set; }
// Stash tab that holds the invitations; the boss-run executor accepts "Tab" or "Folder/SubTab".
public string InvitationTabPath { get; set; } = "";
// Stash tab that inventory/loot is deposited into; same "Tab" or "Folder/SubTab" format.
public string LootTabPath { get; set; } = "";
// Used both as the number of boss runs to attempt and as the maximum number of invitations to grab.
public int InvitationCount { get; set; } = 15;
} }
public class ConfigStore public class ConfigStore
@ -129,6 +139,33 @@ public class ConfigStore
try try
{ {
var raw = File.ReadAllText(_filePath); var raw = File.ReadAllText(_filePath);
// Migrate: BossRun was removed from BotMode, now it's MapType.Kulemak
if (raw.Contains("\"bossRun\"") || raw.Contains("\"BossRun\""))
{
const System.Text.RegularExpressions.RegexOptions ic =
System.Text.RegularExpressions.RegexOptions.IgnoreCase;
// Mode enum: bossRun → mapping
using var doc = JsonDocument.Parse(raw);
if (doc.RootElement.TryGetProperty("Mode", out var modeProp) &&
modeProp.GetString()?.Equals("bossRun", StringComparison.OrdinalIgnoreCase) == true)
{
raw = System.Text.RegularExpressions.Regex.Replace(
raw, @"""Mode""\s*:\s*""bossRun""", @"""Mode"": ""mapping""", ic);
raw = System.Text.RegularExpressions.Regex.Replace(
raw, @"""MapType""\s*:\s*""[^""]*""", @"""MapType"": ""kulemak""", ic);
Log.Information("Migrated config: Mode bossRun -> mapping + MapType kulemak");
}
// MapType enum value: bossRun → kulemak
raw = System.Text.RegularExpressions.Regex.Replace(
raw, @"""MapType""\s*:\s*""bossRun""", @"""MapType"": ""kulemak""", ic);
// Settings property name: BossRun → Kulemak
raw = raw.Replace("\"BossRun\":", "\"Kulemak\":");
}
var parsed = JsonSerializer.Deserialize<SavedSettings>(raw, JsonOptions); var parsed = JsonSerializer.Deserialize<SavedSettings>(raw, JsonOptions);
if (parsed == null) return new SavedSettings(); if (parsed == null) return new SavedSettings();

View file

@ -75,11 +75,27 @@ public enum BotMode
Mapping Mapping
} }
/// <summary>
/// Phases reported by <c>BossRunExecutor</c> through its <c>State</c> property and
/// <c>StateChanged</c> event.
/// </summary>
public enum BossRunState
{
// No boss run in progress (initial value, and the value the loop ends on).
Idle,
// At the stash: depositing inventory and grabbing invitations.
Preparing,
// Using the waypoint to reach Well of Souls.
TravelingToZone,
// Walking towards the Black Cathedral entrance.
WalkingToEntrance,
// Using an invitation at the entrance and selecting an instance.
UsingInvitation,
// Combat phase (currently a placeholder in the executor).
Fighting,
// Looting phase (currently a placeholder in the executor).
Looting,
// Taking the portal and going back to the hideout.
Returning,
// At the stash: depositing loot.
StoringLoot,
// The run loop has finished; shown briefly before returning to Idle.
Complete,
// The run could not proceed (e.g. preparation failed); recovery to the hideout follows.
Failed
}
public enum MapType public enum MapType
{ {
TrialOfChaos, TrialOfChaos,
Temple, Temple,
Endgame Endgame,
Kulemak
} }
public enum GameUiState public enum GameUiState

View file

@ -77,4 +77,10 @@ public class GameController : IGameController
public Task ToggleMinimap() => _input.PressKey(InputSender.VK.TAB); public Task ToggleMinimap() => _input.PressKey(InputSender.VK.TAB);
public Task KeyDown(int vkCode) => _input.KeyDown(vkCode); public Task KeyDown(int vkCode) => _input.KeyDown(vkCode);
public Task KeyUp(int vkCode) => _input.KeyUp(vkCode); public Task KeyUp(int vkCode) => _input.KeyUp(vkCode);
/// <summary>Presses the key with virtual-key code 0xBB (VK_OEM_PLUS) via the input sender.</summary>
public Task PressPlus() => _input.PressKey(0xBB); // VK_OEM_PLUS
/// <summary>Presses the key with the given virtual-key code via the input sender.</summary>
public Task PressKey(int vkCode) => _input.PressKey(vkCode);
// Raw mouse button transitions, forwarded unchanged to the input sender. Each Down call
// should be paired with the matching Up call by the caller.
public void LeftMouseDown() => _input.LeftMouseDown();
public void LeftMouseUp() => _input.LeftMouseUp();
public void RightMouseDown() => _input.RightMouseDown();
public void RightMouseUp() => _input.RightMouseUp();
} }

View file

@ -22,4 +22,10 @@ public interface IGameController
Task ToggleMinimap(); Task ToggleMinimap();
Task KeyDown(int vkCode); Task KeyDown(int vkCode);
Task KeyUp(int vkCode); Task KeyUp(int vkCode);
/// <summary>Presses the '+' key (the implementation shown sends VK_OEM_PLUS, 0xBB).</summary>
Task PressPlus();
/// <summary>Presses the key identified by the given virtual-key code.</summary>
Task PressKey(int vkCode);
// Raw mouse button down/up events; callers are responsible for pairing Down with Up.
void LeftMouseDown();
void LeftMouseUp();
void RightMouseDown();
void RightMouseUp();
} }

View file

@ -34,6 +34,7 @@ public class InputSender
public const int W = 0x57; public const int W = 0x57;
public const int S = 0x53; public const int S = 0x53;
public const int D = 0x44; public const int D = 0x44;
public const int Z = 0x5A;
} }
public async Task PressKey(int vkCode) public async Task PressKey(int vkCode)
@ -142,6 +143,11 @@ public class InputSender
await Helpers.RandomDelay(5, 15); await Helpers.RandomDelay(5, 15);
} }
// Raw mouse button events: each sends a single button-state flag through SendMouseInput with
// zeros for the first three arguments (presumably dx/dy/data, i.e. no movement — confirm
// against SendMouseInput's signature).
public void LeftMouseDown() => SendMouseInput(0, 0, 0, InputNative.MOUSEEVENTF_LEFTDOWN);
public void LeftMouseUp() => SendMouseInput(0, 0, 0, InputNative.MOUSEEVENTF_LEFTUP);
public void RightMouseDown() => SendMouseInput(0, 0, 0, InputNative.MOUSEEVENTF_RIGHTDOWN);
public void RightMouseUp() => SendMouseInput(0, 0, 0, InputNative.MOUSEEVENTF_RIGHTUP);
public void MoveMouseInstant(int x, int y) => MoveMouseRaw(x, y); public void MoveMouseInstant(int x, int y) => MoveMouseRaw(x, y);
public async Task MoveMouseFast(int x, int y) public async Task MoveMouseFast(int x, int y)

View file

@ -19,4 +19,6 @@ public interface IInventoryManager
Task DepositItemsToStash(List<PlacedItem> items); Task DepositItemsToStash(List<PlacedItem> items);
Task<bool> SalvageItems(List<PlacedItem> items); Task<bool> SalvageItems(List<PlacedItem> items);
(bool[,] Grid, List<PlacedItem> Items, int Free) GetInventoryState(); (bool[,] Grid, List<PlacedItem> Items, int Free) GetInventoryState();
Task ClickStashTab(StashTabInfo tab, StashTabInfo? parentFolder = null);
Task GrabItemsFromStash(string layoutName, int maxItems, string? templatePath = null);
} }

View file

@ -155,6 +155,7 @@ public class InventoryManager : IInventoryManager
private async Task CtrlClickItems(List<PlacedItem> items, GridLayout layout, int clickDelayMs = Delays.ClickInterval) private async Task CtrlClickItems(List<PlacedItem> items, GridLayout layout, int clickDelayMs = Delays.ClickInterval)
{ {
await _game.KeyDown(Game.InputSender.VK.SHIFT);
await _game.HoldCtrl(); await _game.HoldCtrl();
foreach (var item in items) foreach (var item in items)
{ {
@ -163,6 +164,7 @@ public class InventoryManager : IInventoryManager
await Helpers.Sleep(clickDelayMs); await Helpers.Sleep(clickDelayMs);
} }
await _game.ReleaseCtrl(); await _game.ReleaseCtrl();
await _game.KeyUp(Game.InputSender.VK.SHIFT);
await Helpers.Sleep(Delays.PostEscape); await Helpers.Sleep(Delays.PostEscape);
} }
@ -208,13 +210,31 @@ public class InventoryManager : IInventoryManager
for (var attempt = 1; attempt <= maxRetries; attempt++) for (var attempt = 1; attempt <= maxRetries; attempt++)
{ {
Log.Information("Searching for nameplate '{Name}' (attempt {Attempt}/{Max})", name, attempt, maxRetries); Log.Information("Searching for nameplate '{Name}' (attempt {Attempt}/{Max})", name, attempt, maxRetries);
var pos = await _screen.FindTextOnScreen(name, fuzzy: true);
// Move mouse to bottom-left so it doesn't cover nameplates
_game.MoveMouseInstant(0, 1440);
await Helpers.Sleep(100);
// Nameplates hidden by default — capture clean reference
using var reference = _screen.CaptureRawBitmap();
// Hold Alt to show nameplates, capture, then release
await _game.KeyDown(Game.InputSender.VK.MENU);
await Helpers.Sleep(50);
using var current = _screen.CaptureRawBitmap();
await _game.KeyUp(Game.InputSender.VK.MENU);
// Diff OCR — only processes the bright nameplate regions
var result = await _screen.NameplateDiffOcr(reference, current);
var pos = FindWordInOcrResult(result, name, fuzzy: true);
if (pos.HasValue) if (pos.HasValue)
{ {
Log.Information("Clicking nameplate '{Name}' at ({X},{Y})", name, pos.Value.X, pos.Value.Y); Log.Information("Clicking nameplate '{Name}' at ({X},{Y})", name, pos.Value.X, pos.Value.Y);
await _game.LeftClickAt(pos.Value.X, pos.Value.Y); await _game.LeftClickAt(pos.Value.X, pos.Value.Y);
return pos; return pos;
} }
Log.Debug("Nameplate '{Name}' not found in diff OCR (attempt {Attempt}), text: {Text}", name, attempt, result.Text);
if (attempt < maxRetries) if (attempt < maxRetries)
await Helpers.Sleep(retryDelayMs); await Helpers.Sleep(retryDelayMs);
} }
@ -223,6 +243,73 @@ public class InventoryManager : IInventoryManager
return null; return null;
} }
/// <summary>
/// Locates <paramref name="needle"/> in an OCR result and returns the screen point to click.
/// A needle containing a space is compared against whole lines (click point = middle of the
/// span from the first word's origin to the last word's far edge); otherwise it is compared
/// against individual words (click point = word center).
/// </summary>
/// <param name="result">OCR output to search.</param>
/// <param name="needle">Text to look for; matched case-insensitively as a substring.</param>
/// <param name="fuzzy">
/// When true and no exact substring match exists anywhere, falls back to the candidate with
/// the highest bigram similarity, provided it reaches the 0.55 threshold.
/// </param>
/// <returns>The click point, or null when nothing matches.</returns>
private static (int X, int Y)? FindWordInOcrResult(OcrResponse result, string needle, bool fuzzy)
{
    const double FuzzyThreshold = 0.55;
    var multiWord = needle.Contains(' ');

    // Collect every candidate text with its click point once, so the exact and fuzzy passes
    // share the same geometry code.
    var candidates = new List<(string Text, int X, int Y)>();
    foreach (var line in result.Lines)
    {
        if (line.Words.Count == 0) continue;

        if (multiWord)
        {
            var first = line.Words[0];
            var last = line.Words[^1];
            candidates.Add((
                line.Text,
                (first.X + last.X + last.Width) / 2,
                (first.Y + last.Y + last.Height) / 2));
        }
        else
        {
            foreach (var word in line.Words)
                candidates.Add((word.Text, word.X + word.Width / 2, word.Y + word.Height / 2));
        }
    }

    // Pass 1: an exact substring match anywhere wins over any fuzzy match, so a look-alike
    // that happens to come earlier in reading order cannot shadow the real text.
    foreach (var candidate in candidates)
    {
        if (candidate.Text.Contains(needle, StringComparison.OrdinalIgnoreCase))
            return (candidate.X, candidate.Y);
    }

    if (!fuzzy) return null;

    // Pass 2: best fuzzy match at or above the threshold; ties keep the earliest candidate.
    var normalizedNeedle = Normalize(needle);
    (int X, int Y)? best = null;
    var bestScore = 0.0;
    foreach (var candidate in candidates)
    {
        var score = BigramSimilarity(normalizedNeedle, Normalize(candidate.Text));
        if (score >= FuzzyThreshold && score > bestScore)
        {
            best = (candidate.X, candidate.Y);
            bestScore = score;
        }
    }
    return best;
}
/// <summary>
/// Lower-cases the input (invariant culture) and drops every character that is not a letter
/// or digit, so OCR punctuation and spacing differences do not affect comparisons.
/// </summary>
private static string Normalize(string s)
{
    var lowered = s.ToLowerInvariant();
    var kept = new char[lowered.Length];
    var length = 0;
    foreach (var ch in lowered)
    {
        if (char.IsLetterOrDigit(ch))
            kept[length++] = ch;
    }
    return new string(kept, 0, length);
}
/// <summary>
/// Dice-style similarity over adjacent character pairs: twice the number of shared bigrams
/// (counted with multiplicity) divided by the total number of bigrams in both strings.
/// Strings shorter than two characters score 1 only when they are equal, otherwise 0.
/// </summary>
private static double BigramSimilarity(string a, string b)
{
    var pairsInA = a.Length - 1;
    var pairsInB = b.Length - 1;
    if (pairsInA < 1 || pairsInB < 1)
        return a == b ? 1 : 0;

    // How many times each bigram of `a` is still available for matching.
    var available = new Dictionary<(char, char), int>();
    for (var pos = 0; pos < pairsInA; pos++)
    {
        var key = (a[pos], a[pos + 1]);
        available.TryGetValue(key, out var seen);
        available[key] = seen + 1;
    }

    var shared = 0;
    for (var pos = 0; pos < pairsInB; pos++)
    {
        var key = (b[pos], b[pos + 1]);
        if (!available.TryGetValue(key, out var left) || left <= 0)
            continue;

        // Consume one occurrence so repeated bigrams are not over-counted.
        available[key] = left - 1;
        shared++;
    }

    return 2.0 * shared / (pairsInA + pairsInB);
}
public async Task<bool> WaitForAreaTransition(int timeoutMs, Func<Task>? triggerAction = null) public async Task<bool> WaitForAreaTransition(int timeoutMs, Func<Task>? triggerAction = null)
{ {
var tcs = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously); var tcs = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
@ -254,4 +341,64 @@ public class InventoryManager : IInventoryManager
{ {
return (Tracker.GetGrid(), Tracker.GetItems(), Tracker.FreeCells); return (Tracker.GetGrid(), Tracker.GetItems(), Tracker.FreeCells);
} }
/// <summary>
/// Selects a stash tab by clicking its calibrated position. When the tab lives inside a
/// folder, the folder is clicked first and given a short moment to expand.
/// </summary>
/// <param name="tab">Tab to activate.</param>
/// <param name="parentFolder">Folder containing <paramref name="tab"/>, or null for a top-level tab.</param>
public async Task ClickStashTab(StashTabInfo tab, StashTabInfo? parentFolder = null)
{
    if (parentFolder is not null)
    {
        var folderX = parentFolder.ClickX;
        var folderY = parentFolder.ClickY;
        Log.Information("Clicking folder '{Folder}' at ({X},{Y})", parentFolder.Name, folderX, folderY);
        await _game.LeftClickAt(folderX, folderY);
        await Helpers.Sleep(200);
    }

    var tabX = tab.ClickX;
    var tabY = tab.ClickY;
    Log.Information("Clicking tab '{Tab}' at ({X},{Y})", tab.Name, tabX, tabY);
    await _game.LeftClickAt(tabX, tabY);
    await Helpers.Sleep(Delays.PostStashOpen);
}
/// <summary>
/// Ctrl-clicks up to <paramref name="maxItems"/> items out of the open stash tab.
/// With a template, the stash region is re-matched before every click until no match is left;
/// without one, the layout grid is scanned once and occupied cells are clicked in scan order.
/// Ctrl is released in a finally block so a failing match, scan or click cannot leave it held.
/// </summary>
/// <param name="layoutName">Key into <c>GridLayouts.All</c> identifying the stash layout.</param>
/// <param name="maxItems">Upper bound on the number of clicks performed.</param>
/// <param name="templatePath">Optional template image restricting which items are grabbed.</param>
public async Task GrabItemsFromStash(string layoutName, int maxItems, string? templatePath = null)
{
    Log.Information("Grabbing up to {Max} items from stash layout '{Layout}' (template={Template})",
        maxItems, layoutName, templatePath ?? "none");

    var layout = GridLayouts.All[layoutName];
    var grabbed = 0;

    if (templatePath != null)
    {
        // Template matching mode: repeatedly find and click matching items
        await _game.HoldCtrl();
        try
        {
            while (grabbed < maxItems)
            {
                var match = await _screen.TemplateMatch(templatePath, layout.Region);
                if (match == null) break;

                Log.Information("Template match at ({X},{Y}), grabbing", match.X, match.Y);
                await _game.LeftClickAt(match.X, match.Y);
                await Helpers.Sleep(Delays.ClickInterval);
                grabbed++;
            }
        }
        finally
        {
            await _game.ReleaseCtrl();
        }

        await Helpers.Sleep(Delays.PostEscape);
        Log.Information("Grabbed {Count} matching items from stash", grabbed);
    }
    else
    {
        // Grid scan mode: grab all occupied cells
        var result = await _screen.Grid.Scan(layoutName);
        await _game.HoldCtrl();
        try
        {
            foreach (var cell in result.Occupied)
            {
                if (grabbed >= maxItems) break;

                var center = _screen.Grid.GetCellCenter(layout, cell.Row, cell.Col);
                await _game.LeftClickAt(center.X, center.Y);
                await Helpers.Sleep(Delays.ClickInterval);
                grabbed++;
            }
        }
        finally
        {
            await _game.ReleaseCtrl();
        }

        await Helpers.Sleep(Delays.PostEscape);
        Log.Information("Grabbed {Count} items from stash", grabbed);
    }
}
} }

View file

@ -0,0 +1,78 @@
using Poe2Trade.Core;
using Serilog;
using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen;
/// <summary>
/// Frame consumer that runs a per-boss detection model on every Nth frame and publishes the
/// result as a <see cref="BossSnapshot"/>. <see cref="BossDetected"/> fires only after the boss
/// has been seen in <c>MinConsecutiveFrames</c> successive inferences.
/// </summary>
public class BossDetector : IFrameConsumer, IDisposable
{
    // Only every Nth frame handed to Process is sent to the model.
    private const int DetectEveryNFrames = 6;
    // Number of successive positive inferences required before BossDetected is raised.
    private const int MinConsecutiveFrames = 2;

    private readonly PythonDetectBridge _bridge = new();
    private volatile BossSnapshot _latest = new([], 0, 0);
    private int _frameCounter;

    // Both fields are written by SetBoss, which is presumably called from a different thread
    // than Process; volatile gives them the same visibility guarantee as _latest.
    private volatile int _consecutiveDetections;
    private volatile string _modelName = "boss-kulemak";

    /// <summary>When false, <see cref="Process"/> returns immediately without running the model.</summary>
    public bool Enabled { get; set; }

    /// <summary>Result of the most recent successful inference.</summary>
    public BossSnapshot Latest => _latest;

    /// <summary>Raised with the current snapshot once the consecutive-detection threshold is met.</summary>
    public event Action<BossSnapshot>? BossDetected;

    /// <summary>
    /// Selects the model "boss-{bossName}" for subsequent inferences and restarts the
    /// consecutive-detection streak.
    /// </summary>
    /// <param name="bossName">Boss identifier, e.g. "kulemak".</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="bossName"/> is null, empty or whitespace. Rejected up front because the
    /// resulting model name would otherwise only surface as debug-level inference failures.
    /// </exception>
    public void SetBoss(string bossName)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(bossName);
        _modelName = $"boss-{bossName}";
        _consecutiveDetections = 0;
    }

    /// <summary>
    /// Runs detection on the full frame (every <c>DetectEveryNFrames</c>-th call), updates
    /// <see cref="Latest"/> and raises <see cref="BossDetected"/> when the streak threshold is
    /// reached. Inference errors are logged at debug level and never propagate to the caller.
    /// </summary>
    public void Process(ScreenFrame frame)
    {
        if (!Enabled) return;
        if (++_frameCounter % DetectEveryNFrames != 0) return;

        try
        {
            // Read once so a concurrent SetBoss cannot change the model mid-inference.
            var model = _modelName;

            // Use full frame — model was trained on full 2560x1440 screenshots
            var fullRegion = new Region(0, 0, frame.Width, frame.Height);
            using var bgr = frame.CropBgr(fullRegion);
            var result = _bridge.Detect(bgr, conf: 0.60f, imgsz: 1280, model: model);

            var bosses = new List<DetectedBoss>(result.Count);
            foreach (var det in result.Detections)
            {
                bosses.Add(new DetectedBoss(
                    det.ClassName,
                    det.Confidence,
                    det.X,
                    det.Y,
                    det.Width,
                    det.Height,
                    det.Cx,
                    det.Cy));
            }

            var snapshot = new BossSnapshot(
                bosses.AsReadOnly(),
                DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
                result.InferenceMs);
            _latest = snapshot;

            if (bosses.Count > 0)
            {
                _consecutiveDetections++;
                if (_consecutiveDetections >= MinConsecutiveFrames)
                    BossDetected?.Invoke(snapshot);
            }
            else
            {
                _consecutiveDetections = 0;
            }
        }
        catch (Exception ex)
        {
            // A failed inference gives no evidence either way, so it must not bridge two hits
            // into a "consecutive" streak.
            _consecutiveDetections = 0;
            Log.Debug(ex, "BossDetector YOLO failed");
        }
    }

    /// <summary>Disposes the detection bridge.</summary>
    public void Dispose() => _bridge.Dispose();
}

View file

@ -10,3 +10,14 @@ public record DetectionSnapshot(
IReadOnlyList<DetectedEnemy> Enemies, IReadOnlyList<DetectedEnemy> Enemies,
long Timestamp, long Timestamp,
float InferenceMs); float InferenceMs);
/// <summary>
/// A single boss detection copied from one detector result entry.
/// </summary>
/// <param name="ClassName">Class label reported for the detection.</param>
/// <param name="Confidence">Detection score; the overlay multiplies it by 100 to show a percentage.</param>
/// <param name="X">X of the bounding rectangle; the overlay draws the box at (X, Y) with Width x Height.</param>
/// <param name="Y">Y of the bounding rectangle.</param>
/// <param name="Width">Width of the bounding rectangle.</param>
/// <param name="Height">Height of the bounding rectangle.</param>
/// <param name="Cx">Presumably the X of the box center — TODO confirm against the detector output.</param>
/// <param name="Cy">Presumably the Y of the box center — TODO confirm against the detector output.</param>
public record DetectedBoss(
string ClassName,
float Confidence,
int X, int Y, int Width, int Height,
int Cx, int Cy);
/// <summary>
/// The outcome of one boss-detection pass.
/// </summary>
/// <param name="Bosses">Detections found in the pass; empty when nothing was detected.</param>
/// <param name="Timestamp">Unix time in milliseconds (UTC) at which the snapshot was built; 0 on the initial placeholder snapshot.</param>
/// <param name="InferenceMs">Inference duration reported by the detector response.</param>
public record BossSnapshot(
IReadOnlyList<DetectedBoss> Bosses,
long Timestamp,
float InferenceMs);

View file

@ -16,6 +16,7 @@ public class FrameSaver : IFrameConsumer
private const int JpegQuality = 95; private const int JpegQuality = 95;
private const int MinSaveIntervalMs = 1000; private const int MinSaveIntervalMs = 1000;
private const int BurstIntervalMs = 200;
private const int MinRedPixels = 50; private const int MinRedPixels = 50;
private const int ThumbSize = 64; private const int ThumbSize = 64;
private const double MovementThreshold = 8.0; // mean absolute diff on 64x64 grayscale private const double MovementThreshold = 8.0; // mean absolute diff on 64x64 grayscale
@ -26,6 +27,7 @@ public class FrameSaver : IFrameConsumer
private Mat? _prevThumb; private Mat? _prevThumb;
public bool Enabled { get; set; } public bool Enabled { get; set; }
public bool BurstMode { get; set; }
public int SavedCount => _savedCount; public int SavedCount => _savedCount;
public FrameSaver(string outputDir = "training-data/raw") public FrameSaver(string outputDir = "training-data/raw")
@ -35,10 +37,11 @@ public class FrameSaver : IFrameConsumer
public void Process(ScreenFrame frame) public void Process(ScreenFrame frame)
{ {
if (!Enabled) return; if (!Enabled && !BurstMode) return;
var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(); var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
if (now - _lastSaveTime < MinSaveIntervalMs) return; var interval = BurstMode ? BurstIntervalMs : MinSaveIntervalMs;
if (now - _lastSaveTime < interval) return;
if (GameplayRegion.X + GameplayRegion.Width > frame.Width || if (GameplayRegion.X + GameplayRegion.Width > frame.Width ||
GameplayRegion.Y + GameplayRegion.Height > frame.Height) GameplayRegion.Y + GameplayRegion.Height > frame.Height)
@ -46,10 +49,12 @@ public class FrameSaver : IFrameConsumer
try try
{ {
using var bgr = frame.CropBgr(GameplayRegion); if (!BurstMode)
{
if (!HasHealthBars(bgr)) return; using var bgr = frame.CropBgr(GameplayRegion);
if (!HasSceneChanged(bgr)) return; if (!HasHealthBars(bgr)) return;
if (!HasSceneChanged(bgr)) return;
}
if (!Directory.Exists(_outputDir)) if (!Directory.Exists(_outputDir))
Directory.CreateDirectory(_outputDir); Directory.CreateDirectory(_outputDir);

View file

@ -1,5 +1,3 @@
using System.Drawing;
using System.Text.RegularExpressions;
using OpenCvSharp; using OpenCvSharp;
using Poe2Trade.Core; using Poe2Trade.Core;
using Serilog; using Serilog;
@ -7,38 +5,41 @@ using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen; namespace Poe2Trade.Screen;
public record HudValues(int Current, int Max);
public record HudSnapshot public record HudSnapshot
{ {
public HudValues? Life { get; init; } public float LifePct { get; init; }
public HudValues? Mana { get; init; } public float ShieldPct { get; init; }
public HudValues? EnergyShield { get; init; } public float ManaPct { get; init; }
public HudValues? Spirit { get; init; }
public long Timestamp { get; init; } public long Timestamp { get; init; }
public float LifePct => Life is { Max: > 0 } l ? (float)l.Current / l.Max : 1f;
public float ManaPct => Mana is { Max: > 0 } m ? (float)m.Current / m.Max : 1f;
} }
/// <summary> /// <summary>
/// Reads life/mana/ES/spirit values from HUD globe text via OCR. /// Reads life/mana/shield fill levels by sampling pixel colors on the globes.
/// Throttled to ~1 read per second (every 30 frames at 30fps). /// Finds the highest Y where the fill color appears — the fill drains from top down.
/// Samples a horizontal band (±SampleHalfWidth) at each Y for robustness against the frame ornaments.
/// </summary> /// </summary>
public class HudReader : IFrameConsumer public class HudReader : IFrameConsumer
{ {
private static readonly Regex ValuePattern = new(@"(\d+)\s*/\s*(\d+)", RegexOptions.Compiled); // Globe centers at 2560x1440
private const int LifeX = 167;
private const int ManaX = 2394;
private const int GlobeTop = 1185;
private const int GlobeBottom = 1411;
// Crop regions for HUD text at 2560x1440 — placeholders, need calibration // Shield ring: circle centered at (168, 1294), radius 130
private static readonly Region LifeRegion = new(100, 1340, 200, 40); private const int ShieldCX = 170;
private static readonly Region ManaRegion = new(2260, 1340, 200, 40); private const int ShieldCY = 1298;
private static readonly Region EsRegion = new(100, 1300, 200, 40); private const int ShieldRadius = 130;
private static readonly Region SpiritRegion = new(2260, 1300, 200, 40);
private const int OcrEveryNFrames = 30; // Sample a horizontal band of pixels at each Y level
private const int SampleHalfWidth = 8;
// Minimum pixels in the band that must match to count as "filled"
private const int MinHits = 2;
private readonly PythonOcrBridge _ocr = new(); private const int MinChannel = 60;
private volatile HudSnapshot _current = new() { Timestamp = 0 }; private const float DominanceRatio = 1.2f;
private volatile HudSnapshot _current = new();
private int _frameCounter; private int _frameCounter;
public HudSnapshot Current => _current; public HudSnapshot Current => _current;
@ -47,64 +48,128 @@ public class HudReader : IFrameConsumer
public void Process(ScreenFrame frame) public void Process(ScreenFrame frame)
{ {
if (++_frameCounter % OcrEveryNFrames != 0) return; if (++_frameCounter % 2 != 0) return;
try try
{ {
var life = ReadValue(frame, LifeRegion); var manaPct = SampleFillLevel(frame, ManaX, IsManaPixel);
var mana = ReadValue(frame, ManaRegion); var shieldPct = SampleShieldRing(frame);
var es = ReadValue(frame, EsRegion);
var spirit = ReadValue(frame, SpiritRegion); // If life globe is cyan (1-life build), life = 0
var redFill = SampleFillLevel(frame, LifeX, IsLifePixel);
var cyanFill = SampleFillLevel(frame, LifeX, IsCyanPixel);
var lifePct = cyanFill > redFill ? 0f : redFill;
var snapshot = new HudSnapshot var snapshot = new HudSnapshot
{ {
Life = life, LifePct = lifePct,
Mana = mana, ManaPct = manaPct,
EnergyShield = es, ShieldPct = shieldPct,
Spirit = spirit,
Timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), Timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
}; };
_current = snapshot; _current = snapshot;
Updated?.Invoke(snapshot); Updated?.Invoke(snapshot);
if (snapshot.LifePct < 0.3f) if (lifePct < 0.3f)
LowLife?.Invoke(snapshot); LowLife?.Invoke(snapshot);
} }
catch (Exception ex) catch (Exception ex)
{ {
Log.Debug(ex, "HudReader OCR failed"); Log.Debug(ex, "HudReader sample failed");
} }
} }
private HudValues? ReadValue(ScreenFrame frame, Region region) /// <summary>
/// Scan from top to bottom to find the first Y row where the fill color appears.
/// Fill % = 1 - (firstFilledY - GlobeTop) / (GlobeBottom - GlobeTop).
/// At each Y, sample a horizontal band of pixels and require MinHits matches.
/// </summary>
private static float SampleFillLevel(ScreenFrame frame, int centerX, Func<Vec4b, bool> colorTest)
{ {
// Bounds check if (centerX >= frame.Width || GlobeBottom >= frame.Height) return 0f;
if (region.X + region.Width > frame.Width || region.Y + region.Height > frame.Height)
return null;
using var bgr = frame.CropBgr(region); int height = GlobeBottom - GlobeTop;
using var gray = new Mat(); if (height <= 0) return 0f;
Cv2.CvtColor(bgr, gray, ColorConversionCodes.BGR2GRAY);
// Threshold for white text on dark background int xMin = Math.Max(0, centerX - SampleHalfWidth);
using var thresh = new Mat(); int xMax = Math.Min(frame.Width - 1, centerX + SampleHalfWidth);
Cv2.Threshold(gray, thresh, 180, 255, ThresholdTypes.Binary);
// Convert to Bitmap for OCR bridge // Scan from top down — find first row with enough matching pixels
var bytes = thresh.ToBytes(".png"); for (int y = GlobeTop; y <= GlobeBottom; y++)
using var ms = new System.IO.MemoryStream(bytes); {
using var bitmap = new Bitmap(ms); int hits = 0;
for (int x = xMin; x <= xMax; x++)
{
if (colorTest(frame.PixelAt(x, y)))
hits++;
if (hits >= MinHits) break;
}
var result = _ocr.OcrFromBitmap(bitmap); if (hits >= MinHits)
if (string.IsNullOrWhiteSpace(result.Text)) return null; {
// Fill level = how far down from top this first row is
// If found at GlobeTop → 100%, at GlobeBottom → 0%
return 1f - (float)(y - GlobeTop) / height;
}
}
var match = ValuePattern.Match(result.Text); return 0f; // no fill found
if (!match.Success) return null;
return new HudValues(
int.Parse(match.Groups[1].Value),
int.Parse(match.Groups[2].Value)
);
} }
/// <summary>
/// Estimates the shield fill from the ring's right semicircle (12 o'clock to 6 o'clock).
/// Rows are visited from the top of the ring downward; on each row a 7-pixel-wide band centered
/// on the arc is tested for cyan. The first row with at least <c>MinHits</c> cyan pixels is mapped
/// to its position along the arc and the result is 1 minus that arc fraction.
/// Returns 0 when no such row exists or when the ring's bottom row lies outside the frame.
/// </summary>
private static float SampleShieldRing(ScreenFrame frame)
{
    if (ShieldCY + ShieldRadius >= frame.Height) return 0f;

    const int bandHalfWidth = 3;
    int radiusSquared = ShieldRadius * ShieldRadius;

    for (int offsetY = -ShieldRadius; offsetY <= ShieldRadius; offsetY++)
    {
        int row = ShieldCY + offsetY;

        // X of the right-hand arc on this row (circle equation, truncated to whole pixels).
        int arcColumn = ShieldCX + (int)Math.Sqrt(radiusSquared - offsetY * offsetY);
        if (arcColumn >= frame.Width) continue;

        int left = Math.Max(0, arcColumn - bandHalfWidth);
        int right = Math.Min(frame.Width - 1, arcColumn + bandHalfWidth);

        // Stop sampling the band as soon as enough cyan pixels have been seen.
        int cyanCount = 0;
        for (int col = left; col <= right && cyanCount < MinHits; col++)
        {
            if (IsCyanPixel(frame.PixelAt(col, row)))
                cyanCount++;
        }

        if (cyanCount < MinHits) continue;

        // θ = arcsin(offsetY / r); fraction of the semicircle covered from the top = (θ + π/2) / π.
        double angle = Math.Asin(Math.Clamp((double)offsetY / ShieldRadius, -1, 1));
        double fractionFromTop = (angle + Math.PI / 2) / Math.PI;
        return (float)(1.0 - fractionFromTop);
    }

    return 0f;
}
// Channel order in the pixel vector: B=0, G=1, R=2, A=3

/// <summary>
/// True when red is above <c>MinChannel</c> and exceeds both green and blue scaled by <c>DominanceRatio</c>.
/// </summary>
private static bool IsLifePixel(Vec4b px)
{
    var blue = px[0];
    var green = px[1];
    var red = px[2];
    return red > MinChannel
        && red > green * DominanceRatio
        && red > blue * DominanceRatio;
}
/// <summary>
/// True when blue is above <c>MinChannel</c> and exceeds both green and red scaled by <c>DominanceRatio</c>.
/// </summary>
private static bool IsManaPixel(Vec4b px)
{
    var blue = px[0];
    var green = px[1];
    var red = px[2];
    return blue > MinChannel
        && blue > green * DominanceRatio
        && blue > red * DominanceRatio;
}
/// <summary>
/// True when blue and green are both above <c>MinChannel</c> and each exceeds red scaled by <c>DominanceRatio</c>.
/// </summary>
private static bool IsCyanPixel(Vec4b px)
{
    var blue = px[0];
    var green = px[1];
    var red = px[2];
    if (blue <= MinChannel || green <= MinChannel) return false;
    return blue > red * DominanceRatio
        && green > red * DominanceRatio;
}
} }

View file

@ -17,6 +17,8 @@ public interface IScreenReader : IDisposable
Task Snapshot(); Task Snapshot();
Task<DiffOcrResponse> DiffOcr(string? savePath = null, Region? region = null); Task<DiffOcrResponse> DiffOcr(string? savePath = null, Region? region = null);
Task<TemplateMatchResult?> TemplateMatch(string templatePath, Region? region = null); Task<TemplateMatchResult?> TemplateMatch(string templatePath, Region? region = null);
Task<OcrResponse> NameplateDiffOcr(System.Drawing.Bitmap reference, System.Drawing.Bitmap current);
System.Drawing.Bitmap CaptureRawBitmap();
Task SaveScreenshot(string path); Task SaveScreenshot(string path);
Task SaveRegion(Region region, string path); Task SaveRegion(Region region, string path);
} }

View file

@ -35,7 +35,7 @@ class PythonDetectBridge : IDisposable
/// <summary> /// <summary>
/// Run YOLO detection on a BGR Mat. Returns parsed detection results. /// Run YOLO detection on a BGR Mat. Returns parsed detection results.
/// </summary> /// </summary>
public DetectResponse Detect(Mat bgrMat, float conf = 0.3f, float iou = 0.45f, int imgsz = 640) public DetectResponse Detect(Mat bgrMat, float conf = 0.3f, float iou = 0.45f, int imgsz = 640, string? model = null)
{ {
EnsureRunning(); EnsureRunning();
@ -49,6 +49,7 @@ class PythonDetectBridge : IDisposable
["conf"] = conf, ["conf"] = conf,
["iou"] = iou, ["iou"] = iou,
["imgsz"] = imgsz, ["imgsz"] = imgsz,
["model"] = model,
}; };
return SendRequest(req); return SendRequest(req);

View file

@ -1,6 +1,10 @@
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using Poe2Trade.Core; using Poe2Trade.Core;
using OpenCvSharp.Extensions; using OpenCvSharp.Extensions;
using Serilog; using Serilog;
using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen; namespace Poe2Trade.Screen;
@ -178,6 +182,144 @@ public class ScreenReader : IScreenReader
return Task.CompletedTask; return Task.CompletedTask;
} }
// -- Nameplate diff OCR --
/// <summary>
/// Returns the bitmap produced by <c>ScreenCapture.CaptureOrLoad</c> when both of its arguments are null.
/// </summary>
public Bitmap CaptureRawBitmap()
{
    return ScreenCapture.CaptureOrLoad(null, null);
}
/// <summary>
/// Finds regions that became brighter between <paramref name="reference"/> and
/// <paramref name="current"/>, runs OCR on each such region cropped from <paramref name="current"/>,
/// and merges the results with word coordinates translated back into full-image space.
/// All work is done synchronously; the returned task is already completed.
/// </summary>
/// <param name="reference">Baseline image.</param>
/// <param name="current">Image to compare against the baseline and to OCR.</param>
/// <returns>
/// The concatenated OCR lines of all bright clusters, or an empty response when no cluster is found.
/// </returns>
public Task<OcrResponse> NameplateDiffOcr(Bitmap reference, Bitmap current)
{
    // Only the area common to both images is compared.
    int w = Math.Min(reference.Width, current.Width);
    int h = Math.Min(reference.Height, current.Height);
    var area = new Rectangle(0, 0, w, h);

    byte[] refPx;
    byte[] curPx;
    int refStride;
    int curStride;

    // try/finally guarantees neither bitmap stays locked if locking the second one,
    // allocating the buffers, or copying the pixels throws.
    var refData = reference.LockBits(area, ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
    try
    {
        var curData = current.LockBits(area, ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            // Each buffer keeps its own stride: the two locks are independent and their
            // row pitch is not guaranteed to match when the bitmaps differ in size.
            refStride = refData.Stride;
            curStride = curData.Stride;
            refPx = new byte[refStride * h];
            curPx = new byte[curStride * h];
            Marshal.Copy(refData.Scan0, refPx, 0, refPx.Length);
            Marshal.Copy(curData.Scan0, curPx, 0, curPx.Length);
        }
        finally
        {
            current.UnlockBits(curData);
        }
    }
    finally
    {
        reference.UnlockBits(refData);
    }

    // Build a binary mask of pixels that got significantly brighter (nameplates are bright text)
    const int brightThresh = 30;
    bool[] mask = new bool[w * h];
    Parallel.For(0, h, y =>
    {
        int refRow = y * refStride;
        int curRow = y * curStride;
        for (int x = 0; x < w; x++)
        {
            int r = refRow + x * 4;
            int c = curRow + x * 4;
            // Sum of the per-channel increase over the first three bytes of the pixel (alpha ignored).
            int brighter = (curPx[c] - refPx[r]) + (curPx[c + 1] - refPx[r + 1]) + (curPx[c + 2] - refPx[r + 2]);
            if (brighter > brightThresh)
                mask[y * w + x] = true;
        }
    });

    // Find connected clusters via row-scan: collect bounding boxes of bright regions
    var boxes = FindBrightClusters(mask, w, h, minWidth: 40, minHeight: 10, maxGap: 8);
    Log.Information("NameplateDiff: found {Count} bright clusters", boxes.Count);

    if (boxes.Count == 0)
        return Task.FromResult(new OcrResponse { Text = "", Lines = [] });

    // OCR each cluster crop, accumulate results with screen-space coordinates
    var allLines = new List<OcrLine>();
    var allText = new List<string>();
    foreach (var box in boxes)
    {
        // Pad the crop slightly, clamped to the compared area
        int pad = 4;
        int cx = Math.Max(0, box.X - pad);
        int cy = Math.Max(0, box.Y - pad);
        int cw = Math.Min(w - cx, box.Width + pad * 2);
        int ch = Math.Min(h - cy, box.Height + pad * 2);

        using var crop = current.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
        var ocrResult = _pythonBridge.OcrFromBitmap(crop);

        // Offset word coordinates to screen space
        foreach (var line in ocrResult.Lines)
        {
            foreach (var word in line.Words)
            {
                word.X += cx;
                word.Y += cy;
            }
            allLines.Add(line);
            allText.Add(line.Text);
        }
    }

    return Task.FromResult(new OcrResponse
    {
        Text = string.Join("\n", allText),
        Lines = allLines,
    });
}
/// <summary>
/// Turns a row-major brightness mask into bounding boxes. Rows holding at least three set pixels
/// are grouped into horizontal bands; within each band, columns holding at least one set pixel
/// are grouped into boxes. A group is closed once more than <paramref name="maxGap"/> consecutive
/// inactive rows/columns follow its last active one, and is kept only if it spans at least
/// <paramref name="minHeight"/> rows (bands) or <paramref name="minWidth"/> columns (boxes).
/// </summary>
/// <param name="mask">Row-major mask of size <paramref name="w"/> × <paramref name="h"/>.</param>
/// <param name="w">Mask width in pixels.</param>
/// <param name="h">Mask height in pixels.</param>
/// <param name="minWidth">Minimum box width to keep.</param>
/// <param name="minHeight">Minimum band height to keep.</param>
/// <param name="maxGap">Largest run of inactive rows/columns tolerated inside one group.</param>
/// <returns>Boxes ordered by band (top to bottom), then left to right within a band.</returns>
private static List<Rectangle> FindBrightClusters(bool[] mask, int w, int h, int minWidth, int minHeight, int maxGap)
{
    const int rowThresh = 3;
    const int colThresh = 1;

    // Groups indices whose count reaches the threshold into [Start, End] runs, bridging gaps
    // of up to 'gap' inactive indices and dropping runs shorter than 'minLength'.
    static List<(int Start, int End)> CollectRuns(int[] counts, int threshold, int minLength, int gap)
    {
        var runs = new List<(int Start, int End)>();
        int start = -1, last = -1;
        for (int i = 0; i < counts.Length; i++)
        {
            if (counts[i] >= threshold)
            {
                if (start < 0) start = i;
                last = i;
                continue;
            }

            if (start < 0 || i - last <= gap) continue;

            if (last - start + 1 >= minLength)
                runs.Add((start, last));
            start = -1;
        }

        if (start >= 0 && last - start + 1 >= minLength)
            runs.Add((start, last));
        return runs;
    }

    // Number of set pixels on every row of the mask.
    var perRow = new int[h];
    for (int row = 0; row < h; row++)
    {
        int rowBase = row * w;
        for (int col = 0; col < w; col++)
        {
            if (mask[rowBase + col]) perRow[row]++;
        }
    }

    var clusters = new List<Rectangle>();
    foreach (var (top, bottom) in CollectRuns(perRow, rowThresh, minHeight, maxGap))
    {
        // Number of set pixels in every column, restricted to this band's rows.
        var perCol = new int[w];
        for (int row = top; row <= bottom; row++)
        {
            int rowBase = row * w;
            for (int col = 0; col < w; col++)
            {
                if (mask[rowBase + col]) perCol[col]++;
            }
        }

        foreach (var (left, right) in CollectRuns(perCol, colThresh, minWidth, maxGap))
            clusters.Add(new Rectangle(left, top, right - left + 1, bottom - top + 1));
    }

    return clusters;
}
public void Dispose() => _pythonBridge.Dispose(); public void Dispose() => _pythonBridge.Dispose();
// -- OCR text matching -- // -- OCR text matching --

View file

@ -27,27 +27,56 @@ class TemplateMatchHandler
else else
screenMat.CopyTo(screenBgr); screenMat.CopyTo(screenBgr);
// Template must fit within screenshot // Try exact size first (fast path)
if (template.Rows > screenBgr.Rows || template.Cols > screenBgr.Cols) var exact = MatchAtScale(screenBgr, template, region, 1.0, threshold);
if (exact is { Confidence: > 0.95 })
return exact;
// Multi-scale: resize template from 50% to 150% in steps of 10%
TemplateMatchResult? best = exact;
for (var pct = 50; pct <= 150; pct += 10)
{
var scale = pct / 100.0;
if (pct == 100) continue; // already tried
var match = MatchAtScale(screenBgr, template, region, scale, threshold);
if (match != null && (best == null || match.Confidence > best.Confidence))
{
best = match;
if (best.Confidence > 0.95) break;
}
}
return best;
}
private static TemplateMatchResult? MatchAtScale(Mat screen, Mat template,
Region? region, double scale, double threshold)
{
using var scaled = scale == 1.0 ? template.Clone()
: template.Resize(new OpenCvSharp.Size(
Math.Max(1, (int)(template.Cols * scale)),
Math.Max(1, (int)(template.Rows * scale))));
if (scaled.Rows > screen.Rows || scaled.Cols > screen.Cols)
return null; return null;
using var result = new Mat(); using var result = new Mat();
Cv2.MatchTemplate(screenBgr, template, result, TemplateMatchModes.CCoeffNormed); Cv2.MatchTemplate(screen, scaled, result, TemplateMatchModes.CCoeffNormed);
Cv2.MinMaxLoc(result, out _, out double maxVal, out _, out OpenCvSharp.Point maxLoc); Cv2.MinMaxLoc(result, out _, out double maxVal, out _, out OpenCvSharp.Point maxLoc);
if (maxVal < threshold) if (maxVal < threshold)
return null; return null;
int offsetX = region?.X ?? 0; var offsetX = region?.X ?? 0;
int offsetY = region?.Y ?? 0; var offsetY = region?.Y ?? 0;
return new TemplateMatchResult return new TemplateMatchResult
{ {
X = offsetX + maxLoc.X + template.Cols / 2, X = offsetX + maxLoc.X + scaled.Cols / 2,
Y = offsetY + maxLoc.Y + template.Rows / 2, Y = offsetY + maxLoc.Y + scaled.Rows / 2,
Width = template.Cols, Width = scaled.Cols,
Height = template.Rows, Height = scaled.Rows,
Confidence = maxVal, Confidence = maxVal,
}; };
} }

View file

@ -103,6 +103,7 @@ public class MapRequirementsConverter : IValueConverter
MapType.TrialOfChaos => "Trial Token x1", MapType.TrialOfChaos => "Trial Token x1",
MapType.Temple => "Identity Scroll x20", MapType.Temple => "Identity Scroll x20",
MapType.Endgame => "Identity Scroll x20", MapType.Endgame => "Identity Scroll x20",
MapType.Kulemak => "Invitation x1",
_ => "", _ => "",
}; };
} }

View file

@ -181,13 +181,16 @@ public sealed class D2dOverlay
private OverlayState BuildState(double fps, RenderTiming timing) private OverlayState BuildState(double fps, RenderTiming timing)
{ {
var detection = _bot.EnemyDetector.Latest; var detection = _bot.EnemyDetector.Latest;
var bossDetection = _bot.BossDetector.Latest;
return new OverlayState( return new OverlayState(
Enemies: detection.Enemies, Enemies: detection.Enemies,
Bosses: bossDetection.Bosses,
InferenceMs: detection.InferenceMs, InferenceMs: detection.InferenceMs,
Hud: _bot.HudReader.Current, Hud: _bot.HudReader.Current,
NavState: _bot.Navigation.State, NavState: _bot.Navigation.State,
NavPosition: _bot.Navigation.Position, NavPosition: _bot.Navigation.Position,
IsExploring: _bot.Navigation.IsExploring, IsExploring: _bot.Navigation.IsExploring,
ShowHudDebug: _bot.Store.Settings.ShowHudDebug,
Fps: fps, Fps: fps,
Timing: timing); Timing: timing);
} }

View file

@ -24,11 +24,13 @@ public sealed class D2dRenderContext : IDisposable
// Pre-created brushes // Pre-created brushes
public ID2D1SolidColorBrush Red { get; private set; } = null!; public ID2D1SolidColorBrush Red { get; private set; } = null!;
public ID2D1SolidColorBrush Yellow { get; private set; } = null!; public ID2D1SolidColorBrush Yellow { get; private set; } = null!;
public ID2D1SolidColorBrush Cyan { get; private set; } = null!;
public ID2D1SolidColorBrush Green { get; private set; } = null!; public ID2D1SolidColorBrush Green { get; private set; } = null!;
public ID2D1SolidColorBrush White { get; private set; } = null!; public ID2D1SolidColorBrush White { get; private set; } = null!;
public ID2D1SolidColorBrush Gray { get; private set; } = null!; public ID2D1SolidColorBrush Gray { get; private set; } = null!;
public ID2D1SolidColorBrush LifeBrush { get; private set; } = null!; public ID2D1SolidColorBrush LifeBrush { get; private set; } = null!;
public ID2D1SolidColorBrush ManaBrush { get; private set; } = null!; public ID2D1SolidColorBrush ManaBrush { get; private set; } = null!;
public ID2D1SolidColorBrush ShieldBrush { get; private set; } = null!;
public ID2D1SolidColorBrush BarBgBrush { get; private set; } = null!; public ID2D1SolidColorBrush BarBgBrush { get; private set; } = null!;
public ID2D1SolidColorBrush LabelBgBrush { get; private set; } = null!; public ID2D1SolidColorBrush LabelBgBrush { get; private set; } = null!;
public ID2D1SolidColorBrush DebugTextBrush { get; private set; } = null!; public ID2D1SolidColorBrush DebugTextBrush { get; private set; } = null!;
@ -79,11 +81,13 @@ public sealed class D2dRenderContext : IDisposable
{ {
Red = RenderTarget.CreateSolidColorBrush(new Color4(1f, 0f, 0f, 1f)); Red = RenderTarget.CreateSolidColorBrush(new Color4(1f, 0f, 0f, 1f));
Yellow = RenderTarget.CreateSolidColorBrush(new Color4(1f, 1f, 0f, 1f)); Yellow = RenderTarget.CreateSolidColorBrush(new Color4(1f, 1f, 0f, 1f));
Cyan = RenderTarget.CreateSolidColorBrush(new Color4(0f, 1f, 1f, 1f));
Green = RenderTarget.CreateSolidColorBrush(new Color4(0.31f, 1f, 0.31f, 1f)); // 80,255,80 Green = RenderTarget.CreateSolidColorBrush(new Color4(0.31f, 1f, 0.31f, 1f)); // 80,255,80
White = RenderTarget.CreateSolidColorBrush(new Color4(1f, 1f, 1f, 1f)); White = RenderTarget.CreateSolidColorBrush(new Color4(1f, 1f, 1f, 1f));
Gray = RenderTarget.CreateSolidColorBrush(new Color4(0.5f, 0.5f, 0.5f, 1f)); Gray = RenderTarget.CreateSolidColorBrush(new Color4(0.5f, 0.5f, 0.5f, 1f));
LifeBrush = RenderTarget.CreateSolidColorBrush(new Color4(200 / 255f, 40 / 255f, 40 / 255f, 1f)); LifeBrush = RenderTarget.CreateSolidColorBrush(new Color4(200 / 255f, 40 / 255f, 40 / 255f, 1f));
ManaBrush = RenderTarget.CreateSolidColorBrush(new Color4(40 / 255f, 80 / 255f, 200 / 255f, 1f)); ManaBrush = RenderTarget.CreateSolidColorBrush(new Color4(40 / 255f, 80 / 255f, 200 / 255f, 1f));
ShieldBrush = RenderTarget.CreateSolidColorBrush(new Color4(100 / 255f, 180 / 255f, 220 / 255f, 1f));
BarBgBrush = RenderTarget.CreateSolidColorBrush(new Color4(20 / 255f, 20 / 255f, 20 / 255f, 140 / 255f)); BarBgBrush = RenderTarget.CreateSolidColorBrush(new Color4(20 / 255f, 20 / 255f, 20 / 255f, 140 / 255f));
LabelBgBrush = RenderTarget.CreateSolidColorBrush(new Color4(0f, 0f, 0f, 160 / 255f)); LabelBgBrush = RenderTarget.CreateSolidColorBrush(new Color4(0f, 0f, 0f, 160 / 255f));
DebugTextBrush = RenderTarget.CreateSolidColorBrush(new Color4(80 / 255f, 1f, 80 / 255f, 1f)); DebugTextBrush = RenderTarget.CreateSolidColorBrush(new Color4(80 / 255f, 1f, 80 / 255f, 1f));
@ -95,11 +99,13 @@ public sealed class D2dRenderContext : IDisposable
{ {
Red?.Dispose(); Red?.Dispose();
Yellow?.Dispose(); Yellow?.Dispose();
Cyan?.Dispose();
Green?.Dispose(); Green?.Dispose();
White?.Dispose(); White?.Dispose();
Gray?.Dispose(); Gray?.Dispose();
LifeBrush?.Dispose(); LifeBrush?.Dispose();
ManaBrush?.Dispose(); ManaBrush?.Dispose();
ShieldBrush?.Dispose();
BarBgBrush?.Dispose(); BarBgBrush?.Dispose();
LabelBgBrush?.Dispose(); LabelBgBrush?.Dispose();
DebugTextBrush?.Dispose(); DebugTextBrush?.Dispose();

View file

@ -5,11 +5,13 @@ namespace Poe2Trade.Ui.Overlay;
public record OverlayState( public record OverlayState(
IReadOnlyList<DetectedEnemy> Enemies, IReadOnlyList<DetectedEnemy> Enemies,
IReadOnlyList<DetectedBoss> Bosses,
float InferenceMs, float InferenceMs,
HudSnapshot? Hud, HudSnapshot? Hud,
NavigationState NavState, NavigationState NavState,
MapPosition NavPosition, MapPosition NavPosition,
bool IsExploring, bool IsExploring,
bool ShowHudDebug,
double Fps, double Fps,
RenderTiming? Timing); RenderTiming? Timing);

View file

@ -27,7 +27,7 @@ internal sealed class D2dDebugTextLayer : ID2dOverlayLayer, IDisposable
UpdateCache(ctx, _left, ref lc, $"Pos: ({state.NavPosition.X:F0}, {state.NavPosition.Y:F0})", ctx.DebugTextBrush); UpdateCache(ctx, _left, ref lc, $"Pos: ({state.NavPosition.X:F0}, {state.NavPosition.Y:F0})", ctx.DebugTextBrush);
UpdateCache(ctx, _left, ref lc, $"Enemies: {state.Enemies.Count} YOLO: {state.InferenceMs:F1}ms", ctx.DebugTextBrush); UpdateCache(ctx, _left, ref lc, $"Enemies: {state.Enemies.Count} YOLO: {state.InferenceMs:F1}ms", ctx.DebugTextBrush);
if (state.Hud is { Timestamp: > 0 } hud) if (state.Hud is { Timestamp: > 0 } hud)
UpdateCache(ctx, _left, ref lc, $"HP: {hud.LifePct:P0} MP: {hud.ManaPct:P0}", ctx.DebugTextBrush); UpdateCache(ctx, _left, ref lc, $"HP: {hud.LifePct:P0} ES: {hud.ShieldPct:P0} MP: {hud.ManaPct:P0}", ctx.DebugTextBrush);
// Right column: timing // Right column: timing
if (state.Timing != null) if (state.Timing != null)

View file

@ -11,8 +11,13 @@ internal sealed class D2dEnemyBoxLayer : ID2dOverlayLayer, IDisposable
private readonly IDWriteTextLayout[] _confirmedLabels = new IDWriteTextLayout[101]; private readonly IDWriteTextLayout[] _confirmedLabels = new IDWriteTextLayout[101];
private readonly IDWriteTextLayout[] _unconfirmedLabels = new IDWriteTextLayout[101]; private readonly IDWriteTextLayout[] _unconfirmedLabels = new IDWriteTextLayout[101];
// Boss labels: cached by "classname NN%" string
private readonly Dictionary<string, IDWriteTextLayout> _bossLabels = new();
private readonly D2dRenderContext _ctx;
public D2dEnemyBoxLayer(D2dRenderContext ctx) public D2dEnemyBoxLayer(D2dRenderContext ctx)
{ {
_ctx = ctx;
for (int i = 0; i <= 100; i++) for (int i = 0; i <= 100; i++)
{ {
var text = $"{i}%"; var text = $"{i}%";
@ -41,18 +46,43 @@ internal sealed class D2dEnemyBoxLayer : ID2dOverlayLayer, IDisposable
var labelX = enemy.X; var labelX = enemy.X;
var labelY = enemy.Y - m.Height - 2; var labelY = enemy.Y - m.Height - 2;
// Background behind label
rt.FillRectangle( rt.FillRectangle(
new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2), new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
ctx.LabelBgBrush); ctx.LabelBgBrush);
rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, textBrush); rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, textBrush);
} }
// Boss bounding boxes (cyan)
foreach (var boss in state.Bosses)
{
var rect = new RectangleF(boss.X, boss.Y, boss.Width, boss.Height);
rt.DrawRectangle(rect, ctx.Cyan, 3f);
var pct = Math.Clamp((int)(boss.Confidence * 100), 0, 100);
var key = $"{boss.ClassName} {pct}%";
if (!_bossLabels.TryGetValue(key, out var layout))
{
layout = _ctx.CreateTextLayout(key, _ctx.LabelFormat);
_bossLabels[key] = layout;
}
var m = layout.Metrics;
var labelX = boss.X;
var labelY = boss.Y - m.Height - 2;
rt.FillRectangle(
new RectangleF(labelX - 1, labelY - 1, m.Width + 2, m.Height + 2),
ctx.LabelBgBrush);
rt.DrawTextLayout(new System.Numerics.Vector2(labelX, labelY), layout, ctx.Cyan);
}
} }
public void Dispose() public void Dispose()
{ {
foreach (var l in _confirmedLabels) l?.Dispose(); foreach (var l in _confirmedLabels) l?.Dispose();
foreach (var l in _unconfirmedLabels) l?.Dispose(); foreach (var l in _unconfirmedLabels) l?.Dispose();
foreach (var l in _bossLabels.Values) l?.Dispose();
} }
} }

View file

@ -7,15 +7,20 @@ namespace Poe2Trade.Ui.Overlay.Layers;
internal sealed class D2dHudInfoLayer : ID2dOverlayLayer, IDisposable internal sealed class D2dHudInfoLayer : ID2dOverlayLayer, IDisposable
{ {
private const float BarWidth = 200; private const float BarWidth = 160;
private const float BarHeight = 16; private const float BarHeight = 16;
private const float BarY = 1300; private const float BarGap = 8;
private const float LifeBarX = 1130; private const float BarY = 1416; // near bottom of 1440
private const float ManaBarX = 1230;
// 3 bars centered: total = 160*3 + 8*2 = 496, start = (2560-496)/2 = 1032
private const float LifeBarX = 1032;
private const float ShieldBarX = LifeBarX + BarWidth + BarGap;
private const float ManaBarX = ShieldBarX + BarWidth + BarGap;
// Cached bar value layouts
private string? _lifeLabel; private string? _lifeLabel;
private IDWriteTextLayout? _lifeLayout; private IDWriteTextLayout? _lifeLayout;
private string? _shieldLabel;
private IDWriteTextLayout? _shieldLayout;
private string? _manaLabel; private string? _manaLabel;
private IDWriteTextLayout? _manaLayout; private IDWriteTextLayout? _manaLayout;
@ -23,14 +28,24 @@ internal sealed class D2dHudInfoLayer : ID2dOverlayLayer, IDisposable
{ {
if (state.Hud == null || state.Hud.Timestamp == 0) return; if (state.Hud == null || state.Hud.Timestamp == 0) return;
DrawBar(ctx, LifeBarX, BarY, state.Hud.LifePct, ctx.LifeBrush, state.Hud.Life, DrawBar(ctx, LifeBarX, BarY, state.Hud.LifePct, ctx.LifeBrush,
ref _lifeLabel, ref _lifeLayout); ref _lifeLabel, ref _lifeLayout);
DrawBar(ctx, ManaBarX, BarY, state.Hud.ManaPct, ctx.ManaBrush, state.Hud.Mana, DrawBar(ctx, ShieldBarX, BarY, state.Hud.ShieldPct, ctx.ShieldBrush,
ref _shieldLabel, ref _shieldLayout);
DrawBar(ctx, ManaBarX, BarY, state.Hud.ManaPct, ctx.ManaBrush,
ref _manaLabel, ref _manaLayout); ref _manaLabel, ref _manaLayout);
// DEBUG: draw sampling lines
if (state.ShowHudDebug)
{
DrawShieldArc(ctx);
DrawSampleLine(ctx, 167, 1185, 1411, ctx.LifeBrush); // life
DrawSampleLine(ctx, 2394, 1185, 1411, ctx.ManaBrush); // mana
}
} }
private static void DrawBar(D2dRenderContext ctx, float x, float y, float pct, private static void DrawBar(D2dRenderContext ctx, float x, float y, float pct,
ID2D1SolidColorBrush fillBrush, Screen.HudValues? values, ID2D1SolidColorBrush fillBrush,
ref string? cachedLabel, ref IDWriteTextLayout? cachedLayout) ref string? cachedLabel, ref IDWriteTextLayout? cachedLayout)
{ {
var rt = ctx.RenderTarget; var rt = ctx.RenderTarget;
@ -42,31 +57,57 @@ internal sealed class D2dHudInfoLayer : ID2dOverlayLayer, IDisposable
rt.DrawRectangle(outer, ctx.Gray, 1f); rt.DrawRectangle(outer, ctx.Gray, 1f);
// Fill // Fill
var fillWidth = BarWidth * Math.Clamp(pct, 0, 1); var clamped = Math.Clamp(pct, 0, 1);
var fillWidth = BarWidth * clamped;
if (fillWidth > 0) if (fillWidth > 0)
rt.FillRectangle(new RectangleF(x, y, fillWidth, BarHeight), fillBrush); rt.FillRectangle(new RectangleF(x, y, fillWidth, BarHeight), fillBrush);
// Value text // Percentage text
if (values != null) var label = $"{clamped:P0}";
if (label != cachedLabel)
{ {
var label = $"{values.Current}/{values.Max}"; cachedLayout?.Dispose();
if (label != cachedLabel) cachedLabel = label;
{ cachedLayout = ctx.CreateTextLayout(label, ctx.BarValueFormat);
cachedLayout?.Dispose();
cachedLabel = label;
cachedLayout = ctx.CreateTextLayout(label, ctx.BarValueFormat);
}
var m = cachedLayout!.Metrics;
var textX = x + (BarWidth - m.Width) / 2;
var textY = y + (BarHeight - m.Height) / 2;
rt.DrawTextLayout(new System.Numerics.Vector2(textX, textY), cachedLayout, ctx.White);
} }
var m = cachedLayout!.Metrics;
var textX = x + (BarWidth - m.Width) / 2;
var textY = y + (BarHeight - m.Height) / 2;
rt.DrawTextLayout(new System.Numerics.Vector2(textX, textY), cachedLayout, ctx.White);
}
/// <summary>
/// Debug aid: plots yellow 3x3 dots every 2 degrees along the right half of a circle
/// (centre 170,1298, radius 130) and marks the centre with a small cross.
/// </summary>
/// <param name="ctx">Render context supplying the render target and the yellow brush.</param>
private static void DrawShieldArc(D2dRenderContext ctx)
{
    const float centerX = 170;
    const float centerY = 1298;
    const float radius = 130;
    const int stepDegrees = 2;

    var target = ctx.RenderTarget;

    // Sweep from straight up (-90°) to straight down (+90°) through the right-hand side.
    var angle = -90;
    while (angle <= 90)
    {
        var theta = angle * Math.PI / 180.0;
        var dotX = (float)(centerX + radius * Math.Cos(theta));
        var dotY = (float)(centerY + radius * Math.Sin(theta));
        target.FillRectangle(new RectangleF(dotX - 1, dotY - 1, 3, 3), ctx.Yellow);
        angle += stepDegrees;
    }

    // Centre marker: a horizontal bar followed by a vertical bar.
    var horizontal = new RectangleF(centerX - 3, centerY - 1, 7, 3);
    target.FillRectangle(horizontal, ctx.Yellow);
    var vertical = new RectangleF(centerX - 1, centerY - 3, 3, 7);
    target.FillRectangle(vertical, ctx.Yellow);
}
private static void DrawSampleLine(D2dRenderContext ctx, float x, float yTop, float yBot, ID2D1SolidColorBrush brush)
{
ctx.RenderTarget.DrawLine(
new System.Numerics.Vector2(x, yTop),
new System.Numerics.Vector2(x, yBot),
brush, 2f);
} }
public void Dispose() public void Dispose()
{ {
_lifeLayout?.Dispose(); _lifeLayout?.Dispose();
_shieldLayout?.Dispose();
_manaLayout?.Dispose(); _manaLayout?.Dispose();
} }
} }

View file

@ -12,6 +12,11 @@ public partial class DebugViewModel : ObservableObject
[ObservableProperty] private string _findText = ""; [ObservableProperty] private string _findText = "";
[ObservableProperty] private string _debugResult = ""; [ObservableProperty] private string _debugResult = "";
[ObservableProperty]
[NotifyPropertyChangedFor(nameof(BurstCaptureLabel))]
private bool _isBurstCapturing;
public string BurstCaptureLabel => IsBurstCapturing ? "Stop Capture" : "Burst Capture";
[ObservableProperty] private string _selectedGridLayout = "inventory"; [ObservableProperty] private string _selectedGridLayout = "inventory";
[ObservableProperty] private decimal? _clickX; [ObservableProperty] private decimal? _clickX;
[ObservableProperty] private decimal? _clickY; [ObservableProperty] private decimal? _clickY;
@ -148,6 +153,15 @@ public partial class DebugViewModel : ObservableObject
} }
} }
/// <summary>
/// Writes a two-line summary of the enemy and boss detectors (enabled flag, number of
/// detections in the latest snapshot, inference time) into <c>DebugResult</c>.
/// </summary>
[RelayCommand]
private void DetectionStatus()
{
    // Grab both snapshots first so each line reports on one consistent result object.
    var enemySnapshot = _bot.EnemyDetector.Latest;
    var bossSnapshot = _bot.BossDetector.Latest;

    var enemyLine =
        $"Enemy: enabled={_bot.EnemyDetector.Enabled}, count={enemySnapshot.Enemies.Count}, ms={enemySnapshot.InferenceMs:F1}";
    var bossLine =
        $"Boss: enabled={_bot.BossDetector.Enabled}, count={bossSnapshot.Bosses.Count}, ms={bossSnapshot.InferenceMs:F1}";

    DebugResult = string.Join("\n", enemyLine, bossLine);
}
[RelayCommand] [RelayCommand]
private void SaveMinimapDebug() private void SaveMinimapDebug()
{ {
@ -187,6 +201,116 @@ public partial class DebugViewModel : ObservableObject
catch (Exception ex) { DebugResult = $"Failed: {ex.Message}"; } catch (Exception ex) { DebugResult = $"Failed: {ex.Message}"; }
} }
/// <summary>
/// Debug command: focuses the game, holds both mouse buttons for a fixed-length run,
/// pauses the attack while mana is low, and taps Q on a cooldown once mana has stayed
/// above a threshold for a while.
/// </summary>
/// <remarks>
/// Mana is polled from <c>_bot.HudReader.Current.ManaPct</c> roughly every
/// <c>PollMs</c> milliseconds (plus jitter). Progress and errors are reported through
/// <c>DebugResult</c>; exceptions are logged rather than rethrown. Both mouse buttons
/// are always released in the <c>finally</c> block, even when the run fails early.
/// </remarks>
[RelayCommand]
private async Task AttackTest()
{
    const int VK_Q = 0x51;
    const int DurationMs = 30_000;
    const int PollMs = 100;
    const float ManaLow = 0.50f;        // release the buttons below this
    const float ManaResume = 0.75f;     // press them again at or above this
    const float ManaQThreshold = 0.60f; // mana must stay above this to arm the Q phase
    const int QPhaseStableMs = 2_000;   // ...for this long
    const int QCooldownMs = 5_000;      // minimum gap between Q presses

    try
    {
        DebugResult = "Attack test: focusing game...";
        await _bot.Game.FocusGame();
        await _bot.Game.MoveMouseTo(1280, 720);
        await Task.Delay(300);

        var holding = true;
        _bot.Game.LeftMouseDown();
        _bot.Game.RightMouseDown();

        var sw = System.Diagnostics.Stopwatch.StartNew();
        var manaStableStart = (long?)null;
        var qPhase = false;
        // Start "one cooldown ago" so the first Q press is allowed as soon as the phase arms.
        long lastQTime = -QCooldownMs;

        while (sw.ElapsedMilliseconds < DurationMs)
        {
            var mana = _bot.HudReader.Current.ManaPct;
            var elapsed = sw.ElapsedMilliseconds;

            // Mana management: the gap between ManaLow and ManaResume avoids rapid toggling.
            if (holding && mana < ManaLow)
            {
                _bot.Game.LeftMouseUp();
                _bot.Game.RightMouseUp();
                holding = false;
                DebugResult = $"Attack test: mana low ({mana:P0}), waiting...";
                await Task.Delay(50 + Random.Shared.Next(100));
            }
            else if (!holding && mana >= ManaResume)
            {
                await Task.Delay(50 + Random.Shared.Next(100));
                _bot.Game.LeftMouseDown();
                _bot.Game.RightMouseDown();
                holding = true;
                DebugResult = $"Attack test: mana recovered ({mana:P0}), attacking...";
            }

            // Track Q phase activation: mana must stay above the threshold continuously.
            if (!qPhase)
            {
                if (mana > ManaQThreshold)
                {
                    manaStableStart ??= elapsed;
                    if (elapsed - manaStableStart.Value >= QPhaseStableMs)
                    {
                        qPhase = true;
                        DebugResult = "Attack test: Q phase activated";
                    }
                }
                else
                {
                    manaStableStart = null;
                }
            }

            // Press Q periodically, briefly releasing and re-pressing the mouse buttons after it.
            if (qPhase && holding && elapsed - lastQTime >= QCooldownMs)
            {
                await _bot.Game.PressKey(VK_Q);
                await Task.Delay(100 + Random.Shared.Next(100));
                _bot.Game.LeftMouseUp();
                _bot.Game.RightMouseUp();
                await Task.Delay(200 + Random.Shared.Next(100));
                _bot.Game.LeftMouseDown();
                _bot.Game.RightMouseDown();
                // Cooldown is measured from the start of this iteration, not the end of the press.
                lastQTime = elapsed;
            }

            await Task.Delay(PollMs + Random.Shared.Next(100));
        }

        DebugResult = $"Attack test: completed ({DurationMs / 1000}s)";
    }
    catch (Exception ex)
    {
        DebugResult = $"Attack test failed: {ex.Message}";
        Log.Error(ex, "Attack test failed");
    }
    finally
    {
        // Never leave the buttons held down, whatever happened above.
        _bot.Game.LeftMouseUp();
        _bot.Game.RightMouseUp();
    }
}
/// <summary>
/// Flips burst-capture mode: updates the view-model flag, forwards it to the frame
/// saver, and reports the new state (or the saved-frame count when stopping).
/// </summary>
[RelayCommand]
private void ToggleBurstCapture()
{
    var nowCapturing = !IsBurstCapturing;
    IsBurstCapturing = nowCapturing;
    _bot.FrameSaver.BurstMode = nowCapturing;

    if (nowCapturing)
    {
        DebugResult = "Burst capture ON — saving every 200ms to training-data/raw/";
        return;
    }

    DebugResult = $"Burst capture OFF — {_bot.FrameSaver.SavedCount} frames saved";
}
[RelayCommand] [RelayCommand]
private async Task ClickSalvage() private async Task ClickSalvage()
{ {

View file

@ -194,11 +194,12 @@ public partial class MainWindowViewModel : ObservableObject
{ {
Log.Information("END pressed — emergency stop"); Log.Information("END pressed — emergency stop");
await _bot.Navigation.Stop(); await _bot.Navigation.Stop();
_bot.BossRunExecutor.Stop();
_bot.Pause(); _bot.Pause();
Avalonia.Threading.Dispatcher.UIThread.Post(() => Avalonia.Threading.Dispatcher.UIThread.Post(() =>
{ {
IsPaused = true; IsPaused = true;
State = "Stopped (F12)"; State = "Stopped (END)";
}); });
} }
f12WasDown = endDown; f12WasDown = endDown;

View file

@ -1,3 +1,4 @@
using System.Collections.ObjectModel;
using Timer = System.Timers.Timer; using Timer = System.Timers.Timer;
using Avalonia.Threading; using Avalonia.Threading;
using CommunityToolkit.Mvvm.ComponentModel; using CommunityToolkit.Mvvm.ComponentModel;
@ -19,16 +20,33 @@ public partial class MappingViewModel : ObservableObject, IDisposable
[ObservableProperty] private int _enemiesDetected; [ObservableProperty] private int _enemiesDetected;
[ObservableProperty] private float _inferenceMs; [ObservableProperty] private float _inferenceMs;
[ObservableProperty] private bool _hasModel; [ObservableProperty] private bool _hasModel;
[ObservableProperty] private bool _isKulemak;
[ObservableProperty] private bool _kulemakEnabled;
[ObservableProperty] private string _invitationTabPath = "";
[ObservableProperty] private string _lootTabPath = "";
[ObservableProperty] private decimal? _invitationCount = 15;
public static MapType[] MapTypes { get; } = [MapType.TrialOfChaos, MapType.Temple, MapType.Endgame]; public static MapType[] MapTypes { get; } = [MapType.TrialOfChaos, MapType.Temple, MapType.Endgame, MapType.Kulemak];
public ObservableCollection<string> StashTabPaths { get; } = [];
private static readonly string ModelPath = Path.GetFullPath("tools/python-detect/models/enemy-v1.pt"); private static readonly string ModelsDir = Path.GetFullPath("tools/python-detect/models");
/// <summary>
/// Returns true when the models directory exists and holds at least one <c>*.pt</c> file.
/// </summary>
/// <remarks>
/// Uses lazy enumeration so the check stops at the first match instead of building an
/// array of every weight file each time it is called.
/// </remarks>
private static bool AnyModelExists() =>
    Directory.Exists(ModelsDir) && Directory.EnumerateFiles(ModelsDir, "*.pt").Any();
public MappingViewModel(BotOrchestrator bot) public MappingViewModel(BotOrchestrator bot)
{ {
_bot = bot; _bot = bot;
_selectedMapType = bot.Config.MapType; _selectedMapType = bot.Config.MapType;
_hasModel = File.Exists(ModelPath); _isKulemak = _selectedMapType == MapType.Kulemak;
_hasModel = AnyModelExists();
// Load Kulemak settings
_kulemakEnabled = bot.Config.Kulemak.Enabled;
_invitationTabPath = bot.Config.Kulemak.InvitationTabPath;
_lootTabPath = bot.Config.Kulemak.LootTabPath;
_invitationCount = bot.Config.Kulemak.InvitationCount;
LoadStashTabPaths();
_bot.EnemyDetector.DetectionUpdated += OnDetectionUpdated; _bot.EnemyDetector.DetectionUpdated += OnDetectionUpdated;
@ -40,6 +58,47 @@ public partial class MappingViewModel : ObservableObject, IDisposable
partial void OnSelectedMapTypeChanged(MapType value) partial void OnSelectedMapTypeChanged(MapType value)
{ {
_bot.Store.UpdateSettings(s => s.MapType = value); _bot.Store.UpdateSettings(s => s.MapType = value);
IsKulemak = value == MapType.Kulemak;
}
// Persist the Kulemak "Enabled" checkbox as soon as it changes.
partial void OnKulemakEnabledChanged(bool value) =>
    _bot.Store.UpdateSettings(settings => settings.Kulemak.Enabled = value);
// Persist the selected invitation stash tab path as soon as it changes.
partial void OnInvitationTabPathChanged(string value) =>
    _bot.Store.UpdateSettings(settings => settings.Kulemak.InvitationTabPath = value);
// Persist the selected loot stash tab path as soon as it changes.
partial void OnLootTabPathChanged(string value) =>
    _bot.Store.UpdateSettings(settings => settings.Kulemak.LootTabPath = value);
// Persist the invitations-per-batch value; a cleared input (null) is stored as 15.
partial void OnInvitationCountChanged(decimal? value) =>
    _bot.Store.UpdateSettings(
        settings => settings.Kulemak.InvitationCount = (int)value.GetValueOrDefault(15));
private void LoadStashTabPaths()
{
StashTabPaths.Clear();
StashTabPaths.Add(""); // empty = not configured
var s = _bot.Store.Settings;
if (s.StashCalibration == null) return;
foreach (var tab in s.StashCalibration.Tabs)
{
if (tab.IsFolder)
{
foreach (var sub in tab.SubTabs)
StashTabPaths.Add($"{tab.Name}/{sub.Name}");
}
else
{
StashTabPaths.Add(tab.Name);
}
}
} }
partial void OnIsFrameSaverEnabledChanged(bool value) partial void OnIsFrameSaverEnabledChanged(bool value)
@ -50,6 +109,7 @@ public partial class MappingViewModel : ObservableObject, IDisposable
partial void OnIsDetectionEnabledChanged(bool value) partial void OnIsDetectionEnabledChanged(bool value)
{ {
_bot.EnemyDetector.Enabled = value; _bot.EnemyDetector.Enabled = value;
_bot.BossDetector.Enabled = value;
} }
private void OnDetectionUpdated(DetectionSnapshot snapshot) private void OnDetectionUpdated(DetectionSnapshot snapshot)
@ -64,7 +124,7 @@ public partial class MappingViewModel : ObservableObject, IDisposable
private void RefreshStats() private void RefreshStats()
{ {
FramesSaved = _bot.FrameSaver.SavedCount; FramesSaved = _bot.FrameSaver.SavedCount;
HasModel = File.Exists(ModelPath); HasModel = AnyModelExists();
} }
public void Dispose() public void Dispose()

View file

@ -19,6 +19,7 @@ public partial class SettingsViewModel : ObservableObject
[ObservableProperty] private decimal? _waitForMoreItemsMs = 20000; [ObservableProperty] private decimal? _waitForMoreItemsMs = 20000;
[ObservableProperty] private decimal? _betweenTradesDelayMs = 5000; [ObservableProperty] private decimal? _betweenTradesDelayMs = 5000;
[ObservableProperty] private bool _headless = true; [ObservableProperty] private bool _headless = true;
[ObservableProperty] private bool _showHudDebug;
[ObservableProperty] private bool _isSaved; [ObservableProperty] private bool _isSaved;
[ObservableProperty] private string _calibrationStatus = ""; [ObservableProperty] private string _calibrationStatus = "";
[ObservableProperty] private string _stashCalibratedAt = ""; [ObservableProperty] private string _stashCalibratedAt = "";
@ -44,6 +45,7 @@ public partial class SettingsViewModel : ObservableObject
WaitForMoreItemsMs = s.WaitForMoreItemsMs; WaitForMoreItemsMs = s.WaitForMoreItemsMs;
BetweenTradesDelayMs = s.BetweenTradesDelayMs; BetweenTradesDelayMs = s.BetweenTradesDelayMs;
Headless = s.Headless; Headless = s.Headless;
ShowHudDebug = s.ShowHudDebug;
} }
private void LoadTabs() private void LoadTabs()
@ -94,6 +96,7 @@ public partial class SettingsViewModel : ObservableObject
s.WaitForMoreItemsMs = (int)(WaitForMoreItemsMs ?? 20000); s.WaitForMoreItemsMs = (int)(WaitForMoreItemsMs ?? 20000);
s.BetweenTradesDelayMs = (int)(BetweenTradesDelayMs ?? 5000); s.BetweenTradesDelayMs = (int)(BetweenTradesDelayMs ?? 5000);
s.Headless = Headless; s.Headless = Headless;
s.ShowHudDebug = ShowHudDebug;
}); });
IsSaved = true; IsSaved = true;
@ -206,4 +209,5 @@ public partial class SettingsViewModel : ObservableObject
partial void OnWaitForMoreItemsMsChanged(decimal? value) => IsSaved = false; partial void OnWaitForMoreItemsMsChanged(decimal? value) => IsSaved = false;
partial void OnBetweenTradesDelayMsChanged(decimal? value) => IsSaved = false; partial void OnBetweenTradesDelayMsChanged(decimal? value) => IsSaved = false;
partial void OnHeadlessChanged(bool value) => IsSaved = false; partial void OnHeadlessChanged(bool value) => IsSaved = false;
partial void OnShowHudDebugChanged(bool value) => IsSaved = false;
} }

View file

@ -233,6 +233,36 @@
</StackPanel> </StackPanel>
</Border> </Border>
<!-- Kulemak Settings (visible when Kulemak selected) -->
<Border IsVisible="{Binding IsKulemak}" Background="#161b22"
BorderBrush="#30363d" BorderThickness="1"
CornerRadius="8" Padding="10">
<StackPanel Spacing="8">
<TextBlock Text="KULEMAK" FontSize="11" FontWeight="SemiBold"
Foreground="#8b949e" />
<CheckBox IsChecked="{Binding KulemakEnabled}" Content="Enabled"
Foreground="#e6edf3" />
<DockPanel>
<TextBlock Text="Invitation Tab" FontSize="11" Foreground="#8b949e"
Width="140" VerticalAlignment="Center" />
<ComboBox ItemsSource="{Binding StashTabPaths}"
SelectedItem="{Binding InvitationTabPath}" />
</DockPanel>
<DockPanel>
<TextBlock Text="Loot Tab" FontSize="11" Foreground="#8b949e"
Width="140" VerticalAlignment="Center" />
<ComboBox ItemsSource="{Binding StashTabPaths}"
SelectedItem="{Binding LootTabPath}" />
</DockPanel>
<DockPanel>
<TextBlock Text="Invitations per batch" FontSize="11" Foreground="#8b949e"
Width="140" VerticalAlignment="Center" />
<NumericUpDown Value="{Binding InvitationCount}"
Minimum="1" Maximum="60" Increment="1" Width="100" />
</DockPanel>
</StackPanel>
</Border>
<!-- Training Data --> <!-- Training Data -->
<Border Background="#161b22" BorderBrush="#30363d" BorderThickness="1" <Border Background="#161b22" BorderBrush="#30363d" BorderThickness="1"
CornerRadius="8" Padding="10"> CornerRadius="8" Padding="10">
@ -298,6 +328,10 @@
<Button Content="ANGE" Command="{Binding ClickAngeCommand}" /> <Button Content="ANGE" Command="{Binding ClickAngeCommand}" />
<Button Content="STASH" Command="{Binding ClickStashCommand}" /> <Button Content="STASH" Command="{Binding ClickStashCommand}" />
<Button Content="SALVAGE" Command="{Binding ClickSalvageCommand}" /> <Button Content="SALVAGE" Command="{Binding ClickSalvageCommand}" />
<Button Content="Attack Test" Command="{Binding AttackTestCommand}" />
<Button Content="Detection?" Command="{Binding DetectionStatusCommand}" />
<Button Content="{Binding BurstCaptureLabel}"
Command="{Binding ToggleBurstCaptureCommand}" />
</StackPanel> </StackPanel>
</StackPanel> </StackPanel>
</Border> </Border>
@ -408,6 +442,8 @@
<CheckBox IsChecked="{Binding Headless}" Content="Headless browser" <CheckBox IsChecked="{Binding Headless}" Content="Headless browser"
Foreground="#e6edf3" Margin="0,4,0,0" /> Foreground="#e6edf3" Margin="0,4,0,0" />
<CheckBox IsChecked="{Binding ShowHudDebug}" Content="Show HUD debug overlay"
Foreground="#e6edf3" />
<StackPanel Orientation="Horizontal" Spacing="8" Margin="0,2,0,0"> <StackPanel Orientation="Horizontal" Spacing="8" Margin="0,2,0,0">
<Button Content="Save Settings" Command="{Binding SaveSettingsCommand}" /> <Button Content="Save Settings" Command="{Binding SaveSettingsCommand}" />
@ -564,6 +600,7 @@
</ItemsControl> </ItemsControl>
</StackPanel> </StackPanel>
</Border> </Border>
</StackPanel> </StackPanel>
</ScrollViewer> </ScrollViewer>
</TabItem> </TabItem>

View file

@ -0,0 +1,123 @@
# YOLO Detection Pipeline
Capture frames → annotate → train → detect live in-game.
## Setup
```bash
cd tools/python-detect
python -m venv .venv
.venv/Scripts/activate # Windows
pip install ultralytics opencv-python
# GPU training (NVIDIA):
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128 --force-reinstall
```
## 1. Capture Frames
In the app Debug tab, click **Burst Capture** during a boss fight. Frames save to `training-data/raw/` every 200ms. Click **Stop Capture** when done.
## 2. Annotate
```bash
python annotate.py # defaults to ../../training-data/raw
python annotate.py ../../training-data/raw
tools\python-detect\.venv\Scripts\python.exe tools\python-detect\annotate.py training-data\raw
```
### Toolbar
Clickable buttons at the top of the window:
| Button | Hotkey | Action |
|---|---|---|
| Predict | **P** | Run YOLO model on current image to auto-generate boxes |
| Save+Next | **Space** | Save labels and advance to next image |
| Filter | **F** | Cycle filter: All → Unlabeled → Empty → Labeled → per-class |
| Undo | **Z** | Undo last change |
| Del Image | **X** | Delete current image file and its label |
### Controls
| Action | Input |
|---|---|
| Draw new box | Left-drag on empty area |
| Select box | Left-click on box |
| Move box | Left-drag box body |
| Resize box | Left-drag corner handle |
| Cycle class | Right-click on box |
| Set class | 1-9 keys (selected box or new-box default) |
| Delete box | Delete / Backspace |
| Navigate | Left / Right arrow |
| Deselect | E |
| Quit | Q / Escape (auto-saves) |
### Predict (P)
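Press **P** or click the Predict button to run the current YOLO model on the image. The annotator picks the highest-numbered `models/boss-vN.pt` (for example `models/boss-v1.pt`) and falls back to any other `.pt` file in `models/` if none matches. It replaces existing boxes with model predictions (undoable with Z). The model lazy-loads on first use.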
Workflow: press P to auto-predict → adjust/delete bad boxes → Space to save+next.
### Classes
Edit the `DEFAULT_CLASSES` list at the top of `annotate.py` to add new boss types (it is the fallback used when no class list is passed in):
```python
DEFAULT_CLASSES = ["kulemak", "arbiter"]
```
## 3. Split Dataset
Create a train/valid split (85/15) for training. Example structure:
```
training-data/boss-dataset/
├── data.yaml
├── train/
│ ├── images/
│ └── labels/
└── valid/
├── images/
└── labels/
```
`data.yaml`:
```yaml
train: C:/Users/boki/repos/poe2trade/training-data/boss-dataset/train/images
val: C:/Users/boki/repos/poe2trade/training-data/boss-dataset/valid/images
nc: 2
names: ['kulemak', 'arbiter']
```
Copy images and their `.txt` label files into the appropriate split directories.
## 4. Train
```bash
python train.py --data C:/Users/boki/repos/poe2trade/training-data/boss-dataset/data.yaml --name boss-v1 --epochs 100
```
Options: `--batch 16`, `--imgsz 640`, `--device 0` (GPU) or `--device cpu`.
Best weights are auto-copied to `models/boss-v1.pt`. If ultralytics auto-increments the run name (e.g. `boss-v12`), copy manually:
```bash
cp runs/detect/boss-v1*/weights/best.pt models/boss-v1.pt
```
## 5. Live Detection
The C# app loads `models/boss-v1.pt` via `daemon.py` (JSON stdin/stdout protocol). Enable detection in the Mapping tab. Boss bounding boxes render on the Direct2D overlay in cyan.
## Files
| File | Purpose |
|---|---|
| `annotate.py` | Interactive annotator with predict, select/move/resize, tag, filter |
| `train.py` | Train YOLOv11n, copies best weights to `models/` |
| `daemon.py` | Persistent inference daemon (managed by C# `PythonDetectBridge`) |
| `fix_labels.py` | One-time fix for labels generated with the old buggy annotator |
| `models/` | Trained `.pt` weight files |

View file

@ -0,0 +1,760 @@
"""
Bounding-box annotator with select / move / resize / tag / filter / predict.
Controls
--------
Left drag (empty area) : draw new box
Left click (on box) : select it
Left drag (box body) : move it
Left drag (corner) : resize it
Right-click (on box) : cycle class
1-9 : set class of selected box (or default new-box class)
Delete : remove selected box
Space / Enter : save + next image
Left / Right arrow : prev / next image
P : predict (run YOLO model on current image)
F : cycle filter (All > Unlabeled > Empty > Labeled > per-class)
Z : undo
X : delete image file + next
E : deselect
Q / Escape : quit (auto-saves current)
Toolbar buttons at the top are also clickable.
Aspect ratio is always preserved (letterboxed).
Saves YOLO-format .txt labels alongside images.
"""
import cv2
import numpy as np
import os
import sys
import glob
# ── Classes ──────────────────────────────────────────────────────
# Passed in by manage.py via run_annotator(img_dir, classes).
# Standalone fallback: single-class kulemak.
DEFAULT_CLASSES = ["kulemak"]
# Per-class draw colours, looked up as COLORS[cls_id % len(COLORS)].
# NOTE(review): these tuples go straight into cv2 drawing calls on a canvas built
# from cv2.imread data, so they are presumably read in OpenCV's BGR channel order.
# The names below follow BGR; confirm on screen.
COLORS = [
    (0, 255, 255),   # yellow in BGR, cyan if read as RGB (kulemak)
    (255, 0, 255),   # magenta in either order (arbiter)
    (0, 255, 0),     # green in either order
    (255, 128, 0),   # blue-ish in BGR, orange if read as RGB
    (128, 0, 255),   # pink-ish in BGR, purple if read as RGB
]
HANDLE_R = 7          # corner handle radius (px)
MIN_BOX = 0.01        # min normalised box dimension
PREDICT_CONF = 0.20   # confidence threshold for auto-predict
# Layout (window pixels)
TOOLBAR_Y = 32        # top of toolbar row (below info line)
TOOLBAR_H = 30        # toolbar row height
IMG_TOP = TOOLBAR_Y + TOOLBAR_H + 4  # image area starts here
HELP_H = 22           # reserved at bottom for help text
# Windows arrow / special key codes from cv2.waitKeyEx
# (platform-specific values; other platforms presumably report different codes)
K_LEFT = 2424832
K_RIGHT = 2555904
K_DEL = 3014656
# ── Box dataclass ─────────────────────────────────────────────────
class Box:
    """Axis-aligned box stored as centre, size and class id.

    The annotator keeps every coordinate as a fraction of the image width or
    height, which is also the layout written to the label files.
    """

    __slots__ = ("cx", "cy", "w", "h", "cls_id")

    def __init__(self, cx, cy, w, h, cls_id=0):
        self.cx = cx
        self.cy = cy
        self.w = w
        self.h = h
        self.cls_id = cls_id

    @property
    def x1(self):
        """Left edge."""
        return self.cx - self.w / 2

    @property
    def y1(self):
        """Top edge."""
        return self.cy - self.h / 2

    @property
    def x2(self):
        """Right edge."""
        return self.cx + self.w / 2

    @property
    def y2(self):
        """Bottom edge."""
        return self.cy + self.h / 2

    def set_corners(self, x1, y1, x2, y2):
        """Redefine the box from two opposite corners, given in any order."""
        self.cx, self.cy = (x1 + x2) / 2, (y1 + y2) / 2
        self.w, self.h = abs(x2 - x1), abs(y2 - y1)

    def contains(self, nx, ny):
        """True when the point lies inside the box, edges included."""
        within_x = self.x1 <= nx <= self.x2
        return within_x and self.y1 <= ny <= self.y2

    def corner_at(self, nx, ny, thr):
        """Return "tl"/"tr"/"bl"/"br" for the first corner closer than thr on both axes, else None."""
        left, top, right, bottom = self.x1, self.y1, self.x2, self.y2
        handles = (
            ("tl", left, top), ("tr", right, top),
            ("bl", left, bottom), ("br", right, bottom),
        )
        return next(
            (tag for tag, hx, hy in handles
             if abs(nx - hx) < thr and abs(ny - hy) < thr),
            None,
        )

    def copy(self):
        """Return an independent Box with the same geometry and class."""
        return Box(self.cx, self.cy, self.w, self.h, self.cls_id)
# ── Toolbar button ────────────────────────────────────────────────
class Button:
    """Clickable toolbar entry: a caption, an action name and a hit rectangle."""

    __slots__ = ("label", "action", "x1", "y1", "x2", "y2")

    def __init__(self, label, action):
        self.label, self.action = label, action
        # The rectangle starts collapsed at the origin; the drawing code assigns
        # the real window coordinates when it lays the toolbar out.
        self.x1 = 0
        self.y1 = 0
        self.x2 = 0
        self.y2 = 0

    def hit(self, wx, wy):
        """True when the window point falls inside the rectangle, edges included."""
        if not (self.x1 <= wx <= self.x2):
            return False
        return self.y1 <= wy <= self.y2
# ── Main tool ─────────────────────────────────────────────────────
class Annotator:
def __init__(self, img_dir, classes=None):
    """Set up annotator state for the images found in img_dir.

    classes is the list of class names; DEFAULT_CLASSES is used when it is
    missing or empty. No image is loaded and no window is created here.
    """
    self.classes = classes or DEFAULT_CLASSES
    self.img_dir = os.path.abspath(img_dir)
    self.all_files = self._scan()

    # Filter modes: four fixed ones followed by one "class:<id>" entry per class.
    self.FILTERS = [
        "all", "unlabeled", "empty", "labeled",
        *(f"class:{i}" for i in range(len(self.classes))),
    ]
    self.filt_idx = 0
    self.files = list(self.all_files)   # files visible under the active filter
    self.pos = 0                        # index into self.files

    # Currently loaded image and its annotations.
    self.img = None
    self.iw = 0
    self.ih = 0
    self.boxes = []
    self.sel = -1        # index of the selected box, -1 for none
    self.cur_cls = 0     # class given to newly drawn boxes
    self.dirty = False
    self.undo_stack = []

    # In-progress mouse drag.
    self.mode = None
    self.d_start = None
    self.d_anchor = None
    self.d_orig = None
    self.mouse_n = None

    # Window and the cached letterboxed base canvas.
    self.WIN = "Annotator"
    self.ww = 1600
    self.wh = 900
    self._cache = None
    self._cache_key = None

    # Toolbar buttons; their rectangles are filled in when the toolbar is drawn.
    self.buttons = [
        Button(caption, action)
        for caption, action in (
            ("[P] Predict", "predict"),
            ("[Space] Save+Next", "save_next"),
            ("[F] Filter", "filter"),
            ("[Z] Undo", "undo"),
            ("[X] Del Image", "del_img"),
        )
    ]

    # YOLO model, loaded on first use.
    self._model = None
    self._model_tried = False

    # Session counters.
    self.n_saved = 0
    self.n_deleted = 0
# ── file scanning ─────────────────────────────────────────────
def _scan(self):
files = []
for ext in ("*.jpg", "*.jpeg", "*.png"):
files.extend(glob.glob(os.path.join(self.img_dir, ext)))
files.sort()
return files
@staticmethod
def _lbl(fp):
return os.path.splitext(fp)[0] + ".txt"
@staticmethod
def _is_empty_label(lp):
"""Label file exists but has no boxes (negative example)."""
if not os.path.exists(lp):
return False
with open(lp) as f:
return f.read().strip() == ""
@staticmethod
def _has_labels(lp):
"""Label file exists and contains at least one box."""
if not os.path.exists(lp):
return False
with open(lp) as f:
return f.read().strip() != ""
def _refilter(self):
mode = self.FILTERS[self.filt_idx]
if mode == "all":
self.files = [f for f in self.all_files if os.path.exists(f)]
elif mode == "unlabeled":
self.files = [f for f in self.all_files
if os.path.exists(f) and not os.path.exists(self._lbl(f))]
elif mode == "empty":
self.files = [f for f in self.all_files
if os.path.exists(f) and self._is_empty_label(self._lbl(f))]
elif mode == "labeled":
self.files = [f for f in self.all_files
if os.path.exists(f) and self._has_labels(self._lbl(f))]
elif mode.startswith("class:"):
cid = int(mode.split(":")[1])
self.files = []
for f in self.all_files:
if not os.path.exists(f):
continue
lp = self._lbl(f)
if os.path.exists(lp):
with open(lp) as fh:
if any(l.strip().startswith(f"{cid} ") for l in fh):
self.files.append(f)
self.pos = max(0, min(self.pos, len(self.files) - 1))
# ── I/O ───────────────────────────────────────────────────────
def _load(self):
if not self.files:
return False
self.img = cv2.imread(self.files[self.pos])
if self.img is None:
return False
self.ih, self.iw = self.img.shape[:2]
self._cache = None
self._load_boxes()
self.sel = -1
self.dirty = False
self.undo_stack.clear()
return True
def _load_boxes(self):
self.boxes = []
lp = self._lbl(self.files[self.pos])
if not os.path.exists(lp):
return
with open(lp) as f:
for line in f:
p = line.strip().split()
if len(p) >= 5:
self.boxes.append(
Box(float(p[1]), float(p[2]),
float(p[3]), float(p[4]), int(p[0])))
def _save(self):
if not self.files:
return
lp = self._lbl(self.files[self.pos])
with open(lp, "w") as f:
for b in self.boxes:
f.write(f"{b.cls_id} {b.cx:.6f} {b.cy:.6f} "
f"{b.w:.6f} {b.h:.6f}\n")
self.n_saved += 1
self.dirty = False
def _push_undo(self):
self.undo_stack.append([b.copy() for b in self.boxes])
if len(self.undo_stack) > 50:
self.undo_stack.pop(0)
def _pop_undo(self):
if not self.undo_stack:
return
self.boxes = self.undo_stack.pop()
self.sel = -1
self.dirty = True
# ── YOLO predict ──────────────────────────────────────────────
def _ensure_model(self):
if self._model_tried:
return self._model is not None
self._model_tried = True
model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
# Find the latest boss-v*.pt by version number, fallback to any .pt
import re
best_name, best_ver = None, -1
if os.path.isdir(model_dir):
for name in os.listdir(model_dir):
if not name.endswith(".pt"):
continue
m = re.match(r"boss-v(\d+)\.pt$", name)
if m and int(m.group(1)) > best_ver:
best_ver = int(m.group(1))
best_name = name
elif best_name is None:
best_name = name
if best_name:
path = os.path.join(model_dir, best_name)
print(f" Loading model: {best_name} ...")
from ultralytics import YOLO
self._model = YOLO(path)
print(f" Model loaded.")
return True
print(f" No .pt model found in {model_dir}")
return False
def _predict(self):
if self.img is None or not self.files:
return
if not self._ensure_model():
return
self._push_undo()
results = self._model(self.files[self.pos], conf=PREDICT_CONF, verbose=False)
det = results[0].boxes
self.boxes = []
for box in det:
cls_id = int(box.cls[0])
cx, cy, w, h = box.xywhn[0].tolist()
conf = box.conf[0].item()
self.boxes.append(Box(cx, cy, w, h, cls_id))
self.sel = -1
self.dirty = True
print(f" Predicted {len(self.boxes)} box(es)")
# ── coordinate transforms (letterbox) ─────────────────────────
def _xform(self):
    """Return (scale, offset_x, offset_y) that letterboxes the image into the window.

    The image is scaled uniformly to fit the area between the toolbar and the
    help bar and is centred in that area.
    """
    usable_h = max(1, self.wh - IMG_TOP - HELP_H)
    scale = min(self.ww / self.iw, usable_h / self.ih)
    shown_w, shown_h = int(self.iw * scale), int(self.ih * scale)
    off_x = (self.ww - shown_w) // 2
    off_y = IMG_TOP + (usable_h - shown_h) // 2
    return scale, off_x, off_y
def _to_norm(self, wx, wy):
s, ox, oy = self._xform()
return (wx - ox) / (self.iw * s), (wy - oy) / (self.ih * s)
def _to_win(self, nx, ny):
s, ox, oy = self._xform()
return int(nx * self.iw * s + ox), int(ny * self.ih * s + oy)
def _corner_thr(self):
    """Return the corner grab tolerance as an image fraction.

    The tolerance is HANDLE_R + 4 window pixels, divided by the displayed
    length of the image's shorter side.
    """
    scale = self._xform()[0]
    short_side_px = min(self.iw, self.ih) * scale
    return (HANDLE_R + 4) / short_side_px
# ── hit-test ──────────────────────────────────────────────────
def _hit(self, nx, ny):
thr = self._corner_thr()
if 0 <= self.sel < len(self.boxes):
b = self.boxes[self.sel]
c = b.corner_at(nx, ny, thr)
if c:
return self.sel, c
if b.contains(nx, ny):
return self.sel, "inside"
for i, b in enumerate(self.boxes):
c = b.corner_at(nx, ny, thr)
if c:
return i, c
for i, b in enumerate(self.boxes):
if b.contains(nx, ny):
return i, "inside"
return -1, None
# ── drawing ───────────────────────────────────────────────────
def _scaled_base(self):
s, ox, oy = self._xform()
sz = (int(self.iw * s), int(self.ih * s))
key = (sz, self.ww, self.wh)
if self._cache is not None and self._cache_key == key:
return self._cache.copy(), s, ox, oy
canvas = np.zeros((self.wh, self.ww, 3), np.uint8)
resized = cv2.resize(self.img, sz, interpolation=cv2.INTER_AREA)
canvas[oy:oy + sz[1], ox:ox + sz[0]] = resized
self._cache = canvas
self._cache_key = key
return canvas.copy(), s, ox, oy
def _draw(self):
    """Repaint the whole window from the current state.

    With no image loaded it shows a "No images" placeholder. Otherwise it
    paints, in this order on a copy of the letterboxed image: the annotation
    boxes with their class tags (plus corner handles on the selected box),
    the rubber-band rectangle of a box being drawn, the info line, the class
    legend, the toolbar buttons and the help bar. As a side effect it assigns
    each toolbar button's rectangle, which the mouse handler uses for clicks.
    """
    if self.img is None:
        canvas = np.zeros((self.wh, self.ww, 3), np.uint8)
        cv2.putText(canvas, "No images", (self.ww // 2 - 60, self.wh // 2),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (128, 128, 128), 1)
        cv2.imshow(self.WIN, canvas)
        return
    canvas, s, ox, oy = self._scaled_base()
    # ── Annotation boxes ──
    for i, b in enumerate(self.boxes):
        col = COLORS[b.cls_id % len(COLORS)]
        is_sel = i == self.sel
        p1 = self._to_win(b.x1, b.y1)
        p2 = self._to_win(b.x2, b.y2)
        # selected box gets a thicker outline
        cv2.rectangle(canvas, p1, p2, col, 3 if is_sel else 2)
        # class ids beyond the known class list are shown as "c<id>"
        name = self.classes[b.cls_id] if b.cls_id < len(self.classes) else f"c{b.cls_id}"
        (tw, th), _ = cv2.getTextSize(name, cv2.FONT_HERSHEY_SIMPLEX, 0.55, 1)
        # filled tag above the top-left corner, with the class name in black on top
        cv2.rectangle(canvas, (p1[0], p1[1] - th - 8),
                      (p1[0] + tw + 6, p1[1]), col, -1)
        cv2.putText(canvas, name, (p1[0] + 3, p1[1] - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 0), 1)
        if is_sel:
            # corner handles: white disc with a class-coloured ring
            for hx, hy in [p1, (p2[0], p1[1]), (p1[0], p2[1]), p2]:
                cv2.circle(canvas, (hx, hy), HANDLE_R, (255, 255, 255), -1)
                cv2.circle(canvas, (hx, hy), HANDLE_R, col, 2)
    # rubber-band: preview of the box being dragged out, in the new-box class colour
    if self.mode == "draw" and self.d_start and self.mouse_n:
        col = COLORS[self.cur_cls % len(COLORS)]
        cv2.rectangle(canvas,
                      self._to_win(*self.d_start),
                      self._to_win(*self.mouse_n), col, 2)
    # ── HUD info line ──
    if self.files:
        fname = os.path.basename(self.files[self.pos])
        n = len(self.files)
        filt = self.FILTERS[self.filt_idx]
        cname = self.classes[self.cur_cls] if self.cur_cls < len(self.classes) \
            else f"c{self.cur_cls}"
        info = (f"[{self.pos + 1}/{n}] {fname} | "
                f"filter: {filt} | new class: {cname} | "
                f"boxes: {len(self.boxes)}")
        # trailing star marks unsaved changes
        if self.dirty:
            info += " *"
        cv2.putText(canvas, info, (10, 22),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (220, 220, 220), 1)
    # class legend (top-right), numbered to match the 1-9 hotkeys
    for i, c in enumerate(self.classes):
        txt = f"{i + 1}: {c}"
        (tw, _), _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        cv2.putText(canvas, txt,
                    (self.ww - tw - 12, 22 + i * 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    COLORS[i % len(COLORS)], 1)
    # ── Toolbar buttons ──
    # laid out left to right; each rectangle is stored on the button for hit-testing
    bx = 10
    for btn in self.buttons:
        (tw, th), _ = cv2.getTextSize(btn.label, cv2.FONT_HERSHEY_SIMPLEX, 0.45, 1)
        bw = tw + 16
        bh = TOOLBAR_H - 4
        btn.x1 = bx
        btn.y1 = TOOLBAR_Y
        btn.x2 = bx + bw
        btn.y2 = TOOLBAR_Y + bh
        # button bg
        cv2.rectangle(canvas, (btn.x1, btn.y1), (btn.x2, btn.y2),
                      (60, 60, 60), -1)
        cv2.rectangle(canvas, (btn.x1, btn.y1), (btn.x2, btn.y2),
                      (140, 140, 140), 1)
        # button text
        cv2.putText(canvas, btn.label,
                    (bx + 8, TOOLBAR_Y + bh - 7),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (220, 220, 220), 1)
        bx = btn.x2 + 6
    # ── Help bar (bottom) ──
    cv2.putText(
        canvas,
        "drag=draw | click=select | drag=move/resize | RClick=cycle class"
        " | 1-9=class | Del=remove box | E=deselect",
        (10, self.wh - 6),
        cv2.FONT_HERSHEY_SIMPLEX, 0.36, (120, 120, 120), 1)
    cv2.imshow(self.WIN, canvas)
# ── mouse ─────────────────────────────────────────────────────
def _on_mouse(self, ev, wx, wy, flags, _):
    """OpenCV mouse callback driving the draw / select / move / resize interactions.

    Args:
        ev: OpenCV mouse event code (compared against ``cv2.EVENT_*``).
        wx, wy: Cursor position in window pixels.
        flags: OpenCV modifier flags (unused).
        _: User-data slot of the callback signature (unused).

    Left button down starts a toolbar action, a resize (corner hit), a move
    (inside hit) or a new rubber-band box. Mouse move updates the active drag.
    Left button up commits a drawn box when it is larger than ``MIN_BOX`` in
    both dimensions and always ends the drag. Right button down cycles the
    class of the box under the cursor.
    """
    # presumably window pixels -> normalized image coordinates; see _to_norm
    nx, ny = self._to_norm(wx, wy)
    self.mouse_n = (nx, ny)

    if ev == cv2.EVENT_LBUTTONDOWN:
        # check toolbar buttons first
        for btn in self.buttons:
            if btn.hit(wx, wy):
                self._do_action(btn.action)
                return
        # only interact with image area below toolbar
        if wy < IMG_TOP:
            return
        idx, what = self._hit(nx, ny)
        if what in ("tl", "tr", "bl", "br"):
            self._push_undo()
            self.sel = idx
            self.mode = "resize"
            b = self.boxes[idx]
            # The corner diagonally opposite the grabbed one stays fixed.
            opp = {"tl": (b.x2, b.y2), "tr": (b.x1, b.y2),
                   "bl": (b.x2, b.y1), "br": (b.x1, b.y1)}
            self.d_anchor = opp[what]
            self.d_start = (nx, ny)
        elif what == "inside":
            self._push_undo()
            self.sel = idx
            self.mode = "move"
            self.d_start = (nx, ny)
            b = self.boxes[idx]
            self.d_orig = (b.cx, b.cy)
        else:
            self.sel = -1
            self.mode = "draw"
            self.d_start = (nx, ny)
        self._draw()

    elif ev == cv2.EVENT_MOUSEMOVE:
        # A key press can deselect or delete the box while a drag is still in
        # progress, so both drag modes must re-validate the selection. Without
        # this, sel == -1 would silently address the *last* box (or raise
        # IndexError on an empty list).
        sel_valid = 0 <= self.sel < len(self.boxes)
        if self.mode == "draw":
            self._draw()
        elif self.mode == "move" and self.d_start and sel_valid:
            b = self.boxes[self.sel]
            b.cx = self.d_orig[0] + (nx - self.d_start[0])
            b.cy = self.d_orig[1] + (ny - self.d_start[1])
            self.dirty = True
            self._draw()
        elif self.mode == "resize" and self.d_anchor and sel_valid:
            b = self.boxes[self.sel]
            ax, ay = self.d_anchor
            b.set_corners(min(ax, nx), min(ay, ny),
                          max(ax, nx), max(ay, ny))
            self.dirty = True
            self._draw()

    elif ev == cv2.EVENT_LBUTTONUP:
        if self.mode == "draw" and self.d_start:
            x1, y1 = min(self.d_start[0], nx), min(self.d_start[1], ny)
            x2, y2 = max(self.d_start[0], nx), max(self.d_start[1], ny)
            # Ignore accidental clicks / tiny drags.
            if (x2 - x1) > MIN_BOX and (y2 - y1) > MIN_BOX:
                self._push_undo()
                b = Box(0, 0, 0, 0, self.cur_cls)
                b.set_corners(x1, y1, x2, y2)
                self.boxes.append(b)
                self.sel = len(self.boxes) - 1
                self.dirty = True
        # Any button release ends the current drag, whatever its mode.
        self.mode = None
        self.d_start = self.d_anchor = self.d_orig = None
        self._draw()

    elif ev == cv2.EVENT_RBUTTONDOWN:
        idx, _ = self._hit(nx, ny)
        if idx >= 0:
            self._push_undo()
            self.sel = idx
            self.boxes[idx].cls_id = \
                (self.boxes[idx].cls_id + 1) % len(self.classes)
            self.dirty = True
            self._draw()
# ── actions (shared by keys + buttons) ────────────────────────
def _do_action(self, action):
if action == "predict":
self._predict()
self._draw()
elif action == "save_next":
self._do_save_next()
elif action == "filter":
self._do_filter()
elif action == "undo":
self._pop_undo()
self._draw()
elif action == "del_img":
self._do_del_img()
def _do_save_next(self):
if not self.files:
return
self._save()
fname = os.path.basename(self.files[self.pos])
print(f" Saved {fname} ({len(self.boxes)} box(es))")
self._goto(self.pos + 1)
def _do_filter(self):
self.filt_idx = (self.filt_idx + 1) % len(self.FILTERS)
if self.dirty:
self._save()
self._refilter()
if self.files:
self._load()
self._draw()
print(f" Filter: {self.FILTERS[self.filt_idx]}"
f" ({len(self.files)} images)")
else:
self.img = None
self._draw()
print(f" Filter: {self.FILTERS[self.filt_idx]} (0 images)")
def _do_del_img(self):
if not self.files:
return
fp = self.files[self.pos]
lp = self._lbl(fp)
if os.path.exists(fp):
os.remove(fp)
if os.path.exists(lp):
os.remove(lp)
self.n_deleted += 1
print(f" Deleted {os.path.basename(fp)}")
self.all_files = [f for f in self.all_files if f != fp]
self.dirty = False
self._refilter()
if not self.files:
self.img = None
self._draw()
return
self.pos = min(self.pos, len(self.files) - 1)
self._load()
self._draw()
# ── navigation ────────────────────────────────────────────────
def _goto(self, new_pos):
if self.dirty:
self._save()
new_pos = max(0, min(new_pos, len(self.files) - 1))
if new_pos == self.pos and self.img is not None:
return
self.pos = new_pos
self._load()
self._draw()
# ── main loop ─────────────────────────────────────────────────
def run(self):
    """Open the annotation window and process input until the user quits.

    Returns early (after printing a message) when there are no images at
    all or none match the current filter. Otherwise it creates the window,
    registers ``_on_mouse`` as the mouse callback, and loops on
    ``cv2.waitKeyEx`` handling window close/resize and keyboard commands.
    Unsaved edits are saved on quit and on window close. After the loop the
    windows are destroyed and a dataset summary is printed.
    """
    if not self.all_files:
        print(f"No images in {self.img_dir}")
        return
    cv2.namedWindow(self.WIN, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(self.WIN, self.ww, self.wh)
    cv2.setMouseCallback(self.WIN, self._on_mouse)
    self._refilter()
    if not self.files:
        print("No images match current filter")
        return
    self._load()
    self._draw()
    while True:
        # Poll for a key for up to 30 ms per iteration.
        key = cv2.waitKeyEx(30)
        # detect window close (user clicked X)
        if cv2.getWindowProperty(self.WIN, cv2.WND_PROP_VISIBLE) < 1:
            if self.dirty:
                self._save()
            break
        # detect window resize
        try:
            r = cv2.getWindowImageRect(self.WIN)
            if r[2] > 0 and r[3] > 0 and \
                    (r[2] != self.ww or r[3] != self.wh):
                self.ww, self.wh = r[2], r[3]
                # Drop the cached render so the next draw uses the new size
                # (presumably a scaled-image cache; see _draw).
                self._cache = None
                self._draw()
        except cv2.error:
            # Best effort: a failed rect query just skips resize handling.
            pass
        # -1 means no key was pressed within the timeout.
        if key == -1:
            continue
        # Quit (q or Esc)
        if key in (ord("q"), 27):
            if self.dirty:
                self._save()
            break
        # Save + next (Space or Enter)
        if key in (32, 13):
            self._do_save_next()
            continue
        # Navigation
        if key == K_LEFT:
            self._goto(self.pos - 1)
            continue
        if key == K_RIGHT:
            self._goto(self.pos + 1)
            continue
        # Predict
        if key == ord("p"):
            self._predict()
            self._draw()
            continue
        # Delete selected box (Delete or Backspace)
        if key == K_DEL or key == 8:
            if 0 <= self.sel < len(self.boxes):
                self._push_undo()
                self.boxes.pop(self.sel)
                self.sel = -1
                self.dirty = True
                self._draw()
            continue
        # Delete image
        if key == ord("x"):
            self._do_del_img()
            continue
        # Undo
        if key == ord("z"):
            self._pop_undo()
            self._draw()
            continue
        # Filter
        if key == ord("f"):
            self._do_filter()
            continue
        # Number keys -> set class ("1" selects class index 0)
        if ord("1") <= key <= ord("9"):
            cls_id = key - ord("1")
            if cls_id < len(self.classes):
                # Re-class the selected box, if any ...
                if 0 <= self.sel < len(self.boxes):
                    self._push_undo()
                    self.boxes[self.sel].cls_id = cls_id
                    self.dirty = True
                # ... and make it the class used for newly drawn boxes.
                self.cur_cls = cls_id
                self._draw()
            continue
        # Deselect
        if key == ord("e"):
            self.sel = -1
            self._draw()
            continue
    cv2.destroyAllWindows()
    # Final dataset summary: every image is counted as labeled, empty, or
    # (by subtraction) unlabeled.
    total = len(self.all_files)
    labeled = sum(1 for f in self.all_files if self._has_labels(self._lbl(f)))
    empty = sum(1 for f in self.all_files if self._is_empty_label(self._lbl(f)))
    unlabeled = total - labeled - empty
    print(f"\nDone. Saved: {self.n_saved}, Deleted: {self.n_deleted}")
    print(f"Dataset: {total} images, {labeled} labeled, "
          f"{empty} empty, {unlabeled} unlabeled")
def run_annotator(img_dir, classes=None):
    """Create an ``Annotator`` for ``img_dir`` and run it until the user exits.

    Importable entry point: used by manage.py and by this script's ``main``.
    """
    Annotator(img_dir, classes).run()
def main():
    """Annotate the directory given as the first CLI argument, or the default raw directory."""
    default_dir = "../../training-data/kulemak/raw"
    cli_args = sys.argv[1:]
    run_annotator(cli_args[0] if cli_args else default_dir)


if __name__ == "__main__":
    main()

View file

@ -12,7 +12,7 @@ import sys
import json import json
import time import time
_model = None _models = {}
def _redirect_stdout_to_stderr(): def _redirect_stdout_to_stderr():
@ -26,36 +26,40 @@ def _restore_stdout(real_stdout):
sys.stdout = real_stdout sys.stdout = real_stdout
def load_model(): def load_model(name="enemy-v1"):
global _model if name in _models:
if _model is not None: return _models[name]
return _model
import os import os
from ultralytics import YOLO from ultralytics import YOLO
model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models") model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
model_path = os.path.join(model_dir, "enemy-v1.pt")
# Prefer TensorRT engine if available, fall back to .pt
engine_path = os.path.join(model_dir, f"{name}.engine")
pt_path = os.path.join(model_dir, f"{name}.pt")
model_path = engine_path if os.path.exists(engine_path) else pt_path
if not os.path.exists(model_path): if not os.path.exists(model_path):
raise FileNotFoundError(f"Model not found: {model_path}") raise FileNotFoundError(f"Model not found: {pt_path} (also checked {engine_path})")
sys.stderr.write(f"Loading YOLO model from {model_path}...\n") sys.stderr.write(f"Loading YOLO model '{name}' from {model_path}...\n")
sys.stderr.flush() sys.stderr.flush()
real_stdout = _redirect_stdout_to_stderr() real_stdout = _redirect_stdout_to_stderr()
try: try:
_model = YOLO(model_path) model = YOLO(model_path)
# Warmup with dummy inference (triggers CUDA init) # Warmup with dummy inference (triggers CUDA init)
import numpy as np import numpy as np
dummy = np.zeros((640, 640, 3), dtype=np.uint8) dummy = np.zeros((640, 640, 3), dtype=np.uint8)
_model.predict(dummy, verbose=False) model.predict(dummy, verbose=False)
finally: finally:
_restore_stdout(real_stdout) _restore_stdout(real_stdout)
sys.stderr.write("YOLO model loaded and warmed up.\n") _models[name] = model
sys.stderr.write(f"YOLO model '{name}' loaded and warmed up.\n")
sys.stderr.flush() sys.stderr.flush()
return _model return model
def handle_detect(req): def handle_detect(req):
@ -70,12 +74,15 @@ def handle_detect(req):
img_bytes = base64.b64decode(image_base64) img_bytes = base64.b64decode(image_base64)
img = np.array(Image.open(io.BytesIO(img_bytes))) img = np.array(Image.open(io.BytesIO(img_bytes)))
# PIL gives RGB, but ultralytics model.predict() assumes numpy arrays are BGR
img = img[:, :, ::-1]
conf = req.get("conf", 0.3) conf = req.get("conf", 0.3)
iou = req.get("iou", 0.45) iou = req.get("iou", 0.45)
imgsz = req.get("imgsz", 640) imgsz = req.get("imgsz", 640)
model_name = req.get("model", "enemy-v1")
model = load_model() model = load_model(model_name)
real_stdout = _redirect_stdout_to_stderr() real_stdout = _redirect_stdout_to_stderr()
try: try:

View file

@ -0,0 +1,77 @@
"""Fix labels generated by buggy annotate.py that multiplied by scale."""
import glob
import os
import cv2
import sys
def compute_scale(img_path):
    """Return the factor that fits the image inside 1600x900, or 1.0 if it already fits.

    Returns None when the image cannot be read.
    """
    frame = cv2.imread(img_path)
    if frame is None:
        return None
    height, width = frame.shape[:2]
    limit_w, limit_h = 1600, 900
    fits = height <= limit_h and width <= limit_w
    return 1.0 if fits else min(limit_w / width, limit_h / height)
def fix_label(label_path, scale):
    """Undo an erroneous multiplication by ``scale`` in a YOLO label file, in place.

    Each 5-field line (``cls cx cy w h``) has its four coordinates divided
    by ``scale`` and capped at 1.0; any other line is kept verbatim.

    Returns:
        False (file untouched) when ``scale`` is exactly 1.0, else True
        after rewriting the file.
    """
    if scale == 1.0:
        return False

    def rescale(raw_line):
        fields = raw_line.strip().split()
        if len(fields) != 5:
            return raw_line
        cls = fields[0]
        cx, cy, w, h = (min(float(tok) / scale, 1.0) for tok in fields[1:])
        return f"{cls} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n"

    with open(label_path) as src:
        original_lines = src.readlines()
    # Convert everything before reopening for write, so a parse error
    # cannot leave a truncated file behind.
    rewritten = [rescale(entry) for entry in original_lines]
    with open(label_path, "w") as dst:
        dst.writelines(rewritten)
    return True
def main():
    """Repair every label file under ``<dataset>/{train,valid}/labels``.

    The dataset directory comes from the first CLI argument, with a
    built-in default. For each label file the matching image (.jpg, .jpeg
    or .png) is located to compute its scale; labels without a readable
    image are reported and skipped. The first three fixed files are echoed
    for verification and a total is printed at the end.
    """
    cli_args = sys.argv[1:]
    dataset_dir = os.path.abspath(
        cli_args[0] if cli_args else "../../training-data/boss-dataset"
    )
    image_exts = (".jpg", ".jpeg", ".png")
    count = 0

    for split in ("train", "valid"):
        label_dir = os.path.join(dataset_dir, split, "labels")
        if not os.path.isdir(label_dir):
            continue
        img_dir = os.path.join(dataset_dir, split, "images")

        for label_path in glob.glob(os.path.join(label_dir, "*.txt")):
            stem = os.path.splitext(os.path.basename(label_path))[0]
            # First existing candidate wins, tried in extension order.
            candidates = (os.path.join(img_dir, stem + ext) for ext in image_exts)
            img_path = next((c for c in candidates if os.path.exists(c)), None)
            if img_path is None:
                print(f" WARNING: no image for {label_path}")
                continue

            scale = compute_scale(img_path)
            if scale is None:
                print(f" WARNING: can't read {img_path}")
                continue

            if not fix_label(label_path, scale):
                continue
            count += 1
            # Show first few for verification
            if count <= 3:
                with open(label_path) as f:
                    print(f" Fixed {os.path.basename(label_path)}: {f.read().strip()}")

    print(f"\nFixed {count} label files")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,353 @@
"""
Unified CLI for the YOLO detection pipeline.
Subcommands (all take a positional boss name):
build kulemak [--ratio 0.85] [--seed 42] Split raw/ -> dataset/
train kulemak [--epochs 200] [--name X] Train model (auto-increments name)
runs kulemak List training runs + metrics table
annotate kulemak [dir] Launch annotation GUI
prelabel kulemak [dir] [--model boss-kulemak] Auto-label unlabeled images
"""
import argparse
import csv
import os
import random
import re
import shutil
import sys
# ── Shared constants ─────────────────────────────────────────────
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
REPO_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, "..", ".."))
RUNS_DIR = os.path.join(REPO_ROOT, "runs", "detect")
MODELS_DIR = os.path.join(SCRIPT_DIR, "models")
def raw_dir(boss):
    """Path of the boss's raw directory: ``<repo>/training-data/<boss>/raw``."""
    parts = (REPO_ROOT, "training-data", boss, "raw")
    return os.path.join(*parts)
def dataset_dir(boss):
    """Path of the boss's built dataset: ``<repo>/training-data/<boss>/dataset``."""
    parts = (REPO_ROOT, "training-data", boss, "dataset")
    return os.path.join(*parts)
def boss_classes(boss):
    """Return the class-name list for a boss: a new list holding only the boss name."""
    names = []
    names.append(boss)
    return names
# ── build ────────────────────────────────────────────────────────
def cmd_build(args):
    """Split ``training-data/<boss>/raw`` into a fresh train/valid dataset.

    Args:
        args: Parsed CLI namespace with ``boss`` (str), ``ratio`` (train
            fraction) and ``seed`` (shuffle seed).

    Images with a non-empty ``.txt`` label count as labeled, those with an
    empty label file as negatives, and those with no label file are
    skipped. Labeled and negative images are shuffled and split
    separately. The four dataset sub-directories are wiped and recreated,
    ``data.yaml`` is rewritten and stale ``labels.cache`` files are
    removed.
    """
    import glob as g

    boss = args.boss
    raw = raw_dir(boss)
    dataset = dataset_dir(boss)
    classes = boss_classes(boss)

    # Accept the same extensions the other pipeline tools handle, so labeled
    # .jpeg/.png images are not silently left out of the dataset. Sorting the
    # combined list keeps the seeded split reproducible; a JPG-only directory
    # yields exactly the list the single "*.jpg" glob used to.
    images = sorted({
        path
        for pattern in ("*.jpg", "*.jpeg", "*.png")
        for path in g.glob(os.path.join(raw, pattern))
    })

    labeled, empty = [], []
    skipped = 0
    for img in images:
        txt = os.path.splitext(img)[0] + ".txt"
        if not os.path.exists(txt):
            skipped += 1
            continue
        with open(txt) as f:
            content = f.read().strip()
        if content:
            labeled.append(img)
        else:
            empty.append(img)

    print(f"Raw: {len(images)} images -- {len(labeled)} labeled, "
          f"{len(empty)} empty (negative), {skipped} unlabeled (skipped)")
    if not labeled and not empty:
        print("Nothing to build.")
        return

    rng = random.Random(args.seed)
    rng.shuffle(labeled)
    rng.shuffle(empty)

    def split(lst, ratio):
        # A non-empty list always contributes at least one training sample.
        n = max(1, round(len(lst) * ratio)) if lst else 0
        return lst[:n], lst[n:]

    train_labeled, valid_labeled = split(labeled, args.ratio)
    train_empty, valid_empty = split(empty, args.ratio)
    train_files = train_labeled + train_empty
    valid_files = valid_labeled + valid_empty

    # Wipe and recreate
    for sub in ("train/images", "train/labels", "valid/images", "valid/labels"):
        d = os.path.join(dataset, sub)
        if os.path.exists(d):
            shutil.rmtree(d)
        os.makedirs(d)

    def copy_files(file_list, split_name):
        for img in file_list:
            txt = os.path.splitext(img)[0] + ".txt"
            base = os.path.basename(img)
            base_txt = os.path.splitext(base)[0] + ".txt"
            shutil.copy2(img, os.path.join(dataset, split_name, "images", base))
            shutil.copy2(txt, os.path.join(dataset, split_name, "labels", base_txt))

    copy_files(train_files, "train")
    copy_files(valid_files, "valid")

    # Write data.yaml
    yaml_path = os.path.join(dataset, "data.yaml")
    with open(yaml_path, "w") as f:
        f.write(f"train: {os.path.join(dataset, 'train', 'images')}\n")
        f.write(f"val: {os.path.join(dataset, 'valid', 'images')}\n\n")
        f.write(f"nc: {len(classes)}\n")
        f.write(f"names: {classes}\n")

    # Delete stale label caches
    for root, _dirs, files in os.walk(dataset):
        for fn in files:
            if fn == "labels.cache":
                os.remove(os.path.join(root, fn))

    print(f"\nTrain: {len(train_files)} ({len(train_labeled)} labeled + {len(train_empty)} empty)")
    print(f"Valid: {len(valid_files)} ({len(valid_labeled)} labeled + {len(valid_empty)} empty)")
    print(f"data.yaml: {yaml_path}")
# ── runs ─────────────────────────────────────────────────────────
def _parse_simple_yaml(path):
"""Parse flat key: value YAML without requiring PyYAML."""
result = {}
if not os.path.exists(path):
return result
with open(path) as f:
for line in f:
line = line.strip()
if not line or line.startswith("#"):
continue
if ": " in line:
key, val = line.split(": ", 1)
# Try to cast to int/float/bool/null
val = val.strip()
if val == "null" or val == "~":
val = None
elif val == "true":
val = True
elif val == "false":
val = False
else:
try:
val = int(val)
except ValueError:
try:
val = float(val)
except ValueError:
pass
result[key.strip()] = val
return result
def cmd_runs(args):
    """List training runs with metrics for the given boss.

    Scans ``RUNS_DIR`` for directories named ``<boss>-v*``, reads each
    run's ``args.yaml`` (model, configured epochs, image size) and
    ``results.csv`` (per-epoch metrics), and prints one table row per run.
    Metrics shown are those of the epoch with the highest
    ``metrics/mAP50(B)``.

    Args:
        args: Parsed CLI namespace; only ``args.boss`` is read.
    """
    boss = args.boss
    prefix = f"{boss}-v"
    if not os.path.isdir(RUNS_DIR):
        print(f"No runs directory: {RUNS_DIR}")
        return
    run_dirs = sorted(
        [d for d in os.listdir(RUNS_DIR)
         if os.path.isdir(os.path.join(RUNS_DIR, d)) and d.startswith(prefix)],
        key=lambda d: _run_sort_key(d)
    )
    if not run_dirs:
        print(f"No {prefix}* runs found.")
        return
    rows = []
    for name in run_dirs:
        run_path = os.path.join(RUNS_DIR, name)
        args_file = os.path.join(run_path, "args.yaml")
        csv_file = os.path.join(run_path, "results.csv")
        # "?" placeholders remain when args.yaml is missing.
        model = epochs_cfg = imgsz = "?"
        if os.path.exists(args_file):
            cfg = _parse_simple_yaml(args_file)
            # Show only the model's base file name, without its extension.
            model = os.path.splitext(os.path.basename(str(cfg.get("model", "?"))))[0]
            epochs_cfg = str(cfg.get("epochs", "?"))
            imgsz = str(cfg.get("imgsz", "?"))
        # "-" placeholders remain when results.csv is missing or has no rows.
        mAP50 = mAP50_95 = prec = rec = "-"
        actual_epochs = "?"
        status = "unknown"
        if os.path.exists(csv_file):
            with open(csv_file) as f:
                reader = csv.DictReader(f)
                best_map = -1
                best_row = None
                last_epoch = 0
                for row in reader:
                    # Header names and values are stripped of padding before lookup.
                    row = {k.strip(): v.strip() for k, v in row.items()}
                    ep = int(row.get("epoch", 0))
                    last_epoch = max(last_epoch, ep)
                    val = float(row.get("metrics/mAP50(B)", 0))
                    if val > best_map:
                        best_map = val
                        best_row = row
            if best_row:
                mAP50 = f"{float(best_row.get('metrics/mAP50(B)', 0)):.3f}"
                mAP50_95 = f"{float(best_row.get('metrics/mAP50-95(B)', 0)):.3f}"
                prec = f"{float(best_row.get('metrics/precision(B)', 0)):.3f}"
                rec = f"{float(best_row.get('metrics/recall(B)', 0)):.3f}"
            actual_epochs = str(last_epoch)
            # A run that logged fewer epochs than configured is reported as
            # early-stopped. The "+ 1" presumably tolerates 0-based epoch
            # numbering in results.csv -- TODO confirm.
            try:
                if int(epochs_cfg) > last_epoch + 1:
                    status = "early-stop"
                else:
                    status = "done"
            except ValueError:
                # epochs_cfg is not numeric (e.g. the "?" placeholder).
                status = "?"
        epoch_str = f"{actual_epochs}/{epochs_cfg}"
        rows.append((name, model, epoch_str, imgsz, mAP50, mAP50_95, prec, rec, status))
    headers = ("Run", "Model", "Epochs", "ImgSz", "mAP50", "mAP50-95", "P", "R", "Status")
    # Each column is as wide as its header or its widest cell.
    widths = [max(len(h), max(len(r[i]) for r in rows)) for i, h in enumerate(headers)]
    header_line = " ".join(h.ljust(w) for h, w in zip(headers, widths))
    print(header_line)
    print(" ".join("-" * w for w in widths))
    for row in rows:
        print(" ".join(val.ljust(w) for val, w in zip(row, widths)))
def _run_sort_key(name):
m = re.search(r"(\d+)", name)
return int(m.group(1)) if m else 0
# ── train ────────────────────────────────────────────────────────
def cmd_train(args):
    """Fill in training defaults for a boss and hand off to ``train.run_training``.

    When ``args.name`` is None the run is named ``<boss>-v<N+1>``, where N
    is the highest version already present in ``RUNS_DIR`` (0 if none).
    When ``args.data`` is None it points at the boss's dataset
    ``data.yaml``. If that file does not exist, a hint is printed and
    nothing is trained.
    """
    boss = args.boss

    # Auto-increment name: {boss}-v1, {boss}-v2, ...
    if args.name is None:
        prefix = f"{boss}-v"
        version_re = re.compile(re.escape(prefix) + r"(\d+)")
        entries = os.listdir(RUNS_DIR) if os.path.isdir(RUNS_DIR) else []
        versions = [int(hit.group(1)) for hit in map(version_re.match, entries) if hit]
        args.name = f"{prefix}{max(versions, default=0) + 1}"
        print(f"Auto-assigned run name: {args.name}")

    if args.data is None:
        args.data = os.path.join(dataset_dir(boss), "data.yaml")

    if not os.path.exists(args.data):
        print(f"data.yaml not found: {args.data}")
        print(f"Run 'python manage.py build {boss}' first.")
        return

    # Pass boss name so train.py can name the output model
    args.boss = boss
    from train import run_training
    run_training(args)
# ── annotate ─────────────────────────────────────────────────────
def cmd_annotate(args):
    """Open the annotation GUI on ``args.dir``, or on the boss's raw directory if unset."""
    target_dir = args.dir if args.dir else raw_dir(args.boss)
    class_names = boss_classes(args.boss)

    from annotate import run_annotator
    run_annotator(target_dir, class_names)
# ── prelabel ─────────────────────────────────────────────────────
# Sentinel default for ``--model``: "not given, derive it from the boss name".
_PRELABEL_MODEL_DEFAULT = "__auto__"


def cmd_prelabel(args):
    """Resolve per-boss defaults, then auto-label unlabeled images via ``prelabel.run_prelabel``.

    A missing image directory becomes the boss's raw directory, and a
    ``--model`` left at the sentinel default becomes ``boss-<boss>``.
    """
    boss_name = args.boss
    if args.img_dir is None:
        args.img_dir = raw_dir(boss_name)
    model_was_defaulted = args.model == _PRELABEL_MODEL_DEFAULT
    if model_was_defaulted:
        args.model = f"boss-{boss_name}"

    from prelabel import run_prelabel
    run_prelabel(args)
# ── CLI ──────────────────────────────────────────────────────────
def main():
    """Parse the command line and dispatch to the matching ``cmd_*`` handler.

    Every subcommand takes a positional boss name. With no subcommand the
    help text is printed.
    """
    parser = argparse.ArgumentParser(
        description="YOLO detection pipeline manager",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    sub = parser.add_subparsers(dest="command")

    def new_command(name, summary):
        # Register a subcommand with the positional boss argument they all share.
        cmd = sub.add_parser(name, help=summary)
        cmd.add_argument("boss", help="Boss name (e.g. kulemak)")
        return cmd

    # annotate
    annotate = new_command("annotate", "Launch annotation GUI")
    annotate.add_argument("dir", nargs="?", default=None,
                          help="Image directory (default: training-data/{boss}/raw)")

    # build
    build = new_command("build", "Build dataset from raw/")
    build.add_argument("--ratio", type=float, default=0.85, help="Train ratio (default 0.85)")
    build.add_argument("--seed", type=int, default=42, help="Random seed")

    # train
    train = new_command("train", "Train YOLO model")
    train.add_argument("--data", default=None, help="Path to data.yaml")
    train.add_argument("--model", default="yolo11s", help="YOLO model variant")
    train.add_argument("--epochs", type=int, default=200, help="Training epochs")
    train.add_argument("--imgsz", type=int, default=1280, help="Image size")
    train.add_argument("--batch", type=int, default=8, help="Batch size")
    train.add_argument("--device", default="0", help="CUDA device")
    train.add_argument("--name", default=None, help="Run name (auto-increments if omitted)")

    # runs
    new_command("runs", "List training runs with metrics")

    # prelabel
    prelabel = new_command("prelabel", "Pre-label unlabeled images")
    prelabel.add_argument("img_dir", nargs="?", default=None, help="Image directory")
    prelabel.add_argument("--model", default=_PRELABEL_MODEL_DEFAULT,
                          help="Model name in models/ (default: boss-{boss})")
    prelabel.add_argument("--conf", type=float, default=0.20, help="Confidence threshold")

    args = parser.parse_args()
    if args.command is None:
        parser.print_help()
        return

    handlers = {
        "annotate": cmd_annotate,
        "build": cmd_build,
        "train": cmd_train,
        "runs": cmd_runs,
        "prelabel": cmd_prelabel,
    }
    run_command = handlers[args.command]
    run_command(args)


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,78 @@
"""
Pre-label images using an existing YOLO model.
Generates .txt labels for unlabeled images so the annotator can load them for review.
Usage:
python prelabel.py [image_dir] [--model boss-kulemak] [--conf 0.20]
"""
import argparse
import glob
import os
def run_prelabel(args):
    """Write YOLO label files for images in ``args.img_dir`` that have none.

    Called from ``main()`` or manage.py. Reads ``args.img_dir``,
    ``args.model`` (file stem under this script's ``models/`` directory)
    and ``args.conf`` (confidence threshold). Prints a message and returns
    without loading anything when the model file is missing. Images with
    no detections get no label file, so they stay unlabeled.
    """
    img_dir = os.path.abspath(args.img_dir)
    here = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(here, "models", f"{args.model}.pt")
    if not os.path.exists(model_path):
        print(f"Model not found: {model_path}")
        return

    from ultralytics import YOLO
    model = YOLO(model_path)

    files = sorted(
        path
        for pattern in ("*.jpg", "*.jpeg", "*.png")
        for path in glob.glob(os.path.join(img_dir, pattern))
    )

    # Only process unlabeled images
    unlabeled = [
        path for path in files
        if not os.path.exists(os.path.splitext(path)[0] + ".txt")
    ]
    print(f"Found {len(files)} images, {len(files) - len(unlabeled)} already labeled, {len(unlabeled)} to pre-label")
    if not unlabeled:
        print("All images already have labels!")
        return

    labeled = skipped = 0
    for filepath in unlabeled:
        boxes = model(filepath, conf=args.conf, verbose=False)[0].boxes
        if len(boxes) == 0:
            skipped += 1
            continue

        with open(os.path.splitext(filepath)[0] + ".txt", "w") as out:
            for box in boxes:
                cls = int(box.cls[0])
                # normalized center x, y, w, h
                cx, cy, w, h = box.xywhn[0].tolist()
                out.write(f"{cls} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n")
        labeled += 1

        first_conf = boxes.conf[0].item()
        print(f" {os.path.basename(filepath)}: {len(boxes)} box(es), best conf={first_conf:.2f}")

    print(f"\nPre-labeled {labeled} images, skipped {skipped} (no detections)")
def main():
    """Standalone CLI: parse the image directory, model name and threshold, then pre-label."""
    cli = argparse.ArgumentParser(description="Pre-label images with YOLO model")
    cli.add_argument("img_dir", nargs="?", default="../../training-data/kulemak/raw")
    cli.add_argument("--model", default="boss-kulemak", help="Model name in models/")
    cli.add_argument("--conf", type=float, default=0.20, help="Confidence threshold")
    run_prelabel(cli.parse_args())


if __name__ == "__main__":
    main()

View file

@ -1,30 +1,24 @@
""" """
Training script for YOLOv11n enemy detection model. Training script for YOLO enemy/boss detection model.
Usage: Usage:
python train.py --data path/to/data.yaml --epochs 100 python train.py --data path/to/data.yaml --epochs 200
python train.py --data path/to/data.yaml --model yolo11m --imgsz 1280 --epochs 300
Expects YOLO-format dataset with data.yaml pointing to train/val image directories. Expects YOLO-format dataset with data.yaml pointing to train/val image directories.
Export from Roboflow in "YOLOv11" format. Export from Roboflow in "YOLOv11" format.
""" """
import argparse import argparse
import glob
import os import os
def main(): def run_training(args):
parser = argparse.ArgumentParser(description="Train YOLOv11n enemy detector") """Run YOLO training. Called from main() or manage.py."""
parser.add_argument("--data", required=True, help="Path to data.yaml")
parser.add_argument("--epochs", type=int, default=100, help="Training epochs")
parser.add_argument("--imgsz", type=int, default=640, help="Image size")
parser.add_argument("--batch", type=int, default=16, help="Batch size")
parser.add_argument("--device", default="0", help="CUDA device (0, cpu)")
parser.add_argument("--name", default="enemy-v1", help="Run name")
args = parser.parse_args()
from ultralytics import YOLO from ultralytics import YOLO
model = YOLO("yolo11n.pt") # start from pretrained nano model = YOLO(f"{args.model}.pt")
model.train( model.train(
data=args.data, data=args.data,
@ -33,25 +27,71 @@ def main():
batch=args.batch, batch=args.batch,
device=args.device, device=args.device,
name=args.name, name=args.name,
patience=20, # early stopping patience=30,
# Learning rate (fine-tuning pretrained, not from scratch)
lr0=0.001,
lrf=0.01,
cos_lr=True,
warmup_epochs=5,
weight_decay=0.001,
# Augmentation tuned for boss glow/morph effects
hsv_h=0.03,
hsv_s=0.8,
hsv_v=0.6,
scale=0.7,
translate=0.2,
degrees=5.0,
mixup=0.15,
close_mosaic=15,
erasing=0.3,
workers=0, # avoid multiprocessing paging file issues on Windows
save=True, save=True,
save_period=10, save_period=10,
plots=True, plots=True,
verbose=True, verbose=True,
) )
# Copy best weights to models directory # Find best.pt — try the trainer's save_dir first, then scan runs/detect/
best_path = os.path.join("runs", "detect", args.name, "weights", "best.pt") best_path = None
save_dir = getattr(model.trainer, "save_dir", None)
if save_dir:
candidate = os.path.join(str(save_dir), "weights", "best.pt")
if os.path.exists(candidate):
best_path = candidate
if not best_path:
run_base = os.path.join("runs", "detect")
candidates = sorted(glob.glob(os.path.join(run_base, f"{args.name}*", "weights", "best.pt")))
best_path = candidates[-1] if candidates else os.path.join(run_base, args.name, "weights", "best.pt")
output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models") output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, f"{args.name}.pt") # If boss is set (from manage.py), deploy as boss-{boss}.pt; otherwise use run name
boss = getattr(args, "boss", None)
model_filename = f"boss-{boss}.pt" if boss else f"{args.name}.pt"
output_path = os.path.join(output_dir, model_filename)
if os.path.exists(best_path): if os.path.exists(best_path):
import shutil import shutil
shutil.copy2(best_path, output_path) shutil.copy2(best_path, output_path)
print(f"\nBest model copied to: {output_path}") print(f"\nBest model copied to: {output_path}")
else: else:
print(f"\nWarning: {best_path} not found — check training output") print(f"\nWarning: {best_path} not found -- check training output")
def main():
parser = argparse.ArgumentParser(description="Train YOLO enemy/boss detector")
parser.add_argument("--data", required=True, help="Path to data.yaml")
parser.add_argument("--model", default="yolo11s", help="YOLO model variant (yolo11n, yolo11s, yolo11m)")
parser.add_argument("--epochs", type=int, default=200, help="Training epochs")
parser.add_argument("--imgsz", type=int, default=1280, help="Image size")
parser.add_argument("--batch", type=int, default=8, help="Batch size")
parser.add_argument("--device", default="0", help="CUDA device (0, cpu)")
parser.add_argument("--name", default="enemy-v1", help="Run name")
args = parser.parse_args()
run_training(args)
if __name__ == "__main__": if __name__ == "__main__":

Binary file not shown.