This commit is contained in:
Boki 2026-02-16 13:18:04 -05:00
parent 2d6a6bd3a1
commit d80e723b94
28 changed files with 1801 additions and 352 deletions

View file

@ -0,0 +1,12 @@
namespace Poe2Trade.Screen;
/// <summary>
/// A single enemy detection: a confidence score, a bounding box, a second
/// coordinate pair (<c>Cx</c>, <c>Cy</c>) and a health-bar confirmation flag.
/// </summary>
/// <param name="Confidence">Confidence score reported by the detector. NOTE(review): presumably in [0, 1] — confirm against the detection daemon.</param>
/// <param name="X">Bounding-box X. EnemyDetector fills this with the detector's X plus the crop origin, i.e. screen-space pixels.</param>
/// <param name="Y">Bounding-box Y, offset into screen space the same way as <paramref name="X"/>.</param>
/// <param name="Width">Bounding-box width as reported by the detector (not offset).</param>
/// <param name="Height">Bounding-box height as reported by the detector (not offset).</param>
/// <param name="Cx">Screen-space X of the detection's Cx value. NOTE(review): presumably the box centre — confirm against the daemon's output.</param>
/// <param name="Cy">Screen-space Y of the detection's Cy value. NOTE(review): presumably the box centre — confirm against the daemon's output.</param>
/// <param name="HealthBarConfirmed">True once a red health bar has been found in the strip directly above the bounding box.</param>
public record DetectedEnemy(
float Confidence,
int X, int Y, int Width, int Height,
int Cx, int Cy,
bool HealthBarConfirmed);
/// <summary>
/// Immutable result of one detection pass: the enemies found plus timing metadata.
/// </summary>
/// <param name="Enemies">Read-only list of detections from the pass; may be empty.</param>
/// <param name="Timestamp">When the snapshot was produced. EnemyDetector writes UTC Unix time in milliseconds; 0 is used for its initial empty snapshot.</param>
/// <param name="InferenceMs">Inference duration as reported by the detection daemon. NOTE(review): presumably milliseconds, per the name — confirm.</param>
public record DetectionSnapshot(
IReadOnlyList<DetectedEnemy> Enemies,
long Timestamp,
float InferenceMs);

View file

@ -10,23 +10,30 @@ class DiffCropHandler
{
private Bitmap? _referenceFrame;
private Region? _referenceRegion;
private readonly object _refLock = new();
public void HandleSnapshot(string? file = null, Region? region = null)
{
_referenceFrame?.Dispose();
_referenceFrame = ScreenCapture.CaptureOrLoad(file, region);
_referenceRegion = region;
var newFrame = ScreenCapture.CaptureOrLoad(file, region);
lock (_refLock)
{
_referenceFrame?.Dispose();
_referenceFrame = newFrame;
_referenceRegion = region;
}
}
public void HandleScreenshot(string path, Region? region = null)
{
var bitmap = _referenceFrame ?? ScreenCapture.CaptureOrLoad(null, region);
Bitmap? refCopy;
lock (_refLock) { refCopy = _referenceFrame != null ? (Bitmap)_referenceFrame.Clone() : null; }
var bitmap = refCopy ?? ScreenCapture.CaptureOrLoad(null, region);
var format = ImageUtils.GetImageFormat(path);
var dir = Path.GetDirectoryName(path);
if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
Directory.CreateDirectory(dir);
bitmap.Save(path, format);
if (bitmap != _referenceFrame) bitmap.Dispose();
bitmap.Dispose();
}
public byte[] HandleCapture(Region? region = null)
@ -44,47 +51,54 @@ class DiffCropHandler
public (Bitmap cropped, Bitmap refCropped, Bitmap current, Region region)? DiffCrop(
DiffCropParams c, string? file = null, Region? region = null)
{
if (_referenceFrame == null)
return null;
Bitmap refSnapshot;
Region? refRegion;
lock (_refLock)
{
if (_referenceFrame == null)
return null;
refSnapshot = (Bitmap)_referenceFrame.Clone();
refRegion = _referenceRegion;
}
var diffRegion = region ?? _referenceRegion;
var diffRegion = region ?? refRegion;
int baseX = diffRegion?.X ?? 0;
int baseY = diffRegion?.Y ?? 0;
var current = ScreenCapture.CaptureOrLoad(file, diffRegion);
Bitmap refForDiff = _referenceFrame;
bool disposeRef = false;
Bitmap refForDiff = refSnapshot;
if (diffRegion != null)
{
if (_referenceRegion == null)
if (refRegion == null)
{
var croppedRef = CropBitmap(_referenceFrame, diffRegion);
var croppedRef = CropBitmap(refSnapshot, diffRegion);
if (croppedRef == null)
{
current.Dispose();
refSnapshot.Dispose();
return null;
}
refForDiff = croppedRef;
disposeRef = true;
}
else if (!RegionsEqual(diffRegion, _referenceRegion))
else if (!RegionsEqual(diffRegion, refRegion))
{
int offX = diffRegion.X - _referenceRegion.X;
int offY = diffRegion.Y - _referenceRegion.Y;
if (offX < 0 || offY < 0 || offX + diffRegion.Width > _referenceFrame.Width || offY + diffRegion.Height > _referenceFrame.Height)
int offX = diffRegion.X - refRegion.X;
int offY = diffRegion.Y - refRegion.Y;
if (offX < 0 || offY < 0 || offX + diffRegion.Width > refSnapshot.Width || offY + diffRegion.Height > refSnapshot.Height)
{
current.Dispose();
refSnapshot.Dispose();
return null;
}
var croppedRef = CropBitmap(_referenceFrame, new Region(offX, offY, diffRegion.Width, diffRegion.Height));
var croppedRef = CropBitmap(refSnapshot, new Region(offX, offY, diffRegion.Width, diffRegion.Height));
if (croppedRef == null)
{
current.Dispose();
refSnapshot.Dispose();
return null;
}
refForDiff = croppedRef;
disposeRef = true;
}
}
@ -126,7 +140,8 @@ class DiffCropHandler
if (totalChanged == 0)
{
current.Dispose();
if (disposeRef) refForDiff.Dispose();
if (refForDiff != refSnapshot) refForDiff.Dispose();
refSnapshot.Dispose();
return null;
}
@ -226,7 +241,8 @@ class DiffCropHandler
{
Log.Debug("diff-crop: no tooltip-sized region found");
current.Dispose();
if (disposeRef) refForDiff.Dispose();
if (refForDiff != refSnapshot) refForDiff.Dispose();
refSnapshot.Dispose();
return null;
}
@ -307,7 +323,8 @@ class DiffCropHandler
Log.Debug("diff-crop: tooltip region ({X},{Y}) {W}x{H}", minX, minY, rw, rh);
if (disposeRef) refForDiff.Dispose();
if (refForDiff != refSnapshot) refForDiff.Dispose();
refSnapshot.Dispose();
return (cropped, refCropped, current, resultRegion);
}

View file

@ -0,0 +1,173 @@
using OpenCvSharp;
using Poe2Trade.Core;
using Serilog;
using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen;
/// <summary>
/// Detects enemies on screen using a two-stage approach:
/// 1. YOLO detection via Python daemon (~5Hz, every 6th frame)
/// 2. Health bar confirmation via HSV threshold on bbox region (every frame, ~1ms)
/// </summary>
public class EnemyDetector : IFrameConsumer, IDisposable
{
    // Crop region for gameplay area at 2560x1440 — excludes HUD globes, minimap
    private static readonly Region GameplayRegion = new(320, 100, 1920, 1200);

    private const int DetectEveryNFrames = 6; // ~5Hz at 30fps
    private const int HealthBarHeight = 10; // px above bbox to scan for red bar
    private const float RedPixelThreshold = 0.05f; // 5% red pixels = confirmed
    private const int MatchRadiusPx = 50; // max centre offset (per axis) to treat two detections as the same enemy

    private readonly PythonDetectBridge _bridge = new();
    private volatile DetectionSnapshot _latest = new([], 0, 0);
    private int _frameCounter;
    private List<DetectedEnemy> _activeEnemies = [];

    /// <summary>When false, <see cref="Process"/> returns immediately without touching any state.</summary>
    public bool Enabled { get; set; }

    /// <summary>The snapshot published by the most recent successful YOLO pass.</summary>
    public DetectionSnapshot Latest => _latest;

    /// <summary>Raised after each successful YOLO pass with the freshly built snapshot.</summary>
    public event Action<DetectionSnapshot>? DetectionUpdated;

    /// <summary>
    /// Handles one captured frame: re-checks health bars for the currently tracked enemies,
    /// and on every <see cref="DetectEveryNFrames"/>-th frame runs YOLO on the gameplay crop
    /// and publishes a new snapshot. Failures in the YOLO stage are logged and swallowed.
    /// </summary>
    /// <param name="frame">The captured screen frame.</param>
    public void Process(ScreenFrame frame)
    {
        if (!Enabled) return;

        _frameCounter++;

        // Health bar confirmation runs every frame for known enemies
        if (_activeEnemies.Count > 0)
        {
            _activeEnemies = ConfirmHealthBars(frame, _activeEnemies);
        }

        // YOLO detection runs every Nth frame
        if (_frameCounter % DetectEveryNFrames != 0) return;

        try
        {
            // Bounds check: the fixed crop must fit inside the captured frame
            if (GameplayRegion.X + GameplayRegion.Width > frame.Width ||
                GameplayRegion.Y + GameplayRegion.Height > frame.Height)
                return;

            using var cropped = frame.CropBgr(GameplayRegion);
            var result = _bridge.Detect(cropped);

            // Size the list from the actual detections rather than the daemon-reported
            // Count field: a negative or mismatched Count must not discard valid detections.
            var enemies = new List<DetectedEnemy>(result.Detections.Count);

            // Offset bbox coords by crop origin → screen-space coordinates
            foreach (var det in result.Detections)
            {
                var screenX = det.X + GameplayRegion.X;
                var screenY = det.Y + GameplayRegion.Y;
                var screenCx = det.Cx + GameplayRegion.X;
                var screenCy = det.Cy + GameplayRegion.Y;

                // Carry the confirmation flag over from a previously confirmed enemy nearby
                var wasConfirmed = _activeEnemies.Any(e =>
                    Math.Abs(e.Cx - screenCx) < MatchRadiusPx &&
                    Math.Abs(e.Cy - screenCy) < MatchRadiusPx &&
                    e.HealthBarConfirmed);

                enemies.Add(new DetectedEnemy(
                    det.Confidence,
                    screenX, screenY, det.Width, det.Height,
                    screenCx, screenCy,
                    wasConfirmed));
            }

            _activeEnemies = enemies;

            var snapshot = new DetectionSnapshot(
                enemies.AsReadOnly(),
                DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
                result.InferenceMs);
            _latest = snapshot;
            DetectionUpdated?.Invoke(snapshot);
        }
        catch (Exception ex)
        {
            Log.Debug(ex, "EnemyDetector YOLO failed");
        }
    }

    /// <summary>
    /// Scan a narrow band above each enemy bbox for red health bar pixels (HSV threshold).
    /// Returns updated list with HealthBarConfirmed set where detected. Enemies that are
    /// already confirmed, or whose scan strip falls outside the frame, are passed through unchanged.
    /// </summary>
    private static List<DetectedEnemy> ConfirmHealthBars(ScreenFrame frame, List<DetectedEnemy> enemies)
    {
        var updated = new List<DetectedEnemy>(enemies.Count);

        foreach (var enemy in enemies)
        {
            // Confirmation is sticky: once set it is never re-evaluated here
            if (enemy.HealthBarConfirmed)
            {
                updated.Add(enemy);
                continue;
            }

            // Scan region: narrow strip above the bbox top edge, clamped to the top of the frame
            var scanY = Math.Max(0, enemy.Y - HealthBarHeight);
            var scanHeight = Math.Min(HealthBarHeight, enemy.Y);
            if (scanHeight <= 0 || enemy.Width <= 0)
            {
                updated.Add(enemy);
                continue;
            }

            var scanX = Math.Max(0, enemy.X);
            var scanRegion = new Region(
                scanX,
                scanY,
                Math.Min(enemy.Width, frame.Width - scanX),
                scanHeight);

            if (scanRegion.Width <= 0 || scanRegion.Height <= 0 ||
                scanRegion.X + scanRegion.Width > frame.Width ||
                scanRegion.Y + scanRegion.Height > frame.Height)
            {
                updated.Add(enemy);
                continue;
            }

            try
            {
                var confirmed = HasRedHealthBar(frame, scanRegion);
                updated.Add(enemy with { HealthBarConfirmed = confirmed });
            }
            catch (Exception ex)
            {
                // Best effort: keep the enemy unconfirmed, but leave a trace of the failure
                Log.Debug(ex, "EnemyDetector health bar check failed");
                updated.Add(enemy);
            }
        }

        return updated;
    }

    /// <summary>
    /// Check if a region contains enough red pixels to indicate a health bar.
    /// Red in HSV: H=0-10 or H=170-180, S>100, V>80.
    /// </summary>
    private static bool HasRedHealthBar(ScreenFrame frame, Region region)
    {
        using var bgr = frame.CropBgr(region);
        using var hsv = new Mat();
        Cv2.CvtColor(bgr, hsv, ColorConversionCodes.BGR2HSV);

        // Red wraps around in HSV — check both ranges
        using var mask1 = new Mat();
        using var mask2 = new Mat();
        Cv2.InRange(hsv, new Scalar(0, 100, 80), new Scalar(10, 255, 255), mask1);
        Cv2.InRange(hsv, new Scalar(170, 100, 80), new Scalar(180, 255, 255), mask2);

        using var combined = new Mat();
        Cv2.BitwiseOr(mask1, mask2, combined);

        var totalPixels = combined.Rows * combined.Cols;
        if (totalPixels == 0) return false; // empty crop can never confirm

        var redPixels = Cv2.CountNonZero(combined);
        var ratio = (float)redPixels / totalPixels;
        return ratio >= RedPixelThreshold;
    }

    /// <summary>Disposes the detection bridge owned by this detector.</summary>
    public void Dispose() => _bridge.Dispose();
}

View file

@ -13,6 +13,7 @@ public class FramePipeline : IDisposable
public IScreenCapture Capture => _capture;
public void AddConsumer(IFrameConsumer consumer) => _consumers.Add(consumer);
public void RemoveConsumer(IFrameConsumer consumer) => _consumers.Remove(consumer);
/// <summary>
/// Capture one frame, dispatch to all consumers in parallel, then dispose frame.

View file

@ -0,0 +1,37 @@
using Serilog;
namespace Poe2Trade.Screen;
/// <summary>
/// Owns a <see cref="FramePipeline"/> together with the capture backend it reads from.
/// The backend is DXGI Desktop Duplication when it can be constructed, otherwise GDI.
/// </summary>
public class FramePipelineService : IDisposable
{
    /// <summary>Selects a capture backend and builds the pipeline on top of it.</summary>
    public FramePipelineService()
    {
        var capture = CreateBackend();
        Backend = capture;
        Pipeline = new FramePipeline(capture);
    }

    /// <summary>The frame pipeline built on <see cref="Backend"/>.</summary>
    public FramePipeline Pipeline { get; }

    /// <summary>The capture backend chosen at construction time.</summary>
    public IScreenCapture Backend { get; }

    /// <summary>Disposes the pipeline.</summary>
    public void Dispose() => Pipeline.Dispose();

    /// <summary>
    /// Tries DXGI first; any exception while setting it up is logged as a warning
    /// and the GDI backend is returned instead.
    /// </summary>
    private static IScreenCapture CreateBackend()
    {
        try
        {
            IScreenCapture duplication = new DesktopDuplication();
            Log.Information("Screen capture: DXGI Desktop Duplication");
            return duplication;
        }
        catch (Exception dxgiFailure)
        {
            Log.Warning(dxgiFailure, "DXGI unavailable, falling back to GDI");
        }

        return CreateGdiFallback();
    }

    /// <summary>Logs the fallback choice, then constructs the GDI backend.</summary>
    private static IScreenCapture CreateGdiFallback()
    {
        Log.Information("Screen capture: GDI (CopyFromScreen)");
        return new GdiCapture();
    }
}

View file

@ -0,0 +1,118 @@
using OpenCvSharp;
using Serilog;
using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen;
/// <summary>
/// Saves full-screen frames as JPEGs for YOLO training data collection.
/// Only saves when: (1) health bars detected, (2) scene has changed since last save.
/// This avoids flooding disk when standing still in a dense pack.
/// </summary>
public class FrameSaver : IFrameConsumer
{
    // Gameplay area at 2560x1440 — excludes HUD globes, minimap
    private static readonly Region GameplayRegion = new(320, 100, 1920, 1200);

    private const int JpegQuality = 95;
    private const int MinSaveIntervalMs = 1000;
    private const int MinRedPixels = 50;
    private const int ThumbSize = 64;
    private const double MovementThreshold = 8.0; // mean absolute diff on 64x64 grayscale

    private readonly string _outputDir;
    private int _savedCount;
    private long _lastSaveTime;
    private Mat? _prevThumb; // thumbnail of the last frame that was actually written

    /// <summary>When false, <see cref="Process"/> returns immediately.</summary>
    public bool Enabled { get; set; }

    /// <summary>Number of frames successfully written so far.</summary>
    public int SavedCount => _savedCount;

    /// <summary>Creates a saver writing into <paramref name="outputDir"/> (resolved to a full path).</summary>
    /// <param name="outputDir">Target directory; created on first save if missing.</param>
    public FrameSaver(string outputDir = "training-data/raw")
    {
        _outputDir = Path.GetFullPath(outputDir);
    }

    /// <summary>
    /// Saves the full frame as a JPEG when saving is enabled, the minimum interval since the
    /// last write attempt has elapsed, the gameplay crop contains health-bar pixels, and the scene
    /// differs enough from the last saved frame. Any exception is logged and swallowed.
    /// </summary>
    /// <param name="frame">The captured screen frame.</param>
    public void Process(ScreenFrame frame)
    {
        if (!Enabled) return;

        var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        if (now - _lastSaveTime < MinSaveIntervalMs) return;

        if (GameplayRegion.X + GameplayRegion.Width > frame.Width ||
            GameplayRegion.Y + GameplayRegion.Height > frame.Height)
            return;

        // Owned locally until it is committed as the new reference thumbnail
        Mat? thumb = null;
        try
        {
            using var bgr = frame.CropBgr(GameplayRegion);

            if (!HasHealthBars(bgr)) return;

            thumb = BuildThumbnail(bgr);
            if (!HasSceneChanged(thumb)) return;

            // CreateDirectory is a no-op when the directory already exists
            Directory.CreateDirectory(_outputDir);

            var fullRegion = new Region(0, 0, frame.Width, frame.Height);
            using var fullBgr = frame.CropBgr(fullRegion);
            var path = Path.Combine(_outputDir, $"frame_{now}.jpg");
            var prms = new ImageEncodingParam(ImwriteFlags.JpegQuality, JpegQuality);

            var written = Cv2.ImWrite(path, fullBgr, [prms]);

            // Throttle on every completed attempt so a failing encoder is not retried each frame
            _lastSaveTime = now;

            if (!written)
            {
                Log.Debug("FrameSaver: image writer reported failure for {Path}", path);
                return;
            }

            // Only a frame that really reached disk becomes the new comparison baseline
            _prevThumb?.Dispose();
            _prevThumb = thumb;
            thumb = null; // ownership transferred to _prevThumb

            _savedCount++;
            if (_savedCount % 10 == 0)
                Log.Information("FrameSaver: saved {Count} frames to {Dir}", _savedCount, _outputDir);
        }
        catch (Exception ex)
        {
            Log.Debug(ex, "FrameSaver failed to save frame");
        }
        finally
        {
            thumb?.Dispose();
        }
    }

    /// <summary>
    /// Scan for enemy health bar pixels (HSV threshold).
    /// Target colors: #C1251E, #BB281C → H≈1-3, S≈215, V≈187-193.
    /// </summary>
    private static bool HasHealthBars(Mat bgr)
    {
        using var hsv = new Mat();
        Cv2.CvtColor(bgr, hsv, ColorConversionCodes.BGR2HSV);
        using var mask = new Mat();
        Cv2.InRange(hsv, new Scalar(0, 150, 130), new Scalar(8, 255, 255), mask);
        return Cv2.CountNonZero(mask) >= MinRedPixels;
    }

    /// <summary>
    /// Builds the 64x64 grayscale thumbnail used for scene comparison.
    /// The caller owns (and must dispose) the returned Mat.
    /// </summary>
    private static Mat BuildThumbnail(Mat bgr)
    {
        using var gray = new Mat();
        Cv2.CvtColor(bgr, gray, ColorConversionCodes.BGR2GRAY);

        var thumb = new Mat();
        try
        {
            Cv2.Resize(gray, thumb, new Size(ThumbSize, ThumbSize), interpolation: InterpolationFlags.Area);
            return thumb;
        }
        catch
        {
            thumb.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Compare a 64x64 grayscale thumbnail to the one kept from the previous save.
    /// Returns true if there is no previous save, or if the mean absolute difference
    /// reaches <see cref="MovementThreshold"/>. Does not modify the stored thumbnail,
    /// so slow drift accumulates against the last saved frame instead of being reset
    /// on every check.
    /// </summary>
    private bool HasSceneChanged(Mat thumb)
    {
        if (_prevThumb is null)
            return true; // nothing saved yet, always save

        using var diff = new Mat();
        Cv2.Absdiff(thumb, _prevThumb, diff);
        var mad = Cv2.Mean(diff).Val0;
        return mad >= MovementThreshold;
    }
}

View file

@ -0,0 +1,35 @@
using Poe2Trade.Core;
using Serilog;
namespace Poe2Trade.Screen;
/// <summary>
/// Tracks the current game UI state by classifying every incoming frame and
/// raising <see cref="StateChanged"/> whenever the classification differs from
/// the stored state. Classification is currently a stub that always yields
/// <see cref="GameUiState.Unknown"/>.
/// </summary>
public class GameStateDetector : IFrameConsumer, IGameStateProvider
{
    private volatile GameUiState _currentState = GameUiState.Unknown;

    /// <summary>Raised with (previous, new) after the stored state has been replaced.</summary>
    public event Action<GameUiState, GameUiState>? StateChanged;

    /// <summary>The most recently stored state.</summary>
    public GameUiState CurrentState => _currentState;

    /// <summary>Classifies <paramref name="frame"/> and publishes a transition if the state changed.</summary>
    public void Process(ScreenFrame frame)
    {
        var detected = Classify(frame);
        var previous = _currentState;

        if (detected != previous)
        {
            _currentState = detected;
            Log.Debug("GameState: {Old} → {New}", previous, detected);
            StateChanged?.Invoke(previous, detected);
        }
    }

    // TODO: Calibrate pixel probe positions from actual 2560x1440 screenshots.
    // Each state has 2-3 characteristic pixels that distinguish it.
    // Until that calibration exists the classifier reports Unknown for every frame.
    private GameUiState Classify(ScreenFrame frame) => GameUiState.Unknown;
}

View file

@ -0,0 +1,110 @@
using System.Drawing;
using System.Text.RegularExpressions;
using OpenCvSharp;
using Poe2Trade.Core;
using Serilog;
using Region = Poe2Trade.Core.Region;
namespace Poe2Trade.Screen;
/// <summary>A current/maximum value pair.</summary>
public record HudValues(int Current, int Max);

/// <summary>
/// One reading of the HUD resources plus the time it was taken.
/// Any resource that could not be read is null.
/// </summary>
public record HudSnapshot
{
    public HudValues? Life { get; init; }
    public HudValues? Mana { get; init; }
    public HudValues? EnergyShield { get; init; }
    public HudValues? Spirit { get; init; }
    public long Timestamp { get; init; }

    /// <summary>Life as a fraction of its maximum; 1 when life is missing or its maximum is not positive.</summary>
    public float LifePct => FractionOrFull(Life);

    /// <summary>Mana as a fraction of its maximum; 1 when mana is missing or its maximum is not positive.</summary>
    public float ManaPct => FractionOrFull(Mana);

    // Shared ratio rule for the percentage properties: unreadable or degenerate values count as "full".
    private static float FractionOrFull(HudValues? values)
    {
        if (values is null || values.Max <= 0)
        {
            return 1f;
        }

        return (float)values.Current / values.Max;
    }
}
/// <summary>
/// Reads life/mana/ES/spirit values from HUD globe text via OCR.
/// Throttled to ~1 read per second (every 30 frames at 30fps).
/// Owns a Python OCR bridge, which is released by <see cref="Dispose"/>.
/// </summary>
public class HudReader : IFrameConsumer, IDisposable
{
    private static readonly Regex ValuePattern = new(@"(\d+)\s*/\s*(\d+)", RegexOptions.Compiled);

    // Crop regions for HUD text at 2560x1440 — placeholders, need calibration
    private static readonly Region LifeRegion = new(100, 1340, 200, 40);
    private static readonly Region ManaRegion = new(2260, 1340, 200, 40);
    private static readonly Region EsRegion = new(100, 1300, 200, 40);
    private static readonly Region SpiritRegion = new(2260, 1300, 200, 40);

    private const int OcrEveryNFrames = 30;

    private readonly PythonOcrBridge _ocr = new();
    private volatile HudSnapshot _current = new() { Timestamp = 0 };
    private int _frameCounter;

    /// <summary>The snapshot produced by the most recent successful read.</summary>
    public HudSnapshot Current => _current;

    /// <summary>Raised after every successful read with the new snapshot.</summary>
    public event Action<HudSnapshot>? Updated;

    /// <summary>Raised after <see cref="Updated"/> when the snapshot's life fraction is below 0.3.</summary>
    public event Action<HudSnapshot>? LowLife;

    /// <summary>
    /// On every <see cref="OcrEveryNFrames"/>-th frame, OCRs the four HUD regions and publishes
    /// a new snapshot. Any exception is logged and swallowed; the previous snapshot stays current.
    /// </summary>
    /// <param name="frame">The captured screen frame.</param>
    public void Process(ScreenFrame frame)
    {
        if (++_frameCounter % OcrEveryNFrames != 0) return;

        try
        {
            var life = ReadValue(frame, LifeRegion);
            var mana = ReadValue(frame, ManaRegion);
            var es = ReadValue(frame, EsRegion);
            var spirit = ReadValue(frame, SpiritRegion);

            var snapshot = new HudSnapshot
            {
                Life = life,
                Mana = mana,
                EnergyShield = es,
                Spirit = spirit,
                Timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
            };

            _current = snapshot;
            Updated?.Invoke(snapshot);

            if (snapshot.LifePct < 0.3f)
                LowLife?.Invoke(snapshot);
        }
        catch (Exception ex)
        {
            Log.Debug(ex, "HudReader OCR failed");
        }
    }

    /// <summary>
    /// OCRs one HUD region and parses a "current / max" pair from the recognised text.
    /// Returns null when the region does not fit in the frame, no text is recognised,
    /// the text has no "N / M" pattern, or either number does not fit in an int.
    /// </summary>
    private HudValues? ReadValue(ScreenFrame frame, Region region)
    {
        // Bounds check
        if (region.X + region.Width > frame.Width || region.Y + region.Height > frame.Height)
            return null;

        using var bgr = frame.CropBgr(region);
        using var gray = new Mat();
        Cv2.CvtColor(bgr, gray, ColorConversionCodes.BGR2GRAY);

        // Threshold for white text on dark background
        using var thresh = new Mat();
        Cv2.Threshold(gray, thresh, 180, 255, ThresholdTypes.Binary);

        // Convert to Bitmap for OCR bridge
        var bytes = thresh.ToBytes(".png");
        using var ms = new System.IO.MemoryStream(bytes);
        using var bitmap = new Bitmap(ms);

        var result = _ocr.OcrFromBitmap(bitmap);
        if (string.IsNullOrWhiteSpace(result.Text)) return null;

        var match = ValuePattern.Match(result.Text);
        if (!match.Success) return null;

        // OCR noise can yield digit runs that overflow int, and \d also matches non-ASCII
        // digits; treat either as "unreadable" instead of throwing and losing the whole snapshot.
        if (!TryParseDigits(match.Groups[1].Value, out var current) ||
            !TryParseDigits(match.Groups[2].Value, out var max))
            return null;

        return new HudValues(current, max);
    }

    // Culture-independent parse of a plain ASCII digit run.
    private static bool TryParseDigits(string text, out int value) =>
        int.TryParse(
            text,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);

    /// <summary>Disposes the OCR bridge owned by this reader.</summary>
    public void Dispose() => _ocr.Dispose();
}

View file

@ -0,0 +1,195 @@
namespace Poe2Trade.Screen;
using System.Diagnostics;
using System.Text.Json;
using System.Text.Json.Serialization;
using OpenCvSharp;
using Serilog;
/// <summary>
/// Manages a persistent Python subprocess for YOLO object detection.
/// Lazy-starts on first request; reuses the process for subsequent calls.
/// Same stdin/stdout JSON-per-line protocol as PythonOcrBridge.
/// Start-up and each request/response exchange are serialized by a single lock.
/// </summary>
class PythonDetectBridge : IDisposable
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    private Process? _proc;
    private readonly string _daemonScript;
    private readonly string _pythonExe;
    private readonly object _lock = new();

    /// <summary>
    /// Resolves the daemon script and interpreter paths relative to the current directory.
    /// Uses the tool's virtualenv interpreter when it exists, otherwise "python" from PATH.
    /// </summary>
    public PythonDetectBridge()
    {
        _daemonScript = Path.GetFullPath(Path.Combine("tools", "python-detect", "daemon.py"));

        var venvPython = Path.GetFullPath(Path.Combine("tools", "python-detect", ".venv", "Scripts", "python.exe"));
        _pythonExe = File.Exists(venvPython) ? venvPython : "python";
    }

    /// <summary>
    /// Run YOLO detection on a BGR Mat. Returns parsed detection results.
    /// </summary>
    /// <param name="bgrMat">Image to analyse; sent to the daemon as a base64-encoded PNG.</param>
    /// <param name="conf">Forwarded to the daemon as "conf".</param>
    /// <param name="iou">Forwarded to the daemon as "iou".</param>
    /// <param name="imgsz">Forwarded to the daemon as "imgsz".</param>
    /// <exception cref="Exception">
    /// The daemon could not be started, returned no or unparsable output, or reported a failure.
    /// </exception>
    public DetectResponse Detect(Mat bgrMat, float conf = 0.3f, float iou = 0.45f, int imgsz = 640)
    {
        var imageBytes = bgrMat.ToBytes(".png");
        var imageBase64 = Convert.ToBase64String(imageBytes);

        var req = new Dictionary<string, object?>
        {
            ["cmd"] = "detect",
            ["imageBase64"] = imageBase64,
            ["conf"] = conf,
            ["iou"] = iou,
            ["imgsz"] = imgsz,
        };

        return SendRequest(req);
    }

    /// <summary>
    /// Serializes the request, exchanges one line with the daemon, and maps the reply.
    /// </summary>
    private DetectResponse SendRequest(object req)
    {
        var json = JsonSerializer.Serialize(req, JsonOptions);

        string responseLine;
        lock (_lock)
        {
            // The liveness check and (re)spawn happen under the same lock as the exchange,
            // so concurrent callers cannot start two daemons or swap the process mid-request.
            var proc = EnsureRunning();

            proc.StandardInput.WriteLine(json);
            proc.StandardInput.Flush();
            responseLine = proc.StandardOutput.ReadLine()
                ?? throw new Exception("Python detect daemon returned null");
        }

        var resp = JsonSerializer.Deserialize<PythonDetectResponse>(responseLine, JsonOptions);
        if (resp == null)
            throw new Exception("Failed to parse Python detect response");
        if (!resp.Ok)
            throw new Exception(resp.Error ?? "Python detect failed");

        return new DetectResponse
        {
            Count = resp.Count,
            InferenceMs = resp.InferenceMs,
            Detections = resp.Detections ?? [],
        };
    }

    /// <summary>
    /// Returns the live daemon process, spawning it (and waiting for its ready line) when there
    /// is none or the previous one has exited. Must be called while holding <c>_lock</c>.
    /// </summary>
    private Process EnsureRunning()
    {
        if (_proc is { HasExited: false } running)
            return running;

        _proc?.Dispose();
        _proc = null;

        if (!File.Exists(_daemonScript))
            throw new Exception($"Python detect daemon not found at {_daemonScript}");

        Log.Information("Spawning Python detect daemon: {Python} {Script}", _pythonExe, _daemonScript);

        var proc = new Process
        {
            StartInfo = new ProcessStartInfo
            {
                FileName = _pythonExe,
                Arguments = $"\"{_daemonScript}\"",
                UseShellExecute = false,
                RedirectStandardInput = true,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                CreateNoWindow = true,
            }
        };

        // stderr is forwarded to the log; stdout is reserved for the JSON protocol
        proc.ErrorDataReceived += (_, e) =>
        {
            if (!string.IsNullOrEmpty(e.Data))
                Log.Debug("[python-detect] {Line}", e.Data);
        };

        try
        {
            proc.Start();
            proc.BeginErrorReadLine();

            // Wait for ready signal (up to 60s for CUDA warmup)
            var readyTask = Task.Run(() => proc.StandardOutput.ReadLine());
            if (!readyTask.Wait(TimeSpan.FromSeconds(60)))
                throw new Exception("Python detect daemon timed out waiting for ready signal");

            var readyLine = readyTask.Result;
            if (readyLine == null)
                throw new Exception("Python detect daemon exited before ready signal");

            var ready = JsonSerializer.Deserialize<PythonDetectResponse>(readyLine, JsonOptions);
            if (ready?.Ready != true)
                throw new Exception($"Python detect daemon did not send ready signal: {readyLine}");
        }
        catch
        {
            // Do not leave a half-started daemon behind
            try { if (!proc.HasExited) proc.Kill(); } catch { /* best effort */ }
            proc.Dispose();
            throw;
        }

        _proc = proc;
        Log.Information("Python detect daemon ready");
        return proc;
    }

    /// <summary>
    /// Closes the daemon's stdin, waits up to 3 seconds for it to exit, then kills it if needed.
    /// Does not take <c>_lock</c>, so it is not blocked by a request that is waiting on the daemon.
    /// </summary>
    public void Dispose()
    {
        if (_proc != null && !_proc.HasExited)
        {
            try
            {
                _proc.StandardInput.Close();
                _proc.WaitForExit(3000);
                if (!_proc.HasExited) _proc.Kill();
            }
            catch { /* ignore */ }
        }
        _proc?.Dispose();
        _proc = null;
    }

    // -- Response types --

    /// <summary>Result of one detection request as returned to callers.</summary>
    public class DetectResponse
    {
        public int Count { get; set; }
        public float InferenceMs { get; set; }
        public List<Detection> Detections { get; set; } = [];
    }

    /// <summary>One detection entry as deserialized from the daemon's JSON.</summary>
    public class Detection
    {
        [JsonPropertyName("class")]
        public string ClassName { get; set; } = "";
        public int ClassId { get; set; }
        public float Confidence { get; set; }
        public int X { get; set; }
        public int Y { get; set; }
        public int Width { get; set; }
        public int Height { get; set; }
        public int Cx { get; set; }
        public int Cy { get; set; }
    }

    /// <summary>Wire shape shared by the ready line and detection replies.</summary>
    private class PythonDetectResponse
    {
        public bool Ok { get; set; }
        public bool? Ready { get; set; }
        public string? Error { get; set; }
        public int Count { get; set; }
        public float InferenceMs { get; set; }
        public List<Detection>? Detections { get; set; }
    }
}

View file

@ -104,7 +104,7 @@ class PythonOcrBridge : IDisposable
Log.Information("Spawning Python OCR daemon: {Python} {Script}", _pythonExe, _daemonScript);
_proc = new Process
var proc = new Process
{
StartInfo = new ProcessStartInfo
{
@ -118,24 +118,35 @@ class PythonOcrBridge : IDisposable
}
};
_proc.ErrorDataReceived += (_, e) =>
proc.ErrorDataReceived += (_, e) =>
{
if (!string.IsNullOrEmpty(e.Data))
Log.Debug("[python-ocr] {Line}", e.Data);
};
_proc.Start();
_proc.BeginErrorReadLine();
try
{
proc.Start();
proc.BeginErrorReadLine();
// Wait for ready signal (up to 30s for first model load)
var readyLine = _proc.StandardOutput.ReadLine();
if (readyLine == null)
throw new Exception("Python OCR daemon exited before ready signal");
// Wait for ready signal (up to 30s for first model load)
var readyLine = proc.StandardOutput.ReadLine();
if (readyLine == null)
throw new Exception("Python OCR daemon exited before ready signal");
var ready = JsonSerializer.Deserialize<PythonResponse>(readyLine, JsonOptions);
if (ready?.Ready != true)
throw new Exception($"Python OCR daemon did not send ready signal: {readyLine}");
var ready = JsonSerializer.Deserialize<PythonResponse>(readyLine, JsonOptions);
if (ready?.Ready != true)
throw new Exception($"Python OCR daemon did not send ready signal: {readyLine}");
}
catch
{
// Kill orphaned process before re-throwing
try { if (!proc.HasExited) proc.Kill(); } catch { /* best effort */ }
proc.Dispose();
throw;
}
_proc = proc;
Log.Information("Python OCR daemon ready");
}

View file

@ -238,16 +238,16 @@ public class ScreenReader : IScreenReader
private static double BigramSimilarity(string a, string b)
{
if (a.Length < 2 || b.Length < 2) return a == b ? 1 : 0;
var bigramsA = new Dictionary<string, int>();
var bigramsA = new Dictionary<(char, char), int>();
for (var i = 0; i < a.Length - 1; i++)
{
var bg = a.Substring(i, 2);
var bg = (a[i], a[i + 1]);
bigramsA[bg] = bigramsA.GetValueOrDefault(bg) + 1;
}
var matches = 0;
for (var i = 0; i < b.Length - 1; i++)
{
var bg = b.Substring(i, 2);
var bg = (b[i], b[i + 1]);
if (bigramsA.TryGetValue(bg, out var count) && count > 0)
{
matches++;