poe2-bot/tools/OcrDaemon/PythonOcrBridge.cs
2026-02-12 17:48:16 -05:00

210 lines
7 KiB
C#

namespace OcrDaemon;
using System.Diagnostics;
using System.Drawing;
using System.Text.Json;
using System.Text.Json.Serialization;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
/// <summary>
/// Manages a persistent Python subprocess for EasyOCR / PaddleOCR.
/// Lazy-starts on first request; reuses the process for subsequent calls.
/// Same stdin/stdout JSON-per-line protocol as the C# daemon itself.
/// </summary>
/// <remarks>
/// Spawning the subprocess and every request/response exchange are serialized on one
/// lock, so concurrent callers can neither interleave lines on the pipe nor start two
/// daemons at once.
/// </remarks>
class PythonOcrBridge : IDisposable
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    // Only ever holds a process that was started AND completed the ready handshake.
    private Process? _proc;
    private readonly string _daemonScript;
    private readonly string _pythonExe;
    private readonly object _lock = new();

    /// <summary>
    /// Resolves the daemon script and the Python interpreter relative to the running exe.
    /// Nothing is spawned here; the subprocess starts on the first OCR request.
    /// </summary>
    public PythonOcrBridge()
    {
        // Resolve paths relative to this exe
        var exeDir = AppContext.BaseDirectory;
        // exeDir = tools/OcrDaemon/bin/Release/net8.0-.../
        // Walk up 4 levels to tools/
        var toolsDir = Path.GetFullPath(Path.Combine(exeDir, "..", "..", "..", ".."));
        _daemonScript = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", "daemon.py"));

        // Use the venv Python if it exists, otherwise fall back to system python
        var venvPython = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", ".venv", "Scripts", "python.exe"));
        _pythonExe = File.Exists(venvPython) ? venvPython : "python";
    }

    /// <summary>
    /// Run OCR on a screen region using the specified Python engine.
    /// Captures screenshot, saves to temp file, sends to Python, returns OcrResponse.
    /// </summary>
    /// <param name="req">Request whose <c>File</c> and <c>Region</c> select the image source.</param>
    /// <param name="engine">Engine name forwarded to the Python daemon.</param>
    /// <returns>The <see cref="OcrResponse"/> produced by the Python daemon.</returns>
    public object HandleOcr(Request req, string engine)
    {
        var tmpPath = Path.Combine(Path.GetTempPath(), $"ocr_{Guid.NewGuid():N}.png");
        try
        {
            using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
            bitmap.Save(tmpPath, SdImageFormat.Png);
            return OcrFromFile(tmpPath, engine);
        }
        finally
        {
            // Best-effort cleanup: a leftover temp file must not mask the OCR result or error.
            try { File.Delete(tmpPath); } catch { /* ignore */ }
        }
    }

    /// <summary>
    /// Run OCR on an already-saved image file via the Python engine.
    /// </summary>
    /// <param name="imagePath">Path of the image, sent to the daemon as <c>imagePath</c>.</param>
    /// <param name="engine">Engine name forwarded to the Python daemon.</param>
    /// <param name="ocrParams">Optional tuning values; only the ones that are set are sent.</param>
    public OcrResponse OcrFromFile(string imagePath, string engine, OcrParams? ocrParams = null)
    {
        var pyReq = BuildPythonRequest(engine, ocrParams);
        pyReq["imagePath"] = imagePath;
        return SendPythonRequest(pyReq);
    }

    /// <summary>
    /// Run OCR on a bitmap via the Python engine (base64 PNG over pipe, no temp file).
    /// </summary>
    /// <param name="bitmap">Image to recognize; encoded as PNG before sending.</param>
    /// <param name="engine">Engine name forwarded to the Python daemon.</param>
    /// <param name="ocrParams">Optional tuning values; only the ones that are set are sent.</param>
    public OcrResponse OcrFromBitmap(Bitmap bitmap, string engine, OcrParams? ocrParams = null)
    {
        // Encoding happens before the lock is taken so it does not stall other callers.
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        var imageBase64 = Convert.ToBase64String(ms.ToArray());

        var pyReq = BuildPythonRequest(engine, ocrParams);
        pyReq["imageBase64"] = imageBase64;
        return SendPythonRequest(pyReq);
    }

    /// <summary>
    /// Builds the base <c>ocr</c> command, adding only the tuning parameters that are set
    /// so the Python side can apply its own defaults for the rest.
    /// </summary>
    private static Dictionary<string, object?> BuildPythonRequest(string engine, OcrParams? ocrParams)
    {
        var req = new Dictionary<string, object?> { ["cmd"] = "ocr", ["engine"] = engine };
        if (ocrParams == null) return req;

        if (ocrParams.MergeGap > 0) req["mergeGap"] = ocrParams.MergeGap;
        if (ocrParams.LinkThreshold.HasValue) req["linkThreshold"] = ocrParams.LinkThreshold.Value;
        if (ocrParams.TextThreshold.HasValue) req["textThreshold"] = ocrParams.TextThreshold.Value;
        if (ocrParams.LowText.HasValue) req["lowText"] = ocrParams.LowText.Value;
        if (ocrParams.WidthThs.HasValue) req["widthThs"] = ocrParams.WidthThs.Value;
        if (ocrParams.Paragraph.HasValue) req["paragraph"] = ocrParams.Paragraph.Value;
        return req;
    }

    /// <summary>
    /// Writes one JSON line to the daemon, reads one JSON line back and converts it.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The daemon closed its output, sent an unparsable reply, or reported <c>ok: false</c>.
    /// </exception>
    private OcrResponse SendPythonRequest(object pyReq)
    {
        var json = JsonSerializer.Serialize(pyReq, JsonOptions);
        string responseLine;
        lock (_lock)
        {
            // The liveness check / spawn must share the lock with the pipe exchange;
            // otherwise two callers could each start a daemon or dispose the other's.
            var proc = EnsureRunning();
            proc.StandardInput.WriteLine(json);
            proc.StandardInput.Flush();
            responseLine = proc.StandardOutput.ReadLine()
                ?? throw new InvalidOperationException("Python daemon returned null");
        }

        var resp = JsonSerializer.Deserialize<PythonResponse>(responseLine, JsonOptions);
        if (resp == null)
            throw new InvalidOperationException("Failed to parse Python OCR response");
        if (!resp.Ok)
            throw new InvalidOperationException(resp.Error ?? "Python OCR failed");

        return new OcrResponse
        {
            Text = resp.Text ?? "",
            Lines = resp.Lines ?? [],
        };
    }

    /// <summary>
    /// Returns the live daemon process, spawning it (and waiting for its ready line) if
    /// there is none or the previous one has exited. Must be called while holding the lock.
    /// </summary>
    /// <exception cref="FileNotFoundException">The daemon script does not exist.</exception>
    /// <exception cref="InvalidOperationException">The daemon exited or misbehaved during startup.</exception>
    private Process EnsureRunning()
    {
        var current = _proc;
        if (current != null)
        {
            if (!current.HasExited)
                return current;
            current.Dispose();
            _proc = null;
        }

        if (!File.Exists(_daemonScript))
            throw new FileNotFoundException($"Python OCR daemon not found at {_daemonScript}", _daemonScript);

        Console.Error.WriteLine($"Spawning Python OCR daemon: {_pythonExe} {_daemonScript}");

        // Keep the process in a local until the handshake succeeds. A Process that was
        // never started throws from HasExited, and one that failed the handshake would
        // otherwise be reused with the line protocol out of sync.
        var proc = new Process
        {
            StartInfo = new ProcessStartInfo
            {
                FileName = _pythonExe,
                Arguments = $"\"{_daemonScript}\"",
                UseShellExecute = false,
                RedirectStandardInput = true,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                CreateNoWindow = true,
            }
        };
        proc.ErrorDataReceived += (_, e) =>
        {
            if (!string.IsNullOrEmpty(e.Data))
                Console.Error.WriteLine($"[python-ocr] {e.Data}");
        };

        try
        {
            proc.Start();
            proc.BeginErrorReadLine();

            // Block until the daemon prints its ready line. The first model load can be
            // slow; note that no timeout is enforced here.
            var readyLine = proc.StandardOutput.ReadLine()
                ?? throw new InvalidOperationException("Python OCR daemon exited before ready signal");
            var ready = JsonSerializer.Deserialize<PythonResponse>(readyLine, JsonOptions);
            if (ready?.Ready != true)
                throw new InvalidOperationException($"Python OCR daemon did not send ready signal: {readyLine}");
        }
        catch
        {
            KillAndDispose(proc);
            throw;
        }

        _proc = proc;
        Console.Error.WriteLine("Python OCR daemon ready");
        return proc;
    }

    /// <summary>
    /// Tears down a process whose startup failed. Never throws, so the original startup
    /// error is the one that reaches the caller.
    /// </summary>
    private static void KillAndDispose(Process proc)
    {
        try
        {
            if (!proc.HasExited) proc.Kill();
        }
        catch
        {
            // Start() itself failed (no process to query) or the process is already gone.
        }
        finally
        {
            proc.Dispose();
        }
    }

    /// <summary>
    /// Asks the daemon to exit by closing its stdin, waits up to 3 seconds, then kills it.
    /// Never throws.
    /// </summary>
    public void Dispose()
    {
        // Detach first so the field never points at a disposed process.
        var proc = _proc;
        _proc = null;
        if (proc == null) return;

        try
        {
            if (!proc.HasExited)
            {
                proc.StandardInput.Close();
                proc.WaitForExit(3000);
                if (!proc.HasExited) proc.Kill();
            }
        }
        catch
        {
            // Best-effort shutdown: Dispose must not throw.
        }
        finally
        {
            proc.Dispose();
        }
    }

    /// <summary>
    /// One JSON line received from the Python daemon: either the startup ready signal
    /// or the result of an OCR request.
    /// </summary>
    private class PythonResponse
    {
        [JsonPropertyName("ok")]
        public bool Ok { get; set; }

        [JsonPropertyName("ready")]
        public bool? Ready { get; set; }

        [JsonPropertyName("text")]
        public string? Text { get; set; }

        [JsonPropertyName("lines")]
        public List<OcrLineResult>? Lines { get; set; }

        [JsonPropertyName("error")]
        public string? Error { get; set; }
    }
}