210 lines
7 KiB
C#
210 lines
7 KiB
C#
namespace OcrDaemon;
|
|
|
|
using System.Diagnostics;
|
|
using System.Drawing;
|
|
using System.Text.Json;
|
|
using System.Text.Json.Serialization;
|
|
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
|
|
|
/// <summary>
/// Manages a persistent Python subprocess for EasyOCR / PaddleOCR.
/// Lazy-starts on first request; reuses the process for subsequent calls.
/// Same stdin/stdout JSON-per-line protocol as the C# daemon itself.
/// </summary>
/// <remarks>
/// Starting/restarting the subprocess and every request/response exchange are
/// serialized on a single private lock, so only one request is in flight at a time.
/// </remarks>
class PythonOcrBridge : IDisposable
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    // Only ever holds a process that started successfully and completed the ready handshake.
    private Process? _proc;
    private readonly string _daemonScript;
    private readonly string _pythonExe;
    private readonly object _lock = new();

    /// <summary>
    /// Resolves the daemon script and Python interpreter paths relative to the running executable.
    /// Does not start the subprocess.
    /// </summary>
    public PythonOcrBridge()
    {
        // Resolve paths relative to this exe
        var exeDir = AppContext.BaseDirectory;
        // exeDir = tools/OcrDaemon/bin/Release/net8.0-.../
        // Walk up 4 levels to tools/
        var toolsDir = Path.GetFullPath(Path.Combine(exeDir, "..", "..", "..", ".."));
        _daemonScript = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", "daemon.py"));

        // Use the venv Python if it exists, otherwise fall back to system python
        var venvPython = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", ".venv", "Scripts", "python.exe"));
        _pythonExe = File.Exists(venvPython) ? venvPython : "python";
    }

    /// <summary>
    /// Run OCR on a screen region using the specified Python engine.
    /// Captures screenshot, saves to temp file, sends to Python, returns OcrResponse.
    /// </summary>
    /// <param name="req">Request whose <c>File</c> and <c>Region</c> select the image to OCR.</param>
    /// <param name="engine">Engine name forwarded to the Python daemon.</param>
    /// <returns>The <see cref="OcrResponse"/> produced by <see cref="OcrFromFile"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="req"/> is null.</exception>
    public object HandleOcr(Request req, string engine)
    {
        ArgumentNullException.ThrowIfNull(req);

        var tmpPath = Path.Combine(Path.GetTempPath(), $"ocr_{Guid.NewGuid():N}.png");
        try
        {
            using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
            bitmap.Save(tmpPath, SdImageFormat.Png);
            return OcrFromFile(tmpPath, engine);
        }
        finally
        {
            // Best-effort cleanup: a failed delete must not mask the OCR result or the original exception.
            try { File.Delete(tmpPath); } catch { /* ignore */ }
        }
    }

    /// <summary>
    /// Run OCR on an already-saved image file via the Python engine.
    /// </summary>
    /// <param name="imagePath">Path sent to the daemon as <c>imagePath</c>.</param>
    /// <param name="engine">Engine name forwarded to the Python daemon.</param>
    /// <param name="ocrParams">Optional tuning parameters; only the ones that are set are forwarded.</param>
    /// <exception cref="InvalidOperationException">The daemon could not be started or reported a failure.</exception>
    public OcrResponse OcrFromFile(string imagePath, string engine, OcrParams? ocrParams = null)
    {
        // Fail fast if the daemon cannot be started, before building the request.
        EnsureRunning();

        var pyReq = BuildPythonRequest(engine, ocrParams);
        pyReq["imagePath"] = imagePath;
        return SendPythonRequest(pyReq);
    }

    /// <summary>
    /// Run OCR on a bitmap via the Python engine (base64 PNG over pipe, no temp file).
    /// </summary>
    /// <param name="bitmap">Image to encode as PNG and send as <c>imageBase64</c>.</param>
    /// <param name="engine">Engine name forwarded to the Python daemon.</param>
    /// <param name="ocrParams">Optional tuning parameters; only the ones that are set are forwarded.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bitmap"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The daemon could not be started or reported a failure.</exception>
    public OcrResponse OcrFromBitmap(Bitmap bitmap, string engine, OcrParams? ocrParams = null)
    {
        ArgumentNullException.ThrowIfNull(bitmap);

        // Fail fast if the daemon cannot be started, before paying for the PNG encode.
        EnsureRunning();

        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        var imageBase64 = Convert.ToBase64String(ms.ToArray());

        var pyReq = BuildPythonRequest(engine, ocrParams);
        pyReq["imageBase64"] = imageBase64;
        return SendPythonRequest(pyReq);
    }

    /// <summary>
    /// Builds the base <c>ocr</c> command for the daemon. Optional parameters are added
    /// only when they carry a value (<c>MergeGap</c> only when greater than zero).
    /// </summary>
    private static Dictionary<string, object?> BuildPythonRequest(string engine, OcrParams? ocrParams)
    {
        var req = new Dictionary<string, object?> { ["cmd"] = "ocr", ["engine"] = engine };
        if (ocrParams == null) return req;

        if (ocrParams.MergeGap > 0) req["mergeGap"] = ocrParams.MergeGap;
        if (ocrParams.LinkThreshold.HasValue) req["linkThreshold"] = ocrParams.LinkThreshold.Value;
        if (ocrParams.TextThreshold.HasValue) req["textThreshold"] = ocrParams.TextThreshold.Value;
        if (ocrParams.LowText.HasValue) req["lowText"] = ocrParams.LowText.Value;
        if (ocrParams.WidthThs.HasValue) req["widthThs"] = ocrParams.WidthThs.Value;
        if (ocrParams.Paragraph.HasValue) req["paragraph"] = ocrParams.Paragraph.Value;

        return req;
    }

    /// <summary>
    /// Writes one JSON line to the daemon's stdin, reads one JSON line back from its stdout,
    /// and converts it to an <see cref="OcrResponse"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The daemon closed its stdout, the response could not be parsed, or it reported <c>ok = false</c>.
    /// </exception>
    private OcrResponse SendPythonRequest(object pyReq)
    {
        var json = JsonSerializer.Serialize(pyReq, JsonOptions);

        string responseLine;
        lock (_lock)
        {
            // Re-check under the lock so the process cannot be replaced between the check and the write.
            EnsureRunning();
            var proc = _proc ?? throw new InvalidOperationException("Python OCR daemon is not running");

            proc.StandardInput.WriteLine(json);
            proc.StandardInput.Flush();
            responseLine = proc.StandardOutput.ReadLine()
                ?? throw new InvalidOperationException("Python daemon returned null");
        }

        var resp = JsonSerializer.Deserialize<PythonResponse>(responseLine, JsonOptions);
        if (resp == null)
            throw new InvalidOperationException("Failed to parse Python OCR response");
        if (!resp.Ok)
            throw new InvalidOperationException(resp.Error ?? "Python OCR failed");

        return new OcrResponse
        {
            Text = resp.Text ?? "",
            Lines = resp.Lines ?? [],
        };
    }

    /// <summary>
    /// Starts the Python daemon if it is not running (or has exited) and waits for its ready line.
    /// On any failure the half-started process is killed and disposed, and <c>_proc</c> stays null
    /// so the next call retries from scratch.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The daemon script is missing, the process exited before sending a line, or the first line
    /// was not a ready signal.
    /// </exception>
    private void EnsureRunning()
    {
        // The lock is re-entrant, so this is safe to call from inside SendPythonRequest's lock.
        lock (_lock)
        {
            if (_proc != null && !_proc.HasExited)
                return;

            _proc?.Dispose();
            _proc = null;

            if (!File.Exists(_daemonScript))
                throw new InvalidOperationException($"Python OCR daemon not found at {_daemonScript}");

            Console.Error.WriteLine($"Spawning Python OCR daemon: {_pythonExe} {_daemonScript}");

            // Build into a local: the field is only assigned once the handshake has succeeded.
            var proc = new Process
            {
                StartInfo = new ProcessStartInfo
                {
                    FileName = _pythonExe,
                    Arguments = $"\"{_daemonScript}\"",
                    UseShellExecute = false,
                    RedirectStandardInput = true,
                    RedirectStandardOutput = true,
                    RedirectStandardError = true,
                    CreateNoWindow = true,
                }
            };

            // Forward the daemon's stderr to ours, prefixed so the source is identifiable.
            proc.ErrorDataReceived += (_, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                    Console.Error.WriteLine($"[python-ocr] {e.Data}");
            };

            try
            {
                proc.Start();
                proc.BeginErrorReadLine();

                // Wait for the ready signal. This read blocks until the daemon prints its
                // first line or closes stdout; no timeout is enforced here.
                var readyLine = proc.StandardOutput.ReadLine()
                    ?? throw new InvalidOperationException("Python OCR daemon exited before ready signal");

                var ready = JsonSerializer.Deserialize<PythonResponse>(readyLine, JsonOptions);
                if (ready?.Ready != true)
                    throw new InvalidOperationException($"Python OCR daemon did not send ready signal: {readyLine}");
            }
            catch
            {
                // Do not leave a never-started or un-handshaken process behind.
                TryKill(proc);
                proc.Dispose();
                throw;
            }

            _proc = proc;
            Console.Error.WriteLine("Python OCR daemon ready");
        }
    }

    /// <summary>
    /// Best-effort kill used during cleanup; never throws.
    /// </summary>
    private static void TryKill(Process proc)
    {
        try
        {
            if (!proc.HasExited) proc.Kill();
        }
        catch
        {
            // HasExited/Kill throw when the process never started or is already gone; nothing to clean up then.
        }
    }

    /// <summary>
    /// Shuts the daemon down: closes its stdin, waits up to 3 seconds for it to exit,
    /// kills it if it is still running, then releases the process handle. Never throws.
    /// </summary>
    public void Dispose()
    {
        var proc = _proc;
        _proc = null;
        if (proc == null)
            return;

        try
        {
            if (!proc.HasExited)
            {
                proc.StandardInput.Close();
                if (!proc.WaitForExit(3000))
                    proc.Kill();
            }
        }
        catch
        {
            // Dispose must not throw; the process may have exited between the checks above.
        }
        finally
        {
            proc.Dispose();
        }
    }

    /// <summary>
    /// Shape of a single JSON line emitted by the Python daemon (ready signal or OCR result).
    /// </summary>
    private sealed class PythonResponse
    {
        [JsonPropertyName("ok")]
        public bool Ok { get; set; }

        [JsonPropertyName("ready")]
        public bool? Ready { get; set; }

        [JsonPropertyName("text")]
        public string? Text { get; set; }

        [JsonPropertyName("lines")]
        public List<OcrLineResult>? Lines { get; set; }

        [JsonPropertyName("error")]
        public string? Error { get; set; }
    }
}
|