added easyOCR
This commit is contained in:
parent
37d6678577
commit
9f208b0606
27 changed files with 1780 additions and 112 deletions
193
tools/OcrDaemon/PythonOcrBridge.cs
Normal file
193
tools/OcrDaemon/PythonOcrBridge.cs
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Diagnostics;
|
||||
using System.Drawing;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||
|
||||
/// <summary>
|
||||
/// Manages a persistent Python subprocess for EasyOCR / PaddleOCR.
|
||||
/// Lazy-starts on first request; reuses the process for subsequent calls.
|
||||
/// Same stdin/stdout JSON-per-line protocol as the C# daemon itself.
|
||||
/// </summary>
|
||||
class PythonOcrBridge : IDisposable
|
||||
{
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
{
|
||||
PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
|
||||
private Process? _proc;
|
||||
private readonly string _daemonScript;
|
||||
private readonly string _pythonExe;
|
||||
private readonly object _lock = new();
|
||||
|
||||
public PythonOcrBridge()
|
||||
{
|
||||
// Resolve paths relative to this exe
|
||||
var exeDir = AppContext.BaseDirectory;
|
||||
// exeDir = tools/OcrDaemon/bin/Release/net8.0-.../
|
||||
// Walk up 4 levels to tools/
|
||||
var toolsDir = Path.GetFullPath(Path.Combine(exeDir, "..", "..", "..", ".."));
|
||||
_daemonScript = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", "daemon.py"));
|
||||
|
||||
// Use the venv Python if it exists, otherwise fall back to system python
|
||||
var venvPython = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", ".venv", "Scripts", "python.exe"));
|
||||
_pythonExe = File.Exists(venvPython) ? venvPython : "python";
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Run OCR on a screen region using the specified Python engine.
|
||||
/// Captures screenshot, saves to temp file, sends to Python, returns OcrResponse.
|
||||
/// </summary>
|
||||
public object HandleOcr(Request req, string engine)
|
||||
{
|
||||
var tmpPath = Path.Combine(Path.GetTempPath(), $"ocr_{Guid.NewGuid():N}.png");
|
||||
try
|
||||
{
|
||||
using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
|
||||
bitmap.Save(tmpPath, SdImageFormat.Png);
|
||||
return OcrFromFile(tmpPath, engine);
|
||||
}
|
||||
finally
|
||||
{
|
||||
try { File.Delete(tmpPath); } catch { /* ignore */ }
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Run OCR on an already-saved image file via the Python engine.
|
||||
/// </summary>
|
||||
public OcrResponse OcrFromFile(string imagePath, string engine)
|
||||
{
|
||||
EnsureRunning();
|
||||
|
||||
var pyReq = new { cmd = "ocr", engine, imagePath };
|
||||
return SendPythonRequest(pyReq);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Run OCR on a bitmap via the Python engine (base64 PNG over pipe, no temp file).
|
||||
/// </summary>
|
||||
public OcrResponse OcrFromBitmap(Bitmap bitmap, string engine)
|
||||
{
|
||||
EnsureRunning();
|
||||
|
||||
using var ms = new MemoryStream();
|
||||
bitmap.Save(ms, SdImageFormat.Png);
|
||||
var imageBase64 = Convert.ToBase64String(ms.ToArray());
|
||||
|
||||
var pyReq = new { cmd = "ocr", engine, imageBase64 };
|
||||
return SendPythonRequest(pyReq);
|
||||
}
|
||||
|
||||
private OcrResponse SendPythonRequest(object pyReq)
|
||||
{
|
||||
var json = JsonSerializer.Serialize(pyReq, JsonOptions);
|
||||
|
||||
string responseLine;
|
||||
lock (_lock)
|
||||
{
|
||||
_proc!.StandardInput.WriteLine(json);
|
||||
_proc.StandardInput.Flush();
|
||||
responseLine = _proc.StandardOutput.ReadLine()
|
||||
?? throw new Exception("Python daemon returned null");
|
||||
}
|
||||
|
||||
var resp = JsonSerializer.Deserialize<PythonResponse>(responseLine, JsonOptions);
|
||||
if (resp == null)
|
||||
throw new Exception("Failed to parse Python OCR response");
|
||||
if (!resp.Ok)
|
||||
throw new Exception(resp.Error ?? "Python OCR failed");
|
||||
|
||||
return new OcrResponse
|
||||
{
|
||||
Text = resp.Text ?? "",
|
||||
Lines = resp.Lines ?? [],
|
||||
};
|
||||
}
|
||||
|
||||
private void EnsureRunning()
|
||||
{
|
||||
if (_proc != null && !_proc.HasExited)
|
||||
return;
|
||||
|
||||
_proc?.Dispose();
|
||||
_proc = null;
|
||||
|
||||
if (!File.Exists(_daemonScript))
|
||||
throw new Exception($"Python OCR daemon not found at {_daemonScript}");
|
||||
|
||||
Console.Error.WriteLine($"Spawning Python OCR daemon: {_pythonExe} {_daemonScript}");
|
||||
|
||||
_proc = new Process
|
||||
{
|
||||
StartInfo = new ProcessStartInfo
|
||||
{
|
||||
FileName = _pythonExe,
|
||||
Arguments = $"\"{_daemonScript}\"",
|
||||
UseShellExecute = false,
|
||||
RedirectStandardInput = true,
|
||||
RedirectStandardOutput = true,
|
||||
RedirectStandardError = true,
|
||||
CreateNoWindow = true,
|
||||
}
|
||||
};
|
||||
|
||||
_proc.ErrorDataReceived += (_, e) =>
|
||||
{
|
||||
if (!string.IsNullOrEmpty(e.Data))
|
||||
Console.Error.WriteLine($"[python-ocr] {e.Data}");
|
||||
};
|
||||
|
||||
_proc.Start();
|
||||
_proc.BeginErrorReadLine();
|
||||
|
||||
// Wait for ready signal (up to 30s for first model load)
|
||||
var readyLine = _proc.StandardOutput.ReadLine();
|
||||
if (readyLine == null)
|
||||
throw new Exception("Python OCR daemon exited before ready signal");
|
||||
|
||||
var ready = JsonSerializer.Deserialize<PythonResponse>(readyLine, JsonOptions);
|
||||
if (ready?.Ready != true)
|
||||
throw new Exception($"Python OCR daemon did not send ready signal: {readyLine}");
|
||||
|
||||
Console.Error.WriteLine("Python OCR daemon ready");
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
if (_proc != null && !_proc.HasExited)
|
||||
{
|
||||
try
|
||||
{
|
||||
_proc.StandardInput.Close();
|
||||
_proc.WaitForExit(3000);
|
||||
if (!_proc.HasExited) _proc.Kill();
|
||||
}
|
||||
catch { /* ignore */ }
|
||||
}
|
||||
_proc?.Dispose();
|
||||
_proc = null;
|
||||
}
|
||||
|
||||
private class PythonResponse
|
||||
{
|
||||
[JsonPropertyName("ok")]
|
||||
public bool Ok { get; set; }
|
||||
|
||||
[JsonPropertyName("ready")]
|
||||
public bool? Ready { get; set; }
|
||||
|
||||
[JsonPropertyName("text")]
|
||||
public string? Text { get; set; }
|
||||
|
||||
[JsonPropertyName("lines")]
|
||||
public List<OcrLineResult>? Lines { get; set; }
|
||||
|
||||
[JsonPropertyName("error")]
|
||||
public string? Error { get; set; }
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue