poe2-bot/tools/OcrDaemon/Daemon.cs
2026-02-12 09:29:10 -05:00

168 lines
6.9 KiB
C#

namespace OcrDaemon;
using System.Text.Json;
using System.Text.Json.Serialization;
using Tesseract;
/// <summary>
/// Line-oriented OCR daemon: reads one JSON request per line from stdin,
/// dispatches it to the matching handler, and writes one JSON response per
/// line to stdout. Diagnostic messages are written to stderr.
/// </summary>
static class Daemon
{
    /// <summary>
    /// Serializer settings used for both request parsing and response writing:
    /// camelCase property names, and null-valued properties omitted on write.
    /// </summary>
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Creates the Tesseract engine, signals readiness, and serves requests
    /// until stdin reaches end-of-stream.
    /// </summary>
    /// <returns>
    /// 0 when stdin is exhausted; 1 when the Tesseract engine could not be
    /// created (an error response is written first).
    /// </returns>
    public static int Run()
    {
        ScreenCapture.InitDpiAwareness();

        // Prefer the "poe2" traineddata when it is present next to the
        // executable; otherwise fall back to "eng".
        var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
        var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";

        // The engine is created once and reused across all requests.
        TesseractEngine tessEngine;
        try
        {
            tessEngine = CreateTesseractEngine(tessdataPath, tessLang);
        }
        catch (Exception ex)
        {
            // Name the traineddata file that was actually requested, not always "eng".
            WriteResponse(new ErrorResponse($"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/{tessLang}.traineddata exists."));
            return 1;
        }

        try
        {
            // Signal ready
            WriteResponse(new ReadyResponse());
            ServeRequests(tessEngine);
            return 0;
        }
        finally
        {
            // Release the engine on every exit path, including an exception
            // escaping the request loop.
            tessEngine.Dispose();
        }
    }

    /// <summary>
    /// Builds and configures the Tesseract engine for <paramref name="tessLang"/>.
    /// If configuration fails after construction, the engine is disposed before
    /// the exception is rethrown.
    /// </summary>
    /// <param name="tessdataPath">Directory containing the traineddata and optional user files.</param>
    /// <param name="tessLang">Language name passed to Tesseract; also the prefix of the user files.</param>
    private static TesseractEngine CreateTesseractEngine(string tessdataPath, string tessLang)
    {
        var engine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
        try
        {
            engine.DefaultPageSegMode = PageSegMode.SingleBlock;
            engine.SetVariable("preserve_interword_spaces", "1");

            // NOTE(review): Tesseract appears to read user_words_file /
            // user_patterns_file while loading its dictionaries; confirm that
            // setting them after the engine is constructed actually takes effect.
            ApplyUserFile(engine, "user_words_file", Path.Combine(tessdataPath, $"{tessLang}.user-words"), "user-words", "words");
            ApplyUserFile(engine, "user_patterns_file", Path.Combine(tessdataPath, $"{tessLang}.user-patterns"), "user-patterns", "patterns");

            Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
            return engine;
        }
        catch
        {
            // Do not leak a half-configured engine.
            engine.Dispose();
            throw;
        }
    }

    /// <summary>
    /// If <paramref name="filePath"/> exists, points the Tesseract variable
    /// <paramref name="variable"/> at it and logs the file's line count to stderr.
    /// </summary>
    private static void ApplyUserFile(TesseractEngine engine, string variable, string filePath, string label, string unit)
    {
        if (!File.Exists(filePath))
        {
            return;
        }

        engine.SetVariable(variable, filePath);
        var lineCount = File.ReadAllLines(filePath).Length;
        Console.Error.WriteLine($"Loaded {label}: {lineCount} {unit} from {filePath}");
    }

    /// <summary>
    /// Main loop: read one JSON line, handle it, write one JSON line.
    /// Blank lines are skipped. Any exception raised while parsing or handling
    /// a request is reported as an error response and the loop continues.
    /// </summary>
    /// <param name="tessEngine">Engine handed to the OCR handler; not disposed here.</param>
    private static void ServeRequests(TesseractEngine tessEngine)
    {
        var ocrHandler = new OcrHandler(tessEngine);
        var gridHandler = new GridHandler();
        var detectGridHandler = new DetectGridHandler();
        var templateMatchHandler = new TemplateMatchHandler();
        var pythonBridge = new PythonOcrBridge();

        try
        {
            string? line;
            while ((line = Console.In.ReadLine()) != null)
            {
                line = line.Trim();
                if (line.Length == 0)
                {
                    continue;
                }

                try
                {
                    var request = JsonSerializer.Deserialize<Request>(line, JsonOptions);
                    if (request == null)
                    {
                        WriteResponse(new ErrorResponse("Failed to parse request"));
                        continue;
                    }

                    // Command names are matched case-insensitively. The guarded
                    // "easyocr" arms must stay ahead of their unguarded
                    // counterparts, because arms are tried in order.
                    object response = request.Cmd?.ToLowerInvariant() switch
                    {
                        "ocr" when request.Engine is "easyocr"
                            => pythonBridge.HandleOcr(request, request.Engine),
                        "ocr" => ocrHandler.HandleOcr(request),
                        "screenshot" => ocrHandler.HandleScreenshot(request),
                        "capture" => ocrHandler.HandleCapture(request),
                        "snapshot" => ocrHandler.HandleSnapshot(request),
                        "diff-ocr" when request.Engine is "easyocr"
                            => HandleDiffOcrPython(ocrHandler, pythonBridge, request),
                        "diff-ocr" => ocrHandler.HandleDiffOcr(request),
                        "test" => ocrHandler.HandleTest(request),
                        "tune" => ocrHandler.HandleTune(request),
                        "grid" => gridHandler.HandleGrid(request),
                        "detect-grid" => detectGridHandler.HandleDetectGrid(request),
                        "match-template" => templateMatchHandler.HandleTemplateMatch(request),
                        _ => new ErrorResponse($"Unknown command: {request.Cmd}"),
                    };
                    WriteResponse(response);
                }
                catch (Exception ex)
                {
                    WriteResponse(new ErrorResponse(ex.Message));
                }
            }
        }
        finally
        {
            // Runs even when ReadLine, or the WriteResponse call in the catch
            // block above, throws and escapes the loop.
            pythonBridge.Dispose();
        }
    }

    /// <summary>
    /// Handles "diff-ocr" for the Python OCR engine: crops the changed region,
    /// applies background subtraction, optionally saves the processed crop to
    /// <c>request.Path</c>, runs OCR through <paramref name="pythonBridge"/>,
    /// and offsets word coordinates by the crop region's origin.
    /// </summary>
    /// <param name="ocrHandler">Provides <c>DiffCrop</c>.</param>
    /// <param name="pythonBridge">Bridge that performs OCR on the processed image.</param>
    /// <param name="request">Incoming request; <c>Engine</c> is expected to be non-null (the caller matches it against "easyocr").</param>
    /// <returns>
    /// An empty <c>OcrResponse</c> when <c>DiffCrop</c> returns null; otherwise a
    /// <c>DiffOcrResponse</c> carrying the text, lines, and crop region.
    /// </returns>
    private static object HandleDiffOcrPython(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
    {
        var sw = System.Diagnostics.Stopwatch.StartNew();

        // Use default params (same wide crop as Tesseract path).
        // Background subtraction below eliminates stash items from the image.
        var p = new DiffOcrParams();
        if (request.Threshold > 0)
        {
            p.DiffThresh = request.Threshold;
        }

        var cropResult = ocrHandler.DiffCrop(request, p);
        if (cropResult == null)
        {
            return new OcrResponse { Text = "", Lines = [] };
        }

        var (cropped, refCropped, current, region) = cropResult.Value;

        // Tie all three images to this scope so they are released even when
        // preprocessing, saving, or OCR throws.
        using var currentFrame = current;
        using var croppedImage = cropped;
        using var referenceImage = refCropped;

        // Apply background subtraction to isolate tooltip text.
        // This removes stash items and game world — only tooltip text remains.
        // No upscale (upscale=1) to keep the image small for EasyOCR speed.
        // Hard threshold (softThreshold=false) produces clean binary for OCR.
        using var processed = ImagePreprocessor.PreprocessWithBackgroundSub(
            croppedImage, referenceImage, dimPercentile: 40, textThresh: 60, upscale: 1, softThreshold: false);
        var diffMs = sw.ElapsedMilliseconds;

        // Save processed crop if path provided
        if (!string.IsNullOrEmpty(request.Path))
        {
            var dir = Path.GetDirectoryName(request.Path);
            if (!string.IsNullOrEmpty(dir))
            {
                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(dir);
            }

            processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));
        }

        // Send processed image to Python OCR via base64
        sw.Restart();
        var ocrResult = pythonBridge.OcrFromBitmap(processed, request.Engine!);
        var ocrMs = sw.ElapsedMilliseconds;
        Console.Error.WriteLine($" diff-ocr-python: diff={diffMs}ms ocr={ocrMs}ms total={diffMs + ocrMs}ms crop={region.Width}x{region.Height}");

        // Offset word coordinates to screen space
        foreach (var line in ocrResult.Lines)
        {
            foreach (var word in line.Words)
            {
                word.X += region.X;
                word.Y += region.Y;
            }
        }

        return new DiffOcrResponse
        {
            Text = ocrResult.Text,
            Lines = ocrResult.Lines,
            Region = region,
        };
    }

    /// <summary>
    /// Serializes <paramref name="response"/> with <see cref="JsonOptions"/>,
    /// writes it as a single line to stdout, and flushes.
    /// </summary>
    private static void WriteResponse(object response)
    {
        var json = JsonSerializer.Serialize(response, JsonOptions);
        Console.Out.WriteLine(json);
        Console.Out.Flush();
    }
}