work on OCR

This commit is contained in:
Boki 2026-02-11 17:42:28 -05:00
parent 6600969947
commit 854a474435
13 changed files with 4374 additions and 38 deletions

View file

@ -25,6 +25,20 @@ static class Daemon
// Optional user dictionaries live next to the traineddata as "<lang>.user-words" and
// "<lang>.user-patterns". Resolve them before the engine is created so they can be
// handed over as initialisation options.
var userWordsPath = Path.Combine(tessdataPath, $"{tessLang}.user-words");
var userPatternsPath = Path.Combine(tessdataPath, $"{tessLang}.user-patterns");
var hasUserWords = File.Exists(userWordsPath);
var hasUserPatterns = File.Exists(userPatternsPath);

// NOTE(review): Tesseract is understood to read user_words_file / user_patterns_file
// while the language data is being loaded, so calling SetVariable for them after the
// engine exists would be accepted but have no effect. Passing them as initial options
// avoids that. Confirm against the Tesseract version shipped with this project.
var initOptions = new System.Collections.Generic.Dictionary<string, object>();
if (hasUserWords)
{
    initOptions["user_words_file"] = userWordsPath;
}
if (hasUserPatterns)
{
    initOptions["user_patterns_file"] = userPatternsPath;
}

// No extra config files; setOnlyNonDebugVariables = false matches what the shorter
// constructor overloads use.
tessEngine = new TesseractEngine(
    tessdataPath,
    tessLang,
    EngineMode.LstmOnly,
    System.Array.Empty<string>(),
    initOptions,
    false);
tessEngine.DefaultPageSegMode = PageSegMode.SingleBlock;
tessEngine.SetVariable("preserve_interword_spaces", "1");

// Report what was picked up. The count is the raw number of lines in each file.
if (hasUserWords)
{
    var wordCount = File.ReadAllLines(userWordsPath).Length;
    Console.Error.WriteLine($"Loaded user-words: {wordCount} words from {userWordsPath}");
}
if (hasUserPatterns)
{
    var patternCount = File.ReadAllLines(userPatternsPath).Length;
    Console.Error.WriteLine($"Loaded user-patterns: {patternCount} patterns from {userPatternsPath}");
}
Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
}
catch (Exception ex)