work on OCR
This commit is contained in:
parent
6600969947
commit
854a474435
13 changed files with 4374 additions and 38 deletions
|
|
@ -11,7 +11,8 @@
|
|||
"start": "node dist/index.js",
|
||||
"stop:daemon": "taskkill /IM OcrDaemon.exe /F 2>nul || exit /b 0",
|
||||
"test:ocr": "taskkill /IM OcrDaemon.exe /F 2>nul & dotnet build tools/OcrDaemon -c Release && echo {\"cmd\":\"test\"} | tools\\OcrDaemon\\bin\\Release\\net8.0-windows10.0.19041.0\\OcrDaemon.exe",
|
||||
"tune:ocr": "taskkill /IM OcrDaemon.exe /F 2>nul & dotnet build tools/OcrDaemon -c Release && echo {\"cmd\":\"tune\"} | tools\\OcrDaemon\\bin\\Release\\net8.0-windows10.0.19041.0\\OcrDaemon.exe"
|
||||
"tune:ocr": "taskkill /IM OcrDaemon.exe /F 2>nul & dotnet build tools/OcrDaemon -c Release && echo {\"cmd\":\"tune\"} | tools\\OcrDaemon\\bin\\Release\\net8.0-windows10.0.19041.0\\OcrDaemon.exe",
|
||||
"generate:words": "node tools/OcrDaemon/tessdata/generate-words.mjs"
|
||||
},
|
||||
"dependencies": {
|
||||
"chokidar": "^4.0.3",
|
||||
|
|
|
|||
|
|
@ -25,6 +25,20 @@ static class Daemon
|
|||
tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
|
||||
tessEngine.DefaultPageSegMode = PageSegMode.SingleBlock;
|
||||
tessEngine.SetVariable("preserve_interword_spaces", "1");
|
||||
var userWordsPath = Path.Combine(tessdataPath, $"{tessLang}.user-words");
|
||||
var userPatternsPath = Path.Combine(tessdataPath, $"{tessLang}.user-patterns");
|
||||
if (File.Exists(userWordsPath))
|
||||
{
|
||||
tessEngine.SetVariable("user_words_file", userWordsPath);
|
||||
var lineCount = File.ReadAllLines(userWordsPath).Length;
|
||||
Console.Error.WriteLine($"Loaded user-words: {lineCount} words from {userWordsPath}");
|
||||
}
|
||||
if (File.Exists(userPatternsPath))
|
||||
{
|
||||
tessEngine.SetVariable("user_patterns_file", userPatternsPath);
|
||||
var lineCount = File.ReadAllLines(userPatternsPath).Length;
|
||||
Console.Error.WriteLine($"Loaded user-patterns: {lineCount} patterns from {userPatternsPath}");
|
||||
}
|
||||
Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
|
|
|||
|
|
@ -42,9 +42,10 @@ static class ImagePreprocessor
|
|||
/// Background-subtraction preprocessing: uses the reference frame to remove
|
||||
/// background bleed-through from the semi-transparent tooltip overlay.
|
||||
/// Pipeline: estimate dimming factor → subtract expected background → threshold → upscale
|
||||
/// Returns the upscaled binary Mat directly (caller must dispose).
|
||||
/// </summary>
|
||||
public static Bitmap PreprocessWithBackgroundSub(Bitmap tooltipCrop, Bitmap referenceCrop,
|
||||
int dimPercentile = 25, int textThresh = 30, int upscale = 2)
|
||||
public static Mat PreprocessWithBackgroundSubMat(Bitmap tooltipCrop, Bitmap referenceCrop,
|
||||
int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
|
||||
{
|
||||
using var curMat = BitmapConverter.ToMat(tooltipCrop);
|
||||
using var refMat = BitmapConverter.ToMat(referenceCrop);
|
||||
|
|
@ -77,7 +78,11 @@ static class ImagePreprocessor
|
|||
}
|
||||
|
||||
if (ratios.Count == 0)
|
||||
return PreprocessForOcr(tooltipCrop, 41, upscale); // fallback
|
||||
{
|
||||
// Fallback: use top-hat preprocessing, convert to Mat
|
||||
using var fallbackBmp = PreprocessForOcr(tooltipCrop, 41, upscale);
|
||||
return BitmapConverter.ToMat(fallbackBmp);
|
||||
}
|
||||
|
||||
// Use a low percentile of ratios as the dimming factor.
|
||||
// Text pixels have high ratios (bright on dark), overlay pixels have low ratios.
|
||||
|
|
@ -108,19 +113,122 @@ static class ImagePreprocessor
|
|||
}
|
||||
}
|
||||
|
||||
// Threshold: pixels above textThresh are text
|
||||
using var binary = new Mat();
|
||||
Cv2.Threshold(textSignal, binary, textThresh, 255, ThresholdTypes.BinaryInv);
|
||||
|
||||
// Upscale for better LSTM recognition
|
||||
if (upscale > 1)
|
||||
Mat result;
|
||||
if (softThreshold)
|
||||
{
|
||||
using var upscaled = new Mat();
|
||||
Cv2.Resize(binary, upscaled, new OpenCvSharp.Size(binary.Width * upscale, binary.Height * upscale),
|
||||
interpolation: InterpolationFlags.Cubic);
|
||||
return BitmapConverter.ToBitmap(upscaled);
|
||||
// Soft threshold: clip below textThresh, contrast-stretch, invert.
|
||||
// Produces grayscale anti-aliased text on white background,
|
||||
// matching the training data format (text2image renders).
|
||||
result = new Mat(rows, cols, MatType.CV_8UC1);
|
||||
unsafe
|
||||
{
|
||||
byte* srcPtr = (byte*)textSignal.Data;
|
||||
byte* dstPtr = (byte*)result.Data;
|
||||
int srcStep = (int)textSignal.Step();
|
||||
int dstStep = (int)result.Step();
|
||||
|
||||
// Find max signal above threshold for contrast stretch
|
||||
int maxClipped = 1;
|
||||
for (int y = 0; y < rows; y++)
|
||||
for (int x = 0; x < cols; x++)
|
||||
{
|
||||
int val = srcPtr[y * srcStep + x] - textThresh;
|
||||
if (val > maxClipped) maxClipped = val;
|
||||
}
|
||||
|
||||
// Clip, stretch, invert: background → 255 (white), text → dark
|
||||
for (int y = 0; y < rows; y++)
|
||||
for (int x = 0; x < cols; x++)
|
||||
{
|
||||
int clipped = srcPtr[y * srcStep + x] - textThresh;
|
||||
if (clipped <= 0)
|
||||
{
|
||||
dstPtr[y * dstStep + x] = 255; // background
|
||||
}
|
||||
else
|
||||
{
|
||||
int stretched = clipped * 255 / maxClipped;
|
||||
dstPtr[y * dstStep + x] = (byte)(255 - stretched); // invert
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Hard binary threshold (original behavior)
|
||||
result = new Mat();
|
||||
Cv2.Threshold(textSignal, result, textThresh, 255, ThresholdTypes.BinaryInv);
|
||||
}
|
||||
|
||||
return BitmapConverter.ToBitmap(binary);
|
||||
using var _result = result;
|
||||
return UpscaleMat(result, upscale);
|
||||
}
|
||||
|
||||
/// <summary>
/// Convenience overload of <see cref="PreprocessWithBackgroundSubMat"/> for callers that
/// want a <see cref="Bitmap"/>: runs the Mat-based pipeline with the same arguments,
/// converts the result, and disposes the intermediate Mat.
/// </summary>
/// <returns>A new Bitmap holding the preprocessed image (caller must dispose).</returns>
public static Bitmap PreprocessWithBackgroundSub(Bitmap tooltipCrop, Bitmap referenceCrop,
    int dimPercentile = 25, int textThresh = 30, int upscale = 2, bool softThreshold = true)
{
    using Mat processed = PreprocessWithBackgroundSubMat(
        tooltipCrop, referenceCrop, dimPercentile, textThresh, upscale, softThreshold);

    Bitmap converted = BitmapConverter.ToBitmap(processed);
    return converted;
}
|
||||
|
||||
/// <summary>
/// Detect text lines via horizontal projection on an 8-bit image.
/// The image should be inverted: text=black(0), background=white(255).
/// A pixel counts as text when its value is below 128, so anti-aliased
/// (grayscale) input is tolerated as well as strictly binary input.
/// </summary>
/// <param name="binary">
/// 8-bit image to scan. For multi-channel images only the first channel of each
/// pixel is sampled. NOTE(review): 8-bit depth is assumed, not checked — confirm callers.
/// </param>
/// <param name="minRowPixels">Minimum number of text pixels for a row to count as part of a line.</param>
/// <param name="gapTolerance">
/// Number of consecutive non-text rows that may occur inside a line without splitting it.
/// </param>
/// <returns>List of inclusive (yStart, yEnd) row ranges, one per detected text line, top to bottom.</returns>
public static List<(int yStart, int yEnd)> DetectTextLines(
    Mat binary, int minRowPixels = 2, int gapTolerance = 5)
{
    int rows = binary.Rows, cols = binary.Cols;

    // Bytes between horizontally adjacent pixels. Indexing by x alone is only
    // correct for single-channel data; striding by the channel count keeps the
    // scan covering the full row width for 3/4-channel 8-bit images too.
    int pixelStride = binary.Channels();

    // Count dark (text) pixels per row — use < 128 threshold since
    // cubic upscaling introduces anti-aliased intermediate values
    var rowCounts = new int[rows];
    unsafe
    {
        byte* basePtr = (byte*)binary.Data;
        long step = binary.Step(); // row pitch in bytes; kept as long to avoid truncation
        for (int y = 0; y < rows; y++)
        {
            byte* row = basePtr + y * step;
            int dark = 0;
            for (int x = 0; x < cols; x++)
            {
                if (row[x * pixelStride] < 128)
                {
                    dark++;
                }
            }
            rowCounts[y] = dark;
        }
    }

    // Group active rows into runs. A run is closed once more than gapTolerance
    // inactive rows have passed since its last active row.
    var lines = new List<(int yStart, int yEnd)>();
    int lineStart = -1, lastActive = -1;
    for (int y = 0; y < rows; y++)
    {
        if (rowCounts[y] >= minRowPixels)
        {
            if (lineStart < 0)
            {
                lineStart = y;
            }
            lastActive = y;
        }
        else if (lineStart >= 0 && y - lastActive > gapTolerance)
        {
            lines.Add((lineStart, lastActive));
            lineStart = -1;
        }
    }

    // A run still open at the bottom edge of the image is a line too.
    if (lineStart >= 0)
    {
        lines.Add((lineStart, lastActive));
    }

    return lines;
}
|
||||
|
||||
/// <summary>
/// Produces an enlarged copy of <paramref name="src"/> using cubic interpolation.
/// When <paramref name="factor"/> is 1 or less, a plain clone is returned instead.
/// The result is always a new Mat (caller must dispose); <paramref name="src"/> is never disposed here.
/// </summary>
private static Mat UpscaleMat(Mat src, int factor)
{
    // No scaling requested: still return an independent copy so the caller
    // owns the result in both branches.
    if (factor <= 1)
    {
        return src.Clone();
    }

    var enlarged = new Mat();
    var targetSize = new OpenCvSharp.Size(src.Width * factor, src.Height * factor);
    Cv2.Resize(src, enlarged, targetSize, interpolation: InterpolationFlags.Cubic);
    return enlarged;
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -241,12 +241,30 @@ class DiffOcrParams
|
|||
[JsonPropertyName("textThresh")]
|
||||
public int TextThresh { get; set; } = 60;
|
||||
|
||||
[JsonPropertyName("softThreshold")]
|
||||
public bool SoftThreshold { get; set; } = false;
|
||||
|
||||
[JsonPropertyName("ocrPad")]
|
||||
public int OcrPad { get; set; } = 10;
|
||||
|
||||
[JsonPropertyName("usePerLineOcr")]
|
||||
public bool UsePerLineOcr { get; set; } = false;
|
||||
|
||||
[JsonPropertyName("lineGapTolerance")]
|
||||
public int LineGapTolerance { get; set; } = 10;
|
||||
|
||||
[JsonPropertyName("linePadY")]
|
||||
public int LinePadY { get; set; } = 20;
|
||||
|
||||
[JsonPropertyName("psm")]
|
||||
public int Psm { get; set; } = 6;
|
||||
|
||||
public DiffOcrParams Clone() => (DiffOcrParams)MemberwiseClone();
|
||||
|
||||
public override string ToString() =>
|
||||
UseBackgroundSub
|
||||
? $"bgSub dimPct={DimPercentile} textThresh={TextThresh} diffThresh={DiffThresh} rowThreshDiv={RowThreshDiv} colThreshDiv={ColThreshDiv} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} upscale={Upscale}"
|
||||
: $"topHat kernelSize={KernelSize} diffThresh={DiffThresh} rowThreshDiv={RowThreshDiv} colThreshDiv={ColThreshDiv} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} upscale={Upscale}";
|
||||
? $"bgSub dimPct={DimPercentile} textThresh={TextThresh} soft={SoftThreshold} ocrPad={OcrPad} perLine={UsePerLineOcr} lineGap={LineGapTolerance} linePadY={LinePadY} psm={Psm} diffThresh={DiffThresh} rowThreshDiv={RowThreshDiv} colThreshDiv={ColThreshDiv} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} upscale={Upscale}"
|
||||
: $"topHat kernelSize={KernelSize} ocrPad={OcrPad} perLine={UsePerLineOcr} lineGap={LineGapTolerance} linePadY={LinePadY} psm={Psm} diffThresh={DiffThresh} rowThreshDiv={RowThreshDiv} colThreshDiv={ColThreshDiv} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} upscale={Upscale}";
|
||||
}
|
||||
|
||||
class TestCase
|
||||
|
|
|
|||
|
|
@ -26,6 +26,12 @@
|
|||
<None Update="tessdata\cases.json">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="tessdata\poe2.user-words" Condition="Exists('tessdata\poe2.user-words')">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="tessdata\poe2.user-patterns" Condition="Exists('tessdata\poe2.user-patterns')">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Include="tessdata\images\*">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ using System.Drawing;
|
|||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text.Json;
|
||||
using OpenCvSharp;
|
||||
using OpenCvSharp.Extensions;
|
||||
using Tesseract;
|
||||
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||
|
||||
|
|
@ -188,11 +190,10 @@ class OcrHandler(TesseractEngine engine)
|
|||
return new OcrResponse { Text = "", Lines = [] };
|
||||
}
|
||||
|
||||
int pad = 0;
|
||||
int minX = Math.Max(bestColStart - pad, 0);
|
||||
int minY = Math.Max(bestRowStart - pad, 0);
|
||||
int maxX = Math.Min(bestColEnd + pad, w - 1);
|
||||
int maxY = Math.Min(bestRowEnd + pad, h - 1);
|
||||
int minX = bestColStart;
|
||||
int minY = bestRowStart;
|
||||
int maxX = Math.Min(bestColEnd, w - 1);
|
||||
int maxY = Math.Min(bestRowEnd, h - 1);
|
||||
|
||||
// Dynamic right-edge trim: if the rightmost columns are much sparser than
|
||||
// the tooltip body, trim them. This handles the ~5% of cases where ambient
|
||||
|
|
@ -232,10 +233,18 @@ class OcrHandler(TesseractEngine engine)
|
|||
if (debug) Console.Error.WriteLine($" diff-ocr: saved raw to {req.Path}");
|
||||
}
|
||||
|
||||
// Pre-process for OCR
|
||||
using var processed = p.UseBackgroundSub
|
||||
? ImagePreprocessor.PreprocessWithBackgroundSub(cropped, refCropped, p.DimPercentile, p.TextThresh, p.Upscale)
|
||||
: ImagePreprocessor.PreprocessForOcr(cropped, p.KernelSize, p.Upscale);
|
||||
// Pre-process for OCR — get Mat for per-line detection and padding
|
||||
Mat processedMat;
|
||||
if (p.UseBackgroundSub)
|
||||
{
|
||||
processedMat = ImagePreprocessor.PreprocessWithBackgroundSubMat(cropped, refCropped, p.DimPercentile, p.TextThresh, p.Upscale, p.SoftThreshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
using var topHatBmp = ImagePreprocessor.PreprocessForOcr(cropped, p.KernelSize, p.Upscale);
|
||||
processedMat = BitmapConverter.ToMat(topHatBmp);
|
||||
}
|
||||
using var _processedMat = processedMat; // ensure disposal
|
||||
|
||||
// Save fullscreen and preprocessed versions alongside raw
|
||||
if (!string.IsNullOrEmpty(req.Path))
|
||||
|
|
@ -246,21 +255,114 @@ class OcrHandler(TesseractEngine engine)
|
|||
if (debug) Console.Error.WriteLine($" diff-ocr: saved fullscreen to {fullPath}");
|
||||
|
||||
var prePath = Path.ChangeExtension(req.Path, ".pre" + ext);
|
||||
processed.Save(prePath, ImageUtils.GetImageFormat(prePath));
|
||||
using var preBmp = BitmapConverter.ToBitmap(processedMat);
|
||||
preBmp.Save(prePath, ImageUtils.GetImageFormat(prePath));
|
||||
if (debug) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
|
||||
}
|
||||
using var pix = ImageUtils.BitmapToPix(processed);
|
||||
using var page = engine.Process(pix);
|
||||
|
||||
var text = page.GetText();
|
||||
var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: minX, offsetY: minY);
|
||||
int pad = p.OcrPad;
|
||||
int upscale = p.Upscale > 0 ? p.Upscale : 1;
|
||||
var lines = new List<OcrLineResult>();
|
||||
|
||||
return new DiffOcrResponse
|
||||
// Per-line OCR: detect text lines via horizontal projection, OCR each individually
|
||||
if (p.UsePerLineOcr)
|
||||
{
|
||||
Text = text,
|
||||
Lines = lines,
|
||||
Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
|
||||
};
|
||||
// DetectTextLines needs binary input; if soft threshold produced grayscale, binarize a copy
|
||||
int minRowPx = Math.Max(processedMat.Cols / 200, 3);
|
||||
using var detectionMat = p.SoftThreshold ? new Mat() : null;
|
||||
if (p.SoftThreshold)
|
||||
Cv2.Threshold(processedMat, detectionMat!, 128, 255, ThresholdTypes.Binary);
|
||||
var lineDetectInput = p.SoftThreshold ? detectionMat! : processedMat;
|
||||
var textLines = ImagePreprocessor.DetectTextLines(lineDetectInput, minRowPixels: minRowPx, gapTolerance: p.LineGapTolerance * upscale);
|
||||
if (debug) Console.Error.WriteLine($" diff-ocr: detected {textLines.Count} text lines");
|
||||
|
||||
if (textLines.Count > 0)
|
||||
{
|
||||
int linePadY = p.LinePadY;
|
||||
foreach (var (yStart, yEnd) in textLines)
|
||||
{
|
||||
int y0 = Math.Max(yStart - linePadY, 0);
|
||||
int y1 = Math.Min(yEnd + linePadY, processedMat.Rows - 1);
|
||||
int lineH = y1 - y0 + 1;
|
||||
|
||||
// Crop line strip (full width)
|
||||
using var lineStrip = new Mat(processedMat, new OpenCvSharp.Rect(0, y0, processedMat.Cols, lineH));
|
||||
|
||||
// Add whitespace padding around the line
|
||||
using var padded = new Mat();
|
||||
Cv2.CopyMakeBorder(lineStrip, padded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);
|
||||
|
||||
using var lineBmp = BitmapConverter.ToBitmap(padded);
|
||||
using var linePix = ImageUtils.BitmapToPix(lineBmp);
|
||||
using var linePage = engine.Process(linePix, (PageSegMode)p.Psm);
|
||||
|
||||
// Extract words, adjusting coordinates back to screen space
|
||||
// Word coords are in padded image space → subtract pad, add line offset, scale to original, add region offset
|
||||
var lineWords = new List<OcrWordResult>();
|
||||
using var iter = linePage.GetIterator();
|
||||
if (iter != null)
|
||||
{
|
||||
iter.Begin();
|
||||
do
|
||||
{
|
||||
var wordText = iter.GetText(PageIteratorLevel.Word);
|
||||
if (string.IsNullOrWhiteSpace(wordText)) continue;
|
||||
|
||||
float conf = iter.GetConfidence(PageIteratorLevel.Word);
|
||||
if (conf < 50) continue;
|
||||
|
||||
if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var bounds))
|
||||
{
|
||||
lineWords.Add(new OcrWordResult
|
||||
{
|
||||
Text = wordText.Trim(),
|
||||
X = (bounds.X1 - pad + 0) / upscale + minX,
|
||||
Y = (bounds.Y1 - pad + y0) / upscale + minY,
|
||||
Width = bounds.Width / upscale,
|
||||
Height = bounds.Height / upscale,
|
||||
});
|
||||
}
|
||||
} while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));
|
||||
}
|
||||
|
||||
if (lineWords.Count > 0)
|
||||
{
|
||||
var lineText = string.Join(" ", lineWords.Select(w => w.Text));
|
||||
lines.Add(new OcrLineResult { Text = lineText, Words = lineWords });
|
||||
}
|
||||
}
|
||||
|
||||
var text = string.Join("\n", lines.Select(l => l.Text)) + "\n";
|
||||
return new DiffOcrResponse
|
||||
{
|
||||
Text = text,
|
||||
Lines = lines,
|
||||
Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
|
||||
};
|
||||
}
|
||||
|
||||
if (debug) Console.Error.WriteLine(" diff-ocr: no text lines detected, falling back to whole-block OCR");
|
||||
}
|
||||
|
||||
// Whole-block fallback: add padding and use configurable PSM
|
||||
{
|
||||
using var padded = new Mat();
|
||||
Cv2.CopyMakeBorder(processedMat, padded, pad, pad, pad, pad, BorderTypes.Constant, Scalar.White);
|
||||
using var bmp = BitmapConverter.ToBitmap(padded);
|
||||
using var pix = ImageUtils.BitmapToPix(bmp);
|
||||
using var page = engine.Process(pix, (PageSegMode)p.Psm);
|
||||
|
||||
var text = page.GetText();
|
||||
// Adjust word coordinates: subtract padding offset
|
||||
lines = ImageUtils.ExtractLinesFromPage(page, offsetX: minX - pad / upscale, offsetY: minY - pad / upscale);
|
||||
|
||||
return new DiffOcrResponse
|
||||
{
|
||||
Text = text,
|
||||
Lines = lines,
|
||||
Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
public object HandleTest(Request req) => RunTestCases(new DiffOcrParams(), verbose: true);
|
||||
|
|
@ -314,6 +416,8 @@ class OcrHandler(TesseractEngine engine)
|
|||
("colThreshDiv", [5, 8, 10, 12, 15, 20, 25, 30], (p, v) => p.ColThreshDiv = v),
|
||||
("maxGap", [5, 8, 10, 12, 15, 20, 25, 30], (p, v) => p.MaxGap = v),
|
||||
("upscale", [1, 2, 3], (p, v) => p.Upscale = v),
|
||||
("ocrPad", [0, 5, 10, 15, 20, 30], (p, v) => p.OcrPad = v),
|
||||
("psm", [4, 6, 11, 13], (p, v) => p.Psm = v),
|
||||
};
|
||||
|
||||
// Top-hat specific
|
||||
|
|
@ -325,8 +429,10 @@ class OcrHandler(TesseractEngine engine)
|
|||
// Background-subtraction specific
|
||||
var bgSubSweeps = new (string Name, int[] Values, Action<DiffOcrParams, int> Set)[]
|
||||
{
|
||||
("dimPercentile", [5, 10, 15, 20, 25, 30, 40, 50], (p, v) => p.DimPercentile = v),
|
||||
("textThresh", [10, 15, 20, 25, 30, 40, 50, 60, 80], (p, v) => p.TextThresh = v),
|
||||
("dimPercentile", [5, 10, 15, 20, 25, 30, 40, 50], (p, v) => p.DimPercentile = v),
|
||||
("textThresh", [10, 15, 20, 25, 30, 40, 50, 60, 80], (p, v) => p.TextThresh = v),
|
||||
("lineGapTolerance", [3, 5, 8, 10, 15], (p, v) => p.LineGapTolerance = v),
|
||||
("linePadY", [5, 10, 15, 20], (p, v) => p.LinePadY = v),
|
||||
};
|
||||
|
||||
double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5];
|
||||
|
|
|
|||
166
tools/OcrDaemon/tessdata/generate-words.mjs
Normal file
166
tools/OcrDaemon/tessdata/generate-words.mjs
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* Fetches POE2 trade API data and generates Tesseract user-words and user-patterns
|
||||
* files to improve OCR accuracy for tooltip text.
|
||||
*
|
||||
* Usage: node generate-words.mjs
|
||||
* Output: poe2.user-words, poe2.user-patterns (in same directory)
|
||||
*/
|
||||
|
||||
import { writeFileSync } from "fs";
|
||||
import { dirname, join } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const UA = "OAuth poe2trade/1.0 (contact: poe2trade@users.noreply.github.com)";
|
||||
|
||||
/**
 * Fetch one dataset from the POE2 trade data API and return its parsed JSON body.
 *
 * @param {string} path - Dataset name appended to the trade2 data endpoint.
 * @returns {Promise<any>} Parsed JSON response.
 * @throws {Error} When the response status is not OK; the message holds the URL and status code.
 */
async function fetchJson(path) {
  const endpoint = `https://www.pathofexile.com/api/trade2/data/${path}`;
  const requestOptions = { headers: { "User-Agent": UA } };
  const response = await fetch(endpoint, requestOptions);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`${endpoint}: ${response.status}`);
}
|
||||
|
||||
/**
 * Downloads the POE2 trade API datasets (items, stats, static, filters),
 * harvests every individual word they contain, and writes two Tesseract
 * dictionary files next to this script:
 *   - poe2.user-words    (one word per line, sorted case-insensitively)
 *   - poe2.user-patterns (one Tesseract user-pattern per line)
 *
 * Rejects if any request fails or a file cannot be written.
 */
async function main() {
  console.log("Fetching POE2 trade API data...");
  const [items, stats, static_, filters] = await Promise.all([
    fetchJson("items"),
    fetchJson("stats"),
    fetchJson("static"),
    fetchJson("filters"),
  ]);

  const words = new Set();

  // Split text into individual words and add each one. The user-words file is
  // a per-word dictionary, so multi-word names are always broken up.
  function addWords(text) {
    // Missing fields are common in the API data; skip anything that is not text.
    if (typeof text !== "string" || text === "") return;
    // Remove # placeholders and bracket characters; treat +/- as separators
    const cleaned = text
      .replace(/#/g, "")
      .replace(/[{}()\[\]]/g, "")
      .replace(/[+\-]/g, " ");
    for (const word of cleaned.split(/\s+/)) {
      // Strip leading/trailing non-letters; drop numbers and single characters
      const trimmed = word.replace(/^[^a-zA-Z]+|[^a-zA-Z]+$/g, "");
      if (trimmed.length >= 2) words.add(trimmed);
    }
  }

  // Items: type names (base types like "Tribal Mask", "Leather Vest")
  for (const cat of items.result) {
    addWords(cat.label);
    for (const entry of cat.entries ?? []) {
      addWords(entry.type);
      addWords(entry.name);
      addWords(entry.text);
    }
  }

  // Stats: mod text like "+#% to Chaos Resistance", "# to maximum Life"
  for (const cat of stats.result) {
    for (const entry of cat.entries ?? []) {
      addWords(entry.text);
    }
  }

  // Static: currency/fragment names like "Divine Orb", "Scroll of Wisdom"
  for (const cat of static_.result) {
    addWords(cat.label);
    for (const entry of cat.entries ?? []) {
      addWords(entry.text);
    }
  }

  // Filters: filter labels and option texts
  for (const cat of filters.result) {
    addWords(cat.title);
    for (const f of cat.filters ?? []) {
      addWords(f.text);
      for (const opt of f.option?.options ?? []) {
        addWords(opt.text);
      }
    }
  }

  // Add common tooltip keywords not in trade API
  const extraWords = [
    // Section headers
    "Quality", "Requires", "Level", "Asking", "Price",
    "Corrupted", "Mirrored", "Unmodifiable",
    "Twice", "Sockets",
    // Attributes
    "Strength", "Dexterity", "Intelligence", "Spirit",
    // Defense types
    "Armour", "Evasion", "Rating", "Energy", "Shield",
    // Damage types
    "Physical", "Elemental", "Lightning", "Cold", "Fire", "Chaos",
    // Common mod words
    "increased", "reduced", "more", "less",
    "added", "converted", "regeneration",
    "maximum", "minimum", "total",
    "Resistance", "Damage", "Speed", "Duration",
    "Critical", "Hit", "Chance", "Multiplier",
    "Attack", "Cast", "Spell", "Minion", "Skill",
    "Mana", "Life", "Rarity",
    // Item classes
    "Helmet", "Gloves", "Boots", "Body", "Belt",
    "Ring", "Amulet", "Shield", "Quiver",
    "Sword", "Axe", "Mace", "Dagger", "Wand", "Staff", "Bow",
    "Sceptre", "Crossbow", "Flail", "Spear",
    // Rarity
    "Normal", "Magic", "Rare", "Unique",
  ];
  for (const w of extraWords) words.add(w);

  // Sort and write user-words
  const sortedWords = [...words].sort((a, b) => a.toLowerCase().localeCompare(b.toLowerCase()));
  const wordsPath = join(__dirname, "poe2.user-words");
  writeFileSync(wordsPath, sortedWords.join("\n") + "\n");
  console.log(`Wrote ${sortedWords.length} words to ${wordsPath}`);

  // Generate user-patterns for common tooltip formats.
  //
  // Tesseract user-patterns are NOT regular expressions. The format knows the
  // classes \c (alpha), \d (digit), \n (alphanumeric), \p (punctuation),
  // \a (lower), \A (upper) and the suffix \* meaning "repeat the preceding
  // character/class". Everything else is a literal, so "+" and "-" are written
  // bare and "one or more digits" is spelled \d\* rather than \d+.
  //
  // NOTE(review): patterns appear to be matched per word — confirm that the
  // entries containing spaces below are accepted by the engine.
  const patterns = [
    // Stat values: "+12% to Chaos Resistance", "+3 to Level"
    "+\\d\\*%",
    "+\\d\\*",
    "-\\d\\*%",
    "-\\d\\*",
    // Ranges: "10-20"
    "\\d\\*-\\d\\*",
    // Currency amounts: "7x Divine Orb", "35x Divine Orb"
    "\\d\\*x",
    // Percentages: "20%"
    "\\d\\*%",
    // Level requirements: "Level 65"
    "Level \\d\\*",
    // Asking Price section
    "Asking Price:",
    // Item level
    "Item Level: \\d\\*",
    // Requires line
    "Requires:",
    // Rating values
    "Rating: \\d\\*",
    "Shield: \\d\\*",
    "Quality: +\\d\\*%",
  ];
  const patternsPath = join(__dirname, "poe2.user-patterns");
  writeFileSync(patternsPath, patterns.join("\n") + "\n");
  console.log(`Wrote ${patterns.length} patterns to ${patternsPath}`);
}
|
||||
|
||||
// Script entry point: run the generator and exit non-zero on any failure.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
||||
14
tools/OcrDaemon/tessdata/poe2.user-patterns
Normal file
14
tools/OcrDaemon/tessdata/poe2.user-patterns
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
\+\d+%
|
||||
\+\d+
|
||||
\-\d+%
|
||||
\-\d+
|
||||
\d+-\d+
|
||||
\d+x
|
||||
\d+%
|
||||
Level \d+
|
||||
Asking Price:
|
||||
Item Level: \d+
|
||||
Requires:
|
||||
Rating: \d+
|
||||
Shield: \d+
|
||||
Quality: \+\d+%
|
||||
3899
tools/OcrDaemon/tessdata/poe2.user-words
Normal file
3899
tools/OcrDaemon/tessdata/poe2.user-words
Normal file
File diff suppressed because it is too large
Load diff
1
tools/OcrDaemon/tessdata/poe2_filters.json
Normal file
1
tools/OcrDaemon/tessdata/poe2_filters.json
Normal file
File diff suppressed because one or more lines are too long
1
tools/OcrDaemon/tessdata/poe2_items.json
Normal file
1
tools/OcrDaemon/tessdata/poe2_items.json
Normal file
File diff suppressed because one or more lines are too long
1
tools/OcrDaemon/tessdata/poe2_static.json
Normal file
1
tools/OcrDaemon/tessdata/poe2_static.json
Normal file
File diff suppressed because one or more lines are too long
1
tools/OcrDaemon/tessdata/poe2_stats.json
Normal file
1
tools/OcrDaemon/tessdata/poe2_stats.json
Normal file
File diff suppressed because one or more lines are too long
Loading…
Add table
Add a link
Reference in a new issue