added easyOCR

This commit is contained in:
Boki 2026-02-12 01:04:19 -05:00
parent 37d6678577
commit 9f208b0606
27 changed files with 1780 additions and 112 deletions

View file

@ -3,6 +3,8 @@ namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using System.Text.Json;
using OpenCvSharp;
using OpenCvSharp.Extensions;
@ -61,17 +63,20 @@ class OcrHandler(TesseractEngine engine)
? new DiffOcrParams { DiffThresh = req.Threshold }
: new DiffOcrParams());
public object HandleDiffOcr(Request req, DiffOcrParams p)
/// <summary>
/// Diff detection + crop only. Returns the raw tooltip crop bitmap and region,
/// or null if no tooltip detected. Caller is responsible for disposing the bitmap.
/// </summary>
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffOcrParams p)
{
if (_referenceFrame == null)
return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
return null;
using var current = ScreenCapture.CaptureOrLoad(req.File, null);
var current = ScreenCapture.CaptureOrLoad(req.File, null);
int w = Math.Min(_referenceFrame.Width, current.Width);
int h = Math.Min(_referenceFrame.Height, current.Height);
// Get raw pixels for both frames
var refData = _referenceFrame.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
byte[] refPx = new byte[refData.Stride * h];
Marshal.Copy(refData.Scan0, refPx, 0, refPx.Length);
@ -83,49 +88,34 @@ class OcrHandler(TesseractEngine engine)
Marshal.Copy(curData.Scan0, curPx, 0, curPx.Length);
current.UnlockBits(curData);
// Detect pixels that got DARKER (tooltip = dark overlay).
// This filters out item highlight glow (brighter) and cursor changes.
int diffThresh = p.DiffThresh;
bool[] changed = new bool[w * h];
int totalChanged = 0;
for (int y = 0; y < h; y++)
// Pass 1: parallel row diff — compute rowCounts[] directly, no changed[] array
int[] rowCounts = new int[h];
Parallel.For(0, h, y =>
{
int count = 0;
int rowOffset = y * stride;
for (int x = 0; x < w; x++)
{
int i = y * stride + x * 4;
int darkerB = refPx[i] - curPx[i];
int darkerG = refPx[i + 1] - curPx[i + 1];
int darkerR = refPx[i + 2] - curPx[i + 2];
if (darkerB + darkerG + darkerR > diffThresh)
{
changed[y * w + x] = true;
totalChanged++;
}
int i = rowOffset + x * 4;
int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
if (darker > diffThresh)
count++;
}
}
rowCounts[y] = count;
});
bool debug = req.Debug;
int totalChanged = 0;
for (int y = 0; y < h; y++) totalChanged += rowCounts[y];
if (totalChanged == 0)
{
if (debug) Console.Error.WriteLine(" diff-ocr: no changes detected");
return new OcrResponse { Text = "", Lines = [] };
current.Dispose();
return null;
}
// Two-pass density detection:
// Pass 1: Find row range using full-width row counts
// Pass 2: Find column range using only pixels within detected row range
// This makes the column threshold relative to tooltip height, not screen height.
int maxGap = p.MaxGap;
// Pass 1: count changed pixels per row, find longest active run
int[] rowCounts = new int[h];
for (int y = 0; y < h; y++)
for (int x = 0; x < w; x++)
if (changed[y * w + x])
rowCounts[y]++;
int rowThresh = w / p.RowThreshDiv;
int bestRowStart = 0, bestRowEnd = 0, bestRowLen = 0;
int curRowStart = -1, lastActiveRow = -1;
@ -149,12 +139,46 @@ class OcrHandler(TesseractEngine engine)
if (len > bestRowLen) { bestRowStart = curRowStart; bestRowEnd = lastActiveRow; bestRowLen = len; }
}
// Pass 2: count changed pixels per column, but only within the detected row range
// Pass 2: parallel column diff — only within the row range, recompute from raw pixels
int[] colCounts = new int[w];
for (int y = bestRowStart; y <= bestRowEnd; y++)
for (int x = 0; x < w; x++)
if (changed[y * w + x])
colCounts[x]++;
int rowRangeLen = bestRowEnd - bestRowStart + 1;
if (rowRangeLen <= 200)
{
// Small range: serial is faster than Parallel overhead
for (int y = bestRowStart; y <= bestRowEnd; y++)
{
int rowOffset = y * stride;
for (int x = 0; x < w; x++)
{
int i = rowOffset + x * 4;
int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
if (darker > diffThresh)
colCounts[x]++;
}
}
}
else
{
Parallel.For(bestRowStart, bestRowEnd + 1,
() => new int[w],
(y, _, localCols) =>
{
int rowOffset = y * stride;
for (int x = 0; x < w; x++)
{
int i = rowOffset + x * 4;
int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
if (darker > diffThresh)
localCols[x]++;
}
return localCols;
},
localCols =>
{
for (int x = 0; x < w; x++)
Interlocked.Add(ref colCounts[x], localCols[x]);
});
}
int tooltipHeight = bestRowEnd - bestRowStart + 1;
int colThresh = tooltipHeight / p.ColThreshDiv;
@ -181,13 +205,13 @@ class OcrHandler(TesseractEngine engine)
if (len > bestColLen) { bestColStart = curColStart; bestColEnd = lastActiveCol; bestColLen = len; }
}
// Log density detection results
Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");
if (bestRowLen < 50 || bestColLen < 50)
{
Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
return new OcrResponse { Text = "", Lines = [] };
current.Dispose();
return null;
}
int minX = bestColStart;
@ -195,13 +219,9 @@ class OcrHandler(TesseractEngine engine)
int maxX = Math.Min(bestColEnd, w - 1);
int maxY = Math.Min(bestRowEnd, h - 1);
// Dynamic right-edge trim: if the rightmost columns are much sparser than
// the tooltip body, trim them. This handles the ~5% of cases where ambient
// noise extends the detected region slightly on the right.
int colSpan = maxX - minX + 1;
if (colSpan > 100)
{
// Compute median column density in the middle 50% of the range
int q1 = minX + colSpan / 4;
int q3 = minX + colSpan * 3 / 4;
long midSum = 0;
@ -209,21 +229,38 @@ class OcrHandler(TesseractEngine engine)
for (int x = q1; x <= q3; x++) { midSum += colCounts[x]; midCount++; }
double avgMidDensity = (double)midSum / midCount;
double cutoff = avgMidDensity * p.TrimCutoff;
// Trim from right while below cutoff
while (maxX > minX + 100 && colCounts[maxX] < cutoff)
maxX--;
}
int rw = maxX - minX + 1;
int rh = maxY - minY + 1;
if (debug) Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
var cropped = CropFromBytes(curPx, stride, minX, minY, rw, rh);
var refCropped = CropFromBytes(refPx, stride, minX, minY, rw, rh);
var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };
// Crop tooltip region from both current and reference frames
using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
using var refCropped = _referenceFrame.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
// Save before/after preprocessing images if path is provided
return (cropped, refCropped, current, region);
}
public object HandleDiffOcr(Request req, DiffOcrParams p)
{
if (_referenceFrame == null)
return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
var cropResult = DiffCrop(req, p);
if (cropResult == null)
return new OcrResponse { Text = "", Lines = [] };
var (cropped, refCropped, current, region) = cropResult.Value;
using var _current = current;
using var _cropped = cropped;
using var _refCropped = refCropped;
bool debug = req.Debug;
int minX = region.X, minY = region.Y, rw = region.Width, rh = region.Height;
// Save raw crop if path is provided
if (!string.IsNullOrEmpty(req.Path))
{
var dir = Path.GetDirectoryName(req.Path);
@ -634,6 +671,24 @@ class OcrHandler(TesseractEngine engine)
};
}
/// <summary>
/// Fast crop from raw pixel bytes — avoids slow GDI+ Bitmap.Clone().
/// Copies a <paramref name="cropW"/> x <paramref name="cropH"/> window out of a
/// 4-bytes-per-pixel buffer into a newly allocated 32bpp ARGB bitmap, one row at a time.
/// </summary>
/// <param name="px">Source pixel buffer, 4 bytes per pixel, consecutive rows <paramref name="srcStride"/> bytes apart.</param>
/// <param name="srcStride">Byte distance between the starts of consecutive rows in <paramref name="px"/>.</param>
/// <param name="cropX">Left edge of the crop window in the source, in pixels.</param>
/// <param name="cropY">Top edge of the crop window in the source, in pixels.</param>
/// <param name="cropW">Width of the crop window, in pixels.</param>
/// <param name="cropH">Height of the crop window, in pixels.</param>
/// <returns>A new bitmap holding the cropped pixels; the caller owns it and must dispose it.</returns>
/// <remarks>
/// No explicit bounds checks are performed here: an invalid size or a window that falls
/// outside <paramref name="px"/> surfaces as the exception thrown by the Bitmap constructor
/// or Marshal.Copy. In that case the partially built bitmap is unlocked and disposed
/// before the exception propagates, so no GDI+ handle is leaked.
/// </remarks>
private static Bitmap CropFromBytes(byte[] px, int srcStride, int cropX, int cropY, int cropW, int cropH)
{
    var bmp = new Bitmap(cropW, cropH, PixelFormat.Format32bppArgb);
    try
    {
        var data = bmp.LockBits(new Rectangle(0, 0, cropW, cropH), ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);
        try
        {
            // The destination stride may include row padding, so rows are copied individually.
            int dstStride = data.Stride;
            int rowBytes = cropW * 4;
            for (int y = 0; y < cropH; y++)
            {
                int srcOffset = (cropY + y) * srcStride + cropX * 4;
                Marshal.Copy(px, srcOffset, data.Scan0 + y * dstStride, rowBytes);
            }
        }
        finally
        {
            // Always release the lock, even when a row copy throws.
            bmp.UnlockBits(data);
        }
        return bmp;
    }
    catch
    {
        // The caller never receives the bitmap on failure, so release it here.
        bmp.Dispose();
        throw;
    }
}
private static double LevenshteinSimilarity(string a, string b)
{
a = a.ToLowerInvariant();