added easyOCR
This commit is contained in:
parent
37d6678577
commit
9f208b0606
27 changed files with 1780 additions and 112 deletions
|
|
@ -3,6 +3,8 @@ namespace OcrDaemon;
|
|||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using System.Text.Json;
|
||||
using OpenCvSharp;
|
||||
using OpenCvSharp.Extensions;
|
||||
|
|
@ -61,17 +63,20 @@ class OcrHandler(TesseractEngine engine)
|
|||
? new DiffOcrParams { DiffThresh = req.Threshold }
|
||||
: new DiffOcrParams());
|
||||
|
||||
public object HandleDiffOcr(Request req, DiffOcrParams p)
|
||||
/// <summary>
|
||||
/// Diff detection + crop only. Returns the raw tooltip crop bitmap and region,
|
||||
/// or null if no tooltip detected. Caller is responsible for disposing the bitmap.
|
||||
/// </summary>
|
||||
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffOcrParams p)
|
||||
{
|
||||
if (_referenceFrame == null)
|
||||
return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
|
||||
return null;
|
||||
|
||||
using var current = ScreenCapture.CaptureOrLoad(req.File, null);
|
||||
var current = ScreenCapture.CaptureOrLoad(req.File, null);
|
||||
|
||||
int w = Math.Min(_referenceFrame.Width, current.Width);
|
||||
int h = Math.Min(_referenceFrame.Height, current.Height);
|
||||
|
||||
// Get raw pixels for both frames
|
||||
var refData = _referenceFrame.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
|
||||
byte[] refPx = new byte[refData.Stride * h];
|
||||
Marshal.Copy(refData.Scan0, refPx, 0, refPx.Length);
|
||||
|
|
@ -83,49 +88,34 @@ class OcrHandler(TesseractEngine engine)
|
|||
Marshal.Copy(curData.Scan0, curPx, 0, curPx.Length);
|
||||
current.UnlockBits(curData);
|
||||
|
||||
// Detect pixels that got DARKER (tooltip = dark overlay).
|
||||
// This filters out item highlight glow (brighter) and cursor changes.
|
||||
int diffThresh = p.DiffThresh;
|
||||
bool[] changed = new bool[w * h];
|
||||
int totalChanged = 0;
|
||||
|
||||
for (int y = 0; y < h; y++)
|
||||
// Pass 1: parallel row diff — compute rowCounts[] directly, no changed[] array
|
||||
int[] rowCounts = new int[h];
|
||||
Parallel.For(0, h, y =>
|
||||
{
|
||||
int count = 0;
|
||||
int rowOffset = y * stride;
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
int i = y * stride + x * 4;
|
||||
int darkerB = refPx[i] - curPx[i];
|
||||
int darkerG = refPx[i + 1] - curPx[i + 1];
|
||||
int darkerR = refPx[i + 2] - curPx[i + 2];
|
||||
if (darkerB + darkerG + darkerR > diffThresh)
|
||||
{
|
||||
changed[y * w + x] = true;
|
||||
totalChanged++;
|
||||
}
|
||||
int i = rowOffset + x * 4;
|
||||
int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
|
||||
if (darker > diffThresh)
|
||||
count++;
|
||||
}
|
||||
}
|
||||
rowCounts[y] = count;
|
||||
});
|
||||
|
||||
bool debug = req.Debug;
|
||||
int totalChanged = 0;
|
||||
for (int y = 0; y < h; y++) totalChanged += rowCounts[y];
|
||||
|
||||
if (totalChanged == 0)
|
||||
{
|
||||
if (debug) Console.Error.WriteLine(" diff-ocr: no changes detected");
|
||||
return new OcrResponse { Text = "", Lines = [] };
|
||||
current.Dispose();
|
||||
return null;
|
||||
}
|
||||
|
||||
// Two-pass density detection:
|
||||
// Pass 1: Find row range using full-width row counts
|
||||
// Pass 2: Find column range using only pixels within detected row range
|
||||
// This makes the column threshold relative to tooltip height, not screen height.
|
||||
int maxGap = p.MaxGap;
|
||||
|
||||
// Pass 1: count changed pixels per row, find longest active run
|
||||
int[] rowCounts = new int[h];
|
||||
for (int y = 0; y < h; y++)
|
||||
for (int x = 0; x < w; x++)
|
||||
if (changed[y * w + x])
|
||||
rowCounts[y]++;
|
||||
|
||||
int rowThresh = w / p.RowThreshDiv;
|
||||
int bestRowStart = 0, bestRowEnd = 0, bestRowLen = 0;
|
||||
int curRowStart = -1, lastActiveRow = -1;
|
||||
|
|
@ -149,12 +139,46 @@ class OcrHandler(TesseractEngine engine)
|
|||
if (len > bestRowLen) { bestRowStart = curRowStart; bestRowEnd = lastActiveRow; bestRowLen = len; }
|
||||
}
|
||||
|
||||
// Pass 2: count changed pixels per column, but only within the detected row range
|
||||
// Pass 2: parallel column diff — only within the row range, recompute from raw pixels
|
||||
int[] colCounts = new int[w];
|
||||
for (int y = bestRowStart; y <= bestRowEnd; y++)
|
||||
for (int x = 0; x < w; x++)
|
||||
if (changed[y * w + x])
|
||||
colCounts[x]++;
|
||||
int rowRangeLen = bestRowEnd - bestRowStart + 1;
|
||||
if (rowRangeLen <= 200)
|
||||
{
|
||||
// Small range: serial is faster than Parallel overhead
|
||||
for (int y = bestRowStart; y <= bestRowEnd; y++)
|
||||
{
|
||||
int rowOffset = y * stride;
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
int i = rowOffset + x * 4;
|
||||
int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
|
||||
if (darker > diffThresh)
|
||||
colCounts[x]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Parallel.For(bestRowStart, bestRowEnd + 1,
|
||||
() => new int[w],
|
||||
(y, _, localCols) =>
|
||||
{
|
||||
int rowOffset = y * stride;
|
||||
for (int x = 0; x < w; x++)
|
||||
{
|
||||
int i = rowOffset + x * 4;
|
||||
int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
|
||||
if (darker > diffThresh)
|
||||
localCols[x]++;
|
||||
}
|
||||
return localCols;
|
||||
},
|
||||
localCols =>
|
||||
{
|
||||
for (int x = 0; x < w; x++)
|
||||
Interlocked.Add(ref colCounts[x], localCols[x]);
|
||||
});
|
||||
}
|
||||
|
||||
int tooltipHeight = bestRowEnd - bestRowStart + 1;
|
||||
int colThresh = tooltipHeight / p.ColThreshDiv;
|
||||
|
|
@ -181,13 +205,13 @@ class OcrHandler(TesseractEngine engine)
|
|||
if (len > bestColLen) { bestColStart = curColStart; bestColEnd = lastActiveCol; bestColLen = len; }
|
||||
}
|
||||
|
||||
// Log density detection results
|
||||
Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");
|
||||
|
||||
if (bestRowLen < 50 || bestColLen < 50)
|
||||
{
|
||||
Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
|
||||
return new OcrResponse { Text = "", Lines = [] };
|
||||
current.Dispose();
|
||||
return null;
|
||||
}
|
||||
|
||||
int minX = bestColStart;
|
||||
|
|
@ -195,13 +219,9 @@ class OcrHandler(TesseractEngine engine)
|
|||
int maxX = Math.Min(bestColEnd, w - 1);
|
||||
int maxY = Math.Min(bestRowEnd, h - 1);
|
||||
|
||||
// Dynamic right-edge trim: if the rightmost columns are much sparser than
|
||||
// the tooltip body, trim them. This handles the ~5% of cases where ambient
|
||||
// noise extends the detected region slightly on the right.
|
||||
int colSpan = maxX - minX + 1;
|
||||
if (colSpan > 100)
|
||||
{
|
||||
// Compute median column density in the middle 50% of the range
|
||||
int q1 = minX + colSpan / 4;
|
||||
int q3 = minX + colSpan * 3 / 4;
|
||||
long midSum = 0;
|
||||
|
|
@ -209,21 +229,38 @@ class OcrHandler(TesseractEngine engine)
|
|||
for (int x = q1; x <= q3; x++) { midSum += colCounts[x]; midCount++; }
|
||||
double avgMidDensity = (double)midSum / midCount;
|
||||
double cutoff = avgMidDensity * p.TrimCutoff;
|
||||
|
||||
// Trim from right while below cutoff
|
||||
while (maxX > minX + 100 && colCounts[maxX] < cutoff)
|
||||
maxX--;
|
||||
}
|
||||
int rw = maxX - minX + 1;
|
||||
int rh = maxY - minY + 1;
|
||||
|
||||
if (debug) Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
|
||||
var cropped = CropFromBytes(curPx, stride, minX, minY, rw, rh);
|
||||
var refCropped = CropFromBytes(refPx, stride, minX, minY, rw, rh);
|
||||
var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };
|
||||
|
||||
// Crop tooltip region from both current and reference frames
|
||||
using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
|
||||
using var refCropped = _referenceFrame.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
|
||||
Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
|
||||
|
||||
// Save before/after preprocessing images if path is provided
|
||||
return (cropped, refCropped, current, region);
|
||||
}
|
||||
|
||||
public object HandleDiffOcr(Request req, DiffOcrParams p)
|
||||
{
|
||||
if (_referenceFrame == null)
|
||||
return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
|
||||
|
||||
var cropResult = DiffCrop(req, p);
|
||||
if (cropResult == null)
|
||||
return new OcrResponse { Text = "", Lines = [] };
|
||||
|
||||
var (cropped, refCropped, current, region) = cropResult.Value;
|
||||
using var _current = current;
|
||||
using var _cropped = cropped;
|
||||
using var _refCropped = refCropped;
|
||||
bool debug = req.Debug;
|
||||
int minX = region.X, minY = region.Y, rw = region.Width, rh = region.Height;
|
||||
|
||||
// Save raw crop if path is provided
|
||||
if (!string.IsNullOrEmpty(req.Path))
|
||||
{
|
||||
var dir = Path.GetDirectoryName(req.Path);
|
||||
|
|
@ -634,6 +671,24 @@ class OcrHandler(TesseractEngine engine)
|
|||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Fast crop from raw pixel bytes — avoids slow GDI+ Bitmap.Clone().
/// Copies a <paramref name="cropW"/> x <paramref name="cropH"/> window of 4-byte pixels
/// out of <paramref name="px"/> into a newly allocated 32bpp ARGB bitmap.
/// </summary>
/// <param name="px">Source pixel buffer, read 4 bytes per pixel, row by row.</param>
/// <param name="srcStride">Number of bytes between the start of consecutive rows in <paramref name="px"/>.</param>
/// <param name="cropX">Left edge of the crop window, in pixels.</param>
/// <param name="cropY">Top edge of the crop window, in pixels.</param>
/// <param name="cropW">Width of the crop window, in pixels.</param>
/// <param name="cropH">Height of the crop window, in pixels.</param>
/// <returns>A new bitmap holding the cropped pixels. The caller owns it and must dispose it.</returns>
private static Bitmap CropFromBytes(byte[] px, int srcStride, int cropX, int cropY, int cropW, int cropH)
{
    var bmp = new Bitmap(cropW, cropH, PixelFormat.Format32bppArgb);
    try
    {
        var data = bmp.LockBits(new Rectangle(0, 0, cropW, cropH), ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);
        try
        {
            int dstStride = data.Stride;
            int rowBytes = cropW * 4;
            for (int y = 0; y < cropH; y++)
            {
                // Source and destination strides may differ, so copy one row at a time.
                int srcOffset = (cropY + y) * srcStride + cropX * 4;
                Marshal.Copy(px, srcOffset, data.Scan0 + y * dstStride, rowBytes);
            }
        }
        finally
        {
            // Always release the lock, even if a row copy throws (e.g. crop outside the buffer).
            bmp.UnlockBits(data);
        }

        return bmp;
    }
    catch
    {
        // Nobody else holds a reference yet; dispose here so a failed crop does not leak the GDI+ handle.
        bmp.Dispose();
        throw;
    }
}
|
||||
|
||||
private static double LevenshteinSimilarity(string a, string b)
|
||||
{
|
||||
a = a.ToLowerInvariant();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue