This reverts commit 6242220, which broke tooltip detection.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
258 lines
10 KiB
C#
258 lines
10 KiB
C#
namespace OcrDaemon;
|
|
|
|
using System.Drawing;
|
|
using System.Drawing.Imaging;
|
|
using System.Runtime.InteropServices;
|
|
using Tesseract;
|
|
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
|
|
|
/// <summary>
/// Implements the daemon's image commands: plain OCR, screenshot-to-file, base64 capture,
/// reference snapshot storage, and diff-based OCR of the region that became darker
/// relative to the stored reference frame.
/// </summary>
class OcrHandler(TesseractEngine engine)
{
    /// <summary>Summed B+G+R darkening required for a pixel to count as changed when the request sets no threshold.</summary>
    private const int DefaultDiffThreshold = 30;

    /// <summary>Largest run of inactive rows/columns that is still bridged inside one detected region.</summary>
    private const int MaxGap = 15;

    /// <summary>Minimum detected height and width (in pixels) for a region to be OCR'd.</summary>
    private const int MinTooltipSize = 50;

    /// <summary>The right-edge trim only runs on regions wider than this and never trims below it.</summary>
    private const int MinTrimSpan = 100;

    /// <summary>Frame stored by <see cref="HandleSnapshot"/>; owned (and disposed) by this handler.</summary>
    private Bitmap? _referenceFrame;

    /// <summary>
    /// Captures or loads an image for the request, runs the OCR engine over it and
    /// returns the recognized text plus per-line results (line offsets are 0,0).
    /// </summary>
    public object HandleOcr(Request req)
    {
        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var pix = ImageUtils.BitmapToPix(bitmap);
        using var page = engine.Process(pix);

        var text = page.GetText();
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
        return new OcrResponse { Text = text, Lines = lines };
    }

    /// <summary>
    /// Saves an image to <c>req.Path</c>. If a reference frame exists, that frame is saved
    /// (the same image used for diff-ocr); otherwise a new frame is captured.
    /// </summary>
    /// <returns>An <c>ErrorResponse</c> when no path is given, otherwise an <c>OkResponse</c>.</returns>
    public object HandleScreenshot(Request req)
    {
        if (string.IsNullOrEmpty(req.Path))
            return new ErrorResponse("screenshot command requires 'path'");

        var reference = _referenceFrame;
        var bitmap = reference ?? ScreenCapture.CaptureOrLoad(req.File, req.Region);
        try
        {
            var format = ImageUtils.GetImageFormat(req.Path);
            EnsureParentDirectory(req.Path);
            bitmap.Save(req.Path, format);
        }
        finally
        {
            // Only a freshly captured frame is ours to release; the reference frame stays alive.
            // Doing this in finally keeps the capture from leaking when saving fails.
            if (!ReferenceEquals(bitmap, reference))
                bitmap.Dispose();
        }

        return new OkResponse();
    }

    /// <summary>
    /// Captures or loads an image and returns it PNG-encoded as a base64 string.
    /// </summary>
    public object HandleCapture(Request req)
    {
        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        var base64 = Convert.ToBase64String(ms.ToArray());
        return new CaptureResponse { Image = base64 };
    }

    /// <summary>
    /// Replaces the stored reference frame with a newly captured (or loaded) one.
    /// </summary>
    public object HandleSnapshot(Request req)
    {
        // Capture first: if the capture throws, the previous reference frame must stay
        // valid instead of being left behind as an already-disposed bitmap.
        var fresh = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        _referenceFrame?.Dispose();
        _referenceFrame = fresh;
        return new OkResponse();
    }

    /// <summary>
    /// Compares a new frame against the stored reference frame, locates the largest dense
    /// block of pixels that became darker, crops that block from the new frame,
    /// preprocesses it and runs OCR on it.
    /// </summary>
    /// <remarks>
    /// When <c>req.Path</c> is set, the raw crop is written there, and the full frame and the
    /// preprocessed crop are written next to it with <c>.full</c> / <c>.pre</c> inserted
    /// before the extension.
    /// </remarks>
    /// <returns>
    /// An <c>ErrorResponse</c> when no snapshot is stored; an empty <c>OcrResponse</c> when
    /// nothing changed or no region of at least 50x50 pixels was found; otherwise a
    /// <c>DiffOcrResponse</c> with the text, lines (offset to frame coordinates) and region.
    /// </returns>
    public object HandleDiffOcr(Request req)
    {
        var reference = _referenceFrame;
        if (reference == null)
            return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");

        using var current = ScreenCapture.CaptureOrLoad(req.File, null);

        // Only the area common to both frames (anchored at the top-left) is compared.
        int w = Math.Min(reference.Width, current.Width);
        int h = Math.Min(reference.Height, current.Height);

        // Both buffers are tightly packed (w * 4 bytes per row), so a single stride is valid
        // for both even when the two bitmaps have different widths.
        byte[] refPx = CopyPixels(reference, w, h);
        byte[] curPx = CopyPixels(current, w, h);
        int stride = w * 4;

        // Detect pixels that got DARKER (tooltip = dark overlay).
        // This filters out item highlight glow (brighter) and cursor changes.
        int diffThresh = req.Threshold > 0 ? req.Threshold : DefaultDiffThreshold;
        bool[] changed = new bool[w * h];
        int[] rowCounts = new int[h];
        int totalChanged = 0;

        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                int i = y * stride + x * 4;
                int darkerB = refPx[i] - curPx[i];
                int darkerG = refPx[i + 1] - curPx[i + 1];
                int darkerR = refPx[i + 2] - curPx[i + 2];
                if (darkerB + darkerG + darkerR > diffThresh)
                {
                    changed[y * w + x] = true;
                    rowCounts[y]++;
                    totalChanged++;
                }
            }
        }

        bool debug = req.Debug;

        if (totalChanged == 0)
        {
            if (debug) Console.Error.WriteLine(" diff-ocr: no changes detected");
            return new OcrResponse { Text = "", Lines = [] };
        }

        // Two-pass density detection:
        //   Pass 1: find the row range using full-width row counts.
        //   Pass 2: find the column range using only pixels within the detected row range.
        // This makes the column threshold relative to tooltip height, not screen height.
        int rowThresh = w / 30; // ~3% of width
        var (bestRowStart, bestRowEnd, bestRowLen) = FindLongestActiveRun(rowCounts, rowThresh, MaxGap);

        int[] colCounts = new int[w];
        for (int y = bestRowStart; y <= bestRowEnd; y++)
        {
            int rowBase = y * w;
            for (int x = 0; x < w; x++)
            {
                if (changed[rowBase + x])
                    colCounts[x]++;
            }
        }

        int tooltipHeight = bestRowEnd - bestRowStart + 1;
        int colThresh = tooltipHeight / 15; // ~7% of tooltip height
        var (bestColStart, bestColEnd, bestColLen) = FindLongestActiveRun(colCounts, colThresh, MaxGap);

        // Log density detection results (written regardless of the debug flag).
        Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");

        if (bestRowLen < MinTooltipSize || bestColLen < MinTooltipSize)
        {
            Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
            return new OcrResponse { Text = "", Lines = [] };
        }

        int pad = 0;
        int minX = Math.Max(bestColStart - pad, 0);
        int minY = Math.Max(bestRowStart - pad, 0);
        int maxX = Math.Min(bestColEnd + pad, w - 1);
        int maxY = Math.Min(bestRowEnd + pad, h - 1);

        // Dynamic right-edge trim: if the rightmost columns are much sparser than
        // the tooltip body, trim them. This handles the cases where ambient
        // noise extends the detected region slightly on the right.
        int colSpan = maxX - minX + 1;
        if (colSpan > MinTrimSpan)
        {
            // Average column density over the middle 50% of the range.
            int q1 = minX + colSpan / 4;
            int q3 = minX + colSpan * 3 / 4;
            long midSum = 0;
            int midCount = 0;
            for (int x = q1; x <= q3; x++)
            {
                midSum += colCounts[x];
                midCount++;
            }

            double avgMidDensity = (double)midSum / midCount;
            double cutoff = avgMidDensity * 0.3; // column must have >=30% of avg density

            // Trim from the right while below the cutoff, keeping at least MinTrimSpan columns.
            while (maxX > minX + MinTrimSpan && colCounts[maxX] < cutoff)
                maxX--;
        }

        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;

        if (debug) Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");

        // Simple crop of the tooltip region from the current frame (no per-pixel masking).
        // The preprocessing step is relied on to suppress background text.
        using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);

        // Save the raw crop before preprocessing so it exists even if preprocessing fails.
        if (!string.IsNullOrEmpty(req.Path))
        {
            EnsureParentDirectory(req.Path);
            cropped.Save(req.Path, ImageUtils.GetImageFormat(req.Path));
            if (debug) Console.Error.WriteLine($" diff-ocr: saved raw to {req.Path}");
        }

        // Pre-process the crop for OCR.
        using var processed = ImagePreprocessor.PreprocessForOcr(cropped);

        // Save fullscreen and preprocessed versions alongside the raw crop.
        if (!string.IsNullOrEmpty(req.Path))
        {
            var ext = Path.GetExtension(req.Path);

            var fullPath = Path.ChangeExtension(req.Path, ".full" + ext);
            current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
            if (debug) Console.Error.WriteLine($" diff-ocr: saved fullscreen to {fullPath}");

            var prePath = Path.ChangeExtension(req.Path, ".pre" + ext);
            processed.Save(prePath, ImageUtils.GetImageFormat(prePath));
            if (debug) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
        }

        using var pix = ImageUtils.BitmapToPix(processed);
        using var page = engine.Process(pix);

        var text = page.GetText();
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: minX, offsetY: minY);

        return new DiffOcrResponse
        {
            Text = text,
            Lines = lines,
            Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
        };
    }

    /// <summary>
    /// Creates the directory that will contain <paramref name="path"/> when the path has a
    /// directory component. Creating an already existing directory is a no-op.
    /// </summary>
    private static void EnsureParentDirectory(string path)
    {
        var dir = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(dir))
            Directory.CreateDirectory(dir);
    }

    /// <summary>
    /// Copies the top-left <paramref name="width"/> x <paramref name="height"/> area of
    /// <paramref name="source"/> as 32bpp ARGB into a tightly packed buffer
    /// (<c>width * 4</c> bytes per row, byte order B, G, R, A).
    /// </summary>
    private static byte[] CopyPixels(Bitmap source, int width, int height)
    {
        int rowBytes = width * 4;
        var pixels = new byte[rowBytes * height];

        var data = source.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            // Copy row by row: the locked stride may be wider than rowBytes (it can be the
            // stride of the whole bitmap) and is negative for bottom-up bitmaps.
            for (int y = 0; y < height; y++)
                Marshal.Copy(data.Scan0 + y * data.Stride, pixels, y * rowBytes, rowBytes);
        }
        finally
        {
            source.UnlockBits(data);
        }

        return pixels;
    }

    /// <summary>
    /// Finds the longest run of "active" entries (count &gt;= <paramref name="threshold"/>),
    /// where inactive stretches of up to <paramref name="maxGap"/> entries do not end a run.
    /// Run length is measured from the first to the last active entry, inclusive.
    /// </summary>
    /// <returns>The inclusive start and end indices and the length; (0, 0, 0) when no entry is active.</returns>
    private static (int Start, int End, int Length) FindLongestActiveRun(int[] counts, int threshold, int maxGap)
    {
        int bestStart = 0, bestEnd = 0, bestLen = 0;
        int runStart = -1, lastActive = -1;

        // Closes the open run and keeps it if it is strictly longer than the best so far.
        void CloseRun()
        {
            int len = lastActive - runStart + 1;
            if (len > bestLen)
            {
                bestStart = runStart;
                bestEnd = lastActive;
                bestLen = len;
            }
            runStart = -1;
        }

        for (int i = 0; i < counts.Length; i++)
        {
            if (counts[i] >= threshold)
            {
                if (runStart < 0) runStart = i;
                lastActive = i;
            }
            else if (runStart >= 0 && i - lastActive > maxGap)
            {
                CloseRun();
            }
        }

        // A run that is still open at the end of the array also competes.
        if (runStart >= 0)
            CloseRun();

        return (bestStart, bestEnd, bestLen);
    }
}
|