356 lines
13 KiB
C#
356 lines
13 KiB
C#
namespace OcrDaemon;
|
|
|
|
using System.Drawing;
|
|
using System.Drawing.Imaging;
|
|
using System.Runtime.InteropServices;
|
|
using Tesseract;
|
|
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
|
|
|
class OcrHandler(TesseractEngine engine)
|
|
{
|
|
private Bitmap? _referenceFrame;
|
|
|
|
/// <summary>
/// Runs OCR over an image obtained from <c>ScreenCapture.CaptureOrLoad</c> using the
/// request's file and region, after passing it through <c>ImagePreprocessor.PreprocessForOcr</c>.
/// </summary>
/// <param name="req">Request carrying the OCR options, source file and region.</param>
/// <returns>An <c>OcrResponse</c> holding the page text and the extracted lines.</returns>
public object HandleOcr(Request req)
{
    var ocrOptions = NormalizeOptions(req.Ocr);

    // Every intermediate image is disposable; all of them are released when the method exits.
    using var source = ScreenCapture.CaptureOrLoad(req.File, req.Region);
    using var prepared = ImagePreprocessor.PreprocessForOcr(source, ocrOptions);
    using var pixImage = ImageUtils.BitmapToPix(prepared);
    using var ocrPage = engine.Process(pixImage);

    // Line extraction is called with a zero offset in both axes.
    return new OcrResponse
    {
        Text = ocrPage.GetText(),
        Lines = ImageUtils.ExtractLinesFromPage(ocrPage, offsetX: 0, offsetY: 0, minConfidence: ocrOptions.MinConfidence),
    };
}
|
|
|
|
/// <summary>
/// Saves an image to <c>req.Path</c>. The stored reference frame is written when one exists
/// (the same image used for diff-ocr); otherwise a new frame is obtained for this call.
/// </summary>
/// <param name="req">Request carrying the destination path and, for a fresh capture, the file and region.</param>
/// <returns>
/// An <c>ErrorResponse</c> when no path was supplied; otherwise an <c>OkResponse</c> once the
/// image has been written.
/// </returns>
public object HandleScreenshot(Request req)
{
    if (string.IsNullOrEmpty(req.Path))
        return new ErrorResponse("screenshot command requires 'path'");

    // Only a frame captured here is owned by this call. Binding it to a using declaration
    // guarantees it is released even if format lookup, directory creation or Save throws.
    // The shared reference frame is never disposed from this method.
    using Bitmap? captured = _referenceFrame is null
        ? ScreenCapture.CaptureOrLoad(req.File, req.Region)
        : null;
    Bitmap bitmap = _referenceFrame ?? captured!;

    var format = ImageUtils.GetImageFormat(req.Path);

    // CreateDirectory does nothing when the directory already exists.
    var dir = Path.GetDirectoryName(req.Path);
    if (!string.IsNullOrEmpty(dir))
        Directory.CreateDirectory(dir);

    bitmap.Save(req.Path, format);
    return new OkResponse();
}
|
|
|
|
/// <summary>
/// Obtains a frame via <c>ScreenCapture.CaptureOrLoad</c>, encodes it as PNG in memory and
/// returns the bytes as a Base64 string.
/// </summary>
/// <param name="req">Request carrying the source file and region.</param>
/// <returns>A <c>CaptureResponse</c> whose <c>Image</c> is the Base64-encoded PNG.</returns>
public object HandleCapture(Request req)
{
    string encoded;

    using (var frame = ScreenCapture.CaptureOrLoad(req.File, req.Region))
    using (var pngBuffer = new MemoryStream())
    {
        frame.Save(pngBuffer, SdImageFormat.Png);
        encoded = Convert.ToBase64String(pngBuffer.ToArray());
    }

    return new CaptureResponse { Image = encoded };
}
|
|
|
|
/// <summary>
/// Replaces the stored reference frame with a newly obtained one.
/// </summary>
/// <param name="req">Request carrying the source file and region for the new frame.</param>
/// <returns>An <c>OkResponse</c> once the new reference frame is stored.</returns>
public object HandleSnapshot(Request req)
{
    // Acquire the replacement first. If acquisition throws, the previous reference frame is
    // left intact instead of leaving the field pointing at an already-disposed bitmap.
    var replacement = ScreenCapture.CaptureOrLoad(req.File, req.Region);

    _referenceFrame?.Dispose();
    _referenceFrame = replacement;
    return new OkResponse();
}
|
|
|
|
/// <summary>
/// Compares the stored reference frame with a newly obtained frame, selects the best-scoring
/// changed region, crops that region from the new frame and runs OCR on it.
/// </summary>
/// <param name="req">
/// Request supplying the OCR options, the optional source file, the minimum threshold, the
/// debug flag and an optional path under which intermediate images are saved.
/// </param>
/// <returns>
/// An <c>ErrorResponse</c> when no reference frame is stored; an empty <c>OcrResponse</c> when
/// no darkening, no thresholded change, or no sufficiently large component is found; otherwise
/// a <c>DiffOcrResponse</c> with the text, the lines (extracted with the region origin passed
/// as offset) and the cropped region.
/// </returns>
public object HandleDiffOcr(Request req)
{
    var options = NormalizeOptions(req.Ocr);
    var reference = _referenceFrame;
    if (reference == null)
        return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");

    using var current = ScreenCapture.CaptureOrLoad(req.File, null);

    // Only the area common to both frames is compared.
    int w = Math.Min(reference.Width, current.Width);
    int h = Math.Min(reference.Height, current.Height);

    // Both buffers are tightly packed (w * 4 bytes per row), so a single index is valid for
    // both frames even when their native strides differ.
    byte[] refPx = CopyPackedPixels(reference, w, h);
    byte[] curPx = CopyPackedPixels(current, w, h);

    bool debug = req.Debug;

    // delta[p] = reference luminance - current luminance; positive means the pixel got darker.
    int[] delta = new int[w * h];
    long sum = 0;
    long sumSq = 0;
    int count = 0;

    for (int p = 0; p < delta.Length; p++)
    {
        int i = p * 4; // bytes are B, G, R, A

        int refL = (refPx[i + 2] * 30 + refPx[i + 1] * 59 + refPx[i] * 11) / 100;
        int curL = (curPx[i + 2] * 30 + curPx[i + 1] * 59 + curPx[i] * 11) / 100;
        int d = refL - curL;
        delta[p] = d;

        // Statistics are gathered over darkened pixels only.
        if (d > 0)
        {
            sum += d;
            sumSq += (long)d * d;
            count++;
        }
    }

    if (count == 0)
    {
        if (debug) Console.Error.WriteLine(" diff-ocr: no darkening detected");
        return new OcrResponse { Text = "", Lines = [] };
    }

    double mean = (double)sum / count;
    double variance = Math.Max(0, (double)sumSq / count - mean * mean);
    double std = Math.Sqrt(variance);

    // Darkening threshold: mean + 2 sigma, never below the requested minimum (default 20).
    // Brightening uses half of that, also floored at the minimum.
    int minThresh = req.Threshold > 0 ? req.Threshold : 20;
    int diffThresh = (int)Math.Round(Math.Max(mean + 2.0 * std, minThresh));
    int brightThresh = Math.Max(minThresh, diffThresh / 2);

    bool[] changed = new bool[w * h];
    int totalChanged = 0;
    for (int i = 0; i < delta.Length; i++)
    {
        int d = delta[i];
        if (d >= diffThresh || d <= -brightThresh)
        {
            changed[i] = true;
            totalChanged++;
        }
    }

    if (totalChanged == 0)
    {
        if (debug) Console.Error.WriteLine(" diff-ocr: no changes detected after threshold");
        return new OcrResponse { Text = "", Lines = [] };
    }

    // Morphological close (radius 3) to bridge gaps, then open (radius 1) to drop specks.
    bool[] closed = ErodeMask(DilateMask(changed, w, h, radius: 3), w, h, radius: 3);
    bool[] cleaned = DilateMask(ErodeMask(closed, w, h, radius: 1), w, h, radius: 1);

    if (!TryFindBestComponent(cleaned, delta, w, h, minArea: (w * h) / 1000, out var compBounds))
    {
        if (debug) Console.Error.WriteLine(" diff-ocr: no tooltip-sized region found");
        return new OcrResponse { Text = "", Lines = [] };
    }

    // Pad the component bounds (with extra room on the right) and clip to the compared area.
    int pad = Math.Clamp(Math.Min(compBounds.Width, compBounds.Height) / 20, 6, 20);
    int extraRight = Math.Clamp(compBounds.Width / 6, 12, 80);
    int minX = Math.Max(compBounds.Left - pad, 0);
    int minY = Math.Max(compBounds.Top - pad, 0);
    int maxX = Math.Min(compBounds.Right - 1 + pad + extraRight, w - 1);
    int maxY = Math.Min(compBounds.Bottom - 1 + pad, h - 1);
    int rw = maxX - minX + 1;
    int rh = maxY - minY + 1;

    if (debug)
        Console.Error.WriteLine($" diff-ocr: changed={totalChanged} thresh={diffThresh} mean={mean:F1} std={std:F1} region=({minX},{minY}) {rw}x{rh}");

    // Simple crop of the tooltip region from the current frame (no per-pixel masking).
    // The top-hat preprocessing will handle suppressing background text.
    using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);

    // Save the raw crop if a path is provided.
    if (!string.IsNullOrEmpty(req.Path))
    {
        // CreateDirectory does nothing when the directory already exists.
        var dir = Path.GetDirectoryName(req.Path);
        if (!string.IsNullOrEmpty(dir))
            Directory.CreateDirectory(dir);
        cropped.Save(req.Path, ImageUtils.GetImageFormat(req.Path));
        if (debug) Console.Error.WriteLine($" diff-ocr: saved raw to {req.Path}");
    }

    using var processed = ImagePreprocessor.PreprocessForOcr(cropped, options);

    // Save fullscreen and preprocessed versions alongside the raw crop.
    if (!string.IsNullOrEmpty(req.Path))
    {
        var ext = Path.GetExtension(req.Path);
        var fullPath = Path.ChangeExtension(req.Path, ".full" + ext);
        current.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
        if (debug) Console.Error.WriteLine($" diff-ocr: saved fullscreen to {fullPath}");

        if (options.Preprocess)
        {
            var prePath = Path.ChangeExtension(req.Path, ".pre" + ext);
            processed.Save(prePath, ImageUtils.GetImageFormat(prePath));
            if (debug) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
        }
    }

    using var pix = ImageUtils.BitmapToPix(processed);
    using var page = engine.Process(pix);

    var text = page.GetText();
    var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: minX, offsetY: minY, minConfidence: options.MinConfidence);

    return new DiffOcrResponse
    {
        Text = text,
        Lines = lines,
        Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
    };
}

/// <summary>
/// Copies the top-left <paramref name="w"/> x <paramref name="h"/> pixels of
/// <paramref name="source"/> as 32bpp ARGB into a tightly packed buffer (w * 4 bytes per row).
/// </summary>
/// <param name="source">Bitmap to read; it is locked read-only for the duration of the copy.</param>
/// <param name="w">Number of pixel columns to copy.</param>
/// <param name="h">Number of pixel rows to copy.</param>
/// <returns>A buffer of <c>w * h * 4</c> bytes in B, G, R, A order.</returns>
private static byte[] CopyPackedPixels(Bitmap source, int w, int h)
{
    int rowBytes = w * 4;
    byte[] packed = new byte[rowBytes * h];

    var data = source.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
    try
    {
        // Copy row by row using the bitmap's own stride, so row padding (or a stride wider
        // than the locked rectangle) never leaks into the packed buffer.
        for (int y = 0; y < h; y++)
            Marshal.Copy(IntPtr.Add(data.Scan0, y * data.Stride), packed, y * rowBytes, rowBytes);
    }
    finally
    {
        // Always unlock, otherwise a failed copy would leave the bitmap locked.
        source.UnlockBits(data);
    }

    return packed;
}
|
|
|
|
/// <summary>
/// Dilates a row-major boolean mask with a square window of side <c>2 * radius + 1</c>.
/// An output pixel is set when any source pixel inside its window is set; the window is
/// clipped to the mask bounds.
/// </summary>
/// <param name="src">Row-major mask of <paramref name="w"/> x <paramref name="h"/> pixels.</param>
/// <param name="w">Mask width in pixels.</param>
/// <param name="h">Mask height in pixels.</param>
/// <param name="radius">Half-size of the window; values of zero or less disable the operation.</param>
/// <returns>
/// A new mask of <c>w * h</c> entries, or <paramref name="src"/> itself (not a copy) when
/// <paramref name="radius"/> is zero or negative.
/// </returns>
private static bool[] DilateMask(bool[] src, int w, int h, int radius)
{
    if (radius <= 0)
    {
        return src;
    }

    var grown = new bool[w * h];
    int lastCol = w - 1;
    int lastRow = h - 1;

    for (int cy = 0; cy < h; cy++)
    {
        int top = Math.Max(0, cy - radius);
        int bottom = Math.Min(lastRow, cy + radius);
        int outRow = cy * w;

        for (int cx = 0; cx < w; cx++)
        {
            int left = Math.Max(0, cx - radius);
            int right = Math.Min(lastCol, cx + radius);
            grown[outRow + cx] = AnySetInWindow(top, bottom, left, right);
        }
    }

    return grown;

    // Scans the clipped window row by row and stops at the first set pixel.
    bool AnySetInWindow(int y0, int y1, int x0, int x1)
    {
        for (int wy = y0; wy <= y1; wy++)
        {
            int rowStart = wy * w;
            for (int wx = x0; wx <= x1; wx++)
            {
                if (src[rowStart + wx])
                {
                    return true;
                }
            }
        }

        return false;
    }
}
|
|
|
|
/// <summary>
/// Erodes a row-major boolean mask with a square window of side <c>2 * radius + 1</c>.
/// An output pixel stays set only when every source pixel inside its window is set. The
/// window is clipped to the mask bounds, so positions outside the mask never clear a pixel.
/// </summary>
/// <param name="src">Row-major mask of <paramref name="w"/> x <paramref name="h"/> pixels.</param>
/// <param name="w">Mask width in pixels.</param>
/// <param name="h">Mask height in pixels.</param>
/// <param name="radius">Half-size of the window; values of zero or less disable the operation.</param>
/// <returns>
/// A new mask of <c>w * h</c> entries, or <paramref name="src"/> itself (not a copy) when
/// <paramref name="radius"/> is zero or negative.
/// </returns>
private static bool[] ErodeMask(bool[] src, int w, int h, int radius)
{
    if (radius <= 0)
    {
        return src;
    }

    var shrunk = new bool[w * h];
    int lastCol = w - 1;
    int lastRow = h - 1;

    for (int cy = 0; cy < h; cy++)
    {
        int top = Math.Max(0, cy - radius);
        int bottom = Math.Min(lastRow, cy + radius);
        int outRow = cy * w;

        for (int cx = 0; cx < w; cx++)
        {
            int left = Math.Max(0, cx - radius);
            int right = Math.Min(lastCol, cx + radius);
            shrunk[outRow + cx] = AllSetInWindow(top, bottom, left, right);
        }
    }

    return shrunk;

    // Scans the clipped window row by row and stops at the first cleared pixel.
    bool AllSetInWindow(int y0, int y1, int x0, int x1)
    {
        for (int wy = y0; wy <= y1; wy++)
        {
            int rowStart = wy * w;
            for (int wx = x0; wx <= x1; wx++)
            {
                if (!src[rowStart + wx])
                {
                    return false;
                }
            }
        }

        return true;
    }
}
|
|
|
|
/// <summary>
/// Labels the 8-connected components of <paramref name="mask"/> with a breadth-first flood
/// fill and picks the one with the highest score, where
/// score = area * (area / bounding-box area) * (mean positive delta over the component).
/// Components smaller than <paramref name="minArea"/> pixels are ignored.
/// </summary>
/// <param name="mask">Row-major mask of <paramref name="w"/> x <paramref name="h"/> pixels.</param>
/// <param name="delta">Per-pixel delta values in the same layout; only positive values add to the score.</param>
/// <param name="w">Mask width in pixels.</param>
/// <param name="h">Mask height in pixels.</param>
/// <param name="minArea">Minimum pixel count a component needs to be considered.</param>
/// <param name="bounds">Bounding box of the winning component, or <c>Rectangle.Empty</c> when none qualifies.</param>
/// <returns><c>true</c> when a component with a strictly positive score was found.</returns>
private static bool TryFindBestComponent(bool[] mask, int[] delta, int w, int h, int minArea, out Rectangle bounds)
{
    bounds = Rectangle.Empty;

    int total = w * h;
    var seen = new bool[total];
    var queue = new int[total]; // flat pixel indices; each pixel is enqueued at most once
    double topScore = 0;
    Rectangle topBounds = Rectangle.Empty;

    // Seeds are visited in row-major order.
    for (int seed = 0; seed < total; seed++)
    {
        if (seen[seed] || !mask[seed])
        {
            continue;
        }

        int read = 0;
        int write = 0;
        queue[write++] = seed;
        seen[seed] = true;

        int left = seed % w;
        int right = left;
        int top = seed / w;
        int bottom = top;
        int area = 0;
        long darkening = 0;

        while (read < write)
        {
            int at = queue[read++];
            int cx = at % w;
            int cy = at / w;
            area++;

            int d = delta[at];
            if (d > 0)
            {
                darkening += d;
            }

            left = Math.Min(left, cx);
            right = Math.Max(right, cx);
            top = Math.Min(top, cy);
            bottom = Math.Max(bottom, cy);

            // Enqueue unvisited mask pixels among the 8 neighbours (the centre is already seen).
            for (int ny = cy - 1; ny <= cy + 1; ny++)
            {
                if (ny < 0 || ny >= h)
                {
                    continue;
                }

                for (int nx = cx - 1; nx <= cx + 1; nx++)
                {
                    if (nx < 0 || nx >= w)
                    {
                        continue;
                    }

                    int next = ny * w + nx;
                    if (seen[next] || !mask[next])
                    {
                        continue;
                    }

                    seen[next] = true;
                    queue[write++] = next;
                }
            }
        }

        if (area < minArea)
        {
            continue;
        }

        int boxArea = (right - left + 1) * (bottom - top + 1);
        double fillRatio = boxArea > 0 ? (double)area / boxArea : 0;
        double meanDelta = area > 0 ? (double)darkening / area : 0;
        double score = area * fillRatio * meanDelta;

        if (score > topScore)
        {
            topScore = score;
            topBounds = Rectangle.FromLTRB(left, top, right + 1, bottom + 1);
        }
    }

    if (topScore <= 0)
    {
        return false;
    }

    bounds = topBounds;
    return true;
}
|
|
|
|
/// <summary>
/// Returns usable OCR options: the supplied value, or a default-constructed
/// <c>OcrOptions</c> when <paramref name="options"/> is null, with out-of-range settings
/// adjusted. The adjustments are written to the returned object's properties, so a supplied
/// reference-type instance is modified in place.
/// </summary>
/// <param name="options">Options from the request; may be null.</param>
/// <returns>
/// Options with <c>KernelSize</c> at least 3 and odd, <c>Scale</c> at least 1 and
/// <c>MinConfidence</c> not below 0.
/// </returns>
private static OcrOptions NormalizeOptions(OcrOptions? options)
{
    var result = options ?? new OcrOptions();

    // Kernel size: raise to the minimum of 3 first, then bump even sizes to the next odd value.
    if (result.KernelSize < 3)
    {
        result.KernelSize = 3;
    }

    if (result.KernelSize % 2 == 0)
    {
        result.KernelSize += 1;
    }

    if (result.Scale < 1)
    {
        result.Scale = 1;
    }

    if (result.MinConfidence < 0)
    {
        result.MinConfidence = 0;
    }

    return result;
}
|
|
}
|