finished ocr daemon update
This commit is contained in:
parent
cc50368d3b
commit
6ad382cb09
13 changed files with 1471 additions and 1479 deletions
87
tools/OcrDaemon/Daemon.cs
Normal file
87
tools/OcrDaemon/Daemon.cs
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Tesseract;
|
||||
|
||||
/// <summary>
/// Line-oriented JSON daemon: reads one JSON request per stdin line, dispatches it to a
/// handler by its <c>cmd</c> field, and writes exactly one JSON response line to stdout.
/// Diagnostics go to stderr so they never interleave with protocol output.
/// </summary>
static class Daemon
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Creates the Tesseract engine, signals readiness, then serves requests until stdin closes.
    /// </summary>
    /// <returns>0 after stdin reaches end-of-stream; 1 if the OCR engine could not be created.</returns>
    public static int Run()
    {
        ScreenCapture.InitDpiAwareness();

        // Pre-create the Tesseract OCR engine (reused across all requests).
        // Prefer the custom "poe2" model when its traineddata file is present, else fall back to "eng".
        var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
        var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";

        TesseractEngine? tessEngine = null;
        try
        {
            tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
            tessEngine.DefaultPageSegMode = PageSegMode.SingleBlock;
            tessEngine.SetVariable("preserve_interword_spaces", "1");
            Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
        }
        catch (Exception ex)
        {
            // The engine may have been constructed before a later configuration call failed.
            tessEngine?.Dispose();
            // Name the language that was actually requested rather than always "eng".
            WriteResponse(new ErrorResponse($"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/{tessLang}.traineddata exists."));
            return 1;
        }

        // Release the native engine when the request loop ends (normally or by exception).
        using var engine = tessEngine;

        // Signal ready
        WriteResponse(new ReadyResponse());

        var ocrHandler = new OcrHandler(engine);
        var gridHandler = new GridHandler();
        var detectGridHandler = new DetectGridHandler();

        // Main loop: read one JSON line, handle, write one JSON line
        string? line;
        while ((line = Console.In.ReadLine()) != null)
        {
            line = line.Trim();
            if (line.Length == 0) continue;

            try
            {
                var request = JsonSerializer.Deserialize<Request>(line, JsonOptions);
                if (request == null)
                {
                    WriteResponse(new ErrorResponse("Failed to parse request"));
                    continue;
                }

                object response = request.Cmd?.ToLowerInvariant() switch
                {
                    "ocr" => ocrHandler.HandleOcr(request),
                    "screenshot" => ocrHandler.HandleScreenshot(request),
                    "capture" => ocrHandler.HandleCapture(request),
                    "snapshot" => ocrHandler.HandleSnapshot(request),
                    "diff-ocr" => ocrHandler.HandleDiffOcr(request),
                    "grid" => gridHandler.HandleGrid(request),
                    "detect-grid" => detectGridHandler.HandleDetectGrid(request),
                    _ => new ErrorResponse($"Unknown command: {request.Cmd}"),
                };
                WriteResponse(response);
            }
            catch (Exception ex)
            {
                // A failing request (bad JSON, handler error) must not terminate the daemon:
                // report it as a protocol-level error and keep serving.
                WriteResponse(new ErrorResponse(ex.Message));
            }
        }

        return 0;
    }

    /// <summary>
    /// Serializes <paramref name="response"/> as a single JSON line on stdout and flushes
    /// immediately so the response is not left sitting in the output buffer.
    /// </summary>
    private static void WriteResponse(object response)
    {
        var json = JsonSerializer.Serialize(response, JsonOptions);
        Console.Out.WriteLine(json);
        Console.Out.Flush();
    }
}
|
||||
190
tools/OcrDaemon/DetectGridHandler.cs
Normal file
190
tools/OcrDaemon/DetectGridHandler.cs
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
/// <summary>
/// Handles the <c>detect-grid</c> command: locates a periodic cell grid inside a captured
/// (or file-loaded) region by analysing the density of very dark pixels along columns and rows.
/// </summary>
class DetectGridHandler
{
    // Pixels below this brightness count as "very dark".
    private const int VeryDarkPixelThresh = 12;

    // Density above this marks a potential grid column/row.
    private const double GridSegThresh = 0.25;

    /// <summary>
    /// Detects a 2D grid in the requested region.
    /// </summary>
    /// <param name="req">
    /// Request carrying the region (required), optional file, optional min/max cell size
    /// (defaults 20 and 70 when not positive) and the debug flag.
    /// </param>
    /// <returns>
    /// An <see cref="ErrorResponse"/> when no region is given; otherwise a
    /// <see cref="DetectGridResponse"/> with <c>Detected</c> set according to the outcome.
    /// </returns>
    public object HandleDetectGrid(Request req)
    {
        if (req.Region == null)
            return new ErrorResponse("detect-grid requires region");

        int minCell = req.MinCellSize > 0 ? req.MinCellSize : 20;
        int maxCell = req.MaxCellSize > 0 ? req.MaxCellSize : 70;
        bool debug = req.Debug;

        // Only the grayscale plane is needed; dispose the bitmap even if conversion throws.
        byte[] gray;
        int w, h;
        using (Bitmap bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region))
        {
            w = bitmap.Width;
            h = bitmap.Height;
            (gray, _, _) = ImageUtils.BitmapToGrayAndArgb(bitmap);
        }

        // ── Pass 1: Scan horizontal bands using "very dark pixel density" ──
        // Grid lines are nearly all very dark (density ~0.9), cell interiors are
        // partially dark (0.3-0.5), game world is mostly bright (density ~0.05).
        // This creates clear periodic peaks at grid line positions.
        // The band is clamped to the image height so regions shorter than 200px are
        // still scanned once instead of skipping the loop entirely.
        int bandH = Math.Min(200, h);
        const int bandStep = 40;

        var candidates = new List<(int bandY, int cellW, double hAc, int hLeft, int hRight)>();

        for (int by = 0; by + bandH <= h; by += bandStep)
        {
            double[] darkDensity = ColumnDarkDensity(gray, w, by, bandH);

            // Find segments where density > GridSegThresh (grid panel regions)
            var gridSegs = SignalProcessing.FindDarkDensitySegments(darkDensity, GridSegThresh, 200);

            foreach (var (segLeft, segRight) in gridSegs)
            {
                // Extract segment and run AC
                int segLen = segRight - segLeft;
                double[] segment = new double[segLen];
                Array.Copy(darkDensity, segLeft, segment, 0, segLen);

                var (period, acScore) = SignalProcessing.FindPeriodWithScore(segment, minCell, maxCell);
                if (period <= 0) continue;

                // FindGridExtent within the segment
                var (extLeft, extRight) = SignalProcessing.FindGridExtent(segment, period);
                if (extLeft < 0) continue;

                // Map back to full image coordinates
                int absLeft = segLeft + extLeft;
                int absRight = segLeft + extRight;
                int extent = absRight - absLeft;

                // Require at least 8 cells wide AND 200px absolute minimum
                if (extent < period * 8 || extent < 200) continue;

                if (debug) Console.Error.WriteLine(
                    $" Band y={by}: seg=[{segLeft}-{segRight}] period={period}, AC={acScore:F3}, " +
                    $"extent={absLeft}-{absRight}={extent}px ({extent / period} cells)");

                candidates.Add((by, period, acScore, absLeft, absRight));
            }
        }

        if (debug) Console.Error.WriteLine($"Pass 1: {candidates.Count} candidates");

        // Sort by score = AC * extent (prefer large strongly-periodic areas)
        candidates.Sort((a, b) =>
        {
            double sa = a.hAc * (a.hRight - a.hLeft);
            double sb = b.hAc * (b.hRight - b.hLeft);
            return sb.CompareTo(sa);
        });

        // ── Pass 2: Verify vertical periodicity ──
        foreach (var cand in candidates.Take(10))
        {
            int colSpan = cand.hRight - cand.hLeft;
            if (colSpan < cand.cellW * 3) continue;

            // Row "very dark pixel density" within the detected column range
            double[] rowDensity = RowDarkDensity(gray, w, h, cand.hLeft, cand.hRight);

            // Find grid panel vertical segment
            var vGridSegs = SignalProcessing.FindDarkDensitySegments(rowDensity, GridSegThresh, 100);
            if (vGridSegs.Count == 0) continue;

            // Use the largest segment
            var (vSegTop, vSegBottom) = vGridSegs.OrderByDescending(s => s.end - s.start).First();
            int vSegLen = vSegBottom - vSegTop;
            double[] vSegment = new double[vSegLen];
            Array.Copy(rowDensity, vSegTop, vSegment, 0, vSegLen);

            var (cellH, vAc) = SignalProcessing.FindPeriodWithScore(vSegment, minCell, maxCell);
            if (cellH <= 0) continue;

            var (extTop, extBottom) = SignalProcessing.FindGridExtent(vSegment, cellH);
            if (extTop < 0) continue;

            int top = vSegTop + extTop;
            int bottom = vSegTop + extBottom;
            int vExtent = bottom - top;

            // Require at least 3 rows tall AND 100px absolute minimum
            if (vExtent < cellH * 3 || vExtent < 100) continue;

            if (debug) Console.Error.WriteLine(
                $" 2D candidate: cellW={cand.cellW}, cellH={cellH}, " +
                $"region=({cand.hLeft},{top})-({cand.hRight},{bottom}), " +
                $"vAC={vAc:F3}, extent={vExtent}px ({vExtent / cellH} rows)");

            // ── Found a valid 2D grid ──
            int gridW = cand.hRight - cand.hLeft;
            int gridH = bottom - top;
            int cols = Math.Max(2, (int)Math.Round((double)gridW / cand.cellW));
            int rows = Math.Max(2, (int)Math.Round((double)gridH / cellH));

            // Snap grid dimensions to exact multiples of cell size
            gridW = cols * cand.cellW;
            gridH = rows * cellH;

            if (debug) Console.Error.WriteLine(
                $" => cols={cols}, rows={rows}, gridW={gridW}, gridH={gridH}");

            return new DetectGridResponse
            {
                Detected = true,
                Region = new RegionRect
                {
                    // Report the grid in the caller's coordinate space, not relative to the capture.
                    X = req.Region.X + cand.hLeft,
                    Y = req.Region.Y + top,
                    Width = gridW,
                    Height = gridH,
                },
                Cols = cols,
                Rows = rows,
                CellWidth = Math.Round((double)gridW / cols, 1),
                CellHeight = Math.Round((double)gridH / rows, 1),
            };
        }

        if (debug) Console.Error.WriteLine(" No valid 2D grid found");
        return new DetectGridResponse { Detected = false };
    }

    /// <summary>
    /// Per-column fraction of very dark pixels within the horizontal band
    /// <c>[bandTop, bandTop + bandH)</c> of a row-major grayscale image of width <paramref name="w"/>.
    /// </summary>
    private static double[] ColumnDarkDensity(byte[] gray, int w, int bandTop, int bandH)
    {
        double[] density = new double[w];
        for (int x = 0; x < w; x++)
        {
            int count = 0;
            for (int y = bandTop; y < bandTop + bandH; y++)
            {
                if (gray[y * w + x] < VeryDarkPixelThresh) count++;
            }
            density[x] = (double)count / bandH;
        }
        return density;
    }

    /// <summary>
    /// Per-row fraction of very dark pixels within the column range
    /// <c>[left, right)</c> of a row-major grayscale image of size <paramref name="w"/> x <paramref name="h"/>.
    /// </summary>
    private static double[] RowDarkDensity(byte[] gray, int w, int h, int left, int right)
    {
        int span = right - left;
        double[] density = new double[h];
        for (int y = 0; y < h; y++)
        {
            int count = 0;
            for (int x = left; x < right; x++)
            {
                if (gray[y * w + x] < VeryDarkPixelThresh) count++;
            }
            density[y] = (double)count / span;
        }
        return density;
    }
}
|
||||
347
tools/OcrDaemon/GridHandler.cs
Normal file
347
tools/OcrDaemon/GridHandler.cs
Normal file
|
|
@ -0,0 +1,347 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
/// <summary>
/// Handles the <c>grid</c> command: classifies each cell of a fixed cols x rows grid as
/// empty or occupied by comparing it to an empty-cell template, groups adjacent occupied
/// cells into items, and optionally finds cells that look like a chosen target cell.
/// </summary>
class GridHandler
{
    // Pre-loaded empty cell templates (loaded lazily on first grid scan)
    private byte[]? _emptyTemplate70Gray;
    private byte[]? _emptyTemplate70Argb;
    private int _emptyTemplate70W, _emptyTemplate70H, _emptyTemplate70Stride;
    private byte[]? _emptyTemplate35Gray;
    private byte[]? _emptyTemplate35Argb;
    private int _emptyTemplate35W, _emptyTemplate35H, _emptyTemplate35Stride;

    /// <summary>
    /// Scans the requested region as a <c>Cols</c> x <c>Rows</c> grid.
    /// </summary>
    /// <param name="req">
    /// Request carrying region, cols and rows (all required), plus optional file, threshold,
    /// debug flag and target cell for visual matching.
    /// </param>
    /// <returns>
    /// An <see cref="ErrorResponse"/> when required fields are missing or no suitable template
    /// is loaded; otherwise a <see cref="GridResponse"/> with occupancy, items and optional matches.
    /// </returns>
    public object HandleGrid(Request req)
    {
        if (req.Region == null || req.Cols <= 0 || req.Rows <= 0)
            return new ErrorResponse("grid command requires region, cols, rows");

        LoadTemplatesIfNeeded();

        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        int cols = req.Cols;
        int rows = req.Rows;
        float cellW = (float)bitmap.Width / cols;
        float cellH = (float)bitmap.Height / rows;

        // Pick the right empty template based on cell size
        int nominalCell = (int)Math.Round(cellW);
        byte[] templateGray;
        int templateW, templateH;
        if (nominalCell <= 40 && _emptyTemplate35Gray != null)
        {
            templateGray = _emptyTemplate35Gray;
            templateW = _emptyTemplate35W;
            templateH = _emptyTemplate35H;
        }
        else if (_emptyTemplate70Gray != null)
        {
            templateGray = _emptyTemplate70Gray;
            templateW = _emptyTemplate70W;
            templateH = _emptyTemplate70H;
        }
        else
        {
            return new ErrorResponse("Empty cell templates not found in assets/");
        }

        // Only the grayscale plane of the capture is used below.
        var (captureGray, _, _) = ImageUtils.BitmapToGrayAndArgb(bitmap);
        int captureW = bitmap.Width;
        int captureH = bitmap.Height;

        // Border to skip (outer pixels may differ between cells)
        int border = Math.Max(2, nominalCell / 10);

        // Threshold for mean absolute difference — default 2
        double diffThreshold = req.Threshold > 0 ? req.Threshold : 2;
        bool debug = req.Debug;

        if (debug) Console.Error.WriteLine($"Grid: {cols}x{rows}, cellW={cellW:F1}, cellH={cellH:F1}, border={border}, threshold={diffThreshold}");

        // ── Occupancy: mean absolute difference of each cell's interior vs the empty template ──
        var cells = new List<List<bool>>(rows);
        for (int row = 0; row < rows; row++)
        {
            var rowList = new List<bool>(cols);
            var debugDiffs = new List<string>();
            for (int col = 0; col < cols; col++)
            {
                int cx0 = (int)(col * cellW);
                int cy0 = (int)(row * cellH);
                int cw = (int)Math.Min(cellW, captureW - cx0);
                int ch = (int)Math.Min(cellH, captureH - cy0);

                // Compare inner pixels of cell vs template
                long diffSum = 0;
                int compared = 0;
                int innerW = Math.Min(cw, templateW) - border;
                int innerH = Math.Min(ch, templateH) - border;
                for (int py = border; py < innerH; py++)
                {
                    for (int px = border; px < innerW; px++)
                    {
                        int cellVal = captureGray[(cy0 + py) * captureW + (cx0 + px)];
                        int tmplVal = templateGray[py * templateW + px];
                        diffSum += Math.Abs(cellVal - tmplVal);
                        compared++;
                    }
                }
                double meanDiff = compared > 0 ? (double)diffSum / compared : 0;
                bool occupied = meanDiff > diffThreshold;
                rowList.Add(occupied);
                if (debug) debugDiffs.Add($"{meanDiff,5:F1}{(occupied ? "*" : " ")}");
            }
            cells.Add(rowList);
            if (debug) Console.Error.WriteLine($" Row {row,2}: {string.Join(" ", debugDiffs)}");
        }

        // ── Item detection: compare border pixels to empty template (grayscale) ──
        // Items have a colored tint behind them that shows through grid lines.
        // Compare each cell's border strip against the template's border pixels.
        // If they differ → item tint present → cells belong to same item.
        int[] parent = new int[rows * cols];
        for (int i = 0; i < parent.Length; i++) parent[i] = i;

        // Union-find with path halving.
        int Find(int x) { while (parent[x] != x) { parent[x] = parent[parent[x]]; x = parent[x]; } return x; }
        void Union(int a, int b) { parent[Find(a)] = Find(b); }

        int stripWidth = Math.Max(2, border / 2);
        int stripInset = (int)(cellW * 0.15);
        double borderDiffThresh = 15.0;

        for (int row = 0; row < rows; row++)
        {
            for (int col = 0; col < cols; col++)
            {
                if (!cells[row][col]) continue;
                int cx0 = (int)(col * cellW);
                int cy0 = (int)(row * cellH);

                // Check right neighbor: vertical strip straddling the shared edge, every 2nd row
                if (col + 1 < cols && cells[row][col + 1])
                {
                    int xStart = (int)((col + 1) * cellW) - stripWidth;
                    double meanDiff = MeanStripDiff(
                        captureGray, captureW, captureH,
                        templateGray, templateW, templateH,
                        cx0, cy0,
                        xStart, xStart + stripWidth * 2, 1,
                        cy0 + stripInset, (int)((row + 1) * cellH) - stripInset, 2);
                    if (debug) Console.Error.WriteLine($" H ({row},{col})->({row},{col+1}): {meanDiff:F1}{(meanDiff > borderDiffThresh ? " SAME" : "")}");
                    if (meanDiff > borderDiffThresh)
                        Union(row * cols + col, row * cols + col + 1);
                }

                // Check bottom neighbor: horizontal strip straddling the shared edge, every 2nd column
                if (row + 1 < rows && cells[row + 1][col])
                {
                    int yStart = (int)((row + 1) * cellH) - stripWidth;
                    double meanDiff = MeanStripDiff(
                        captureGray, captureW, captureH,
                        templateGray, templateW, templateH,
                        cx0, cy0,
                        cx0 + stripInset, (int)((col + 1) * cellW) - stripInset, 2,
                        yStart, yStart + stripWidth * 2, 1);
                    if (debug) Console.Error.WriteLine($" V ({row},{col})->({row+1},{col}): {meanDiff:F1}{(meanDiff > borderDiffThresh ? " SAME" : "")}");
                    if (meanDiff > borderDiffThresh)
                        Union(row * cols + col, (row + 1) * cols + col);
                }
            }
        }

        // Extract items from union-find groups
        var groups = new Dictionary<int, List<(int row, int col)>>();
        for (int row = 0; row < rows; row++)
        {
            for (int col = 0; col < cols; col++)
            {
                if (!cells[row][col]) continue;
                int root = Find(row * cols + col);
                if (!groups.TryGetValue(root, out var members))
                {
                    members = [];
                    groups[root] = members;
                }
                members.Add((row, col));
            }
        }

        // Each group is reported as its bounding box in cell coordinates.
        var items = new List<GridItem>();
        foreach (var group in groups.Values)
        {
            int minR = group.Min(c => c.row);
            int maxR = group.Max(c => c.row);
            int minC = group.Min(c => c.col);
            int maxC = group.Max(c => c.col);
            items.Add(new GridItem { Row = minR, Col = minC, W = maxC - minC + 1, H = maxR - minR + 1 });
        }

        if (debug)
        {
            Console.Error.WriteLine($" Items found: {items.Count}");
            foreach (var item in items)
                Console.Error.WriteLine($" ({item.Row},{item.Col}) {item.W}x{item.H}");
        }

        // ── Visual matching: find cells similar to target ──
        List<GridMatch>? matches = null;
        if (req.TargetRow >= 0 && req.TargetCol >= 0 &&
            req.TargetRow < rows && req.TargetCol < cols &&
            cells[req.TargetRow][req.TargetCol])
        {
            matches = FindMatchingCells(
                captureGray, captureW, captureH,
                cells, rows, cols, cellW, cellH, border,
                req.TargetRow, req.TargetCol, debug);
        }

        return new GridResponse { Cells = cells, Items = items, Matches = matches };
    }

    /// <summary>
    /// Mean absolute grayscale difference between the capture and the empty-cell template
    /// over a rectangular strip, sampled with the given x/y steps. Template coordinates are
    /// taken relative to the cell origin (<paramref name="cellX0"/>, <paramref name="cellY0"/>).
    /// Samples that fall outside either the capture or the template are skipped, so the
    /// method never indexes past either buffer. Returns 0 when no sample is in range.
    /// </summary>
    private static double MeanStripDiff(
        byte[] captureGray, int captureW, int captureH,
        byte[] templateGray, int templateW, int templateH,
        int cellX0, int cellY0,
        int xFrom, int xTo, int xStep,
        int yFrom, int yTo, int yStep)
    {
        long diffSum = 0;
        int cnt = 0;
        for (int sy = yFrom; sy < yTo; sy += yStep)
        {
            if (sy < 0 || sy >= captureH) continue;
            int tmplY = sy - cellY0;
            if (tmplY < 0 || tmplY >= templateH) continue;

            for (int sx = xFrom; sx < xTo; sx += xStep)
            {
                if (sx < 0 || sx >= captureW) continue;
                int tmplX = sx - cellX0;
                if (tmplX < 0 || tmplX >= templateW) continue;

                diffSum += Math.Abs(captureGray[sy * captureW + sx] - templateGray[tmplY * templateW + tmplX]);
                cnt++;
            }
        }
        return cnt > 0 ? (double)diffSum / cnt : 0;
    }

    /// <summary>
    /// Find all occupied cells visually similar to the target cell using full-resolution NCC
    /// (normalized cross-correlation) over each cell's interior. Returns an empty list when the
    /// interior is too small, the target does not fit in the image, or the target is nearly
    /// uniform (standard deviation below 3).
    /// </summary>
    private List<GridMatch> FindMatchingCells(
        byte[] gray, int imgW, int imgH,
        List<List<bool>> cells, int rows, int cols,
        float cellW, float cellH, int border,
        int targetRow, int targetCol, bool debug)
    {
        int innerW = (int)cellW - border * 2;
        int innerH = (int)cellH - border * 2;
        if (innerW <= 4 || innerH <= 4) return [];

        int tCx0 = (int)(targetCol * cellW) + border;
        int tCy0 = (int)(targetRow * cellH) + border;
        int tInnerW = Math.Min(innerW, imgW - tCx0);
        int tInnerH = Math.Min(innerH, imgH - tCy0);
        if (tInnerW < innerW || tInnerH < innerH) return [];

        int n = innerW * innerH;

        // Pre-compute target cell pixels and stats
        double[] targetPixels = new double[n];
        double tMean = 0;
        for (int py = 0; py < innerH; py++)
        {
            for (int px = 0; px < innerW; px++)
            {
                double v = gray[(tCy0 + py) * imgW + (tCx0 + px)];
                targetPixels[py * innerW + px] = v;
                tMean += v;
            }
        }
        tMean /= n;

        double tStd = 0;
        for (int i = 0; i < n; i++)
            tStd += (targetPixels[i] - tMean) * (targetPixels[i] - tMean);
        tStd = Math.Sqrt(tStd / n);

        if (debug) Console.Error.WriteLine($" Match target ({targetRow},{targetCol}): {innerW}x{innerH} ({n}px), mean={tMean:F1}, std={tStd:F1}");
        if (tStd < 3.0) return [];

        double matchThreshold = 0.70;
        var matches = new List<GridMatch>();

        for (int row = 0; row < rows; row++)
        {
            for (int col = 0; col < cols; col++)
            {
                if (!cells[row][col]) continue;
                if (row == targetRow && col == targetCol) continue;

                int cx0 = (int)(col * cellW) + border;
                int cy0 = (int)(row * cellH) + border;
                int cInnerW = Math.Min(innerW, imgW - cx0);
                int cInnerH = Math.Min(innerH, imgH - cy0);
                // Skip cells whose interior would be clipped by the image edge.
                if (cInnerW < innerW || cInnerH < innerH) continue;

                // Compute NCC at full resolution
                double cMean = 0;
                for (int py = 0; py < innerH; py++)
                    for (int px = 0; px < innerW; px++)
                        cMean += gray[(cy0 + py) * imgW + (cx0 + px)];
                cMean /= n;

                double cStd = 0, cross = 0;
                for (int py = 0; py < innerH; py++)
                {
                    for (int px = 0; px < innerW; px++)
                    {
                        double cv = gray[(cy0 + py) * imgW + (cx0 + px)] - cMean;
                        double tv = targetPixels[py * innerW + px] - tMean;
                        cStd += cv * cv;
                        cross += tv * cv;
                    }
                }
                cStd = Math.Sqrt(cStd / n);

                double ncc = (tStd > 0 && cStd > 0) ? cross / (n * tStd * cStd) : 0;

                if (debug && ncc > 0.5)
                    Console.Error.WriteLine($" ({row},{col}): NCC={ncc:F3}");

                if (ncc >= matchThreshold)
                    matches.Add(new GridMatch { Row = row, Col = col, Similarity = Math.Round(ncc, 3) });
            }
        }

        if (debug) Console.Error.WriteLine($" Matches for ({targetRow},{targetCol}): {matches.Count}");
        return matches;
    }

    /// <summary>
    /// Lazily loads <c>assets/empty70.png</c> and <c>assets/empty35.png</c> from the directory
    /// five levels above the executable. Does nothing once the 70px template is loaded; while it
    /// is missing, the 35px template is loaded at most once rather than re-decoded on every call.
    /// </summary>
    private void LoadTemplatesIfNeeded()
    {
        if (_emptyTemplate70Gray != null) return;

        // Look for templates relative to exe directory
        var exeDir = AppContext.BaseDirectory;
        // Templates are in assets/ at project root — walk up from bin/Release/net8.0-.../
        var projectRoot = Path.GetFullPath(Path.Combine(exeDir, "..", "..", "..", "..", ".."));
        var t70Path = Path.Combine(projectRoot, "assets", "empty70.png");
        var t35Path = Path.Combine(projectRoot, "assets", "empty35.png");

        if (File.Exists(t70Path))
        {
            using var bmp = new Bitmap(t70Path);
            _emptyTemplate70W = bmp.Width;
            _emptyTemplate70H = bmp.Height;
            (_emptyTemplate70Gray, _emptyTemplate70Argb, _emptyTemplate70Stride) = ImageUtils.BitmapToGrayAndArgb(bmp);
        }
        if (_emptyTemplate35Gray == null && File.Exists(t35Path))
        {
            using var bmp = new Bitmap(t35Path);
            _emptyTemplate35W = bmp.Width;
            _emptyTemplate35H = bmp.Height;
            (_emptyTemplate35Gray, _emptyTemplate35Argb, _emptyTemplate35Stride) = ImageUtils.BitmapToGrayAndArgb(bmp);
        }
    }
}
|
||||
37
tools/OcrDaemon/ImagePreprocessor.cs
Normal file
37
tools/OcrDaemon/ImagePreprocessor.cs
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using OpenCvSharp;
|
||||
using OpenCvSharp.Extensions;
|
||||
|
||||
/// <summary>
/// Image clean-up applied before OCR.
/// </summary>
static class ImagePreprocessor
{
    /// <summary>
    /// Pre-process an image for OCR using morphological white top-hat filtering.
    /// Isolates bright tooltip text, suppresses dim background text visible through overlay.
    /// Pipeline: grayscale → morphological top-hat → Otsu binary → 2x upscale
    /// </summary>
    /// <param name="src">Source bitmap; it is not modified or disposed.</param>
    /// <returns>A new bitmap, twice the source width and height, owned by the caller.</returns>
    public static Bitmap PreprocessForOcr(Bitmap src)
    {
        using var mat = BitmapConverter.ToMat(src);
        using var gray = new Mat();

        // The converted Mat's channel count follows the bitmap's pixel format, so choose
        // the grayscale conversion accordingly instead of assuming 4-channel BGRA
        // (CvtColor with BGRA2GRAY fails on 3- or 1-channel input).
        switch (mat.Channels())
        {
            case 1:
                mat.CopyTo(gray);
                break;
            case 3:
                Cv2.CvtColor(mat, gray, ColorConversionCodes.BGR2GRAY);
                break;
            default:
                Cv2.CvtColor(mat, gray, ColorConversionCodes.BGRA2GRAY);
                break;
        }

        // Morphological white top-hat: isolates bright text on dark background
        // Kernel size 25x25 captures text strokes, suppresses dim background text
        using var kernel = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(25, 25));
        using var tophat = new Mat();
        Cv2.MorphologyEx(gray, tophat, MorphTypes.TopHat, kernel);

        // Otsu binarization: automatic threshold, black text on white
        using var binary = new Mat();
        Cv2.Threshold(tophat, binary, 0, 255, ThresholdTypes.BinaryInv | ThresholdTypes.Otsu);

        // 2x upscale for better LSTM recognition
        using var upscaled = new Mat();
        Cv2.Resize(binary, upscaled, new OpenCvSharp.Size(binary.Width * 2, binary.Height * 2),
            interpolation: InterpolationFlags.Cubic);

        return BitmapConverter.ToBitmap(upscaled);
    }
}
|
||||
89
tools/OcrDaemon/ImageUtils.cs
Normal file
89
tools/OcrDaemon/ImageUtils.cs
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
using Tesseract;
|
||||
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||
|
||||
/// <summary>
/// Shared helpers for converting bitmaps and extracting OCR results.
/// </summary>
static class ImageUtils
{
    /// <summary>
    /// Converts a GDI+ bitmap to a Tesseract <see cref="Pix"/> by round-tripping through an
    /// in-memory PNG. The caller owns (and must dispose) the returned <see cref="Pix"/>.
    /// </summary>
    public static Pix BitmapToPix(Bitmap bitmap)
    {
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        return Pix.LoadFromMemory(ms.ToArray());
    }

    /// <summary>
    /// Collects recognized text lines from an OCR page. Words that are blank or whose
    /// confidence is below 50 are dropped; lines left with no words are omitted.
    /// </summary>
    /// <param name="page">Recognized page to walk.</param>
    /// <param name="offsetX">Value added to every word's X coordinate.</param>
    /// <param name="offsetY">Value added to every word's Y coordinate.</param>
    /// <returns>Lines in iteration order, each with its accepted words joined by single spaces.</returns>
    public static List<OcrLineResult> ExtractLinesFromPage(Page page, int offsetX, int offsetY)
    {
        var lines = new List<OcrLineResult>();
        using var iter = page.GetIterator();
        if (iter == null) return lines;

        iter.Begin();

        do
        {
            var words = new List<OcrWordResult>();
            do
            {
                var wordText = iter.GetText(PageIteratorLevel.Word);
                if (string.IsNullOrWhiteSpace(wordText)) continue;

                float conf = iter.GetConfidence(PageIteratorLevel.Word);
                if (conf < 50) continue; // reject low-confidence garbage from background bleed

                if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var bounds))
                {
                    words.Add(new OcrWordResult
                    {
                        Text = wordText.Trim(),
                        X = bounds.X1 + offsetX,
                        Y = bounds.Y1 + offsetY,
                        Width = bounds.Width,
                        Height = bounds.Height,
                    });
                }
            } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

            if (words.Count > 0)
            {
                var lineText = string.Join(" ", words.Select(w => w.Text));
                lines.Add(new OcrLineResult { Text = lineText, Words = words });
            }
            // Advance to the next text line anywhere on the page. Bounding the step to the
            // current block would stop after the first block and silently drop later lines
            // whenever the layout analysis yields more than one block.
        } while (iter.Next(PageIteratorLevel.TextLine));

        return lines;
    }

    /// <summary>
    /// Copies a bitmap's pixels as 32bpp ARGB and derives a tightly packed grayscale plane.
    /// </summary>
    /// <param name="bmp">Bitmap to read; it is locked read-only and unlocked before returning.</param>
    /// <returns>
    /// <c>gray</c>: width*height bytes, row-major, each the integer mean of the first three
    /// bytes of the pixel; <c>argb</c>: the raw locked buffer (stride*height bytes);
    /// <c>stride</c>: bytes per row of <c>argb</c>.
    /// </returns>
    public static (byte[] gray, byte[] argb, int stride) BitmapToGrayAndArgb(Bitmap bmp)
    {
        int w = bmp.Width, h = bmp.Height;
        var data = bmp.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        byte[] argb;
        int stride;
        try
        {
            stride = data.Stride;
            argb = new byte[stride * h];
            Marshal.Copy(data.Scan0, argb, 0, argb.Length);
        }
        finally
        {
            // Always release the lock, even if the allocation or copy fails.
            bmp.UnlockBits(data);
        }

        byte[] gray = new byte[w * h];
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                int i = y * stride + x * 4;
                gray[y * w + x] = (byte)((argb[i] + argb[i + 1] + argb[i + 2]) / 3);
            }
        }
        return (gray, argb, stride);
    }

    /// <summary>
    /// Chooses an image format from a file path's extension (case-insensitive):
    /// <c>.jpg</c>/<c>.jpeg</c> → JPEG, <c>.bmp</c> → BMP, anything else → PNG.
    /// </summary>
    public static SdImageFormat GetImageFormat(string path)
    {
        var ext = Path.GetExtension(path).ToLowerInvariant();
        return ext switch
        {
            ".jpg" or ".jpeg" => SdImageFormat.Jpeg,
            ".bmp" => SdImageFormat.Bmp,
            _ => SdImageFormat.Png,
        };
    }
}
|
||||
210
tools/OcrDaemon/Models.cs
Normal file
210
tools/OcrDaemon/Models.cs
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
/// <summary>
/// One JSON request line read by the daemon. Fields not relevant to a command keep their defaults.
/// </summary>
class Request
{
    /// <summary>Command name used to select a handler (compared case-insensitively).</summary>
    [JsonPropertyName("cmd")] public string? Cmd { get; set; }

    /// <summary>Region to capture or analyse; required by the grid and detect-grid commands.</summary>
    [JsonPropertyName("region")] public RegionRect? Region { get; set; }

    /// <summary>Serialized as <c>path</c>.</summary>
    [JsonPropertyName("path")] public string? Path { get; set; }

    /// <summary>Number of grid columns for the grid command; must be positive there.</summary>
    [JsonPropertyName("cols")] public int Cols { get; set; }

    /// <summary>Number of grid rows for the grid command; must be positive there.</summary>
    [JsonPropertyName("rows")] public int Rows { get; set; }

    /// <summary>Occupancy threshold for the grid command; non-positive selects the handler default.</summary>
    [JsonPropertyName("threshold")] public int Threshold { get; set; }

    /// <summary>Smallest cell size tried by detect-grid; non-positive selects the handler default.</summary>
    [JsonPropertyName("minCellSize")] public int MinCellSize { get; set; }

    /// <summary>Largest cell size tried by detect-grid; non-positive selects the handler default.</summary>
    [JsonPropertyName("maxCellSize")] public int MaxCellSize { get; set; }

    /// <summary>Optional file handed to the capture-or-load step along with the region.</summary>
    [JsonPropertyName("file")] public string? File { get; set; }

    /// <summary>When true, handlers write diagnostic lines to stderr.</summary>
    [JsonPropertyName("debug")] public bool Debug { get; set; }

    /// <summary>Row of the cell to match against in the grid command; -1 (default) means none.</summary>
    [JsonPropertyName("targetRow")] public int TargetRow { get; set; } = -1;

    /// <summary>Column of the cell to match against in the grid command; -1 (default) means none.</summary>
    [JsonPropertyName("targetCol")] public int TargetCol { get; set; } = -1;
}
|
||||
|
||||
/// <summary>
/// Axis-aligned rectangle with integer origin and size, serialized as
/// <c>x</c>, <c>y</c>, <c>width</c>, <c>height</c>.
/// </summary>
class RegionRect
{
    /// <summary>Horizontal origin.</summary>
    [JsonPropertyName("x")] public int X { get; set; }

    /// <summary>Vertical origin.</summary>
    [JsonPropertyName("y")] public int Y { get; set; }

    /// <summary>Horizontal size.</summary>
    [JsonPropertyName("width")] public int Width { get; set; }

    /// <summary>Vertical size.</summary>
    [JsonPropertyName("height")] public int Height { get; set; }
}
|
||||
|
||||
/// <summary>
/// Startup handshake message; both flags are always true.
/// </summary>
class ReadyResponse
{
    /// <summary>Always true.</summary>
    [JsonPropertyName("ok")] public bool Ok => true;

    /// <summary>Always true.</summary>
    [JsonPropertyName("ready")] public bool Ready => true;
}
|
||||
|
||||
/// <summary>
/// Bare success acknowledgement carrying no payload.
/// </summary>
class OkResponse
{
    /// <summary>Always true.</summary>
    [JsonPropertyName("ok")] public bool Ok => true;
}
|
||||
|
||||
/// <summary>
/// Failure response: <c>ok</c> is always false and <c>error</c> carries the supplied message.
/// </summary>
/// <param name="message">Text reported in the <c>error</c> field.</param>
class ErrorResponse(string message)
{
    /// <summary>Always false.</summary>
    [JsonPropertyName("ok")] public bool Ok => false;

    /// <summary>The message passed to the constructor.</summary>
    [JsonPropertyName("error")] public string Error => message;
}
|
||||
|
||||
/// <summary>
/// Successful OCR result: the recognized text plus its per-line breakdown.
/// </summary>
class OcrResponse
{
    /// <summary>Always true.</summary>
    [JsonPropertyName("ok")] public bool Ok => true;

    /// <summary>Recognized text; empty by default.</summary>
    [JsonPropertyName("text")] public string Text { get; set; } = "";

    /// <summary>Recognized lines; empty by default.</summary>
    [JsonPropertyName("lines")] public List<OcrLineResult> Lines { get; set; } = [];
}
|
||||
|
||||
/// <summary>
/// Successful diff-OCR result: recognized text, its lines, and an optional region.
/// </summary>
class DiffOcrResponse
{
    /// <summary>Always true.</summary>
    [JsonPropertyName("ok")] public bool Ok => true;

    /// <summary>Recognized text; empty by default.</summary>
    [JsonPropertyName("text")] public string Text { get; set; } = "";

    /// <summary>Recognized lines; empty by default.</summary>
    [JsonPropertyName("lines")] public List<OcrLineResult> Lines { get; set; } = [];

    /// <summary>Region associated with the result; null when not set.</summary>
    [JsonPropertyName("region")] public RegionRect? Region { get; set; }
}
|
||||
|
||||
/// <summary>
/// One recognized text line together with the words it was built from.
/// </summary>
class OcrLineResult
{
    /// <summary>Line text; empty by default.</summary>
    [JsonPropertyName("text")] public string Text { get; set; } = "";

    /// <summary>Words belonging to this line; empty by default.</summary>
    [JsonPropertyName("words")] public List<OcrWordResult> Words { get; set; } = [];
}
|
||||
|
||||
/// <summary>
/// One recognized word and its bounding box.
/// </summary>
class OcrWordResult
{
    /// <summary>Word text; empty by default.</summary>
    [JsonPropertyName("text")] public string Text { get; set; } = "";

    /// <summary>Left edge of the bounding box.</summary>
    [JsonPropertyName("x")] public int X { get; set; }

    /// <summary>Top edge of the bounding box.</summary>
    [JsonPropertyName("y")] public int Y { get; set; }

    /// <summary>Bounding box width.</summary>
    [JsonPropertyName("width")] public int Width { get; set; }

    /// <summary>Bounding box height.</summary>
    [JsonPropertyName("height")] public int Height { get; set; }
}
|
||||
|
||||
/// <summary>
/// Successful capture result carrying the image as a string.
/// </summary>
class CaptureResponse
{
    /// <summary>Always true.</summary>
    [JsonPropertyName("ok")] public bool Ok => true;

    /// <summary>Image payload; empty by default.</summary>
    [JsonPropertyName("image")] public string Image { get; set; } = "";
}
|
||||
|
||||
/// <summary>
/// Successful grid-scan result: a boolean cell matrix plus optional item and match lists.
/// </summary>
class GridResponse
{
    /// <summary>Always <c>true</c>; serialized as <c>ok</c>.</summary>
    [JsonPropertyName("ok")]
    public bool Ok { get; } = true;

    /// <summary>Per-cell flags as a list of rows; defaults to an empty list.</summary>
    [JsonPropertyName("cells")]
    public List<List<bool>> Cells { get; set; } = new();

    /// <summary>Optional item list; <c>null</c> when not provided.</summary>
    [JsonPropertyName("items")]
    public List<GridItem>? Items { get; set; }

    /// <summary>Optional match list; <c>null</c> when not provided.</summary>
    [JsonPropertyName("matches")]
    public List<GridMatch>? Matches { get; set; }
}
|
||||
|
||||
/// <summary>
/// An item located on the grid, described by its cell position and extent.
/// NOTE(review): W/H are presumably measured in cells — confirm against the producer.
/// </summary>
class GridItem
{
    /// <summary>Serialized as <c>row</c>.</summary>
    [JsonPropertyName("row")]
    public int Row { get; set; }

    /// <summary>Serialized as <c>col</c>.</summary>
    [JsonPropertyName("col")]
    public int Col { get; set; }

    /// <summary>Serialized as <c>w</c>.</summary>
    [JsonPropertyName("w")]
    public int W { get; set; }

    /// <summary>Serialized as <c>h</c>.</summary>
    [JsonPropertyName("h")]
    public int H { get; set; }
}
|
||||
|
||||
/// <summary>
/// A grid cell reported as a match, with its similarity score.
/// NOTE(review): the scale of <see cref="Similarity"/> is not visible here — confirm against the producer.
/// </summary>
class GridMatch
{
    /// <summary>Serialized as <c>row</c>.</summary>
    [JsonPropertyName("row")]
    public int Row { get; set; }

    /// <summary>Serialized as <c>col</c>.</summary>
    [JsonPropertyName("col")]
    public int Col { get; set; }

    /// <summary>Serialized as <c>similarity</c>.</summary>
    [JsonPropertyName("similarity")]
    public double Similarity { get; set; }
}
|
||||
|
||||
/// <summary>
/// Successful grid-detection result: whether a grid was found and, if so,
/// its region, dimensions and cell size.
/// </summary>
class DetectGridResponse
{
    /// <summary>Always <c>true</c>; serialized as <c>ok</c>.</summary>
    [JsonPropertyName("ok")]
    public bool Ok { get; } = true;

    /// <summary>Serialized as <c>detected</c>.</summary>
    [JsonPropertyName("detected")]
    public bool Detected { get; set; }

    /// <summary>Grid rectangle, or <c>null</c> when not set.</summary>
    [JsonPropertyName("region")]
    public RegionRect? Region { get; set; }

    /// <summary>Serialized as <c>cols</c>.</summary>
    [JsonPropertyName("cols")]
    public int Cols { get; set; }

    /// <summary>Serialized as <c>rows</c>.</summary>
    [JsonPropertyName("rows")]
    public int Rows { get; set; }

    /// <summary>Serialized as <c>cellWidth</c>.</summary>
    [JsonPropertyName("cellWidth")]
    public double CellWidth { get; set; }

    /// <summary>Serialized as <c>cellHeight</c>.</summary>
    [JsonPropertyName("cellHeight")]
    public double CellHeight { get; set; }
}
|
||||
|
|
@ -8,6 +8,9 @@
|
|||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="OpenCvSharp4" Version="4.11.0.20250507" />
|
||||
<PackageReference Include="OpenCvSharp4.Extensions" Version="4.11.0.20250507" />
|
||||
<PackageReference Include="OpenCvSharp4.runtime.win" Version="4.11.0.20250507" />
|
||||
<PackageReference Include="System.Drawing.Common" Version="8.0.12" />
|
||||
<PackageReference Include="Tesseract" Version="5.2.0" />
|
||||
</ItemGroup>
|
||||
|
|
|
|||
254
tools/OcrDaemon/OcrHandler.cs
Normal file
254
tools/OcrDaemon/OcrHandler.cs
Normal file
|
|
@ -0,0 +1,254 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
using Tesseract;
|
||||
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||
|
||||
/// <summary>
/// Implements the image-related daemon commands (ocr, screenshot, capture,
/// snapshot, diff-ocr) on top of a shared Tesseract engine.
/// </summary>
class OcrHandler(TesseractEngine engine)
{
    // Longest stretch of inactive rows/columns that is still bridged inside one run.
    private const int MaxRunGap = 15;

    // A detected region smaller than this in either dimension is not treated as a tooltip.
    private const int MinTooltipSize = 50;

    // Frame stored by HandleSnapshot and compared against by HandleDiffOcr. Owned by this handler.
    private Bitmap? _referenceFrame;

    /// <summary>
    /// Captures (or loads) an image and runs OCR over it.
    /// </summary>
    /// <param name="req">Request; <c>File</c> and <c>Region</c> select the image source.</param>
    /// <returns>An <see cref="OcrResponse"/> with the recognized text and lines.</returns>
    public object HandleOcr(Request req)
    {
        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var pix = ImageUtils.BitmapToPix(bitmap);
        using var page = engine.Process(pix);

        var text = page.GetText();
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
        return new OcrResponse { Text = text, Lines = lines };
    }

    /// <summary>
    /// Saves an image to <c>req.Path</c>. If a reference frame exists it is saved
    /// (the same image used for diff-ocr); otherwise a new frame is captured.
    /// </summary>
    /// <param name="req">Request; <c>Path</c> is required.</param>
    /// <returns><see cref="OkResponse"/> on success, <see cref="ErrorResponse"/> when <c>Path</c> is missing.</returns>
    public object HandleScreenshot(Request req)
    {
        if (string.IsNullOrEmpty(req.Path))
            return new ErrorResponse("screenshot command requires 'path'");

        // Only a freshly captured frame is ours to dispose; the reference frame must survive.
        Bitmap? captured = _referenceFrame is null
            ? ScreenCapture.CaptureOrLoad(req.File, req.Region)
            : null;
        try
        {
            var bitmap = _referenceFrame ?? captured!;
            var format = ImageUtils.GetImageFormat(req.Path);
            EnsureParentDirectory(req.Path);
            bitmap.Save(req.Path, format);
            return new OkResponse();
        }
        finally
        {
            // Runs on the failure path too, so a failed save no longer leaks the capture.
            captured?.Dispose();
        }
    }

    /// <summary>
    /// Captures (or loads) an image and returns it as a base64-encoded PNG.
    /// </summary>
    /// <param name="req">Request; <c>File</c> and <c>Region</c> select the image source.</param>
    /// <returns>A <see cref="CaptureResponse"/> holding the encoded image.</returns>
    public object HandleCapture(Request req)
    {
        using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        var base64 = Convert.ToBase64String(ms.ToArray());
        return new CaptureResponse { Image = base64 };
    }

    /// <summary>
    /// Captures (or loads) a frame and stores it as the reference for later diff-ocr calls.
    /// </summary>
    /// <param name="req">Request; <c>File</c> and <c>Region</c> select the image source.</param>
    /// <returns>An <see cref="OkResponse"/>.</returns>
    public object HandleSnapshot(Request req)
    {
        // Capture first: if this throws, the previous reference stays valid instead of
        // leaving a disposed bitmap in the field.
        var fresh = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        _referenceFrame?.Dispose();
        _referenceFrame = fresh;
        return new OkResponse();
    }

    /// <summary>
    /// Compares the current frame against the stored reference, locates the largest
    /// region that became darker (a tooltip is a dark overlay), and OCRs that region.
    /// </summary>
    /// <param name="req">
    /// Request; <c>File</c> selects the image source, <c>Threshold</c> overrides the darkening
    /// threshold when positive, <c>Path</c> (optional) receives the raw and preprocessed crops,
    /// <c>Debug</c> enables extra stderr logging.
    /// </param>
    /// <returns>
    /// <see cref="ErrorResponse"/> when no snapshot is stored; an empty <see cref="OcrResponse"/>
    /// when nothing tooltip-sized changed; otherwise a <see cref="DiffOcrResponse"/> whose line
    /// coordinates are offset by the detected region's origin.
    /// </returns>
    public object HandleDiffOcr(Request req)
    {
        if (_referenceFrame == null)
            return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");

        using var current = ScreenCapture.CaptureOrLoad(req.File, null);

        // Only the area common to both frames can be compared.
        int w = Math.Min(_referenceFrame.Width, current.Width);
        int h = Math.Min(_referenceFrame.Height, current.Height);

        // Both buffers are tightly packed (w * 4 bytes per row), so a single index is valid
        // for both frames even when the bitmaps have different widths/strides.
        byte[] refPx = CopyBgraPixels(_referenceFrame, w, h);
        byte[] curPx = CopyBgraPixels(current, w, h);
        int rowBytes = w * 4;

        // Detect pixels that got DARKER (tooltip = dark overlay).
        // This filters out item highlight glow (brighter) and cursor changes.
        int diffThresh = req.Threshold > 0 ? req.Threshold : 30;
        bool[] changed = new bool[w * h];
        int[] rowCounts = new int[h];
        int totalChanged = 0;

        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                int i = y * rowBytes + x * 4;
                int darkerB = refPx[i] - curPx[i];
                int darkerG = refPx[i + 1] - curPx[i + 1];
                int darkerR = refPx[i + 2] - curPx[i + 2];
                if (darkerB + darkerG + darkerR > diffThresh)
                {
                    changed[y * w + x] = true;
                    rowCounts[y]++;
                    totalChanged++;
                }
            }
        }

        bool debug = req.Debug;

        if (totalChanged == 0)
        {
            if (debug) Console.Error.WriteLine(" diff-ocr: no changes detected");
            return new OcrResponse { Text = "", Lines = [] };
        }

        // Two-pass density detection:
        // Pass 1: find the row range using full-width row counts.
        // Pass 2: find the column range using only pixels within the detected row range.
        // This makes the column threshold relative to tooltip height, not screen height.
        int rowThresh = w / 30; // ~3% of width
        var (bestRowStart, bestRowEnd, bestRowLen) = FindLongestRun(rowCounts, rowThresh, MaxRunGap);

        int[] colCounts = new int[w];
        for (int y = bestRowStart; y <= bestRowEnd; y++)
            for (int x = 0; x < w; x++)
                if (changed[y * w + x])
                    colCounts[x]++;

        int tooltipHeight = bestRowEnd - bestRowStart + 1;
        int colThresh = tooltipHeight / 15; // ~7% of tooltip height
        var (bestColStart, bestColEnd, bestColLen) = FindLongestRun(colCounts, colThresh, MaxRunGap);

        // Log density detection results
        Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");

        if (bestRowLen < MinTooltipSize || bestColLen < MinTooltipSize)
        {
            Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
            return new OcrResponse { Text = "", Lines = [] };
        }

        int pad = 0;
        int minX = Math.Max(bestColStart - pad, 0);
        int minY = Math.Max(bestRowStart - pad, 0);
        int maxX = Math.Min(bestColEnd + pad, w - 1);
        int maxY = Math.Min(bestRowEnd + pad, h - 1);

        // Dynamic right-edge trim: if the rightmost columns are much sparser than
        // the tooltip body, trim them. This handles the ~5% of cases where ambient
        // noise extends the detected region slightly on the right.
        int colSpan = maxX - minX + 1;
        if (colSpan > 100)
        {
            // Average column density over the middle 50% of the range
            int q1 = minX + colSpan / 4;
            int q3 = minX + colSpan * 3 / 4;
            long midSum = 0;
            int midCount = 0;
            for (int x = q1; x <= q3; x++) { midSum += colCounts[x]; midCount++; }
            double avgMidDensity = (double)midSum / midCount;
            double cutoff = avgMidDensity * 0.3; // column must have >=30% of avg density

            // Trim from right while below cutoff
            while (maxX > minX + 100 && colCounts[maxX] < cutoff)
                maxX--;
        }

        int rw = maxX - minX + 1;
        int rh = maxY - minY + 1;

        if (debug) Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");

        // Simple crop of the tooltip region from the current frame (no per-pixel masking).
        using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);

        // Save the raw crop if a path is provided
        if (!string.IsNullOrEmpty(req.Path))
        {
            EnsureParentDirectory(req.Path);
            cropped.Save(req.Path, ImageUtils.GetImageFormat(req.Path));
            if (debug) Console.Error.WriteLine($" diff-ocr: saved raw to {req.Path}");
        }

        // Pre-process for OCR
        using var processed = ImagePreprocessor.PreprocessForOcr(cropped);

        // Save preprocessed version alongside raw ("name.ext" -> "name.pre.ext")
        if (!string.IsNullOrEmpty(req.Path))
        {
            var ext = Path.GetExtension(req.Path);
            var prePath = Path.ChangeExtension(req.Path, ".pre" + ext);
            processed.Save(prePath, ImageUtils.GetImageFormat(prePath));
            if (debug) Console.Error.WriteLine($" diff-ocr: saved preprocessed to {prePath}");
        }

        using var pix = ImageUtils.BitmapToPix(processed);
        using var page = engine.Process(pix);

        var text = page.GetText();
        var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: minX, offsetY: minY);

        return new DiffOcrResponse
        {
            Text = text,
            Lines = lines,
            Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
        };
    }

    /// <summary>Creates the directory that will contain <paramref name="path"/>, if it names one.</summary>
    private static void EnsureParentDirectory(string path)
    {
        var dir = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
    }

    /// <summary>
    /// Copies the top-left <paramref name="width"/> x <paramref name="height"/> area of
    /// <paramref name="bitmap"/> into a tightly packed 32bpp buffer (4 bytes per pixel,
    /// <c>width * 4</c> bytes per row), independent of the bitmap's own stride.
    /// </summary>
    private static byte[] CopyBgraPixels(Bitmap bitmap, int width, int height)
    {
        int rowBytes = width * 4;
        byte[] pixels = new byte[rowBytes * height];
        var data = bitmap.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
        try
        {
            // Row-by-row so the source stride (which may exceed width * 4) never leaks into the buffer.
            for (int y = 0; y < height; y++)
                Marshal.Copy(IntPtr.Add(data.Scan0, y * data.Stride), pixels, y * rowBytes, rowBytes);
        }
        finally
        {
            bitmap.UnlockBits(data);
        }
        return pixels;
    }

    /// <summary>
    /// Finds the longest run of indices whose count is at least <paramref name="threshold"/>,
    /// bridging inactive stretches of up to <paramref name="maxGap"/> entries.
    /// </summary>
    /// <returns>
    /// Inclusive start/end indices and the run length; <c>(0, 0, 0)</c> when no index is active.
    /// </returns>
    private static (int Start, int End, int Length) FindLongestRun(int[] counts, int threshold, int maxGap)
    {
        int bestStart = 0, bestEnd = 0, bestLen = 0;
        int runStart = -1, lastActive = -1;

        void CloseRun()
        {
            int len = lastActive - runStart + 1;
            if (len > bestLen)
            {
                bestStart = runStart;
                bestEnd = lastActive;
                bestLen = len;
            }
            runStart = -1;
        }

        for (int i = 0; i < counts.Length; i++)
        {
            if (counts[i] >= threshold)
            {
                if (runStart < 0) runStart = i;
                lastActive = i;
            }
            else if (runStart >= 0 && i - lastActive > maxGap)
            {
                CloseRun();
            }
        }

        // A run still open at the end of the array counts too.
        if (runStart >= 0)
            CloseRun();

        return (bestStart, bestEnd, bestLen);
    }
}
|
||||
File diff suppressed because it is too large
Load diff
65
tools/OcrDaemon/ScreenCapture.cs
Normal file
65
tools/OcrDaemon/ScreenCapture.cs
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
/// <summary>
/// Screen/file image acquisition helpers used by the daemon's handlers.
/// </summary>
static class ScreenCapture
{
    // GetSystemMetrics indices for the primary monitor's size.
    private const int SmCxScreen = 0; // SM_CXSCREEN
    private const int SmCyScreen = 1; // SM_CYSCREEN

    [DllImport("user32.dll")]
    private static extern bool SetProcessDPIAware();

    [DllImport("user32.dll")]
    private static extern int GetSystemMetrics(int nIndex);

    /// <summary>Calls <c>SetProcessDPIAware</c> for the current process.</summary>
    public static void InitDpiAwareness() => SetProcessDPIAware();

    /// <summary>
    /// Capture from screen, or load from file if specified.
    /// When file is set, loads the image and crops to region.
    /// </summary>
    /// <param name="file">Image file to load; when null or empty the screen is captured instead.</param>
    /// <param name="region">Optional rectangle to capture/crop; <c>null</c> means the whole image or primary screen.</param>
    /// <returns>A new bitmap that the caller owns and must dispose.</returns>
    /// <exception cref="ArgumentException">
    /// The region does not overlap the loaded image (the crop would be empty).
    /// </exception>
    public static Bitmap CaptureOrLoad(string? file, RegionRect? region)
    {
        if (string.IsNullOrEmpty(file))
            return CaptureScreen(region);

        var fullBmp = new Bitmap(file);
        if (region == null)
            return fullBmp;

        // The full image is only needed for the crop; dispose it even if the crop throws.
        using (fullBmp)
        {
            int cx = Math.Max(0, region.X);
            int cy = Math.Max(0, region.Y);
            int cw = Math.Min(region.Width, fullBmp.Width - cx);
            int ch = Math.Min(region.Height, fullBmp.Height - cy);

            // Fail with a readable message instead of an opaque GDI+ error from Clone.
            if (cw <= 0 || ch <= 0)
                throw new ArgumentException(
                    $"Region ({region.X},{region.Y}) {region.Width}x{region.Height} does not overlap the {fullBmp.Width}x{fullBmp.Height} image.",
                    nameof(region));

            return fullBmp.Clone(new Rectangle(cx, cy, cw, ch), PixelFormat.Format32bppArgb);
        }
    }

    /// <summary>
    /// Copies pixels from the screen into a new 32bpp ARGB bitmap.
    /// </summary>
    /// <param name="region">Screen rectangle to copy; <c>null</c> captures the primary monitor.</param>
    /// <returns>A new bitmap that the caller owns and must dispose.</returns>
    public static Bitmap CaptureScreen(RegionRect? region)
    {
        int x, y, w, h;
        if (region != null)
        {
            x = region.X;
            y = region.Y;
            w = region.Width;
            h = region.Height;
        }
        else
        {
            // Primary monitor only (0,0 origin, SM_CXSCREEN / SM_CYSCREEN)
            x = 0;
            y = 0;
            w = GetSystemMetrics(SmCxScreen);
            h = GetSystemMetrics(SmCyScreen);
        }

        var bitmap = new Bitmap(w, h, PixelFormat.Format32bppArgb);
        try
        {
            using var g = Graphics.FromImage(bitmap);
            g.CopyFromScreen(x, y, 0, 0, new System.Drawing.Size(w, h), CopyPixelOperation.SourceCopy);
            return bitmap;
        }
        catch
        {
            // Ownership never reached the caller, so release the bitmap before propagating.
            bitmap.Dispose();
            throw;
        }
    }
}
|
||||
177
tools/OcrDaemon/SignalProcessing.cs
Normal file
177
tools/OcrDaemon/SignalProcessing.cs
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
/// <summary>
/// 1D signal helpers: period detection by autocorrelation, thresholded segment
/// extraction, and periodic-extent detection.
/// </summary>
static class SignalProcessing
{
    /// <summary>
    /// Find the dominant period in a signal using autocorrelation.
    /// Returns (period, score) where score is the autocorrelation strength.
    /// </summary>
    /// <param name="signal">Input samples.</param>
    /// <param name="minPeriod">Smallest lag considered.</param>
    /// <param name="maxPeriod">Largest lag considered (further capped at a third of the signal length).</param>
    /// <returns>
    /// The first lag that is a local maximum with normalized autocorrelation above 0.01, or
    /// <c>(-1, 0)</c> when the signal is shorter than three minimum periods, is nearly constant,
    /// or has no such peak.
    /// </returns>
    public static (int period, double score) FindPeriodWithScore(double[] signal, int minPeriod, int maxPeriod)
    {
        int n = signal.Length;
        if (n < minPeriod * 3) return (-1, 0);

        double mean = signal.Average();

        // Sum of squared deviations; used as the normalizer, so ac[lag] is at most about 1.
        double variance = 0;
        for (int i = 0; i < n; i++)
            variance += (signal[i] - mean) * (signal[i] - mean);
        if (variance < 1.0) return (-1, 0);

        int maxLag = Math.Min(maxPeriod, n / 3);
        double[] ac = new double[maxLag + 1];
        for (int lag = minPeriod; lag <= maxLag; lag++)
        {
            double sum = 0;
            for (int i = 0; i < n - lag; i++)
                sum += (signal[i] - mean) * (signal[i + lag] - mean);
            ac[lag] = sum / variance;
        }

        // Find the first significant peak — this is the fundamental period.
        // Using "first" avoids picking harmonics (2x, 3x) or unrelated larger patterns.
        for (int lag = minPeriod + 1; lag < maxLag; lag++)
        {
            if (ac[lag] > 0.01 && ac[lag] >= ac[lag - 1] && ac[lag] >= ac[lag + 1])
                return (lag, ac[lag]);
        }

        return (-1, 0);
    }

    /// <summary>
    /// Find contiguous segments where values are at or above threshold.
    /// Used to find grid panel regions by density of very dark pixels.
    /// Allows brief gaps (up to 5px) to handle grid borders.
    /// </summary>
    /// <param name="profile">Input samples.</param>
    /// <param name="threshold">A sample is active when it is greater than or equal to this value.</param>
    /// <param name="minLength">Segments shorter than this (measured as <c>end - start</c>) are discarded.</param>
    /// <returns>
    /// Segments as <c>(start, end)</c> where <c>start</c> is the first active index and <c>end</c>
    /// is one past the last active index (exclusive). Trailing gap samples are never included.
    /// </returns>
    public static List<(int start, int end)> FindDarkDensitySegments(double[] profile, double threshold, int minLength)
    {
        const int maxGap = 5;

        var segments = new List<(int start, int end)>();
        int n = profile.Length;
        int runStart = -1;
        int lastActive = -1;

        // The end is always lastActive + 1, so a segment closed mid-signal and a segment that
        // reaches the tail use the same exclusive-end convention (and the same length check).
        void CloseRun()
        {
            int end = lastActive + 1;
            if (end - runStart >= minLength)
                segments.Add((runStart, end));
            runStart = -1;
        }

        for (int i = 0; i < n; i++)
        {
            if (profile[i] >= threshold)
            {
                if (runStart < 0) runStart = i;
                lastActive = i;
            }
            else if (runStart >= 0 && i - lastActive > maxGap)
            {
                // The gap has grown past the tolerance: the run ended at lastActive.
                CloseRun();
            }
        }

        if (runStart >= 0)
            CloseRun();

        return segments;
    }

    /// <summary>
    /// Find the extent of the grid in a 1D profile using local autocorrelation
    /// at the specific detected period. Only regions where the signal actually
    /// repeats at the given period will score high — much more precise than variance.
    /// </summary>
    /// <param name="signal">Input samples.</param>
    /// <param name="period">Lag at which repetition is measured.</param>
    /// <returns>
    /// The longest run of positions whose local autocorrelation exceeds 25% of the maximum,
    /// widened by a quarter period on each side and clamped to <c>[0, n - 1]</c>; or
    /// <c>(-1, -1)</c> when the signal is too short or no position scores at least 0.02.
    /// </returns>
    public static (int start, int end) FindGridExtent(double[] signal, int period)
    {
        int n = signal.Length;
        int halfWin = period * 2; // window radius: 2 periods each side
        if (n < halfWin * 2 + period) return (-1, -1);

        // Compute local AC at the specific lag=period in a sliding window
        double[] localAc = new double[n];
        for (int center = halfWin; center < n - halfWin; center++)
        {
            int wStart = center - halfWin;
            int wEnd = center + halfWin;
            int count = wEnd - wStart;

            // Local mean
            double sum = 0;
            for (int i = wStart; i < wEnd; i++)
                sum += signal[i];
            double mean = sum / count;

            // Local sum of squared deviations (normalizer)
            double varSum = 0;
            for (int i = wStart; i < wEnd; i++)
                varSum += (signal[i] - mean) * (signal[i] - mean);

            // Flat window: leave the score at 0.
            if (varSum < 1.0) continue;

            // AC at the specific lag=period
            double acSum = 0;
            for (int i = wStart; i < wEnd - period; i++)
                acSum += (signal[i] - mean) * (signal[i + period] - mean);

            localAc[center] = Math.Max(0, acSum / varSum);
        }

        // Find the longest contiguous run above threshold
        double maxAc = 0;
        for (int i = 0; i < n; i++)
            if (localAc[i] > maxAc) maxAc = localAc[i];
        if (maxAc < 0.02) return (-1, -1);

        double threshold = maxAc * 0.25;

        int bestStart = -1, bestEnd = -1, bestLen = 0;
        int curStartPos = -1;
        for (int i = 0; i < n; i++)
        {
            if (localAc[i] > threshold)
            {
                if (curStartPos < 0) curStartPos = i;
            }
            else if (curStartPos >= 0)
            {
                int len = i - curStartPos;
                if (len > bestLen)
                {
                    bestLen = len;
                    bestStart = curStartPos;
                    bestEnd = i;
                }
                curStartPos = -1;
            }
        }

        // Handle run extending to end of signal
        if (curStartPos >= 0)
        {
            int len = n - curStartPos;
            if (len > bestLen)
            {
                bestLen = len;
                bestStart = curStartPos;
                bestEnd = n;
            }
        }

        if (bestStart < 0) return (-1, -1);

        // Small extension to include cell borders at edges
        bestStart = Math.Max(0, bestStart - period / 4);
        bestEnd = Math.Min(n - 1, bestEnd + period / 4);

        return (bestStart, bestEnd);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue