tooltip bounds
This commit is contained in:
parent
930e00c9cc
commit
bb2b9cf507
7 changed files with 474 additions and 56 deletions
|
|
@ -2,6 +2,7 @@ import express from 'express';
|
||||||
import http from 'http';
|
import http from 'http';
|
||||||
import { WebSocketServer, WebSocket } from 'ws';
|
import { WebSocketServer, WebSocket } from 'ws';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
|
import { mkdir } from 'fs/promises';
|
||||||
import { fileURLToPath } from 'url';
|
import { fileURLToPath } from 'url';
|
||||||
import { logger } from '../util/logger.js';
|
import { logger } from '../util/logger.js';
|
||||||
import { sleep } from '../util/sleep.js';
|
import { sleep } from '../util/sleep.js';
|
||||||
|
|
@ -275,21 +276,47 @@ export class DashboardServer {
|
||||||
...matches.map(m => ({ row: m.row, col: m.col, label: `MATCH ${(m.similarity * 100).toFixed(0)}%` })),
|
...matches.map(m => ({ row: m.row, col: m.col, label: `MATCH ${(m.similarity * 100).toFixed(0)}%` })),
|
||||||
];
|
];
|
||||||
|
|
||||||
// Focus game and hover each cell
|
// Focus game, take one snapshot with mouse on empty space
|
||||||
await this.debug.gameController.focusGame();
|
await this.debug.gameController.focusGame();
|
||||||
|
await mkdir('items', { recursive: true });
|
||||||
|
const tooltips: Array<{ row: number; col: number; label: string; text: string }> = [];
|
||||||
|
const ts = Date.now();
|
||||||
|
const reg = result.layout.region;
|
||||||
|
const cellW = reg.width / result.layout.cols;
|
||||||
|
const cellH = reg.height / result.layout.rows;
|
||||||
|
|
||||||
|
// Move mouse to empty space and take a single reference snapshot
|
||||||
|
this.debug.gameController.moveMouseInstant(reg.x + reg.width + 50, reg.y + reg.height / 2);
|
||||||
|
await sleep(50);
|
||||||
|
await this.debug.screenReader.snapshot();
|
||||||
|
await this.debug.screenReader.saveScreenshot(`items/${ts}_snapshot.png`);
|
||||||
|
await sleep(200); // Let game settle before first hover
|
||||||
|
|
||||||
for (const cell of hoverCells) {
|
for (const cell of hoverCells) {
|
||||||
const center = result.layout.region;
|
const cellStart = performance.now();
|
||||||
const cellW = center.width / result.layout.cols;
|
const x = Math.round(reg.x + cell.col * cellW + cellW / 2);
|
||||||
const cellH = center.height / result.layout.rows;
|
const y = Math.round(reg.y + cell.row * cellH + cellH / 2);
|
||||||
const x = Math.round(center.x + cell.col * cellW + cellW / 2);
|
|
||||||
const y = Math.round(center.y + cell.row * cellH + cellH / 2);
|
// Quick Bézier move to the cell — tooltip appears on hover
|
||||||
this.broadcastLog('info', `Hovering ${cell.label} (${cell.row},${cell.col}) at (${x},${y})...`);
|
await this.debug.gameController.moveMouseFast(x, y);
|
||||||
await this.debug.gameController.moveMouseTo(x, y);
|
await sleep(50);
|
||||||
await sleep(1000);
|
const afterMove = performance.now();
|
||||||
|
|
||||||
|
// Diff-OCR: finds tooltip by row/column density of darkened pixels
|
||||||
|
const imgPath = `items/${ts}_${cell.row}-${cell.col}.png`;
|
||||||
|
const diff = await this.debug.screenReader.diffOcr(imgPath);
|
||||||
|
const afterOcr = performance.now();
|
||||||
|
const text = diff.text.trim();
|
||||||
|
|
||||||
|
const regionInfo = diff.region ? ` at (${diff.region.x},${diff.region.y}) ${diff.region.width}x${diff.region.height}` : '';
|
||||||
|
tooltips.push({ row: cell.row, col: cell.col, label: cell.label, text });
|
||||||
|
|
||||||
|
this.broadcastLog('info',
|
||||||
|
`${cell.label} (${cell.row},${cell.col}) [move: ${(afterMove - cellStart).toFixed(0)}ms, ocr: ${(afterOcr - afterMove).toFixed(0)}ms, total: ${(afterOcr - cellStart).toFixed(0)}ms]${regionInfo}: ${text.substring(0, 150)}${text.length > 150 ? '...' : ''}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
this.broadcastLog('info', `Done — hovered ${hoverCells.length} cells`);
|
this.broadcastLog('info', `Done — hovered ${hoverCells.length} cells, read ${tooltips.filter(t => t.text).length} tooltips`);
|
||||||
res.json({ ok: true, itemSize, matchCount: matches.length, hoveredCount: hoverCells.length });
|
res.json({ ok: true, itemSize, matchCount: matches.length, hoveredCount: hoverCells.length, tooltips });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.error({ err }, 'Debug test-match-hover failed');
|
logger.error({ err }, 'Debug test-match-hover failed');
|
||||||
res.status(500).json({ error: 'Test match hover failed' });
|
res.status(500).json({ error: 'Test match hover failed' });
|
||||||
|
|
|
||||||
|
|
@ -91,6 +91,14 @@ export class GameController {
|
||||||
await this.inputSender.moveMouse(x, y);
|
await this.inputSender.moveMouse(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
moveMouseInstant(x: number, y: number): void {
|
||||||
|
this.inputSender.moveMouseInstant(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
async moveMouseFast(x: number, y: number): Promise<void> {
|
||||||
|
await this.inputSender.moveMouseFast(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
async leftClickAt(x: number, y: number): Promise<void> {
|
async leftClickAt(x: number, y: number): Promise<void> {
|
||||||
await this.inputSender.leftClick(x, y);
|
await this.inputSender.leftClick(x, y);
|
||||||
}
|
}
|
||||||
|
|
@ -99,6 +107,14 @@ export class GameController {
|
||||||
await this.inputSender.rightClick(x, y);
|
await this.inputSender.rightClick(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async holdAlt(): Promise<void> {
|
||||||
|
await this.inputSender.keyDown(VK.MENU);
|
||||||
|
}
|
||||||
|
|
||||||
|
async releaseAlt(): Promise<void> {
|
||||||
|
await this.inputSender.keyUp(VK.MENU);
|
||||||
|
}
|
||||||
|
|
||||||
async pressEscape(): Promise<void> {
|
async pressEscape(): Promise<void> {
|
||||||
await this.inputSender.pressKey(VK.ESCAPE);
|
await this.inputSender.pressKey(VK.ESCAPE);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -222,6 +222,46 @@ export class InputSender {
|
||||||
await randomDelay(5, 15);
|
await randomDelay(5, 15);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
moveMouseInstant(x: number, y: number): void {
|
||||||
|
this.moveMouseRaw(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Fast cursor move along a cubic Bézier curve.
 *
 * Targets closer than 10px are reached with a single raw move. Otherwise the
 * cursor is walked through five eased points on the curve, pausing 2 ms after
 * each one, and is finally snapped onto the exact target. The two control
 * points sit at 30% and 70% of the way along the straight path and are
 * randomly offset by up to ±7.5% of the travel distance.
 */
async moveMouseFast(x: number, y: number): Promise<void> {
  const origin = this.getCursorPos();
  const target: Point = { x, y };
  const deltaX = target.x - origin.x;
  const deltaY = target.y - origin.y;
  const travel = Math.sqrt(deltaX * deltaX + deltaY * deltaY);

  // Too close for a curve to matter: jump straight there.
  if (travel < 10) {
    this.moveMouseRaw(x, y);
    return;
  }

  // Unit vector perpendicular to the straight path, used to bend the curve.
  const normalX = -deltaY / travel;
  const normalY = deltaX / travel;
  const wobble = travel * 0.15;

  // Builds a control point at `fraction` of the path; x is evaluated before y,
  // so the random draws happen in the order cp1.x, cp1.y, cp2.x, cp2.y.
  const controlPointAt = (fraction: number): Point => ({
    x: origin.x + deltaX * fraction + normalX * (Math.random() - 0.5) * wobble,
    y: origin.y + deltaY * fraction + normalY * (Math.random() - 0.5) * wobble,
  });
  const cp1 = controlPointAt(0.3);
  const cp2 = controlPointAt(0.7);

  const stepCount = 5;
  let step = 1;
  while (step <= stepCount) {
    const eased = easeInOutQuad(step / stepCount);
    const waypoint = cubicBezier(eased, origin, cp1, cp2, target);
    this.moveMouseRaw(Math.round(waypoint.x), Math.round(waypoint.y));
    await sleep(2);
    step += 1;
  }

  // Land on the exact requested coordinates regardless of rounding above.
  this.moveMouseRaw(x, y);
}
|
||||||
|
|
||||||
async leftClick(x: number, y: number): Promise<void> {
|
async leftClick(x: number, y: number): Promise<void> {
|
||||||
await this.moveMouse(x, y);
|
await this.moveMouse(x, y);
|
||||||
await randomDelay(20, 50);
|
await randomDelay(20, 50);
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,12 @@ export interface GridScanResult {
|
||||||
matches?: GridMatch[];
|
matches?: GridMatch[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface DiffOcrResponse {
|
||||||
|
text: string;
|
||||||
|
lines: OcrLine[];
|
||||||
|
region?: Region;
|
||||||
|
}
|
||||||
|
|
||||||
export interface DetectGridResult {
|
export interface DetectGridResult {
|
||||||
detected: boolean;
|
detected: boolean;
|
||||||
region?: Region;
|
region?: Region;
|
||||||
|
|
@ -151,6 +157,22 @@ export class OcrDaemon {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async snapshot(): Promise<void> {
|
||||||
|
await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Sends a `diff-ocr` command to the daemon and normalises its reply.
 *
 * @param savePath Optional path forwarded as `path` on the request.
 * @param region   Optional region forwarded as `region` on the request.
 * @returns The reply's text (or `''`), its lines (or `[]`), and its region
 *          as returned by the daemon (may be undefined).
 */
async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
  const request: DaemonRequest = { cmd: 'diff-ocr' };
  if (savePath) {
    request.path = savePath;
  }
  if (region) {
    request.region = region;
  }

  const reply = await this.sendWithRetry(request, REQUEST_TIMEOUT);
  const text = reply.text ?? '';
  const lines = reply.lines ?? [];
  return { text, lines, region: reply.region };
}
|
||||||
|
|
||||||
async saveScreenshot(path: string, region?: Region): Promise<void> {
|
async saveScreenshot(path: string, region?: Region): Promise<void> {
|
||||||
const req: DaemonRequest = { cmd: 'screenshot', path };
|
const req: DaemonRequest = { cmd: 'screenshot', path };
|
||||||
if (region) req.region = region;
|
if (region) req.region = region;
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import { mkdir } from 'fs/promises';
|
import { mkdir } from 'fs/promises';
|
||||||
import { join } from 'path';
|
import { join } from 'path';
|
||||||
import { logger } from '../util/logger.js';
|
import { logger } from '../util/logger.js';
|
||||||
import { OcrDaemon, type OcrResponse } from './OcrDaemon.js';
|
import { OcrDaemon, type OcrResponse, type DiffOcrResponse } from './OcrDaemon.js';
|
||||||
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
|
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
|
||||||
import type { Region } from '../types.js';
|
import type { Region } from '../types.js';
|
||||||
|
|
||||||
|
|
@ -102,6 +102,16 @@ export class ScreenReader {
|
||||||
return pos !== null;
|
return pos !== null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Snapshot / Diff-OCR (for tooltip reading) ──────────────────────
|
||||||
|
|
||||||
|
async snapshot(): Promise<void> {
|
||||||
|
await this.daemon.snapshot();
|
||||||
|
}
|
||||||
|
|
||||||
|
async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
|
||||||
|
return this.daemon.diffOcr(savePath, region);
|
||||||
|
}
|
||||||
|
|
||||||
// ── Save utilities ──────────────────────────────────────────────────
|
// ── Save utilities ──────────────────────────────────────────────────
|
||||||
|
|
||||||
async saveScreenshot(path: string): Promise<void> {
|
async saveScreenshot(path: string): Promise<void> {
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,13 @@
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="System.Drawing.Common" Version="8.0.12" />
|
<PackageReference Include="System.Drawing.Common" Version="8.0.12" />
|
||||||
|
<PackageReference Include="Tesseract" Version="5.2.0" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<None Update="tessdata\eng.traineddata">
|
||||||
|
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||||
|
</None>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
||||||
|
|
@ -4,18 +4,23 @@ using System.Runtime.InteropServices;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
using System.Text.Json.Serialization;
|
using System.Text.Json.Serialization;
|
||||||
using Windows.Graphics.Imaging;
|
using Tesseract;
|
||||||
using Windows.Media.Ocr;
|
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
|
||||||
using Windows.Storage.Streams;
|
|
||||||
|
|
||||||
// Make GDI capture DPI-aware so coordinates match physical pixels
|
// Make GDI capture DPI-aware so coordinates match physical pixels
|
||||||
SetProcessDPIAware();
|
SetProcessDPIAware();
|
||||||
|
|
||||||
// Pre-create the OCR engine (reused across all requests)
|
// Pre-create the Tesseract OCR engine (reused across all requests)
|
||||||
var ocrEngine = OcrEngine.TryCreateFromUserProfileLanguages();
|
var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
|
||||||
if (ocrEngine == null)
|
TesseractEngine tessEngine;
|
||||||
|
try
|
||||||
{
|
{
|
||||||
WriteResponse(new ErrorResponse("Failed to create OCR engine. Ensure a language pack is installed."));
|
tessEngine = new TesseractEngine(tessdataPath, "eng", EngineMode.LstmOnly);
|
||||||
|
tessEngine.DefaultPageSegMode = PageSegMode.Auto;
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
WriteResponse(new ErrorResponse($"Failed to create Tesseract engine: {ex.Message}. Ensure tessdata/eng.traineddata exists."));
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -49,7 +54,7 @@ while ((line = stdin.ReadLine()) != null)
|
||||||
switch (request.Cmd?.ToLowerInvariant())
|
switch (request.Cmd?.ToLowerInvariant())
|
||||||
{
|
{
|
||||||
case "ocr":
|
case "ocr":
|
||||||
HandleOcr(request, ocrEngine);
|
HandleOcr(request, tessEngine);
|
||||||
break;
|
break;
|
||||||
case "screenshot":
|
case "screenshot":
|
||||||
HandleScreenshot(request);
|
HandleScreenshot(request);
|
||||||
|
|
@ -63,6 +68,12 @@ while ((line = stdin.ReadLine()) != null)
|
||||||
case "detect-grid":
|
case "detect-grid":
|
||||||
HandleDetectGrid(request);
|
HandleDetectGrid(request);
|
||||||
break;
|
break;
|
||||||
|
case "snapshot":
|
||||||
|
HandleSnapshot(request);
|
||||||
|
break;
|
||||||
|
case "diff-ocr":
|
||||||
|
HandleDiffOcr(request, tessEngine);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
WriteResponse(new ErrorResponse($"Unknown command: {request.Cmd}"));
|
WriteResponse(new ErrorResponse($"Unknown command: {request.Cmd}"));
|
||||||
break;
|
break;
|
||||||
|
|
@ -78,31 +89,17 @@ return 0;
|
||||||
|
|
||||||
// ── Handlers ────────────────────────────────────────────────────────────────
|
// ── Handlers ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
void HandleOcr(Request req, OcrEngine engine)
|
Bitmap? referenceFrame = null;
|
||||||
|
|
||||||
|
void HandleOcr(Request req, TesseractEngine engine)
|
||||||
{
|
{
|
||||||
using var bitmap = CaptureOrLoad(req.File, req.Region);
|
using var bitmap = CaptureOrLoad(req.File, req.Region);
|
||||||
var softwareBitmap = BitmapToSoftwareBitmap(bitmap);
|
using var pix = BitmapToPix(bitmap);
|
||||||
var result = engine.RecognizeAsync(softwareBitmap).AsTask().GetAwaiter().GetResult();
|
using var page = engine.Process(pix);
|
||||||
|
|
||||||
var lines = new List<OcrLineResult>();
|
var text = page.GetText();
|
||||||
foreach (var ocrLine in result.Lines)
|
var lines = ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
|
||||||
{
|
WriteResponse(new OcrResponse { Text = text, Lines = lines });
|
||||||
var words = new List<OcrWordResult>();
|
|
||||||
foreach (var word in ocrLine.Words)
|
|
||||||
{
|
|
||||||
words.Add(new OcrWordResult
|
|
||||||
{
|
|
||||||
Text = word.Text,
|
|
||||||
X = (int)Math.Round(word.BoundingRect.X),
|
|
||||||
Y = (int)Math.Round(word.BoundingRect.Y),
|
|
||||||
Width = (int)Math.Round(word.BoundingRect.Width),
|
|
||||||
Height = (int)Math.Round(word.BoundingRect.Height),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
lines.Add(new OcrLineResult { Text = ocrLine.Text, Words = words });
|
|
||||||
}
|
|
||||||
|
|
||||||
WriteResponse(new OcrResponse { Text = result.Text, Lines = lines });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void HandleScreenshot(Request req)
|
void HandleScreenshot(Request req)
|
||||||
|
|
@ -113,9 +110,15 @@ void HandleScreenshot(Request req)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
using var bitmap = CaptureOrLoad(req.File, req.Region);
|
// If a reference frame exists, save that (same image used for diff-ocr).
|
||||||
|
// Otherwise capture a new frame.
|
||||||
|
var bitmap = referenceFrame ?? CaptureOrLoad(req.File, req.Region);
|
||||||
var format = GetImageFormat(req.Path);
|
var format = GetImageFormat(req.Path);
|
||||||
|
var dir = System.IO.Path.GetDirectoryName(req.Path);
|
||||||
|
if (!string.IsNullOrEmpty(dir) && !System.IO.Directory.Exists(dir))
|
||||||
|
System.IO.Directory.CreateDirectory(dir);
|
||||||
bitmap.Save(req.Path, format);
|
bitmap.Save(req.Path, format);
|
||||||
|
if (bitmap != referenceFrame) bitmap.Dispose();
|
||||||
WriteResponse(new OkResponse());
|
WriteResponse(new OkResponse());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -123,11 +126,253 @@ void HandleCapture(Request req)
|
||||||
{
|
{
|
||||||
using var bitmap = CaptureOrLoad(req.File, req.Region);
|
using var bitmap = CaptureOrLoad(req.File, req.Region);
|
||||||
using var ms = new MemoryStream();
|
using var ms = new MemoryStream();
|
||||||
bitmap.Save(ms, ImageFormat.Png);
|
bitmap.Save(ms, SdImageFormat.Png);
|
||||||
var base64 = Convert.ToBase64String(ms.ToArray());
|
var base64 = Convert.ToBase64String(ms.ToArray());
|
||||||
WriteResponse(new CaptureResponse { Image = base64 });
|
WriteResponse(new CaptureResponse { Image = base64 });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Snapshot / Diff-OCR ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// <summary>
/// Handles the "snapshot" command: captures (or loads) a frame and stores it in
/// <c>referenceFrame</c>, the baseline that "diff-ocr" compares against.
/// </summary>
void HandleSnapshot(Request req)
{
    // Capture first and only then release the previous frame. If the capture throws,
    // referenceFrame must keep pointing at a live bitmap rather than a disposed one.
    var fresh = CaptureOrLoad(req.File, req.Region);
    referenceFrame?.Dispose();
    referenceFrame = fresh;
    WriteResponse(new OkResponse());
}
|
||||||
|
|
||||||
|
/// <summary>
/// Handles the "diff-ocr" command: compares a fresh capture against <c>referenceFrame</c>,
/// locates the largest dense area of pixels that became darker, optionally saves that crop to
/// <c>req.Path</c>, and runs OCR on it.
/// Responds with an error if no snapshot is stored, with an empty OCR result if no
/// sufficiently large area is found, and otherwise with the text, lines and detected region.
/// </summary>
void HandleDiffOcr(Request req, TesseractEngine engine)
{
    if (referenceFrame == null)
    {
        WriteResponse(new ErrorResponse("No reference snapshot stored. Send 'snapshot' first."));
        return;
    }

    // The capture region is always null here, so req.Region is not applied.
    // NOTE(review): this presumably expects the stored snapshot to cover the same area — confirm.
    using var current = CaptureOrLoad(req.File, null);

    // Only the area both frames share is compared.
    int w = Math.Min(referenceFrame.Width, current.Width);
    int h = Math.Min(referenceFrame.Height, current.Height);
    var overlap = new Rectangle(0, 0, w, h);

    // Each buffer keeps its own stride: the two bitmaps may differ in width, and indexing
    // the current frame with the reference frame's stride would read the wrong pixels.
    byte[] refPx = CopyArgbPixels(referenceFrame, overlap, out int refStride);
    byte[] curPx = CopyArgbPixels(current, overlap, out int curStride);

    // Detect pixels that got DARKER (tooltip = dark overlay).
    // This filters out item highlight glow (brighter) and cursor changes.
    int diffThresh = req.Threshold > 0 ? req.Threshold : 30;
    bool[] changed = new bool[w * h];
    int totalChanged = 0;

    for (int y = 0; y < h; y++)
    {
        int refRow = y * refStride;
        int curRow = y * curStride;
        for (int x = 0; x < w; x++)
        {
            int ri = refRow + x * 4;
            int ci = curRow + x * 4;
            // Bytes are B, G, R (alpha at +3 is ignored); positive means the pixel darkened.
            int darkerB = refPx[ri] - curPx[ci];
            int darkerG = refPx[ri + 1] - curPx[ci + 1];
            int darkerR = refPx[ri + 2] - curPx[ci + 2];
            if (darkerB + darkerG + darkerR > diffThresh)
            {
                changed[y * w + x] = true;
                totalChanged++;
            }
        }
    }

    bool debug = req.Debug;

    if (totalChanged == 0)
    {
        if (debug) Console.Error.WriteLine("  diff-ocr: no changes detected");
        WriteResponse(new OcrResponse { Text = "", Lines = [] });
        return;
    }

    // Two-pass density detection:
    //   Pass 1: find the row range using full-width row counts.
    //   Pass 2: find the column range using only pixels within the detected row range.
    // This makes the column threshold relative to tooltip height, not screen height.
    int maxGap = 15;

    // Pass 1: count changed pixels per row, find the longest active run.
    int[] rowCounts = new int[h];
    for (int y = 0; y < h; y++)
        for (int x = 0; x < w; x++)
            if (changed[y * w + x])
                rowCounts[y]++;

    int rowThresh = w / 30; // ~3% of width
    var (bestRowStart, bestRowEnd, bestRowLen) = FindLongestActiveRun(rowCounts, rowThresh, maxGap);

    // Pass 2: count changed pixels per column, but only within the detected row range.
    int[] colCounts = new int[w];
    for (int y = bestRowStart; y <= bestRowEnd; y++)
        for (int x = 0; x < w; x++)
            if (changed[y * w + x])
                colCounts[x]++;

    int tooltipHeight = bestRowEnd - bestRowStart + 1;
    int colThresh = tooltipHeight / 15; // ~7% of tooltip height
    var (bestColStart, bestColEnd, bestColLen) = FindLongestActiveRun(colCounts, colThresh, maxGap);

    // Log density detection results
    Console.Error.WriteLine($"  diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");

    // Anything under 50px in either dimension is treated as not being a tooltip.
    if (bestRowLen < 50 || bestColLen < 50)
    {
        Console.Error.WriteLine($"  diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
        WriteResponse(new OcrResponse { Text = "", Lines = [] });
        return;
    }

    int pad = 0;
    int minX = Math.Max(bestColStart - pad, 0);
    int minY = Math.Max(bestRowStart - pad, 0);
    int maxX = Math.Min(bestColEnd + pad, w - 1);
    int maxY = Math.Min(bestRowEnd + pad, h - 1);

    // Trim 5px from left/right/bottom to remove tooltip border/shadow artifacts
    int trim = 5;
    minX = Math.Min(minX + trim, maxX);
    maxX = Math.Max(maxX - trim, minX);
    maxY = Math.Max(maxY - trim, minY);
    int rw = maxX - minX + 1;
    int rh = maxY - minY + 1;

    if (debug) Console.Error.WriteLine($"  diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");

    // Crop the current frame to the diff bounding box
    using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);

    // Save raw tooltip image if path is provided
    if (!string.IsNullOrEmpty(req.Path))
    {
        var dir = System.IO.Path.GetDirectoryName(req.Path);
        if (!string.IsNullOrEmpty(dir) && !System.IO.Directory.Exists(dir))
            System.IO.Directory.CreateDirectory(dir);
        cropped.Save(req.Path, GetImageFormat(req.Path));
        if (debug) Console.Error.WriteLine($"  diff-ocr: saved tooltip to {req.Path}");
    }

    // Pre-process for OCR: boost contrast and invert colors (same size as the crop, no upscaling)
    using var processed = PreprocessForOcr(cropped);
    using var pix = BitmapToPix(processed);
    using var page = engine.Process(pix);

    var text = page.GetText();
    // Word boxes are relative to the crop; offset them back into frame coordinates.
    var lines = ExtractLinesFromPage(page, offsetX: minX, offsetY: minY);

    WriteResponse(new DiffOcrResponse
    {
        Text = text,
        Lines = lines,
        Region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh },
    });
}

/// <summary>
/// Copies the 32bpp ARGB bytes of <paramref name="area"/> out of <paramref name="bmp"/>.
/// <paramref name="stride"/> receives the byte length of one row in the returned buffer.
/// The bitmap is always unlocked again, even if the copy throws.
/// </summary>
byte[] CopyArgbPixels(Bitmap bmp, Rectangle area, out int stride)
{
    var data = bmp.LockBits(area, ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
    try
    {
        stride = data.Stride;
        byte[] pixels = new byte[data.Stride * area.Height];
        Marshal.Copy(data.Scan0, pixels, 0, pixels.Length);
        return pixels;
    }
    finally
    {
        bmp.UnlockBits(data);
    }
}

/// <summary>
/// Finds the longest run of "active" entries (count &gt;= <paramref name="threshold"/>) in
/// <paramref name="counts"/>. A run survives inactive stretches of up to
/// <paramref name="maxGap"/> entries; its bounds are the first and last active index.
/// Returns (0, 0, 0) when no entry is active.
/// </summary>
(int Start, int End, int Length) FindLongestActiveRun(int[] counts, int threshold, int maxGap)
{
    int bestStart = 0, bestEnd = 0, bestLen = 0;
    int runStart = -1, lastActive = -1;

    // Closes the open run and keeps it if it is strictly longer than the best so far.
    void CloseRun()
    {
        int len = lastActive - runStart + 1;
        if (len > bestLen) { bestStart = runStart; bestEnd = lastActive; bestLen = len; }
        runStart = -1;
    }

    for (int i = 0; i < counts.Length; i++)
    {
        if (counts[i] >= threshold)
        {
            if (runStart < 0) runStart = i;
            lastActive = i;
        }
        else if (runStart >= 0 && i - lastActive > maxGap)
        {
            CloseRun();
        }
    }
    if (runStart >= 0) CloseRun();

    return (bestStart, bestEnd, bestLen);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns a copy of <paramref name="src"/> prepared for OCR: contrast is stretched between
/// the 5th and 95th brightness percentiles and the B, G and R channels are inverted
/// (light text on dark becomes dark text on light). The copy has the same dimensions as
/// the input; the alpha channel is left untouched.
/// </summary>
Bitmap PreprocessForOcr(Bitmap src)
{
    int width = src.Width, height = src.Height;
    var result = (Bitmap)src.Clone();

    var bits = result.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);
    int rowBytes = bits.Stride;
    byte[] buf = new byte[rowBytes * height];
    Marshal.Copy(bits.Scan0, buf, 0, buf.Length);

    // Brightness histogram, where a pixel's brightness is its largest colour channel.
    int[] levels = new int[256];
    for (int row = 0; row < height; row++)
    {
        int rowBase = row * rowBytes;
        for (int col = 0; col < width; col++)
        {
            int p = rowBase + col * 4;
            levels[Math.Max(buf[p], Math.Max(buf[p + 1], buf[p + 2]))]++;
        }
    }

    // 5% of all pixels: the tail size that defines the low and high cut-off levels.
    double tail = (width * height) * 0.05;

    int lo = 0;
    for (int level = 0, seen = 0; level < 256; level++)
    {
        seen += levels[level];
        if (seen >= tail) { lo = level; break; }
    }

    int hi = 255;
    for (int level = 255, seen = 0; level >= 0; level--)
    {
        seen += levels[level];
        if (seen >= tail) { hi = level; break; }
    }

    // Guard against a flat image, which would otherwise give a zero or negative range.
    if (hi <= lo) hi = lo + 1;
    double gain = 255.0 / (hi - lo);

    // Stretch each colour channel to 0-255, then invert it.
    for (int row = 0; row < height; row++)
    {
        int rowBase = row * rowBytes;
        for (int col = 0; col < width; col++)
        {
            int p = rowBase + col * 4;
            for (int channel = 0; channel < 3; channel++)
            {
                int stretched = Math.Clamp((int)((buf[p + channel] - lo) * gain), 0, 255);
                buf[p + channel] = (byte)(255 - stretched);
            }
        }
    }

    Marshal.Copy(buf, 0, bits.Scan0, buf.Length);
    result.UnlockBits(bits);
    return result;
}
|
||||||
|
|
||||||
// Pre-loaded empty cell templates (loaded lazily on first grid scan)
|
// Pre-loaded empty cell templates (loaded lazily on first grid scan)
|
||||||
// Stored as both grayscale (for occupied detection) and ARGB (for item border detection)
|
// Stored as both grayscale (for occupied detection) and ARGB (for item border detection)
|
||||||
byte[]? emptyTemplate70Gray = null;
|
byte[]? emptyTemplate70Gray = null;
|
||||||
|
|
@ -929,18 +1174,54 @@ Bitmap CaptureScreen(RegionRect? region)
|
||||||
return bitmap;
|
return bitmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Bitmap → SoftwareBitmap conversion (in-memory) ─────────────────────────
|
// ── Bitmap → Tesseract Pix conversion (in-memory) ──────────────────────────
|
||||||
|
|
||||||
SoftwareBitmap BitmapToSoftwareBitmap(Bitmap bitmap)
|
Pix BitmapToPix(Bitmap bitmap)
|
||||||
{
|
{
|
||||||
using var ms = new MemoryStream();
|
using var ms = new MemoryStream();
|
||||||
bitmap.Save(ms, ImageFormat.Bmp);
|
bitmap.Save(ms, SdImageFormat.Png);
|
||||||
ms.Position = 0;
|
return Pix.LoadFromMemory(ms.ToArray());
|
||||||
|
}
|
||||||
|
|
||||||
var stream = ms.AsRandomAccessStream();
|
// ── Extract lines/words from Tesseract page result ──────────────────────────
|
||||||
var decoder = BitmapDecoder.CreateAsync(stream).AsTask().GetAwaiter().GetResult();
|
|
||||||
var softwareBitmap = decoder.GetSoftwareBitmapAsync().AsTask().GetAwaiter().GetResult();
|
/// <summary>
/// Walks a Tesseract page result and returns its text lines, each with its words and their
/// bounding boxes. <paramref name="offsetX"/> / <paramref name="offsetY"/> are added to every
/// word position. Blank words, and words without a bounding box, are skipped; lines left with
/// no words are omitted. A line's text is its words joined by single spaces.
/// </summary>
List<OcrLineResult> ExtractLinesFromPage(Page page, int offsetX, int offsetY)
{
    var lines = new List<OcrLineResult>();
    using var iter = page.GetIterator();
    if (iter == null) return lines;

    iter.Begin();

    do
    {
        var words = new List<OcrWordResult>();
        do
        {
            var wordText = iter.GetText(PageIteratorLevel.Word);
            // `continue` in a do/while still evaluates the loop condition, so the iterator advances.
            if (string.IsNullOrWhiteSpace(wordText)) continue;

            if (iter.TryGetBoundingBox(PageIteratorLevel.Word, out var bounds))
            {
                words.Add(new OcrWordResult
                {
                    Text = wordText.Trim(),
                    X = bounds.X1 + offsetX,
                    Y = bounds.Y1 + offsetY,
                    Width = bounds.Width,
                    Height = bounds.Height,
                });
            }
        } while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

        if (words.Count > 0)
        {
            var lineText = string.Join(" ", words.Select(w => w.Text));
            lines.Add(new OcrLineResult { Text = lineText, Words = words });
        }
        // Advance across the whole page. Next(Block, TextLine) stops at the last line of the
        // current block, which would drop every line in any later block.
    } while (iter.Next(PageIteratorLevel.TextLine));

    return lines;
}
|
||||||
|
|
||||||
// ── Response writing ────────────────────────────────────────────────────────
|
// ── Response writing ────────────────────────────────────────────────────────
|
||||||
|
|
@ -952,14 +1233,14 @@ void WriteResponse(object response)
|
||||||
Console.Out.Flush();
|
Console.Out.Flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageFormat GetImageFormat(string path)
|
SdImageFormat GetImageFormat(string path)
|
||||||
{
|
{
|
||||||
var ext = Path.GetExtension(path).ToLowerInvariant();
|
var ext = Path.GetExtension(path).ToLowerInvariant();
|
||||||
return ext switch
|
return ext switch
|
||||||
{
|
{
|
||||||
".jpg" or ".jpeg" => ImageFormat.Jpeg,
|
".jpg" or ".jpeg" => SdImageFormat.Jpeg,
|
||||||
".bmp" => ImageFormat.Bmp,
|
".bmp" => SdImageFormat.Bmp,
|
||||||
_ => ImageFormat.Png,
|
_ => SdImageFormat.Png,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1063,6 +1344,21 @@ class OcrResponse
|
||||||
public List<OcrLineResult> Lines { get; set; } = [];
|
public List<OcrLineResult> Lines { get; set; } = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class DiffOcrResponse
|
||||||
|
{
|
||||||
|
[JsonPropertyName("ok")]
|
||||||
|
public bool Ok => true;
|
||||||
|
|
||||||
|
[JsonPropertyName("text")]
|
||||||
|
public string Text { get; set; } = "";
|
||||||
|
|
||||||
|
[JsonPropertyName("lines")]
|
||||||
|
public List<OcrLineResult> Lines { get; set; } = [];
|
||||||
|
|
||||||
|
[JsonPropertyName("region")]
|
||||||
|
public RegionRect? Region { get; set; }
|
||||||
|
}
|
||||||
|
|
||||||
class OcrLineResult
|
class OcrLineResult
|
||||||
{
|
{
|
||||||
[JsonPropertyName("text")]
|
[JsonPropertyName("text")]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue