added test cases

This commit is contained in:
Boki 2026-02-12 13:08:19 -05:00
parent c1892230b7
commit 93e2234c4e
9 changed files with 320 additions and 141 deletions

View file

@ -71,6 +71,18 @@ export type OcrEngine = 'tesseract' | 'easyocr' | 'paddleocr';
export type OcrPreprocess = 'none' | 'bgsub' | 'tophat';
/**
 * Optional tuning values for the daemon's `diff-ocr` command.
 *
 * `OcrDaemon.diffOcr` attaches this object to the request as `params`, and
 * only when it contains at least one key. Every field is optional; fields
 * that are omitted are simply not sent.
 */
export interface DiffOcrParams {
// "Diff Threshold" in the settings UI (UI default 20). The daemon overrides it with the
// request-level `threshold` when that is > 0. NOTE(review): exact units are defined daemon-side — confirm.
diffThresh?: number;
// "Max Gap" in the settings UI (UI default 20). Semantics live in the daemon's detection code — TODO confirm.
maxGap?: number;
// "Trim Cutoff" in the settings UI (fractional, UI default 0.4). Presumably trims the detected region — confirm.
trimCutoff?: number;
// Kernel size handed to the daemon's top-hat preprocessing (UI default 41).
kernelSize?: number;
// Upscale factor for background-subtraction preprocessing; the daemon uses 1 instead
// when the engine is easyocr or paddleocr.
upscale?: number;
// Passed through to the daemon's background-subtraction preprocessing (UI default 40).
dimPercentile?: number;
// Passed through to the daemon's background-subtraction preprocessing (UI default 60).
textThresh?: number;
// Passed through to the daemon's background-subtraction preprocessing (UI default off).
softThreshold?: boolean;
// Only consulted when the request carries no explicit `preprocess`: the daemon then
// selects "bgsub" if true and "tophat" otherwise.
useBackgroundSub?: boolean;
}
interface DaemonRequest {
cmd: string;
region?: Region;
@ -82,6 +94,7 @@ interface DaemonRequest {
maxCellSize?: number;
engine?: string;
preprocess?: string;
params?: DiffOcrParams;
}
interface DaemonResponse {
@ -186,12 +199,13 @@ export class OcrDaemon {
await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT);
}
async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess): Promise<DiffOcrResponse> {
async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess, params?: DiffOcrParams): Promise<DiffOcrResponse> {
const req: DaemonRequest = { cmd: 'diff-ocr' };
if (savePath) req.path = savePath;
if (region) req.region = region;
if (engine && engine !== 'tesseract') req.engine = engine;
if (preprocess) req.preprocess = preprocess;
if (params && Object.keys(params).length > 0) req.params = params;
const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
const resp = await this.sendWithRetry(req, timeout);
return {

View file

@ -1,7 +1,7 @@
import { mkdir } from 'fs/promises';
import { join } from 'path';
import { logger } from '../util/logger.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
import type { Region } from '../types.js';
@ -9,11 +9,24 @@ function elapsed(start: number): string {
return `${(performance.now() - start).toFixed(0)}ms`;
}
/**
 * Runtime OCR configuration held on `ScreenReader.settings`. It is read on every
 * OCR call and can be read and updated through the debug `/ocr-settings` routes.
 */
export interface OcrSettings {
// Engine used for both screen OCR and tooltip diff-OCR calls.
engine: OcrEngine;
// Preprocess mode for full-screen / region OCR; 'none' is sent to the daemon as "no preprocess argument".
screenPreprocess: OcrPreprocess;
// Preprocess mode for tooltip diff-OCR; 'none' is likewise sent as "no preprocess argument".
tooltipPreprocess: OcrPreprocess;
// Tuning values forwarded unchanged to `OcrDaemon.diffOcr` for tooltip reads.
tooltipParams: DiffOcrParams;
// When false, the debug tooltip-hover route skips creating the `items` directory
// and skips writing the snapshot and per-cell images.
saveDebugImages: boolean;
}
export class ScreenReader {
private daemon = new OcrDaemon();
readonly grid = new GridReader(this.daemon);
debugOcrEngine: OcrEngine = 'tesseract';
debugPreprocess: OcrPreprocess = 'bgsub';
settings: OcrSettings = {
engine: 'easyocr',
screenPreprocess: 'none',
tooltipPreprocess: 'bgsub',
tooltipParams: {},
saveDebugImages: true,
};
// ── Screenshot capture ──────────────────────────────────────────────
@ -147,19 +160,23 @@ export class ScreenReader {
fuzzy: boolean = false,
): Promise<{ x: number; y: number } | null> {
const t = performance.now();
const result = await this.daemon.ocr();
const { engine, screenPreprocess } = this.settings;
const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
const result = await this.daemon.ocr(undefined, engine, pp);
const pos = this.findWordInOcrResult(result, searchText, fuzzy);
if (pos) {
logger.info({ searchText, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
logger.info({ searchText, engine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
} else {
logger.info({ searchText, totalMs: elapsed(t) }, 'Text not found on screen');
logger.info({ searchText, engine, totalMs: elapsed(t) }, 'Text not found on screen');
}
return pos;
}
async readFullScreen(): Promise<string> {
const result = await this.daemon.ocr();
const { engine, screenPreprocess } = this.settings;
const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
const result = await this.daemon.ocr(undefined, engine, pp);
return result.text;
}
@ -170,7 +187,9 @@ export class ScreenReader {
searchText: string,
): Promise<{ x: number; y: number } | null> {
const t = performance.now();
const result = await this.daemon.ocr(region);
const { engine, screenPreprocess } = this.settings;
const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
const result = await this.daemon.ocr(region, engine, pp);
const pos = this.findWordInOcrResult(result, searchText);
if (pos) {
@ -185,7 +204,9 @@ export class ScreenReader {
}
async readRegionText(region: Region): Promise<string> {
const result = await this.daemon.ocr(region);
const { engine, screenPreprocess } = this.settings;
const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined;
const result = await this.daemon.ocr(region, engine, pp);
return result.text;
}
@ -201,7 +222,9 @@ export class ScreenReader {
}
async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
return this.daemon.diffOcr(savePath, region);
const { engine, tooltipPreprocess, tooltipParams } = this.settings;
const pp = tooltipPreprocess !== 'none' ? tooltipPreprocess : undefined;
return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams);
}
// ── Template matching ──────────────────────────────────────────────
@ -238,43 +261,6 @@ export class ScreenReader {
logger.info({ path, region }, 'Region screenshot saved');
}
// ── Debug OCR (alternative engines) ─────────────────────────────────
/**
 * Runs a diff-OCR through the daemon using the debug engine and debug
 * preprocess selection, logs how long it took, and returns the daemon's result.
 *
 * @param savePath Optional path forwarded to the daemon.
 * @param region Optional region forwarded to the daemon.
 */
async debugDiffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
  const startedAt = performance.now();
  const diff = await this.daemon.diffOcr(savePath, region, this.debugOcrEngine, this.debugPreprocess);
  // Fields are re-read after the await so the log shows the values current at log time.
  const { debugOcrEngine: engine, debugPreprocess: preprocess } = this;
  logger.info({ engine, preprocess, ms: elapsed(startedAt) }, 'debugDiffOcr');
  return diff;
}
/**
 * Runs OCR through the daemon with the debug engine and debug preprocess
 * selection, logs the elapsed time, and returns the raw OCR response.
 *
 * @param region Optional region forwarded to the daemon.
 */
async debugOcr(region?: Region): Promise<OcrResponse> {
  const startedAt = performance.now();
  const ocrResult = await this.daemon.ocr(region, this.debugOcrEngine, this.debugPreprocess);
  // Fields are re-read after the await so the log shows the values current at log time.
  const { debugOcrEngine: engine, debugPreprocess: preprocess } = this;
  logger.info({ engine, preprocess, ms: elapsed(startedAt) }, 'debugOcr');
  return ocrResult;
}
/**
 * OCRs without a region using the debug engine and debug preprocess selection
 * and returns only the recognised text.
 */
async debugReadFullScreen(): Promise<string> {
  const { text } = await this.daemon.ocr(undefined, this.debugOcrEngine, this.debugPreprocess);
  return text;
}
/**
 * OCRs without a region using the debug engine and debug preprocess selection,
 * then looks the search text up in the result via `findWordInOcrResult`.
 *
 * @param searchText Text to look for.
 * @param fuzzy Forwarded to `findWordInOcrResult`; defaults to false.
 * @returns Whatever `findWordInOcrResult` produced: a position, or a falsy value when nothing matched.
 */
async debugFindTextOnScreen(
  searchText: string,
  fuzzy: boolean = false,
): Promise<{ x: number; y: number } | null> {
  const startedAt = performance.now();
  const ocrResult = await this.daemon.ocr(undefined, this.debugOcrEngine, this.debugPreprocess);
  const match = this.findWordInOcrResult(ocrResult, searchText, fuzzy);

  // Fields shared by both log lines; built after the OCR call so they reflect current values.
  const context = { searchText, engine: this.debugOcrEngine, preprocess: this.debugPreprocess };
  if (!match) {
    logger.info({ ...context, totalMs: elapsed(startedAt) }, 'debugFindText not found');
    return match;
  }

  logger.info({ ...context, x: match.x, y: match.y, totalMs: elapsed(startedAt) }, 'debugFindText found');
  return match;
}
// ── Lifecycle ───────────────────────────────────────────────────────
async dispose(): Promise<void> {

View file

@ -452,16 +452,10 @@
<div class="section-title">Debug Tools</div>
<div class="debug-panel">
<div class="debug-row">
<select id="ocrEngineSelect" onchange="setOcrEngine(this.value)" style="padding:6px 10px;background:#0d1117;border:1px solid #30363d;border-radius:6px;color:#e6edf3;font-size:13px">
<option value="tesseract">Tesseract</option>
<option value="easyocr">EasyOCR</option>
<option value="paddleocr">PaddleOCR</option>
</select>
<select id="ocrPreprocessSelect" onchange="setOcrPreprocess(this.value)" style="padding:6px 10px;background:#0d1117;border:1px solid #30363d;border-radius:6px;color:#e6edf3;font-size:13px">
<option value="none">No Preprocess</option>
<option value="bgsub" selected>BgSub</option>
<option value="tophat">TopHat</option>
</select>
<button onclick="openOcrSettings()" title="OCR Settings" style="display:flex;align-items:center;gap:4px">
<svg width="14" height="14" viewBox="0 0 20 20" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><circle cx="10" cy="10" r="3"/><path d="M10 1.5v2M10 16.5v2M3.4 3.4l1.4 1.4M15.2 15.2l1.4 1.4M1.5 10h2M16.5 10h2M3.4 16.6l1.4-1.4M15.2 4.8l1.4-1.4"/></svg>
OCR Settings
</button>
<button onclick="debugScreenshot()">Screenshot</button>
<button onclick="debugOcr()">OCR Screen</button>
<button onclick="debugHideout()">Go Hideout</button>
@ -555,6 +549,115 @@
</div>
</div>
<div class="modal-overlay" id="ocrSettingsModal">
<div class="modal">
<div class="modal-header">
<h2>OCR Settings</h2>
<button class="modal-close" onclick="closeOcrSettings()">&times;</button>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Engine</div>
<select id="ocrEngine" class="mode-select" style="width:100%">
<option value="tesseract">Tesseract</option>
<option value="easyocr">EasyOCR</option>
<option value="paddleocr">PaddleOCR</option>
</select>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Screen OCR</div>
<select id="ocrScreenPreprocess" class="mode-select" style="width:100%" onchange="toggleOcrSections()">
<option value="none">None</option>
<option value="tophat">TopHat</option>
</select>
<div id="screenTophatParams" style="display:none;margin-top:8px">
<div class="settings-grid">
<div class="setting-row">
<label>Kernel Size</label>
<input type="number" id="ocrScreenKernel" value="41" />
</div>
</div>
</div>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Tooltip OCR</div>
<select id="ocrTooltipPreprocess" class="mode-select" style="width:100%" onchange="toggleOcrSections()">
<option value="none">None</option>
<option value="bgsub">Background Subtraction</option>
<option value="tophat">TopHat</option>
</select>
<div id="tooltipBgsubParams" style="display:none;margin-top:8px">
<div class="settings-grid">
<div class="setting-row">
<label>Dim Percentile</label>
<input type="number" id="ocrDimPercentile" value="40" />
</div>
<div class="setting-row">
<label>Text Threshold</label>
<input type="number" id="ocrTextThresh" value="60" />
</div>
</div>
<div style="display:flex;align-items:center;gap:8px;margin-top:8px">
<label class="toggle">
<input type="checkbox" id="ocrSoftThreshold" />
<span class="slider"></span>
</label>
<span style="font-size:12px;color:#8b949e">Soft Threshold</span>
</div>
</div>
<div id="tooltipTophatParams" style="display:none;margin-top:8px">
<div class="settings-grid">
<div class="setting-row">
<label>Kernel Size</label>
<input type="number" id="ocrTooltipKernel" value="41" />
</div>
</div>
</div>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Tooltip Detection</div>
<div class="settings-grid">
<div class="setting-row">
<label>Diff Threshold</label>
<input type="number" id="ocrDiffThresh" value="20" />
</div>
<div class="setting-row">
<label>Max Gap</label>
<input type="number" id="ocrMaxGap" value="20" />
</div>
<div class="setting-row">
<label>Trim Cutoff</label>
<input type="number" id="ocrTrimCutoff" value="0.4" step="0.1" />
</div>
<div class="setting-row">
<label>Upscale</label>
<input type="number" id="ocrUpscale" value="2" />
</div>
</div>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Options</div>
<div style="display:flex;align-items:center;gap:8px">
<label class="toggle">
<input type="checkbox" id="ocrSaveDebugImages" checked />
<span class="slider"></span>
</label>
<span style="font-size:12px;color:#8b949e">Save debug images to disk</span>
</div>
</div>
<div class="settings-actions">
<span class="saved-badge" id="ocrSavedBadge">Saved</span>
<button onclick="closeOcrSettings()" style="margin-right:8px">Close</button>
<button class="primary" onclick="saveOcrSettings()">Save</button>
</div>
</div>
</div>
<script>
let ws;
let status = { paused: false, state: 'IDLE', links: [], tradesCompleted: 0, tradesFailed: 0, uptime: 0, settings: {} };
@ -639,18 +742,16 @@
function renderInventory() {
const container = document.getElementById('inventoryGrid');
const freeLabel = document.getElementById('invFreeCount');
if (!status.inventory) {
container.innerHTML = '<div class="empty-state" style="grid-column:1/-1">No active scrap session</div>';
freeLabel.textContent = '';
return;
}
const { grid, items, free } = status.inventory;
const grid = status.inventory ? status.inventory.grid : null;
const items = status.inventory ? status.inventory.items : [];
const free = status.inventory ? status.inventory.free : 60;
freeLabel.textContent = `${free}/60 free`;
let html = '';
for (let r = 0; r < 5; r++) {
for (let c = 0; c < 12; c++) {
const occupied = grid[r] && grid[r][c] ? 'occupied' : '';
const occupied = grid && grid[r] && grid[r][c] ? 'occupied' : '';
html += `<div class="inv-cell ${occupied}" data-r="${r}" data-c="${c}"></div>`;
}
}
@ -891,7 +992,7 @@
});
document.addEventListener('keydown', (e) => {
if (e.key === 'Escape') closeSettings();
if (e.key === 'Escape') { closeSettings(); closeOcrSettings(); }
});
async function saveSettings() {
@ -1010,46 +1111,98 @@
if (e.key === 'Enter') addLink();
});
// OCR engine/preprocess (sync — these are instant)
async function setOcrEngine(engine) {
await fetch('/api/debug/ocr-engine', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ engine }),
});
// --- OCR Settings modal ---
/** Opens the OCR settings modal and starts refreshing its fields from the server. */
function openOcrSettings() {
  // Not awaited: the modal opens immediately while loadOcrSettings() fills in the inputs.
  loadOcrSettings();
  const modal = document.getElementById('ocrSettingsModal');
  modal.classList.add('open');
}
async function loadOcrEngine() {
/** Hides the OCR settings modal by dropping its `open` class. */
function closeOcrSettings() {
  const modal = document.getElementById('ocrSettingsModal');
  modal.classList.remove('open');
}
// Close the modal only when the click's target is the overlay element the listener is bound to.
document.getElementById('ocrSettingsModal').addEventListener('click', (event) => {
  const clickedOverlayItself = event.target === event.currentTarget;
  if (clickedOverlayItself) {
    closeOcrSettings();
  }
});
/**
 * Shows the parameter panel that matches each preprocess dropdown and hides
 * the others. A shown panel gets an empty inline `display`; a hidden one gets 'none'.
 */
function toggleOcrSections() {
  // Screen OCR: a single panel, visible only for top-hat.
  const screenMode = document.getElementById('ocrScreenPreprocess').value;
  const screenPanel = document.getElementById('screenTophatParams');
  screenPanel.style.display = screenMode === 'tophat' ? '' : 'none';

  // Tooltip OCR: one panel per preprocess mode that has parameters.
  const tooltipMode = document.getElementById('ocrTooltipPreprocess').value;
  const tooltipPanels = [
    ['bgsub', 'tooltipBgsubParams'],
    ['tophat', 'tooltipTophatParams'],
  ];
  for (const [mode, panelId] of tooltipPanels) {
    const panel = document.getElementById(panelId);
    if (tooltipMode === mode) {
      panel.style.display = '';
    } else {
      panel.style.display = 'none';
    }
  }
}
async function loadOcrSettings() {
try {
const res = await fetch('/api/debug/ocr-engine');
const res = await fetch('/api/debug/ocr-settings');
const data = await res.json();
if (data.ok && data.engine) {
document.getElementById('ocrEngineSelect').value = data.engine;
}
if (!data.ok) return;
document.getElementById('ocrEngine').value = data.engine || 'easyocr';
document.getElementById('ocrScreenPreprocess').value = data.screenPreprocess || 'none';
document.getElementById('ocrTooltipPreprocess').value = data.tooltipPreprocess || 'bgsub';
document.getElementById('ocrSaveDebugImages').checked = data.saveDebugImages !== false;
const tp = data.tooltipParams || {};
document.getElementById('ocrDiffThresh').value = tp.diffThresh ?? 20;
document.getElementById('ocrMaxGap').value = tp.maxGap ?? 20;
document.getElementById('ocrTrimCutoff').value = tp.trimCutoff ?? 0.4;
document.getElementById('ocrUpscale').value = tp.upscale ?? 2;
document.getElementById('ocrDimPercentile').value = tp.dimPercentile ?? 40;
document.getElementById('ocrTextThresh').value = tp.textThresh ?? 60;
document.getElementById('ocrSoftThreshold').checked = !!tp.softThreshold;
document.getElementById('ocrScreenKernel').value = tp.kernelSize ?? 41;
document.getElementById('ocrTooltipKernel').value = tp.kernelSize ?? 41;
toggleOcrSections();
} catch {}
}
async function setOcrPreprocess(preprocess) {
await fetch('/api/debug/ocr-preprocess', {
async function saveOcrSettings() {
const tooltipPp = document.getElementById('ocrTooltipPreprocess').value;
const screenPp = document.getElementById('ocrScreenPreprocess').value;
const tooltipParams = {
diffThresh: parseInt(document.getElementById('ocrDiffThresh').value) || 20,
maxGap: parseInt(document.getElementById('ocrMaxGap').value) || 20,
trimCutoff: parseFloat(document.getElementById('ocrTrimCutoff').value) || 0.4,
upscale: parseInt(document.getElementById('ocrUpscale').value) || 2,
useBackgroundSub: tooltipPp === 'bgsub',
};
if (tooltipPp === 'bgsub') {
tooltipParams.dimPercentile = parseInt(document.getElementById('ocrDimPercentile').value) || 40;
tooltipParams.textThresh = parseInt(document.getElementById('ocrTextThresh').value) || 60;
tooltipParams.softThreshold = document.getElementById('ocrSoftThreshold').checked;
}
if (tooltipPp === 'tophat') {
tooltipParams.kernelSize = parseInt(document.getElementById('ocrTooltipKernel').value) || 41;
}
if (screenPp === 'tophat') {
tooltipParams.kernelSize = parseInt(document.getElementById('ocrScreenKernel').value) || 41;
}
const body = {
engine: document.getElementById('ocrEngine').value,
screenPreprocess: screenPp,
tooltipPreprocess: tooltipPp,
tooltipParams,
saveDebugImages: document.getElementById('ocrSaveDebugImages').checked,
};
await fetch('/api/debug/ocr-settings', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ preprocess }),
body: JSON.stringify(body),
});
}
async function loadOcrPreprocess() {
try {
const res = await fetch('/api/debug/ocr-preprocess');
const data = await res.json();
if (data.ok && data.preprocess) {
document.getElementById('ocrPreprocessSelect').value = data.preprocess;
}
} catch {}
const badge = document.getElementById('ocrSavedBadge');
badge.classList.add('show');
setTimeout(() => badge.classList.remove('show'), 2000);
}
connect();
loadOcrEngine();
loadOcrPreprocess();
loadOcrSettings();
</script>
</body>
</html>

View file

@ -5,7 +5,8 @@ import { sleep } from '../../util/sleep.js';
import { GRID_LAYOUTS } from '../../game/GridReader.js';
import type { Bot } from '../../bot/Bot.js';
import type { Server } from '../Server.js';
import type { OcrEngine, OcrPreprocess } from '../../game/OcrDaemon.js';
import type { OcrEngine, OcrPreprocess, DiffOcrParams } from '../../game/OcrDaemon.js';
import type { OcrSettings } from '../../game/ScreenReader.js';
export function debugRoutes(bot: Bot, server: Server): Router {
const router = Router();
@ -15,39 +16,23 @@ export function debugRoutes(bot: Bot, server: Server): Router {
return false;
};
// --- Sync: OCR engine/preprocess selection ---
// --- Sync: OCR settings ---
router.get('/ocr-engine', (req, res) => {
router.get('/ocr-settings', (req, res) => {
if (notReady(req, res)) return;
res.json({ ok: true, engine: bot.screenReader.debugOcrEngine });
res.json({ ok: true, ...bot.screenReader.settings });
});
router.post('/ocr-engine', (req, res) => {
router.post('/ocr-settings', (req, res) => {
if (notReady(req, res)) return;
const { engine } = req.body as { engine: string };
if (!['tesseract', 'easyocr', 'paddleocr'].includes(engine)) {
res.status(400).json({ error: 'Invalid engine. Must be tesseract, easyocr, or paddleocr.' });
return;
}
bot.screenReader.debugOcrEngine = engine as OcrEngine;
server.broadcastLog('info', `OCR engine set to: ${engine}`);
res.json({ ok: true });
});
router.get('/ocr-preprocess', (req, res) => {
if (notReady(req, res)) return;
res.json({ ok: true, preprocess: bot.screenReader.debugPreprocess });
});
router.post('/ocr-preprocess', (req, res) => {
if (notReady(req, res)) return;
const { preprocess } = req.body as { preprocess: string };
if (!['none', 'bgsub', 'tophat'].includes(preprocess)) {
res.status(400).json({ error: 'Invalid preprocess. Must be none, bgsub, or tophat.' });
return;
}
bot.screenReader.debugPreprocess = preprocess as OcrPreprocess;
server.broadcastLog('info', `OCR preprocess set to: ${preprocess}`);
const body = req.body as Partial<OcrSettings>;
const s = bot.screenReader.settings;
if (body.engine && ['tesseract', 'easyocr', 'paddleocr'].includes(body.engine)) s.engine = body.engine;
if (body.screenPreprocess && ['none', 'bgsub', 'tophat'].includes(body.screenPreprocess)) s.screenPreprocess = body.screenPreprocess;
if (body.tooltipPreprocess && ['none', 'bgsub', 'tophat'].includes(body.tooltipPreprocess)) s.tooltipPreprocess = body.tooltipPreprocess;
if (body.tooltipParams != null) s.tooltipParams = body.tooltipParams;
if (body.saveDebugImages != null) s.saveDebugImages = body.saveDebugImages;
server.broadcastLog('info', `OCR settings updated: engine=${s.engine} screen=${s.screenPreprocess} tooltip=${s.tooltipPreprocess}`);
res.json({ ok: true });
});
@ -68,8 +53,8 @@ export function debugRoutes(bot: Bot, server: Server): Router {
router.post('/ocr', (req, res) => {
if (notReady(req, res)) return;
res.json({ ok: true });
bot.screenReader.debugReadFullScreen().then(text => {
server.broadcastLog('info', `OCR [${bot.screenReader.debugOcrEngine}] (${text.length} chars): ${text.substring(0, 200)}`);
bot.screenReader.readFullScreen().then(text => {
server.broadcastLog('info', `OCR [${bot.screenReader.settings.engine}] (${text.length} chars): ${text.substring(0, 200)}`);
server.broadcastDebug('ocr', { text });
}).catch(err => {
logger.error({ err }, 'Debug OCR failed');
@ -82,11 +67,11 @@ export function debugRoutes(bot: Bot, server: Server): Router {
const { text } = req.body as { text: string };
if (!text) { res.status(400).json({ error: 'Missing text parameter' }); return; }
res.json({ ok: true });
bot.screenReader.debugFindTextOnScreen(text).then(pos => {
bot.screenReader.findTextOnScreen(text).then(pos => {
if (pos) {
server.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) [${bot.screenReader.debugOcrEngine}]`);
server.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) [${bot.screenReader.settings.engine}]`);
} else {
server.broadcastLog('warn', `"${text}" not found on screen [${bot.screenReader.debugOcrEngine}]`);
server.broadcastLog('warn', `"${text}" not found on screen [${bot.screenReader.settings.engine}]`);
}
server.broadcastDebug('find-text', { searchText: text, found: !!pos, position: pos });
}).catch(err => {
@ -101,14 +86,14 @@ export function debugRoutes(bot: Bot, server: Server): Router {
if (!text) { res.status(400).json({ error: 'Missing text parameter' }); return; }
res.json({ ok: true });
(async () => {
const pos = await bot.screenReader.debugFindTextOnScreen(text, !!fuzzy);
const pos = await bot.screenReader.findTextOnScreen(text, !!fuzzy);
if (pos) {
await bot.gameController.focusGame();
await bot.gameController.leftClickAt(pos.x, pos.y);
server.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) and clicked [${bot.screenReader.debugOcrEngine}]`);
server.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) and clicked [${bot.screenReader.settings.engine}]`);
server.broadcastDebug('find-and-click', { searchText: text, found: true, position: pos });
} else {
server.broadcastLog('warn', `"${text}" not found on screen [${bot.screenReader.debugOcrEngine}]`);
server.broadcastLog('warn', `"${text}" not found on screen [${bot.screenReader.settings.engine}]`);
server.broadcastDebug('find-and-click', { searchText: text, found: false, position: null });
}
})().catch(err => {
@ -234,7 +219,8 @@ export function debugRoutes(bot: Bot, server: Server): Router {
];
await bot.gameController.focusGame();
await mkdir('items', { recursive: true });
const saveImages = bot.screenReader.settings.saveDebugImages;
if (saveImages) await mkdir('items', { recursive: true });
const tooltips: Array<{ row: number; col: number; label: string; text: string }> = [];
const ts = Date.now();
const reg = result.layout.region;
@ -245,7 +231,7 @@ export function debugRoutes(bot: Bot, server: Server): Router {
bot.gameController.moveMouseInstant(reg.x + reg.width + 50, reg.y + reg.height / 2);
await sleep(50);
await bot.screenReader.snapshot();
await bot.screenReader.saveScreenshot(`items/${ts}_snapshot.png`);
if (saveImages) await bot.screenReader.saveScreenshot(`items/${ts}_snapshot.png`);
await sleep(200);
for (const cell of hoverCells) {
@ -257,7 +243,7 @@ export function debugRoutes(bot: Bot, server: Server): Router {
await sleep(50);
const afterMove = performance.now();
const imgPath = `items/${ts}_${cell.row}-${cell.col}.png`;
const imgPath = saveImages ? `items/${ts}_${cell.row}-${cell.col}.png` : undefined;
const diff = await bot.screenReader.diffOcr(imgPath);
const afterOcr = performance.now();
const text = diff.text.trim();

View file

@ -108,6 +108,8 @@ static class Daemon
var engine = request.Engine ?? "tesseract";
var preprocess = request.Preprocess ?? "none";
var kernelSize = request.Params?.KernelSize ?? 41;
// No preprocess + tesseract = original fast path
if (engine == "tesseract" && preprocess == "none")
return ocrHandler.HandleOcr(request);
@ -119,7 +121,7 @@ static class Daemon
Bitmap processed;
if (preprocess == "tophat")
{
processed = ImagePreprocessor.PreprocessForOcr(bitmap);
processed = ImagePreprocessor.PreprocessForOcr(bitmap, kernelSize: kernelSize);
}
else if (preprocess == "bgsub")
{
@ -152,16 +154,24 @@ static class Daemon
private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
var engine = request.Engine ?? "tesseract";
var preprocess = request.Preprocess ?? "bgsub";
var isPythonEngine = engine is "easyocr" or "paddleocr";
var p = request.Params?.Clone() ?? new DiffOcrParams();
if (request.Threshold > 0) p.DiffThresh = request.Threshold;
// No engine override + no preprocess override = original Tesseract path (supports test/tune params)
if (engine == "tesseract" && request.Preprocess == null)
// Determine preprocess mode: explicit request.Preprocess > params.UseBackgroundSub > default "bgsub"
string preprocess;
if (request.Preprocess != null)
preprocess = request.Preprocess;
else if (request.Params != null)
preprocess = p.UseBackgroundSub ? "bgsub" : "tophat";
else
preprocess = "bgsub";
// No engine override + no preprocess override + no params = original Tesseract path
if (engine == "tesseract" && request.Preprocess == null && request.Params == null)
return ocrHandler.HandleDiffOcr(request);
var sw = System.Diagnostics.Stopwatch.StartNew();
var p = new DiffOcrParams();
if (request.Threshold > 0) p.DiffThresh = request.Threshold;
var cropResult = ocrHandler.DiffCrop(request, p);
if (cropResult == null)
@ -174,13 +184,14 @@ static class Daemon
Bitmap processed;
if (preprocess == "bgsub")
{
int upscale = isPythonEngine ? 1 : 2;
int upscale = isPythonEngine ? 1 : p.Upscale;
processed = ImagePreprocessor.PreprocessWithBackgroundSub(
cropped, refCropped, dimPercentile: 40, textThresh: 60, upscale: upscale, softThreshold: false);
cropped, refCropped, dimPercentile: p.DimPercentile, textThresh: p.TextThresh,
upscale: upscale, softThreshold: p.SoftThreshold);
}
else if (preprocess == "tophat")
{
processed = ImagePreprocessor.PreprocessForOcr(cropped);
processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: p.KernelSize);
}
else // "none"
{

View file

@ -45,6 +45,9 @@ class Request
[JsonPropertyName("preprocess")]
public string? Preprocess { get; set; }
[JsonPropertyName("params")]
public DiffOcrParams? Params { get; set; }
}
class RegionRect

View file

@ -49,5 +49,31 @@
"Asking Price:",
"35x Divine Orb"
]
},
{
"id": "raphpith1",
"image": "images/raphpith.png",
"fullImage": "images/raphpith-snapshot.png",
"expected": [
"RATHPITH GLOBE",
"SACRED Focus",
"Focus",
"Quality: +20%",
"Energy Shield: 104",
"Requires: Level 75",
"16% Increased Energy Shield",
"+24 To Maximum Mana",
"+5% to all Elemental Resistances",
"NON-CHANNELLING SPELLS HAVE 3% INCREASED MAGNITUDE OF AlLMENTS PER 100 MAXIMUM LIFE",
"NON-CHANNELLING SPELLS DEAL 6% INCREASED DAMAGE PER 100 MAXIMUM MANA",
"+72 TO MAXIMUM LIFE",
"NON-CHANNELLING SPELLS HAVE 3% INCREASED CRITICAL HIT CHANCE PER 100 MAXIMUM LIFE",
"NON-CHANNELLING SPELLS DEAL 6% INCREASED DAMACE PER 100 MAXIMUM LIFE",
"Twice Corrupted",
"THE VAAL EMPTIED THEIR SLAVES OF BEATING HEARTS",
"AND LEFT A MOUNTAIN OF TWITCHING DEAD",
"Asking Price:",
"120x Divine Orb"
]
}
]

Binary file not shown.

After

Width:  |  Height:  |  Size: 6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 MiB