diff --git a/src/game/OcrDaemon.ts b/src/game/OcrDaemon.ts index 21f9a2a..936e3d4 100644 --- a/src/game/OcrDaemon.ts +++ b/src/game/OcrDaemon.ts @@ -71,6 +71,18 @@ export type OcrEngine = 'tesseract' | 'easyocr' | 'paddleocr'; export type OcrPreprocess = 'none' | 'bgsub' | 'tophat'; +export interface DiffOcrParams { + diffThresh?: number; + maxGap?: number; + trimCutoff?: number; + kernelSize?: number; + upscale?: number; + dimPercentile?: number; + textThresh?: number; + softThreshold?: boolean; + useBackgroundSub?: boolean; +} + interface DaemonRequest { cmd: string; region?: Region; @@ -82,6 +94,7 @@ interface DaemonRequest { maxCellSize?: number; engine?: string; preprocess?: string; + params?: DiffOcrParams; } interface DaemonResponse { @@ -186,12 +199,13 @@ export class OcrDaemon { await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT); } - async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess): Promise { + async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess, params?: DiffOcrParams): Promise { const req: DaemonRequest = { cmd: 'diff-ocr' }; if (savePath) req.path = savePath; if (region) req.region = region; if (engine && engine !== 'tesseract') req.engine = engine; if (preprocess) req.preprocess = preprocess; + if (params && Object.keys(params).length > 0) req.params = params; const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT; const resp = await this.sendWithRetry(req, timeout); return { diff --git a/src/game/ScreenReader.ts b/src/game/ScreenReader.ts index 94e1afb..0eb643b 100644 --- a/src/game/ScreenReader.ts +++ b/src/game/ScreenReader.ts @@ -1,7 +1,7 @@ import { mkdir } from 'fs/promises'; import { join } from 'path'; import { logger } from '../util/logger.js'; -import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js'; +import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js'; import { GridReader, type GridLayout, type CellCoord } from './GridReader.js'; import type { Region } from '../types.js'; @@ -9,11 +9,24 @@ function elapsed(start: number): string { return `${(performance.now() - start).toFixed(0)}ms`; } +export interface OcrSettings { + engine: OcrEngine; + screenPreprocess: OcrPreprocess; + tooltipPreprocess: OcrPreprocess; + tooltipParams: DiffOcrParams; + saveDebugImages: boolean; +} + export class ScreenReader { private daemon = new OcrDaemon(); readonly grid = new GridReader(this.daemon); - debugOcrEngine: OcrEngine = 'tesseract'; - debugPreprocess: OcrPreprocess = 'bgsub'; + settings: OcrSettings = { + engine: 'easyocr', + screenPreprocess: 'none', + tooltipPreprocess: 'bgsub', + tooltipParams: {}, + saveDebugImages: true, + }; // ── Screenshot capture ────────────────────────────────────────────── @@ -147,19 +160,23 @@ export class ScreenReader { fuzzy: boolean = false, ): Promise<{ x: number; y: number } | null> { const t = performance.now(); - const result = await this.daemon.ocr(); + const { engine, screenPreprocess } = this.settings; + const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined; + const result = await this.daemon.ocr(undefined, engine, pp); const pos = this.findWordInOcrResult(result, searchText, fuzzy); if (pos) { - logger.info({ searchText, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen'); + logger.info({ searchText, engine, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen'); } else { - logger.info({ searchText, totalMs: elapsed(t) }, 'Text not found on screen'); + logger.info({ searchText, engine, totalMs: elapsed(t) }, 'Text not found on screen'); } return pos; } async readFullScreen(): Promise { - const result = await this.daemon.ocr(); + const { engine, screenPreprocess } = this.settings; + const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined; + const result = await this.daemon.ocr(undefined, engine, pp); return result.text; } @@ -170,7 +187,9 @@ export class ScreenReader { searchText: string, ): Promise<{ x: number; y: number } | null> { const t = performance.now(); - const result = await this.daemon.ocr(region); + const { engine, screenPreprocess } = this.settings; + const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined; + const result = await this.daemon.ocr(region, engine, pp); const pos = this.findWordInOcrResult(result, searchText); if (pos) { @@ -185,7 +204,9 @@ export class ScreenReader { } async readRegionText(region: Region): Promise { - const result = await this.daemon.ocr(region); + const { engine, screenPreprocess } = this.settings; + const pp = screenPreprocess !== 'none' ? screenPreprocess : undefined; + const result = await this.daemon.ocr(region, engine, pp); return result.text; } @@ -201,7 +222,9 @@ export class ScreenReader { } async diffOcr(savePath?: string, region?: Region): Promise { - return this.daemon.diffOcr(savePath, region); + const { engine, tooltipPreprocess, tooltipParams } = this.settings; + const pp = tooltipPreprocess !== 'none' ? tooltipPreprocess : undefined; + return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams); } // ── Template matching ────────────────────────────────────────────── @@ -238,43 +261,6 @@ export class ScreenReader { logger.info({ path, region }, 'Region screenshot saved'); } - // ── Debug OCR (alternative engines) ───────────────────────────────── - - async debugDiffOcr(savePath?: string, region?: Region): Promise { - const t = performance.now(); - const result = await this.daemon.diffOcr(savePath, region, this.debugOcrEngine, this.debugPreprocess); - logger.info({ engine: this.debugOcrEngine, preprocess: this.debugPreprocess, ms: elapsed(t) }, 'debugDiffOcr'); - return result; - } - - async debugOcr(region?: Region): Promise { - const t = performance.now(); - const result = await this.daemon.ocr(region, this.debugOcrEngine, this.debugPreprocess); - logger.info({ engine: this.debugOcrEngine, preprocess: this.debugPreprocess, ms: elapsed(t) }, 'debugOcr'); - return result; - } - - async debugReadFullScreen(): Promise { - const result = await this.daemon.ocr(undefined, this.debugOcrEngine, this.debugPreprocess); - return result.text; - } - - async debugFindTextOnScreen( - searchText: string, - fuzzy: boolean = false, - ): Promise<{ x: number; y: number } | null> { - const t = performance.now(); - const result = await this.daemon.ocr(undefined, this.debugOcrEngine, this.debugPreprocess); - const pos = this.findWordInOcrResult(result, searchText, fuzzy); - - if (pos) { - logger.info({ searchText, engine: this.debugOcrEngine, preprocess: this.debugPreprocess, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'debugFindText found'); - } else { - logger.info({ searchText, engine: this.debugOcrEngine, preprocess: this.debugPreprocess, totalMs: elapsed(t) }, 'debugFindText not found'); - } - return pos; - } - // ── Lifecycle ─────────────────────────────────────────────────────── async dispose(): Promise { diff --git a/src/server/index.html b/src/server/index.html index 7dfe859..c6a9afb 100644 --- a/src/server/index.html +++ b/src/server/index.html @@ -452,16 +452,10 @@
Debug Tools
- - + @@ -555,6 +549,115 @@
+ + diff --git a/src/server/routes/debug.ts b/src/server/routes/debug.ts index 190976c..fe2bc65 100644 --- a/src/server/routes/debug.ts +++ b/src/server/routes/debug.ts @@ -5,7 +5,8 @@ import { sleep } from '../../util/sleep.js'; import { GRID_LAYOUTS } from '../../game/GridReader.js'; import type { Bot } from '../../bot/Bot.js'; import type { Server } from '../Server.js'; -import type { OcrEngine, OcrPreprocess } from '../../game/OcrDaemon.js'; +import type { OcrEngine, OcrPreprocess, DiffOcrParams } from '../../game/OcrDaemon.js'; +import type { OcrSettings } from '../../game/ScreenReader.js'; export function debugRoutes(bot: Bot, server: Server): Router { const router = Router(); @@ -15,39 +16,23 @@ export function debugRoutes(bot: Bot, server: Server): Router { return false; }; - // --- Sync: OCR engine/preprocess selection --- + // --- Sync: OCR settings --- - router.get('/ocr-engine', (req, res) => { + router.get('/ocr-settings', (req, res) => { if (notReady(req, res)) return; - res.json({ ok: true, engine: bot.screenReader.debugOcrEngine }); + res.json({ ok: true, ...bot.screenReader.settings }); }); - router.post('/ocr-engine', (req, res) => { + router.post('/ocr-settings', (req, res) => { if (notReady(req, res)) return; - const { engine } = req.body as { engine: string }; - if (!['tesseract', 'easyocr', 'paddleocr'].includes(engine)) { - res.status(400).json({ error: 'Invalid engine. Must be tesseract, easyocr, or paddleocr.' }); - return; - } - bot.screenReader.debugOcrEngine = engine as OcrEngine; - server.broadcastLog('info', `OCR engine set to: ${engine}`); - res.json({ ok: true }); - }); - - router.get('/ocr-preprocess', (req, res) => { - if (notReady(req, res)) return; - res.json({ ok: true, preprocess: bot.screenReader.debugPreprocess }); - }); - - router.post('/ocr-preprocess', (req, res) => { - if (notReady(req, res)) return; - const { preprocess } = req.body as { preprocess: string }; - if (!['none', 'bgsub', 'tophat'].includes(preprocess)) { - res.status(400).json({ error: 'Invalid preprocess. Must be none, bgsub, or tophat.' }); - return; - } - bot.screenReader.debugPreprocess = preprocess as OcrPreprocess; - server.broadcastLog('info', `OCR preprocess set to: ${preprocess}`); + const body = req.body as Partial; + const s = bot.screenReader.settings; + if (body.engine && ['tesseract', 'easyocr', 'paddleocr'].includes(body.engine)) s.engine = body.engine; + if (body.screenPreprocess && ['none', 'bgsub', 'tophat'].includes(body.screenPreprocess)) s.screenPreprocess = body.screenPreprocess; + if (body.tooltipPreprocess && ['none', 'bgsub', 'tophat'].includes(body.tooltipPreprocess)) s.tooltipPreprocess = body.tooltipPreprocess; + if (body.tooltipParams != null) s.tooltipParams = body.tooltipParams; + if (body.saveDebugImages != null) s.saveDebugImages = body.saveDebugImages; + server.broadcastLog('info', `OCR settings updated: engine=${s.engine} screen=${s.screenPreprocess} tooltip=${s.tooltipPreprocess}`); res.json({ ok: true }); }); @@ -68,8 +53,8 @@ export function debugRoutes(bot: Bot, server: Server): Router { router.post('/ocr', (req, res) => { if (notReady(req, res)) return; res.json({ ok: true }); - bot.screenReader.debugReadFullScreen().then(text => { - server.broadcastLog('info', `OCR [${bot.screenReader.debugOcrEngine}] (${text.length} chars): ${text.substring(0, 200)}`); + bot.screenReader.readFullScreen().then(text => { + server.broadcastLog('info', `OCR [${bot.screenReader.settings.engine}] (${text.length} chars): ${text.substring(0, 200)}`); server.broadcastDebug('ocr', { text }); }).catch(err => { logger.error({ err }, 'Debug OCR failed'); @@ -82,11 +67,11 @@ export function debugRoutes(bot: Bot, server: Server): Router { const { text } = req.body as { text: string }; if (!text) { res.status(400).json({ error: 'Missing text parameter' }); return; } res.json({ ok: true }); - bot.screenReader.debugFindTextOnScreen(text).then(pos => { + bot.screenReader.findTextOnScreen(text).then(pos => { if (pos) { - server.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) [${bot.screenReader.debugOcrEngine}]`); + server.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) [${bot.screenReader.settings.engine}]`); } else { - server.broadcastLog('warn', `"${text}" not found on screen [${bot.screenReader.debugOcrEngine}]`); + server.broadcastLog('warn', `"${text}" not found on screen [${bot.screenReader.settings.engine}]`); } server.broadcastDebug('find-text', { searchText: text, found: !!pos, position: pos }); }).catch(err => { @@ -101,14 +86,14 @@ export function debugRoutes(bot: Bot, server: Server): Router { if (!text) { res.status(400).json({ error: 'Missing text parameter' }); return; } res.json({ ok: true }); (async () => { - const pos = await bot.screenReader.debugFindTextOnScreen(text, !!fuzzy); + const pos = await bot.screenReader.findTextOnScreen(text, !!fuzzy); if (pos) { await bot.gameController.focusGame(); await bot.gameController.leftClickAt(pos.x, pos.y); - server.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) and clicked [${bot.screenReader.debugOcrEngine}]`); + server.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) and clicked [${bot.screenReader.settings.engine}]`); server.broadcastDebug('find-and-click', { searchText: text, found: true, position: pos }); } else { - server.broadcastLog('warn', `"${text}" not found on screen [${bot.screenReader.debugOcrEngine}]`); + server.broadcastLog('warn', `"${text}" not found on screen [${bot.screenReader.settings.engine}]`); server.broadcastDebug('find-and-click', { searchText: text, found: false, position: null }); } })().catch(err => { @@ -234,7 +219,8 @@ export function debugRoutes(bot: Bot, server: Server): Router { ]; await bot.gameController.focusGame(); - await mkdir('items', { recursive: true }); + const saveImages = bot.screenReader.settings.saveDebugImages; + if (saveImages) await mkdir('items', { recursive: true }); const tooltips: Array<{ row: number; col: number; label: string; text: string }> = []; const ts = Date.now(); const reg = result.layout.region; @@ -245,7 +231,7 @@ export function debugRoutes(bot: Bot, server: Server): Router { bot.gameController.moveMouseInstant(reg.x + reg.width + 50, reg.y + reg.height / 2); await sleep(50); await bot.screenReader.snapshot(); - await bot.screenReader.saveScreenshot(`items/${ts}_snapshot.png`); + if (saveImages) await bot.screenReader.saveScreenshot(`items/${ts}_snapshot.png`); await sleep(200); for (const cell of hoverCells) { @@ -257,7 +243,7 @@ export function debugRoutes(bot: Bot, server: Server): Router { await sleep(50); const afterMove = performance.now(); - const imgPath = `items/${ts}_${cell.row}-${cell.col}.png`; + const imgPath = saveImages ? `items/${ts}_${cell.row}-${cell.col}.png` : undefined; const diff = await bot.screenReader.diffOcr(imgPath); const afterOcr = performance.now(); const text = diff.text.trim(); diff --git a/tools/OcrDaemon/Daemon.cs b/tools/OcrDaemon/Daemon.cs index 6cfb898..7a03587 100644 --- a/tools/OcrDaemon/Daemon.cs +++ b/tools/OcrDaemon/Daemon.cs @@ -108,6 +108,8 @@ static class Daemon var engine = request.Engine ?? "tesseract"; var preprocess = request.Preprocess ?? "none"; + var kernelSize = request.Params?.KernelSize ?? 41; + // No preprocess + tesseract = original fast path if (engine == "tesseract" && preprocess == "none") return ocrHandler.HandleOcr(request); @@ -119,7 +121,7 @@ static class Daemon Bitmap processed; if (preprocess == "tophat") { - processed = ImagePreprocessor.PreprocessForOcr(bitmap); + processed = ImagePreprocessor.PreprocessForOcr(bitmap, kernelSize: kernelSize); } else if (preprocess == "bgsub") { @@ -152,16 +154,24 @@ static class Daemon private static object HandleDiffOcrPipeline(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request) { var engine = request.Engine ?? "tesseract"; - var preprocess = request.Preprocess ?? "bgsub"; var isPythonEngine = engine is "easyocr" or "paddleocr"; + var p = request.Params?.Clone() ?? new DiffOcrParams(); + if (request.Threshold > 0) p.DiffThresh = request.Threshold; - // No engine override + no preprocess override = original Tesseract path (supports test/tune params) - if (engine == "tesseract" && request.Preprocess == null) + // Determine preprocess mode: explicit request.Preprocess > params.UseBackgroundSub > default "bgsub" + string preprocess; + if (request.Preprocess != null) + preprocess = request.Preprocess; + else if (request.Params != null) + preprocess = p.UseBackgroundSub ? "bgsub" : "tophat"; + else + preprocess = "bgsub"; + + // No engine override + no preprocess override + no params = original Tesseract path + if (engine == "tesseract" && request.Preprocess == null && request.Params == null) return ocrHandler.HandleDiffOcr(request); var sw = System.Diagnostics.Stopwatch.StartNew(); - var p = new DiffOcrParams(); - if (request.Threshold > 0) p.DiffThresh = request.Threshold; var cropResult = ocrHandler.DiffCrop(request, p); if (cropResult == null) @@ -174,13 +184,14 @@ static class Daemon Bitmap processed; if (preprocess == "bgsub") { - int upscale = isPythonEngine ? 1 : 2; + int upscale = isPythonEngine ? 1 : p.Upscale; processed = ImagePreprocessor.PreprocessWithBackgroundSub( - cropped, refCropped, dimPercentile: 40, textThresh: 60, upscale: upscale, softThreshold: false); + cropped, refCropped, dimPercentile: p.DimPercentile, textThresh: p.TextThresh, + upscale: upscale, softThreshold: p.SoftThreshold); } else if (preprocess == "tophat") { - processed = ImagePreprocessor.PreprocessForOcr(cropped); + processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: p.KernelSize); } else // "none" { diff --git a/tools/OcrDaemon/Models.cs b/tools/OcrDaemon/Models.cs index 03ef8c4..51357e7 100644 --- a/tools/OcrDaemon/Models.cs +++ b/tools/OcrDaemon/Models.cs @@ -45,6 +45,9 @@ class Request [JsonPropertyName("preprocess")] public string? Preprocess { get; set; } + + [JsonPropertyName("params")] + public DiffOcrParams? Params { get; set; } } class RegionRect diff --git a/tools/OcrDaemon/tessdata/cases.json b/tools/OcrDaemon/tessdata/cases.json index 0c4c99f..a537042 100644 --- a/tools/OcrDaemon/tessdata/cases.json +++ b/tools/OcrDaemon/tessdata/cases.json @@ -49,5 +49,31 @@ "Asking Price:", "35x Divine Orb" ] + }, + { + "id": "raphpith1", + "image": "images/raphpith.png", + "fullImage": "images/raphpith-snapshot.png", + "expected": [ + "RATHPITH GLOBE", + "SACRED Focus", + "Focus", + "Quality: +20%", + "Energy Shield: 104", + "Requires: Level 75", + "16% Increased Energy Shield", + "+24 To Maximum Mana", + "+5% to all Elemental Resistances", + "NON-CHANNELLING SPELLS HAVE 3% INCREASED MAGNITUDE OF AlLMENTS PER 100 MAXIMUM LIFE", + "NON-CHANNELLING SPELLS DEAL 6% INCREASED DAMAGE PER 100 MAXIMUM MANA", + "+72 TO MAXIMUM LIFE", + "NON-CHANNELLING SPELLS HAVE 3% INCREASED CRITICAL HIT CHANCE PER 100 MAXIMUM LIFE", + "NON-CHANNELLING SPELLS DEAL 6% INCREASED DAMACE PER 100 MAXIMUM LIFE", + "Twice Corrupted", + "THE VAAL EMPTIED THEIR SLAVES OF BEATING HEARTS", + "AND LEFT A MOUNTAIN OF TWITCHING DEAD", + "Asking Price:", + "120x Divine Orb" + ] } ] \ No newline at end of file diff --git a/tools/OcrDaemon/tessdata/images/raphpith-snapshot.png b/tools/OcrDaemon/tessdata/images/raphpith-snapshot.png new file mode 100644 index 0000000..9adc93c Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/raphpith-snapshot.png differ diff --git a/tools/OcrDaemon/tessdata/images/raphpith.png b/tools/OcrDaemon/tessdata/images/raphpith.png new file mode 100644 index 0000000..155ca9d Binary files /dev/null and b/tools/OcrDaemon/tessdata/images/raphpith.png differ