work on new crop

This commit is contained in:
Boki 2026-02-12 22:07:54 -05:00
parent 9845e7f9bf
commit a7fab55d44
22 changed files with 975 additions and 10 deletions

View file

@ -104,6 +104,22 @@ export interface DiffOcrParams {
ocr?: OcrParams;
}
/** Tooltip-detection strategy: `'diff'` or `'edge'`. */
export type TooltipMethod = 'diff' | 'edge';
/**
 * Optional tuning values for the edge-based tooltip crop.
 *
 * Every field may be omitted. NOTE(review): the exact meaning and the
 * defaults applied to omitted fields are decided by the daemon, which is not
 * visible here — confirm the per-field notes below against it.
 */
export interface EdgeCropParams {
// Presumably the lower Canny edge-detection threshold — TODO confirm.
cannyLow?: number;
// Presumably the upper Canny edge-detection threshold — TODO confirm.
cannyHigh?: number;
// Presumably the minimum length for a detected line to count — TODO confirm units.
minLineLength?: number;
// Presumably the size of the region of interest that is searched — TODO confirm units.
roiSize?: number;
// Presumably a fractional edge-density cutoff — TODO confirm range.
densityThreshold?: number;
// NOTE(review): looks like padding applied around the crop before OCR — confirm.
ocrPad?: number;
}
/**
 * Parameter bundle for edge-based OCR: crop tuning plus OCR tuning.
 * Both groups are optional.
 */
export interface EdgeOcrParams {
// Edge crop-detection settings.
crop?: EdgeCropParams;
// OCR-stage settings.
ocr?: OcrParams;
}
interface DaemonRequest {
cmd: string;
region?: Region;
@ -116,6 +132,9 @@ interface DaemonRequest {
engine?: string;
preprocess?: string;
params?: DiffOcrParams;
edgeParams?: EdgeOcrParams;
cursorX?: number;
cursorY?: number;
}
interface DaemonResponse {
@ -236,6 +255,24 @@ export class OcrDaemon {
};
}
/**
 * Sends an `edge-ocr` command to the daemon and returns the OCR result.
 *
 * Optional arguments are copied onto the request only when supplied:
 * the engine is sent only when it is something other than `'tesseract'`,
 * and `edgeParams` only when it has at least one own key.
 *
 * @param savePath - Forwarded as the request's `path` when non-empty.
 * @param region - Forwarded as the request's `region` when given.
 * @param engine - OCR engine; any engine other than `'tesseract'` also selects the 120 s timeout.
 * @param preprocess - Forwarded as the request's `preprocess` when given.
 * @param edgeParams - Edge crop/OCR settings.
 * @param cursorX - Forwarded when not null/undefined (0 is forwarded).
 * @param cursorY - Forwarded when not null/undefined (0 is forwarded).
 * @returns The response text (default `''`), lines (default `[]`) and region.
 */
async edgeOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess, edgeParams?: EdgeOcrParams, cursorX?: number, cursorY?: number): Promise<DiffOcrResponse> {
  // Only a non-tesseract engine is sent explicitly.
  const explicitEngine = engine && engine !== 'tesseract' ? engine : undefined;
  const hasEdgeParams = edgeParams !== undefined && edgeParams !== null && Object.keys(edgeParams).length > 0;

  // Fields are attached in a fixed order so the serialized request is unchanged.
  const req: DaemonRequest = { cmd: 'edge-ocr' };
  if (savePath) {
    req.path = savePath;
  }
  if (region) {
    req.region = region;
  }
  if (explicitEngine) {
    req.engine = explicitEngine;
  }
  if (preprocess) {
    req.preprocess = preprocess;
  }
  if (hasEdgeParams) {
    req.edgeParams = edgeParams;
  }
  if (cursorX !== undefined && cursorX !== null) {
    req.cursorX = cursorX;
  }
  if (cursorY !== undefined && cursorY !== null) {
    req.cursorY = cursorY;
  }

  const timeout = explicitEngine ? 120_000 : CAPTURE_TIMEOUT;
  const resp = await this.sendWithRetry(req, timeout);

  const text = resp.text ?? '';
  const lines = resp.lines ?? [];
  return { text, lines, region: resp.region };
}
async saveScreenshot(path: string, region?: Region): Promise<void> {
const req: DaemonRequest = { cmd: 'screenshot', path };
if (region) req.region = region;

View file

@ -1,7 +1,7 @@
import { mkdir } from 'fs/promises';
import { join } from 'path';
import { logger } from '../util/logger.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult, type TooltipMethod, type EdgeOcrParams } from './OcrDaemon.js';
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
import type { Region } from '../types.js';
@ -13,7 +13,9 @@ export interface OcrSettings {
engine: OcrEngine;
screenPreprocess: OcrPreprocess;
tooltipPreprocess: OcrPreprocess;
tooltipMethod: TooltipMethod;
tooltipParams: DiffOcrParams;
edgeParams: EdgeOcrParams;
saveDebugImages: boolean;
}
@ -24,10 +26,15 @@ export class ScreenReader {
engine: 'easyocr',
screenPreprocess: 'none',
tooltipPreprocess: 'tophat',
tooltipMethod: 'diff',
tooltipParams: {
crop: { diffThresh: 10 },
ocr: { kernelSize: 21 },
},
edgeParams: {
crop: {},
ocr: { kernelSize: 21 },
},
saveDebugImages: true,
};
@ -235,12 +242,16 @@ export class ScreenReader {
// ── Snapshot / Diff-OCR (for tooltip reading) ──────────────────────
/**
 * Asks the daemon to take a snapshot for later diff-based tooltip OCR.
 * Does nothing when the tooltip method is 'edge', which needs no reference frame.
 */
async snapshot(): Promise<void> {
  const needsReferenceFrame = this.settings.tooltipMethod !== 'edge';
  if (needsReferenceFrame) {
    await this.daemon.snapshot();
  }
}
/**
 * Reads tooltip text using the tooltip method selected in the OCR settings.
 *
 * With method 'edge' the request goes to the daemon's edge OCR with
 * `edgeParams`; otherwise it goes to the diff OCR with `tooltipParams`.
 *
 * @param savePath - Passed through to the daemon call unchanged.
 * @param region - Passed through to the daemon call unchanged.
 * @returns The daemon's OCR response for the chosen method.
 */
async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
  // Single destructuring: the settings were previously unpacked twice, which
  // redeclared `engine`, `tooltipPreprocess` and `tooltipParams`.
  const { engine, tooltipPreprocess, tooltipMethod, tooltipParams, edgeParams } = this.settings;
  // 'none' is expressed by omitting the preprocess argument.
  const pp = tooltipPreprocess !== 'none' ? tooltipPreprocess : undefined;
  if (tooltipMethod === 'edge') {
    return this.daemon.edgeOcr(savePath, region, engine, pp, edgeParams);
  }
  return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams);
}

View file

@ -581,6 +581,14 @@
</div>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Tooltip Method</div>
<select id="ocrTooltipMethod" class="mode-select" style="width:100%" onchange="toggleOcrSections()">
<option value="diff">Diff Detection</option>
<option value="edge">Edge Detection</option>
</select>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Tooltip Preprocess</div>
<select id="ocrTooltipPreprocess" class="mode-select" style="width:100%" onchange="toggleOcrSections()">
@ -590,8 +598,8 @@
</select>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Crop Detection</div>
<div id="diffCropParams" style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Crop Detection (Diff)</div>
<div class="settings-grid">
<div class="setting-row">
<label>Diff Threshold</label>
@ -608,6 +616,32 @@
</div>
</div>
<div id="edgeCropParams" style="margin-bottom:16px;display:none">
<div class="section-title" style="margin-bottom:6px">Crop Detection (Edge)</div>
<div class="settings-grid">
<div class="setting-row">
<label>Canny Low</label>
<input type="number" id="ocrCannyLow" value="50" />
</div>
<div class="setting-row">
<label>Canny High</label>
<input type="number" id="ocrCannyHigh" value="150" />
</div>
<div class="setting-row">
<label>Min Line Length</label>
<input type="number" id="ocrMinLineLength" value="100" />
</div>
<div class="setting-row">
<label>ROI Size</label>
<input type="number" id="ocrRoiSize" value="1400" />
</div>
<div class="setting-row">
<label>Density Threshold</label>
<input type="number" id="ocrDensityThreshold" value="0.15" step="0.01" />
</div>
</div>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">OCR Processing</div>
<div class="settings-grid">
@ -1160,7 +1194,24 @@
const screenPp = document.getElementById('ocrScreenPreprocess').value;
document.getElementById('screenTophatParams').style.display = screenPp === 'tophat' ? '' : 'none';
const tooltipPp = document.getElementById('ocrTooltipPreprocess').value;
const method = document.getElementById('ocrTooltipMethod').value;
const isEdge = method === 'edge';
// Show/hide method-specific crop params
document.getElementById('diffCropParams').style.display = isEdge ? 'none' : '';
document.getElementById('edgeCropParams').style.display = isEdge ? '' : 'none';
// Disable bgsub when edge (no reference frame)
const ppSelect = document.getElementById('ocrTooltipPreprocess');
const bgsubOption = ppSelect.querySelector('option[value="bgsub"]');
if (isEdge) {
bgsubOption.disabled = true;
if (ppSelect.value === 'bgsub') ppSelect.value = 'tophat';
} else {
bgsubOption.disabled = false;
}
const tooltipPp = ppSelect.value;
document.getElementById('tooltipBgsubParams').style.display = tooltipPp === 'bgsub' ? '' : 'none';
document.getElementById('tooltipTophatParams').style.display = tooltipPp === 'tophat' ? '' : 'none';
@ -1175,11 +1226,20 @@
if (!data.ok) return;
document.getElementById('ocrEngine').value = data.engine || 'easyocr';
document.getElementById('ocrScreenPreprocess').value = data.screenPreprocess || 'none';
document.getElementById('ocrTooltipMethod').value = data.tooltipMethod || 'diff';
document.getElementById('ocrTooltipPreprocess').value = data.tooltipPreprocess || 'tophat';
document.getElementById('ocrSaveDebugImages').checked = data.saveDebugImages !== false;
const tp = data.tooltipParams || {};
const crop = tp.crop || {};
const ocr = tp.ocr || {};
// Edge params
const ep = data.edgeParams || {};
const edgeCrop = ep.crop || {};
document.getElementById('ocrCannyLow').value = edgeCrop.cannyLow ?? 50;
document.getElementById('ocrCannyHigh').value = edgeCrop.cannyHigh ?? 150;
document.getElementById('ocrMinLineLength').value = edgeCrop.minLineLength ?? 100;
document.getElementById('ocrRoiSize').value = edgeCrop.roiSize ?? 1400;
document.getElementById('ocrDensityThreshold').value = edgeCrop.densityThreshold ?? 0.15;
document.getElementById('ocrDiffThresh').value = crop.diffThresh ?? 20;
document.getElementById('ocrMaxGap').value = crop.maxGap ?? 20;
document.getElementById('ocrTrimCutoff').value = crop.trimCutoff ?? 0.4;
@ -1237,11 +1297,29 @@
if (!isNaN(wt)) tooltipParams.ocr.widthThs = wt;
}
const tooltipMethod = document.getElementById('ocrTooltipMethod').value;
// Edge-detection settings read from the form and sent to the server as `edgeParams`.
// Built inside an IIFE so the parsing helpers do not leak into the surrounding scope.
const edgeParams = (() => {
  const fieldValue = (id) => document.getElementById(id).value;
  // Fall back only when the field does not parse to a number. The previous
  // `parse(...) || fallback` form also replaced an explicitly entered 0 with
  // the default, so 0 could never be saved.
  const numberOr = (parsed, fallback) => (Number.isNaN(parsed) ? fallback : parsed);
  const intField = (id, fallback) => numberOr(parseInt(fieldValue(id), 10), fallback);
  const floatField = (id, fallback) => numberOr(parseFloat(fieldValue(id)), fallback);

  return {
    crop: {
      cannyLow: intField('ocrCannyLow', 50),
      cannyHigh: intField('ocrCannyHigh', 150),
      minLineLength: intField('ocrMinLineLength', 100),
      roiSize: intField('ocrRoiSize', 1400),
      densityThreshold: floatField('ocrDensityThreshold', 0.15),
    },
    ocr: {
      // NOTE(review): the falsy fallback is kept for these two on the
      // assumption that 0 is not a usable upscale factor or kernel size — confirm.
      upscale: parseInt(fieldValue('ocrUpscale'), 10) || 2,
      kernelSize: parseInt(fieldValue('ocrTooltipKernel'), 10) || 21,
    },
  };
})();
const body = {
engine,
screenPreprocess: screenPp,
tooltipMethod,
tooltipPreprocess: tooltipPp,
tooltipParams,
edgeParams,
saveDebugImages: document.getElementById('ocrSaveDebugImages').checked,
};

View file

@ -5,7 +5,7 @@ import { sleep } from '../../util/sleep.js';
import { GRID_LAYOUTS } from '../../game/GridReader.js';
import type { Bot } from '../../bot/Bot.js';
import type { Server } from '../Server.js';
import type { OcrEngine, OcrPreprocess, DiffOcrParams } from '../../game/OcrDaemon.js';
import type { OcrEngine, OcrPreprocess, DiffOcrParams, TooltipMethod, EdgeOcrParams } from '../../game/OcrDaemon.js';
import type { OcrSettings } from '../../game/ScreenReader.js';
export function debugRoutes(bot: Bot, server: Server): Router {
@ -30,7 +30,9 @@ export function debugRoutes(bot: Bot, server: Server): Router {
if (body.engine && ['tesseract', 'easyocr', 'paddleocr'].includes(body.engine)) s.engine = body.engine;
if (body.screenPreprocess && ['none', 'bgsub', 'tophat'].includes(body.screenPreprocess)) s.screenPreprocess = body.screenPreprocess;
if (body.tooltipPreprocess && ['none', 'bgsub', 'tophat'].includes(body.tooltipPreprocess)) s.tooltipPreprocess = body.tooltipPreprocess;
if (body.tooltipMethod && ['diff', 'edge'].includes(body.tooltipMethod)) s.tooltipMethod = body.tooltipMethod;
if (body.tooltipParams != null) s.tooltipParams = body.tooltipParams;
if (body.edgeParams != null) s.edgeParams = body.edgeParams;
if (body.saveDebugImages != null) s.saveDebugImages = body.saveDebugImages;
server.broadcastLog('info', `OCR settings updated: engine=${s.engine} screen=${s.screenPreprocess} tooltip=${s.tooltipPreprocess}`);
res.json({ ok: true });