working on crop

This commit is contained in:
Boki 2026-02-12 17:48:16 -05:00
parent 93e2234c4e
commit f74e3e1c85
12 changed files with 1135 additions and 220 deletions

View file

@ -214,24 +214,39 @@ export class Bot extends EventEmitter {
this.gameController, this.screenReader, this.logWatcher, this.config,
);
// /hideout + waitForAreaTransition
this.emit('log', 'info', 'Sending /hideout command...');
await this.gameController.focusGame();
const arrivedHome = await this.inventoryManager.waitForAreaTransition(
this.config.travelTimeoutMs,
() => this.gameController.goToHideout(),
);
if (arrivedHome) {
// Pre-warm OCR daemon + EasyOCR model in background (don't await yet)
const ocrWarmup = this.screenReader.warmup().catch(err => {
logger.warn({ err }, 'OCR warmup failed (will retry on first use)');
});
// Check if already in hideout from log tail
const alreadyInHideout = this.logWatcher.currentArea.toLowerCase().includes('hideout');
if (alreadyInHideout) {
logger.info({ area: this.logWatcher.currentArea }, 'Already in hideout, skipping /hideout command');
this.inventoryManager.setLocation(true);
this.logWatcher.currentArea = 'Hideout';
} else {
this.inventoryManager.setLocation(true);
this.logWatcher.currentArea = 'Hideout';
logger.warn('Timed out waiting for hideout transition on startup (may already be in hideout)');
this.emit('log', 'info', 'Sending /hideout command...');
await this.gameController.focusGame();
const arrivedHome = await this.inventoryManager.waitForAreaTransition(
this.config.travelTimeoutMs,
() => this.gameController.goToHideout(),
);
if (arrivedHome) {
this.inventoryManager.setLocation(true);
this.logWatcher.currentArea = 'Hideout';
} else {
this.inventoryManager.setLocation(true);
this.logWatcher.currentArea = 'Hideout';
logger.warn('Timed out waiting for hideout transition on startup (may already be in hideout)');
}
}
this.state = 'IN_HIDEOUT';
this.emit('log', 'info', 'In hideout, ready to trade');
// Ensure OCR warmup finished before proceeding to inventory scan
await ocrWarmup;
// Clear leftover inventory
this.emit('log', 'info', 'Checking inventory for leftover items...');
await this.inventoryManager.clearToStash();

View file

@ -71,16 +71,37 @@ export type OcrEngine = 'tesseract' | 'easyocr' | 'paddleocr';
export type OcrPreprocess = 'none' | 'bgsub' | 'tophat';
export interface DiffOcrParams {
/**
 * Optional tunables for the crop half of the tooltip parameters
 * (carried as `DiffOcrParams.crop`). All fields are optional numbers.
 * NOTE(review): the meaning and units of each field are defined by the
 * OCR daemon, not by this declaration — confirm there before documenting ranges.
 */
export interface DiffCropParams {
diffThresh?: number;
rowThreshDiv?: number;
colThreshDiv?: number;
maxGap?: number;
trimCutoff?: number;
ocrPad?: number;
}
/**
 * Optional tunables for the OCR half of the tooltip parameters
 * (carried as `DiffOcrParams.ocr`). Every field is optional.
 * NOTE(review): `useBackgroundSub` is declared twice in this view; this looks
 * like old/new diff lines shown together — confirm only one declaration
 * exists in the real file.
 */
export interface OcrParams {
// Preprocessing / scaling options.
kernelSize?: number;
upscale?: number;
useBackgroundSub?: boolean;
dimPercentile?: number;
textThresh?: number;
softThreshold?: boolean;
useBackgroundSub?: boolean;
// Line handling options.
usePerLineOcr?: boolean;
lineGapTolerance?: number;
linePadY?: number;
psm?: number;
mergeGap?: number;
// Detection thresholds; presumably engine-specific — TODO confirm which engine reads each.
linkThreshold?: number;
textThreshold?: number;
lowText?: number;
widthThs?: number;
paragraph?: boolean;
}
/**
 * Tooltip diff-OCR parameters, split into two optional groups:
 * `crop` (a `DiffCropParams`) and `ocr` (an `OcrParams`).
 * Either group may be omitted.
 */
export interface DiffOcrParams {
crop?: DiffCropParams;
ocr?: OcrParams;
}
interface DaemonRequest {
@ -236,6 +257,11 @@ export class OcrDaemon {
};
}
/**
 * Eagerly spawn the daemon process so it's ready for the first real request.
 *
 * Delegates to `ensureRunning()` and resolves once that call settles;
 * any rejection from `ensureRunning()` propagates to the caller.
 */
async warmup(): Promise<void> {
await this.ensureRunning();
}
async stop(): Promise<void> {
this.stopped = true;
if (this.proc) {

View file

@ -1,7 +1,7 @@
import { mkdir } from 'fs/promises';
import { join } from 'path';
import { logger } from '../util/logger.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
import type { Region } from '../types.js';
@ -23,11 +23,28 @@ export class ScreenReader {
settings: OcrSettings = {
engine: 'easyocr',
screenPreprocess: 'none',
tooltipPreprocess: 'bgsub',
tooltipParams: {},
tooltipPreprocess: 'tophat',
tooltipParams: {
crop: { diffThresh: 10 },
ocr: { kernelSize: 21 },
},
saveDebugImages: true,
};
/**
 * Warm up the OCR stack ahead of the first real request.
 *
 * Awaits the daemon's own warmup first. For any configured engine other
 * than `'tesseract'`, it then awaits a single OCR request over a 100×100
 * region at the screen origin; the result is discarded, the request exists
 * only to make the engine load its model early.
 *
 * The returned promise settles only after both steps finish, and rejects if
 * either step fails — callers that want background warmup must not await it
 * immediately and should attach their own `.catch()`.
 */
async warmup(): Promise<void> {
  await this.daemon.warmup();

  const activeEngine = this.settings.engine;
  // Tesseract needs no extra model-loading request.
  if (activeEngine === 'tesseract') {
    return;
  }

  // Small throwaway region: only the side effect of loading the model matters.
  const probeRegion = { x: 0, y: 0, width: 100, height: 100 };
  await this.daemon.ocr(probeRegion, activeEngine);
}
// ── Screenshot capture ──────────────────────────────────────────────
async captureScreen(): Promise<Buffer> {

View file

@ -39,8 +39,12 @@ export class InventoryTracker {
}
}
// Record detected items
// Record detected items, filtering out impossibly large ones (max POE2 item = 2×4)
for (const item of items) {
if (item.w > 2 || item.h > 4) {
logger.warn({ row: item.row, col: item.col, w: item.w, h: item.h }, 'Ignoring oversized item (false positive)');
continue;
}
this.items.push({ row: item.row, col: item.col, w: item.w, h: item.h, postAction: defaultAction });
}

View file

@ -1,6 +1,6 @@
import { EventEmitter } from 'events';
import { watch } from 'chokidar';
import { createReadStream, statSync } from 'fs';
import { createReadStream, statSync, openSync, readSync, closeSync } from 'fs';
import { createInterface } from 'readline';
import { logger } from '../util/logger.js';
@ -32,6 +32,8 @@ export class ClientLogWatcher extends EventEmitter {
try {
const stats = statSync(this.logPath);
this.fileOffset = stats.size;
// Read tail of log to determine current area before we start watching
this.detectCurrentArea(stats.size);
} catch {
logger.warn({ path: this.logPath }, 'Log file not found yet, will watch for creation');
this.fileOffset = 0;
@ -47,7 +49,39 @@ export class ClientLogWatcher extends EventEmitter {
this.readNewLines();
});
logger.info({ path: this.logPath }, 'Watching Client.txt for game events');
logger.info({ path: this.logPath, currentArea: this.currentArea || '(unknown)' }, 'Watching Client.txt for game events');
}
/**
 * Read the last chunk of the log file to determine the current area.
 *
 * Reads at most the final 8192 bytes of `this.logPath`, splits them into
 * lines, and scans from the newest line backwards. The first line matching
 * either a `[SCENE] Set Source [...]` entry (other than `(null)`) or a
 * `You have entered ...` entry sets `this.currentArea`. If nothing matches,
 * `this.currentArea` is left unchanged.
 *
 * @param fileSize Size of the log file in bytes, as measured by the caller.
 * @throws Propagates any error raised while opening or reading the file.
 */
private detectCurrentArea(fileSize: number): void {
  const TAIL_BYTES = 8192;
  // Hoisted so the patterns are not re-created for every scanned line.
  const SCENE_PATTERN = /\[SCENE\] Set Source \[(.+?)\]/;
  const ENTERED_PATTERN = /You have entered (.+?)\.?$/;

  const start = Math.max(0, fileSize - TAIL_BYTES);
  const buf = Buffer.alloc(Math.min(TAIL_BYTES, fileSize));
  const fd = openSync(this.logPath, 'r');
  let bytesRead = 0;
  try {
    bytesRead = readSync(fd, buf, 0, buf.length, start);
  } finally {
    // Always release the descriptor, even when the read throws.
    closeSync(fd);
  }

  // Decode only the bytes actually read. If the file shrank after the caller
  // measured it, the rest of the buffer is zero padding that would otherwise
  // be decoded as NUL characters and could end up inside a captured area name.
  const tail = buf.toString('utf-8', 0, bytesRead);
  const lines = tail.split(/\r?\n/);

  // Walk backwards to find the most recent area transition.
  // The first element may be a partial line because the read starts mid-file.
  for (let i = lines.length - 1; i >= 0; i--) {
    const line = lines[i];
    const sceneMatch = line.match(SCENE_PATTERN);
    if (sceneMatch && sceneMatch[1] !== '(null)') {
      this.currentArea = sceneMatch[1];
      logger.info({ area: this.currentArea }, 'Detected current area from log tail');
      return;
    }
    const areaMatch = line.match(ENTERED_PATTERN);
    if (areaMatch) {
      this.currentArea = areaMatch[1];
      logger.info({ area: this.currentArea }, 'Detected current area from log tail');
      return;
    }
  }
}
private readNewLines(): void {

View file

@ -558,7 +558,7 @@
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Engine</div>
<select id="ocrEngine" class="mode-select" style="width:100%">
<select id="ocrEngine" class="mode-select" style="width:100%" onchange="toggleOcrSections()">
<option value="tesseract">Tesseract</option>
<option value="easyocr">EasyOCR</option>
<option value="paddleocr">PaddleOCR</option>
@ -582,12 +582,52 @@
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Tooltip OCR</div>
<div class="section-title" style="margin-bottom:6px">Tooltip Preprocess</div>
<select id="ocrTooltipPreprocess" class="mode-select" style="width:100%" onchange="toggleOcrSections()">
<option value="none">None</option>
<option value="bgsub">Background Subtraction</option>
<option value="tophat">TopHat</option>
</select>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Crop Detection</div>
<div class="settings-grid">
<div class="setting-row">
<label>Diff Threshold</label>
<input type="number" id="ocrDiffThresh" value="20" />
</div>
<div class="setting-row">
<label>Max Gap</label>
<input type="number" id="ocrMaxGap" value="20" />
</div>
<div class="setting-row">
<label>Trim Cutoff</label>
<input type="number" id="ocrTrimCutoff" value="0.4" step="0.05" />
</div>
</div>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">OCR Processing</div>
<div class="settings-grid">
<div class="setting-row">
<label>Upscale</label>
<input type="number" id="ocrUpscale" value="2" />
</div>
<div class="setting-row">
<label>Merge Gap (px)</label>
<input type="number" id="ocrMergeGap" value="0" />
</div>
</div>
<div id="tooltipTophatParams" style="display:none;margin-top:8px">
<div class="settings-grid">
<div class="setting-row">
<label>Kernel Size</label>
<input type="number" id="ocrTooltipKernel" value="41" />
</div>
</div>
</div>
<div id="tooltipBgsubParams" style="display:none;margin-top:8px">
<div class="settings-grid">
<div class="setting-row">
@ -607,34 +647,24 @@
<span style="font-size:12px;color:#8b949e">Soft Threshold</span>
</div>
</div>
<div id="tooltipTophatParams" style="display:none;margin-top:8px">
<div id="easyocrParams" style="display:none;margin-top:8px">
<div class="settings-grid">
<div class="setting-row">
<label>Kernel Size</label>
<input type="number" id="ocrTooltipKernel" value="41" />
<label>Link Threshold</label>
<input type="number" id="ocrLinkThreshold" step="0.05" />
</div>
<div class="setting-row">
<label>Text Threshold</label>
<input type="number" id="ocrTextThreshold" step="0.05" />
</div>
<div class="setting-row">
<label>Low Text</label>
<input type="number" id="ocrLowText" step="0.05" />
</div>
<div class="setting-row">
<label>Width Threshold</label>
<input type="number" id="ocrWidthThs" step="0.05" />
</div>
</div>
</div>
</div>
<div style="margin-bottom:16px">
<div class="section-title" style="margin-bottom:6px">Tooltip Detection</div>
<div class="settings-grid">
<div class="setting-row">
<label>Diff Threshold</label>
<input type="number" id="ocrDiffThresh" value="20" />
</div>
<div class="setting-row">
<label>Max Gap</label>
<input type="number" id="ocrMaxGap" value="20" />
</div>
<div class="setting-row">
<label>Trim Cutoff</label>
<input type="number" id="ocrTrimCutoff" value="0.4" step="0.1" />
</div>
<div class="setting-row">
<label>Upscale</label>
<input type="number" id="ocrUpscale" value="2" />
</div>
</div>
</div>
@ -1133,6 +1163,9 @@
const tooltipPp = document.getElementById('ocrTooltipPreprocess').value;
document.getElementById('tooltipBgsubParams').style.display = tooltipPp === 'bgsub' ? '' : 'none';
document.getElementById('tooltipTophatParams').style.display = tooltipPp === 'tophat' ? '' : 'none';
const engine = document.getElementById('ocrEngine').value;
document.getElementById('easyocrParams').style.display = engine === 'easyocr' ? '' : 'none';
}
async function loadOcrSettings() {
@ -1142,18 +1175,24 @@
if (!data.ok) return;
document.getElementById('ocrEngine').value = data.engine || 'easyocr';
document.getElementById('ocrScreenPreprocess').value = data.screenPreprocess || 'none';
document.getElementById('ocrTooltipPreprocess').value = data.tooltipPreprocess || 'bgsub';
document.getElementById('ocrTooltipPreprocess').value = data.tooltipPreprocess || 'tophat';
document.getElementById('ocrSaveDebugImages').checked = data.saveDebugImages !== false;
const tp = data.tooltipParams || {};
document.getElementById('ocrDiffThresh').value = tp.diffThresh ?? 20;
document.getElementById('ocrMaxGap').value = tp.maxGap ?? 20;
document.getElementById('ocrTrimCutoff').value = tp.trimCutoff ?? 0.4;
document.getElementById('ocrUpscale').value = tp.upscale ?? 2;
document.getElementById('ocrDimPercentile').value = tp.dimPercentile ?? 40;
document.getElementById('ocrTextThresh').value = tp.textThresh ?? 60;
document.getElementById('ocrSoftThreshold').checked = !!tp.softThreshold;
document.getElementById('ocrScreenKernel').value = tp.kernelSize ?? 41;
document.getElementById('ocrTooltipKernel').value = tp.kernelSize ?? 41;
const crop = tp.crop || {};
const ocr = tp.ocr || {};
document.getElementById('ocrDiffThresh').value = crop.diffThresh ?? 20;
document.getElementById('ocrMaxGap').value = crop.maxGap ?? 20;
document.getElementById('ocrTrimCutoff').value = crop.trimCutoff ?? 0.4;
document.getElementById('ocrUpscale').value = ocr.upscale ?? 2;
document.getElementById('ocrMergeGap').value = ocr.mergeGap ?? 0;
document.getElementById('ocrTooltipKernel').value = ocr.kernelSize ?? 41;
document.getElementById('ocrDimPercentile').value = ocr.dimPercentile ?? 40;
document.getElementById('ocrTextThresh').value = ocr.textThresh ?? 60;
document.getElementById('ocrSoftThreshold').checked = !!ocr.softThreshold;
document.getElementById('ocrLinkThreshold').value = ocr.linkThreshold ?? '';
document.getElementById('ocrTextThreshold').value = ocr.textThreshold ?? '';
document.getElementById('ocrLowText').value = ocr.lowText ?? '';
document.getElementById('ocrWidthThs').value = ocr.widthThs ?? '';
toggleOcrSections();
} catch {}
}
@ -1161,29 +1200,45 @@
async function saveOcrSettings() {
const tooltipPp = document.getElementById('ocrTooltipPreprocess').value;
const screenPp = document.getElementById('ocrScreenPreprocess').value;
const engine = document.getElementById('ocrEngine').value;
const tooltipParams = {
diffThresh: parseInt(document.getElementById('ocrDiffThresh').value) || 20,
maxGap: parseInt(document.getElementById('ocrMaxGap').value) || 20,
trimCutoff: parseFloat(document.getElementById('ocrTrimCutoff').value) || 0.4,
upscale: parseInt(document.getElementById('ocrUpscale').value) || 2,
useBackgroundSub: tooltipPp === 'bgsub',
crop: {
diffThresh: parseInt(document.getElementById('ocrDiffThresh').value) || 20,
maxGap: parseInt(document.getElementById('ocrMaxGap').value) || 20,
trimCutoff: parseFloat(document.getElementById('ocrTrimCutoff').value) || 0.4,
},
ocr: {
upscale: parseInt(document.getElementById('ocrUpscale').value) || 2,
useBackgroundSub: tooltipPp === 'bgsub',
},
};
if (tooltipPp === 'bgsub') {
tooltipParams.dimPercentile = parseInt(document.getElementById('ocrDimPercentile').value) || 40;
tooltipParams.textThresh = parseInt(document.getElementById('ocrTextThresh').value) || 60;
tooltipParams.softThreshold = document.getElementById('ocrSoftThreshold').checked;
}
const mg = parseInt(document.getElementById('ocrMergeGap').value);
if (mg > 0) tooltipParams.ocr.mergeGap = mg;
if (tooltipPp === 'tophat') {
tooltipParams.kernelSize = parseInt(document.getElementById('ocrTooltipKernel').value) || 41;
tooltipParams.ocr.kernelSize = parseInt(document.getElementById('ocrTooltipKernel').value) || 21;
}
if (screenPp === 'tophat') {
tooltipParams.kernelSize = parseInt(document.getElementById('ocrScreenKernel').value) || 41;
if (tooltipPp === 'bgsub') {
tooltipParams.ocr.dimPercentile = parseInt(document.getElementById('ocrDimPercentile').value) || 40;
tooltipParams.ocr.textThresh = parseInt(document.getElementById('ocrTextThresh').value) || 60;
tooltipParams.ocr.softThreshold = document.getElementById('ocrSoftThreshold').checked;
}
if (engine === 'easyocr') {
const lt = parseFloat(document.getElementById('ocrLinkThreshold').value);
const tt = parseFloat(document.getElementById('ocrTextThreshold').value);
const low = parseFloat(document.getElementById('ocrLowText').value);
const wt = parseFloat(document.getElementById('ocrWidthThs').value);
if (!isNaN(lt)) tooltipParams.ocr.linkThreshold = lt;
if (!isNaN(tt)) tooltipParams.ocr.textThreshold = tt;
if (!isNaN(low)) tooltipParams.ocr.lowText = low;
if (!isNaN(wt)) tooltipParams.ocr.widthThs = wt;
}
const body = {
engine: document.getElementById('ocrEngine').value,
engine,
screenPreprocess: screenPp,
tooltipPreprocess: tooltipPp,
tooltipParams,