added easyOCR

This commit is contained in:
Boki 2026-02-12 01:04:19 -05:00
parent 37d6678577
commit 9f208b0606
27 changed files with 1780 additions and 112 deletions

View file

@ -1,5 +1,6 @@
import { EventEmitter } from 'events';
import { logger } from '../util/logger.js';
import type { LinkMode } from '../types.js';
import type { ConfigStore, SavedLink } from './ConfigStore.js';
export interface TradeLink {
@ -8,6 +9,7 @@ export interface TradeLink {
name: string;
label: string;
active: boolean;
mode: LinkMode;
addedAt: string;
}
@ -25,6 +27,11 @@ export interface BotStatus {
waitForMoreItemsMs: number;
betweenTradesDelayMs: number;
};
inventory?: {
grid: boolean[][];
items: { row: number; col: number; w: number; h: number }[];
free: number;
};
}
export class BotController extends EventEmitter {
@ -35,6 +42,7 @@ export class BotController extends EventEmitter {
private tradesFailed = 0;
private startTime = Date.now();
private store: ConfigStore;
private _inventory: BotStatus['inventory'] = undefined;
constructor(store: ConfigStore) {
super();
@ -69,7 +77,7 @@ export class BotController extends EventEmitter {
this.emit('resumed');
}
addLink(url: string, name: string = ''): TradeLink {
addLink(url: string, name: string = '', mode?: LinkMode): TradeLink {
url = this.stripLive(url);
const id = this.extractId(url);
const label = this.extractLabel(url);
@ -81,11 +89,12 @@ export class BotController extends EventEmitter {
name: name || savedLink?.name || '',
label,
active: savedLink?.active !== undefined ? savedLink.active : true,
mode: mode || savedLink?.mode || 'live',
addedAt: new Date().toISOString(),
};
this.links.set(id, link);
this.store.addLink(url, link.name);
logger.info({ id, url, name: link.name, active: link.active }, 'Trade link added');
this.store.addLink(url, link.name, link.mode);
logger.info({ id, url, name: link.name, active: link.active, mode: link.mode }, 'Trade link added');
this.emit('link-added', link);
return link;
}
@ -118,6 +127,15 @@ export class BotController extends EventEmitter {
this.store.updateLinkById(id, { name });
}
/**
 * Switch an existing trade link to another mode.
 *
 * Mutates the in-memory link, forwards the change to the config store,
 * logs it and emits `link-mode-changed`. An unknown id is ignored.
 *
 * @param id   Key of the link in the controller's link map.
 * @param mode Mode to apply to the link.
 */
updateLinkMode(id: string, mode: LinkMode): void {
  const target = this.links.get(id);
  if (!target) {
    return;
  }

  target.mode = mode;
  this.store.updateLinkById(id, { mode });

  logger.info({ id, mode }, 'Trade link mode updated');
  this.emit('link-mode-changed', { id, mode, link: target });
}
isLinkActive(searchId: string): boolean {
const link = this.links.get(searchId);
return link ? link.active : false;
@ -153,9 +171,14 @@ export class BotController extends EventEmitter {
waitForMoreItemsMs: s.waitForMoreItemsMs,
betweenTradesDelayMs: s.betweenTradesDelayMs,
},
inventory: this._inventory,
};
}
/**
 * Replace the inventory snapshot that is reported in the status payload.
 * Passing `undefined` removes the snapshot.
 */
setInventory(inv: BotStatus['inventory']): void {
this._inventory = inv;
}
/** Return the ConfigStore instance held by this controller. */
getStore(): ConfigStore {
return this.store;
}

View file

@ -1,11 +1,13 @@
import { readFileSync, writeFileSync, existsSync } from 'fs';
import path from 'path';
import { logger } from '../util/logger.js';
import type { LinkMode } from '../types.js';
/** A trade link in the shape it is persisted in the config file. */
export interface SavedLink {
// Trade search URL; a trailing "/live" segment is stripped before storing.
url: string;
// User-supplied display name; empty string when none was given.
name: string;
// Whether the link is enabled.
active: boolean;
// Link mode; entries loaded without one are migrated to 'live'.
mode: LinkMode;
// ISO-8601 timestamp of when the link was added.
addedAt: string;
}
@ -55,10 +57,11 @@ export class ConfigStore {
const parsed = JSON.parse(raw) as Partial<SavedSettings>;
const merged = { ...DEFAULTS, ...parsed };
// Migrate old links: add name/active fields, strip /live from URLs
merged.links = merged.links.map((l) => ({
merged.links = merged.links.map((l: any) => ({
url: l.url.replace(/\/live\/?$/, ''),
name: l.name || '',
active: l.active !== undefined ? l.active : true,
mode: l.mode || 'live',
addedAt: l.addedAt || new Date().toISOString(),
}));
logger.info({ path: this.filePath, linkCount: merged.links.length }, 'Loaded config.json');
@ -85,10 +88,10 @@ export class ConfigStore {
return this.data.links;
}
addLink(url: string, name: string = ''): void {
addLink(url: string, name: string = '', mode: LinkMode = 'live'): void {
url = url.replace(/\/live\/?$/, '');
if (this.data.links.some((l) => l.url === url)) return;
this.data.links.push({ url, name, active: true, addedAt: new Date().toISOString() });
this.data.links.push({ url, name, active: true, mode, addedAt: new Date().toISOString() });
this.save();
}
@ -105,7 +108,7 @@ export class ConfigStore {
this.save();
}
updateLinkById(id: string, updates: { name?: string; active?: boolean }): SavedLink | null {
updateLinkById(id: string, updates: { name?: string; active?: boolean; mode?: LinkMode }): SavedLink | null {
const link = this.data.links.find((l) => {
const parts = l.url.split('/');
return parts[parts.length - 1] === id;
@ -113,6 +116,7 @@ export class ConfigStore {
if (!link) return null;
if (updates.name !== undefined) link.name = updates.name;
if (updates.active !== undefined) link.active = updates.active;
if (updates.mode !== undefined) link.mode = updates.mode;
this.save();
return link;
}

View file

@ -8,6 +8,7 @@ import { logger } from '../util/logger.js';
import { sleep } from '../util/sleep.js';
import type { BotController } from './BotController.js';
import type { ScreenReader } from '../game/ScreenReader.js';
import type { OcrEngine } from '../game/OcrDaemon.js';
import { GRID_LAYOUTS } from '../game/GridReader.js';
import type { GameController } from '../game/GameController.js';
@ -54,12 +55,13 @@ export class DashboardServer {
// Links CRUD
this.app.post('/api/links', (req, res) => {
const { url, name } = req.body as { url: string; name?: string };
const { url, name, mode } = req.body as { url: string; name?: string; mode?: string };
if (!url || !url.includes('pathofexile.com/trade')) {
res.status(400).json({ error: 'Invalid trade URL' });
return;
}
this.bot.addLink(url, name || '');
const linkMode = mode === 'scrap' ? 'scrap' : 'live';
this.bot.addLink(url, name || '', linkMode);
this.broadcastStatus();
res.json({ ok: true });
});
@ -86,6 +88,18 @@ export class DashboardServer {
res.json({ ok: true });
});
// Change link mode
this.app.post('/api/links/:id/mode', (req, res) => {
const { mode } = req.body as { mode: string };
if (mode !== 'live' && mode !== 'scrap') {
res.status(400).json({ error: 'Invalid mode. Must be "live" or "scrap".' });
return;
}
this.bot.updateLinkMode(req.params.id, mode);
this.broadcastStatus();
res.json({ ok: true });
});
// Settings
this.app.post('/api/settings', (req, res) => {
const updates = req.body as Record<string, unknown>;
@ -108,11 +122,29 @@ export class DashboardServer {
}
});
// OCR engine selection
this.app.get('/api/debug/ocr-engine', (_req, res) => {
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
res.json({ ok: true, engine: this.debug.screenReader.debugOcrEngine });
});
this.app.post('/api/debug/ocr-engine', (req, res) => {
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
const { engine } = req.body as { engine: string };
if (!['tesseract', 'easyocr'].includes(engine)) {
res.status(400).json({ error: 'Invalid engine. Must be tesseract or easyocr.' });
return;
}
this.debug.screenReader.debugOcrEngine = engine as OcrEngine;
this.broadcastLog('info', `OCR engine set to: ${engine}`);
res.json({ ok: true });
});
this.app.post('/api/debug/ocr', async (_req, res) => {
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
try {
const text = await this.debug.screenReader.readFullScreen();
this.broadcastLog('info', `OCR result (${text.length} chars): ${text.substring(0, 200)}`);
const text = await this.debug.screenReader.debugReadFullScreen();
this.broadcastLog('info', `OCR [${this.debug.screenReader.debugOcrEngine}] (${text.length} chars): ${text.substring(0, 200)}`);
res.json({ ok: true, text });
} catch (err) {
logger.error({ err }, 'Debug OCR failed');
@ -125,11 +157,11 @@ export class DashboardServer {
const { text } = req.body as { text: string };
if (!text) { res.status(400).json({ error: 'Missing text parameter' }); return; }
try {
const pos = await this.debug.screenReader.findTextOnScreen(text);
const pos = await this.debug.screenReader.debugFindTextOnScreen(text);
if (pos) {
this.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y})`);
this.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) [${this.debug.screenReader.debugOcrEngine}]`);
} else {
this.broadcastLog('warn', `"${text}" not found on screen`);
this.broadcastLog('warn', `"${text}" not found on screen [${this.debug.screenReader.debugOcrEngine}]`);
}
res.json({ ok: true, found: !!pos, position: pos });
} catch (err) {
@ -233,17 +265,17 @@ export class DashboardServer {
this.app.post('/api/debug/find-and-click', async (req, res) => {
if (!this.debug) { res.status(503).json({ error: 'Debug not available' }); return; }
const { text } = req.body as { text: string };
const { text, fuzzy } = req.body as { text: string; fuzzy?: boolean };
if (!text) { res.status(400).json({ error: 'Missing text parameter' }); return; }
try {
const pos = await this.debug.screenReader.findTextOnScreen(text);
const pos = await this.debug.screenReader.debugFindTextOnScreen(text, !!fuzzy);
if (pos) {
await this.debug.gameController.focusGame();
await this.debug.gameController.leftClickAt(pos.x, pos.y);
this.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) and clicked`);
this.broadcastLog('info', `Found "${text}" at (${pos.x}, ${pos.y}) and clicked [${this.debug.screenReader.debugOcrEngine}]`);
res.json({ ok: true, found: true, position: pos });
} else {
this.broadcastLog('warn', `"${text}" not found on screen`);
this.broadcastLog('warn', `"${text}" not found on screen [${this.debug.screenReader.debugOcrEngine}]`);
res.json({ ok: true, found: false, position: null });
}
} catch (err) {

View file

@ -129,6 +129,34 @@
.link-item button { padding: 4px 12px; font-size: 12px; }
.link-item.inactive { opacity: 0.5; }
.mode-badge {
display: inline-block;
font-size: 10px;
padding: 2px 8px;
border-radius: 4px;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
cursor: pointer;
user-select: none;
transition: background 0.15s;
}
.mode-badge.live { background: #1f6feb; color: #fff; }
.mode-badge.live:hover { background: #388bfd; }
.mode-badge.scrap { background: #9e6a03; color: #fff; }
.mode-badge.scrap:hover { background: #d29922; }
.mode-select {
padding: 6px 10px;
background: #0d1117;
border: 1px solid #30363d;
border-radius: 6px;
color: #e6edf3;
font-size: 13px;
outline: none;
}
.mode-select:focus { border-color: #58a6ff; }
/* Toggle switch */
.toggle { position: relative; width: 36px; height: 20px; cursor: pointer; flex-shrink: 0; }
.toggle input { opacity: 0; width: 0; height: 0; }
@ -316,6 +344,41 @@
}
.detect-badge.ok { background: #238636; color: #fff; }
.detect-badge.fallback { background: #9e6a03; color: #fff; }
/* Inventory grid */
.inv-header {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 10px;
}
.inv-free {
font-size: 12px;
color: #8b949e;
font-weight: 600;
}
.inventory-grid {
display: grid;
grid-template-columns: repeat(12, 1fr);
gap: 2px;
background: #161b22;
border: 1px solid #30363d;
border-radius: 8px;
padding: 10px;
}
.inv-cell {
aspect-ratio: 1;
border-radius: 3px;
background: #0d1117;
min-width: 0;
}
.inv-cell.occupied {
background: #238636;
}
.inv-cell.item-top { border-top: 2px solid #3fb950; }
.inv-cell.item-bottom { border-bottom: 2px solid #3fb950; }
.inv-cell.item-left { border-left: 2px solid #3fb950; }
.inv-cell.item-right { border-right: 2px solid #3fb950; }
</style>
</head>
<body>
@ -359,11 +422,25 @@
<button class="warning" id="pauseBtn" onclick="togglePause()">Pause</button>
</div>
<div class="section">
<div class="inv-header">
<div class="section-title" style="margin-bottom:0">Inventory</div>
<span class="inv-free" id="invFreeCount"></span>
</div>
<div class="inventory-grid" id="inventoryGrid">
<div class="empty-state" style="grid-column:1/-1">No active scrap session</div>
</div>
</div>
<div class="section">
<div class="section-title">Trade Links</div>
<div class="add-link">
<input type="text" id="nameInput" placeholder="Name (optional)" style="max-width:180px" />
<input type="text" id="urlInput" placeholder="Paste trade URL..." />
<select id="modeInput" class="mode-select" style="width:90px">
<option value="live">Live</option>
<option value="scrap">Scrap</option>
</select>
<button class="primary" onclick="addLink()">Add</button>
</div>
<div class="links-list" id="linksList">
@ -375,6 +452,10 @@
<div class="section-title">Debug Tools</div>
<div class="debug-panel">
<div class="debug-row">
<select id="ocrEngineSelect" onchange="setOcrEngine(this.value)" style="padding:6px 10px;background:#0d1117;border:1px solid #30363d;border-radius:6px;color:#e6edf3;font-size:13px">
<option value="tesseract">Tesseract</option>
<option value="easyocr">EasyOCR</option>
</select>
<button onclick="debugScreenshot()">Screenshot</button>
<button onclick="debugOcr()">OCR Screen</button>
<button onclick="debugHideout()">Go Hideout</button>
@ -382,6 +463,7 @@
<div class="debug-row">
<button onclick="debugFindAndClick('ANGE')">ANGE</button>
<button onclick="debugFindAndClick('STASH')">STASH</button>
<button onclick="debugFindAndClick('SALVAGE BENCH', true)">SALVAGE</button>
</div>
<div class="debug-row">
<button onclick="debugAngeOption('Currency')">Currency Exchange</button>
@ -512,6 +594,9 @@
// Settings (populate once on first status)
if (status.settings) populateSettings(status.settings);
// Inventory grid
renderInventory();
// Active links count
document.getElementById('linksValue').textContent = status.links.filter(l => l.active).length;
@ -527,6 +612,7 @@
<input type="checkbox" ${link.active ? 'checked' : ''} onchange="toggleLink('${esc(link.id)}', this.checked)" />
<span class="slider"></span>
</label>
<span class="mode-badge ${link.mode || 'live'}" onclick="cycleMode('${esc(link.id)}', '${link.mode || 'live'}')" title="Click to change mode">${esc(link.mode || 'live')}</span>
<div class="link-info">
<div class="link-name" contenteditable="true" spellcheck="false"
onblur="renameLink('${esc(link.id)}', this.textContent)"
@ -542,6 +628,41 @@
}
}
// Draw the 5x12 inventory grid from status.inventory, or the empty-state
// placeholder when no inventory snapshot is present. Occupied cells are
// filled; each tracked item additionally gets an outline on its outer edges.
function renderInventory() {
  const gridEl = document.getElementById('inventoryGrid');
  const freeEl = document.getElementById('invFreeCount');

  if (!status.inventory) {
    gridEl.innerHTML = '<div class="empty-state" style="grid-column:1/-1">No active scrap session</div>';
    freeEl.textContent = '';
    return;
  }

  const { grid, items, free } = status.inventory;
  freeEl.textContent = `${free}/60 free`;

  // Base cells: one div per slot, flagged when the scan says it is occupied.
  const cellsHtml = [];
  for (let r = 0; r < 5; r++) {
    for (let c = 0; c < 12; c++) {
      const occupied = grid[r] && grid[r][c] ? 'occupied' : '';
      cellsHtml.push(`<div class="inv-cell ${occupied}" data-r="${r}" data-c="${c}"></div>`);
    }
  }
  gridEl.innerHTML = cellsHtml.join('');

  // Item outlines: only cells on an item's boundary receive a border class.
  for (const item of items) {
    const lastRow = item.row + item.h - 1;
    const lastCol = item.col + item.w - 1;
    for (let r = item.row; r < item.row + item.h; r++) {
      for (let c = item.col; c < item.col + item.w; c++) {
        const cell = gridEl.querySelector(`[data-r="${r}"][data-c="${c}"]`);
        if (!cell) continue; // item extends outside the rendered grid

        const edges = [];
        if (r === item.row) edges.push('item-top');
        if (r === lastRow) edges.push('item-bottom');
        if (c === item.col) edges.push('item-left');
        if (c === lastCol) edges.push('item-right');
        cell.classList.add(...edges);
      }
    }
  }
}
function addLog(data) {
const panel = document.getElementById('logPanel');
const line = document.createElement('div');
@ -561,12 +682,13 @@
async function addLink() {
const urlEl = document.getElementById('urlInput');
const nameEl = document.getElementById('nameInput');
const modeEl = document.getElementById('modeInput');
const url = urlEl.value.trim();
if (!url) return;
await fetch('/api/links', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url, name: nameEl.value.trim() }),
body: JSON.stringify({ url, name: nameEl.value.trim(), mode: modeEl.value }),
});
urlEl.value = '';
nameEl.value = '';
@ -596,6 +718,15 @@
}, 300);
}
// Toggle a link between 'live' and 'scrap' by POSTing the opposite of its
// current mode to the link's mode endpoint.
async function cycleMode(id, currentMode) {
  let nextMode = 'live';
  if (currentMode === 'live') {
    nextMode = 'scrap';
  }

  const endpoint = '/api/links/' + encodeURIComponent(id) + '/mode';
  const payload = JSON.stringify({ mode: nextMode });

  await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });
}
function esc(s) {
const d = document.createElement('div');
d.textContent = s;
@ -795,14 +926,14 @@
}
}
async function debugFindAndClick(directText) {
async function debugFindAndClick(directText, fuzzy) {
const text = directText || document.getElementById('debugTextInput').value.trim();
if (!text) return;
showDebugResult(`Finding and clicking "${text}"...`);
showDebugResult(`Finding and clicking "${text}"${fuzzy ? ' (fuzzy)' : ''}...`);
const res = await fetch('/api/debug/find-and-click', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ text }),
body: JSON.stringify({ text, fuzzy: !!fuzzy }),
});
const data = await res.json();
if (data.found) {
@ -855,7 +986,26 @@
if (e.key === 'Enter') addLink();
});
// Tell the server which OCR engine the debug tools should use.
async function setOcrEngine(engine) {
  const request = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ engine }),
  };
  await fetch('/api/debug/ocr-engine', request);
}
// Sync the engine dropdown with the server's current OCR engine.
// Best-effort: any fetch or parse failure leaves the dropdown untouched.
async function loadOcrEngine() {
  try {
    const response = await fetch('/api/debug/ocr-engine');
    const payload = await response.json();
    if (!payload.ok || !payload.engine) {
      return;
    }
    document.getElementById('ocrEngineSelect').value = payload.engine;
  } catch {}
}
connect();
loadOcrEngine();
</script>
</body>
</html>

View file

@ -0,0 +1,433 @@
import { join } from 'path';
import { GameController } from '../game/GameController.js';
import { ScreenReader } from '../game/ScreenReader.js';
import { GridReader, GRID_LAYOUTS } from '../game/GridReader.js';
import { ClientLogWatcher } from '../log/ClientLogWatcher.js';
import { TradeMonitor } from '../trade/TradeMonitor.js';
import { InventoryTracker } from '../inventory/InventoryTracker.js';
import { sleep, randomDelay } from '../util/sleep.js';
import { logger } from '../util/logger.js';
import type { Config, ScrapState, TradeItem } from '../types.js';
import type { Page } from 'playwright';
const SALVAGE_TEMPLATE = join('assets', 'salvage.png');
export class ScrapExecutor {
private inventory = new InventoryTracker();
private state: ScrapState = 'IDLE';
private stopped = false;
private atOwnHideout = true;
private currentSellerAccount = '';
private activePage: Page | null = null;
private gameController: GameController;
private screenReader: ScreenReader;
private logWatcher: ClientLogWatcher;
private tradeMonitor: TradeMonitor;
private config: Config;
/**
 * Store the collaborators the scrap loop drives.
 *
 * @param gameController Sends focus, click and key input to the game.
 * @param screenReader   Grid scanning, on-screen text search and template matching.
 * @param logWatcher     Emits `area-entered`, used to detect area transitions.
 * @param tradeMonitor   Opens the scrap trade page and clicks "Travel to Hideout".
 * @param config         Bot configuration; `travelTimeoutMs` is read from it.
 */
constructor(
gameController: GameController,
screenReader: ScreenReader,
logWatcher: ClientLogWatcher,
tradeMonitor: TradeMonitor,
config: Config,
) {
this.gameController = gameController;
this.screenReader = screenReader;
this.logWatcher = logWatcher;
this.tradeMonitor = tradeMonitor;
this.config = config;
}
/**
 * Current phase of the executor. Values assigned in this class are
 * 'IDLE', 'TRAVELING', 'BUYING', 'SALVAGING', 'STORING' and 'FAILED'.
 */
getState(): ScrapState {
return this.state;
}
/**
 * Snapshot of the tracked inventory: the occupancy grid, the tracked item
 * rectangles and the number of free cells, as reported by the tracker.
 */
getInventoryState(): { grid: boolean[][]; items: { row: number; col: number; w: number; h: number }[]; free: number } {
  const grid = this.inventory.getGrid();
  const items = this.inventory.getItems();
  const free = this.inventory.freeCells;
  return { grid, items, free };
}
/**
 * Request the scrap loop to stop.
 *
 * Sets the stop flag, closes the active trade page if there is one
 * (errors from closing are ignored) and resets the state to IDLE.
 */
async stop(): Promise<void> {
  this.stopped = true;

  const openPage = this.activePage;
  if (openPage) {
    try {
      await openPage.close();
    } catch {
      /* best-effort */
    }
    this.activePage = null;
  }

  this.state = 'IDLE';
  logger.info('Scrap executor stopped');
}
/**
 * Main entry point — runs the full scrap loop until stop() is called.
 *
 * Scans the inventory, opens the trade page, then repeatedly buys every
 * listed item that fits, running a salvage cycle when the inventory is
 * full, and refreshes the page once its items are exhausted.
 *
 * Cleanup (clearing the page handle, resetting state to IDLE) always runs,
 * including when a step throws. Errors raised after stop() was requested —
 * typically because stop() closed the page under an in-flight operation —
 * are logged and treated as a normal shutdown; any other error is rethrown
 * after the page has been closed.
 *
 * @param tradeUrl Trade search URL to open in scrap mode.
 */
async runScrapLoop(tradeUrl: string): Promise<void> {
  this.stopped = false;
  logger.info({ tradeUrl }, 'Starting scrap loop');

  try {
    // Scan real inventory to know current state
    await this.scanInventory();

    const opened = await this.tradeMonitor.openScrapPage(tradeUrl);
    const page = opened.page;
    let items = opened.items;
    this.activePage = page;
    logger.info({ itemCount: items.length }, 'Trade page opened, items fetched');

    while (!this.stopped) {
      let salvageFailed = false;

      for (const item of items) {
        if (this.stopped) break;

        // Check if this item fits before traveling
        if (!this.inventory.canFit(item.w, item.h)) {
          // If salvage already failed this page, don't retry — skip remaining items
          if (salvageFailed) {
            logger.info({ w: item.w, h: item.h }, 'Skipping item (salvage already failed this page)');
            continue;
          }

          logger.info({ w: item.w, h: item.h, free: this.inventory.freeCells }, 'No room for item, running salvage cycle');
          await this.salvageAndStore();

          // Check if salvage succeeded (state is IDLE on success, FAILED otherwise)
          if (this.state === 'FAILED') {
            salvageFailed = true;
            this.state = 'IDLE';
            logger.warn('Salvage failed, skipping remaining items that do not fit');
            continue;
          }

          // Re-scan inventory after salvage to get accurate state
          await this.scanInventory();
        }

        // Still no room after salvage — skip this item
        if (!this.inventory.canFit(item.w, item.h)) {
          logger.warn({ w: item.w, h: item.h, free: this.inventory.freeCells }, 'Item still cannot fit after salvage, skipping');
          continue;
        }

        const success = await this.buyItem(page, item);
        if (!success) {
          logger.warn({ itemId: item.id }, 'Failed to buy item, continuing');
          continue;
        }
        await randomDelay(500, 1000);
      }

      if (this.stopped) break;

      // Page exhausted — refresh and get new items
      logger.info('Page exhausted, refreshing...');
      items = await this.refreshPage(page);
      logger.info({ itemCount: items.length }, 'Page refreshed');

      if (items.length === 0) {
        logger.info('No items after refresh, waiting before retry...');
        await sleep(5000);
        if (this.stopped) break;
        items = await this.refreshPage(page);
      }
    }
  } catch (err) {
    // A failure that was not caused by a stop request is a real error.
    if (!this.stopped) throw err;
    logger.info({ err }, 'Scrap loop interrupted by stop request');
  } finally {
    const leftover = this.activePage;
    this.activePage = null;
    // On a stop request stop() owns closing the page; on an error exit nobody
    // else will, so close it here rather than leak it.
    if (leftover && !this.stopped) {
      try {
        await leftover.close();
      } catch {
        /* best-effort */
      }
    }
    this.state = 'IDLE';
    logger.info('Scrap loop ended');
  }
}
/**
 * Read the real inventory through the grid reader and seed the tracker.
 *
 * Focuses the game, opens the inventory, scans it, converts the occupied
 * coordinates into a 5x12 boolean grid and hands grid plus detected items
 * to the tracker, then closes the inventory with Escape.
 */
private async scanInventory(): Promise<void> {
  logger.info('Scanning inventory...');

  await this.gameController.focusGame();
  await sleep(300);
  await this.gameController.openInventory();

  const scan = await this.screenReader.grid.scan('inventory');

  // Start from an all-free grid, then mark every scanned cell inside bounds.
  const occupancy: boolean[][] = [];
  for (let r = 0; r < 5; r++) {
    occupancy.push(Array(12).fill(false));
  }
  for (const { row, col } of scan.occupied) {
    if (row < 5 && col < 12) {
      occupancy[row][col] = true;
    }
  }

  this.inventory.initFromScan(occupancy, scan.items);

  // Close inventory
  await this.gameController.pressEscape();
  await sleep(300);
}
/**
 * Buy one item from a seller.
 *
 * Travels to the seller's hideout unless the previous purchase already left
 * us in the same account's hideout, CTRL+clicks the item's cell in the seller
 * stash grid, and records the item in the local inventory tracker.
 *
 * @param page Trade page used to click "Travel to Hideout" for the item.
 * @param item Listing to buy; its stash coordinates, size and account are used.
 * @returns `true` once the click sequence completed, `false` when travel timed
 *          out or any step threw (state is set to 'FAILED' in both cases).
 */
private async buyItem(page: Page, item: TradeItem): Promise<boolean> {
try {
// An empty account name never counts as "same seller", so travel is forced.
const alreadyAtSeller = !this.atOwnHideout
&& item.account
&& item.account === this.currentSellerAccount;
if (alreadyAtSeller) {
logger.info({ itemId: item.id, account: item.account }, 'Already at seller hideout, skipping travel');
} else {
this.state = 'TRAVELING';
// Register listener BEFORE clicking, then click inside the callback
const arrived = await this.waitForAreaTransition(
this.config.travelTimeoutMs,
async () => {
const clicked = await this.tradeMonitor.clickTravelToHideout(page, item.id);
if (!clicked) {
throw new Error('Failed to click Travel to Hideout');
}
},
);
if (!arrived) {
logger.error({ itemId: item.id }, 'Timed out waiting for hideout arrival');
this.state = 'FAILED';
return false;
}
this.atOwnHideout = false;
this.currentSellerAccount = item.account;
await this.gameController.focusGame();
await sleep(1500); // Wait for hideout to render
}
this.state = 'BUYING';
// CTRL+Click at seller stash position
const sellerLayout = GRID_LAYOUTS.seller;
// getCellCenter is called with (layout, stashY, stashX), matching the (layout, row, col) order used for inventory cells.
const cellCenter = this.screenReader.grid.getCellCenter(sellerLayout, item.stashY, item.stashX);
logger.info({ itemId: item.id, stashX: item.stashX, stashY: item.stashY, screenX: cellCenter.x, screenY: cellCenter.y }, 'CTRL+clicking seller stash item');
await this.gameController.ctrlLeftClickAt(cellCenter.x, cellCenter.y);
await randomDelay(200, 400);
// Track in inventory
// NOTE(review): the purchase itself is not verified on screen; success is
// reported even when the tracker could not place the item.
const placed = this.inventory.tryPlace(item.w, item.h);
if (!placed) {
logger.warn({ itemId: item.id, w: item.w, h: item.h }, 'Item bought but could not track in inventory');
}
logger.info({ itemId: item.id, free: this.inventory.freeCells }, 'Item bought successfully');
this.state = 'IDLE';
return true;
} catch (err) {
logger.error({ err, itemId: item.id }, 'Error buying item');
this.state = 'FAILED';
return false;
}
}
/**
 * Salvage all items in inventory and store the materials.
 *
 * Sequence: return to own hideout if needed, open the salvage bench via its
 * nameplate, activate salvage mode, CTRL+click every tracked item, close the
 * bench, open the stash and CTRL+click the same cells to deposit what is left.
 *
 * Never throws. On return `this.state` is 'IDLE' on success and 'FAILED' on
 * any failure (travel timeout, nameplate not found, or an exception).
 */
private async salvageAndStore(): Promise<void> {
  try {
    // Go to own hideout (skip if already there)
    await this.gameController.focusGame();
    await sleep(300);

    if (this.atOwnHideout) {
      logger.info('Already at own hideout, skipping /hideout');
    } else {
      this.state = 'TRAVELING';
      // Register listener BEFORE sending /hideout command
      const arrived = await this.waitForAreaTransition(
        this.config.travelTimeoutMs,
        () => this.gameController.goToHideout(),
      );
      if (!arrived) {
        logger.error('Timed out going home for salvage');
        this.state = 'FAILED';
        return;
      }
      await sleep(1500); // Wait for hideout to render
    }
    this.atOwnHideout = true;
    this.currentSellerAccount = '';

    // Open salvage bench via nameplate OCR
    this.state = 'SALVAGING';
    const salvageNameplate = await this.findAndClickNameplate('SALVAGE BENCH');
    if (!salvageNameplate) {
      logger.error('Could not find Salvage nameplate');
      this.state = 'FAILED';
      return;
    }
    await sleep(1000); // Wait for salvage bench UI to open

    // Template-match salvage.png to activate salvage mode within the bench UI
    const salvageBtn = await this.screenReader.templateMatch(SALVAGE_TEMPLATE);
    if (salvageBtn) {
      await this.gameController.leftClickAt(salvageBtn.x, salvageBtn.y);
      await sleep(500);
    } else {
      logger.warn('Could not find salvage button via template match, trying to proceed anyway');
    }

    // CTRL+Click each inventory item to salvage
    const itemsToSalvage = this.inventory.getItems();
    logger.info({ count: itemsToSalvage.length }, 'Salvaging inventory items');
    await this.ctrlClickInventoryItems(itemsToSalvage);
    await sleep(500);

    // Close salvage bench (Escape)
    await this.gameController.pressEscape();
    await sleep(500);

    // Open stash to store salvaged materials
    this.state = 'STORING';
    const stashPos = await this.findAndClickNameplate('Stash');
    if (!stashPos) {
      logger.error('Could not find Stash nameplate');
      this.state = 'FAILED';
      return;
    }
    await sleep(1000); // Wait for stash to open

    // CTRL+Click each remaining inventory item to store
    await this.ctrlClickInventoryItems(itemsToSalvage);
    await sleep(500);

    // Clear inventory tracker
    this.inventory.clear();
    this.state = 'IDLE';
    logger.info('Salvage and store cycle complete');
  } catch (err) {
    logger.error({ err }, 'Salvage cycle failed');
    // Try to recover UI state
    try {
      await this.gameController.pressEscape();
      await sleep(300);
    } catch {
      // Best-effort
    }
    // NOTE(review): clearing here makes the tracker report an empty inventory
    // even though items may still be present; the early-return failure paths
    // above leave the tracker untouched. Confirm which behaviour is intended.
    this.inventory.clear();
    // Leave state as FAILED so the caller knows salvage didn't succeed
    this.state = 'FAILED';
  }
}

/**
 * CTRL+click the top-left cell of each given inventory item.
 * Ctrl is released in a `finally` so a failing click cannot leave the
 * modifier key held down for whatever input is sent next.
 */
private async ctrlClickInventoryItems(items: ReadonlyArray<{ row: number; col: number }>): Promise<void> {
  const inventoryLayout = GRID_LAYOUTS.inventory;
  await this.gameController.holdCtrl();
  try {
    for (const item of items) {
      const center = this.screenReader.grid.getCellCenter(inventoryLayout, item.row, item.col);
      await this.gameController.leftClickAt(center.x, center.y);
      await sleep(150);
    }
  } finally {
    await this.gameController.releaseCtrl();
  }
}
/**
 * Refresh the trade page and return new items.
 *
 * Listens for `/api/trade2/fetch/` responses while the page reloads and
 * converts each result into a TradeItem, defaulting missing size to 1x1,
 * missing stash coordinates to 0 and a missing account name to ''.
 * The listener is detached in a `finally`, so a failed reload (for example
 * because the page was closed) does not leave it attached to the page.
 *
 * @param page Trade page to reload.
 * @returns Items collected from fetch responses seen during the reload.
 */
private async refreshPage(page: Page): Promise<TradeItem[]> {
  const items: TradeItem[] = [];

  // Set up response listener before reloading
  const responseHandler = async (response: { url(): string; json(): Promise<any> }) => {
    if (!response.url().includes('/api/trade2/fetch/')) return;
    try {
      const json = await response.json();
      if (json.result && Array.isArray(json.result)) {
        for (const r of json.result) {
          items.push({
            id: r.id,
            w: r.item?.w ?? 1,
            h: r.item?.h ?? 1,
            stashX: r.listing?.stash?.x ?? 0,
            stashY: r.listing?.stash?.y ?? 0,
            account: r.listing?.account?.name ?? '',
          });
        }
      }
    } catch {
      // Response may not be JSON
    }
  };

  page.on('response', responseHandler);
  try {
    await page.reload({ waitUntil: 'networkidle' });
    // Grace period for fetch responses that arrive after network idle.
    await sleep(2000);
  } finally {
    page.off('response', responseHandler);
  }
  return items;
}
/**
 * Wait for area transition via Client.txt log.
 * If `triggerAction` is provided, the listener is registered BEFORE the action
 * executes, preventing the race where the event fires before we listen.
 *
 * Every outcome goes through one settle routine that clears the timer and
 * removes the listener. A failing trigger — rejected promise or synchronous
 * throw — is logged and resolves `false` immediately instead of being
 * swallowed silently.
 *
 * @param timeoutMs     Maximum time to wait for the `area-entered` event.
 * @param triggerAction Optional action that causes the transition.
 * @returns `true` if `area-entered` fired in time, `false` on timeout or
 *          trigger failure. The returned promise never rejects.
 */
private waitForAreaTransition(
  timeoutMs: number,
  triggerAction?: () => Promise<void>,
): Promise<boolean> {
  return new Promise((resolve) => {
    let settled = false;

    const settle = (arrived: boolean): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      this.logWatcher.removeListener('area-entered', onAreaEntered);
      resolve(arrived);
    };

    const onAreaEntered = (): void => settle(true);
    const timer = setTimeout(() => settle(false), timeoutMs);

    // Register listener FIRST
    this.logWatcher.once('area-entered', onAreaEntered);

    // THEN trigger the action that causes the transition
    if (triggerAction) {
      let pending: Promise<void>;
      try {
        pending = triggerAction();
      } catch (err) {
        // A non-async trigger may throw before returning a promise.
        pending = Promise.reject(err);
      }
      pending.catch((err: unknown) => {
        logger.error({ err }, 'Area transition trigger failed');
        settle(false);
      });
    }
  });
}
/**
 * Locate a nameplate by its on-screen text and left-click it.
 *
 * The screen is searched up to `maxRetries` times, pausing `retryDelayMs`
 * between attempts (no pause after the last one).
 *
 * @param name         Nameplate text to search for.
 * @param maxRetries   Maximum number of search attempts.
 * @param retryDelayMs Delay between attempts in milliseconds.
 * @returns The clicked position, or `null` if the text was never found.
 */
private async findAndClickNameplate(
  name: string,
  maxRetries: number = 3,
  retryDelayMs: number = 1000,
): Promise<{ x: number; y: number } | null> {
  let attempt = 1;
  while (attempt <= maxRetries) {
    logger.info({ name, attempt, maxRetries }, 'Searching for nameplate...');

    const hit = await this.screenReader.findTextOnScreen(name, true);
    if (hit) {
      logger.info({ name, x: hit.x, y: hit.y }, 'Clicking nameplate');
      await this.gameController.leftClickAt(hit.x, hit.y);
      return hit;
    }

    const isLastAttempt = !(attempt < maxRetries);
    if (!isLastAttempt) {
      await sleep(retryDelayMs);
    }
    attempt++;
  }

  logger.warn({ name, maxRetries }, 'Nameplate not found after all retries');
  return null;
}
}

View file

@ -124,4 +124,16 @@ export class GameController {
await this.inputSender.pressKey(VK.I);
await sleep(300);
}
/** Ctrl+left-click at the given coordinates; delegates to the input sender. */
async ctrlLeftClickAt(x: number, y: number): Promise<void> {
await this.inputSender.ctrlLeftClick(x, y);
}
/**
 * Send a key-down for Ctrl and leave it pressed.
 * Callers must pair this with releaseCtrl(), otherwise the key stays down.
 */
async holdCtrl(): Promise<void> {
await this.inputSender.keyDown(VK.CONTROL);
}
/** Send a key-up for Ctrl, ending a hold started with holdCtrl(). */
async releaseCtrl(): Promise<void> {
await this.inputSender.keyUp(VK.CONTROL);
}
}

View file

@ -288,6 +288,14 @@ export class InputSender {
await randomDelay(30, 60);
}
/**
 * Ctrl+left-click at the given coordinates.
 *
 * Ctrl is pressed, the click is performed after a short random delay, and
 * Ctrl is released again. The release happens in a `finally` so a failing
 * click cannot leave the modifier held down for subsequent input.
 *
 * @param x Click x coordinate, passed through to leftClick.
 * @param y Click y coordinate, passed through to leftClick.
 */
async ctrlLeftClick(x: number, y: number): Promise<void> {
  await this.keyDown(VK.CONTROL);
  try {
    await randomDelay(30, 60);
    await this.leftClick(x, y);
  } finally {
    await this.keyUp(VK.CONTROL);
  }
  await randomDelay(30, 60);
}
private sendMouseInput(dx: number, dy: number, mouseData: number, flags: number): void {
const input = {
type: INPUT_MOUSE_TYPE,

View file

@ -58,6 +58,17 @@ export interface DetectGridResult {
cellHeight?: number;
}
/** Result of a `match-template` request when the template was located. */
export interface TemplateMatchResult {
// Always true on results returned by templateMatch(); a miss is reported as null instead.
found: boolean;
// Match position; callers click at (x, y) directly.
// NOTE(review): presumably screen pixels — confirm whether this is the match centre or its top-left corner.
x: number;
y: number;
// Size of the matched area as reported by the daemon.
width: number;
height: number;
// Match score reported by the daemon. NOTE(review): scale not visible here — confirm range.
confidence: number;
}
/** OCR backends that can be requested; 'tesseract' is the default (no `engine` field is sent for it). */
export type OcrEngine = 'tesseract' | 'easyocr';
interface DaemonRequest {
cmd: string;
region?: Region;
@ -67,6 +78,7 @@ interface DaemonRequest {
threshold?: number;
minCellSize?: number;
maxCellSize?: number;
engine?: string;
}
interface DaemonResponse {
@ -84,6 +96,12 @@ interface DaemonResponse {
rows?: number;
cellWidth?: number;
cellHeight?: number;
found?: boolean;
x?: number;
y?: number;
width?: number;
height?: number;
confidence?: number;
error?: string;
}
@ -115,10 +133,13 @@ export class OcrDaemon {
// ── Public API ──────────────────────────────────────────────────────────
async ocr(region?: Region): Promise<OcrResponse> {
async ocr(region?: Region, engine?: OcrEngine): Promise<OcrResponse> {
const req: DaemonRequest = { cmd: 'ocr' };
if (region) req.region = region;
const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
if (engine && engine !== 'tesseract') req.engine = engine;
// Python engines need longer timeout for first model load + download
const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
const resp = await this.sendWithRetry(req, timeout);
return {
ok: true,
text: resp.text ?? '',
@ -161,11 +182,13 @@ export class OcrDaemon {
await this.sendWithRetry({ cmd: 'snapshot' }, REQUEST_TIMEOUT);
}
async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
async diffOcr(savePath?: string, region?: Region, engine?: OcrEngine): Promise<DiffOcrResponse> {
const req: DaemonRequest = { cmd: 'diff-ocr' };
if (savePath) req.path = savePath;
if (region) req.region = region;
const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
if (engine && engine !== 'tesseract') req.engine = engine;
const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
const resp = await this.sendWithRetry(req, timeout);
return {
text: resp.text ?? '',
lines: resp.lines ?? [],
@ -179,6 +202,21 @@ export class OcrDaemon {
await this.sendWithRetry(req, REQUEST_TIMEOUT);
}
/**
 * Ask the daemon to locate a template image.
 *
 * @param templatePath Path of the template image; sent as the request's `path`.
 * @param region Optional region, forwarded to the daemon unchanged.
 * @returns The match reported by the daemon, or `null` when the daemon reports no match.
 */
async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
  const req: DaemonRequest = { cmd: 'match-template', path: templatePath };
  if (region) req.region = region;
  const resp = await this.sendWithRetry(req, REQUEST_TIMEOUT);
  if (!resp.found) return null;
  // Every DaemonResponse field is optional. Default absent numbers (as the other
  // methods do with `resp.text ?? ''`) instead of asserting them non-null, so an
  // `undefined` can never escape typed as `number`.
  return {
    found: true,
    x: resp.x ?? 0,
    y: resp.y ?? 0,
    width: resp.width ?? 0,
    height: resp.height ?? 0,
    confidence: resp.confidence ?? 0,
  };
}
async stop(): Promise<void> {
this.stopped = true;
if (this.proc) {

View file

@ -1,7 +1,7 @@
import { mkdir } from 'fs/promises';
import { join } from 'path';
import { logger } from '../util/logger.js';
import { OcrDaemon, type OcrResponse, type DiffOcrResponse } from './OcrDaemon.js';
import { OcrDaemon, type OcrResponse, type OcrEngine, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
import type { Region } from '../types.js';
@ -12,6 +12,7 @@ function elapsed(start: number): string {
export class ScreenReader {
private daemon = new OcrDaemon();
readonly grid = new GridReader(this.daemon);
debugOcrEngine: OcrEngine = 'tesseract';
// ── Screenshot capture ──────────────────────────────────────────────
@ -31,32 +32,122 @@ export class ScreenReader {
// ── OCR helpers ─────────────────────────────────────────────────────
/** Dice coefficient over the character bigrams of two strings, in the range 0..1. */
private static bigramSimilarity(a: string, b: string): number {
  // Strings shorter than two characters have no bigrams: fall back to plain equality.
  if (a.length < 2 || b.length < 2) {
    return a === b ? 1 : 0;
  }

  // Multiset of the bigrams found in `a`.
  const remaining = new Map<string, number>();
  for (let start = 0; start + 1 < a.length; start++) {
    const pair = a.slice(start, start + 2);
    remaining.set(pair, (remaining.get(pair) ?? 0) + 1);
  }

  // Each bigram of `b` consumes one occurrence, so repeats are not over-counted.
  let shared = 0;
  for (let start = 0; start + 1 < b.length; start++) {
    const pair = b.slice(start, start + 2);
    const left = remaining.get(pair) ?? 0;
    if (left > 0) {
      shared += 1;
      remaining.set(pair, left - 1);
    }
  }

  const totalBigrams = a.length - 1 + b.length - 1;
  return (2 * shared) / totalBigrams;
}
/**
 * Normalize text for fuzzy comparison: lowercase it, then remove every character
 * that is not a-z or 0-9. Spaces are removed as well, not collapsed.
 */
private static normalize(s: string): string {
return s.toLowerCase().replace(/[^a-z0-9]/g, '');
}
/**
 * Locate `needle` in an OCR result and return the point to target.
 *
 * - A needle containing a space is matched against whole lines; the centre of the
 *   matching line's bounding box is returned.
 * - Any other needle is matched against individual words; the centre of the
 *   matching word is returned.
 *
 * A case-insensitive substring match is always tried first. When `fuzzy` is set, a
 * bigram-similarity comparison on normalized text is tried as well.
 *
 * @param result OCR response whose lines/words are searched in order.
 * @param needle Text to look for.
 * @param fuzzy Enables the similarity fallback (threshold 0.55).
 * @returns Centre of the first match, or `null` when nothing matches or the needle is blank.
 */
private findWordInOcrResult(
  result: OcrResponse,
  needle: string,
  fuzzy: boolean = false,
): { x: number; y: number } | null {
  // A blank needle is a substring of every string and would "match" the first word.
  if (needle.trim().length === 0) return null;

  const FUZZY_THRESHOLD = 0.55;
  const lower = needle.toLowerCase();
  const needleNorm = ScreenReader.normalize(needle);
  const centerOf = (box: { x: number; y: number; width: number; height: number }) => ({
    x: Math.round(box.x + box.width / 2),
    y: Math.round(box.y + box.height / 2),
  });

  // Multi-word: match against the full line text, return center of the line's bounding box
  if (lower.includes(' ')) {
    for (const line of result.lines) {
      if (line.words.length === 0) continue;

      // Exact match
      if (line.text.toLowerCase().includes(lower)) {
        return this.lineBounds(line);
      }
      if (!fuzzy) continue;

      // Fuzzy: slide a window slightly longer than the needle across the normalized line.
      // Windows near the end of the line come out shorter, which tolerates dropped characters.
      const lineNorm = ScreenReader.normalize(line.text);
      const windowLen = needleNorm.length;
      for (let i = 0; i <= lineNorm.length - windowLen + 2; i++) {
        const window = lineNorm.slice(i, i + windowLen + 2);
        const sim = ScreenReader.bigramSimilarity(needleNorm, window);
        if (sim >= FUZZY_THRESHOLD) {
          logger.info({ needle, matched: line.text, similarity: sim.toFixed(2) }, 'Fuzzy nameplate match');
          return this.lineBounds(line);
        }
      }
    }
    return null;
  }

  // Single word: match against individual words
  for (const line of result.lines) {
    for (const word of line.words) {
      // Exact match
      if (word.text.toLowerCase().includes(lower)) {
        return centerOf(word);
      }
      if (!fuzzy) continue;

      // Fuzzy match
      const sim = ScreenReader.bigramSimilarity(needleNorm, ScreenReader.normalize(word.text));
      if (sim >= FUZZY_THRESHOLD) {
        logger.info({ needle, matched: word.text, similarity: sim.toFixed(2) }, 'Fuzzy word match');
        return centerOf(word);
      }
    }
  }
  return null;
}
/**
 * Centre of the bounding box that encloses every word of a line.
 *
 * All four edges are taken over all words. Previously the top edge came from the
 * first word only while the bottom edge used every word, which skewed the centre
 * whenever words on the line had different `y` values.
 *
 * @param line Line whose `words` carry pixel boxes; must contain at least one word.
 * @returns Rounded centre point of the enclosing box.
 * @throws Error when `line.words` is empty (there is no box to take the centre of).
 */
private lineBounds(line: { words: { x: number; y: number; width: number; height: number }[] }): { x: number; y: number } {
  if (line.words.length === 0) {
    throw new Error('lineBounds requires a line with at least one word');
  }

  let left = Infinity;
  let top = Infinity;
  let right = -Infinity;
  let bottom = -Infinity;
  for (const word of line.words) {
    left = Math.min(left, word.x);
    top = Math.min(top, word.y);
    right = Math.max(right, word.x + word.width);
    bottom = Math.max(bottom, word.y + word.height);
  }

  return {
    x: Math.round((left + right) / 2),
    y: Math.round((top + bottom) / 2),
  };
}
// ── Full-screen methods ─────────────────────────────────────────────
async findTextOnScreen(
searchText: string,
fuzzy: boolean = false,
): Promise<{ x: number; y: number } | null> {
const t = performance.now();
const result = await this.daemon.ocr();
const pos = this.findWordInOcrResult(result, searchText);
const pos = this.findWordInOcrResult(result, searchText, fuzzy);
if (pos) {
logger.info({ searchText, x: pos.x, y: pos.y, totalMs: elapsed(t) }, 'Found text on screen');
@ -112,6 +203,19 @@ export class ScreenReader {
return this.daemon.diffOcr(savePath, region);
}
// ── Template matching ──────────────────────────────────────────────
/** Run a template match through the daemon and log the outcome with the elapsed time. */
async templateMatch(templatePath: string, region?: Region): Promise<TemplateMatchResult | null> {
  const startedAt = performance.now();
  const match = await this.daemon.templateMatch(templatePath, region);

  if (!match) {
    logger.info({ templatePath, ms: elapsed(startedAt) }, 'Template match not found');
    return match;
  }

  logger.info(
    { templatePath, x: match.x, y: match.y, confidence: match.confidence.toFixed(3), ms: elapsed(startedAt) },
    'Template match found',
  );
  return match;
}
// ── Save utilities ──────────────────────────────────────────────────
async saveScreenshot(path: string): Promise<void> {
@ -133,6 +237,43 @@ export class ScreenReader {
logger.info({ path, region }, 'Region screenshot saved');
}
// ── Debug OCR (alternative engines) ─────────────────────────────────
/** Diff-OCR using the engine currently selected in `debugOcrEngine`; logs engine and duration. */
async debugDiffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
  const startedAt = performance.now();
  const response = await this.daemon.diffOcr(savePath, region, this.debugOcrEngine);
  const ms = elapsed(startedAt);
  logger.info({ engine: this.debugOcrEngine, ms }, 'debugDiffOcr');
  return response;
}
/** OCR (optionally of a region) using the engine selected in `debugOcrEngine`; logs engine and duration. */
async debugOcr(region?: Region): Promise<OcrResponse> {
  const startedAt = performance.now();
  const response = await this.daemon.ocr(region, this.debugOcrEngine);
  const ms = elapsed(startedAt);
  logger.info({ engine: this.debugOcrEngine, ms }, 'debugOcr');
  return response;
}
/** OCR without a region using the engine selected in `debugOcrEngine`; returns only the text. */
async debugReadFullScreen(): Promise<string> {
  const { text } = await this.daemon.ocr(undefined, this.debugOcrEngine);
  return text;
}
/**
 * OCR without a region using the engine selected in `debugOcrEngine`, then search the
 * result for `searchText`. Logs whether the text was found, together with the duration.
 */
async debugFindTextOnScreen(
  searchText: string,
  fuzzy: boolean = false,
): Promise<{ x: number; y: number } | null> {
  const startedAt = performance.now();
  const ocrResult = await this.daemon.ocr(undefined, this.debugOcrEngine);
  const hit = this.findWordInOcrResult(ocrResult, searchText, fuzzy);

  if (!hit) {
    logger.info({ searchText, engine: this.debugOcrEngine, totalMs: elapsed(startedAt) }, 'debugFindText not found');
    return hit;
  }

  logger.info(
    { searchText, engine: this.debugOcrEngine, x: hit.x, y: hit.y, totalMs: elapsed(startedAt) },
    'debugFindText found',
  );
  return hit;
}
// ── Lifecycle ───────────────────────────────────────────────────────
async dispose(): Promise<void> {

View file

@ -5,11 +5,13 @@ import { GameController } from './game/GameController.js';
import { ScreenReader } from './game/ScreenReader.js';
import { ClientLogWatcher } from './log/ClientLogWatcher.js';
import { TradeExecutor } from './executor/TradeExecutor.js';
import { ScrapExecutor } from './executor/ScrapExecutor.js';
import { TradeQueue } from './executor/TradeQueue.js';
import { BotController } from './dashboard/BotController.js';
import { DashboardServer } from './dashboard/DashboardServer.js';
import { ConfigStore } from './dashboard/ConfigStore.js';
import { logger } from './util/logger.js';
import type { TradeLink } from './dashboard/BotController.js';
import type { Page } from 'playwright';
const program = new Command();
@ -73,6 +75,7 @@ program
const logWatcher = new ClientLogWatcher(config.poe2LogPath);
await logWatcher.start();
logWatcher.currentArea = 'Hideout'; // We just sent /hideout on startup
dashboard.broadcastLog('info', 'Watching Client.txt for game events');
const tradeMonitor = new TradeMonitor(config);
@ -89,23 +92,59 @@ program
const tradeQueue = new TradeQueue(executor, config);
// Helper to add a trade search
const activateLink = async (url: string) => {
// Track running scrap executors per link ID
const scrapExecutors = new Map<string, ScrapExecutor>();
// Activate a link based on its mode
const activateLink = async (link: TradeLink) => {
try {
await tradeMonitor.addSearch(url);
dashboard.broadcastLog('info', `Monitoring: ${url}`);
if (link.mode === 'scrap') {
// Start scrap loop for this link
const scrapExec = new ScrapExecutor(
gameController,
screenReader,
logWatcher,
tradeMonitor,
config,
);
scrapExecutors.set(link.id, scrapExec);
dashboard.broadcastLog('info', `Scrap loop started: ${link.name || link.label}`);
dashboard.broadcastStatus();
// Run in background (don't await — it's an infinite loop)
scrapExec.runScrapLoop(link.url).catch((err) => {
logger.error({ err, linkId: link.id }, 'Scrap loop error');
dashboard.broadcastLog('error', `Scrap loop failed: ${link.name || link.label}`);
scrapExecutors.delete(link.id);
});
} else {
// Live search mode
await tradeMonitor.addSearch(link.url);
dashboard.broadcastLog('info', `Monitoring: ${link.name || link.label}`);
dashboard.broadcastStatus();
} catch (err) {
logger.error({ err, url }, 'Failed to add trade search');
dashboard.broadcastLog('error', `Failed to add: ${url}`);
}
} catch (err) {
logger.error({ err, url: link.url }, 'Failed to activate link');
dashboard.broadcastLog('error', `Failed to activate: ${link.name || link.label}`);
}
};
// Stop whatever is running for a link, whichever mode it was started in
const deactivateLink = async (id: string) => {
  // A scrap loop registered under this ID is stopped first, then forgotten
  const running = scrapExecutors.get(id);
  if (running) {
    await running.stop();
    scrapExecutors.delete(id);
  }

  // The live search is paused in every case
  await tradeMonitor.pauseSearch(id);
};
// Load all saved + CLI links (only activate ones marked active)
for (const url of allUrls) {
const link = bot.addLink(url);
if (link.active) {
await activateLink(url);
await activateLink(link);
} else {
dashboard.broadcastLog('info', `Loaded (inactive): ${link.name || link.label}`);
}
@ -113,31 +152,40 @@ program
dashboard.broadcastLog('info', `Loaded ${allUrls.size} trade link(s) from config`);
// When dashboard adds a link, activate it in the trade monitor
bot.on('link-added', async (link) => {
// When dashboard adds a link, activate it
bot.on('link-added', async (link: TradeLink) => {
if (link.active) {
await activateLink(link.url);
await activateLink(link);
}
});
// When dashboard removes a link, deactivate it
bot.on('link-removed', async (id: string) => {
await tradeMonitor.removeSearch(id);
await deactivateLink(id);
dashboard.broadcastLog('info', `Removed search: ${id}`);
dashboard.broadcastStatus();
});
// When dashboard toggles a link active/inactive
bot.on('link-toggled', async (data: { id: string; active: boolean; link: { url: string; name: string } }) => {
bot.on('link-toggled', async (data: { id: string; active: boolean; link: TradeLink }) => {
if (data.active) {
await activateLink(data.link.url);
await activateLink(data.link);
dashboard.broadcastLog('info', `Activated: ${data.link.name || data.id}`);
} else {
await tradeMonitor.pauseSearch(data.id);
await deactivateLink(data.id);
dashboard.broadcastLog('info', `Deactivated: ${data.link.name || data.id}`);
}
});
// A mode change restarts an active link under its new mode; inactive links are left untouched
bot.on('link-mode-changed', async (data: { id: string; mode: string; link: TradeLink }) => {
  if (!data.link.active) return;

  await deactivateLink(data.id);
  await activateLink(data.link);
  dashboard.broadcastLog('info', `Mode changed to ${data.mode}: ${data.link.name || data.id}`);
});
// Wire up events: when new listings appear, queue them for trading
tradeMonitor.on('new-listings', (data: { searchId: string; itemIds: string[]; page: Page }) => {
if (bot.isPaused) {
@ -168,17 +216,54 @@ program
// Every 500 ms: mirror inventory and executor state onto the bot for the dashboard
const stateInterval = setInterval(() => {
  // Inventory: publish the first truthy snapshot a scrap executor offers, otherwise clear it
  let published = false;
  for (const scrapExec of scrapExecutors.values()) {
    const snapshot = scrapExec.getInventoryState();
    if (!snapshot) continue;
    bot.setInventory(snapshot);
    published = true;
    break;
  }
  if (!published) bot.setInventory(undefined);

  // State: the live executor wins while busy; otherwise the first busy scrap executor; otherwise IDLE
  let nextState: typeof bot.state = executor.getState();
  if (nextState === 'IDLE') {
    for (const scrapExec of scrapExecutors.values()) {
      const scrapState = scrapExec.getState();
      if (scrapState !== 'IDLE') {
        nextState = scrapState;
        break;
      }
    }
  }

  // Only broadcast when the visible state actually changes
  if (bot.state !== nextState) {
    bot.state = nextState;
    dashboard.broadcastStatus();
  }
}, 500);
// Graceful shutdown
const shutdown = async () => {
logger.info('Shutting down...');
clearInterval(stateInterval);
for (const [, scrapExec] of scrapExecutors) {
await scrapExec.stop();
}
await screenReader.dispose();
await dashboard.stop();
await tradeMonitor.stop();

View file

@ -0,0 +1,115 @@
import { logger } from '../util/logger.js';
/** Number of rows in the tracked inventory grid. */
const ROWS = 5;
/** Number of columns in the tracked inventory grid. */
const COLS = 12;

/** An item rectangle: top-left cell (`row`, `col`) plus width `w` and height `h` in cells. */
interface PlacedItem {
  row: number;
  col: number;
  w: number;
  h: number;
}

/**
 * In-memory model of a ROWS × COLS inventory: an occupancy grid plus the list of
 * item rectangles known to be in it.
 */
export class InventoryTracker {
  private grid: boolean[][];
  private items: PlacedItem[] = [];

  constructor() {
    this.grid = Array.from({ length: ROWS }, () => Array<boolean>(COLS).fill(false));
  }

  /**
   * Initialize from a grid scan result (occupied cells + detected items).
   * Cells outside ROWS × COLS are ignored; cells missing from the scan count as free.
   */
  initFromScan(cells: boolean[][], items: { row: number; col: number; w: number; h: number }[]): void {
    this.resetGrid();

    // Mark occupied cells from scan
    const rowCount = Math.min(cells.length, ROWS);
    for (let r = 0; r < rowCount; r++) {
      const scanned = cells[r] ?? [];
      const colCount = Math.min(scanned.length, COLS);
      for (let c = 0; c < colCount; c++) {
        this.grid[r][c] = Boolean(scanned[c]);
      }
    }

    // Record detected items (copied so later caller-side mutation cannot reach the tracker)
    this.items = items.map(({ row, col, w, h }) => ({ row, col, w, h }));

    const free = this.freeCells;
    logger.info({ occupied: ROWS * COLS - free, items: this.items.length, free }, 'Inventory initialized from scan');
  }

  /**
   * Try to place an item of size w×h. Column-first to match game's left-priority placement.
   *
   * @returns The top-left cell the item was placed at, or `null` when it does not fit
   *          or the size is not a positive whole number of cells.
   */
  tryPlace(w: number, h: number): { row: number; col: number } | null {
    const slot = this.findSlot(w, h);
    if (slot === null) return null;

    const { row, col } = slot;
    this.place(row, col, w, h);
    logger.info({ row, col, w, h, free: this.freeCells }, 'Item placed in inventory');
    return slot;
  }

  /** Check if an item of size w×h can fit anywhere (without placing it). */
  canFit(w: number, h: number): boolean {
    return this.findSlot(w, h) !== null;
  }

  /** Get all placed items (as copies; mutating them does not affect the tracker). */
  getItems(): PlacedItem[] {
    return this.items.map(item => ({ ...item }));
  }

  /** Get a copy of the occupancy grid. */
  getGrid(): boolean[][] {
    return this.grid.map(row => [...row]);
  }

  /** Clear entire grid. */
  clear(): void {
    this.resetGrid();
    logger.info('Inventory cleared');
  }

  /** Get remaining free cells count. */
  get freeCells(): number {
    let count = 0;
    for (const row of this.grid) {
      for (const occupied of row) {
        if (!occupied) count++;
      }
    }
    return count;
  }

  /**
   * First free top-left cell for a w×h item, scanning columns left to right and rows
   * top to bottom inside each column. Sizes that are not positive integers never fit:
   * without this check a 0-sized item "fits" everywhere and would be recorded as a
   * phantom entry.
   */
  private findSlot(w: number, h: number): { row: number; col: number } | null {
    if (!Number.isInteger(w) || !Number.isInteger(h) || w < 1 || h < 1) return null;
    for (let col = 0; col <= COLS - w; col++) {
      for (let row = 0; row <= ROWS - h; row++) {
        if (this.fits(row, col, w, h)) return { row, col };
      }
    }
    return null;
  }

  /** Mark every cell free and forget all items. */
  private resetGrid(): void {
    for (const row of this.grid) row.fill(false);
    this.items = [];
  }

  /** True when every cell of the w×h rectangle at (row, col) is free. Caller keeps it in bounds. */
  private fits(row: number, col: number, w: number, h: number): boolean {
    for (let r = row; r < row + h; r++) {
      for (let c = col; c < col + w; c++) {
        if (this.grid[r][c]) return false;
      }
    }
    return true;
  }

  /** Mark the w×h rectangle at (row, col) occupied and record the item. */
  private place(row: number, col: number, w: number, h: number): void {
    for (let r = row; r < row + h; r++) {
      for (let c = col; c < col + w; c++) {
        this.grid[r][c] = true;
      }
    }
    this.items.push({ row, col, w, h });
  }
}

View file

@ -19,6 +19,9 @@ export class ClientLogWatcher extends EventEmitter {
private fileOffset: number = 0;
private logPath: string;
/** Last area we transitioned into (from [SCENE] Set Source or "You have entered"). */
currentArea: string = '';
constructor(logPath: string) {
super();
this.logPath = logPath;
@ -71,10 +74,25 @@ export class ClientLogWatcher extends EventEmitter {
private parseLine(line: string): void {
this.emit('line', line);
// Area transition: "You have entered Hideout"
// Area transition: "[SCENE] Set Source [Shoreline Hideout]"
// POE2 uses this format instead of "You have entered ..."
const sceneMatch = line.match(/\[SCENE\] Set Source \[(.+?)\]/);
if (sceneMatch) {
const area = sceneMatch[1];
// Skip the "(null)" transition — it's an intermediate state before the real area loads
if (area !== '(null)') {
this.currentArea = area;
logger.info({ area }, 'Area entered');
this.emit('area-entered', area);
}
return;
}
// Legacy fallback: "You have entered Hideout"
const areaMatch = line.match(/You have entered (.+?)\.?$/);
if (areaMatch) {
const area = areaMatch[1];
this.currentArea = area;
logger.info({ area }, 'Area entered');
this.emit('area-entered', area);
return;

View file

@ -3,7 +3,7 @@ import { chromium, type Browser, type BrowserContext, type Page, type WebSocket
import { SELECTORS } from './selectors.js';
import { logger } from '../util/logger.js';
import { sleep } from '../util/sleep.js';
import type { Config } from '../types.js';
import type { Config, TradeItem } from '../types.js';
// Stealth JS injected into every page to avoid Playwright detection
const STEALTH_SCRIPT = `
@ -226,6 +226,40 @@ export class TradeMonitor extends EventEmitter {
}
}
/**
 * Open a trade URL in a new page and collect the items delivered by the page's
 * `/api/trade2/fetch/` responses.
 *
 * The returned `items` array is the same array the response listener appends to, so
 * it may keep growing if the page issues further fetch calls after this returns.
 * Ownership of the returned page passes to the caller.
 *
 * @param tradeUrl URL to navigate to.
 * @returns The opened page and the items captured so far.
 * @throws Error when the browser context has not been started, or when navigation
 *         fails (the page is closed before the error is rethrown).
 */
async openScrapPage(tradeUrl: string): Promise<{ page: Page; items: TradeItem[] }> {
  if (!this.context) throw new Error('Browser not started');

  const page = await this.context.newPage();
  const items: TradeItem[] = [];

  page.on('response', async (response) => {
    if (!response.url().includes('/api/trade2/fetch/')) return;
    try {
      const json: unknown = await response.json();
      const results = (json as { result?: unknown } | null)?.result;
      if (!Array.isArray(results)) return;
      for (const r of results) {
        // Missing size falls back to 1×1, missing stash position to (0, 0), missing account to ''.
        items.push({
          id: r.id,
          w: r.item?.w ?? 1,
          h: r.item?.h ?? 1,
          stashX: r.listing?.stash?.x ?? 0,
          stashY: r.listing?.stash?.y ?? 0,
          account: r.listing?.account?.name ?? '',
        });
      }
    } catch {
      // Response may not be JSON
    }
  });

  try {
    await page.goto(tradeUrl, { waitUntil: 'networkidle' });
  } catch (err) {
    // The caller never receives the page when navigation fails, so close it here
    // rather than leaking an open tab; a failing close must not mask the real error.
    await page.close().catch(() => undefined);
    throw err;
  }
  await sleep(2000); // ensure API response received

  logger.info({ url: tradeUrl, itemCount: items.length }, 'Scrap page opened');
  return { page, items };
}
extractSearchId(url: string): string {
const cleaned = url.replace(/\/live\/?$/, '');
const parts = cleaned.split('/');

View file

@ -56,3 +56,16 @@ export interface LogEvent {
type: 'area-entered' | 'whisper-received' | 'trade-accepted' | 'unknown';
data: Record<string, string>;
}
/** How a trade link is worked once activated: live-search monitoring ('live') or a scrap loop ('scrap'). */
export type LinkMode = 'live' | 'scrap';
/** Scrap workflow states. NOTE(review): presumably what ScrapExecutor.getState() reports — confirm. */
export type ScrapState = 'IDLE' | 'TRAVELING' | 'BUYING' | 'SALVAGING' | 'STORING' | 'FAILED';
/** One listing captured from a trade fetch response (filled in by TradeMonitor.openScrapPage). */
export interface TradeItem {
/** Result ID taken from the fetch response entry. */
id: string;
/** Item width from `item.w`; 1 when the response omits it. */
w: number;
/** Item height from `item.h`; 1 when the response omits it. */
h: number;
/** `listing.stash.x` from the response; 0 when the response omits it. */
stashX: number;
/** `listing.stash.y` from the response; 0 when the response omits it. */
stashY: number;
/** `listing.account.name` from the response; empty string when the response omits it. */
account: string;
}

View file

@ -53,6 +53,8 @@ static class Daemon
var ocrHandler = new OcrHandler(tessEngine);
var gridHandler = new GridHandler();
var detectGridHandler = new DetectGridHandler();
var templateMatchHandler = new TemplateMatchHandler();
var pythonBridge = new PythonOcrBridge();
// Main loop: read one JSON line, handle, write one JSON line
string? line;
@ -72,15 +74,20 @@ static class Daemon
object response = request.Cmd?.ToLowerInvariant() switch
{
"ocr" when request.Engine is "easyocr"
=> pythonBridge.HandleOcr(request, request.Engine),
"ocr" => ocrHandler.HandleOcr(request),
"screenshot" => ocrHandler.HandleScreenshot(request),
"capture" => ocrHandler.HandleCapture(request),
"snapshot" => ocrHandler.HandleSnapshot(request),
"diff-ocr" when request.Engine is "easyocr"
=> HandleDiffOcrPython(ocrHandler, pythonBridge, request),
"diff-ocr" => ocrHandler.HandleDiffOcr(request),
"test" => ocrHandler.HandleTest(request),
"tune" => ocrHandler.HandleTune(request),
"grid" => gridHandler.HandleGrid(request),
"detect-grid" => detectGridHandler.HandleDetectGrid(request),
"match-template" => templateMatchHandler.HandleTemplateMatch(request),
_ => new ErrorResponse($"Unknown command: {request.Cmd}"),
};
WriteResponse(response);
@ -91,9 +98,59 @@ static class Daemon
}
}
pythonBridge.Dispose();
return 0;
}
/// <summary>
/// diff-ocr variant that runs the cropped region through the Python OCR bridge:
/// crops via <c>OcrHandler.DiffCrop</c>, optionally saves the crop to
/// <c>request.Path</c>, OCRs it with the requested engine and shifts the word
/// coordinates by the crop origin.
/// </summary>
/// <returns>
/// A <c>DiffOcrResponse</c> with text, lines and the detected region, or an empty
/// <c>OcrResponse</c> when <c>DiffCrop</c> returns null.
/// </returns>
private static object HandleDiffOcrPython(OcrHandler ocrHandler, PythonOcrBridge pythonBridge, Request request)
{
    var sw = System.Diagnostics.Stopwatch.StartNew();

    var p = request.Threshold > 0
        ? new DiffOcrParams { DiffThresh = request.Threshold }
        : new DiffOcrParams();

    // NOTE(review): DiffCrop also returns null when no reference snapshot is stored, so
    // this path answers with an empty result where HandleDiffOcr returns an ErrorResponse.
    var cropResult = ocrHandler.DiffCrop(request, p);
    if (cropResult == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, refCropped, current, region) = cropResult.Value;
    using var _current = current;
    using var _refCropped = refCropped;
    // Owned by this method as well: dispose it even if saving or the Python call throws.
    using var _cropped = cropped;
    var diffMs = sw.ElapsedMilliseconds;

    // Save crop to requested path if provided
    if (!string.IsNullOrEmpty(request.Path))
    {
        var dir = Path.GetDirectoryName(request.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        cropped.Save(request.Path, ImageUtils.GetImageFormat(request.Path));
    }

    // Send crop to Python via base64 over pipe (no temp file I/O)
    sw.Restart();
    var ocrResult = pythonBridge.OcrFromBitmap(cropped, request.Engine!);
    var ocrMs = sw.ElapsedMilliseconds;

    Console.Error.WriteLine($" diff-ocr-python: diff={diffMs}ms ocr={ocrMs}ms total={diffMs + ocrMs}ms crop={region.Width}x{region.Height}");

    // Offset word coordinates to screen space
    foreach (var line in ocrResult.Lines)
    {
        foreach (var word in line.Words)
        {
            word.X += region.X;
            word.Y += region.Y;
        }
    }

    return new DiffOcrResponse
    {
        Text = ocrResult.Text,
        Lines = ocrResult.Lines,
        Region = region,
    };
}
private static void WriteResponse(object response)
{
var json = JsonSerializer.Serialize(response, JsonOptions);

View file

@ -69,12 +69,13 @@ class GridHandler
templateSum += templateGray[ty * templateW + tx];
innerCount++;
}
double tmplMean = innerCount > 0 ? (double)templateSum / innerCount : 0;
// Threshold for mean absolute difference — default 6
double diffThreshold = req.Threshold > 0 ? req.Threshold : 2;
// Threshold for brightness-normalized MAD
double diffThreshold = req.Threshold > 0 ? req.Threshold : 5;
bool debug = req.Debug;
if (debug) Console.Error.WriteLine($"Grid: {cols}x{rows}, cellW={cellW:F1}, cellH={cellH:F1}, border={border}, threshold={diffThreshold}");
if (debug) Console.Error.WriteLine($"Grid: {cols}x{rows}, cellW={cellW:F1}, cellH={cellH:F1}, border={border}, threshold={diffThreshold}, tmplMean={tmplMean:F1}");
var cells = new List<List<bool>>();
for (int row = 0; row < rows; row++)
@ -88,20 +89,29 @@ class GridHandler
int cw = (int)Math.Min(cellW, captureW - cx0);
int ch = (int)Math.Min(cellH, bitmap.Height - cy0);
// Compare inner pixels of cell vs template
long diffSum = 0;
int compared = 0;
int innerW = Math.Min(cw, templateW) - border;
int innerH = Math.Min(ch, templateH) - border;
// First pass: compute cell region mean brightness
long cellSum = 0;
int compared = 0;
for (int py = border; py < innerH; py++)
{
for (int px = border; px < innerW; px++)
{
int cellVal = captureGray[(cy0 + py) * captureW + (cx0 + px)];
int tmplVal = templateGray[py * templateW + px];
diffSum += Math.Abs(cellVal - tmplVal);
cellSum += captureGray[(cy0 + py) * captureW + (cx0 + px)];
compared++;
}
double cellMean = compared > 0 ? (double)cellSum / compared : 0;
double offset = cellMean - tmplMean;
// Second pass: MAD on brightness-normalized values
long diffSum = 0;
for (int py = border; py < innerH; py++)
for (int px = border; px < innerW; px++)
{
double cellVal = captureGray[(cy0 + py) * captureW + (cx0 + px)];
double tmplVal = templateGray[py * templateW + px];
diffSum += (long)Math.Abs(cellVal - tmplVal - offset);
}
double meanDiff = compared > 0 ? (double)diffSum / compared : 0;
bool occupied = meanDiff > diffThreshold;

View file

@ -39,6 +39,9 @@ class Request
[JsonPropertyName("targetCol")]
public int TargetCol { get; set; } = -1;
[JsonPropertyName("engine")]
public string? Engine { get; set; }
}
class RegionRect
@ -209,6 +212,30 @@ class DetectGridResponse
public double CellHeight { get; set; }
}
/// <summary>
/// JSON payload answering a "match-template" request.
/// NOTE(review): whether X/Y are the top-left corner or the centre of the match is
/// decided by TemplateMatchHandler — confirm.
/// </summary>
class TemplateMatchResponse
{
// Always serialized as true; a miss is signalled through Found, not through Ok.
[JsonPropertyName("ok")]
public bool Ok => true;
// Whether the template was located.
[JsonPropertyName("found")]
public bool Found { get; set; }
// Horizontal position of the match.
[JsonPropertyName("x")]
public int X { get; set; }
// Vertical position of the match.
[JsonPropertyName("y")]
public int Y { get; set; }
// Width of the match.
[JsonPropertyName("width")]
public int Width { get; set; }
// Height of the match.
[JsonPropertyName("height")]
public int Height { get; set; }
// Match score reported by the handler.
[JsonPropertyName("confidence")]
public double Confidence { get; set; }
}
class DiffOcrParams
{
[JsonPropertyName("diffThresh")]

View file

@ -3,6 +3,8 @@ namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using System.Text.Json;
using OpenCvSharp;
using OpenCvSharp.Extensions;
@ -61,17 +63,20 @@ class OcrHandler(TesseractEngine engine)
? new DiffOcrParams { DiffThresh = req.Threshold }
: new DiffOcrParams());
public object HandleDiffOcr(Request req, DiffOcrParams p)
/// <summary>
/// Diff detection + crop only. Returns the raw tooltip crop bitmap and region,
/// or null if no tooltip detected. Caller is responsible for disposing the bitmap.
/// </summary>
public (Bitmap cropped, Bitmap refCropped, Bitmap current, RegionRect region)? DiffCrop(Request req, DiffOcrParams p)
{
if (_referenceFrame == null)
return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
return null;
using var current = ScreenCapture.CaptureOrLoad(req.File, null);
var current = ScreenCapture.CaptureOrLoad(req.File, null);
int w = Math.Min(_referenceFrame.Width, current.Width);
int h = Math.Min(_referenceFrame.Height, current.Height);
// Get raw pixels for both frames
var refData = _referenceFrame.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
byte[] refPx = new byte[refData.Stride * h];
Marshal.Copy(refData.Scan0, refPx, 0, refPx.Length);
@ -83,49 +88,34 @@ class OcrHandler(TesseractEngine engine)
Marshal.Copy(curData.Scan0, curPx, 0, curPx.Length);
current.UnlockBits(curData);
// Detect pixels that got DARKER (tooltip = dark overlay).
// This filters out item highlight glow (brighter) and cursor changes.
int diffThresh = p.DiffThresh;
bool[] changed = new bool[w * h];
int totalChanged = 0;
for (int y = 0; y < h; y++)
// Pass 1: parallel row diff — compute rowCounts[] directly, no changed[] array
int[] rowCounts = new int[h];
Parallel.For(0, h, y =>
{
int count = 0;
int rowOffset = y * stride;
for (int x = 0; x < w; x++)
{
int i = y * stride + x * 4;
int darkerB = refPx[i] - curPx[i];
int darkerG = refPx[i + 1] - curPx[i + 1];
int darkerR = refPx[i + 2] - curPx[i + 2];
if (darkerB + darkerG + darkerR > diffThresh)
{
changed[y * w + x] = true;
totalChanged++;
}
}
int i = rowOffset + x * 4;
int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
if (darker > diffThresh)
count++;
}
rowCounts[y] = count;
});
bool debug = req.Debug;
int totalChanged = 0;
for (int y = 0; y < h; y++) totalChanged += rowCounts[y];
if (totalChanged == 0)
{
if (debug) Console.Error.WriteLine(" diff-ocr: no changes detected");
return new OcrResponse { Text = "", Lines = [] };
current.Dispose();
return null;
}
// Two-pass density detection:
// Pass 1: Find row range using full-width row counts
// Pass 2: Find column range using only pixels within detected row range
// This makes the column threshold relative to tooltip height, not screen height.
int maxGap = p.MaxGap;
// Pass 1: count changed pixels per row, find longest active run
int[] rowCounts = new int[h];
for (int y = 0; y < h; y++)
for (int x = 0; x < w; x++)
if (changed[y * w + x])
rowCounts[y]++;
int rowThresh = w / p.RowThreshDiv;
int bestRowStart = 0, bestRowEnd = 0, bestRowLen = 0;
int curRowStart = -1, lastActiveRow = -1;
@ -149,12 +139,46 @@ class OcrHandler(TesseractEngine engine)
if (len > bestRowLen) { bestRowStart = curRowStart; bestRowEnd = lastActiveRow; bestRowLen = len; }
}
// Pass 2: count changed pixels per column, but only within the detected row range
// Pass 2: parallel column diff — only within the row range, recompute from raw pixels
int[] colCounts = new int[w];
int rowRangeLen = bestRowEnd - bestRowStart + 1;
if (rowRangeLen <= 200)
{
// Small range: serial is faster than Parallel overhead
for (int y = bestRowStart; y <= bestRowEnd; y++)
{
int rowOffset = y * stride;
for (int x = 0; x < w; x++)
if (changed[y * w + x])
{
int i = rowOffset + x * 4;
int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
if (darker > diffThresh)
colCounts[x]++;
}
}
}
else
{
Parallel.For(bestRowStart, bestRowEnd + 1,
() => new int[w],
(y, _, localCols) =>
{
int rowOffset = y * stride;
for (int x = 0; x < w; x++)
{
int i = rowOffset + x * 4;
int darker = (refPx[i] - curPx[i]) + (refPx[i + 1] - curPx[i + 1]) + (refPx[i + 2] - curPx[i + 2]);
if (darker > diffThresh)
localCols[x]++;
}
return localCols;
},
localCols =>
{
for (int x = 0; x < w; x++)
Interlocked.Add(ref colCounts[x], localCols[x]);
});
}
int tooltipHeight = bestRowEnd - bestRowStart + 1;
int colThresh = tooltipHeight / p.ColThreshDiv;
@ -181,13 +205,13 @@ class OcrHandler(TesseractEngine engine)
if (len > bestColLen) { bestColStart = curColStart; bestColEnd = lastActiveCol; bestColLen = len; }
}
// Log density detection results
Console.Error.WriteLine($" diff-ocr: changed={totalChanged} rows={bestRowStart}-{bestRowEnd}({bestRowLen}) cols={bestColStart}-{bestColEnd}({bestColLen}) rowThresh={rowThresh} colThresh={colThresh}");
if (bestRowLen < 50 || bestColLen < 50)
{
Console.Error.WriteLine($" diff-ocr: no tooltip-sized region found (rows={bestRowLen}, cols={bestColLen})");
return new OcrResponse { Text = "", Lines = [] };
current.Dispose();
return null;
}
int minX = bestColStart;
@ -195,13 +219,9 @@ class OcrHandler(TesseractEngine engine)
int maxX = Math.Min(bestColEnd, w - 1);
int maxY = Math.Min(bestRowEnd, h - 1);
// Dynamic right-edge trim: if the rightmost columns are much sparser than
// the tooltip body, trim them. This handles the ~5% of cases where ambient
// noise extends the detected region slightly on the right.
int colSpan = maxX - minX + 1;
if (colSpan > 100)
{
// Compute median column density in the middle 50% of the range
int q1 = minX + colSpan / 4;
int q3 = minX + colSpan * 3 / 4;
long midSum = 0;
@ -209,21 +229,38 @@ class OcrHandler(TesseractEngine engine)
for (int x = q1; x <= q3; x++) { midSum += colCounts[x]; midCount++; }
double avgMidDensity = (double)midSum / midCount;
double cutoff = avgMidDensity * p.TrimCutoff;
// Trim from right while below cutoff
while (maxX > minX + 100 && colCounts[maxX] < cutoff)
maxX--;
}
int rw = maxX - minX + 1;
int rh = maxY - minY + 1;
if (debug) Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
var cropped = CropFromBytes(curPx, stride, minX, minY, rw, rh);
var refCropped = CropFromBytes(refPx, stride, minX, minY, rw, rh);
var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };
// Crop tooltip region from both current and reference frames
using var cropped = current.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
using var refCropped = _referenceFrame.Clone(new Rectangle(minX, minY, rw, rh), PixelFormat.Format32bppArgb);
Console.Error.WriteLine($" diff-ocr: tooltip region ({minX},{minY}) {rw}x{rh}");
// Save before/after preprocessing images if path is provided
return (cropped, refCropped, current, region);
}
public object HandleDiffOcr(Request req, DiffOcrParams p)
{
if (_referenceFrame == null)
return new ErrorResponse("No reference snapshot stored. Send 'snapshot' first.");
var cropResult = DiffCrop(req, p);
if (cropResult == null)
return new OcrResponse { Text = "", Lines = [] };
var (cropped, refCropped, current, region) = cropResult.Value;
using var _current = current;
using var _cropped = cropped;
using var _refCropped = refCropped;
bool debug = req.Debug;
int minX = region.X, minY = region.Y, rw = region.Width, rh = region.Height;
// Save raw crop if path is provided
if (!string.IsNullOrEmpty(req.Path))
{
var dir = Path.GetDirectoryName(req.Path);
@ -634,6 +671,24 @@ class OcrHandler(TesseractEngine engine)
};
}
/// <summary>
/// Fast crop from raw pixel bytes — avoids slow GDI+ Bitmap.Clone().
/// Copies a <paramref name="cropW"/> x <paramref name="cropH"/> rectangle out of a
/// 4-bytes-per-pixel source buffer into a newly allocated 32bpp ARGB bitmap.
/// </summary>
/// <param name="px">Source pixel buffer, 4 bytes per pixel.</param>
/// <param name="srcStride">Number of bytes per source row.</param>
/// <param name="cropX">Left edge of the crop rectangle, in pixels.</param>
/// <param name="cropY">Top edge of the crop rectangle, in pixels.</param>
/// <param name="cropW">Crop width in pixels.</param>
/// <param name="cropH">Crop height in pixels.</param>
/// <returns>A new bitmap owned by the caller, who is responsible for disposing it.</returns>
private static Bitmap CropFromBytes(byte[] px, int srcStride, int cropX, int cropY, int cropW, int cropH)
{
    var bmp = new Bitmap(cropW, cropH, PixelFormat.Format32bppArgb);
    try
    {
        var data = bmp.LockBits(new Rectangle(0, 0, cropW, cropH), ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);
        try
        {
            int dstStride = data.Stride;
            int rowBytes = cropW * 4;
            for (int y = 0; y < cropH; y++)
            {
                // Source rows are srcStride apart; destination rows are dstStride apart.
                int srcOffset = (cropY + y) * srcStride + cropX * 4;
                Marshal.Copy(px, srcOffset, data.Scan0 + y * dstStride, rowBytes);
            }
        }
        finally
        {
            // Always release the lock, even if a row copy throws (e.g. crop outside the buffer).
            bmp.UnlockBits(data);
        }
        return bmp;
    }
    catch
    {
        // The caller never receives the bitmap on failure, so release it here.
        bmp.Dispose();
        throw;
    }
}
private static double LevenshteinSimilarity(string a, string b)
{
a = a.ToLowerInvariant();

View file

@ -0,0 +1,193 @@
namespace OcrDaemon;
using System.Diagnostics;
using System.Drawing;
using System.Text.Json;
using System.Text.Json.Serialization;
using SdImageFormat = System.Drawing.Imaging.ImageFormat;
/// <summary>
/// Manages a persistent Python subprocess for Python OCR engines (e.g. EasyOCR).
/// Lazy-starts on first request; reuses the process for subsequent calls.
/// Same stdin/stdout JSON-per-line protocol as the C# daemon itself.
/// </summary>
class PythonOcrBridge : IDisposable
{
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
    };

    private Process? _proc;
    private readonly string _daemonScript;
    private readonly string _pythonExe;

    // Serializes daemon startup and each request/response exchange on the shared pipes.
    private readonly object _lock = new();

    public PythonOcrBridge()
    {
        // Resolve paths relative to this exe
        var exeDir = AppContext.BaseDirectory;
        // exeDir = tools/OcrDaemon/bin/Release/net8.0-.../
        // Walk up 4 levels to tools/
        var toolsDir = Path.GetFullPath(Path.Combine(exeDir, "..", "..", "..", ".."));
        _daemonScript = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", "daemon.py"));

        // Use the venv Python if it exists, otherwise fall back to system python
        var venvPython = Path.GetFullPath(Path.Combine(toolsDir, "python-ocr", ".venv", "Scripts", "python.exe"));
        _pythonExe = File.Exists(venvPython) ? venvPython : "python";
    }

    /// <summary>
    /// Run OCR on a screen region using the specified Python engine.
    /// Captures screenshot, saves to temp file, sends to Python, returns OcrResponse.
    /// The temp file is deleted afterwards on a best-effort basis.
    /// </summary>
    public object HandleOcr(Request req, string engine)
    {
        var tmpPath = Path.Combine(Path.GetTempPath(), $"ocr_{Guid.NewGuid():N}.png");
        try
        {
            using var bitmap = ScreenCapture.CaptureOrLoad(req.File, req.Region);
            bitmap.Save(tmpPath, SdImageFormat.Png);
            return OcrFromFile(tmpPath, engine);
        }
        finally
        {
            try { File.Delete(tmpPath); } catch { /* ignore */ }
        }
    }

    /// <summary>
    /// Run OCR on an already-saved image file via the Python engine.
    /// </summary>
    public OcrResponse OcrFromFile(string imagePath, string engine)
    {
        var pyReq = new { cmd = "ocr", engine, imagePath };
        return SendPythonRequest(pyReq);
    }

    /// <summary>
    /// Run OCR on a bitmap via the Python engine (base64 PNG over pipe, no temp file).
    /// </summary>
    public OcrResponse OcrFromBitmap(Bitmap bitmap, string engine)
    {
        using var ms = new MemoryStream();
        bitmap.Save(ms, SdImageFormat.Png);
        var imageBase64 = Convert.ToBase64String(ms.ToArray());
        var pyReq = new { cmd = "ocr", engine, imageBase64 };
        return SendPythonRequest(pyReq);
    }

    /// <summary>
    /// Sends one JSON request line to the daemon and parses the single JSON response line.
    /// Starts the daemon first if it is not running.
    /// </summary>
    /// <exception cref="Exception">
    /// The daemon could not be started, closed its stdout, or replied with ok=false.
    /// </exception>
    private OcrResponse SendPythonRequest(object pyReq)
    {
        var json = JsonSerializer.Serialize(pyReq, JsonOptions);

        string responseLine;
        lock (_lock)
        {
            // Startup happens under the same lock as the exchange so two callers can
            // neither spawn two daemons nor interleave their lines with the handshake.
            var proc = EnsureRunning();
            proc.StandardInput.WriteLine(json);
            proc.StandardInput.Flush();
            responseLine = proc.StandardOutput.ReadLine()
                ?? throw new Exception("Python daemon returned null");
        }

        var resp = JsonSerializer.Deserialize<PythonResponse>(responseLine, JsonOptions);
        if (resp == null)
            throw new Exception("Failed to parse Python OCR response");
        if (!resp.Ok)
            throw new Exception(resp.Error ?? "Python OCR failed");

        return new OcrResponse
        {
            Text = resp.Text ?? "",
            Lines = resp.Lines ?? [],
        };
    }

    /// <summary>
    /// Returns the live daemon process, spawning it and completing the ready handshake if needed.
    /// Must be called while holding <see cref="_lock"/>.
    /// </summary>
    private Process EnsureRunning()
    {
        if (_proc != null)
        {
            if (!_proc.HasExited)
                return _proc;

            _proc.Dispose();
            _proc = null;
        }

        if (!File.Exists(_daemonScript))
            throw new Exception($"Python OCR daemon not found at {_daemonScript}");

        Console.Error.WriteLine($"Spawning Python OCR daemon: {_pythonExe} {_daemonScript}");

        var proc = new Process
        {
            StartInfo = new ProcessStartInfo
            {
                FileName = _pythonExe,
                Arguments = $"\"{_daemonScript}\"",
                UseShellExecute = false,
                RedirectStandardInput = true,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                CreateNoWindow = true,
            }
        };

        // Forward the daemon's stderr to ours so its log output stays visible.
        proc.ErrorDataReceived += (_, e) =>
        {
            if (!string.IsNullOrEmpty(e.Data))
                Console.Error.WriteLine($"[python-ocr] {e.Data}");
        };

        try
        {
            proc.Start();
            proc.BeginErrorReadLine();

            // Block until the daemon prints its ready line. No timeout is enforced here;
            // ReadLine returns null only when the process closes its stdout.
            var readyLine = proc.StandardOutput.ReadLine();
            if (readyLine == null)
                throw new Exception("Python OCR daemon exited before ready signal");

            var ready = JsonSerializer.Deserialize<PythonResponse>(readyLine, JsonOptions);
            if (ready?.Ready != true)
                throw new Exception($"Python OCR daemon did not send ready signal: {readyLine}");
        }
        catch
        {
            // Never publish a process that failed to start or to complete the handshake:
            // a later call would otherwise treat it as a healthy daemon (or fail on HasExited).
            try
            {
                if (!proc.HasExited) proc.Kill();
            }
            catch { /* ignore: process may never have started */ }
            proc.Dispose();
            throw;
        }

        _proc = proc;
        Console.Error.WriteLine("Python OCR daemon ready");
        return proc;
    }

    /// <summary>
    /// Stops the daemon: closes its stdin, waits up to 3 seconds for it to exit, then kills it.
    /// </summary>
    public void Dispose()
    {
        var proc = _proc;
        _proc = null;
        if (proc == null)
            return;

        try
        {
            if (!proc.HasExited)
            {
                proc.StandardInput.Close();
                proc.WaitForExit(3000);
                if (!proc.HasExited) proc.Kill();
            }
        }
        catch { /* ignore */ }
        finally
        {
            proc.Dispose();
        }
    }

    /// <summary>Shape of a JSON line emitted by the Python daemon (ready signal or OCR reply).</summary>
    private class PythonResponse
    {
        [JsonPropertyName("ok")]
        public bool Ok { get; set; }

        [JsonPropertyName("ready")]
        public bool? Ready { get; set; }

        [JsonPropertyName("text")]
        public string? Text { get; set; }

        [JsonPropertyName("lines")]
        public List<OcrLineResult>? Lines { get; set; }

        [JsonPropertyName("error")]
        public string? Error { get; set; }
    }
}

View file

@ -0,0 +1,60 @@
namespace OcrDaemon;
using System.Drawing;
using System.Drawing.Imaging;
using OpenCvSharp;
using OpenCvSharp.Extensions;
class TemplateMatchHandler
{
    /// <summary>
    /// Locates the template image given by <c>req.Path</c> inside a captured or loaded screenshot
    /// using normalized cross-correlation, and reports the centre of the best match.
    /// </summary>
    /// <returns>
    /// An <c>ErrorResponse</c> when the template path is missing or unreadable; otherwise a
    /// <c>TemplateMatchResponse</c> whose <c>Found</c> flag says whether the best score reached
    /// the threshold (<c>req.Threshold</c> as a percentage, or 0.7 when it is not positive).
    /// </returns>
    public object HandleTemplateMatch(Request req)
    {
        var templatePath = req.Path;
        if (string.IsNullOrEmpty(templatePath))
            return new ErrorResponse("match-template command requires 'path' (template image file)");
        if (!System.IO.File.Exists(templatePath))
            return new ErrorResponse($"Template file not found: {templatePath}");

        using var screenshot = ScreenCapture.CaptureOrLoad(req.File, req.Region);
        using var screenMat = BitmapConverter.ToMat(screenshot);
        using var template = Cv2.ImRead(templatePath, ImreadModes.Color);
        if (template.Empty())
            return new ErrorResponse($"Failed to load template image: {templatePath}");

        // The template is loaded as 3-channel colour, so drop the alpha channel of a 4-channel capture.
        using var haystack = new Mat();
        bool hasAlpha = screenMat.Channels() == 4;
        if (hasAlpha)
        {
            Cv2.CvtColor(screenMat, haystack, ColorConversionCodes.BGRA2BGR);
        }
        else
        {
            screenMat.CopyTo(haystack);
        }

        // A template larger than the search image cannot match anywhere.
        int templateWidth = template.Cols;
        int templateHeight = template.Rows;
        bool templateFits = templateHeight <= haystack.Rows && templateWidth <= haystack.Cols;
        if (!templateFits)
            return new TemplateMatchResponse { Found = false };

        using var scores = new Mat();
        Cv2.MatchTemplate(haystack, template, scores, TemplateMatchModes.CCoeffNormed);
        Cv2.MinMaxLoc(scores, out _, out double bestScore, out _, out OpenCvSharp.Point bestLoc);

        double threshold = 0.7;
        if (req.Threshold > 0)
            threshold = req.Threshold / 100.0;

        if (bestScore < threshold)
            return new TemplateMatchResponse { Found = false, Confidence = bestScore };

        // Report the match centre, shifted by the region origin when a region was supplied.
        var region = req.Region;
        int originX = region?.X ?? 0;
        int originY = region?.Y ?? 0;

        return new TemplateMatchResponse
        {
            Found = true,
            X = originX + bestLoc.X + templateWidth / 2,
            Y = originY + bestLoc.Y + templateHeight / 2,
            Width = templateWidth,
            Height = templateHeight,
            Confidence = bestScore,
        };
    }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 397 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 157 KiB

Binary file not shown.

157
tools/python-ocr/daemon.py Normal file
View file

@ -0,0 +1,157 @@
"""
Persistent Python OCR daemon (stdin/stdout JSON-per-line protocol).
Supports EasyOCR engine, lazy-loaded on first use.
Managed as a subprocess by the C# OcrDaemon.
Request: {"cmd": "ocr", "engine": "easyocr", "imagePath": "C:\\temp\\screenshot.png"}
     or: {"cmd": "ocr", "engine": "easyocr", "imageBase64": "<base64-encoded image>"}
Response: {"ok": true, "text": "...", "lines": [{"text": "...", "words": [...]}]}
"""
import sys
import json
# Module-level cache for the EasyOCR reader; stays None until get_easyocr() builds it.
_easyocr_reader = None
def _redirect_stdout_to_stderr():
    """Point sys.stdout at sys.stderr and return the stream that was replaced.

    Keeps print() output from libraries out of the JSON protocol stream. Pass the
    returned object to _restore_stdout() to undo the swap.
    """
    previous, sys.stdout = sys.stdout, sys.stderr
    return previous
def _restore_stdout(real_stdout):
    """Reinstall ``real_stdout`` as sys.stdout (undoes _redirect_stdout_to_stderr)."""
    sys.stdout = real_stdout
def get_easyocr():
    """Return the shared EasyOCR reader, creating it on the first call.

    The reader is built for English with gpu=True and cached in the module-level
    ``_easyocr_reader``. While easyocr is imported and the reader is constructed,
    stdout is redirected to stderr so progress output cannot corrupt the protocol.
    """
    global _easyocr_reader
    if _easyocr_reader is not None:
        return _easyocr_reader

    sys.stderr.write("Loading EasyOCR model...\n")
    sys.stderr.flush()

    # EasyOCR prints download progress to stdout — keep it off the protocol stream.
    saved_stdout = _redirect_stdout_to_stderr()
    try:
        from easyocr import Reader

        _easyocr_reader = Reader(["en"], gpu=True)
    finally:
        _restore_stdout(saved_stdout)

    sys.stderr.write("EasyOCR model loaded.\n")
    sys.stderr.flush()
    return _easyocr_reader
def bbox_to_rect(corners):
    """Collapse a 4-corner bbox [[x1,y1],[x2,y2],[x3,y3],[x4,y4]] to an axis-aligned box.

    Returns a tuple ``(x, y, width, height)`` of ints. Each extreme is truncated
    with int() on its own before the width and height are computed.
    """
    left = int(min(point[0] for point in corners))
    top = int(min(point[1] for point in corners))
    right = int(max(point[0] for point in corners))
    bottom = int(max(point[1] for point in corners))
    return left, top, right - left, bottom - top
def split_into_words(text, x, y, width, height):
    """Break one detection into per-word boxes laid out left to right.

    Each whitespace-separated word gets a share of ``width`` proportional to its
    character count (at least 1), keeping the detection's ``y`` and ``height``.
    Text with zero or one word is returned as a single box covering the whole
    detection, with the text stripped.
    """
    tokens = text.split()
    char_total = sum(map(len, tokens))
    if len(tokens) <= 1 or char_total == 0:
        return [{"text": text.strip(), "x": x, "y": y, "width": width, "height": height}]

    boxes = []
    cursor = x
    for token in tokens:
        share = int(width * len(token) / char_total)
        token_width = share if share >= 1 else 1
        boxes.append({"text": token, "x": cursor, "y": y, "width": token_width, "height": height})
        cursor += token_width
    return boxes
def run_easyocr(image_path):
    """Open the image at ``image_path`` as a numpy array and run EasyOCR on it."""
    from PIL import Image
    import numpy as np

    return run_easyocr_array(np.array(Image.open(image_path)))
def run_easyocr_array(img):
    """Run EasyOCR on an image array and shape the result for the JSON protocol.

    Returns ``{"ok": True, "text": ..., "lines": [...]}`` where every non-blank
    detection becomes one line with its stripped text and per-word boxes, and
    ``text`` joins the line texts with newlines.
    """
    ocr = get_easyocr()

    # easyocr can print warnings during inference — keep them off the protocol stream.
    saved_stdout = _redirect_stdout_to_stderr()
    try:
        # batch_size=32: batch GPU recognition of detected text regions
        detections = ocr.readtext(img, batch_size=32)
    finally:
        _restore_stdout(saved_stdout)

    # detections: [(bbox_4corners, text, conf), ...]
    lines = []
    for corners, raw_text, _confidence in detections:
        cleaned = raw_text.strip()
        if not cleaned:
            continue
        left, top, box_w, box_h = bbox_to_rect(corners)
        lines.append({
            "text": cleaned,
            "words": split_into_words(raw_text, left, top, box_w, box_h),
        })

    full_text = "\n".join(entry["text"] for entry in lines)
    return {"ok": True, "text": full_text, "lines": lines}
def load_image(req):
    """Build a numpy image array from a request dict.

    A truthy ``imageBase64`` (base64-encoded image bytes) takes precedence over a
    truthy ``imagePath`` (file on disk). Returns None when neither is supplied.
    """
    from PIL import Image
    import numpy as np

    encoded = req.get("imageBase64")
    if encoded:
        import base64
        import io

        raw_bytes = base64.b64decode(encoded)
        return np.array(Image.open(io.BytesIO(raw_bytes)))

    path = req.get("imagePath")
    return np.array(Image.open(path)) if path else None
def handle_request(req):
    """Dispatch one decoded request dict and return the response dict.

    Only ``cmd == "ocr"`` is accepted. The image is loaded before the engine name
    is checked, so a request with no image reports that before an unknown engine.
    """
    command = req.get("cmd")
    if command != "ocr":
        return {"ok": False, "error": f"Unknown command: {command}"}

    engine_name = req.get("engine", "")
    image = load_image(req)
    if image is None:
        return {"ok": False, "error": "Missing imagePath or imageBase64"}

    if engine_name != "easyocr":
        return {"ok": False, "error": f"Unknown engine: {engine_name}"}
    return run_easyocr_array(image)
def main():
# Signal ready
sys.stdout.write(json.dumps({"ok": True, "ready": True}) + "\n")
sys.stdout.flush()
for line in sys.stdin:
line = line.strip()
if not line:
continue
try:
req = json.loads(line)
resp = handle_request(req)
except Exception as e:
resp = {"ok": False, "error": str(e)}
sys.stdout.write(json.dumps(resp) + "\n")
sys.stdout.flush()
# Start the request loop only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

View file

@ -0,0 +1,3 @@
easyocr
pillow
numpy