work on new crop
1
crop-test-cmd.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
{"cmd":"crop-test","engine":"diff"}
|
||||
2
crop-test-stderr.txt
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
{"ok":true,"ready":true}
|
||||
{"ok":true,"method":"edge","avgIoU":0.7689866918165986,"results":[{"id":"1","iou":0.9028985507246376,"expected":{"x":0,"y":84,"width":1185,"height":690},"actual":{"x":0,"y":117,"width":1185,"height":623},"deltaTop":33,"deltaLeft":0,"deltaRight":0,"deltaBottom":-34},{"id":"2","iou":0.6861386480207926,"expected":{"x":304,"y":0,"width":679,"height":470},"actual":{"x":428,"y":40,"width":564,"height":474},"deltaTop":40,"deltaLeft":124,"deltaRight":9,"deltaBottom":44},{"id":"3","iou":0.8734518726233722,"expected":{"x":473,"y":334,"width":641,"height":580},"actual":{"x":472,"y":373,"width":609,"height":548},"deltaTop":39,"deltaLeft":-1,"deltaRight":-33,"deltaBottom":7},{"id":"4","iou":0.4827177898385173,"expected":{"x":209,"y":264,"width":888,"height":651},"actual":{"x":0,"y":294,"width":767,"height":634},"deltaTop":30,"deltaLeft":-209,"deltaRight":-330,"deltaBottom":13},{"id":"5","iou":0.8933684252502293,"expected":{"x":763,"y":0,"width":1111,"height":560},"actual":{"x":758,"y":39,"width":1080,"height":523},"deltaTop":39,"deltaLeft":-5,"deltaRight":-36,"deltaBottom":2},{"id":"6","iou":0.9159954398801851,"expected":{"x":1541,"y":154,"width":807,"height":460},"actual":{"x":1486,"y":157,"width":870,"height":460},"deltaTop":3,"deltaLeft":-55,"deltaRight":8,"deltaBottom":3},{"id":"7","iou":0.6283361163784564,"expected":{"x":1921,"y":40,"width":637,"height":330},"actual":{"x":1946,"y":72,"width":447,"height":302},"deltaTop":32,"deltaLeft":25,"deltaRight":-165,"deltaBottom":4}]}
|
||||
0
crop-test-stdout.txt
Normal file
|
|
@ -104,6 +104,22 @@ export interface DiffOcrParams {
|
|||
ocr?: OcrParams;
|
||||
}
|
||||
|
||||
/** Strategy used to locate the tooltip before OCR: frame differencing ('diff') or edge detection ('edge'). */
export type TooltipMethod = 'diff' | 'edge';
|
||||
|
||||
/**
 * Optional tuning knobs for the edge-based tooltip crop, sent to the OCR daemon
 * as `edgeParams.crop`. Every field may be omitted.
 *
 * NOTE(review): the daemon-side EdgeCropHandler in this change reads differently
 * named knobs (DarkThresh, RunGapTolerance, MaxGap, MinDarkRun, RowThreshDiv,
 * ColThreshDiv) — confirm the fields below are still consumed by the daemon.
 */
export interface EdgeCropParams {
  /** "Canny Low" in the settings UI (UI default 50). */
  cannyLow?: number;
  /** "Canny High" in the settings UI (UI default 150). */
  cannyHigh?: number;
  /** "Min Line Length" in the settings UI (UI default 100). */
  minLineLength?: number;
  /** "ROI Size" in the settings UI (UI default 1400). */
  roiSize?: number;
  /** "Density Threshold" in the settings UI (UI default 0.15). */
  densityThreshold?: number;
  /** Padding forwarded to the daemon's Tesseract path as `pad`; not exposed in the settings UI. */
  ocrPad?: number;
}
|
||||
|
||||
/**
 * Parameter bundle for the `edge-ocr` daemon command: crop detection settings
 * plus the OCR settings applied to the cropped image. Both parts are optional.
 */
export interface EdgeOcrParams {
  /** Edge-crop detection tuning. */
  crop?: EdgeCropParams;
  /** OCR / preprocessing tuning applied after the crop. */
  ocr?: OcrParams;
}
|
||||
|
||||
interface DaemonRequest {
|
||||
cmd: string;
|
||||
region?: Region;
|
||||
|
|
@ -116,6 +132,9 @@ interface DaemonRequest {
|
|||
engine?: string;
|
||||
preprocess?: string;
|
||||
params?: DiffOcrParams;
|
||||
edgeParams?: EdgeOcrParams;
|
||||
cursorX?: number;
|
||||
cursorY?: number;
|
||||
}
|
||||
|
||||
interface DaemonResponse {
|
||||
|
|
@ -236,6 +255,24 @@ export class OcrDaemon {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Sends an `edge-ocr` request to the daemon and returns the recognised text,
 * lines and detected region.
 *
 * Optional arguments are only added to the request when they carry a value:
 * - `engine` is sent only when it is not the default `'tesseract'`;
 * - `edgeParams` is sent only when the object has at least one key;
 * - `cursorX` / `cursorY` are sent whenever they are not null/undefined (0 is valid).
 *
 * @param savePath    Optional path forwarded to the daemon as `path`.
 * @param region      Optional region forwarded to the daemon.
 * @param engine      OCR engine; non-tesseract engines get a 120 s timeout.
 * @param preprocess  Preprocess mode forwarded verbatim when truthy.
 * @param edgeParams  Edge crop / OCR tuning.
 * @param cursorX     Cursor X coordinate to forward, if any.
 * @param cursorY     Cursor Y coordinate to forward, if any.
 * @returns Text and lines (defaulting to empty) plus the region reported by the daemon.
 */
async edgeOcr(savePath?: string, region?: Region, engine?: OcrEngine, preprocess?: OcrPreprocess, edgeParams?: EdgeOcrParams, cursorX?: number, cursorY?: number): Promise<DiffOcrResponse> {
  const req: DaemonRequest = { cmd: 'edge-ocr' };
  if (savePath) req.path = savePath;
  if (region) req.region = region;
  if (engine && engine !== 'tesseract') req.engine = engine;
  if (preprocess) req.preprocess = preprocess;
  if (edgeParams && Object.keys(edgeParams).length > 0) req.edgeParams = edgeParams;
  // `!= null` (not truthiness) so that coordinate 0 is still forwarded.
  if (cursorX != null) req.cursorX = cursorX;
  if (cursorY != null) req.cursorY = cursorY;

  // Non-tesseract engines are given a much longer timeout than a plain capture.
  const timeout = (engine && engine !== 'tesseract') ? 120_000 : CAPTURE_TIMEOUT;
  const resp = await this.sendWithRetry(req, timeout);
  return {
    text: resp.text ?? '',
    lines: resp.lines ?? [],
    region: resp.region,
  };
}
|
||||
|
||||
async saveScreenshot(path: string, region?: Region): Promise<void> {
|
||||
const req: DaemonRequest = { cmd: 'screenshot', path };
|
||||
if (region) req.region = region;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { mkdir } from 'fs/promises';
|
||||
import { join } from 'path';
|
||||
import { logger } from '../util/logger.js';
|
||||
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult } from './OcrDaemon.js';
|
||||
import { OcrDaemon, type OcrResponse, type OcrEngine, type OcrPreprocess, type DiffOcrParams, type DiffCropParams, type OcrParams, type DiffOcrResponse, type TemplateMatchResult, type TooltipMethod, type EdgeOcrParams } from './OcrDaemon.js';
|
||||
import { GridReader, type GridLayout, type CellCoord } from './GridReader.js';
|
||||
import type { Region } from '../types.js';
|
||||
|
||||
|
|
@ -13,7 +13,9 @@ export interface OcrSettings {
|
|||
engine: OcrEngine;
|
||||
screenPreprocess: OcrPreprocess;
|
||||
tooltipPreprocess: OcrPreprocess;
|
||||
tooltipMethod: TooltipMethod;
|
||||
tooltipParams: DiffOcrParams;
|
||||
edgeParams: EdgeOcrParams;
|
||||
saveDebugImages: boolean;
|
||||
}
|
||||
|
||||
|
|
@ -24,10 +26,15 @@ export class ScreenReader {
|
|||
engine: 'easyocr',
|
||||
screenPreprocess: 'none',
|
||||
tooltipPreprocess: 'tophat',
|
||||
tooltipMethod: 'diff',
|
||||
tooltipParams: {
|
||||
crop: { diffThresh: 10 },
|
||||
ocr: { kernelSize: 21 },
|
||||
},
|
||||
edgeParams: {
|
||||
crop: {},
|
||||
ocr: { kernelSize: 21 },
|
||||
},
|
||||
saveDebugImages: true,
|
||||
};
|
||||
|
||||
|
|
@ -235,12 +242,16 @@ export class ScreenReader {
|
|||
// ── Snapshot / Diff-OCR (for tooltip reading) ──────────────────────
|
||||
|
||||
/**
 * Asks the daemon to take a snapshot for the diff-based tooltip method.
 * Does nothing when the tooltip method is 'edge'.
 */
async snapshot(): Promise<void> {
  // Edge detection needs no reference frame, so skip the daemon round-trip.
  if (this.settings.tooltipMethod === 'edge') return;
  await this.daemon.snapshot();
}
|
||||
|
||||
/**
 * Reads the tooltip using the configured detection method and OCR settings.
 *
 * Despite the name, this dispatches to the daemon's edge pipeline when
 * `settings.tooltipMethod` is 'edge', and to the diff pipeline otherwise.
 *
 * @param savePath Optional path forwarded to the daemon.
 * @param region   Optional region forwarded to the daemon.
 */
async diffOcr(savePath?: string, region?: Region): Promise<DiffOcrResponse> {
  const { engine, tooltipPreprocess, tooltipMethod, tooltipParams, edgeParams } = this.settings;

  if (tooltipMethod === 'edge') {
    // The edge pipeline treats an omitted preprocess as "tophat", so 'none'
    // must be sent explicitly for the setting to take effect.
    return this.daemon.edgeOcr(savePath, region, engine, tooltipPreprocess, edgeParams);
  }

  // Diff pipeline: 'none' is expressed by leaving the preprocess argument undefined.
  const pp = tooltipPreprocess !== 'none' ? tooltipPreprocess : undefined;
  return this.daemon.diffOcr(savePath, region, engine, pp, tooltipParams);
}
|
||||
|
||||
|
|
|
|||
|
|
@ -581,6 +581,14 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<div style="margin-bottom:16px">
|
||||
<div class="section-title" style="margin-bottom:6px">Tooltip Method</div>
|
||||
<select id="ocrTooltipMethod" class="mode-select" style="width:100%" onchange="toggleOcrSections()">
|
||||
<option value="diff">Diff Detection</option>
|
||||
<option value="edge">Edge Detection</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div style="margin-bottom:16px">
|
||||
<div class="section-title" style="margin-bottom:6px">Tooltip Preprocess</div>
|
||||
<select id="ocrTooltipPreprocess" class="mode-select" style="width:100%" onchange="toggleOcrSections()">
|
||||
|
|
@ -590,8 +598,8 @@
|
|||
</select>
|
||||
</div>
|
||||
|
||||
<div style="margin-bottom:16px">
|
||||
<div class="section-title" style="margin-bottom:6px">Crop Detection</div>
|
||||
<div id="diffCropParams" style="margin-bottom:16px">
|
||||
<div class="section-title" style="margin-bottom:6px">Crop Detection (Diff)</div>
|
||||
<div class="settings-grid">
|
||||
<div class="setting-row">
|
||||
<label>Diff Threshold</label>
|
||||
|
|
@ -608,6 +616,32 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<div id="edgeCropParams" style="margin-bottom:16px;display:none">
|
||||
<div class="section-title" style="margin-bottom:6px">Crop Detection (Edge)</div>
|
||||
<div class="settings-grid">
|
||||
<div class="setting-row">
|
||||
<label>Canny Low</label>
|
||||
<input type="number" id="ocrCannyLow" value="50" />
|
||||
</div>
|
||||
<div class="setting-row">
|
||||
<label>Canny High</label>
|
||||
<input type="number" id="ocrCannyHigh" value="150" />
|
||||
</div>
|
||||
<div class="setting-row">
|
||||
<label>Min Line Length</label>
|
||||
<input type="number" id="ocrMinLineLength" value="100" />
|
||||
</div>
|
||||
<div class="setting-row">
|
||||
<label>ROI Size</label>
|
||||
<input type="number" id="ocrRoiSize" value="1400" />
|
||||
</div>
|
||||
<div class="setting-row">
|
||||
<label>Density Threshold</label>
|
||||
<input type="number" id="ocrDensityThreshold" value="0.15" step="0.01" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style="margin-bottom:16px">
|
||||
<div class="section-title" style="margin-bottom:6px">OCR Processing</div>
|
||||
<div class="settings-grid">
|
||||
|
|
@ -1160,7 +1194,24 @@
|
|||
const screenPp = document.getElementById('ocrScreenPreprocess').value;
|
||||
document.getElementById('screenTophatParams').style.display = screenPp === 'tophat' ? '' : 'none';
|
||||
|
||||
const tooltipPp = document.getElementById('ocrTooltipPreprocess').value;
|
||||
const method = document.getElementById('ocrTooltipMethod').value;
|
||||
const isEdge = method === 'edge';
|
||||
|
||||
// Show/hide method-specific crop params
|
||||
document.getElementById('diffCropParams').style.display = isEdge ? 'none' : '';
|
||||
document.getElementById('edgeCropParams').style.display = isEdge ? '' : 'none';
|
||||
|
||||
// Disable bgsub when edge (no reference frame)
|
||||
const ppSelect = document.getElementById('ocrTooltipPreprocess');
|
||||
const bgsubOption = ppSelect.querySelector('option[value="bgsub"]');
|
||||
if (isEdge) {
|
||||
bgsubOption.disabled = true;
|
||||
if (ppSelect.value === 'bgsub') ppSelect.value = 'tophat';
|
||||
} else {
|
||||
bgsubOption.disabled = false;
|
||||
}
|
||||
|
||||
const tooltipPp = ppSelect.value;
|
||||
document.getElementById('tooltipBgsubParams').style.display = tooltipPp === 'bgsub' ? '' : 'none';
|
||||
document.getElementById('tooltipTophatParams').style.display = tooltipPp === 'tophat' ? '' : 'none';
|
||||
|
||||
|
|
@ -1175,11 +1226,20 @@
|
|||
if (!data.ok) return;
|
||||
document.getElementById('ocrEngine').value = data.engine || 'easyocr';
|
||||
document.getElementById('ocrScreenPreprocess').value = data.screenPreprocess || 'none';
|
||||
document.getElementById('ocrTooltipMethod').value = data.tooltipMethod || 'diff';
|
||||
document.getElementById('ocrTooltipPreprocess').value = data.tooltipPreprocess || 'tophat';
|
||||
document.getElementById('ocrSaveDebugImages').checked = data.saveDebugImages !== false;
|
||||
const tp = data.tooltipParams || {};
|
||||
const crop = tp.crop || {};
|
||||
const ocr = tp.ocr || {};
|
||||
// Edge params
|
||||
const ep = data.edgeParams || {};
|
||||
const edgeCrop = ep.crop || {};
|
||||
document.getElementById('ocrCannyLow').value = edgeCrop.cannyLow ?? 50;
|
||||
document.getElementById('ocrCannyHigh').value = edgeCrop.cannyHigh ?? 150;
|
||||
document.getElementById('ocrMinLineLength').value = edgeCrop.minLineLength ?? 100;
|
||||
document.getElementById('ocrRoiSize').value = edgeCrop.roiSize ?? 1400;
|
||||
document.getElementById('ocrDensityThreshold').value = edgeCrop.densityThreshold ?? 0.15;
|
||||
document.getElementById('ocrDiffThresh').value = crop.diffThresh ?? 20;
|
||||
document.getElementById('ocrMaxGap').value = crop.maxGap ?? 20;
|
||||
document.getElementById('ocrTrimCutoff').value = crop.trimCutoff ?? 0.4;
|
||||
|
|
@ -1237,11 +1297,29 @@
|
|||
if (!isNaN(wt)) tooltipParams.ocr.widthThs = wt;
|
||||
}
|
||||
|
||||
const tooltipMethod = document.getElementById('ocrTooltipMethod').value;
|
||||
|
||||
const edgeParams = {
|
||||
crop: {
|
||||
cannyLow: parseInt(document.getElementById('ocrCannyLow').value) || 50,
|
||||
cannyHigh: parseInt(document.getElementById('ocrCannyHigh').value) || 150,
|
||||
minLineLength: parseInt(document.getElementById('ocrMinLineLength').value) || 100,
|
||||
roiSize: parseInt(document.getElementById('ocrRoiSize').value) || 1400,
|
||||
densityThreshold: parseFloat(document.getElementById('ocrDensityThreshold').value) || 0.15,
|
||||
},
|
||||
ocr: {
|
||||
upscale: parseInt(document.getElementById('ocrUpscale').value) || 2,
|
||||
kernelSize: parseInt(document.getElementById('ocrTooltipKernel').value) || 21,
|
||||
},
|
||||
};
|
||||
|
||||
const body = {
|
||||
engine,
|
||||
screenPreprocess: screenPp,
|
||||
tooltipMethod,
|
||||
tooltipPreprocess: tooltipPp,
|
||||
tooltipParams,
|
||||
edgeParams,
|
||||
saveDebugImages: document.getElementById('ocrSaveDebugImages').checked,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import { sleep } from '../../util/sleep.js';
|
|||
import { GRID_LAYOUTS } from '../../game/GridReader.js';
|
||||
import type { Bot } from '../../bot/Bot.js';
|
||||
import type { Server } from '../Server.js';
|
||||
import type { OcrEngine, OcrPreprocess, DiffOcrParams } from '../../game/OcrDaemon.js';
|
||||
import type { OcrEngine, OcrPreprocess, DiffOcrParams, TooltipMethod, EdgeOcrParams } from '../../game/OcrDaemon.js';
|
||||
import type { OcrSettings } from '../../game/ScreenReader.js';
|
||||
|
||||
export function debugRoutes(bot: Bot, server: Server): Router {
|
||||
|
|
@ -30,7 +30,9 @@ export function debugRoutes(bot: Bot, server: Server): Router {
|
|||
if (body.engine && ['tesseract', 'easyocr', 'paddleocr'].includes(body.engine)) s.engine = body.engine;
|
||||
if (body.screenPreprocess && ['none', 'bgsub', 'tophat'].includes(body.screenPreprocess)) s.screenPreprocess = body.screenPreprocess;
|
||||
if (body.tooltipPreprocess && ['none', 'bgsub', 'tophat'].includes(body.tooltipPreprocess)) s.tooltipPreprocess = body.tooltipPreprocess;
|
||||
if (body.tooltipMethod && ['diff', 'edge'].includes(body.tooltipMethod)) s.tooltipMethod = body.tooltipMethod;
|
||||
if (body.tooltipParams != null) s.tooltipParams = body.tooltipParams;
|
||||
if (body.edgeParams != null) s.edgeParams = body.edgeParams;
|
||||
if (body.saveDebugImages != null) s.saveDebugImages = body.saveDebugImages;
|
||||
server.broadcastLog('info', `OCR settings updated: engine=${s.engine} screen=${s.screenPreprocess} tooltip=${s.tooltipPreprocess}`);
|
||||
res.json({ ok: true });
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ static class Daemon
|
|||
var gridHandler = new GridHandler();
|
||||
var detectGridHandler = new DetectGridHandler();
|
||||
var templateMatchHandler = new TemplateMatchHandler();
|
||||
var edgeCropHandler = new EdgeCropHandler();
|
||||
var pythonBridge = new PythonOcrBridge();
|
||||
|
||||
// Main loop: read one JSON line, handle, write one JSON line
|
||||
|
|
@ -80,8 +81,11 @@ static class Daemon
|
|||
"capture" => ocrHandler.HandleCapture(request),
|
||||
"snapshot" => ocrHandler.HandleSnapshot(request),
|
||||
"diff-ocr" => HandleDiffOcrPipeline(ocrHandler, pythonBridge, request),
|
||||
"edge-ocr" => HandleEdgeOcrPipeline(ocrHandler, edgeCropHandler, pythonBridge, request),
|
||||
"test" => ocrHandler.HandleTest(request),
|
||||
"tune" => ocrHandler.HandleTune(request),
|
||||
"crop-test" => HandleCropTest(ocrHandler, edgeCropHandler, request),
|
||||
"crop-tune" => HandleCropTune(ocrHandler, request),
|
||||
"grid" => gridHandler.HandleGrid(request),
|
||||
"detect-grid" => detectGridHandler.HandleDetectGrid(request),
|
||||
"match-template" => templateMatchHandler.HandleTemplateMatch(request),
|
||||
|
|
@ -251,6 +255,365 @@ static class Daemon
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Edge-based tooltip detection pipeline.
/// EdgeCrop → preprocess (tophat or none; bgsub falls back to tophat) → route to engine.
/// </summary>
/// <remarks>
/// An omitted preprocess defaults to "tophat". When the edge crop finds nothing an
/// empty <c>OcrResponse</c> is returned. If <c>request.Path</c> is set, the processed
/// crop is saved there and the full capture next to it with a ".full" infix.
/// </remarks>
private static object HandleEdgeOcrPipeline(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, PythonOcrBridge pythonBridge, Request request)
{
    var engine = request.Engine ?? "tesseract";
    var ep = request.EdgeParams ?? new EdgeOcrParams();
    var cropParams = ep.Crop;
    var ocrParams = ep.Ocr;

    // Edge method has no reference frame, so bgsub is replaced by tophat.
    string preprocess = request.Preprocess ?? "tophat";
    if (preprocess == "bgsub") preprocess = "tophat";

    var sw = System.Diagnostics.Stopwatch.StartNew();

    var cropResult = edgeCropHandler.EdgeCrop(request, cropParams);
    if (cropResult == null)
        return new OcrResponse { Text = "", Lines = [] };

    var (cropped, fullCapture, region) = cropResult.Value;
    using var _fullCapture = fullCapture;

    // Preprocess. The using block guarantees the raw crop is released even if
    // preprocessing throws.
    Bitmap processed;
    using (cropped)
    {
        if (preprocess == "tophat")
        {
            processed = ImagePreprocessor.PreprocessForOcr(cropped, kernelSize: ocrParams.KernelSize, upscale: ocrParams.Upscale);
        }
        else // "none"
        {
            processed = (Bitmap)cropped.Clone();
        }
    }
    using var _processed = processed;

    var cropMs = sw.ElapsedMilliseconds;

    // Save debug images if path provided
    if (!string.IsNullOrEmpty(request.Path))
    {
        var dir = Path.GetDirectoryName(request.Path);
        if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
            Directory.CreateDirectory(dir);
        processed.Save(request.Path, ImageUtils.GetImageFormat(request.Path));

        // e.g. "tooltip.png" -> "tooltip.full.png"
        var ext = Path.GetExtension(request.Path);
        var fullPath = Path.ChangeExtension(request.Path, ".full" + ext);
        fullCapture.Save(fullPath, ImageUtils.GetImageFormat(fullPath));
    }

    void LogTiming(long ocrMs) =>
        Console.Error.WriteLine($" edge-ocr-pipeline: engine={engine} preprocess={preprocess} crop={cropMs}ms ocr={ocrMs}ms region={region.Width}x{region.Height}");

    // Route to engine
    sw.Restart();
    if (engine == "tesseract")
    {
        var result = ocrHandler.RunTesseractOnBitmap(processed, region, pad: cropParams.OcrPad, upscale: ocrParams.Upscale);
        LogTiming(sw.ElapsedMilliseconds);
        return result;
    }

    // easyocr, paddleocr
    var ocrResult = pythonBridge.OcrFromBitmap(processed, engine, ocrParams);
    LogTiming(sw.ElapsedMilliseconds);

    // Shift word coordinates by the crop origin.
    foreach (var line in ocrResult.Lines)
        foreach (var word in line.Words)
        {
            word.X += region.X;
            word.Y += region.Y;
        }

    return new DiffOcrResponse
    {
        Text = ocrResult.Text,
        Lines = ocrResult.Lines,
        Region = region,
    };
}
|
||||
|
||||
/// <summary>
/// Coordinate-descent sweep over DiffCropParams to maximise avgIoU on crop.json ground truth.
/// </summary>
/// <remarks>
/// Each round sweeps one parameter at a time over a fixed candidate list while the
/// other parameters stay at the current best; a candidate is kept only when it
/// strictly improves the score. The search stops after three rounds or after the
/// first round that yields no improvement. Returns an <c>ErrorResponse</c> when
/// crop.json is missing, empty, or has no case whose image files both exist.
/// </remarks>
private static object HandleCropTune(OcrHandler ocrHandler, Request request)
{
    var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var casesPath = Path.Combine(tessdataDir, "crop.json");
    if (!File.Exists(casesPath))
        return new ErrorResponse($"crop.json not found at {casesPath}");

    var json = File.ReadAllText(casesPath);
    var cases = JsonSerializer.Deserialize<List<CropTestCase>>(json, JsonOptions);
    if (cases == null || cases.Count == 0)
        return new ErrorResponse("No test cases in crop.json");

    // Preload valid test cases (both the image and its snapshot must exist)
    var validCases = new List<(CropTestCase tc, string imagePath, string snapshotPath)>();
    foreach (var tc in cases)
    {
        var imagePath = Path.Combine(tessdataDir, tc.Image);
        var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage);
        if (File.Exists(imagePath) && File.Exists(snapshotPath))
            validCases.Add((tc, imagePath, snapshotPath));
    }
    if (validCases.Count == 0)
        return new ErrorResponse("No valid test cases found");

    // Score function: compute avgIoU for a set of crop params.
    // A case whose crop fails contributes 0 but still counts in the average.
    double ScoreCropParams(DiffCropParams cp)
    {
        double totalIoU = 0;
        foreach (var (tc, imagePath, snapshotPath) in validCases)
        {
            ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
            var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, cp);
            if (cropResult == null) continue;

            // Only the region is needed; release the bitmaps immediately.
            var (cropped, refCropped, current, region) = cropResult.Value;
            cropped.Dispose(); refCropped.Dispose(); current.Dispose();

            int ax1 = region.X, ay1 = region.Y;
            int ax2 = region.X + region.Width, ay2 = region.Y + region.Height;
            int ex1 = tc.TopLeft.X, ey1 = tc.TopLeft.Y, ex2 = tc.BottomRight.X, ey2 = tc.BottomRight.Y;

            int ix1 = Math.Max(ax1, ex1), iy1 = Math.Max(ay1, ey1);
            int ix2 = Math.Min(ax2, ex2), iy2 = Math.Min(ay2, ey2);
            int iw = Math.Max(0, ix2 - ix1), ih = Math.Max(0, iy2 - iy1);
            double intersection = (double)iw * ih;
            double expW = ex2 - ex1, expH = ey2 - ey1;
            double union = (double)region.Width * region.Height + expW * expH - intersection;
            totalIoU += union > 0 ? intersection / union : 0;
        }
        return totalIoU / validCases.Count;
    }

    // Copy of the tunable fields, so a trial never mutates the current best.
    DiffCropParams CloneCrop(DiffCropParams p) => new()
    {
        DiffThresh = p.DiffThresh, RowThreshDiv = p.RowThreshDiv,
        ColThreshDiv = p.ColThreshDiv, MaxGap = p.MaxGap,
        TrimCutoff = p.TrimCutoff, OcrPad = p.OcrPad,
    };

    // Start from provided params or defaults.
    // Note: when the request supplies params, that same object is updated in place below.
    var best = request.Params?.Crop ?? new DiffCropParams();
    double bestScore = ScoreCropParams(best);
    int totalEvals = 1;
    Console.Error.WriteLine($" crop-tune: baseline avgIoU={bestScore:F4} {best}");

    var intSweeps = new (string Name, int[] Values, Action<DiffCropParams, int> Set)[]
    {
        ("diffThresh", [5, 10, 15, 20, 25, 30, 40], (c, v) => c.DiffThresh = v),
        ("rowThreshDiv", [20, 30, 40, 50, 60, 80, 100], (c, v) => c.RowThreshDiv = v),
        ("colThreshDiv", [5, 8, 10, 12, 15, 20], (c, v) => c.ColThreshDiv = v),
        ("maxGap", [5, 10, 15, 20, 25, 30], (c, v) => c.MaxGap = v),
    };
    double[] trimValues = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5];

    const int maxRounds = 3;
    for (int round = 0; round < maxRounds; round++)
    {
        bool improved = false;
        Console.Error.WriteLine($"--- Round {round + 1} ---");

        foreach (var (name, values, set) in intSweeps)
        {
            Console.Error.Write($" {name}: ");
            int bestVal = 0;
            double bestValScore = -1;

            foreach (int v in values)
            {
                var trial = CloneCrop(best);
                set(trial, v);
                double score = ScoreCropParams(trial);
                totalEvals++;
                Console.Error.Write($"{v}={score:F4} ");
                if (score > bestValScore) { bestValScore = score; bestVal = v; }
            }
            Console.Error.WriteLine();

            // Adopt the sweep winner only if it beats the overall best so far.
            if (bestValScore > bestScore)
            {
                set(best, bestVal);
                bestScore = bestValScore;
                improved = true;
                Console.Error.WriteLine($" -> {name}={bestVal} avgIoU={bestScore:F4}");
            }
        }

        // trimCutoff sweep
        {
            Console.Error.Write($" trimCutoff: ");
            double bestTrim = best.TrimCutoff;
            double bestTrimScore = bestScore;

            foreach (double v in trimValues)
            {
                var trial = CloneCrop(best);
                trial.TrimCutoff = v;
                double score = ScoreCropParams(trial);
                totalEvals++;
                Console.Error.Write($"{v:F2}={score:F4} ");
                if (score > bestTrimScore) { bestTrimScore = score; bestTrim = v; }
            }
            Console.Error.WriteLine();

            if (bestTrimScore > bestScore)
            {
                best.TrimCutoff = bestTrim;
                bestScore = bestTrimScore;
                improved = true;
                Console.Error.WriteLine($" -> trimCutoff={bestTrim:F2} avgIoU={bestScore:F4}");
            }
        }

        Console.Error.WriteLine($" End of round {round + 1}: avgIoU={bestScore:F4} {best}");
        if (!improved) break;
    }

    Console.Error.WriteLine($"\n crop-tune: BEST avgIoU={bestScore:F4} {best} evals={totalEvals}");

    return new CropTuneResponse
    {
        BestAvgIoU = bestScore,
        BestParams = best,
        Iterations = totalEvals,
    };
}
|
||||
|
||||
/// <summary>
/// Crop accuracy test: runs the diff or edge crop on test cases from crop.json,
/// computes IoU and per-edge deltas vs ground truth.
/// </summary>
/// <remarks>
/// The method is taken from <c>request.Engine</c> ("diff" by default). "both" is
/// accepted but only the diff crop runs for it, because each result carries a
/// single actual region. Any other value yields an <c>ErrorResponse</c>.
/// Cases with missing image files are reported with IoU 0 and still count in the average.
/// </remarks>
private static object HandleCropTest(OcrHandler ocrHandler, EdgeCropHandler edgeCropHandler, Request request)
{
    var tessdataDir = Path.Combine(AppContext.BaseDirectory, "tessdata");
    var casesPath = Path.Combine(tessdataDir, "crop.json");
    if (!File.Exists(casesPath))
        return new ErrorResponse($"crop.json not found at {casesPath}");

    var json = File.ReadAllText(casesPath);
    var cases = JsonSerializer.Deserialize<List<CropTestCase>>(json, JsonOptions);
    if (cases == null || cases.Count == 0)
        return new ErrorResponse("No test cases in crop.json");

    var method = request.Engine ?? "diff"; // reuse engine field: "diff", "edge", or "both" (treated as diff)
    // Reject unknown methods up front instead of silently scoring every case as IoU 0.
    if (method is not ("diff" or "edge" or "both"))
        return new ErrorResponse($"Unknown crop-test method '{method}' (expected diff, edge or both)");

    var diffParams = request.Params?.Crop ?? new DiffCropParams();
    var edgeParams = request.EdgeParams?.Crop ?? new EdgeCropParams();

    var results = new List<CropTestResult>();

    foreach (var tc in cases)
    {
        var imagePath = Path.Combine(tessdataDir, tc.Image);
        var snapshotPath = Path.Combine(tessdataDir, tc.SnapshotImage);

        if (!File.Exists(imagePath) || !File.Exists(snapshotPath))
        {
            Console.Error.WriteLine($" crop-test: SKIP {tc.Id} — missing files");
            results.Add(new CropTestResult { Id = tc.Id, IoU = 0 });
            continue;
        }

        // Expected region
        int expX = tc.TopLeft.X;
        int expY = tc.TopLeft.Y;
        int expW = tc.BottomRight.X - tc.TopLeft.X;
        int expH = tc.BottomRight.Y - tc.TopLeft.Y;
        var expected = new RegionRect { X = expX, Y = expY, Width = expW, Height = expH };

        RegionRect? actual = null;

        if (method is "diff" or "both")
        {
            // Load snapshot as reference
            ocrHandler.HandleSnapshot(new Request { File = snapshotPath });
            var cropResult = ocrHandler.DiffCrop(new Request { File = imagePath }, diffParams);
            if (cropResult != null)
            {
                // Only the region is needed; release the bitmaps immediately.
                var (cropped, refCropped, current, region) = cropResult.Value;
                actual = region;
                cropped.Dispose();
                refCropped.Dispose();
                current.Dispose();
            }
        }

        if (method == "edge")
        {
            // Default cursor to center of ground-truth bbox if not specified
            int cx = tc.CursorX ?? (tc.TopLeft.X + tc.BottomRight.X) / 2;
            int cy = tc.CursorY ?? (tc.TopLeft.Y + tc.BottomRight.Y) / 2;
            var cropResult = edgeCropHandler.EdgeCrop(
                new Request { File = imagePath, CursorX = cx, CursorY = cy },
                edgeParams);
            if (cropResult != null)
            {
                var (cropped, fullCapture, region) = cropResult.Value;
                actual = region;
                cropped.Dispose();
                fullCapture.Dispose();
            }
        }

        // Compute IoU and deltas (all stay 0 when no crop was produced)
        double iou = 0;
        int dTop = 0, dLeft = 0, dRight = 0, dBottom = 0;
        if (actual != null)
        {
            int ax1 = actual.X, ay1 = actual.Y;
            int ax2 = actual.X + actual.Width, ay2 = actual.Y + actual.Height;
            int ex1 = expX, ey1 = expY, ex2 = tc.BottomRight.X, ey2 = tc.BottomRight.Y;

            int ix1 = Math.Max(ax1, ex1), iy1 = Math.Max(ay1, ey1);
            int ix2 = Math.Min(ax2, ex2), iy2 = Math.Min(ay2, ey2);
            int iw = Math.Max(0, ix2 - ix1), ih = Math.Max(0, iy2 - iy1);
            double intersection = (double)iw * ih;
            double union = (double)actual.Width * actual.Height + (double)expW * expH - intersection;
            iou = union > 0 ? intersection / union : 0;

            dTop = ay1 - ey1;    // positive = crop starts too low
            dLeft = ax1 - ex1;   // positive = crop starts too far right
            dRight = ax2 - ex2;  // positive = crop ends too far right
            dBottom = ay2 - ey2; // positive = crop ends too low
        }

        Console.Error.WriteLine($" crop-test #{tc.Id}: IoU={iou:F3} expected=({expX},{expY})+{expW}x{expH} actual={FormatRegion(actual)} delta T={dTop:+0;-#} L={dLeft:+0;-#} R={dRight:+0;-#} B={dBottom:+0;-#}");

        results.Add(new CropTestResult
        {
            Id = tc.Id,
            IoU = iou,
            Expected = expected,
            Actual = actual,
            DeltaTop = dTop,
            DeltaLeft = dLeft,
            DeltaRight = dRight,
            DeltaBottom = dBottom,
        });
    }

    double avgIoU = results.Count > 0 ? results.Average(r => r.IoU) : 0;
    Console.Error.WriteLine($" crop-test: method={method} avgIoU={avgIoU:F3} ({results.Count} cases)");

    return new CropTestResponse
    {
        Method = method,
        AvgIoU = avgIoU,
        Results = results,
    };
}
|
||||
|
||||
/// <summary>
/// Formats a region as "(x,y)+WxH" for log output, or "null" when no region is given.
/// </summary>
private static string FormatRegion(RegionRect? r) =>
    r != null ? $"({r.X},{r.Y})+{r.Width}x{r.Height}" : "null";
|
||||
|
||||
private static void WriteResponse(object response)
|
||||
{
|
||||
var json = JsonSerializer.Serialize(response, JsonOptions);
|
||||
|
|
|
|||
205
tools/OcrDaemon/EdgeCropHandler.cs
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
namespace OcrDaemon;
|
||||
|
||||
using System.Drawing;
|
||||
using System.Drawing.Imaging;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
class EdgeCropHandler
|
||||
{
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
private struct POINT { public int X, Y; }
|
||||
|
||||
[DllImport("user32.dll")]
|
||||
private static extern bool GetCursorPos(out POINT lpPoint);
|
||||
|
||||
public (Bitmap cropped, Bitmap fullCapture, RegionRect region)? EdgeCrop(Request req, EdgeCropParams p)
|
||||
{
|
||||
int cursorX, cursorY;
|
||||
if (req.CursorX.HasValue && req.CursorY.HasValue)
|
||||
{
|
||||
cursorX = req.CursorX.Value;
|
||||
cursorY = req.CursorY.Value;
|
||||
}
|
||||
else
|
||||
{
|
||||
GetCursorPos(out var pt);
|
||||
cursorX = pt.X;
|
||||
cursorY = pt.Y;
|
||||
}
|
||||
|
||||
var fullCapture = ScreenCapture.CaptureOrLoad(req.File, null);
|
||||
int w = fullCapture.Width;
|
||||
int h = fullCapture.Height;
|
||||
|
||||
var bmpData = fullCapture.LockBits(new Rectangle(0, 0, w, h), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
|
||||
byte[] px = new byte[bmpData.Stride * h];
|
||||
Marshal.Copy(bmpData.Scan0, px, 0, px.Length);
|
||||
fullCapture.UnlockBits(bmpData);
|
||||
int stride = bmpData.Stride;
|
||||
|
||||
int darkThresh = p.DarkThresh;
|
||||
int colGap = p.RunGapTolerance;
|
||||
int maxGap = p.MaxGap;
|
||||
|
||||
// ── Phase 1: Per-row horizontal extent ──
|
||||
// Scan left/right from cursorX per row. Gap tolerance bridges through text.
|
||||
// Percentile-based filtering for robustness.
|
||||
int bandHalf = p.MinDarkRun; // repurpose: half-height of horizontal scan band
|
||||
int bandTop = Math.Max(0, cursorY - bandHalf);
|
||||
int bandBot = Math.Min(h - 1, cursorY + bandHalf);
|
||||
|
||||
var leftExtents = new List<int>();
|
||||
var rightExtents = new List<int>();
|
||||
|
||||
for (int y = bandTop; y <= bandBot; y++)
|
||||
{
|
||||
int rowOff = y * stride;
|
||||
int ci = rowOff + cursorX * 4;
|
||||
int cBright = (px[ci] + px[ci + 1] + px[ci + 2]) / 3;
|
||||
if (cBright >= darkThresh) continue;
|
||||
|
||||
int leftEdge = cursorX;
|
||||
int gap = 0;
|
||||
for (int x = cursorX - 1; x >= 0; x--)
|
||||
{
|
||||
int i = rowOff + x * 4;
|
||||
int brightness = (px[i] + px[i + 1] + px[i + 2]) / 3;
|
||||
if (brightness < darkThresh) { leftEdge = x; gap = 0; }
|
||||
else if (++gap > colGap) break;
|
||||
}
|
||||
|
||||
int rightEdge = cursorX;
|
||||
gap = 0;
|
||||
for (int x = cursorX + 1; x < w; x++)
|
||||
{
|
||||
int i = rowOff + x * 4;
|
||||
int brightness = (px[i] + px[i + 1] + px[i + 2]) / 3;
|
||||
if (brightness < darkThresh) { rightEdge = x; gap = 0; }
|
||||
else if (++gap > colGap) break;
|
||||
}
|
||||
|
||||
leftExtents.Add(leftEdge);
|
||||
rightExtents.Add(rightEdge);
|
||||
}
|
||||
|
||||
if (leftExtents.Count < 10)
|
||||
{
|
||||
Console.Error.WriteLine($" edge-crop: too few dark rows ({leftExtents.Count})");
|
||||
fullCapture.Dispose();
|
||||
return null;
|
||||
}
|
||||
|
||||
leftExtents.Sort();
|
||||
rightExtents.Sort();
|
||||
|
||||
// Use RowThreshDiv/ColThreshDiv as percentile denominators
|
||||
// e.g., RowThreshDiv=4 → 25th percentile for left, ColThreshDiv=4 → 75th for right
|
||||
int leftPctIdx = leftExtents.Count / p.RowThreshDiv;
|
||||
int rightPctIdx = rightExtents.Count * (p.ColThreshDiv - 1) / p.ColThreshDiv;
|
||||
leftPctIdx = Math.Clamp(leftPctIdx, 0, leftExtents.Count - 1);
|
||||
rightPctIdx = Math.Clamp(rightPctIdx, 0, rightExtents.Count - 1);
|
||||
|
||||
int bestColStart = leftExtents[leftPctIdx];
|
||||
int bestColEnd = rightExtents[rightPctIdx];
|
||||
|
||||
Console.Error.WriteLine($" edge-crop: horizontal: left={bestColStart} right={bestColEnd} ({bestColEnd - bestColStart + 1}px) samples={leftExtents.Count} pctL={leftPctIdx}/{leftExtents.Count} pctR={rightPctIdx}/{rightExtents.Count}");
|
||||
|
||||
if (bestColEnd - bestColStart + 1 < 50)
|
||||
{
|
||||
Console.Error.WriteLine($" edge-crop: horizontal extent too small");
|
||||
fullCapture.Dispose();
|
||||
return null;
|
||||
}
|
||||
|
||||
// ── Phase 2: Per-column vertical extent ──
|
||||
int colBandHalf = (bestColEnd - bestColStart + 1) / 3;
|
||||
int colBandLeft = Math.Max(bestColStart, cursorX - colBandHalf);
|
||||
int colBandRight = Math.Min(bestColEnd, cursorX + colBandHalf);
|
||||
|
||||
var topExtents = new List<int>();
|
||||
var bottomExtents = new List<int>();
|
||||
|
||||
// Asymmetric gap: larger upward to bridge header decorations (~30-40px bright)
|
||||
int maxGapUp = maxGap * 3;
|
||||
|
||||
for (int x = colBandLeft; x <= colBandRight; x++)
|
||||
{
|
||||
int ci = cursorY * stride + x * 4;
|
||||
int cBright = (px[ci] + px[ci + 1] + px[ci + 2]) / 3;
|
||||
if (cBright >= darkThresh) continue;
|
||||
|
||||
int topEdge = cursorY;
|
||||
int gap = 0;
|
||||
for (int y = cursorY - 1; y >= 0; y--)
|
||||
{
|
||||
int i = y * stride + x * 4;
|
||||
int brightness = (px[i] + px[i + 1] + px[i + 2]) / 3;
|
||||
if (brightness < darkThresh) { topEdge = y; gap = 0; }
|
||||
else if (++gap > maxGapUp) break;
|
||||
}
|
||||
|
||||
int bottomEdge = cursorY;
|
||||
gap = 0;
|
||||
for (int y = cursorY + 1; y < h; y++)
|
||||
{
|
||||
int i = y * stride + x * 4;
|
||||
int brightness = (px[i] + px[i + 1] + px[i + 2]) / 3;
|
||||
if (brightness < darkThresh) { bottomEdge = y; gap = 0; }
|
||||
else if (++gap > maxGap) break;
|
||||
}
|
||||
|
||||
topExtents.Add(topEdge);
|
||||
bottomExtents.Add(bottomEdge);
|
||||
}
|
||||
|
||||
if (topExtents.Count < 10)
|
||||
{
|
||||
Console.Error.WriteLine($" edge-crop: too few dark columns ({topExtents.Count})");
|
||||
fullCapture.Dispose();
|
||||
return null;
|
||||
}
|
||||
|
||||
topExtents.Sort();
|
||||
bottomExtents.Sort();
|
||||
|
||||
int topPctIdx = topExtents.Count / p.RowThreshDiv;
|
||||
int botPctIdx = topExtents.Count * (p.ColThreshDiv - 1) / p.ColThreshDiv;
|
||||
topPctIdx = Math.Clamp(topPctIdx, 0, topExtents.Count - 1);
|
||||
botPctIdx = Math.Clamp(botPctIdx, 0, bottomExtents.Count - 1);
|
||||
|
||||
int bestRowStart = topExtents[topPctIdx];
|
||||
int bestRowEnd = bottomExtents[botPctIdx];
|
||||
|
||||
Console.Error.WriteLine($" edge-crop: vertical: top={bestRowStart} bottom={bestRowEnd} ({bestRowEnd - bestRowStart + 1}px) samples={topExtents.Count}");
|
||||
|
||||
if (bestRowEnd - bestRowStart + 1 < 50)
|
||||
{
|
||||
Console.Error.WriteLine($" edge-crop: vertical extent too small");
|
||||
fullCapture.Dispose();
|
||||
return null;
|
||||
}
|
||||
|
||||
int minX = bestColStart;
|
||||
int minY = bestRowStart;
|
||||
int maxX = bestColEnd;
|
||||
int maxY = bestRowEnd;
|
||||
|
||||
int rw = maxX - minX + 1;
|
||||
int rh = maxY - minY + 1;
|
||||
|
||||
Console.Error.WriteLine($" edge-crop: result ({minX},{minY}) {rw}x{rh}");
|
||||
|
||||
if (rw < 50 || rh < 50)
|
||||
{
|
||||
Console.Error.WriteLine($" edge-crop: region too small ({rw}x{rh})");
|
||||
fullCapture.Dispose();
|
||||
return null;
|
||||
}
|
||||
|
||||
var cropRect = new Rectangle(minX, minY, rw, rh);
|
||||
var cropped = fullCapture.Clone(cropRect, PixelFormat.Format32bppArgb);
|
||||
var region = new RegionRect { X = minX, Y = minY, Width = rw, Height = rh };
|
||||
|
||||
return (cropped, fullCapture, region);
|
||||
}
|
||||
}
|
||||
|
|
@ -48,6 +48,15 @@ class Request
|
|||
|
||||
[JsonPropertyName("params")]
|
||||
public DiffOcrParams? Params { get; set; }
|
||||
|
||||
[JsonPropertyName("edgeParams")]
|
||||
public EdgeOcrParams? EdgeParams { get; set; }
|
||||
|
||||
[JsonPropertyName("cursorX")]
|
||||
public int? CursorX { get; set; }
|
||||
|
||||
[JsonPropertyName("cursorY")]
|
||||
public int? CursorY { get; set; }
|
||||
}
|
||||
|
||||
class RegionRect
|
||||
|
|
@ -336,6 +345,47 @@ sealed class DiffOcrParams
|
|||
public override string ToString() => $"[{Crop}] [{Ocr}]";
|
||||
}
|
||||
|
||||
/// <summary>
/// Tunable settings for the edge-based ("edge-crop") tooltip detection, bound from JSON.
/// Several fields keep names from an earlier algorithm; the edge-crop scan reuses
/// them with the meanings noted on each property.
/// </summary>
sealed class EdgeCropParams
{
    // Backing fields for the two divisor settings so a zero can be rejected on assignment.
    private int _rowThreshDiv = 40;
    private int _colThreshDiv = 8;

    /// <summary>
    /// Brightness cutoff: the edge-crop scan averages three channel bytes of a pixel and
    /// treats the pixel as dark when that average is below this value.
    /// </summary>
    [JsonPropertyName("darkThresh")]
    public int DarkThresh { get; set; } = 40;

    /// <summary>
    /// Repurposed by the edge-crop scan as the half-height, in rows, of the band around
    /// the cursor row that is sampled for horizontal extents.
    /// </summary>
    [JsonPropertyName("minDarkRun")]
    public int MinDarkRun { get; set; } = 200;

    /// <summary>
    /// Number of consecutive non-dark pixels the horizontal (left/right) scan may
    /// bridge before it stops.
    /// </summary>
    [JsonPropertyName("runGapTolerance")]
    public int RunGapTolerance { get; set; } = 15;

    /// <summary>
    /// Percentile denominator for the left/top edge: the scan picks the sorted sample at
    /// index <c>count / RowThreshDiv</c>. Because the value is used as an integer divisor,
    /// assigning 0 stores 1 instead (the nearest usable divisor); all other values are
    /// stored unchanged.
    /// </summary>
    [JsonPropertyName("rowThreshDiv")]
    public int RowThreshDiv
    {
        get => _rowThreshDiv;
        set => _rowThreshDiv = value == 0 ? 1 : value;
    }

    /// <summary>
    /// Percentile denominator for the right/bottom edge: the scan picks the sorted sample
    /// at index <c>count * (ColThreshDiv - 1) / ColThreshDiv</c>. Assigning 0 stores 1
    /// instead to avoid a division by zero; all other values are stored unchanged.
    /// </summary>
    [JsonPropertyName("colThreshDiv")]
    public int ColThreshDiv
    {
        get => _colThreshDiv;
        set => _colThreshDiv = value == 0 ? 1 : value;
    }

    /// <summary>
    /// Number of consecutive non-dark pixels the vertical scan may bridge. The downward
    /// scan uses this value directly; the upward scan uses three times this value.
    /// </summary>
    [JsonPropertyName("maxGap")]
    public int MaxGap { get; set; } = 15;

    // NOTE(review): consumer of TrimCutoff not verified; presumably an edge-trimming ratio - confirm.
    [JsonPropertyName("trimCutoff")]
    public double TrimCutoff { get; set; } = 0.3;

    // NOTE(review): consumer of OcrPad not verified; presumably padding added around the crop before OCR - confirm.
    [JsonPropertyName("ocrPad")]
    public int OcrPad { get; set; } = 10;

    /// <summary>Compact one-line summary of the crop settings for log output (OcrPad is not included).</summary>
    public override string ToString() =>
        $"darkThresh={DarkThresh} minRun={MinDarkRun} runGap={RunGapTolerance} maxGap={MaxGap} trimCutoff={TrimCutoff:F2} rowDiv={RowThreshDiv} colDiv={ColThreshDiv}";
}
|
||||
|
||||
/// <summary>
/// Parameter bundle for the edge-based pipeline: crop settings under JSON key
/// "crop" and OCR settings under JSON key "ocr". Both default to fresh instances.
/// </summary>
sealed class EdgeOcrParams
{
    [JsonPropertyName("crop")] public EdgeCropParams Crop { get; set; } = new EdgeCropParams();

    [JsonPropertyName("ocr")] public OcrParams Ocr { get; set; } = new OcrParams();

    /// <summary>Renders both parts in square brackets, separated by a single space.</summary>
    public override string ToString()
    {
        return "[" + Crop + "] [" + Ocr + "]";
    }
}
|
||||
|
||||
class TestCase
|
||||
{
|
||||
[JsonPropertyName("id")]
|
||||
|
|
@ -404,3 +454,95 @@ class TuneResponse
|
|||
[JsonPropertyName("iterations")]
|
||||
public int Iterations { get; set; }
|
||||
}
|
||||
|
||||
// ── Crop test models ────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>Integer coordinate pair, serialized as JSON keys "x" and "y".</summary>
class PointXY
{
    [JsonPropertyName("x")] public int X { get; set; }

    [JsonPropertyName("y")] public int Y { get; set; }
}
|
||||
|
||||
/// <summary>
/// One entry of the crop test fixture file: an identifier, the image paths, the
/// expected region given by its top-left and bottom-right corners, and an
/// optional cursor position.
/// </summary>
class CropTestCase
{
    [JsonPropertyName("id")] public string Id { get; set; } = string.Empty;

    [JsonPropertyName("image")] public string Image { get; set; } = string.Empty;

    [JsonPropertyName("snapshotImage")] public string SnapshotImage { get; set; } = string.Empty;

    // Expected region corners.
    [JsonPropertyName("topLeft")] public PointXY TopLeft { get; set; } = new PointXY();

    [JsonPropertyName("bottomRight")] public PointXY BottomRight { get; set; } = new PointXY();

    // Optional cursor position; null when the fixture entry does not provide one.
    [JsonPropertyName("cursorX")] public int? CursorX { get; set; }

    [JsonPropertyName("cursorY")] public int? CursorY { get; set; }
}
|
||||
|
||||
/// <summary>
/// Outcome of a single crop test case: the score, the expected and detected
/// regions, and one signed pixel delta per edge.
/// </summary>
class CropTestResult
{
    [JsonPropertyName("id")] public string Id { get; set; } = string.Empty;

    // NOTE(review): presumably intersection-over-union of Expected vs Actual - confirm against the test runner.
    [JsonPropertyName("iou")] public double IoU { get; set; }

    [JsonPropertyName("expected")] public RegionRect Expected { get; set; } = new RegionRect();

    // Nullable: there may be no detected region to report.
    [JsonPropertyName("actual")] public RegionRect? Actual { get; set; }

    // Per-edge differences. NOTE(review): recorded sample output is consistent with
    // "actual edge minus expected edge" - confirm against the code that fills these in.
    [JsonPropertyName("deltaTop")] public int DeltaTop { get; set; }

    [JsonPropertyName("deltaLeft")] public int DeltaLeft { get; set; }

    [JsonPropertyName("deltaRight")] public int DeltaRight { get; set; }

    [JsonPropertyName("deltaBottom")] public int DeltaBottom { get; set; }
}
|
||||
|
||||
/// <summary>
/// Response payload for a crop test run: a constant success flag, the method
/// name, the average score, and the per-case results.
/// </summary>
class CropTestResponse
{
    // Read-only and always true.
    [JsonPropertyName("ok")]
    public bool Ok
    {
        get { return true; }
    }

    [JsonPropertyName("method")] public string Method { get; set; } = string.Empty;

    [JsonPropertyName("avgIoU")] public double AvgIoU { get; set; }

    [JsonPropertyName("results")] public List<CropTestResult> Results { get; set; } = new List<CropTestResult>();
}
|
||||
|
||||
/// <summary>
/// Response payload for a crop tuning run: a constant success flag, the best
/// average score, the parameter set reported as best, and an iteration count.
/// </summary>
class CropTuneResponse
{
    // Read-only and always true.
    [JsonPropertyName("ok")]
    public bool Ok
    {
        get { return true; }
    }

    [JsonPropertyName("bestAvgIoU")] public double BestAvgIoU { get; set; }

    [JsonPropertyName("bestParams")] public DiffCropParams BestParams { get; set; } = new DiffCropParams();

    [JsonPropertyName("iterations")] public int Iterations { get; set; }
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,9 @@
|
|||
<None Update="tessdata\cases.json">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="tessdata\crop.json">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="tessdata\poe2.user-words" Condition="Exists('tessdata\poe2.user-words')">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
|
|
|
|||
|
|
@ -269,6 +269,36 @@ class OcrHandler(TesseractEngine engine)
|
|||
int maxX = Math.Min(bestColEnd, w - 1);
|
||||
int maxY = Math.Min(bestRowEnd, h - 1);
|
||||
|
||||
// Boundary extension: scan outward from detected edges with a relaxed threshold
|
||||
// to capture low-signal regions (e.g. ornamental tooltip headers)
|
||||
int extRowThresh = Math.Max(1, rowThresh / 4);
|
||||
int extColThresh = Math.Max(1, colThresh / 4);
|
||||
|
||||
int extTop = Math.Max(0, minY - maxGap);
|
||||
for (int y = minY - 1; y >= extTop; y--)
|
||||
{
|
||||
if (rowCounts[y] >= extRowThresh) minY = y;
|
||||
else break;
|
||||
}
|
||||
int extBottom = Math.Min(h - 1, maxY + maxGap);
|
||||
for (int y = maxY + 1; y <= extBottom; y++)
|
||||
{
|
||||
if (rowCounts[y] >= extRowThresh) maxY = y;
|
||||
else break;
|
||||
}
|
||||
int extLeft = Math.Max(0, minX - maxGap);
|
||||
for (int x = minX - 1; x >= extLeft; x--)
|
||||
{
|
||||
if (colCounts[x] >= extColThresh) minX = x;
|
||||
else break;
|
||||
}
|
||||
int extRight = Math.Min(w - 1, maxX + maxGap);
|
||||
for (int x = maxX + 1; x <= extRight; x++)
|
||||
{
|
||||
if (colCounts[x] >= extColThresh) maxX = x;
|
||||
else break;
|
||||
}
|
||||
|
||||
// Trim low-density edges on both axes to avoid oversized crops.
|
||||
int colSpan = maxX - minX + 1;
|
||||
if (colSpan > 50)
|
||||
|
|
|
|||
|
|
@ -91,7 +91,6 @@ static class TestRunner
|
|||
continue;
|
||||
}
|
||||
|
||||
var options = new OcrOptions();
|
||||
List<string> actualSet;
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(tc.BeforeImage))
|
||||
|
|
@ -115,7 +114,6 @@ static class TestRunner
|
|||
var response = ocrHandler.HandleDiffOcr(new Request
|
||||
{
|
||||
File = imagePath,
|
||||
Ocr = options,
|
||||
Path = savePath,
|
||||
});
|
||||
|
||||
|
|
@ -136,7 +134,7 @@ static class TestRunner
|
|||
else
|
||||
{
|
||||
using var bitmap = new Bitmap(imagePath);
|
||||
using var processed = ImagePreprocessor.PreprocessForOcr(bitmap, options);
|
||||
using var processed = ImagePreprocessor.PreprocessForOcr(bitmap);
|
||||
|
||||
if (!string.IsNullOrEmpty(savePreDir))
|
||||
{
|
||||
|
|
@ -146,7 +144,7 @@ static class TestRunner
|
|||
using var pix = ImageUtils.BitmapToPix(processed);
|
||||
using var page = engine.Process(pix);
|
||||
|
||||
var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0, minConfidence: options.MinConfidence);
|
||||
var lines = ImageUtils.ExtractLinesFromPage(page, offsetX: 0, offsetY: 0);
|
||||
var actualLines = lines.Select(l => Normalize(l.Text)).Where(s => s.Length > 0).ToList();
|
||||
|
||||
var rawText = page.GetText() ?? string.Empty;
|
||||
|
|
|
|||
93
tools/OcrDaemon/tessdata/crop.json
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
[
|
||||
{
|
||||
"id": "1",
|
||||
"image": "images/tooltip1.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 0,
|
||||
"y": 84
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 1185,
|
||||
"y": 774
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "2",
|
||||
"image": "images/tooltip2.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 304,
|
||||
"y": 0
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 983,
|
||||
"y": 470
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"image": "images/tooltip3.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 473,
|
||||
"y": 334
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 1114,
|
||||
"y": 914
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"image": "images/tooltip4.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 209,
|
||||
"y": 264
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 1097,
|
||||
"y": 915
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "5",
|
||||
"image": "images/tooltip5.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 763,
|
||||
"y": 0
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 1874,
|
||||
"y": 560
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "6",
|
||||
"image": "images/tooltip6.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 1541,
|
||||
"y": 154
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 2348,
|
||||
"y": 614
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "7",
|
||||
"image": "images/tooltip7.png",
|
||||
"snapshotImage": "images/tooltip-snapshot.png",
|
||||
"topLeft": {
|
||||
"x": 1921,
|
||||
"y": 40
|
||||
},
|
||||
"bottomRight": {
|
||||
"x": 2558,
|
||||
"y": 370
|
||||
}
|
||||
}
|
||||
]
|
||||
BIN
tools/OcrDaemon/tessdata/images/tooltip-snapshot.png
Normal file
|
After Width: | Height: | Size: 5.3 MiB |
BIN
tools/OcrDaemon/tessdata/images/tooltip1.png
Normal file
|
After Width: | Height: | Size: 5.1 MiB |
BIN
tools/OcrDaemon/tessdata/images/tooltip2.png
Normal file
|
After Width: | Height: | Size: 5.3 MiB |
BIN
tools/OcrDaemon/tessdata/images/tooltip3.png
Normal file
|
After Width: | Height: | Size: 5.2 MiB |
BIN
tools/OcrDaemon/tessdata/images/tooltip4.png
Normal file
|
After Width: | Height: | Size: 5.2 MiB |
BIN
tools/OcrDaemon/tessdata/images/tooltip5.png
Normal file
|
After Width: | Height: | Size: 4.9 MiB |
BIN
tools/OcrDaemon/tessdata/images/tooltip6.png
Normal file
|
After Width: | Height: | Size: 5.1 MiB |
BIN
tools/OcrDaemon/tessdata/images/tooltip7.png
Normal file
|
After Width: | Height: | Size: 5.3 MiB |