work on OCR
This commit is contained in:
parent
6600969947
commit
854a474435
13 changed files with 4374 additions and 38 deletions
166
tools/OcrDaemon/tessdata/generate-words.mjs
Normal file
166
tools/OcrDaemon/tessdata/generate-words.mjs
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* Fetches POE2 trade API data and generates Tesseract user-words and user-patterns
|
||||
* files to improve OCR accuracy for tooltip text.
|
||||
*
|
||||
* Usage: node generate-words.mjs
|
||||
* Output: poe2.user-words, poe2.user-patterns (in same directory)
|
||||
*/
|
||||
|
||||
import { writeFileSync } from "fs";
|
||||
import { dirname, join } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const UA = "OAuth poe2trade/1.0 (contact: poe2trade@users.noreply.github.com)";
|
||||
|
||||
/**
 * Fetch one resource from the POE2 trade "data" API and parse it as JSON.
 *
 * @param {string} path - Resource name appended to the trade2 data URL (e.g. "items").
 * @returns {Promise<any>} The parsed JSON body of the response.
 * @throws {Error} When the response is not OK; the message is "<url>: <status>".
 */
async function fetchJson(path) {
  const endpoint = `https://www.pathofexile.com/api/trade2/data/${path}`;
  const requestOptions = { headers: { "User-Agent": UA } };
  const response = await fetch(endpoint, requestOptions);

  if (response.ok) {
    return response.json();
  }

  throw new Error(`${endpoint}: ${response.status}`);
}
|
||||
|
||||
/**
 * Split free-form API text into candidate dictionary words.
 *
 * `#` placeholders and bracket characters are removed, `+` and `-` are treated
 * as separators, and each whitespace-separated token has every leading and
 * trailing character outside a-z/A-Z trimmed off.
 *
 * @param {unknown} text - Raw label or stat text. Non-string and empty values yield no words.
 * @returns {string[]} Tokens that are at least two characters long, in order of appearance.
 */
function extractWords(text) {
  if (typeof text !== "string" || text === "") return [];

  const cleaned = text
    .replace(/#/g, "")
    .replace(/[{}()\[\]]/g, "")
    .replace(/[+\-]/g, " ");

  const found = [];
  for (const token of cleaned.split(/\s+/)) {
    // Drops pure numbers/punctuation and strips things like a trailing "%" or ",".
    const trimmed = token.replace(/^[^a-zA-Z]+|[^a-zA-Z]+$/g, "");
    if (trimmed.length >= 2) found.push(trimmed);
  }
  return found;
}

/**
 * Fetch the four trade-API data sets, collect every word found in their
 * labels/texts plus a fixed list of tooltip keywords, and write two files next
 * to this script: `poe2.user-words` (one word per line) and
 * `poe2.user-patterns` (one pattern per line).
 *
 * @returns {Promise<void>} Resolves once both files are written.
 * @throws {Error} Propagates any fetch failure or file-system error.
 */
async function main() {
  console.log("Fetching POE2 trade API data...");
  const [items, stats, static_, filters] = await Promise.all([
    fetchJson("items"),
    fetchJson("stats"),
    fetchJson("static"),
    fetchJson("filters"),
  ]);

  const words = new Set();

  // Tesseract user-words are single words, so multi-word names are always
  // broken into their individual words before being added.
  const addWords = (text) => {
    for (const word of extractWords(text)) words.add(word);
  };

  // Items: type names (base types like "Tribal Mask", "Leather Vest")
  for (const cat of items.result) {
    addWords(cat.label);
    // A category without an `entries` array simply contributes no entries.
    for (const entry of cat.entries ?? []) {
      addWords(entry.type);
      addWords(entry.name);
      addWords(entry.text);
    }
  }

  // Stats: mod text like "+#% to Chaos Resistance", "# to maximum Life"
  for (const cat of stats.result) {
    for (const entry of cat.entries ?? []) {
      addWords(entry.text);
    }
  }

  // Static: currency/fragment names like "Divine Orb", "Scroll of Wisdom"
  for (const cat of static_.result) {
    addWords(cat.label);
    for (const entry of cat.entries ?? []) {
      addWords(entry.text);
    }
  }

  // Filters: filter labels and option texts
  for (const cat of filters.result) {
    addWords(cat.title);
    for (const f of cat.filters ?? []) {
      addWords(f.text);
      for (const opt of f.option?.options ?? []) {
        addWords(opt.text);
      }
    }
  }

  // Add common tooltip keywords not in trade API
  const extraWords = [
    // Section headers
    "Quality", "Requires", "Level", "Asking", "Price",
    "Corrupted", "Mirrored", "Unmodifiable",
    "Twice", "Sockets",
    // Attributes
    "Strength", "Dexterity", "Intelligence", "Spirit",
    // Defense types
    "Armour", "Evasion", "Rating", "Energy", "Shield",
    // Damage types
    "Physical", "Elemental", "Lightning", "Cold", "Fire", "Chaos",
    // Common mod words
    "increased", "reduced", "more", "less",
    "added", "converted", "regeneration",
    "maximum", "minimum", "total",
    "Resistance", "Damage", "Speed", "Duration",
    "Critical", "Hit", "Chance", "Multiplier",
    "Attack", "Cast", "Spell", "Minion", "Skill",
    "Mana", "Life", "Rarity",
    // Item classes
    "Helmet", "Gloves", "Boots", "Body", "Belt",
    "Ring", "Amulet", "Shield", "Quiver",
    "Sword", "Axe", "Mace", "Dagger", "Wand", "Staff", "Bow",
    "Sceptre", "Crossbow", "Flail", "Spear",
    // Rarity
    "Normal", "Magic", "Rare", "Unique",
  ];
  for (const w of extraWords) words.add(w);

  // Sort case-insensitively; words that differ only in case fall back to a
  // code-unit comparison so the output does not depend on API response order.
  const sortedWords = [...words].sort((a, b) => {
    const byFoldedCase = a.toLowerCase().localeCompare(b.toLowerCase());
    if (byFoldedCase !== 0) return byFoldedCase;
    if (a === b) return 0;
    return a < b ? -1 : 1;
  });
  const wordsPath = join(__dirname, "poe2.user-words");
  writeFileSync(wordsPath, sortedWords.join("\n") + "\n");
  console.log(`Wrote ${sortedWords.length} words to ${wordsPath}`);

  // Generate user-patterns for common tooltip formats.
  //
  // Tesseract's user-patterns syntax is NOT regex (see the pattern-file notes in
  // Tesseract's trie.h): "\d" is the digit class, "\*" after an element marks it
  // as repeatable, and "+", "-", "%" are ordinary characters that must not be
  // backslash-escaped. Regex forms such as "\+" or "\d+" are therefore wrong.
  const patterns = [
    // Stat values: "+12%" (as in "+12% to Chaos Resistance"), "+3"
    "+\\d\\*%",
    "+\\d\\*",
    "-\\d\\*%",
    "-\\d\\*",
    // Ranges: "10-20"
    "\\d\\*-\\d\\*",
    // Currency amounts: "7x", "35x" (as in "7x Divine Orb")
    "\\d\\*x",
    // Unsigned percentages: "20%"
    "\\d\\*%",
    // NOTE(review): the patterns below contain spaces. Tesseract's documentation
    // describes patterns as non-whitespace strings, so these may never match --
    // confirm against the Tesseract version in use before relying on them.
    // Level requirements
    "Level \\d\\*",
    // Asking Price section
    "Asking Price:",
    // Item level
    "Item Level: \\d\\*",
    // Requires line
    "Requires:",
    // Rating values
    "Rating: \\d\\*",
    "Shield: \\d\\*",
    "Quality: +\\d\\*%",
  ];
  const patternsPath = join(__dirname, "poe2.user-patterns");
  writeFileSync(patternsPath, patterns.join("\n") + "\n");
  console.log(`Wrote ${patterns.length} patterns to ${patternsPath}`);
}
|
||||
|
||||
/**
 * Rejection handler for the script entry point: prints the failure to stderr
 * and terminates the process with exit status 1.
 */
function exitWithError(failure) {
  console.error(failure);
  process.exit(1);
}

// Kick off the generator; any rejection from main() ends the process.
main().catch(exitWithError);
|
||||
Loading…
Add table
Add a link
Reference in a new issue