poe2-bot/tools/OcrDaemon/tessdata/generate-words.mjs
2026-02-11 17:42:28 -05:00

166 lines
4.9 KiB
JavaScript

#!/usr/bin/env node
/**
* Fetches POE2 trade API data and generates Tesseract user-words and user-patterns
* files to improve OCR accuracy for tooltip text.
*
* Usage: node generate-words.mjs
* Output: poe2.user-words, poe2.user-patterns (in same directory)
*/
import { writeFileSync } from "fs";
import { dirname, join } from "path";
import { fileURLToPath } from "url";
// Directory that contains this module. ES modules do not provide a built-in
// __dirname, so it is derived from import.meta.url; the generated files are
// written next to this script.
const __dirname = dirname(fileURLToPath(import.meta.url));
// User-Agent header value sent with each trade API request made by fetchJson.
const UA = "OAuth poe2trade/1.0 (contact: poe2trade@users.noreply.github.com)";
/**
 * Downloads one dataset from the POE2 trade data endpoint and parses it as JSON.
 *
 * @param {string} path - Dataset name appended to the trade2 data URL.
 * @returns {Promise<any>} The parsed JSON response body.
 * @throws {Error} When the response is not OK; the message is "<url>: <status>".
 */
async function fetchJson(path) {
  const endpoint = `https://www.pathofexile.com/api/trade2/data/${path}`;
  const requestInit = { headers: { "User-Agent": UA } };
  const response = await fetch(endpoint, requestInit);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`${endpoint}: ${response.status}`);
}
// Tooltip vocabulary that is added regardless of what the trade API returns.
const EXTRA_WORDS = [
  // Section headers
  "Quality", "Requires", "Level", "Asking", "Price",
  "Corrupted", "Mirrored", "Unmodifiable",
  "Twice", "Sockets",
  // "Item Level: 80" — a pattern cannot span two words, so "Item" is a plain word
  "Item",
  // Attributes
  "Strength", "Dexterity", "Intelligence", "Spirit",
  // Defense types
  "Armour", "Evasion", "Rating", "Energy", "Shield",
  // Damage types
  "Physical", "Elemental", "Lightning", "Cold", "Fire", "Chaos",
  // Common mod words
  "increased", "reduced", "more", "less",
  "added", "converted", "regeneration",
  "maximum", "minimum", "total",
  "Resistance", "Damage", "Speed", "Duration",
  "Critical", "Hit", "Chance", "Multiplier",
  "Attack", "Cast", "Spell", "Minion", "Skill",
  "Mana", "Life", "Rarity",
  // Item classes
  "Helmet", "Gloves", "Boots", "Body", "Belt",
  "Ring", "Amulet", "Shield", "Quiver",
  "Sword", "Axe", "Mace", "Dagger", "Wand", "Staff", "Bow",
  "Sceptre", "Crossbow", "Flail", "Spear",
  // Rarity
  "Normal", "Magic", "Rare", "Unique",
];

// Tesseract user-patterns are NOT regular expressions. Only the character
// classes \c \d \n \p \a \A and the repetition marker \* are interpreted;
// every other character (including "+", "-" and "%") is literal, and a pattern
// may not contain whitespace because patterns are matched per word.
const REPEATED_DIGITS = "\\d\\*";

// Per-token patterns for the numeric shapes that appear in tooltips.
const USER_PATTERNS = [
  // Signed stat values: "+12%", "+3", "-10%", "-5"
  `+${REPEATED_DIGITS}%`,
  `+${REPEATED_DIGITS}`,
  `-${REPEATED_DIGITS}%`,
  `-${REPEATED_DIGITS}`,
  // Ranges: "10-20"
  `${REPEATED_DIGITS}-${REPEATED_DIGITS}`,
  // Currency amounts: "7x", "35x"
  `${REPEATED_DIGITS}x`,
  // Unsigned percentages: "20%"
  `${REPEATED_DIGITS}%`,
  // Bare numbers that follow labels such as "Level", "Rating:" or "Shield:"
  REPEATED_DIGITS,
  // Label tokens that carry a trailing colon
  "Price:",
  "Level:",
  "Requires:",
  "Rating:",
  "Shield:",
  "Quality:",
];

/**
 * Splits a piece of API text into dictionary-worthy words.
 *
 * "#" placeholders and bracket characters are deleted; "+", "-", "|" and "/"
 * act as separators so joined tokens do not end up as one junk word. Each
 * token is then stripped of leading/trailing non-letters and kept only if at
 * least two characters remain.
 *
 * @param {unknown} text - Text from the API; non-strings and empty strings yield nothing.
 * @returns {string[]} Words in order of appearance (may contain duplicates).
 */
function extractWords(text) {
  if (typeof text !== "string" || text === "") return [];
  const cleaned = text
    .replace(/[#{}()\[\]]/g, "")
    .replace(/[+\-|\/]/g, " ");
  const found = [];
  for (const token of cleaned.split(/\s+/)) {
    const word = token.replace(/^[^a-zA-Z]+|[^a-zA-Z]+$/g, "");
    if (word.length >= 2) found.push(word);
  }
  return found;
}

/**
 * Returns the category list of a trade API payload.
 *
 * @param {any} payload - Parsed JSON response.
 * @param {string} dataset - Dataset name, used only in the error message.
 * @returns {any[]} The payload's `result` array.
 * @throws {Error} When `result` is missing or not an array.
 */
function categoriesOf(payload, dataset) {
  const categories = payload?.result;
  if (!Array.isArray(categories)) {
    throw new Error(`Unexpected "${dataset}" response: "result" is not an array`);
  }
  return categories;
}

/**
 * Orders words case-insensitively; words that differ only by case fall back to
 * a plain string comparison so the output does not depend on insertion order.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Negative, zero or positive, as expected by Array.prototype.sort.
 */
function compareWords(a, b) {
  const folded = a.toLowerCase().localeCompare(b.toLowerCase());
  if (folded !== 0) return folded;
  if (a < b) return -1;
  return a > b ? 1 : 0;
}

/**
 * Gathers every word from the four trade API payloads plus EXTRA_WORDS.
 * Categories without `entries` / `filters` are treated as empty.
 *
 * @param {any} items - "items" payload (category label, entry type/name/text).
 * @param {any} stats - "stats" payload (entry text).
 * @param {any} static_ - "static" payload (category label, entry text).
 * @param {any} filters - "filters" payload (titles, filter texts, option texts).
 * @returns {Set<string>} Unique words.
 */
function collectWords(items, stats, static_, filters) {
  const words = new Set();
  const addText = (text) => {
    for (const word of extractWords(text)) words.add(word);
  };

  // Items: type names (base types like "Tribal Mask", "Leather Vest")
  for (const cat of categoriesOf(items, "items")) {
    addText(cat.label);
    for (const entry of cat.entries ?? []) {
      addText(entry.type);
      addText(entry.name);
      addText(entry.text);
    }
  }

  // Stats: mod text like "+#% to Chaos Resistance", "# to maximum Life"
  for (const cat of categoriesOf(stats, "stats")) {
    for (const entry of cat.entries ?? []) {
      addText(entry.text);
    }
  }

  // Static: currency/fragment names like "Divine Orb", "Scroll of Wisdom"
  for (const cat of categoriesOf(static_, "static")) {
    addText(cat.label);
    for (const entry of cat.entries ?? []) {
      addText(entry.text);
    }
  }

  // Filters: filter labels and option texts
  for (const cat of categoriesOf(filters, "filters")) {
    addText(cat.title);
    for (const filter of cat.filters ?? []) {
      addText(filter.text);
      for (const opt of filter.option?.options ?? []) {
        addText(opt.text);
      }
    }
  }

  for (const word of EXTRA_WORDS) words.add(word);
  return words;
}

/**
 * Fetches the POE2 trade API datasets and writes `poe2.user-words` and
 * `poe2.user-patterns` into this script's directory.
 *
 * @returns {Promise<void>}
 * @throws {Error} When a fetch fails or a payload has no `result` array.
 */
async function main() {
  console.log("Fetching POE2 trade API data...");
  const [items, stats, static_, filters] = await Promise.all([
    fetchJson("items"),
    fetchJson("stats"),
    fetchJson("static"),
    fetchJson("filters"),
  ]);

  // One word per line, sorted so regenerated files diff cleanly.
  const sortedWords = [...collectWords(items, stats, static_, filters)].sort(compareWords);
  const wordsPath = join(__dirname, "poe2.user-words");
  writeFileSync(wordsPath, `${sortedWords.join("\n")}\n`);
  console.log(`Wrote ${sortedWords.length} words to ${wordsPath}`);

  const patternsPath = join(__dirname, "poe2.user-patterns");
  writeFileSync(patternsPath, `${USER_PATTERNS.join("\n")}\n`);
  console.log(`Wrote ${USER_PATTERNS.length} patterns to ${patternsPath}`);
}
// Script entry point: run the generator and, if it rejects, print the error
// and terminate with exit status 1.
function reportFailure(error) {
  console.error(error);
  process.exit(1);
}
main().catch(reportFailure);