work on OCR

This commit is contained in:
Boki 2026-02-11 17:42:28 -05:00
parent 6600969947
commit 854a474435
13 changed files with 4374 additions and 38 deletions

View file

@ -0,0 +1,166 @@
#!/usr/bin/env node
/**
* Fetches POE2 trade API data and generates Tesseract user-words and user-patterns
* files to improve OCR accuracy for tooltip text.
*
* Usage: node generate-words.mjs
* Output: poe2.user-words, poe2.user-patterns (in same directory)
*/
import { writeFileSync } from "fs";
import { dirname, join } from "path";
import { fileURLToPath } from "url";
// Directory containing this script, derived from import.meta.url because ES
// modules do not provide a built-in __dirname. The generated files are
// written next to the script.
const __dirname = dirname(fileURLToPath(import.meta.url));
// User-Agent header value sent with every trade API request (see fetchJson).
const UA = "OAuth poe2trade/1.0 (contact: poe2trade@users.noreply.github.com)";
/**
 * Downloads one dataset from the POE2 trade "data" API and parses it as JSON.
 *
 * @param {string} path - Dataset name appended to the trade2 data endpoint
 *   (the script requests "items", "stats", "static" and "filters").
 * @returns {Promise<any>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK; the message contains
 *   the requested URL and the HTTP status code.
 */
async function fetchJson(path) {
  const endpoint = `https://www.pathofexile.com/api/trade2/data/${path}`;
  const requestOptions = { headers: { "User-Agent": UA } };
  const response = await fetch(endpoint, requestOptions);

  if (response.ok) {
    return response.json();
  }

  throw new Error(`${endpoint}: ${response.status}`);
}
/**
 * Common tooltip keywords that the trade API data does not necessarily
 * contain. They are always added to the generated word list.
 */
const EXTRA_WORDS = [
  // Section headers
  "Quality", "Requires", "Level", "Asking", "Price",
  "Corrupted", "Mirrored", "Unmodifiable",
  "Twice", "Sockets",
  // Attributes
  "Strength", "Dexterity", "Intelligence", "Spirit",
  // Defense types
  "Armour", "Evasion", "Rating", "Energy", "Shield",
  // Damage types
  "Physical", "Elemental", "Lightning", "Cold", "Fire", "Chaos",
  // Common mod words
  "increased", "reduced", "more", "less",
  "added", "converted", "regeneration",
  "maximum", "minimum", "total",
  "Resistance", "Damage", "Speed", "Duration",
  "Critical", "Hit", "Chance", "Multiplier",
  "Attack", "Cast", "Spell", "Minion", "Skill",
  "Mana", "Life", "Rarity",
  // Item classes ("Shield" is already listed under defense types)
  "Helmet", "Gloves", "Boots", "Body", "Belt",
  "Ring", "Amulet", "Quiver",
  "Sword", "Axe", "Mace", "Dagger", "Wand", "Staff", "Bow",
  "Sceptre", "Crossbow", "Flail", "Spear",
  // Rarity
  "Normal", "Magic", "Rare", "Unique",
];

/**
 * "One or more digits" in Tesseract user-pattern syntax.
 *
 * Tesseract patterns are NOT regular expressions: `\d` is the digit class and
 * `\*` marks the preceding unichar as repeatable zero or more times, so
 * "digit, then digit repeated" expresses `\d+`. Every other character
 * (including `+`, `-` and `%`) is a literal and must not be backslash-escaped.
 */
const DIGITS = "\\d\\d\\*";

/**
 * Tooltip formats written to the user-patterns file.
 *
 * NOTE(review): Tesseract appears to apply patterns per word, so the entries
 * containing a space may never match - confirm against OCR results.
 */
const TOOLTIP_PATTERNS = [
  // Stat values: "+12% to Chaos Resistance", "+3 to Level"
  `+${DIGITS}%`,
  `+${DIGITS}`,
  `-${DIGITS}%`,
  `-${DIGITS}`,
  // Ranges: "10-20"
  `${DIGITS}-${DIGITS}`,
  // Currency amounts: "7x Divine Orb", "35x Divine Orb"
  `${DIGITS}x`,
  // Percentages: "20%"
  `${DIGITS}%`,
  // Level requirements: "Level 65"
  `Level ${DIGITS}`,
  // Asking Price section
  "Asking Price:",
  // Item level
  `Item Level: ${DIGITS}`,
  // Requires line
  "Requires:",
  // Rating values
  `Rating: ${DIGITS}`,
  `Shield: ${DIGITS}`,
  `Quality: +${DIGITS}%`,
];

// A run of letters (any script), optionally joined by internal apostrophes
// so possessives such as "Atziri's" stay intact.
const WORD_PATTERN = /\p{L}+(?:['’]\p{L}+)*/gu;

/**
 * Splits API text into the individual words worth adding to the dictionary.
 *
 * Placeholders (`#`) and bracket characters are stripped first; every
 * remaining run of letters becomes a word. Digits, `+`/`-`, `|`, `/` and other
 * punctuation act as separators, and single-letter fragments are discarded.
 *
 * @param {unknown} text - Text taken from an API entry; non-strings and empty
 *   strings yield no words.
 * @returns {string[]} Words of at least two characters, in order of appearance.
 */
function extractWords(text) {
  if (typeof text !== "string" || text === "") return [];
  const cleaned = text.replace(/[#{}()[\]]/g, "");
  return (cleaned.match(WORD_PATTERN) ?? []).filter((word) => word.length >= 2);
}

/**
 * Collects every word found in the four trade API datasets.
 *
 * @param {object} data - Parsed API responses, each with a `result` array.
 * @param {object} data.items - Item categories (`label`, `entries[].type|name|text`).
 * @param {object} data.stats - Stat categories (`entries[].text`).
 * @param {object} data.static_ - Static categories (`label`, `entries[].text`).
 * @param {object} data.filters - Filter groups (`title`, `filters[].text`,
 *   `filters[].option.options[].text`).
 * @returns {Set<string>} Unique words, case-sensitive.
 */
function collectApiWords({ items, stats, static_, filters }) {
  const words = new Set();
  const add = (text) => {
    for (const word of extractWords(text)) words.add(word);
  };

  // Items: type names (base types like "Tribal Mask", "Leather Vest")
  for (const category of items.result) {
    add(category.label);
    for (const entry of category.entries ?? []) {
      add(entry.type);
      add(entry.name);
      add(entry.text);
    }
  }

  // Stats: mod text like "+#% to Chaos Resistance", "# to maximum Life"
  for (const category of stats.result) {
    for (const entry of category.entries ?? []) {
      add(entry.text);
    }
  }

  // Static: currency/fragment names like "Divine Orb", "Scroll of Wisdom"
  for (const category of static_.result) {
    add(category.label);
    for (const entry of category.entries ?? []) {
      add(entry.text);
    }
  }

  // Filters: filter labels and option texts
  for (const group of filters.result) {
    add(group.title);
    for (const filter of group.filters ?? []) {
      add(filter.text);
      for (const option of filter.option?.options ?? []) {
        add(option.text);
      }
    }
  }

  return words;
}

/**
 * Orders words case-insensitively using a fixed locale, breaking ties by code
 * unit so the output does not depend on the host locale or on API ordering.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Negative, zero or positive, as expected by Array#sort.
 */
function compareWords(a, b) {
  const byLetters = a.toLowerCase().localeCompare(b.toLowerCase(), "en");
  if (byLetters !== 0) return byLetters;
  if (a === b) return 0;
  return a < b ? -1 : 1;
}

/**
 * Fetches the POE2 trade API datasets and writes the Tesseract helper files
 * `poe2.user-words` and `poe2.user-patterns` next to this script.
 *
 * @returns {Promise<void>} Resolves once both files have been written.
 * @throws {Error} If any API request fails or a file cannot be written.
 */
async function main() {
  console.log("Fetching POE2 trade API data...");
  const [items, stats, static_, filters] = await Promise.all([
    fetchJson("items"),
    fetchJson("stats"),
    fetchJson("static"),
    fetchJson("filters"),
  ]);

  const words = collectApiWords({ items, stats, static_, filters });
  for (const word of EXTRA_WORDS) words.add(word);

  // Sort and write user-words (one word per line)
  const sortedWords = [...words].sort(compareWords);
  const wordsPath = join(__dirname, "poe2.user-words");
  writeFileSync(wordsPath, sortedWords.join("\n") + "\n");
  console.log(`Wrote ${sortedWords.length} words to ${wordsPath}`);

  // Write user-patterns for common tooltip formats (one pattern per line)
  const patternsPath = join(__dirname, "poe2.user-patterns");
  writeFileSync(patternsPath, TOOLTIP_PATTERNS.join("\n") + "\n");
  console.log(`Wrote ${TOOLTIP_PATTERNS.length} patterns to ${patternsPath}`);
}
// Script entry point: run the generator; if it rejects, print the error and
// terminate with a non-zero exit code.
function exitOnFailure(error) {
  console.error(error);
  process.exit(1);
}

main().catch(exitOnFailure);

View file

@ -0,0 +1,14 @@
+\d\d\*%
+\d\d\*
-\d\d\*%
-\d\d\*
\d\d\*-\d\d\*
\d\d\*x
\d\d\*%
Level \d\d\*
Asking Price:
Item Level: \d\d\*
Requires:
Rating: \d\d\*
Shield: \d\d\*
Quality: +\d\d\*%

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long