diff --git a/fonts.conf b/fonts.conf new file mode 100644 index 0000000..798fcf5 --- /dev/null +++ b/fonts.conf @@ -0,0 +1,7 @@ + + + +/usr/local/share/fonts/fontin + + + diff --git a/tools/OcrDaemon/OcrDaemon.csproj b/tools/OcrDaemon/OcrDaemon.csproj index a21aa76..d695273 100644 --- a/tools/OcrDaemon/OcrDaemon.csproj +++ b/tools/OcrDaemon/OcrDaemon.csproj @@ -16,6 +16,9 @@ PreserveNewest + + PreserveNewest + diff --git a/tools/OcrDaemon/Program.cs b/tools/OcrDaemon/Program.cs index 44d49aa..bedaa61 100644 --- a/tools/OcrDaemon/Program.cs +++ b/tools/OcrDaemon/Program.cs @@ -12,11 +12,13 @@ SetProcessDPIAware(); // Pre-create the Tesseract OCR engine (reused across all requests) var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata"); +var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng"; TesseractEngine tessEngine; try { - tessEngine = new TesseractEngine(tessdataPath, "eng", EngineMode.LstmOnly); + tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly); tessEngine.DefaultPageSegMode = PageSegMode.Auto; + Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}"); } catch (Exception ex) { diff --git a/tools/OcrDaemon/tessdata/poe2.traineddata b/tools/OcrDaemon/tessdata/poe2.traineddata new file mode 100644 index 0000000..da48649 Binary files /dev/null and b/tools/OcrDaemon/tessdata/poe2.traineddata differ diff --git a/tools/training/continue_training.sh b/tools/training/continue_training.sh new file mode 100644 index 0000000..25b0119 --- /dev/null +++ b/tools/training/continue_training.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash +# Continue LSTM fine-tuning in batches until improvement plateaus. +# Stops when BCER improvement drops below 0.3% per batch. 
+# +# Run from PowerShell: +# MSYS_NO_PATHCONV=1 wsl -d Ubuntu-22.04 -u root bash /mnt/c/Users/boki/repos/poe2trade/tools/training/continue_training.sh + +set -euo pipefail + +WORK_DIR="$HOME/poe2-tesseract-training" +TESSDATA_DIR="/mnt/c/Users/boki/repos/poe2trade/tools/OcrDaemon/tessdata" +BATCH_SIZE=400 +MIN_IMPROVEMENT=0.3 # stop if BCER improves less than this per batch +MAX_TOTAL=5000 # absolute safety cap + +cd "$WORK_DIR" + +# Parse BCER from checkpoint filename like poe2_4.230_102_800.checkpoint +get_best_bcer() { + ls -1 output/poe2_*.checkpoint 2>/dev/null \ + | grep -v 'poe2_checkpoint$' \ + | sed 's/.*poe2_\([0-9.]*\)_.*/\1/' \ + | sort -n \ + | head -1 +} + +# Get max iterations from checkpoint filename +get_max_iter() { + ls -1 output/poe2_*.checkpoint 2>/dev/null \ + | grep -v 'poe2_checkpoint$' \ + | sed 's/.*_\([0-9]*\)\.checkpoint/\1/' \ + | sort -n \ + | tail -1 +} + +prev_bcer=$(get_best_bcer) +current_max=$(get_max_iter) + +echo "=== Continuing LSTM Training ===" +echo "Starting BCER: ${prev_bcer}%" +echo "Starting iterations: $current_max" +echo "Batch size: $BATCH_SIZE" +echo "Min improvement threshold: ${MIN_IMPROVEMENT}%" +echo "" + +batch=0 +while true; do + batch=$((batch + 1)) + new_max=$((current_max + BATCH_SIZE)) + + if [ "$new_max" -gt "$MAX_TOTAL" ]; then + echo "Reached safety cap of $MAX_TOTAL iterations. Stopping." 
+ break + fi + + echo "── Batch $batch: iterations $current_max → $new_max ──" + + lstmtraining \ + --continue_from output/poe2_checkpoint \ + --traineddata eng.traineddata \ + --train_listfile training_files.txt \ + --model_output output/poe2 \ + --max_iterations "$new_max" \ + --target_error_rate 0.005 \ + --debug_interval -1 2>&1 | tail -5 + + new_bcer=$(get_best_bcer) + echo "" + echo "Batch $batch result: BCER ${prev_bcer}% → ${new_bcer}%" + + # Calculate improvement using awk (bash can't do float math) + improvement=$(awk "BEGIN {printf \"%.3f\", $prev_bcer - $new_bcer}") + echo "Improvement: ${improvement}%" + + # Check if improvement is below threshold + stop=$(awk "BEGIN {print ($improvement < $MIN_IMPROVEMENT) ? 1 : 0}") + + if [ "$stop" -eq 1 ]; then + echo "" + echo "Improvement (${improvement}%) < threshold (${MIN_IMPROVEMENT}%). Stopping." + break + fi + + prev_bcer="$new_bcer" + current_max="$new_max" + echo "" +done + +# Package final model +echo "" +echo "=== Packaging final model ===" +final_bcer=$(get_best_bcer) +echo "Final BCER: ${final_bcer}%" + +lstmtraining --stop_training \ + --continue_from output/poe2_checkpoint \ + --traineddata eng.traineddata \ + --model_output output/poe2.traineddata + +cp output/poe2.traineddata "$TESSDATA_DIR/poe2.traineddata" +echo "Model saved to: $TESSDATA_DIR/poe2.traineddata" +ls -lh "$TESSDATA_DIR/poe2.traineddata" +echo "" +echo "=== Done ===" diff --git a/tools/training/poe2_training_text.txt b/tools/training/poe2_training_text.txt new file mode 100644 index 0000000..46ba7fa --- /dev/null +++ b/tools/training/poe2_training_text.txt @@ -0,0 +1,365 @@ +Sekhema Sandals +Deicide Axe +Expert Shortbow +Vaal Rapier +Gemini Claw +Imperial Staff +Maelstrom Mace +Ezomyte Spiked Shield +Champion Kite Shield +Prophecy Wand +Occultist's Vestment +Assassin's Garb +Glorious Plate +Sadist Garb +Saintly Chainmail +Sorcerer Boots +Arcanist Gloves +Hubris Circlet +Titanium Spirit Shield +Fossilised Spirit Shield +Zodiac 
Leather +Vaal Regalia +Nightmare Bascinet +Archon Kite Shield +Crusader Boots +Fingerless Silk Gloves +Gripped Gloves +Spiked Gloves +Bone Helmet +Two-Toned Boots +Crystal Belt +Stygian Vise +Marble Amulet +Onyx Amulet +Turquoise Amulet +Agate Amulet +Citrine Amulet +Moonstone Ring +Diamond Ring +Opal Ring +Vermillion Ring +Cerulean Ring +Steel Ring +Unset Ring +Prismatic Ring +Amethyst Ring +Topaz Ring +Sapphire Ring +Ruby Ring +Cobalt Jewel +Crimson Jewel +Viridian Jewel +Prismatic Jewel +Large Cluster Jewel +Medium Cluster Jewel +Small Cluster Jewel +Headhunter +Mageblood +Shavronne's Wrappings +Aegis Aurora +Ashes of the Stars +The Squire +Nimis +Heatshiver +Forbidden Flame +Forbidden Flesh +Dying Sun +Bottled Faith +Impossible Escape +Thread of Hope +Inspired Learning +Unnatural Instinct +Watcher's Eye +Sublime Vision +Voices +The Apothecary +The Doctor +The Nurse +House of Mirrors +The Fiend +The Demon +Brother's Stash +Seven Years Bad Luck +Unrequited Love +The Price of Devotion +Love Through Ice +Chaos Orb +Divine Orb +Exalted Orb +Mirror of Kalandra +Orb of Alteration +Orb of Alchemy +Orb of Scouring +Orb of Annulment +Orb of Fusing +Orb of Chance +Orb of Regret +Jeweller's Orb +Chromatic Orb +Blessed Orb +Regal Orb +Vaal Orb +Glassblower's Bauble +Gemcutter's Prism +Cartographer's Chisel +Armourer's Scrap +Blacksmith's Whetstone +Awakener's Orb +Maven's Orb +Elevated Sextant +Orb of Dominance +Orb of Conflict +Sacred Orb +Grand Eldritch Ember +Grand Eldritch Ichor +Exceptional Eldritch Ember +Exceptional Eldritch Ichor +Perfect Fossil +Dense Fossil +Pristine Fossil +Jagged Fossil +Serrated Fossil +Essence of Contempt +Essence of Greed +Essence of Hatred +Essence of Wrath +Deafening Essence of Contempt +Deafening Essence of Zeal ++45% to Fire Resistance ++42% to Cold Resistance ++38% to Lightning Resistance ++15% to all Elemental Resistances ++12% to Chaos Resistance ++120 to maximum Life ++85 to maximum Life ++60 to maximum Energy Shield ++35 to maximum 
Mana ++50 to Strength ++50 to Dexterity ++50 to Intelligence +Adds 12 to 24 Fire Damage +Adds 8 to 16 Cold Damage +Adds 1 to 42 Lightning Damage +Adds 15 to 28 Physical Damage +Adds 5 to 10 Chaos Damage ++1 to Level of all Skill Gems ++2 to Level of all Spell Skill Gems ++1 to Level of all Fire Skill Gems ++2 to Level of all Minion Skill Gems +25% increased Critical Strike Chance ++1.5% to Critical Strike Multiplier +15% increased Attack Speed +20% increased Cast Speed +10% increased Movement Speed +30% increased Spell Damage +45% increased Elemental Damage +120% increased Physical Damage +15% increased Area of Effect +30% increased Projectile Speed +25% increased Projectile Damage ++2 to Level of Socketed Gems ++30% to Quality of Socketed Gems +Socketed Gems are Supported by Level 20 Spell Echo +Socketed Gems are Supported by Level 1 Empower +Regenerate 50 Life per second +Regenerate 2% of Life per second +0.4% of Physical Attack Damage Leeched as Life +Gain 10% of Physical Damage as Extra Fire Damage +20% of Physical Damage Converted to Fire Damage +Enemies have -9% to Fire Resistance +You have Onslaught while on Full Life +Gain Unholy Might for 4 seconds on Critical Strike +Quality: +20% +Energy Shield: 450 +Armour: 2500 +Evasion Rating: 1800 +Physical Damage: 180-320 +Elemental Damage: 45-90 +Critical Strike Chance: 6.50% +Attacks per Second: 1.45 +Weapon Range: 13 +Requires Level 70 +Requires Level 80 +Requires 155 Str +Requires 155 Dex +Requires 155 Int +Requires 100 Str 100 Dex +Item Level: 86 +Item Level: 83 +Item Level: 75 +Corrupted +Mirrored +Unidentified +Shaper Item +Elder Item +Crusader Item +Hunter Item +Redeemer Item +Warlord Item +Synthesised +Fractured +Split +Veiled Prefix +Veiled Suffix +Prefixes Cannot Be Changed +Suffixes Cannot Be Changed +Can have up to 3 Crafted Modifiers +Cannot roll Attack Modifiers +Cannot roll Caster Modifiers +1x Divine Orb +2x Chaos Orb +5x Exalted Orb +10x Chaos Orb +1x Mirror of Kalandra +0.5 Divine Orb +3.5 Chaos 
Orb +15 Chaos Orb +100 Divine Orb +Price: 1 divine +Price: 5 chaos +Price: 10 exalted +Listed 2 hours ago +Listed 5 minutes ago +Listed 1 day ago +Online +AFK +Whisper +Direct Whisper +Travel to Hideout +Asking Price: +Warning: This item's price +Accept Trade +Cancel Trade +Stash Tab +Map Tab +Currency Tab +Divination Tab +Fragment Tab +Unique Tab +Essence Tab +Delve Tab +Blight Tab +Metamorph Tab +Delirium Tab +Heist Tab +Expedition Tab +Sentinel Tab +Normal +Magic +Rare +Unique +Currency +Divination Card +Gem +Flask +Map +Prophecy +Scarab +Fossil +Resonator +Incubator +Oil +Catalyst +Delirium Orb +Vial +Invitation +Timeless Emblem +Timeless Splinter +Breach Splinter +Breachstone +Sacrifice Fragment +Mortal Fragment +Uber Fragment +Maven's Invitation +The Feared +The Formed +The Twisted +The Forgotten +The Hidden +The Elderslayers +The Shaper +The Elder +Uber Elder +Sirus, Awakener of Worlds +Maven, The Cruelty +Cortex +The Alluring Abyss +Hall of Grandmasters +Doryani's Machinarium +Pillars of Arun +Augmented Distant Memory +Rewritten Distant Memory +Altered Distant Memory +Twisted Distant Memory +Infused Engineer's Orb +Infused Beachhead +Winged Scarab +Gilded Scarab +Polished Scarab +Rusted Scarab +Tier 16 Map +Tier 15 Map +Tier 14 Map +Area Level: 83 +Area Level: 81 +Monster Level: 85 +Quantity: +120% +Rarity: +350% +Pack Size: +45% ++1 Level of Contained Gems +Blood-filled Vessel +Sacred Blossom +Writhing Invitation +Life +120 +Mana +60 +ES +80 +Str +50 +Dex +45 +Int +55 +Res +42% +DPS 450.5 +pDPS 380.2 +eDPS 125.8 +Total: 15 Chaos Orb +Total: 3.5 Divine Orb +Buyout: 10 chaos +Buyout: 1 divine +exact price 5 divine +~price 2 divine +~b/o 15 chaos +ilvl 86 +ilvl 83 +ilvl 75 +6-Link +5-Link +4-Link +6L +5L +4L +R-R-R-G-G-B +R-G-B-B-G-R +White Sockets +3 White +Implicit Modifier +Explicit Modifier +Enchantment +Crafted Modifier +Fractured Modifier +Lab Enchant +Eldritch Implicit +Grand Spectrum +Transcendent Flesh +Transcendent Mind +Transcendent Spirit +Brutal 
Restraint
+Elegant Hubris
+Glorious Vanity
+Lethal Pride
+Militant Faith
+Darkness Enthroned
+Replica Shroud of the Lightless
+Farrul's Fur
+Doppelganger Guise
+Melding of the Flesh
+Crystallised Omniscience
diff --git a/tools/training/train.sh b/tools/training/train.sh
new file mode 100644
index 0000000..21ae458
--- /dev/null
+++ b/tools/training/train.sh
@@ -0,0 +1,473 @@
+#!/usr/bin/env bash
+# Fine-tune Tesseract 5 LSTM on Fontin / Fontin SmallCaps for POE2
+#
+# IMPORTANT: Run inside WSL Ubuntu (NOT docker-desktop).
+#
+# From PowerShell:
+#   wsl -d Ubuntu-22.04 bash /mnt/c/Users/boki/repos/poe2trade/tools/training/train.sh
+#
+# Or from inside WSL Ubuntu:
+#   cd /mnt/c/Users/boki/repos/poe2trade/tools/training
+#   bash train.sh
+#
+# Prerequisites:
+#   - WSL Ubuntu installed: wsl --install -d Ubuntu-22.04
+#   - Internet access (downloads fonts + Tesseract tools if needed)
+
+set -euo pipefail
+
+# ── Sanity checks ────────────────────────────────────────────────────────────
+
+if [ -z "${BASH_VERSION:-}" ]; then
+  echo "ERROR: This script requires bash. Run with: bash train.sh"
+  echo "If you're in docker-desktop WSL, switch to Ubuntu:"
+  echo " wsl -d Ubuntu-22.04 bash /mnt/c/Users/boki/repos/poe2trade/tools/training/train.sh"
+  exit 1
+fi
+
+# Detect which WSL distro we're in
+if grep -qi alpine /etc/os-release 2>/dev/null || [ -f /etc/alpine-release ]; then
+  echo "ERROR: You're in docker-desktop (Alpine). Use Ubuntu instead:"
+  echo " wsl -d Ubuntu-22.04 bash /mnt/c/Users/boki/repos/poe2trade/tools/training/train.sh"
+  exit 1
+fi
+
+# ── Resolve paths ────────────────────────────────────────────────────────────
+
+# Handle both /mnt/c/ (Ubuntu) and /mnt/host/c/ (docker-desktop) paths
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+TESSDATA_DIR="$PROJECT_ROOT/tools/OcrDaemon/tessdata"
+TRAINING_TEXT="$SCRIPT_DIR/poe2_training_text.txt"
+WORK_DIR="$HOME/poe2-tesseract-training"
+FONT_DIR="/usr/local/share/fonts/fontin"
+MAX_ITERATIONS=800
+TARGET_ERROR=0.01
+
+if [ ! -f "$TRAINING_TEXT" ]; then
+  echo "ERROR: Training text not found at: $TRAINING_TEXT"
+  exit 1
+fi
+
+if [ ! -f "$TESSDATA_DIR/eng.traineddata" ]; then
+  echo "ERROR: eng.traineddata not found at: $TESSDATA_DIR/"
+  exit 1
+fi
+
+echo "=== POE2 Tesseract Fine-Tuning ==="
+echo "Script dir: $SCRIPT_DIR"
+echo "Work dir: $WORK_DIR"
+echo "Training text: $TRAINING_TEXT"
+echo "Tessdata: $TESSDATA_DIR"
+echo ""
+
+# ── Helpers ──────────────────────────────────────────────────────────────────
+
+# Print the first line of "tesseract --version". sed reads the whole stream, so
+# tesseract cannot be killed by SIGPIPE (which pipefail would turn into an abort).
+print_tesseract_version() {
+  tesseract --version 2>&1 | sed -n '1p'
+}
+
+# Print the Fontin font names text2image can see in $FONT_DIR, one per line.
+# text2image lists fonts as " 6: Fontin Bold"; the index before the first colon
+# is cut off so that every caller gets a bare font name for --font.
+list_fontin_fonts() {
+  text2image --list_available_fonts --fonts_dir "$FONT_DIR" 2>/dev/null \
+    | grep -i fontin \
+    | cut -d: -f2- \
+    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
+    || true
+}
+
+# ── Step 1: Install Tesseract training tools ─────────────────────────────────
+
+# Make text2image and lstmtraining available: reuse an existing install, else
+# install via apt (trying the tesseract-ocr-devel PPA), else build 5.3.4 from source.
+install_tesseract_tools() {
+  echo "── Step 1: Installing Tesseract training tools ──"
+
+  if command -v text2image >/dev/null 2>&1 && command -v lstmtraining >/dev/null 2>&1; then
+    echo "Tesseract training tools already installed."
+    print_tesseract_version
+    return 0
+  fi
+
+  echo "Installing Tesseract 5.x and training tools..."
+
+  # Update package list first
+  sudo apt-get update -qq
+
+  # Try PPA for latest Tesseract
+  if sudo add-apt-repository -y ppa:alex-p/tesseract-ocr-devel 2>/dev/null; then
+    sudo apt-get update -qq
+  fi
+
+  sudo apt-get install -y tesseract-ocr tesseract-ocr-eng \
+    libicu-dev libpango1.0-dev libcairo2-dev \
+    wget unzip fontconfig 2>&1 | tail -3
+
+  # Check if we got text2image from the package
+  if command -v text2image >/dev/null 2>&1; then
+    echo "Installed via apt."
+    print_tesseract_version
+    return 0
+  fi
+
+  # Build from source as fallback
+  echo "Package didn't include training tools, building from source..."
+  sudo apt-get install -y automake g++ git libtool libleptonica-dev \
+    make pkg-config libicu-dev libpango1.0-dev libcairo2-dev 2>&1 | tail -3
+
+  local BUILD_DIR="$HOME/tesseract-build"
+  if [ ! -d "$BUILD_DIR" ]; then
+    git clone --depth 1 --branch 5.3.4 https://github.com/tesseract-ocr/tesseract.git "$BUILD_DIR"
+  fi
+
+  pushd "$BUILD_DIR" > /dev/null
+  ./autogen.sh
+  ./configure
+  make -j"$(nproc)"
+  make training
+  sudo make install
+  sudo make training-install
+  sudo ldconfig
+  popd > /dev/null
+
+  echo "Built from source."
+  print_tesseract_version
+}
+
+# ── Step 2: Download and install Fontin fonts ────────────────────────────────
+
+# Install the Fontin .otf files into $FONT_DIR unless fontconfig already lists a
+# Fontin font. Returns 1 when the download fails or is not a zip archive.
+install_fonts() {
+  echo ""
+  echo "── Step 2: Installing Fontin fonts ──"
+
+  # Capture the listing once; "fc-list | grep -q" can report failure under
+  # pipefail when grep exits early and fc-list is killed by SIGPIPE.
+  local installed
+  installed=$(fc-list 2>/dev/null | grep -i fontin || true)
+  if [ -n "$installed" ]; then
+    echo "Fontin fonts already installed:"
+    printf '%s\n' "$installed"
+    return 0
+  fi
+
+  # Step 1 installs nothing when Tesseract is already present, so make sure the
+  # tools used below exist instead of dying on "command not found".
+  local -a missing=()
+  command -v curl >/dev/null 2>&1 || missing+=(curl)
+  command -v file >/dev/null 2>&1 || missing+=(file)
+  command -v unzip >/dev/null 2>&1 || missing+=(unzip)
+  command -v fc-cache >/dev/null 2>&1 || missing+=(fontconfig)
+  if [ "${#missing[@]}" -gt 0 ]; then
+    echo "Installing missing tools: ${missing[*]}"
+    sudo apt-get install -y "${missing[@]}" 2>&1 | tail -3
+  fi
+
+  local FONT_TMP="$HOME/fontin-download"
+  mkdir -p "$FONT_TMP"
+  pushd "$FONT_TMP" > /dev/null
+
+  echo "Downloading Fontin fonts..."
+  # wfonts.com zip includes Regular, Bold, Italic, SmallCaps (all in one)
+  # -f: fail on HTTP errors instead of saving the server's error page
+  curl -fsSL "https://static.wfonts.com/download/data/2015/03/10/fontin/fontin.zip" -o fontin.zip || {
+    echo "ERROR: Failed to download Fontin fonts."
+    echo "Download manually and place .otf files in $FONT_DIR"
+    popd > /dev/null
+    return 1
+  }
+
+  # Verify it's a real zip
+  if ! file fontin.zip | grep -q "Zip archive"; then
+    echo "ERROR: Downloaded file is not a zip. Font site may be blocking downloads."
+    echo "Download manually from https://www.wfonts.com/font/fontin"
+    echo "Place .otf files in $FONT_DIR"
+    popd > /dev/null
+    return 1
+  fi
+
+  unzip -qo fontin.zip -d fontin/ 2>/dev/null || true
+
+  sudo mkdir -p "$FONT_DIR"
+  # Copy OTF files
+  find fontin/ \( -name '*.otf' -o -name '*.OTF' \) \
+    -exec sudo cp {} "$FONT_DIR/" \; 2>/dev/null || true
+  sudo fc-cache -fv > /dev/null 2>&1
+
+  echo "Installed fonts:"
+  fc-list | grep -i fontin || echo "WARNING: Fonts not found after install!"
+
+  popd > /dev/null
+  rm -rf "$FONT_TMP"
+}
+
+# ── Step 3: Generate training images ─────────────────────────────────────────
+
+# Render the training text with every detected Fontin font at exposures -1, 0
+# and 1 into $WORK_DIR/ground-truth. Returns 1 if no image set was produced.
+generate_training_images() {
+  echo ""
+  echo "── Step 3: Generating training images ──"
+
+  mkdir -p "$WORK_DIR/ground-truth" "$WORK_DIR/output"
+  cd "$WORK_DIR"
+
+  # Copy eng.traineddata from project
+  if [ ! -f "$WORK_DIR/eng.traineddata" ]; then
+    cp "$TESSDATA_DIR/eng.traineddata" "$WORK_DIR/"
+  fi
+
+  # Extract LSTM model for fine-tuning
+  if [ ! -f "$WORK_DIR/eng.lstm" ]; then
+    echo "Extracting LSTM model from eng.traineddata..."
+    combine_tessdata -e eng.traineddata eng.lstm
+  fi
+
+  # Discover available Fontin font names
+  echo "Available Fontin fonts:"
+  text2image --list_available_fonts --fonts_dir "$FONT_DIR" 2>/dev/null | grep -i fontin || true
+
+  # Keep the names in a file, one per line (they contain spaces and commas)
+  local FONT_LIST="$WORK_DIR/font_list.txt"
+  list_fontin_fonts > "$FONT_LIST"
+
+  if [ ! -s "$FONT_LIST" ]; then
+    echo "WARNING: No Fontin fonts auto-detected. Trying common names..."
+    cat > "$FONT_LIST" <<'NAMES'
+Fontin Bold
+Fontin Medium
+Fontin Medium Italic
+Fontin SmallCaps, Medium
+NAMES
+  fi
+
+  echo "Will generate images for:"
+  cat "$FONT_LIST"
+
+  local count=0 exp font_name safe outbase
+  for exp in -1 0 1; do
+    while IFS= read -r font_name; do
+      [ -z "$font_name" ] && continue
+      safe=$(echo "$font_name" | tr ' ' '-' | tr '[:upper:]' '[:lower:]')
+      outbase="ground-truth/poe2.${safe}.exp${exp}"
+
+      echo " Generating: $outbase (font='$font_name', exposure=$exp)"
+      if text2image \
+        --text "$TRAINING_TEXT" \
+        --outputbase "$outbase" \
+        --font "$font_name" \
+        --fonts_dir "$FONT_DIR" \
+        --ptsize 16 \
+        --xsize 3600 \
+        --ysize 480 \
+        --char_spacing 0.0 \
+        --exposure "$exp" \
+        --leading 32 2>&1; then
+        count=$((count + 1))
+      else
+        echo " WARNING: Failed for font '$font_name' exposure=$exp, skipping"
+      fi
+    done < "$FONT_LIST"
+  done
+
+  echo "Generated $count training image sets."
+
+  if [ "$count" -eq 0 ]; then
+    echo "ERROR: No training images generated. Check font installation."
+    return 1
+  fi
+}
+
+# ── Step 4: Generate LSTMF files ─────────────────────────────────────────────
+
+# Turn every ground-truth .tif into a .lstmf file (existing ones are reused) and
+# write the list to training_files.txt. Returns 1 if no .lstmf file exists.
+generate_lstmf() {
+  echo ""
+  echo "── Step 4: Generating .lstmf training files ──"
+
+  cd "$WORK_DIR"
+
+  local tif base
+  for tif in ground-truth/*.tif; do
+    [ -f "$tif" ] || continue
+    base="${tif%.tif}"
+
+    if [ -f "${base}.lstmf" ]; then
+      echo " Skipping (exists): ${base}.lstmf"
+      continue
+    fi
+
+    echo " Processing: $tif"
+    if ! tesseract "$tif" "$base" --psm 6 lstm.train 2>&1; then
+      echo " WARNING: Failed to process $tif"
+    fi
+  done
+
+  # Create training file list
+  : > training_files.txt
+  local f
+  for f in ground-truth/*.lstmf; do
+    [ -f "$f" ] || continue
+    echo "$f" >> training_files.txt
+  done
+  local total
+  total=$(wc -l < training_files.txt)
+
+  echo "Created $total .lstmf files."
+
+  if [ "$total" -eq 0 ]; then
+    echo "ERROR: No .lstmf files generated."
+    return 1
+  fi
+}
+
+# ── Step 5: Fine-tune LSTM ───────────────────────────────────────────────────
+
+# Fine-tune eng.lstm on the generated .lstmf files; output goes to output/poe2*.
+fine_tune() {
+  echo ""
+  echo "── Step 5: Fine-tuning LSTM (max $MAX_ITERATIONS iterations) ──"
+
+  cd "$WORK_DIR"
+
+  lstmtraining \
+    --continue_from eng.lstm \
+    --traineddata eng.traineddata \
+    --train_listfile training_files.txt \
+    --model_output output/poe2 \
+    --max_iterations "$MAX_ITERATIONS" \
+    --target_error_rate "$TARGET_ERROR" \
+    --debug_interval -1
+
+  echo "Fine-tuning complete."
+}
+
+# ── Step 6: Package model ────────────────────────────────────────────────────
+
+# Convert the training checkpoint into output/poe2.traineddata and copy it to
+# $TESSDATA_DIR. Returns 1 if no checkpoint can be found.
+package_model() {
+  echo ""
+  echo "── Step 6: Packaging poe2.traineddata ──"
+
+  cd "$WORK_DIR"
+
+  # Find the checkpoint file
+  local checkpoint="" candidate
+  if [ -f "output/poe2_checkpoint" ]; then
+    checkpoint="output/poe2_checkpoint"
+  elif [ -f "output/poe2checkpoint" ]; then
+    checkpoint="output/poe2checkpoint"
+  else
+    # Newest best-model checkpoint. A glob replaces "ls -t output/poe2*", which
+    # could select a stale poe2.traineddata and, when nothing matched, aborted
+    # under set -e/pipefail before the error message below was printed.
+    for candidate in output/poe2*.checkpoint; do
+      [ -f "$candidate" ] || continue
+      if [ -z "$checkpoint" ] || [ "$candidate" -nt "$checkpoint" ]; then
+        checkpoint="$candidate"
+      fi
+    done
+  fi
+
+  if [ -z "$checkpoint" ] || [ ! -f "$checkpoint" ]; then
+    echo "ERROR: No checkpoint found in output/"
+    ls -la output/ 2>/dev/null || true
+    return 1
+  fi
+
+  echo "Using checkpoint: $checkpoint"
+
+  lstmtraining --stop_training \
+    --continue_from "$checkpoint" \
+    --traineddata eng.traineddata \
+    --model_output output/poe2.traineddata
+
+  # Copy to project tessdata
+  cp output/poe2.traineddata "$TESSDATA_DIR/poe2.traineddata"
+
+  echo "Model saved to: $TESSDATA_DIR/poe2.traineddata"
+  ls -lh "$TESSDATA_DIR/poe2.traineddata"
+}
+
+# ── Step 7: Quick validation ─────────────────────────────────────────────────
+
+# Render three sample lines with the first Fontin font and print the OCR output
+# of the eng and poe2 models for comparison. Skips when no font or test image
+# is available.
+validate() {
+  echo ""
+  echo "── Step 7: Quick validation ──"
+
+  cd "$WORK_DIR"
+
+  # Get first available fontin font name. The raw listing line carries an "N:"
+  # index that is not part of the name, so use the stripped list.
+  local test_font
+  test_font=$(list_fontin_fonts | sed -n '1p')
+
+  if [ -z "$test_font" ]; then
+    echo "Skipping validation (no Fontin font available)"
+    return 0
+  fi
+
+  # Private scratch directory instead of predictable file names in /tmp
+  local tmp_dir
+  tmp_dir=$(mktemp -d)
+
+  # Create a small test file
+  printf 'Quality: +20%%\nAdds 12 to 24 Fire Damage\n+45%% to Fire Resistance\n' > "$tmp_dir/poe2_test.txt"
+
+  if ! text2image \
+    --text "$tmp_dir/poe2_test.txt" \
+    --outputbase "$tmp_dir/poe2_test" \
+    --font "$test_font" \
+    --fonts_dir "$FONT_DIR" \
+    --ptsize 16 --xsize 1200 --ysize 200 \
+    --exposure 0 --leading 32 2>&1; then
+    echo "Skipping validation (could not generate test image)"
+    rm -rf -- "$tmp_dir"
+    return 0
+  fi
+
+  echo "=== eng model ==="
+  tesseract "$tmp_dir/poe2_test.tif" stdout -l eng --psm 6 2>/dev/null || true
+
+  echo ""
+  echo "=== poe2 model ==="
+  TESSDATA_PREFIX="$TESSDATA_DIR" tesseract "$tmp_dir/poe2_test.tif" stdout -l poe2 --psm 6 2>/dev/null || {
+    tesseract "$tmp_dir/poe2_test.tif" stdout --tessdata-dir "$TESSDATA_DIR" -l poe2 --psm 6 2>/dev/null || true
+  }
+
+  rm -rf -- "$tmp_dir"
+
+  echo ""
+  echo "Compare the outputs above — poe2 should be more accurate on Fontin text."
+}
+
+# ── Main ─────────────────────────────────────────────────────────────────────
+
+main() {
+  install_tesseract_tools
+  install_fonts
+  generate_training_images
+  generate_lstmf
+  fine_tune
+  package_model
+  validate
+
+  echo ""
+  echo "=== Done! ==="
+  echo "poe2.traineddata has been copied to: $TESSDATA_DIR/"
+  echo ""
+  echo "Next steps:"
+  echo " 1. Build the daemon: dotnet build tools/OcrDaemon -c Release"
+  echo " 2. Start the bot: npx tsx src/index.ts"
+  echo " 3. Test OCR quality in the dashboard"
+}
+
+main "$@"