diff --git a/fonts.conf b/fonts.conf
new file mode 100644
index 0000000..798fcf5
--- /dev/null
+++ b/fonts.conf
@@ -0,0 +1,7 @@
+
+
+
+/usr/local/share/fonts/fontin
+
+
+
diff --git a/tools/OcrDaemon/OcrDaemon.csproj b/tools/OcrDaemon/OcrDaemon.csproj
index a21aa76..d695273 100644
--- a/tools/OcrDaemon/OcrDaemon.csproj
+++ b/tools/OcrDaemon/OcrDaemon.csproj
@@ -16,6 +16,9 @@
PreserveNewest
+
+ PreserveNewest
+
diff --git a/tools/OcrDaemon/Program.cs b/tools/OcrDaemon/Program.cs
index 44d49aa..bedaa61 100644
--- a/tools/OcrDaemon/Program.cs
+++ b/tools/OcrDaemon/Program.cs
@@ -12,11 +12,13 @@ SetProcessDPIAware();
// Pre-create the Tesseract OCR engine (reused across all requests)
var tessdataPath = Path.Combine(AppContext.BaseDirectory, "tessdata");
+var tessLang = File.Exists(Path.Combine(tessdataPath, "poe2.traineddata")) ? "poe2" : "eng";
TesseractEngine tessEngine;
try
{
- tessEngine = new TesseractEngine(tessdataPath, "eng", EngineMode.LstmOnly);
+ tessEngine = new TesseractEngine(tessdataPath, tessLang, EngineMode.LstmOnly);
tessEngine.DefaultPageSegMode = PageSegMode.Auto;
+ Console.Error.WriteLine($"Tesseract engine loaded with language: {tessLang}");
}
catch (Exception ex)
{
diff --git a/tools/OcrDaemon/tessdata/poe2.traineddata b/tools/OcrDaemon/tessdata/poe2.traineddata
new file mode 100644
index 0000000..da48649
Binary files /dev/null and b/tools/OcrDaemon/tessdata/poe2.traineddata differ
diff --git a/tools/training/continue_training.sh b/tools/training/continue_training.sh
new file mode 100644
index 0000000..25b0119
--- /dev/null
+++ b/tools/training/continue_training.sh
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+# Continue LSTM fine-tuning in batches until improvement plateaus.
+# Stops when BCER improvement drops below 0.3% per batch.
+#
+# Run from PowerShell:
+# MSYS_NO_PATHCONV=1 wsl -d Ubuntu-22.04 -u root bash /mnt/c/Users/boki/repos/poe2trade/tools/training/continue_training.sh
+
+set -euo pipefail
+
+WORK_DIR="$HOME/poe2-tesseract-training"
+TESSDATA_DIR="/mnt/c/Users/boki/repos/poe2trade/tools/OcrDaemon/tessdata"
+BATCH_SIZE=400
+MIN_IMPROVEMENT=0.3 # stop if BCER improves less than this per batch
+MAX_TOTAL=5000 # absolute safety cap
+
+cd "$WORK_DIR"
+
+# Parse BCER from checkpoint filename like poe2_4.230_102_800.checkpoint
+get_best_bcer() {
+ ls -1 output/poe2_*.checkpoint 2>/dev/null \
+ | grep -v 'poe2_checkpoint$' \
+ | sed 's/.*poe2_\([0-9.]*\)_.*/\1/' \
+ | sort -n \
+ | head -1
+}
+
+# Get max iterations from checkpoint filename
+get_max_iter() {
+ ls -1 output/poe2_*.checkpoint 2>/dev/null \
+ | grep -v 'poe2_checkpoint$' \
+ | sed 's/.*_\([0-9]*\)\.checkpoint/\1/' \
+ | sort -n \
+ | tail -1
+}
+
+prev_bcer=$(get_best_bcer)
+current_max=$(get_max_iter)
+
+echo "=== Continuing LSTM Training ==="
+echo "Starting BCER: ${prev_bcer}%"
+echo "Starting iterations: $current_max"
+echo "Batch size: $BATCH_SIZE"
+echo "Min improvement threshold: ${MIN_IMPROVEMENT}%"
+echo ""
+
+batch=0
+while true; do
+ batch=$((batch + 1))
+ new_max=$((current_max + BATCH_SIZE))
+
+ if [ "$new_max" -gt "$MAX_TOTAL" ]; then
+ echo "Reached safety cap of $MAX_TOTAL iterations. Stopping."
+ break
+ fi
+
+ echo "── Batch $batch: iterations $current_max → $new_max ──"
+
+ lstmtraining \
+ --continue_from output/poe2_checkpoint \
+ --traineddata eng.traineddata \
+ --train_listfile training_files.txt \
+ --model_output output/poe2 \
+ --max_iterations "$new_max" \
+ --target_error_rate 0.005 \
+ --debug_interval -1 2>&1 | tail -5
+
+ new_bcer=$(get_best_bcer)
+ echo ""
+ echo "Batch $batch result: BCER ${prev_bcer}% → ${new_bcer}%"
+
+ # Calculate improvement using awk (bash can't do float math)
+ improvement=$(awk "BEGIN {printf \"%.3f\", $prev_bcer - $new_bcer}")
+ echo "Improvement: ${improvement}%"
+
+ # Check if improvement is below threshold
+ stop=$(awk "BEGIN {print ($improvement < $MIN_IMPROVEMENT) ? 1 : 0}")
+
+ if [ "$stop" -eq 1 ]; then
+ echo ""
+ echo "Improvement (${improvement}%) < threshold (${MIN_IMPROVEMENT}%). Stopping."
+ break
+ fi
+
+ prev_bcer="$new_bcer"
+ current_max="$new_max"
+ echo ""
+done
+
+# Package final model
+echo ""
+echo "=== Packaging final model ==="
+final_bcer=$(get_best_bcer)
+echo "Final BCER: ${final_bcer}%"
+
+lstmtraining --stop_training \
+ --continue_from output/poe2_checkpoint \
+ --traineddata eng.traineddata \
+ --model_output output/poe2.traineddata
+
+cp output/poe2.traineddata "$TESSDATA_DIR/poe2.traineddata"
+echo "Model saved to: $TESSDATA_DIR/poe2.traineddata"
+ls -lh "$TESSDATA_DIR/poe2.traineddata"
+echo ""
+echo "=== Done ==="
diff --git a/tools/training/poe2_training_text.txt b/tools/training/poe2_training_text.txt
new file mode 100644
index 0000000..46ba7fa
--- /dev/null
+++ b/tools/training/poe2_training_text.txt
@@ -0,0 +1,365 @@
+Sekhema Sandals
+Deicide Axe
+Expert Shortbow
+Vaal Rapier
+Gemini Claw
+Imperial Staff
+Maelstrom Mace
+Ezomyte Spiked Shield
+Champion Kite Shield
+Prophecy Wand
+Occultist's Vestment
+Assassin's Garb
+Glorious Plate
+Sadist Garb
+Saintly Chainmail
+Sorcerer Boots
+Arcanist Gloves
+Hubris Circlet
+Titanium Spirit Shield
+Fossilised Spirit Shield
+Zodiac Leather
+Vaal Regalia
+Nightmare Bascinet
+Archon Kite Shield
+Crusader Boots
+Fingerless Silk Gloves
+Gripped Gloves
+Spiked Gloves
+Bone Helmet
+Two-Toned Boots
+Crystal Belt
+Stygian Vise
+Marble Amulet
+Onyx Amulet
+Turquoise Amulet
+Agate Amulet
+Citrine Amulet
+Moonstone Ring
+Diamond Ring
+Opal Ring
+Vermillion Ring
+Cerulean Ring
+Steel Ring
+Unset Ring
+Prismatic Ring
+Amethyst Ring
+Topaz Ring
+Sapphire Ring
+Ruby Ring
+Cobalt Jewel
+Crimson Jewel
+Viridian Jewel
+Prismatic Jewel
+Large Cluster Jewel
+Medium Cluster Jewel
+Small Cluster Jewel
+Headhunter
+Mageblood
+Shavronne's Wrappings
+Aegis Aurora
+Ashes of the Stars
+The Squire
+Nimis
+Heatshiver
+Forbidden Flame
+Forbidden Flesh
+Dying Sun
+Bottled Faith
+Impossible Escape
+Thread of Hope
+Inspired Learning
+Unnatural Instinct
+Watcher's Eye
+Sublime Vision
+Voices
+The Apothecary
+The Doctor
+The Nurse
+House of Mirrors
+The Fiend
+The Demon
+Brother's Stash
+Seven Years Bad Luck
+Unrequited Love
+The Price of Devotion
+Love Through Ice
+Chaos Orb
+Divine Orb
+Exalted Orb
+Mirror of Kalandra
+Orb of Alteration
+Orb of Alchemy
+Orb of Scouring
+Orb of Annulment
+Orb of Fusing
+Orb of Chance
+Orb of Regret
+Jeweller's Orb
+Chromatic Orb
+Blessed Orb
+Regal Orb
+Vaal Orb
+Glassblower's Bauble
+Gemcutter's Prism
+Cartographer's Chisel
+Armourer's Scrap
+Blacksmith's Whetstone
+Awakener's Orb
+Maven's Orb
+Elevated Sextant
+Orb of Dominance
+Orb of Conflict
+Sacred Orb
+Grand Eldritch Ember
+Grand Eldritch Ichor
+Exceptional Eldritch Ember
+Exceptional Eldritch Ichor
+Perfect Fossil
+Dense Fossil
+Pristine Fossil
+Jagged Fossil
+Serrated Fossil
+Essence of Contempt
+Essence of Greed
+Essence of Hatred
+Essence of Wrath
+Deafening Essence of Contempt
+Deafening Essence of Zeal
++45% to Fire Resistance
++42% to Cold Resistance
++38% to Lightning Resistance
++15% to all Elemental Resistances
++12% to Chaos Resistance
++120 to maximum Life
++85 to maximum Life
++60 to maximum Energy Shield
++35 to maximum Mana
++50 to Strength
++50 to Dexterity
++50 to Intelligence
+Adds 12 to 24 Fire Damage
+Adds 8 to 16 Cold Damage
+Adds 1 to 42 Lightning Damage
+Adds 15 to 28 Physical Damage
+Adds 5 to 10 Chaos Damage
++1 to Level of all Skill Gems
++2 to Level of all Spell Skill Gems
++1 to Level of all Fire Skill Gems
++2 to Level of all Minion Skill Gems
+25% increased Critical Strike Chance
++1.5% to Critical Strike Multiplier
+15% increased Attack Speed
+20% increased Cast Speed
+10% increased Movement Speed
+30% increased Spell Damage
+45% increased Elemental Damage
+120% increased Physical Damage
+15% increased Area of Effect
+30% increased Projectile Speed
+25% increased Projectile Damage
++2 to Level of Socketed Gems
++30% to Quality of Socketed Gems
+Socketed Gems are Supported by Level 20 Spell Echo
+Socketed Gems are Supported by Level 1 Empower
+Regenerate 50 Life per second
+Regenerate 2% of Life per second
+0.4% of Physical Attack Damage Leeched as Life
+Gain 10% of Physical Damage as Extra Fire Damage
+20% of Physical Damage Converted to Fire Damage
+Enemies have -9% to Fire Resistance
+You have Onslaught while on Full Life
+Gain Unholy Might for 4 seconds on Critical Strike
+Quality: +20%
+Energy Shield: 450
+Armour: 2500
+Evasion Rating: 1800
+Physical Damage: 180-320
+Elemental Damage: 45-90
+Critical Strike Chance: 6.50%
+Attacks per Second: 1.45
+Weapon Range: 13
+Requires Level 70
+Requires Level 80
+Requires 155 Str
+Requires 155 Dex
+Requires 155 Int
+Requires 100 Str 100 Dex
+Item Level: 86
+Item Level: 83
+Item Level: 75
+Corrupted
+Mirrored
+Unidentified
+Shaper Item
+Elder Item
+Crusader Item
+Hunter Item
+Redeemer Item
+Warlord Item
+Synthesised
+Fractured
+Split
+Veiled Prefix
+Veiled Suffix
+Prefixes Cannot Be Changed
+Suffixes Cannot Be Changed
+Can have up to 3 Crafted Modifiers
+Cannot roll Attack Modifiers
+Cannot roll Caster Modifiers
+1x Divine Orb
+2x Chaos Orb
+5x Exalted Orb
+10x Chaos Orb
+1x Mirror of Kalandra
+0.5 Divine Orb
+3.5 Chaos Orb
+15 Chaos Orb
+100 Divine Orb
+Price: 1 divine
+Price: 5 chaos
+Price: 10 exalted
+Listed 2 hours ago
+Listed 5 minutes ago
+Listed 1 day ago
+Online
+AFK
+Whisper
+Direct Whisper
+Travel to Hideout
+Asking Price:
+Warning: This item's price
+Accept Trade
+Cancel Trade
+Stash Tab
+Map Tab
+Currency Tab
+Divination Tab
+Fragment Tab
+Unique Tab
+Essence Tab
+Delve Tab
+Blight Tab
+Metamorph Tab
+Delirium Tab
+Heist Tab
+Expedition Tab
+Sentinel Tab
+Normal
+Magic
+Rare
+Unique
+Currency
+Divination Card
+Gem
+Flask
+Map
+Prophecy
+Scarab
+Fossil
+Resonator
+Incubator
+Oil
+Catalyst
+Delirium Orb
+Vial
+Invitation
+Timeless Emblem
+Timeless Splinter
+Breach Splinter
+Breachstone
+Sacrifice Fragment
+Mortal Fragment
+Uber Fragment
+Maven's Invitation
+The Feared
+The Formed
+The Twisted
+The Forgotten
+The Hidden
+The Elderslayers
+The Shaper
+The Elder
+Uber Elder
+Sirus, Awakener of Worlds
+Maven, The Cruelty
+Cortex
+The Alluring Abyss
+Hall of Grandmasters
+Doryani's Machinarium
+Pillars of Arun
+Augmented Distant Memory
+Rewritten Distant Memory
+Altered Distant Memory
+Twisted Distant Memory
+Infused Engineer's Orb
+Infused Beachhead
+Winged Scarab
+Gilded Scarab
+Polished Scarab
+Rusted Scarab
+Tier 16 Map
+Tier 15 Map
+Tier 14 Map
+Area Level: 83
+Area Level: 81
+Monster Level: 85
+Quantity: +120%
+Rarity: +350%
+Pack Size: +45%
++1 Level of Contained Gems
+Blood-filled Vessel
+Sacred Blossom
+Writhing Invitation
+Life +120
+Mana +60
+ES +80
+Str +50
+Dex +45
+Int +55
+Res +42%
+DPS 450.5
+pDPS 380.2
+eDPS 125.8
+Total: 15 Chaos Orb
+Total: 3.5 Divine Orb
+Buyout: 10 chaos
+Buyout: 1 divine
+exact price 5 divine
+~price 2 divine
+~b/o 15 chaos
+ilvl 86
+ilvl 83
+ilvl 75
+6-Link
+5-Link
+4-Link
+6L
+5L
+4L
+R-R-R-G-G-B
+R-G-B-B-G-R
+White Sockets
+3 White
+Implicit Modifier
+Explicit Modifier
+Enchantment
+Crafted Modifier
+Fractured Modifier
+Lab Enchant
+Eldritch Implicit
+Grand Spectrum
+Transcendent Flesh
+Transcendent Mind
+Transcendent Spirit
+Brutal Restraint
+Elegant Hubris
+Glorious Vanity
+Lethal Pride
+Militant Faith
+Darkness Enthroned
+Replica Shroud of the Lightless
+Farrul's Fur
+Doppelganger Guise
+Melding of the Flesh
+Crystallised Omniscience
diff --git a/tools/training/train.sh b/tools/training/train.sh
new file mode 100644
index 0000000..21ae458
--- /dev/null
+++ b/tools/training/train.sh
@@ -0,0 +1,419 @@
+#!/usr/bin/env bash
+# Fine-tune Tesseract 5 LSTM on Fontin / Fontin SmallCaps for POE2
+#
+# IMPORTANT: Run inside WSL Ubuntu (NOT docker-desktop).
+#
+# From PowerShell:
+# wsl -d Ubuntu-22.04 bash /mnt/c/Users/boki/repos/poe2trade/tools/training/train.sh
+#
+# Or from inside WSL Ubuntu:
+# cd /mnt/c/Users/boki/repos/poe2trade/tools/training
+# bash train.sh
+#
+# Prerequisites:
+# - WSL Ubuntu installed: wsl --install -d Ubuntu-22.04
+# - Internet access (downloads fonts + Tesseract tools if needed)
+
+set -euo pipefail
+
+# ── Sanity checks ────────────────────────────────────────────────────────────
+
+if [ -z "${BASH_VERSION:-}" ]; then
+ echo "ERROR: This script requires bash. Run with: bash train.sh"
+ echo "If you're in docker-desktop WSL, switch to Ubuntu:"
+ echo " wsl -d Ubuntu-22.04 bash /mnt/c/Users/boki/repos/poe2trade/tools/training/train.sh"
+ exit 1
+fi
+
+# Detect which WSL distro we're in
+if grep -qi alpine /etc/os-release 2>/dev/null || [ -f /etc/alpine-release ]; then
+ echo "ERROR: You're in docker-desktop (Alpine). Use Ubuntu instead:"
+ echo " wsl -d Ubuntu-22.04 bash /mnt/c/Users/boki/repos/poe2trade/tools/training/train.sh"
+ exit 1
+fi
+
+# ── Resolve paths ────────────────────────────────────────────────────────────
+
+# Handle both /mnt/c/ (Ubuntu) and /mnt/host/c/ (docker-desktop) paths
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+TESSDATA_DIR="$PROJECT_ROOT/tools/OcrDaemon/tessdata"
+TRAINING_TEXT="$SCRIPT_DIR/poe2_training_text.txt"
+WORK_DIR="$HOME/poe2-tesseract-training"
+FONT_DIR="/usr/local/share/fonts/fontin"
+MAX_ITERATIONS=800
+TARGET_ERROR=0.01
+
+if [ ! -f "$TRAINING_TEXT" ]; then
+ echo "ERROR: Training text not found at: $TRAINING_TEXT"
+ exit 1
+fi
+
+if [ ! -f "$TESSDATA_DIR/eng.traineddata" ]; then
+ echo "ERROR: eng.traineddata not found at: $TESSDATA_DIR/"
+ exit 1
+fi
+
+echo "=== POE2 Tesseract Fine-Tuning ==="
+echo "Script dir: $SCRIPT_DIR"
+echo "Work dir: $WORK_DIR"
+echo "Training text: $TRAINING_TEXT"
+echo "Tessdata: $TESSDATA_DIR"
+echo ""
+
+# ── Step 1: Install Tesseract training tools ─────────────────────────────────
+
+install_tesseract_tools() {
+ echo "── Step 1: Installing Tesseract training tools ──"
+
+ if command -v text2image >/dev/null 2>&1 && command -v lstmtraining >/dev/null 2>&1; then
+ echo "Tesseract training tools already installed."
+ tesseract --version 2>&1 | head -1
+ return 0
+ fi
+
+ echo "Installing Tesseract 5.x and training tools..."
+
+ # Update package list first
+ sudo apt-get update -qq
+
+ # Try PPA for latest Tesseract
+ if sudo add-apt-repository -y ppa:alex-p/tesseract-ocr-devel 2>/dev/null; then
+ sudo apt-get update -qq
+ fi
+
+ sudo apt-get install -y tesseract-ocr tesseract-ocr-eng \
+ libicu-dev libpango1.0-dev libcairo2-dev \
+ wget unzip fontconfig 2>&1 | tail -3
+
+ # Check if we got text2image from the package
+ if command -v text2image >/dev/null 2>&1; then
+ echo "Installed via apt."
+ tesseract --version 2>&1 | head -1
+ return 0
+ fi
+
+ # Build from source as fallback
+ echo "Package didn't include training tools, building from source..."
+ sudo apt-get install -y automake g++ git libtool libleptonica-dev \
+ make pkg-config libicu-dev libpango1.0-dev libcairo2-dev 2>&1 | tail -3
+
+ local BUILD_DIR="$HOME/tesseract-build"
+ if [ ! -d "$BUILD_DIR" ]; then
+ git clone --depth 1 --branch 5.3.4 https://github.com/tesseract-ocr/tesseract.git "$BUILD_DIR"
+ fi
+
+ pushd "$BUILD_DIR" > /dev/null
+ ./autogen.sh
+ ./configure
+ make -j"$(nproc)"
+ make training
+ sudo make install
+ sudo make training-install
+ sudo ldconfig
+ popd > /dev/null
+
+ echo "Built from source."
+ tesseract --version 2>&1 | head -1
+}
+
+# ── Step 2: Download and install Fontin fonts ────────────────────────────────
+
+install_fonts() {
+ echo ""
+ echo "── Step 2: Installing Fontin fonts ──"
+
+ if fc-list 2>/dev/null | grep -qi fontin; then
+ echo "Fontin fonts already installed:"
+ fc-list | grep -i fontin
+ return 0
+ fi
+
+ local FONT_TMP="$HOME/fontin-download"
+ mkdir -p "$FONT_TMP"
+ pushd "$FONT_TMP" > /dev/null
+
+ echo "Downloading Fontin fonts..."
+ # wfonts.com zip includes Regular, Bold, Italic, SmallCaps (all in one)
+ curl -sL "https://static.wfonts.com/download/data/2015/03/10/fontin/fontin.zip" -o fontin.zip || {
+ echo "ERROR: Failed to download Fontin fonts."
+ echo "Download manually and place .otf files in $FONT_DIR"
+ popd > /dev/null
+ return 1
+ }
+
+ # Verify it's a real zip
+ if ! file fontin.zip | grep -q "Zip archive"; then
+ echo "ERROR: Downloaded file is not a zip. Font site may be blocking downloads."
+ echo "Download manually from https://www.wfonts.com/font/fontin"
+ echo "Place .otf files in $FONT_DIR"
+ popd > /dev/null
+ return 1
+ fi
+
+ unzip -qo fontin.zip -d fontin/ 2>/dev/null || true
+
+ sudo mkdir -p "$FONT_DIR"
+ # Copy OTF files
+ find fontin/ \( -name '*.otf' -o -name '*.OTF' \) \
+ -exec sudo cp {} "$FONT_DIR/" \; 2>/dev/null || true
+ sudo fc-cache -fv > /dev/null 2>&1
+
+ echo "Installed fonts:"
+ fc-list | grep -i fontin || echo "WARNING: Fonts not found after install!"
+
+ popd > /dev/null
+ rm -rf "$FONT_TMP"
+}
+
+# ── Step 3: Generate training images ─────────────────────────────────────────
+
+generate_training_images() {
+ echo ""
+ echo "── Step 3: Generating training images ──"
+
+ mkdir -p "$WORK_DIR/ground-truth" "$WORK_DIR/output"
+ cd "$WORK_DIR"
+
+ # Copy eng.traineddata from project
+ if [ ! -f "$WORK_DIR/eng.traineddata" ]; then
+ cp "$TESSDATA_DIR/eng.traineddata" "$WORK_DIR/"
+ fi
+
+ # Extract LSTM model for fine-tuning
+ if [ ! -f "$WORK_DIR/eng.lstm" ]; then
+ echo "Extracting LSTM model from eng.traineddata..."
+ combine_tessdata -e eng.traineddata eng.lstm
+ fi
+
+ # Discover available Fontin font names
+ echo "Available Fontin fonts:"
+ text2image --list_available_fonts --fonts_dir "$FONT_DIR" 2>/dev/null | grep -i fontin || true
+
+ # Read font names into a file (avoids bash array portability issues)
+ # text2image output format is " 6: Fontin Bold" — extract font name after first colon
+ local FONT_LIST="$WORK_DIR/font_list.txt"
+ text2image --list_available_fonts --fonts_dir "$FONT_DIR" 2>/dev/null \
+ | grep -i fontin \
+ | cut -d: -f2- \
+ | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
+ > "$FONT_LIST" 2>/dev/null || true
+
+ if [ ! -s "$FONT_LIST" ]; then
+ echo "WARNING: No Fontin fonts auto-detected. Trying common names..."
+ cat > "$FONT_LIST" <<'NAMES'
+Fontin Bold
+Fontin Medium
+Fontin Medium Italic
+Fontin SmallCaps, Medium
+NAMES
+ fi
+
+ echo "Will generate images for:"
+ cat "$FONT_LIST"
+
+ local count=0
+ for exp in -1 0 1; do
+ while IFS= read -r font_name; do
+ [ -z "$font_name" ] && continue
+ local safe
+ safe=$(echo "$font_name" | tr ' ' '-' | tr '[:upper:]' '[:lower:]')
+ local outbase="ground-truth/poe2.${safe}.exp${exp}"
+
+ echo " Generating: $outbase (font='$font_name', exposure=$exp)"
+ if text2image \
+ --text "$TRAINING_TEXT" \
+ --outputbase "$outbase" \
+ --font "$font_name" \
+ --fonts_dir "$FONT_DIR" \
+ --ptsize 16 \
+ --xsize 3600 \
+ --ysize 480 \
+ --char_spacing 0.0 \
+ --exposure "$exp" \
+ --leading 32 2>&1; then
+ count=$((count + 1))
+ else
+ echo " WARNING: Failed for font '$font_name' exposure=$exp, skipping"
+ fi
+ done < "$FONT_LIST"
+ done
+
+ echo "Generated $count training image sets."
+
+ if [ "$count" -eq 0 ]; then
+ echo "ERROR: No training images generated. Check font installation."
+ return 1
+ fi
+}
+
+# ── Step 4: Generate LSTMF files ─────────────────────────────────────────────
+
+generate_lstmf() {
+ echo ""
+ echo "── Step 4: Generating .lstmf training files ──"
+
+ cd "$WORK_DIR"
+
+ local count=0
+ for tif in ground-truth/*.tif; do
+ [ -f "$tif" ] || continue
+ local base="${tif%.tif}"
+
+ if [ -f "${base}.lstmf" ]; then
+ echo " Skipping (exists): ${base}.lstmf"
+ count=$((count + 1))
+ continue
+ fi
+
+ echo " Processing: $tif"
+ if tesseract "$tif" "$base" --psm 6 lstm.train 2>&1; then
+ count=$((count + 1))
+ else
+ echo " WARNING: Failed to process $tif"
+ fi
+ done
+
+ # Create training file list
+ : > training_files.txt
+ for f in ground-truth/*.lstmf; do
+ [ -f "$f" ] && echo "$f" >> training_files.txt
+ done
+ local total
+ total=$(wc -l < training_files.txt)
+
+ echo "Created $total .lstmf files."
+
+ if [ "$total" -eq 0 ]; then
+ echo "ERROR: No .lstmf files generated."
+ return 1
+ fi
+}
+
+# ── Step 5: Fine-tune LSTM ───────────────────────────────────────────────────
+
+fine_tune() {
+ echo ""
+ echo "── Step 5: Fine-tuning LSTM (max $MAX_ITERATIONS iterations) ──"
+
+ cd "$WORK_DIR"
+
+ lstmtraining \
+ --continue_from eng.lstm \
+ --traineddata eng.traineddata \
+ --train_listfile training_files.txt \
+ --model_output output/poe2 \
+ --max_iterations "$MAX_ITERATIONS" \
+ --target_error_rate "$TARGET_ERROR" \
+ --debug_interval -1
+
+ echo "Fine-tuning complete."
+}
+
+# ── Step 6: Package model ────────────────────────────────────────────────────
+
+package_model() {
+ echo ""
+ echo "── Step 6: Packaging poe2.traineddata ──"
+
+ cd "$WORK_DIR"
+
+ # Find the checkpoint file
+ local checkpoint=""
+ if [ -f "output/poe2_checkpoint" ]; then
+ checkpoint="output/poe2_checkpoint"
+ elif [ -f "output/poe2checkpoint" ]; then
+ checkpoint="output/poe2checkpoint"
+ else
+ # Find most recent checkpoint
+ checkpoint=$(ls -t output/poe2* 2>/dev/null | head -1)
+ fi
+
+ if [ -z "$checkpoint" ] || [ ! -f "$checkpoint" ]; then
+ echo "ERROR: No checkpoint found in output/"
+ ls -la output/ 2>/dev/null || true
+ return 1
+ fi
+
+ echo "Using checkpoint: $checkpoint"
+
+ lstmtraining --stop_training \
+ --continue_from "$checkpoint" \
+ --traineddata eng.traineddata \
+ --model_output output/poe2.traineddata
+
+ # Copy to project tessdata
+ cp output/poe2.traineddata "$TESSDATA_DIR/poe2.traineddata"
+
+ echo "Model saved to: $TESSDATA_DIR/poe2.traineddata"
+ ls -lh "$TESSDATA_DIR/poe2.traineddata"
+}
+
+# ── Step 7: Quick validation ─────────────────────────────────────────────────
+
+validate() {
+ echo ""
+ echo "── Step 7: Quick validation ──"
+
+ cd "$WORK_DIR"
+
+ # Create a small test file
+ printf 'Quality: +20%%\nAdds 12 to 24 Fire Damage\n+45%% to Fire Resistance\n' > /tmp/poe2_test.txt
+
+ # Get first available fontin font name
+ local test_font
+ test_font=$(text2image --list_available_fonts --fonts_dir "$FONT_DIR" 2>/dev/null | grep -i fontin | head -1 | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
+
+ if [ -z "$test_font" ]; then
+ echo "Skipping validation (no Fontin font available)"
+ return 0
+ fi
+
+ if ! text2image \
+ --text /tmp/poe2_test.txt \
+ --outputbase /tmp/poe2_test \
+ --font "$test_font" \
+ --fonts_dir "$FONT_DIR" \
+ --ptsize 16 --xsize 1200 --ysize 200 \
+ --exposure 0 --leading 32 2>&1; then
+ echo "Skipping validation (could not generate test image)"
+ return 0
+ fi
+
+ echo "=== eng model ==="
+ tesseract /tmp/poe2_test.tif stdout -l eng --psm 6 2>/dev/null || true
+
+ echo ""
+ echo "=== poe2 model ==="
+ TESSDATA_PREFIX="$TESSDATA_DIR" tesseract /tmp/poe2_test.tif stdout -l poe2 --psm 6 2>/dev/null || {
+ tesseract /tmp/poe2_test.tif stdout --tessdata-dir "$TESSDATA_DIR" -l poe2 --psm 6 2>/dev/null || true
+ }
+
+ rm -f /tmp/poe2_test.txt /tmp/poe2_test.tif /tmp/poe2_test.box
+
+ echo ""
+ echo "Compare the outputs above — poe2 should be more accurate on Fontin text."
+}
+
+# ── Main ─────────────────────────────────────────────────────────────────────
+
+main() {
+ install_tesseract_tools
+ install_fonts
+ generate_training_images
+ generate_lstmf
+ fine_tune
+ package_model
+ validate
+
+ echo ""
+ echo "=== Done! ==="
+ echo "poe2.traineddata has been copied to: $TESSDATA_DIR/"
+ echo ""
+ echo "Next steps:"
+ echo " 1. Build the daemon: dotnet build tools/OcrDaemon -c Release"
+ echo " 2. Start the bot: npx tsx src/index.ts"
+ echo " 3. Test OCR quality in the dashboard"
+}
+
+main "$@"