#!/usr/bin/env bash
# Fine-tune Tesseract 5 LSTM on Fontin / Fontin SmallCaps for POE2
#
# IMPORTANT: Run inside WSL Ubuntu (NOT docker-desktop).
#
# From PowerShell:
#   wsl -d Ubuntu-22.04 bash /mnt/c/Users/boki/repos/poe2trade/tools/training/train.sh
#
# Or from inside WSL Ubuntu:
#   cd /mnt/c/Users/boki/repos/poe2trade/tools/training
#   bash train.sh
#
# Prerequisites:
#   - WSL Ubuntu installed: wsl --install -d Ubuntu-22.04
#   - Internet access (downloads fonts + Tesseract tools if needed)

# ── Sanity checks ────────────────────────────────────────────────────────────

# This guard must come BEFORE `set -o pipefail`: a non-bash shell that lacks
# pipefail would abort on the `set` line and never print the guidance below.
# Keep it POSIX-only ([ ... ], no bashisms) for the same reason.
if [ -z "${BASH_VERSION:-}" ]; then
  echo "ERROR: This script requires bash. Run with: bash train.sh"
  echo "If you're in docker-desktop WSL, switch to Ubuntu:"
  echo "  wsl -d Ubuntu-22.04 bash /mnt/c/Users/boki/repos/poe2trade/tools/training/train.sh"
  exit 1
fi

set -euo pipefail

# Detect which WSL distro we're in
if grep -qi alpine /etc/os-release 2>/dev/null || [ -f /etc/alpine-release ]; then
  echo "ERROR: You're in docker-desktop (Alpine). Use Ubuntu instead:"
  echo "  wsl -d Ubuntu-22.04 bash /mnt/c/Users/boki/repos/poe2trade/tools/training/train.sh"
  exit 1
fi

# ── Resolve paths ────────────────────────────────────────────────────────────

# Everything is derived from this script's own location, so the same code
# works regardless of where the Windows drive is mounted.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
TESSDATA_DIR="$PROJECT_ROOT/tools/OcrDaemon/tessdata"
TRAINING_TEXT="$SCRIPT_DIR/poe2_training_text.txt"
WORK_DIR="$HOME/poe2-tesseract-training"
FONT_DIR="/usr/local/share/fonts/fontin"

MAX_ITERATIONS=800
TARGET_ERROR=0.01

readonly SCRIPT_DIR PROJECT_ROOT TESSDATA_DIR TRAINING_TEXT WORK_DIR FONT_DIR
readonly MAX_ITERATIONS TARGET_ERROR

if [[ ! -f "$TRAINING_TEXT" ]]; then
  echo "ERROR: Training text not found at: $TRAINING_TEXT"
  exit 1
fi

if [[ ! -f "$TESSDATA_DIR/eng.traineddata" ]]; then
  echo "ERROR: eng.traineddata not found at: $TESSDATA_DIR/"
  exit 1
fi

echo "=== POE2 Tesseract Fine-Tuning ==="
echo "Script dir:    $SCRIPT_DIR"
echo "Work dir:      $WORK_DIR"
echo "Training text: $TRAINING_TEXT"
echo "Tessdata:      $TESSDATA_DIR"
echo ""

# ── Helpers ──────────────────────────────────────────────────────────────────

#######################################
# Check that every Tesseract binary used by this script is on PATH.
# Returns: 0 if tesseract, text2image, lstmtraining and combine_tessdata are
#          all available; 1 otherwise.
#######################################
have_training_tools() {
  local tool
  for tool in tesseract text2image lstmtraining combine_tessdata; do
    command -v "$tool" >/dev/null 2>&1 || return 1
  done
}

#######################################
# Print the first line of `tesseract --version`.
# sed consumes the whole stream (unlike `head -1`), so the producer can never
# receive SIGPIPE and trip `set -o pipefail`.
#######################################
print_tesseract_version() {
  tesseract --version 2>&1 | sed -n '1p'
}

#######################################
# Print the Fontin font names known to text2image, one per line.
# text2image lists fonts as "  6: Fontin Bold"; the index prefix and the
# surrounding whitespace are stripped so each line can be passed to --font.
# Globals: FONT_DIR (read)
# Outputs: font names on stdout (possibly nothing)
# Returns: always 0 — "no fonts found" is reported as empty output.
#######################################
list_fontin_fonts() {
  text2image --list_available_fonts --fonts_dir "$FONT_DIR" 2>/dev/null \
    | grep -i fontin \
    | cut -d: -f2- \
    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' \
    || true
}

#######################################
# Download a URL to a file using curl, or wget when curl is not installed.
# Arguments: $1 - URL, $2 - destination path
# Returns:   non-zero on any transfer or HTTP error
#######################################
download_file() {
  local url=$1
  local dest=$2
  if command -v curl >/dev/null 2>&1; then
    # -f: treat HTTP errors as failures instead of saving the error page.
    curl -fsSL "$url" -o "$dest"
  else
    wget -q -O "$dest" "$url"
  fi
}

# ── Step 1: Install Tesseract training tools ─────────────────────────────────

#######################################
# Make sure Tesseract and its training tools are installed.
# Tries apt first (optionally via the alex-p PPA), then falls back to building
# Tesseract 5.3.4 from source in $HOME/tesseract-build.
# Returns: 0 once the tools are available; the script aborts (set -e) if an
#          install/build command fails.
#######################################
install_tesseract_tools() {
  echo "── Step 1: Installing Tesseract training tools ──"

  if have_training_tools; then
    echo "Tesseract training tools already installed."
    print_tesseract_version
    return 0
  fi

  echo "Installing Tesseract 5.x and training tools..."

  # Update package list first
  sudo apt-get update -qq

  # Try PPA for latest Tesseract (best effort: add-apt-repository may be absent)
  if sudo add-apt-repository -y ppa:alex-p/tesseract-ocr-devel 2>/dev/null; then
    sudo apt-get update -qq
  fi

  sudo apt-get install -y tesseract-ocr tesseract-ocr-eng \
    libicu-dev libpango1.0-dev libcairo2-dev \
    wget unzip fontconfig 2>&1 | tail -3

  # Check if the package shipped the training tools as well
  if have_training_tools; then
    echo "Installed via apt."
    print_tesseract_version
    return 0
  fi

  # Build from source as fallback
  echo "Package didn't include training tools, building from source..."
  sudo apt-get install -y automake g++ git libtool libleptonica-dev \
    make pkg-config libicu-dev libpango1.0-dev libcairo2-dev 2>&1 | tail -3

  local build_dir="$HOME/tesseract-build"
  if [[ ! -d "$build_dir" ]]; then
    git clone --depth 1 --branch 5.3.4 \
      https://github.com/tesseract-ocr/tesseract.git "$build_dir"
  fi

  pushd "$build_dir" > /dev/null
  ./autogen.sh
  ./configure
  make -j"$(nproc)"
  make training
  sudo make install
  sudo make training-install
  sudo ldconfig
  popd > /dev/null

  echo "Built from source."
  print_tesseract_version
}

# ── Step 2: Download and install Fontin fonts ────────────────────────────────

#######################################
# Install the Fontin font family into FONT_DIR unless fontconfig already
# knows a Fontin font.
# Globals: FONT_DIR (read)
# Returns: 0 on success or when already installed; 1 if the download fails or
#          the downloaded file is not a valid zip archive.
#######################################
install_fonts() {
  echo ""
  echo "── Step 2: Installing Fontin fonts ──"

  # No `grep -q` here: it exits on the first match, which can SIGPIPE fc-list
  # and make the pipeline "fail" under pipefail even though a font was found.
  if fc-list 2>/dev/null | grep -i fontin > /dev/null; then
    echo "Fontin fonts already installed:"
    fc-list | grep -i fontin
    return 0
  fi

  local font_url="https://static.wfonts.com/download/data/2015/03/10/fontin/fontin.zip"
  local font_tmp
  font_tmp=$(mktemp -d)
  local zip_path="$font_tmp/fontin.zip"

  echo "Downloading Fontin fonts..."
  # wfonts.com zip includes Regular, Bold, Italic, SmallCaps (all in one)
  if ! download_file "$font_url" "$zip_path"; then
    echo "ERROR: Failed to download Fontin fonts."
    echo "Download manually and place .otf files in $FONT_DIR"
    rm -rf -- "$font_tmp"
    return 1
  fi

  # Verify it's a real zip (the site may serve an HTML page instead)
  if ! unzip -tq "$zip_path" > /dev/null 2>&1; then
    echo "ERROR: Downloaded file is not a zip. Font site may be blocking downloads."
    echo "Download manually from https://www.wfonts.com/font/fontin"
    echo "Place .otf files in $FONT_DIR"
    rm -rf -- "$font_tmp"
    return 1
  fi

  # unzip exits non-zero for mere warnings; the fc-list check below is what
  # actually decides whether the install worked.
  unzip -qo "$zip_path" -d "$font_tmp/fontin" 2>/dev/null || true

  sudo mkdir -p "$FONT_DIR"

  # Copy OTF files (either case of the extension)
  find "$font_tmp/fontin" -type f -iname '*.otf' \
    -exec sudo cp {} "$FONT_DIR/" \; 2>/dev/null || true

  sudo fc-cache -fv > /dev/null 2>&1

  echo "Installed fonts:"
  fc-list | grep -i fontin || echo "WARNING: Fonts not found after install!"

  rm -rf -- "$font_tmp"
}

# ── Step 3: Generate training images ─────────────────────────────────────────

#######################################
# Render the training text with every available Fontin font at exposures
# -1, 0 and 1, producing .tif/.box pairs under $WORK_DIR/ground-truth.
# Also copies eng.traineddata into WORK_DIR and extracts eng.lstm from it.
# Globals: WORK_DIR, TESSDATA_DIR, FONT_DIR, TRAINING_TEXT (read)
# Returns: 1 if not a single image set could be generated.
#######################################
generate_training_images() {
  echo ""
  echo "── Step 3: Generating training images ──"

  mkdir -p "$WORK_DIR/ground-truth" "$WORK_DIR/output"
  cd "$WORK_DIR"

  # Copy eng.traineddata from project
  if [[ ! -f "$WORK_DIR/eng.traineddata" ]]; then
    cp "$TESSDATA_DIR/eng.traineddata" "$WORK_DIR/"
  fi

  # Extract LSTM model for fine-tuning
  if [[ ! -f "$WORK_DIR/eng.lstm" ]]; then
    echo "Extracting LSTM model from eng.traineddata..."
    combine_tessdata -e eng.traineddata eng.lstm
  fi

  # Show the raw listing (including text2image's index numbers) for reference
  echo "Available Fontin fonts:"
  text2image --list_available_fonts --fonts_dir "$FONT_DIR" 2>/dev/null \
    | grep -i fontin || true

  # Persist the cleaned font names; the file doubles as a record of the run
  local font_list="$WORK_DIR/font_list.txt"
  list_fontin_fonts > "$font_list"

  if [[ ! -s "$font_list" ]]; then
    echo "WARNING: No Fontin fonts auto-detected. Trying common names..."
    printf '%s\n' \
      'Fontin Bold' \
      'Fontin Medium' \
      'Fontin Medium Italic' \
      'Fontin SmallCaps, Medium' \
      > "$font_list"
  fi

  echo "Will generate images for:"
  cat "$font_list"

  # Load the names up front so the loop body does not share stdin with the
  # list file.
  local -a fonts=()
  mapfile -t fonts < "$font_list"

  local count=0
  local exp font_name safe outbase
  for exp in -1 0 1; do
    for font_name in "${fonts[@]}"; do
      if [[ -z "$font_name" ]]; then
        continue
      fi

      # File-name-safe variant: spaces -> dashes, lower-cased
      safe=${font_name// /-}
      safe=${safe,,}
      outbase="ground-truth/poe2.${safe}.exp${exp}"

      echo "  Generating: $outbase (font='$font_name', exposure=$exp)"
      if text2image \
        --text "$TRAINING_TEXT" \
        --outputbase "$outbase" \
        --font "$font_name" \
        --fonts_dir "$FONT_DIR" \
        --ptsize 16 \
        --xsize 3600 \
        --ysize 480 \
        --char_spacing 0.0 \
        --exposure "$exp" \
        --leading 32 2>&1; then
        count=$((count + 1))
      else
        echo "  WARNING: Failed for font '$font_name' exposure=$exp, skipping"
      fi
    done
  done

  echo "Generated $count training image sets."
  if [[ "$count" -eq 0 ]]; then
    echo "ERROR: No training images generated. Check font installation."
    return 1
  fi
}

# ── Step 4: Generate LSTMF files ─────────────────────────────────────────────

#######################################
# Convert every ground-truth .tif into a .lstmf training file (skipping ones
# that already exist) and write the list of all .lstmf files to
# $WORK_DIR/training_files.txt.
# Globals: WORK_DIR (read)
# Returns: 1 if no .lstmf file exists afterwards.
#######################################
generate_lstmf() {
  echo ""
  echo "── Step 4: Generating .lstmf training files ──"

  cd "$WORK_DIR"

  local tif base
  for tif in ground-truth/*.tif; do
    # An unmatched glob stays literal; skip it
    [[ -f "$tif" ]] || continue
    base="${tif%.tif}"

    if [[ -f "${base}.lstmf" ]]; then
      echo "  Skipping (exists): ${base}.lstmf"
      continue
    fi

    echo "  Processing: $tif"
    if ! tesseract "$tif" "$base" --psm 6 lstm.train 2>&1; then
      echo "  WARNING: Failed to process $tif"
    fi
  done

  # Create training file list
  : > training_files.txt
  local lstmf
  local total=0
  for lstmf in ground-truth/*.lstmf; do
    [[ -f "$lstmf" ]] || continue
    printf '%s\n' "$lstmf" >> training_files.txt
    total=$((total + 1))
  done

  echo "Created $total .lstmf files."

  if [[ "$total" -eq 0 ]]; then
    echo "ERROR: No .lstmf files generated."
    return 1
  fi
}

# ── Step 5: Fine-tune LSTM ───────────────────────────────────────────────────

#######################################
# Run lstmtraining, continuing from eng.lstm, on the files listed in
# training_files.txt. Checkpoints are written with the prefix output/poe2.
# Globals: WORK_DIR, MAX_ITERATIONS, TARGET_ERROR (read)
#######################################
fine_tune() {
  echo ""
  echo "── Step 5: Fine-tuning LSTM (max $MAX_ITERATIONS iterations) ──"

  cd "$WORK_DIR"

  lstmtraining \
    --continue_from eng.lstm \
    --traineddata eng.traineddata \
    --train_listfile training_files.txt \
    --model_output output/poe2 \
    --max_iterations "$MAX_ITERATIONS" \
    --target_error_rate "$TARGET_ERROR" \
    --debug_interval -1

  echo "Fine-tuning complete."
}

# ── Step 6: Package model ────────────────────────────────────────────────────

#######################################
# Convert the training checkpoint into output/poe2.traineddata and copy it
# into the project's tessdata directory.
# Globals: WORK_DIR, TESSDATA_DIR (read)
# Returns: 1 if no checkpoint file can be found in output/.
#######################################
package_model() {
  echo ""
  echo "── Step 6: Packaging poe2.traineddata ──"

  cd "$WORK_DIR"

  # Prefer the well-known checkpoint names
  local checkpoint=""
  local candidate
  for candidate in output/poe2_checkpoint output/poe2checkpoint; do
    if [[ -f "$candidate" ]]; then
      checkpoint=$candidate
      break
    fi
  done

  # Otherwise take the most recently modified checkpoint. The glob is limited
  # to *checkpoint so a poe2.traineddata left over from an earlier run is
  # never mistaken for one.
  if [[ -z "$checkpoint" ]]; then
    for candidate in output/poe2*checkpoint; do
      [[ -f "$candidate" ]] || continue
      if [[ -z "$checkpoint" || "$candidate" -nt "$checkpoint" ]]; then
        checkpoint=$candidate
      fi
    done
  fi

  if [[ -z "$checkpoint" ]]; then
    echo "ERROR: No checkpoint found in output/"
    ls -la output/ 2>/dev/null || true
    return 1
  fi

  echo "Using checkpoint: $checkpoint"

  lstmtraining --stop_training \
    --continue_from "$checkpoint" \
    --traineddata eng.traineddata \
    --model_output output/poe2.traineddata

  # Copy to project tessdata
  cp output/poe2.traineddata "$TESSDATA_DIR/poe2.traineddata"
  echo "Model saved to: $TESSDATA_DIR/poe2.traineddata"
  ls -lh "$TESSDATA_DIR/poe2.traineddata"
}

# ── Step 7: Quick validation ─────────────────────────────────────────────────

#######################################
# Render a three-line sample with the first available Fontin font and print
# the OCR result of the stock eng model and of the new poe2 model for manual
# comparison. Validation is best effort: it is skipped (return 0) when no
# font is available or the sample image cannot be rendered.
# Globals: WORK_DIR, FONT_DIR, TESSDATA_DIR (read)
#######################################
validate() {
  echo ""
  echo "── Step 7: Quick validation ──"

  cd "$WORK_DIR"

  # Get first available fontin font name (already stripped of the index prefix)
  local test_font=""
  IFS= read -r test_font < <(list_fontin_fonts) || true

  if [[ -z "$test_font" ]]; then
    echo "Skipping validation (no Fontin font available)"
    return 0
  fi

  # Private scratch directory instead of predictable names in /tmp
  local tmp_dir
  tmp_dir=$(mktemp -d)
  local sample_text="$tmp_dir/poe2_test.txt"
  local sample_base="$tmp_dir/poe2_test"

  # Create a small test file
  printf 'Quality: +20%%\nAdds 12 to 24 Fire Damage\n+45%% to Fire Resistance\n' \
    > "$sample_text"

  if ! text2image \
    --text "$sample_text" \
    --outputbase "$sample_base" \
    --font "$test_font" \
    --fonts_dir "$FONT_DIR" \
    --ptsize 16 --xsize 1200 --ysize 200 \
    --exposure 0 --leading 32 2>&1; then
    echo "Skipping validation (could not generate test image)"
    rm -rf -- "$tmp_dir"
    return 0
  fi

  echo "=== eng model ==="
  tesseract "${sample_base}.tif" stdout -l eng --psm 6 2>/dev/null || true

  echo ""
  echo "=== poe2 model ==="
  # Try TESSDATA_PREFIX first, then the explicit --tessdata-dir flag
  TESSDATA_PREFIX="$TESSDATA_DIR" \
    tesseract "${sample_base}.tif" stdout -l poe2 --psm 6 2>/dev/null || {
    tesseract "${sample_base}.tif" stdout \
      --tessdata-dir "$TESSDATA_DIR" -l poe2 --psm 6 2>/dev/null || true
  }

  rm -rf -- "$tmp_dir"

  echo ""
  echo "Compare the outputs above — poe2 should be more accurate on Fontin text."
}

# ── Main ─────────────────────────────────────────────────────────────────────

#######################################
# Run the full pipeline: install tools and fonts, generate training data,
# fine-tune, package and validate. Any failing step aborts the script.
#######################################
main() {
  install_tesseract_tools
  install_fonts
  generate_training_images
  generate_lstmf
  fine_tune
  package_model
  validate

  echo ""
  echo "=== Done! ==="
  echo "poe2.traineddata has been copied to: $TESSDATA_DIR/"
  echo ""
  echo "Next steps:"
  echo "  1. Build the daemon:  dotnet build tools/OcrDaemon -c Release"
  echo "  2. Start the bot:     npx tsx src/index.ts"
  echo "  3. Test OCR quality in the dashboard"
}

main "$@"