poe2-bot/tools/python-detect/annotate.py

"""
Bounding-box annotator with select / move / resize / tag / filter / predict.

Controls
--------
  Left drag (empty area) : draw new box
  Left click (on box)    : select it
  Left drag (box body)   : move it
  Left drag (corner)     : resize it
  Right-click (on box)   : cycle class
  1-9                    : set class of selected box (or default new-box class)
  Delete                 : remove selected box
  Space / Enter          : save + next image
  Left / Right arrow     : prev / next image
  P                      : predict — run YOLO model on current image
  F                      : cycle filter (All > Unlabeled > Empty > Labeled > per-class)
  Z                      : undo
  X                      : delete image file + next
  E                      : deselect
  Q / Escape             : quit (auto-saves current)

Toolbar buttons at the top are also clickable.
Aspect ratio is always preserved (letterboxed).
Saves YOLO-format .txt labels alongside images.
"""

import cv2
import numpy as np
import os
import sys
import glob

# ── Classes ──────────────────────────────────────────────────────
# Passed in by manage.py via run_annotator(img_dir, classes).
# Standalone fallback: single-class kulemak.
DEFAULT_CLASSES = ["kulemak"]
# Per-class drawing colours, looked up as COLORS[cls_id % len(COLORS)].
# The tuples are passed straight to OpenCV drawing calls, which interpret
# them in BGR order (not RGB) — the names below describe what is displayed.
COLORS = [
    (0, 255, 255),   # yellow       (class 0, e.g. kulemak)
    (255, 0, 255),   # magenta      (class 1, e.g. arbiter)
    (0, 255, 0),     # green
    (255, 128, 0),   # azure blue   (B=255, G=128, R=0)
    (128, 0, 255),   # rose / pink  (B=128, G=0, R=255)
]

HANDLE_R = 7          # corner handle radius (px, window space)
MIN_BOX  = 0.01       # min normalised box dimension for a newly drawn box
PREDICT_CONF = 0.20   # confidence threshold passed to the model on predict

# Layout (all values are window pixels)
TOOLBAR_Y = 32        # top of toolbar row (below info line)
TOOLBAR_H = 30        # toolbar row height
IMG_TOP   = TOOLBAR_Y + TOOLBAR_H + 4   # image area starts here
HELP_H    = 22        # reserved at bottom for help text

# Windows arrow / special key codes from cv2.waitKeyEx
# NOTE(review): these values are platform-specific; other GUI backends
# report different codes for the same keys.
K_LEFT  = 2424832
K_RIGHT = 2555904
K_DEL   = 3014656


# ── Box (normalised centre/size record) ───────────────────────────
class Box:
    """One annotation box stored as centre, size and class id.

    Coordinates are kept in the same (cx, cy, w, h) layout that the label
    files use; the corner properties are derived on demand.
    """

    __slots__ = ("cx", "cy", "w", "h", "cls_id")

    def __init__(self, cx, cy, w, h, cls_id=0):
        self.cx = cx
        self.cy = cy
        self.w = w
        self.h = h
        self.cls_id = cls_id

    @property
    def x1(self):
        """Left edge."""
        return self.cx - self.w / 2

    @property
    def y1(self):
        """Top edge."""
        return self.cy - self.h / 2

    @property
    def x2(self):
        """Right edge."""
        return self.cx + self.w / 2

    @property
    def y2(self):
        """Bottom edge."""
        return self.cy + self.h / 2

    def set_corners(self, x1, y1, x2, y2):
        """Re-derive centre and size from two opposite corners (any order)."""
        self.cx, self.cy = (x1 + x2) / 2, (y1 + y2) / 2
        self.w, self.h = abs(x2 - x1), abs(y2 - y1)

    def contains(self, nx, ny):
        """Return True when the point lies inside the box (edges inclusive)."""
        within_x = self.x1 <= nx <= self.x2
        within_y = self.y1 <= ny <= self.y2
        return within_x and within_y

    def corner_at(self, nx, ny, thr):
        """Return "tl"/"tr"/"bl"/"br" for the first corner within *thr*.

        The point must be closer than *thr* to the corner on both axes.
        Returns None when no corner qualifies.
        """
        left, top, right, bottom = self.x1, self.y1, self.x2, self.y2
        corners = (
            ("tl", left, top),
            ("tr", right, top),
            ("bl", left, bottom),
            ("br", right, bottom),
        )
        for tag, hx, hy in corners:
            if abs(nx - hx) < thr and abs(ny - hy) < thr:
                return tag
        return None

    def copy(self):
        """Return an independent Box with the same field values."""
        return Box(self.cx, self.cy, self.w, self.h, self.cls_id)


# ── Toolbar button ────────────────────────────────────────────────
class Button:
    """Clickable toolbar entry: a caption, an action key and a pixel rect.

    The rectangle starts out as all zeros and is filled in by whoever lays
    the toolbar out.
    """

    __slots__ = ("label", "action", "x1", "y1", "x2", "y2")

    def __init__(self, label, action):
        self.label, self.action = label, action
        self.x1 = 0
        self.y1 = 0
        self.x2 = 0
        self.y2 = 0

    def hit(self, wx, wy):
        """Return True when window point (wx, wy) is inside the rect (inclusive)."""
        in_columns = self.x1 <= wx <= self.x2
        in_rows = self.y1 <= wy <= self.y2
        return in_columns and in_rows


# ── Main tool ─────────────────────────────────────────────────────
class Annotator:

    def __init__(self, img_dir, classes=None):
        """Set up all annotator state for the images found in *img_dir*.

        img_dir : directory scanned for images (stored as an absolute path).
        classes : list of class names; falls back to DEFAULT_CLASSES when
                  omitted or empty.
        """
        self.classes = classes or DEFAULT_CLASSES
        self.img_dir = os.path.abspath(img_dir)
        self.all_files = self._scan()

        # Filter modes: four fixed ones followed by one "class:<i>" per class.
        per_class = [f"class:{i}" for i in range(len(self.classes))]
        self.FILTERS = ["all", "unlabeled", "empty", "labeled"] + per_class
        self.filt_idx = 0
        self.files = list(self.all_files)   # working copy narrowed by the filter
        self.pos = 0

        # Currently loaded image and its annotations.
        self.img = None
        self.iw = self.ih = 0
        self.boxes = []
        self.sel = -1            # index of the selected box, -1 for none
        self.cur_cls = 0         # class assigned to newly drawn boxes
        self.dirty = False
        self.undo_stack = []

        # Mouse-drag bookkeeping; everything is None while no drag is active.
        self.mode = None
        self.d_start = None
        self.d_anchor = None
        self.d_orig = None
        self.mouse_n = None

        # Window / display cache.
        self.WIN = "Annotator"
        self.ww, self.wh = 1600, 900
        self._cache = None
        self._cache_key = None

        # Toolbar buttons; their rectangles are assigned during _draw.
        button_specs = (
            ("[P] Predict", "predict"),
            ("[Space] Save+Next", "save_next"),
            ("[F] Filter", "filter"),
            ("[Z] Undo", "undo"),
            ("[X] Del Image", "del_img"),
        )
        self.buttons = [Button(label, action) for label, action in button_specs]

        # YOLO model is loaded lazily on the first predict request.
        self._model = None
        self._model_tried = False

        # Session counters reported when the tool exits.
        self.n_saved = 0
        self.n_deleted = 0

    # ── file scanning ─────────────────────────────────────────────

    def _scan(self):
        files = []
        for ext in ("*.jpg", "*.jpeg", "*.png"):
            files.extend(glob.glob(os.path.join(self.img_dir, ext)))
        files.sort()
        return files

    @staticmethod
    def _lbl(fp):
        return os.path.splitext(fp)[0] + ".txt"

    @staticmethod
    def _is_empty_label(lp):
        """Label file exists but has no boxes (negative example)."""
        if not os.path.exists(lp):
            return False
        with open(lp) as f:
            return f.read().strip() == ""

    @staticmethod
    def _has_labels(lp):
        """Label file exists and contains at least one box."""
        if not os.path.exists(lp):
            return False
        with open(lp) as f:
            return f.read().strip() != ""

    def _refilter(self):
        mode = self.FILTERS[self.filt_idx]
        if mode == "all":
            self.files = [f for f in self.all_files if os.path.exists(f)]
        elif mode == "unlabeled":
            self.files = [f for f in self.all_files
                          if os.path.exists(f) and not os.path.exists(self._lbl(f))]
        elif mode == "empty":
            self.files = [f for f in self.all_files
                          if os.path.exists(f) and self._is_empty_label(self._lbl(f))]
        elif mode == "labeled":
            self.files = [f for f in self.all_files
                          if os.path.exists(f) and self._has_labels(self._lbl(f))]
        elif mode.startswith("class:"):
            cid = int(mode.split(":")[1])
            self.files = []
            for f in self.all_files:
                if not os.path.exists(f):
                    continue
                lp = self._lbl(f)
                if os.path.exists(lp):
                    with open(lp) as fh:
                        if any(l.strip().startswith(f"{cid} ") for l in fh):
                            self.files.append(f)
        self.pos = max(0, min(self.pos, len(self.files) - 1))

    # ── I/O ───────────────────────────────────────────────────────

    def _load(self):
        """Load the image at self.pos together with its boxes.

        Returns True on success. Returns False when the file list is empty
        or the image cannot be decoded; in the latter case self.img is None
        and all per-image state (boxes, selection, dirty flag, undo history)
        is cleared so annotations from the previously shown image can never
        be saved onto the unreadable file's label.
        """
        if not self.files:
            return False
        path = self.files[self.pos]
        self.img = cv2.imread(path)

        # Per-image state is reset whether or not decoding worked.
        self._cache = None
        self.sel = -1
        self.dirty = False
        self.undo_stack.clear()

        if self.img is None:
            self.boxes = []
            print(f"  Could not read image: {os.path.basename(path)}")
            return False

        self.ih, self.iw = self.img.shape[:2]
        self._load_boxes()
        return True

    def _load_boxes(self):
        self.boxes = []
        lp = self._lbl(self.files[self.pos])
        if not os.path.exists(lp):
            return
        with open(lp) as f:
            for line in f:
                p = line.strip().split()
                if len(p) >= 5:
                    self.boxes.append(
                        Box(float(p[1]), float(p[2]),
                            float(p[3]), float(p[4]), int(p[0])))

    def _save(self):
        if not self.files:
            return
        lp = self._lbl(self.files[self.pos])
        with open(lp, "w") as f:
            for b in self.boxes:
                f.write(f"{b.cls_id} {b.cx:.6f} {b.cy:.6f} "
                        f"{b.w:.6f} {b.h:.6f}\n")
        self.n_saved += 1
        self.dirty = False

    def _push_undo(self):
        """Snapshot the current boxes onto the undo stack (max 50 entries)."""
        snapshot = [box.copy() for box in self.boxes]
        self.undo_stack.append(snapshot)
        # Drop the oldest snapshot once the history exceeds its cap.
        if len(self.undo_stack) > 50:
            del self.undo_stack[0]

    def _pop_undo(self):
        """Restore the most recent snapshot; no-op when history is empty.

        A successful restore clears the selection and marks the image dirty.
        """
        if self.undo_stack:
            self.boxes = self.undo_stack.pop()
            self.sel, self.dirty = -1, True

    # ── YOLO predict ──────────────────────────────────────────────

    def _ensure_model(self):
        """Load the YOLO model on first use; return True when one is ready.

        Looks in the "models" directory next to this file. The highest
        numbered "boss-v<N>.pt" wins; when none exists, the alphabetically
        first other ".pt" file is used. Only one attempt is ever made: later
        calls just report whether that attempt produced a model.

        A missing ultralytics package is reported and treated as "no model"
        rather than raised, so the annotator stays usable without it.
        """
        if self._model_tried:
            return self._model is not None
        self._model_tried = True

        import re
        model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
        best_name, best_ver = None, -1
        if os.path.isdir(model_dir):
            # Sorted so the fallback pick does not depend on listdir order.
            for name in sorted(os.listdir(model_dir)):
                if not name.endswith(".pt"):
                    continue
                m = re.match(r"boss-v(\d+)\.pt$", name)
                if m and int(m.group(1)) > best_ver:
                    best_ver = int(m.group(1))
                    best_name = name
                elif best_name is None:
                    best_name = name

        if not best_name:
            print(f"  No .pt model found in {model_dir}")
            return False

        try:
            from ultralytics import YOLO
        except ImportError as exc:
            print(f"  Cannot predict: ultralytics is not available ({exc})")
            return False

        path = os.path.join(model_dir, best_name)
        print(f"  Loading model: {best_name} ...")
        self._model = YOLO(path)
        print("  Model loaded.")
        return True

    def _predict(self):
        """Replace the current boxes with the model's detections.

        Does nothing when no image is loaded or no model is available.
        Inference runs on the image file at PREDICT_CONF confidence. The
        previous boxes are pushed onto the undo stack only after inference
        has returned, so a failed run leaves no spurious undo entry.
        """
        if self.img is None or not self.files:
            return
        if not self._ensure_model():
            return
        results = self._model(self.files[self.pos], conf=PREDICT_CONF, verbose=False)
        self._push_undo()
        self.boxes = []
        for det in results[0].boxes:
            cx, cy, w, h = det.xywhn[0].tolist()
            self.boxes.append(Box(cx, cy, w, h, int(det.cls[0])))
        self.sel = -1
        self.dirty = True
        print(f"  Predicted {len(self.boxes)} box(es)")

    # ── coordinate transforms (letterbox) ─────────────────────────

    def _xform(self):
        """Return (scale, offset_x, offset_y) of the letterboxed image.

        The image is scaled uniformly to fit the window width and the
        vertical space between IMG_TOP and the help bar, then centred in
        that area.
        """
        usable_h = max(1, self.wh - IMG_TOP - HELP_H)
        scale = min(self.ww / self.iw, usable_h / self.ih)
        shown_w, shown_h = int(self.iw * scale), int(self.ih * scale)
        off_x = (self.ww - shown_w) // 2
        off_y = IMG_TOP + (usable_h - shown_h) // 2
        return scale, off_x, off_y

    def _to_norm(self, wx, wy):
        """Convert window pixels to normalised image coordinates."""
        scale, off_x, off_y = self._xform()
        norm_x = (wx - off_x) / (self.iw * scale)
        norm_y = (wy - off_y) / (self.ih * scale)
        return norm_x, norm_y

    def _to_win(self, nx, ny):
        """Convert normalised image coordinates to integer window pixels."""
        scale, off_x, off_y = self._xform()
        win_x = int(nx * self.iw * scale + off_x)
        win_y = int(ny * self.ih * scale + off_y)
        return win_x, win_y

    def _corner_thr(self):
        """Return the corner grab distance in normalised units.

        It is HANDLE_R plus 4 px of slack, divided by the displayed length
        of the image's shorter side.
        """
        scale = self._xform()[0]
        shorter_side = min(self.iw, self.ih)
        return (HANDLE_R + 4) / (shorter_side * scale)

    # ── hit-test ──────────────────────────────────────────────────

    def _hit(self, nx, ny):
        """Find what lies under normalised point (nx, ny).

        Returns (index, tag) where tag is a corner name ("tl"/"tr"/"bl"/"br")
        or "inside", or (-1, None) when nothing is hit. Priority order:
        the selected box (corner, then body), then any box's corner, then
        any box's body; ties go to the lowest index.
        """
        thr = self._corner_thr()
        boxes = self.boxes
        current = self.sel

        # The selected box wins over everything else.
        if 0 <= current < len(boxes):
            chosen = boxes[current]
            tag = chosen.corner_at(nx, ny, thr)
            if tag:
                return current, tag
            if chosen.contains(nx, ny):
                return current, "inside"

        # Corners of any box take precedence over box bodies.
        for idx, candidate in enumerate(boxes):
            tag = candidate.corner_at(nx, ny, thr)
            if tag:
                return idx, tag

        for idx, candidate in enumerate(boxes):
            if candidate.contains(nx, ny):
                return idx, "inside"

        return -1, None

    # ── drawing ───────────────────────────────────────────────────

    def _scaled_base(self):
        """Return (canvas, scale, offset_x, offset_y) for a fresh frame.

        The canvas is a black window-sized image with the letterboxed
        picture pasted in. The rendered frame is cached by displayed size
        and window size; callers always receive a copy they may draw on.
        """
        scale, off_x, off_y = self._xform()
        shown_w, shown_h = int(self.iw * scale), int(self.ih * scale)
        cache_key = ((shown_w, shown_h), self.ww, self.wh)

        if self._cache is None or self._cache_key != cache_key:
            frame = np.zeros((self.wh, self.ww, 3), np.uint8)
            frame[off_y:off_y + shown_h, off_x:off_x + shown_w] = cv2.resize(
                self.img, (shown_w, shown_h), interpolation=cv2.INTER_AREA)
            self._cache = frame
            self._cache_key = cache_key

        return self._cache.copy(), scale, off_x, off_y

    def _draw(self):
        """Render the whole window and show it.

        Layers, in paint order: letterboxed image, annotation boxes with
        their class tags (plus corner handles on the selected box), the
        rubber-band of a box being drawn, the info line, the class legend,
        the toolbar (whose button rectangles are laid out here) and the help
        bar. With no image loaded only a "No images" placeholder is shown.
        """
        font = cv2.FONT_HERSHEY_SIMPLEX

        if self.img is None:
            blank = np.zeros((self.wh, self.ww, 3), np.uint8)
            cv2.putText(blank, "No images",
                        (self.ww // 2 - 60, self.wh // 2),
                        font, 0.7, (128, 128, 128), 1)
            cv2.imshow(self.WIN, blank)
            return

        canvas = self._scaled_base()[0]
        n_colors = len(COLORS)
        n_classes = len(self.classes)

        # ── Annotation boxes ──
        for idx, box in enumerate(self.boxes):
            colour = COLORS[box.cls_id % n_colors]
            selected = idx == self.sel
            top_left = self._to_win(box.x1, box.y1)
            bottom_right = self._to_win(box.x2, box.y2)
            cv2.rectangle(canvas, top_left, bottom_right, colour,
                          3 if selected else 2)

            if box.cls_id < n_classes:
                tag = self.classes[box.cls_id]
            else:
                tag = f"c{box.cls_id}"
            (tag_w, tag_h), _ = cv2.getTextSize(tag, font, 0.55, 1)
            left, top = top_left
            # Filled tab above the box with the class name in black.
            cv2.rectangle(canvas, (left, top - tag_h - 8),
                          (left + tag_w + 6, top), colour, -1)
            cv2.putText(canvas, tag, (left + 3, top - 5),
                        font, 0.55, (0, 0, 0), 1)

            if not selected:
                continue
            right, bottom = bottom_right
            for handle in ((left, top), (right, top),
                           (left, bottom), (right, bottom)):
                cv2.circle(canvas, handle, HANDLE_R, (255, 255, 255), -1)
                cv2.circle(canvas, handle, HANDLE_R, colour, 2)

        # ── Rubber-band for a box being drawn ──
        if self.mode == "draw" and self.d_start and self.mouse_n:
            cv2.rectangle(canvas,
                          self._to_win(*self.d_start),
                          self._to_win(*self.mouse_n),
                          COLORS[self.cur_cls % n_colors], 2)

        # ── HUD info line ──
        if self.files:
            if self.cur_cls < n_classes:
                new_cls_name = self.classes[self.cur_cls]
            else:
                new_cls_name = f"c{self.cur_cls}"
            fname = os.path.basename(self.files[self.pos])
            filt = self.FILTERS[self.filt_idx]
            n = len(self.files)
            info = (f"[{self.pos + 1}/{n}]  {fname}  |  "
                    f"filter: {filt}  |  new class: {new_cls_name}  |  "
                    f"boxes: {len(self.boxes)}")
            if self.dirty:
                info += "  *"
            cv2.putText(canvas, info, (10, 22),
                        font, 0.5, (220, 220, 220), 1)

        # ── Class legend (top-right, one row per class) ──
        for row, cls_name in enumerate(self.classes):
            entry = f"{row + 1}: {cls_name}"
            (entry_w, _), _ = cv2.getTextSize(entry, font, 0.5, 1)
            cv2.putText(canvas, entry,
                        (self.ww - entry_w - 12, 22 + row * 20),
                        font, 0.5, COLORS[row % n_colors], 1)

        # ── Toolbar buttons (laid out left to right) ──
        btn_h = TOOLBAR_H - 4
        cursor_x = 10
        for btn in self.buttons:
            (cap_w, _cap_h), _ = cv2.getTextSize(btn.label, font, 0.45, 1)
            btn.x1, btn.y1 = cursor_x, TOOLBAR_Y
            btn.x2, btn.y2 = cursor_x + cap_w + 16, TOOLBAR_Y + btn_h
            corner_a = (btn.x1, btn.y1)
            corner_b = (btn.x2, btn.y2)
            cv2.rectangle(canvas, corner_a, corner_b, (60, 60, 60), -1)
            cv2.rectangle(canvas, corner_a, corner_b, (140, 140, 140), 1)
            cv2.putText(canvas, btn.label,
                        (cursor_x + 8, TOOLBAR_Y + btn_h - 7),
                        font, 0.45, (220, 220, 220), 1)
            cursor_x = btn.x2 + 6

        # ── Help bar (bottom) ──
        help_text = ("drag=draw | click=select | drag=move/resize | RClick=cycle class"
                     " | 1-9=class | Del=remove box | E=deselect")
        cv2.putText(canvas, help_text, (10, self.wh - 6),
                    font, 0.36, (120, 120, 120), 1)

        cv2.imshow(self.WIN, canvas)

    # ── mouse ─────────────────────────────────────────────────────

    def _on_mouse(self, ev, wx, wy, flags, _):
        """OpenCV mouse callback: toolbar clicks, select/move/resize/draw.

        ev     : cv2 mouse event code
        wx, wy : cursor position in window pixels
        flags, _ : unused callback arguments

        Toolbar clicks are honoured even with no image loaded; every other
        interaction is ignored in that state, because the window-to-image
        transform is undefined without an image.

        Drawn and resized boxes are clamped to the image, and moved boxes
        are kept fully inside it, so saved coordinates stay within [0, 1].
        """
        if ev == cv2.EVENT_LBUTTONDOWN:
            # Toolbar buttons take priority over anything underneath them.
            for btn in self.buttons:
                if btn.hit(wx, wy):
                    self._do_action(btn.action)
                    return

        if self.img is None:
            return

        def clamp(value, lo, hi):
            return min(hi, max(lo, value))

        def keep_inside(center, size):
            # Boxes larger than the image cannot fit; leave them untouched.
            half = size / 2
            if half > 0.5:
                return center
            return clamp(center, half, 1.0 - half)

        nx, ny = self._to_norm(wx, wy)            # raw: used for hit-testing
        cnx, cny = clamp(nx, 0.0, 1.0), clamp(ny, 0.0, 1.0)
        self.mouse_n = (cnx, cny)                 # clamped: drives the rubber-band

        if ev == cv2.EVENT_LBUTTONDOWN:
            # only interact with image area below toolbar
            if wy < IMG_TOP:
                return

            idx, what = self._hit(nx, ny)
            if what in ("tl", "tr", "bl", "br"):
                self._push_undo()
                self.sel = idx
                self.mode = "resize"
                b = self.boxes[idx]
                # The corner diagonally opposite the grabbed one stays fixed.
                opp = {"tl": (b.x2, b.y2), "tr": (b.x1, b.y2),
                       "bl": (b.x2, b.y1), "br": (b.x1, b.y1)}
                self.d_anchor = opp[what]
                self.d_start = (nx, ny)
            elif what == "inside":
                self._push_undo()
                self.sel = idx
                self.mode = "move"
                self.d_start = (nx, ny)
                b = self.boxes[idx]
                self.d_orig = (b.cx, b.cy)
            else:
                self.sel = -1
                self.mode = "draw"
                self.d_start = (cnx, cny)
            self._draw()

        elif ev == cv2.EVENT_MOUSEMOVE:
            # A key press can delete or undo the box mid-drag, so re-check
            # that the selection is still valid before touching it.
            sel_ok = 0 <= self.sel < len(self.boxes)
            if self.mode == "draw":
                self._draw()
            elif self.mode == "move" and self.d_start and sel_ok:
                b = self.boxes[self.sel]
                b.cx = keep_inside(self.d_orig[0] + (nx - self.d_start[0]), b.w)
                b.cy = keep_inside(self.d_orig[1] + (ny - self.d_start[1]), b.h)
                self.dirty = True
                self._draw()
            elif self.mode == "resize" and self.d_anchor and sel_ok:
                b = self.boxes[self.sel]
                ax, ay = self.d_anchor
                b.set_corners(min(ax, cnx), min(ay, cny),
                              max(ax, cnx), max(ay, cny))
                self.dirty = True
                self._draw()

        elif ev == cv2.EVENT_LBUTTONUP:
            if self.mode == "draw" and self.d_start:
                x1, y1 = min(self.d_start[0], cnx), min(self.d_start[1], cny)
                x2, y2 = max(self.d_start[0], cnx), max(self.d_start[1], cny)
                # Tiny drags are treated as clicks, not new boxes.
                if (x2 - x1) > MIN_BOX and (y2 - y1) > MIN_BOX:
                    self._push_undo()
                    b = Box(0, 0, 0, 0, self.cur_cls)
                    b.set_corners(x1, y1, x2, y2)
                    self.boxes.append(b)
                    self.sel = len(self.boxes) - 1
                    self.dirty = True
            self.mode = None
            self.d_start = self.d_anchor = self.d_orig = None
            self._draw()

        elif ev == cv2.EVENT_RBUTTONDOWN:
            idx, _ = self._hit(nx, ny)
            if idx >= 0:
                self._push_undo()
                self.sel = idx
                self.boxes[idx].cls_id = \
                    (self.boxes[idx].cls_id + 1) % len(self.classes)
                self.dirty = True
                self._draw()

    # ── actions (shared by keys + buttons) ────────────────────────

    def _do_action(self, action):
        """Run the named toolbar/keyboard action; unknown names are ignored.

        "predict" and "undo" are followed by a redraw here; the remaining
        actions handle their own redrawing.
        """
        # action name -> (handler, redraw afterwards?)
        table = {
            "predict": (self._predict, True),
            "save_next": (self._do_save_next, False),
            "filter": (self._do_filter, False),
            "undo": (self._pop_undo, True),
            "del_img": (self._do_del_img, False),
        }
        entry = table.get(action)
        if entry is None:
            return
        handler, redraw = entry
        handler()
        if redraw:
            self._draw()

    def _do_save_next(self):
        """Save the current image's labels and advance to the next image.

        Nothing is written when the current image failed to load, so an
        unreadable file is never turned into a labelled example. On the last
        image the position cannot advance; the window is then redrawn so the
        "unsaved" marker in the info line disappears.
        """
        if not self.files:
            return
        if self.img is not None:
            self._save()
            fname = os.path.basename(self.files[self.pos])
            print(f"  Saved {fname} ({len(self.boxes)} box(es))")
        before = self.pos
        self._goto(self.pos + 1)
        if self.pos == before:
            # _goto returns without redrawing when the position is unchanged.
            self._draw()

    def _do_filter(self):
        """Advance to the next filter mode, saving unsaved work first.

        The file list is rebuilt for the new mode; the image at the clamped
        position is loaded, or the placeholder is shown when nothing matches.
        """
        if self.dirty:
            self._save()
        self.filt_idx = (self.filt_idx + 1) % len(self.FILTERS)
        self._refilter()

        if not self.files:
            self.img = None
            self._draw()
            print(f"  Filter: {self.FILTERS[self.filt_idx]} (0 images)")
            return

        self._load()
        self._draw()
        print(f"  Filter: {self.FILTERS[self.filt_idx]}"
              f" ({len(self.files)} images)")

    def _do_del_img(self):
        """Delete the current image and its label from disk, then move on.

        The file is removed from the master list, the filter is re-applied
        and whatever image now sits at the (clamped) position is shown, or
        the placeholder when none remain. Pending edits are discarded.
        """
        if not self.files:
            return

        img_path = self.files[self.pos]
        label_path = self._lbl(img_path)
        for victim in (img_path, label_path):
            if os.path.exists(victim):
                os.remove(victim)

        self.n_deleted += 1
        print(f"  Deleted {os.path.basename(img_path)}")
        self.all_files = [f for f in self.all_files if f != img_path]
        self.dirty = False   # nothing left to save for the deleted image
        self._refilter()

        if self.files:
            self.pos = min(self.pos, len(self.files) - 1)
            self._load()
        else:
            self.img = None
        self._draw()

    # ── navigation ────────────────────────────────────────────────

    def _goto(self, new_pos):
        """Move to image index *new_pos* (clamped), auto-saving unsaved work.

        When the clamped index equals the current one and an image is
        already loaded, nothing is reloaded; the window is redrawn only if
        an auto-save just happened, so the "unsaved" marker is cleared.
        """
        saved = self.dirty
        if saved:
            self._save()
        new_pos = max(0, min(new_pos, len(self.files) - 1))
        if new_pos == self.pos and self.img is not None:
            if saved:
                self._draw()
            return
        self.pos = new_pos
        self._load()
        self._draw()

    # ── main loop ─────────────────────────────────────────────────

    def run(self):
        """Open the window and process input until the user quits.

        Returns immediately when the directory has no images or nothing
        matches the current filter. Unsaved work is written on quit and on
        window close. A dataset summary is printed at the end.

        Letter hotkeys are case-insensitive so they keep working with Caps
        Lock or Shift. Arrow and Delete keys are recognised by the Windows
        codes (K_LEFT / K_RIGHT / K_DEL) and by the X11 keysym values that
        Linux GUI builds report.
        """
        if not self.all_files:
            print(f"No images in {self.img_dir}")
            return

        cv2.namedWindow(self.WIN, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(self.WIN, self.ww, self.wh)
        cv2.setMouseCallback(self.WIN, self._on_mouse)

        self._refilter()
        if not self.files:
            print("No images match current filter")
            # The window was created above; do not leave it open.
            cv2.destroyAllWindows()
            return
        self._load()
        self._draw()

        # Second value of each pair is the X11 keysym (XK_Left, XK_Right,
        # XK_Delete); 8 is Backspace.
        left_keys = (K_LEFT, 65361)
        right_keys = (K_RIGHT, 65363)
        delete_keys = (K_DEL, 65535, 8)

        while True:
            key = cv2.waitKeyEx(30)

            # detect window close (user clicked X)
            if cv2.getWindowProperty(self.WIN, cv2.WND_PROP_VISIBLE) < 1:
                if self.dirty:
                    self._save()
                break

            # detect window resize
            try:
                r = cv2.getWindowImageRect(self.WIN)
                if r[2] > 0 and r[3] > 0 and \
                        (r[2] != self.ww or r[3] != self.wh):
                    self.ww, self.wh = r[2], r[3]
                    self._cache = None
                    self._draw()
            except cv2.error:
                pass

            if key == -1:
                continue

            # Fold upper-case letters onto the lower-case hotkeys.
            if ord("A") <= key <= ord("Z"):
                key += ord("a") - ord("A")

            # Quit
            if key in (ord("q"), 27):
                if self.dirty:
                    self._save()
                break

            # Save + next
            if key in (32, 13):
                self._do_save_next()
                continue

            # Navigation
            if key in left_keys:
                self._goto(self.pos - 1)
                continue
            if key in right_keys:
                self._goto(self.pos + 1)
                continue

            # Predict
            if key == ord("p"):
                self._predict()
                self._draw()
                continue

            # Delete selected box
            if key in delete_keys:
                if 0 <= self.sel < len(self.boxes):
                    self._push_undo()
                    self.boxes.pop(self.sel)
                    self.sel = -1
                    self.dirty = True
                    self._draw()
                continue

            # Delete image
            if key == ord("x"):
                self._do_del_img()
                continue

            # Undo
            if key == ord("z"):
                self._pop_undo()
                self._draw()
                continue

            # Filter
            if key == ord("f"):
                self._do_filter()
                continue

            # Number keys -> set class of the selected box and the default
            # class for new boxes
            if ord("1") <= key <= ord("9"):
                cls_id = key - ord("1")
                if cls_id < len(self.classes):
                    if 0 <= self.sel < len(self.boxes):
                        self._push_undo()
                        self.boxes[self.sel].cls_id = cls_id
                        self.dirty = True
                    self.cur_cls = cls_id
                    self._draw()
                continue

            # Deselect
            if key == ord("e"):
                self.sel = -1
                self._draw()
                continue

        cv2.destroyAllWindows()
        total = len(self.all_files)
        labeled = sum(1 for f in self.all_files if self._has_labels(self._lbl(f)))
        empty = sum(1 for f in self.all_files if self._is_empty_label(self._lbl(f)))
        unlabeled = total - labeled - empty
        print(f"\nDone. Saved: {self.n_saved}, Deleted: {self.n_deleted}")
        print(f"Dataset: {total} images, {labeled} labeled, "
              f"{empty} empty, {unlabeled} unlabeled")


def run_annotator(img_dir, classes=None):
    """Build an Annotator for *img_dir* and run it until the user quits.

    Entry point for manage.py and for standalone use; *classes* is passed
    through to Annotator unchanged.
    """
    Annotator(img_dir, classes).run()


def main():
    """Standalone entry: annotate argv[1], or the default raw-data folder."""
    cli_args = sys.argv[1:]
    if cli_args:
        img_dir = cli_args[0]
    else:
        img_dir = "../../training-data/kulemak/raw"
    run_annotator(img_dir)


if __name__ == "__main__":
    main()