""" Bounding-box annotator with select / move / resize / tag / filter / predict. Controls -------- Left drag (empty area) : draw new box Left click (on box) : select it Left drag (box body) : move it Left drag (corner) : resize it Right-click (on box) : cycle class 1-9 : set class of selected box (or default new-box class) Delete : remove selected box Space / Enter : save + next image Left / Right arrow : prev / next image P : predict — run YOLO model on current image F : cycle filter (All > Unlabeled > Empty > Labeled > per-class) Z : undo X : delete image file + next E : deselect Q / Escape : quit (auto-saves current) Toolbar buttons at the top are also clickable. Aspect ratio is always preserved (letterboxed). Saves YOLO-format .txt labels alongside images. """ import cv2 import numpy as np import os import sys import glob # ── Classes ────────────────────────────────────────────────────── # Passed in by manage.py via run_annotator(img_dir, classes). # Standalone fallback: single-class kulemak. 
# Class list used by Annotator when the caller does not pass one.
DEFAULT_CLASSES = ["kulemak"]

# Per-class drawing colours, passed straight to the cv2 drawing calls.
COLORS = [
    (0, 255, 255),   # cyan-yellow (kulemak)
    (255, 0, 255),   # magenta (arbiter)
    (0, 255, 0),     # green
    (255, 128, 0),   # orange
    (128, 0, 255),   # purple
]

HANDLE_R = 7          # corner handle radius (px)
MIN_BOX = 0.01        # min normalised box dimension
PREDICT_CONF = 0.20   # confidence threshold for auto-predict

# Layout
TOOLBAR_Y = 32                        # top of toolbar row (below info line)
TOOLBAR_H = 30                        # toolbar row height
IMG_TOP = TOOLBAR_Y + TOOLBAR_H + 4   # image area starts here
HELP_H = 22                           # reserved at bottom for help text

# Windows arrow / special key codes from cv2.waitKeyEx
K_LEFT = 2424832
K_RIGHT = 2555904
K_DEL = 3014656


# ── Box dataclass ─────────────────────────────────────────────────
class Box:
    """One annotation: centre, size and class id.

    Coordinates are stored as centre ``(cx, cy)`` plus width/height, the
    same field order that is written to the label file.  The ``x1/y1/x2/y2``
    properties expose the equivalent corner form.
    """

    __slots__ = ("cx", "cy", "w", "h", "cls_id")

    def __init__(self, cx, cy, w, h, cls_id=0):
        self.cx, self.cy, self.w, self.h, self.cls_id = cx, cy, w, h, cls_id

    def __repr__(self):
        return (f"{type(self).__name__}(cx={self.cx!r}, cy={self.cy!r}, "
                f"w={self.w!r}, h={self.h!r}, cls_id={self.cls_id!r})")

    @property
    def x1(self):
        """Left edge."""
        return self.cx - self.w / 2

    @property
    def y1(self):
        """Top edge."""
        return self.cy - self.h / 2

    @property
    def x2(self):
        """Right edge."""
        return self.cx + self.w / 2

    @property
    def y2(self):
        """Bottom edge."""
        return self.cy + self.h / 2

    def set_corners(self, x1, y1, x2, y2):
        """Redefine the box from two opposite corners (given in any order)."""
        self.cx = (x1 + x2) / 2
        self.cy = (y1 + y2) / 2
        # abs() makes the result independent of which corner came first.
        self.w = abs(x2 - x1)
        self.h = abs(y2 - y1)

    def contains(self, nx, ny):
        """Return True if the point lies inside the box (edges included)."""
        return self.x1 <= nx <= self.x2 and self.y1 <= ny <= self.y2

    def corner_at(self, nx, ny, thr):
        """Return the tag of the corner within ``thr`` of the point.

        The tag is one of ``"tl"``, ``"tr"``, ``"bl"``, ``"br"``; corners are
        tested in that order and the first match wins.  Returns ``None``
        when no corner is close enough on both axes.
        """
        corners = (
            (self.x1, self.y1, "tl"),
            (self.x2, self.y1, "tr"),
            (self.x1, self.y2, "bl"),
            (self.x2, self.y2, "br"),
        )
        for hx, hy, tag in corners:
            if abs(nx - hx) < thr and abs(ny - hy) < thr:
                return tag
        return None

    def copy(self):
        """Return an independent Box with the same field values."""
        return Box(self.cx, self.cy, self.w, self.h, self.cls_id)


# ── Toolbar button ────────────────────────────────────────────────
class Button:
    """Clickable toolbar entry: a label, an action name and a hit rectangle.

    The rectangle starts at all zeros and is expected to be filled in by
    whoever lays the toolbar out.
    """

    __slots__ = ("label", "action", "x1", "y1", "x2", "y2")

    def __init__(self, label, action):
        self.label = label
        self.action = action
        self.x1 = self.y1 = self.x2 = self.y2 = 0

    def __repr__(self):
        return f"{type(self).__name__}({self.label!r}, {self.action!r})"

    def hit(self, wx, wy):
        """Return True if the point is inside the button (edges included)."""
        return self.x1 <= wx <= self.x2 and self.y1 <= wy <= self.y2


# ── Main tool ─────────────────────────────────────────────────────
class Annotator:
    """Interactive OpenCV window for editing YOLO-format box labels.

    One image of ``img_dir`` is shown at a time, letterboxed into the
    window.  Boxes are kept in normalised image coordinates and written to
    a ``.txt`` file next to the image.
    """

    UNDO_LIMIT = 50  # maximum number of snapshots kept for undo

    def __init__(self, img_dir, classes=None):
        """Scan ``img_dir`` and set up the editor state.

        ``classes`` is the list of class names; when empty or ``None`` the
        module-level ``DEFAULT_CLASSES`` is used.
        """
        self.classes = classes or DEFAULT_CLASSES
        self.img_dir = os.path.abspath(img_dir)
        self.all_files = self._scan()

        # filter
        self.FILTERS = ["all", "unlabeled", "empty", "labeled"] + \
            [f"class:{i}" for i in range(len(self.classes))]
        self.filt_idx = 0
        self.files = list(self.all_files)
        self.pos = 0

        # image state
        self.img = None
        self.iw = 0
        self.ih = 0
        self.boxes = []
        self.sel = -1
        self.cur_cls = 0
        self.dirty = False
        self.undo_stack = []

        # drag state
        self.mode = None
        self.d_start = None
        self.d_anchor = None
        self.d_orig = None
        # Boxes as they were at mouse-down; pushed to the undo stack only
        # once the drag actually changes something.
        self.d_snapshot = None
        self.mouse_n = None

        # display
        self.WIN = "Annotator"
        self.ww = 1600
        self.wh = 900
        self._cache = None
        self._cache_key = None

        # toolbar buttons (laid out during _draw)
        self.buttons = [
            Button("[P] Predict", "predict"),
            Button("[Space] Save+Next", "save_next"),
            Button("[F] Filter", "filter"),
            Button("[Z] Undo", "undo"),
            Button("[X] Del Image", "del_img"),
        ]

        # YOLO model (lazy-loaded)
        self._model = None
        self._model_tried = False

        # stats
        self.n_saved = 0
        self.n_deleted = 0

    # ── small helpers ─────────────────────────────────────────────
    @staticmethod
    def _clamp(v, lo, hi):
        """Clamp ``v`` into ``[lo, hi]``; returns ``lo`` if the range is empty."""
        return max(lo, min(hi, v))

    @staticmethod
    def _clamp01(v):
        """Clamp a normalised coordinate into ``[0, 1]``."""
        return min(1.0, max(0.0, v))

    # ── file scanning ─────────────────────────────────────────────
    def _scan(self):
        """Return the sorted list of *.jpg / *.jpeg / *.png paths in img_dir."""
        files = []
        for ext in ("*.jpg", "*.jpeg", "*.png"):
            files.extend(glob.glob(os.path.join(self.img_dir, ext)))
        files.sort()
        return files

    @staticmethod
    def _lbl(fp):
        """Return the label path for an image path (same stem, ``.txt``)."""
        return os.path.splitext(fp)[0] + ".txt"

    @staticmethod
    def _label_text(lp):
        """Return the stripped content of a label file, or None if missing."""
        if not os.path.exists(lp):
            return None
        with open(lp) as f:
            return f.read().strip()

    @staticmethod
    def _is_empty_label(lp):
        """Label file exists but has no boxes (negative example)."""
        return Annotator._label_text(lp) == ""

    @staticmethod
    def _has_labels(lp):
        """Label file exists and contains at least one box."""
        text = Annotator._label_text(lp)
        return text is not None and text != ""

    @staticmethod
    def _label_has_class(lp, cid):
        """Return True if the label file has a box row whose class is ``cid``.

        A row counts only if it has at least five whitespace-separated
        fields and an integer first field - the same rule ``_load_boxes``
        uses - so the filter and the loader always agree.
        """
        if not os.path.exists(lp):
            return False
        with open(lp) as fh:
            for line in fh:
                parts = line.split()
                if len(parts) < 5:
                    continue
                try:
                    if int(parts[0]) == cid:
                        return True
                except ValueError:
                    continue
        return False

    def _refilter(self):
        """Rebuild ``self.files`` for the active filter and clamp ``self.pos``."""
        mode = self.FILTERS[self.filt_idx]
        # Images deleted from disk are dropped under every filter.
        present = [f for f in self.all_files if os.path.exists(f)]
        if mode == "all":
            self.files = present
        elif mode == "unlabeled":
            self.files = [f for f in present
                          if not os.path.exists(self._lbl(f))]
        elif mode == "empty":
            self.files = [f for f in present
                          if self._is_empty_label(self._lbl(f))]
        elif mode == "labeled":
            self.files = [f for f in present
                          if self._has_labels(self._lbl(f))]
        elif mode.startswith("class:"):
            cid = int(mode.split(":")[1])
            self.files = [f for f in present
                          if self._label_has_class(self._lbl(f), cid)]
        self.pos = max(0, min(self.pos, len(self.files) - 1))

    # ── I/O ───────────────────────────────────────────────────────
    def _load(self):
        """Load the image at ``self.pos`` together with its boxes.

        Per-image state is reset *before* the read so that a failed read
        can never leave the previous image's boxes around to be saved under
        the wrong file name.  Returns True on success, False when there is
        no file or the image cannot be decoded.
        """
        self.img = None
        self._cache = None
        self.boxes = []
        self.sel = -1
        self.dirty = False
        self.undo_stack.clear()
        self.mode = None
        self.d_start = self.d_anchor = self.d_orig = None
        self.d_snapshot = None
        if not self.files:
            return False
        img = cv2.imread(self.files[self.pos])
        if img is None:
            return False
        self.img = img
        self.ih, self.iw = img.shape[:2]
        self._load_boxes()
        return True

    def _load_boxes(self):
        """Read the current image's label file into ``self.boxes``.

        Rows with fewer than five fields or with non-numeric fields are
        skipped instead of aborting the whole load.
        """
        self.boxes = []
        lp = self._lbl(self.files[self.pos])
        if not os.path.exists(lp):
            return
        with open(lp) as f:
            for line in f:
                p = line.strip().split()
                if len(p) < 5:
                    continue
                try:
                    box = Box(float(p[1]), float(p[2]),
                              float(p[3]), float(p[4]), int(p[0]))
                except ValueError:
                    continue
                self.boxes.append(box)

    def _save(self):
        """Write ``self.boxes`` to the current image's label file."""
        if not self.files:
            return
        lp = self._lbl(self.files[self.pos])
        with open(lp, "w") as f:
            for b in self.boxes:
                f.write(f"{b.cls_id} {b.cx:.6f} {b.cy:.6f} "
                        f"{b.w:.6f} {b.h:.6f}\n")
        self.n_saved += 1
        self.dirty = False

    def _push_undo(self, snapshot=None):
        """Push a snapshot onto the undo stack (default: copy of current boxes).

        The stack is capped at ``UNDO_LIMIT`` entries; the oldest is dropped.
        """
        if snapshot is None:
            snapshot = [b.copy() for b in self.boxes]
        self.undo_stack.append(snapshot)
        if len(self.undo_stack) > self.UNDO_LIMIT:
            self.undo_stack.pop(0)

    def _commit_drag_undo(self):
        """Push the mouse-down snapshot, once, when a drag first edits a box."""
        if self.d_snapshot is not None:
            self._push_undo(self.d_snapshot)
            self.d_snapshot = None

    def _pop_undo(self):
        """Restore the most recent snapshot, if any, and clear the selection."""
        if not self.undo_stack:
            return
        self.boxes = self.undo_stack.pop()
        self.sel = -1
        self.dirty = True

    # ── YOLO predict ──────────────────────────────────────────────
    def _ensure_model(self):
        """Lazily load a model from the ``models`` directory next to this file.

        Prefers the highest-numbered ``boss-v<N>.pt``; otherwise falls back
        to the alphabetically first ``.pt`` file.  Loading is attempted only
        once.  Returns True when a model is available.
        """
        if self._model_tried:
            return self._model is not None
        self._model_tried = True

        import re

        model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 "models")
        best_name, best_ver = None, -1
        if os.path.isdir(model_dir):
            # sorted() makes the fallback choice independent of listdir order.
            for name in sorted(os.listdir(model_dir)):
                if not name.endswith(".pt"):
                    continue
                m = re.match(r"boss-v(\d+)\.pt$", name)
                if m and int(m.group(1)) > best_ver:
                    best_ver = int(m.group(1))
                    best_name = name
                elif best_name is None:
                    best_name = name

        if not best_name:
            print(f" No .pt model found in {model_dir}")
            return False

        path = os.path.join(model_dir, best_name)
        print(f" Loading model: {best_name} ...")
        try:
            from ultralytics import YOLO
        except ImportError:
            # Predict is optional; the editor stays usable without it.
            print(" ultralytics is not installed - predict unavailable")
            return False
        self._model = YOLO(path)
        print(" Model loaded.")
        return True

    def _predict(self):
        """Replace the current boxes with the model's detections (undoable)."""
        if self.img is None or not self.files:
            return
        if not self._ensure_model():
            return
        self._push_undo()
        results = self._model(self.files[self.pos], conf=PREDICT_CONF,
                              verbose=False)
        det = results[0].boxes
        self.boxes = []
        for box in det:
            cls_id = int(box.cls[0])
            cx, cy, w, h = box.xywhn[0].tolist()
            self.boxes.append(Box(cx, cy, w, h, cls_id))
        self.sel = -1
        self.dirty = True
        print(f" Predicted {len(self.boxes)} box(es)")

    # ── coordinate transforms (letterbox) ─────────────────────────
    def _xform(self):
        """Returns (scale, offset_x, offset_y) for letterbox display."""
        avail_h = max(1, self.wh - IMG_TOP - HELP_H)
        s = min(self.ww / self.iw, avail_h / self.ih)
        dw = int(self.iw * s)
        dh = int(self.ih * s)
        ox = (self.ww - dw) // 2
        oy = IMG_TOP + (avail_h - dh) // 2
        return s, ox, oy

    def _to_norm(self, wx, wy):
        """Convert window pixels to normalised image coordinates."""
        s, ox, oy = self._xform()
        return (wx - ox) / (self.iw * s), (wy - oy) / (self.ih * s)

    def _to_win(self, nx, ny):
        """Convert normalised image coordinates to integer window pixels."""
        s, ox, oy = self._xform()
        return int(nx * self.iw * s + ox), int(ny * self.ih * s + oy)

    def _corner_thr(self):
        """Return the corner grab distance in normalised units."""
        s, _, _ = self._xform()
        return (HANDLE_R + 4) / (min(self.iw, self.ih) * s)

    # ── hit-test ──────────────────────────────────────────────────
    def _hit(self, nx, ny):
        """Return ``(box_index, part)`` for the point, or ``(-1, None)``.

        ``part`` is a corner tag or ``"inside"``.  The selected box is
        tested first, then corners of every box, then box interiors.
        """
        thr = self._corner_thr()
        if 0 <= self.sel < len(self.boxes):
            b = self.boxes[self.sel]
            c = b.corner_at(nx, ny, thr)
            if c:
                return self.sel, c
            if b.contains(nx, ny):
                return self.sel, "inside"
        for i, b in enumerate(self.boxes):
            c = b.corner_at(nx, ny, thr)
            if c:
                return i, c
        for i, b in enumerate(self.boxes):
            if b.contains(nx, ny):
                return i, "inside"
        return -1, None

    # ── drawing ───────────────────────────────────────────────────
    def _scaled_base(self):
        """Return ``(canvas_copy, scale, ox, oy)`` with the image letterboxed.

        The blank-canvas-plus-image base is cached per scaled size and
        window size; callers always get a fresh copy to draw on.
        """
        s, ox, oy = self._xform()
        # At least 1x1 so cv2.resize never sees an empty target size.
        sz = (max(1, int(self.iw * s)), max(1, int(self.ih * s)))
        key = (sz, self.ww, self.wh)
        if self._cache is not None and self._cache_key == key:
            return self._cache.copy(), s, ox, oy
        canvas = np.zeros((self.wh, self.ww, 3), np.uint8)
        resized = cv2.resize(self.img, sz, interpolation=cv2.INTER_AREA)
        # Crop to what fits: in a very small window the image area can
        # extend past the canvas and a plain assignment would fail.
        target = canvas[oy:oy + sz[1], ox:ox + sz[0]]
        target[:] = resized[:target.shape[0], :target.shape[1]]
        self._cache = canvas
        self._cache_key = key
        return canvas.copy(), s, ox, oy

    def _draw(self):
        """Render image, boxes, HUD, toolbar and help bar into the window."""
        if self.img is None:
            canvas = np.zeros((self.wh, self.ww, 3), np.uint8)
            cv2.putText(canvas, "No images",
                        (self.ww // 2 - 60, self.wh // 2),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (128, 128, 128), 1)
            cv2.imshow(self.WIN, canvas)
            return

        canvas, s, ox, oy = self._scaled_base()

        # ── Annotation boxes ──
        for i, b in enumerate(self.boxes):
            col = COLORS[b.cls_id % len(COLORS)]
            is_sel = i == self.sel
            p1 = self._to_win(b.x1, b.y1)
            p2 = self._to_win(b.x2, b.y2)
            cv2.rectangle(canvas, p1, p2, col, 3 if is_sel else 2)
            name = self.classes[b.cls_id] if b.cls_id < len(self.classes) \
                else f"c{b.cls_id}"
            (tw, th), _ = cv2.getTextSize(name, cv2.FONT_HERSHEY_SIMPLEX,
                                          0.55, 1)
            cv2.rectangle(canvas, (p1[0], p1[1] - th - 8),
                          (p1[0] + tw + 6, p1[1]), col, -1)
            cv2.putText(canvas, name, (p1[0] + 3, p1[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 0), 1)
            if is_sel:
                for hx, hy in [p1, (p2[0], p1[1]), (p1[0], p2[1]), p2]:
                    cv2.circle(canvas, (hx, hy), HANDLE_R,
                               (255, 255, 255), -1)
                    cv2.circle(canvas, (hx, hy), HANDLE_R, col, 2)

        # rubber-band (clamped to the image, like the box it will become)
        if self.mode == "draw" and self.d_start and self.mouse_n:
            col = COLORS[self.cur_cls % len(COLORS)]
            start = self._to_win(self._clamp01(self.d_start[0]),
                                 self._clamp01(self.d_start[1]))
            end = self._to_win(self._clamp01(self.mouse_n[0]),
                               self._clamp01(self.mouse_n[1]))
            cv2.rectangle(canvas, start, end, col, 2)

        # ── HUD info line ──
        if self.files:
            fname = os.path.basename(self.files[self.pos])
            n = len(self.files)
            filt = self.FILTERS[self.filt_idx]
            cname = self.classes[self.cur_cls] \
                if self.cur_cls < len(self.classes) else f"c{self.cur_cls}"
            info = (f"[{self.pos + 1}/{n}] {fname} | "
                    f"filter: {filt} | new class: {cname} | "
                    f"boxes: {len(self.boxes)}")
            if self.dirty:
                info += " *"
            cv2.putText(canvas, info, (10, 22),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (220, 220, 220), 1)

        # class legend (top-right)
        for i, c in enumerate(self.classes):
            txt = f"{i + 1}: {c}"
            (tw, _), _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX,
                                         0.5, 1)
            cv2.putText(canvas, txt, (self.ww - tw - 12, 22 + i * 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        COLORS[i % len(COLORS)], 1)

        # ── Toolbar buttons ──
        bx = 10
        for btn in self.buttons:
            (tw, th), _ = cv2.getTextSize(btn.label,
                                          cv2.FONT_HERSHEY_SIMPLEX, 0.45, 1)
            bw = tw + 16
            bh = TOOLBAR_H - 4
            # Store the rectangle so _on_mouse can hit-test the button.
            btn.x1 = bx
            btn.y1 = TOOLBAR_Y
            btn.x2 = bx + bw
            btn.y2 = TOOLBAR_Y + bh
            # button bg
            cv2.rectangle(canvas, (btn.x1, btn.y1), (btn.x2, btn.y2),
                          (60, 60, 60), -1)
            cv2.rectangle(canvas, (btn.x1, btn.y1), (btn.x2, btn.y2),
                          (140, 140, 140), 1)
            # button text
            cv2.putText(canvas, btn.label, (bx + 8, TOOLBAR_Y + bh - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (220, 220, 220), 1)
            bx = btn.x2 + 6

        # ── Help bar (bottom) ──
        cv2.putText(
            canvas,
            "drag=draw | click=select | drag=move/resize | RClick=cycle class"
            " | 1-9=class | Del=remove box | E=deselect",
            (10, self.wh - 6),
            cv2.FONT_HERSHEY_SIMPLEX, 0.36, (120, 120, 120), 1)

        cv2.imshow(self.WIN, canvas)

    # ── mouse ─────────────────────────────────────────────────────
    def _on_mouse(self, ev, wx, wy, flags, _):
        """OpenCV mouse callback: toolbar clicks, draw / move / resize, class cycle.

        Every edit is clamped to the 0..1 image range so that dragging into
        the letterbox margin cannot produce out-of-range label values.
        """
        if self.img is None:
            return
        nx, ny = self._to_norm(wx, wy)
        self.mouse_n = (nx, ny)

        if ev == cv2.EVENT_LBUTTONDOWN:
            # check toolbar buttons first
            for btn in self.buttons:
                if btn.hit(wx, wy):
                    self._do_action(btn.action)
                    return
            # only interact with image area below toolbar
            if wy < IMG_TOP:
                return

            idx, what = self._hit(nx, ny)
            if what in ("tl", "tr", "bl", "br"):
                self.d_snapshot = [b.copy() for b in self.boxes]
                self.sel = idx
                self.mode = "resize"
                b = self.boxes[idx]
                # The corner opposite the grabbed one stays fixed.
                opp = {"tl": (b.x2, b.y2), "tr": (b.x1, b.y2),
                       "bl": (b.x2, b.y1), "br": (b.x1, b.y1)}
                self.d_anchor = opp[what]
                self.d_start = (nx, ny)
            elif what == "inside":
                self.d_snapshot = [b.copy() for b in self.boxes]
                self.sel = idx
                self.mode = "move"
                self.d_start = (nx, ny)
                b = self.boxes[idx]
                self.d_orig = (b.cx, b.cy)
            else:
                self.sel = -1
                self.mode = "draw"
                self.d_start = (nx, ny)
            self._draw()

        elif ev == cv2.EVENT_MOUSEMOVE:
            has_sel = 0 <= self.sel < len(self.boxes)
            if self.mode == "draw":
                self._draw()
            elif self.mode == "move" and self.d_start and has_sel:
                b = self.boxes[self.sel]
                new_cx = self._clamp(
                    self.d_orig[0] + (nx - self.d_start[0]),
                    b.w / 2, 1 - b.w / 2)
                new_cy = self._clamp(
                    self.d_orig[1] + (ny - self.d_start[1]),
                    b.h / 2, 1 - b.h / 2)
                # A plain click (no displacement) must not dirty the image
                # or consume an undo slot.
                if (new_cx, new_cy) != (b.cx, b.cy):
                    self._commit_drag_undo()
                    b.cx, b.cy = new_cx, new_cy
                    self.dirty = True
                    self._draw()
            elif self.mode == "resize" and self.d_anchor and has_sel:
                self._commit_drag_undo()
                b = self.boxes[self.sel]
                ax = self._clamp01(self.d_anchor[0])
                ay = self._clamp01(self.d_anchor[1])
                px, py = self._clamp01(nx), self._clamp01(ny)
                b.set_corners(min(ax, px), min(ay, py),
                              max(ax, px), max(ay, py))
                self.dirty = True
                self._draw()

        elif ev == cv2.EVENT_LBUTTONUP:
            if self.mode == "draw" and self.d_start:
                sx = self._clamp01(self.d_start[0])
                sy = self._clamp01(self.d_start[1])
                ex, ey = self._clamp01(nx), self._clamp01(ny)
                x1, y1 = min(sx, ex), min(sy, ey)
                x2, y2 = max(sx, ex), max(sy, ey)
                if (x2 - x1) > MIN_BOX and (y2 - y1) > MIN_BOX:
                    self._push_undo()
                    b = Box(0, 0, 0, 0, self.cur_cls)
                    b.set_corners(x1, y1, x2, y2)
                    self.boxes.append(b)
                    self.sel = len(self.boxes) - 1
                    self.dirty = True
            self.mode = None
            self.d_start = self.d_anchor = self.d_orig = None
            self.d_snapshot = None
            self._draw()

        elif ev == cv2.EVENT_RBUTTONDOWN:
            idx, _ = self._hit(nx, ny)
            if idx >= 0:
                self._push_undo()
                self.sel = idx
                self.boxes[idx].cls_id = \
                    (self.boxes[idx].cls_id + 1) % len(self.classes)
                self.dirty = True
                self._draw()

    # ── actions (shared by keys + buttons) ────────────────────────
    def _do_action(self, action):
        """Run the named toolbar/keyboard action; unknown names are ignored."""
        if action == "predict":
            self._predict()
            self._draw()
        elif action == "save_next":
            self._do_save_next()
        elif action == "filter":
            self._do_filter()
        elif action == "undo":
            self._pop_undo()
            self._draw()
        elif action == "del_img":
            self._do_del_img()

    def _do_save_next(self):
        """Save the current image's labels and advance to the next image.

        When the current file could not be loaded nothing is written (there
        is nothing to label); the position still advances.
        """
        if not self.files:
            return
        if self.img is not None:
            self._save()
            fname = os.path.basename(self.files[self.pos])
            print(f" Saved {fname} ({len(self.boxes)} box(es))")
        self._goto(self.pos + 1)

    def _do_filter(self):
        """Switch to the next filter, saving pending edits first."""
        self.filt_idx = (self.filt_idx + 1) % len(self.FILTERS)
        if self.dirty:
            self._save()
        self._refilter()
        if self.files:
            self._load()
            self._draw()
            print(f" Filter: {self.FILTERS[self.filt_idx]}"
                  f" ({len(self.files)} images)")
        else:
            self.img = None
            self._draw()
            print(f" Filter: {self.FILTERS[self.filt_idx]} (0 images)")

    def _do_del_img(self):
        """Delete the current image and its label file, then show the next one."""
        if not self.files:
            return
        fp = self.files[self.pos]
        lp = self._lbl(fp)
        try:
            if os.path.exists(fp):
                os.remove(fp)
            if os.path.exists(lp):
                os.remove(lp)
        except OSError as exc:
            print(f" Could not delete {os.path.basename(fp)}: {exc}")
            if os.path.exists(fp):
                # Nothing was removed; keep showing the image.
                return
        self.n_deleted += 1
        print(f" Deleted {os.path.basename(fp)}")
        self.all_files = [f for f in self.all_files if f != fp]
        self.dirty = False
        self._refilter()
        if not self.files:
            self.img = None
            self._draw()
            return
        self.pos = min(self.pos, len(self.files) - 1)
        self._load()
        self._draw()

    # ── navigation ────────────────────────────────────────────────
    def _goto(self, new_pos):
        """Save pending edits and show the image at ``new_pos`` (clamped)."""
        if self.dirty:
            self._save()
        new_pos = max(0, min(new_pos, len(self.files) - 1))
        if new_pos == self.pos and self.img is not None:
            # Already there (e.g. at either end of the list): just refresh
            # the HUD so a cleared dirty marker becomes visible.
            self._draw()
            return
        self.pos = new_pos
        self._load()
        self._draw()

    # ── main loop ─────────────────────────────────────────────────
    def _sync_window_size(self):
        """Pick up a user resize of the window and redraw at the new size."""
        try:
            r = cv2.getWindowImageRect(self.WIN)
            if r[2] > 0 and r[3] > 0 and \
                    (r[2] != self.ww or r[3] != self.wh):
                self.ww, self.wh = r[2], r[3]
                self._cache = None
                self._draw()
        except cv2.error:
            pass  # best effort: keep the previous size

    def _handle_key(self, key):
        """Dispatch one key code; return False when the loop should end."""
        # Fold A-Z onto a-z so shortcuts also work with Caps Lock on.
        if ord("A") <= key <= ord("Z"):
            key += 32

        # Quit
        if key in (ord("q"), 27):
            if self.dirty:
                self._save()
            return False

        if key in (32, 13):                      # Space / Enter: save + next
            self._do_action("save_next")
        elif key == K_LEFT:
            self._goto(self.pos - 1)
        elif key == K_RIGHT:
            self._goto(self.pos + 1)
        elif key == ord("p"):
            self._do_action("predict")
        elif key == K_DEL or key == 8:           # Delete / Backspace
            if 0 <= self.sel < len(self.boxes):
                self._push_undo()
                self.boxes.pop(self.sel)
                self.sel = -1
                self.dirty = True
                self._draw()
        elif key == ord("x"):
            self._do_action("del_img")
        elif key == ord("z"):
            self._do_action("undo")
        elif key == ord("f"):
            self._do_action("filter")
        elif ord("1") <= key <= ord("9"):
            # Sets the class of the selected box (if any) and the class
            # used for newly drawn boxes.
            cls_id = key - ord("1")
            if cls_id < len(self.classes):
                if 0 <= self.sel < len(self.boxes):
                    self._push_undo()
                    self.boxes[self.sel].cls_id = cls_id
                    self.dirty = True
                self.cur_cls = cls_id
                self._draw()
        elif key == ord("e"):
            self.sel = -1
            self._draw()
        return True

    def run(self):
        """Open the window and process events until quit or window close."""
        cv2.namedWindow(self.WIN, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(self.WIN, self.ww, self.wh)
        cv2.setMouseCallback(self.WIN, self._on_mouse)

        if not self.all_files:
            print(f"No images in {self.img_dir}")
            self._draw()
        else:
            self._refilter()
            if self.files:
                self._load()
            self._draw()

        while True:
            key = cv2.waitKeyEx(30)

            # detect window close (user clicked X)
            if cv2.getWindowProperty(self.WIN, cv2.WND_PROP_VISIBLE) < 1:
                if self.dirty:
                    self._save()
                break

            self._sync_window_size()

            if key == -1:
                continue
            if not self._handle_key(key):
                break

        cv2.destroyAllWindows()

        total = len(self.all_files)
        labeled = sum(1 for f in self.all_files
                      if self._has_labels(self._lbl(f)))
        empty = sum(1 for f in self.all_files
                    if self._is_empty_label(self._lbl(f)))
        unlabeled = total - labeled - empty
        print(f"\nDone. Saved: {self.n_saved}, Deleted: {self.n_deleted}")
        print(f"Dataset: {total} images, {labeled} labeled, "
              f"{empty} empty, {unlabeled} unlabeled")


def run_annotator(img_dir, classes=None):
    """Entry point callable from manage.py or standalone."""
    tool = Annotator(img_dir, classes)
    tool.run()


def main():
    """Run the annotator on argv[1], or on the default raw-data directory."""
    img_dir = sys.argv[1] if len(sys.argv) > 1 \
        else "../../training-data/kulemak/raw"
    run_annotator(img_dir)


if __name__ == "__main__":
    main()