"""KI-based person segmentation, mask utilities and compositing.""" from __future__ import annotations import os from collections import deque from pathlib import Path from typing import Optional, Tuple import numpy as np from PIL import Image _MODEL_URL = ( "https://storage.googleapis.com/mediapipe-models/" "image_segmenter/selfie_multiclass_256x256/float32/latest/" "selfie_multiclass_256x256.tflite" ) _MODEL_DIR = Path(os.environ.get("APPDATA", "")) / "FotoApp" _MODEL_PATH = _MODEL_DIR / "selfie_multiclass.tflite" _segmenter = None def _ensure_model(progress_cb=None) -> str: """Download the segmentation model on first use (~16 MB).""" _MODEL_DIR.mkdir(parents=True, exist_ok=True) path = str(_MODEL_PATH) if os.path.isfile(path) and os.path.getsize(path) > 1_000_000: return path if progress_cb: progress_cb("KI-Modell wird heruntergeladen (~16 MB) …") import urllib.request urllib.request.urlretrieve(_MODEL_URL, path) return path def _get_segmenter(progress_cb=None): """Lazy-init the MediaPipe ImageSegmenter (cached).""" global _segmenter if _segmenter is not None: return _segmenter import mediapipe as mp from mediapipe.tasks.python import BaseOptions, vision model_path = _ensure_model(progress_cb) if progress_cb: progress_cb("KI-Modell wird geladen …") options = vision.ImageSegmenterOptions( base_options=BaseOptions(model_asset_path=model_path), output_category_mask=True, ) _segmenter = vision.ImageSegmenter.create_from_options(options) return _segmenter # ─── KI segmentation ──────────────────────────────────────────────────────── def segment_person(img: Image.Image, progress_cb=None) -> np.ndarray: """Run MediaPipe Selfie Segmentation (Tasks API), returning float01 alpha mask. Very fast (~1-2 s), lightweight (~16 MB model, low RAM). The model is automatically downloaded on first use. Category 0 = background, categories 1-5 = person parts. """ import mediapipe as mp segmenter = _get_segmenter(progress_cb) if progress_cb: progress_cb("Segmentierung läuft …") rgb_arr = np.asarray(img.convert("RGB")) mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_arr) result = segmenter.segment(mp_img) cat_mask = result.category_mask.numpy_view() # category 0 = background; anything > 0 = person mask = (cat_mask.squeeze() > 0).astype(np.float32) return mask # ─── Mask operations ──────────────────────────────────────────────────────── def feather_mask(mask: np.ndarray, radius_px: float) -> np.ndarray: """Gaussian blur on alpha mask for soft edges.""" if radius_px <= 0: return mask import cv2 ksize = int(radius_px) * 2 + 1 blurred = cv2.GaussianBlur(mask, (ksize, ksize), sigmaX=radius_px / 2.0) return np.clip(blurred, 0.0, 1.0).astype(np.float32) def apply_brush_stroke( mask: np.ndarray, points: list[Tuple[int, int]], radius: int, hardness: float, add: bool, ) -> np.ndarray: """Paint on *mask* along *points* with a circular brush. *hardness* 0..1 (0 = very soft, 1 = hard edge). *add* True = paint foreground (white), False = erase (black). """ h, w = mask.shape[:2] for cx, cy in points: y_min = max(0, cy - radius) y_max = min(h, cy + radius + 1) x_min = max(0, cx - radius) x_max = min(w, cx + radius + 1) if y_min >= y_max or x_min >= x_max: continue yy, xx = np.mgrid[y_min:y_max, x_min:x_max] dist = np.sqrt((xx - cx) ** 2 + (yy - cy) ** 2).astype(np.float32) if hardness >= 0.99: strength = (dist <= radius).astype(np.float32) else: inner = radius * hardness outer = float(radius) t = np.clip((dist - inner) / max(outer - inner, 1e-6), 0.0, 1.0) strength = 1.0 - t strength[dist > radius] = 0.0 patch = mask[y_min:y_max, x_min:x_max] if add: mask[y_min:y_max, x_min:x_max] = np.maximum(patch, strength) else: mask[y_min:y_max, x_min:x_max] = np.minimum(patch, 1.0 - strength) return mask # ─── Compositing ───────────────────────────────────────────────────────────── def composite_fg_bg( fg_rgb: np.ndarray, alpha: np.ndarray, bg_mode: str, bg_color: Tuple[int, int, int] = (255, 255, 255), bg_blur_radius: float = 0.0, bg_image: Optional[np.ndarray] = None, ) -> np.ndarray: """Composite foreground over background. *fg_rgb*: float01 (H, W, 3) – the colour-graded image. *alpha*: float01 (H, W) – person mask (feathered). *bg_mode*: ``"original"`` | ``"blur"`` | ``"color"`` | ``"transparent"`` | ``"image"``. Returns float01 (H, W, 3) for non-transparent modes or (H, W, 4) for transparent. """ a = alpha[..., None] if bg_mode == "original": return fg_rgb if bg_mode == "transparent": rgba = np.concatenate([fg_rgb, alpha[..., None]], axis=-1) return np.clip(rgba, 0.0, 1.0).astype(np.float32) if bg_mode == "blur": import cv2 ksize = max(1, int(bg_blur_radius)) * 2 + 1 bg = cv2.GaussianBlur(fg_rgb, (ksize, ksize), sigmaX=bg_blur_radius / 2.0) elif bg_mode == "color": bg = np.full_like(fg_rgb, [c / 255.0 for c in bg_color]) elif bg_mode == "image" and bg_image is not None: h, w = fg_rgb.shape[:2] bg = np.asarray( Image.fromarray((bg_image * 255).astype(np.uint8)).resize( (w, h), Image.Resampling.LANCZOS ), dtype=np.float32, ) / 255.0 else: return fg_rgb result = fg_rgb * a + bg * (1.0 - a) return np.clip(result, 0.0, 1.0).astype(np.float32) # ─── Mask undo stack ───────────────────────────────────────────────────────── class MaskHistory: """Simple undo buffer for mask edits (max *maxlen* snapshots).""" def __init__(self, maxlen: int = 20): self._stack: deque[np.ndarray] = deque(maxlen=maxlen) def push(self, mask: np.ndarray): self._stack.append(mask.copy()) def undo(self) -> Optional[np.ndarray]: if self._stack: return self._stack.pop() return None def clear(self): self._stack.clear() @property def can_undo(self) -> bool: return len(self._stack) > 0