Files
aza/APP/fotoapp - Kopie/segmentation.py
2026-03-25 14:14:07 +01:00

207 lines
6.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""KI-based person segmentation, mask utilities and compositing."""
from __future__ import annotations
import os
from collections import deque
from pathlib import Path
from typing import Optional, Tuple
import numpy as np
from PIL import Image
# Hosted MediaPipe Tasks model: multiclass selfie segmentation, float32,
# 256x256 input (~16 MB download).
_MODEL_URL = (
    "https://storage.googleapis.com/mediapipe-models/"
    "image_segmenter/selfie_multiclass_256x256/float32/latest/"
    "selfie_multiclass_256x256.tflite"
)
# Per-user cache directory. NOTE(review): %APPDATA% is Windows-specific; on
# other platforms the env var is unset and this becomes a relative
# "FotoApp" path — confirm the app only targets Windows.
_MODEL_DIR = Path(os.environ.get("APPDATA", "")) / "FotoApp"
_MODEL_PATH = _MODEL_DIR / "selfie_multiclass.tflite"
# Process-wide cached ImageSegmenter instance; lazily created by
# _get_segmenter() on first use.
_segmenter = None
def _ensure_model(progress_cb=None) -> str:
    """Download the segmentation model on first use (~16 MB).

    Parameters
    ----------
    progress_cb : callable, optional
        Called with a human-readable status string before the slow download.

    Returns
    -------
    str
        Filesystem path of the cached ``.tflite`` model file.
    """
    _MODEL_DIR.mkdir(parents=True, exist_ok=True)
    path = str(_MODEL_PATH)
    # Size sanity check in addition to existence: guards against a tiny,
    # obviously-broken file being treated as a valid model.
    if os.path.isfile(path) and os.path.getsize(path) > 1_000_000:
        return path
    if progress_cb:
        progress_cb("KI-Modell wird heruntergeladen (~16 MB) …")
    import urllib.request
    # Download to a temporary name and move it into place atomically.
    # Previously urlretrieve wrote straight to *path*, so an interrupted
    # download could leave a corrupt file large enough to pass the size
    # check above on the next start.
    tmp_path = path + ".part"
    try:
        urllib.request.urlretrieve(_MODEL_URL, tmp_path)
        os.replace(tmp_path, path)
    except BaseException:
        # Best-effort cleanup of the partial file before re-raising.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return path
def _get_segmenter(progress_cb=None):
    """Lazily create and cache the MediaPipe ImageSegmenter.

    Parameters
    ----------
    progress_cb : callable, optional
        Called with a status string before slow steps (download / load).

    Returns
    -------
    mediapipe.tasks.python.vision.ImageSegmenter
        The process-wide cached segmenter instance.
    """
    global _segmenter
    if _segmenter is not None:
        return _segmenter
    # The previously present ``import mediapipe as mp`` was unused here;
    # importing the Tasks API below pulls in mediapipe anyway.
    from mediapipe.tasks.python import BaseOptions, vision

    model_path = _ensure_model(progress_cb)
    if progress_cb:
        progress_cb("KI-Modell wird geladen …")
    options = vision.ImageSegmenterOptions(
        base_options=BaseOptions(model_asset_path=model_path),
        # request the per-pixel integer class-label mask
        output_category_mask=True,
    )
    _segmenter = vision.ImageSegmenter.create_from_options(options)
    return _segmenter
# ─── KI segmentation ────────────────────────────────────────────────────────
def segment_person(img: Image.Image, progress_cb=None) -> np.ndarray:
    """Segment the person in *img*, returning a float01 alpha mask.

    Runs MediaPipe Selfie Segmentation (Tasks API). Fast (~1-2 s) and
    lightweight (~16 MB model, low RAM); the model is downloaded
    automatically on first use. The model labels category 0 as background
    and categories 1-5 as person parts.
    """
    import mediapipe as mp

    segmenter = _get_segmenter(progress_cb)
    if progress_cb:
        progress_cb("Segmentierung läuft …")
    pixels = np.asarray(img.convert("RGB"))
    frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=pixels)
    labels = segmenter.segment(frame).category_mask.numpy_view()
    # collapse per-pixel class ids into a binary person/background mask
    return (labels.squeeze() > 0).astype(np.float32)
# ─── Mask operations ────────────────────────────────────────────────────────
def feather_mask(mask: np.ndarray, radius_px: float) -> np.ndarray:
    """Soften the edges of an alpha *mask* with a Gaussian blur.

    *radius_px* <= 0 returns the input unchanged; otherwise the mask is
    blurred and clipped back into [0, 1] as float32.
    """
    if radius_px <= 0:
        return mask
    import cv2
    kernel = 2 * int(radius_px) + 1  # always odd, as cv2 requires
    softened = cv2.GaussianBlur(mask, (kernel, kernel), sigmaX=radius_px / 2.0)
    return np.clip(softened, 0.0, 1.0).astype(np.float32)
def apply_brush_stroke(
    mask: np.ndarray,
    points: list[Tuple[int, int]],
    radius: int,
    hardness: float,
    add: bool,
) -> np.ndarray:
    """Paint a circular brush along *points* directly into *mask* (in place).

    *hardness* 0..1 controls the edge falloff (1 = hard-edged circle,
    0 = very soft). *add* selects painting towards white (True) versus
    erasing towards black (False). Returns the same *mask* array.
    """
    height, width = mask.shape[:2]
    for px, py in points:
        # clip the brush's bounding box to the image
        top, bottom = max(0, py - radius), min(height, py + radius + 1)
        left, right = max(0, px - radius), min(width, px + radius + 1)
        if top >= bottom or left >= right:
            continue  # brush lies entirely outside the image
        rows, cols = np.ogrid[top:bottom, left:right]
        d = np.sqrt((cols - px) ** 2 + (rows - py) ** 2).astype(np.float32)
        if hardness >= 0.99:
            # hard brush: full strength inside the radius, zero outside
            weight = (d <= radius).astype(np.float32)
        else:
            # soft brush: linear falloff from the hard core to the rim
            core = radius * hardness
            falloff = np.clip(
                (d - core) / max(float(radius) - core, 1e-6), 0.0, 1.0
            )
            weight = 1.0 - falloff
            weight[d > radius] = 0.0
        region = mask[top:bottom, left:right]
        mask[top:bottom, left:right] = (
            np.maximum(region, weight) if add else np.minimum(region, 1.0 - weight)
        )
    return mask
# ─── Compositing ─────────────────────────────────────────────────────────────
def composite_fg_bg(
    fg_rgb: np.ndarray,
    alpha: np.ndarray,
    bg_mode: str,
    bg_color: Tuple[int, int, int] = (255, 255, 255),
    bg_blur_radius: float = 0.0,
    bg_image: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Blend the person foreground over the selected background.

    *fg_rgb* is the colour-graded image as float01 (H, W, 3); *alpha* the
    float01 (H, W) person mask (feathered). *bg_mode* is one of
    ``"original"`` | ``"blur"`` | ``"color"`` | ``"transparent"`` | ``"image"``.
    Returns float01 (H, W, 3), or (H, W, 4) for the transparent mode.
    Unknown modes (and "image" without *bg_image*) fall back to the
    foreground unchanged.
    """
    if bg_mode == "original":
        return fg_rgb
    if bg_mode == "transparent":
        stacked = np.concatenate([fg_rgb, alpha[..., None]], axis=-1)
        return np.clip(stacked, 0.0, 1.0).astype(np.float32)
    if bg_mode == "blur":
        import cv2
        k = max(1, int(bg_blur_radius)) * 2 + 1  # odd kernel size for cv2
        backdrop = cv2.GaussianBlur(fg_rgb, (k, k), sigmaX=bg_blur_radius / 2.0)
    elif bg_mode == "color":
        backdrop = np.full_like(fg_rgb, [c / 255.0 for c in bg_color])
    elif bg_mode == "image" and bg_image is not None:
        h, w = fg_rgb.shape[:2]
        # resize the replacement background to match the foreground
        pil_bg = Image.fromarray((bg_image * 255).astype(np.uint8))
        resized = pil_bg.resize((w, h), Image.Resampling.LANCZOS)
        backdrop = np.asarray(resized, dtype=np.float32) / 255.0
    else:
        return fg_rgb
    weight = alpha[..., None]
    blended = fg_rgb * weight + backdrop * (1.0 - weight)
    return np.clip(blended, 0.0, 1.0).astype(np.float32)
# ─── Mask undo stack ─────────────────────────────────────────────────────────
class MaskHistory:
    """Bounded undo buffer holding up to *maxlen* mask snapshots."""

    def __init__(self, maxlen: int = 20):
        # deque discards the oldest snapshot automatically once full
        self._snapshots: deque[np.ndarray] = deque(maxlen=maxlen)

    def push(self, mask: np.ndarray):
        """Store an independent copy of *mask* as the newest undo state."""
        self._snapshots.append(mask.copy())

    def undo(self) -> Optional[np.ndarray]:
        """Pop and return the newest snapshot, or ``None`` when empty."""
        return self._snapshots.pop() if self._snapshots else None

    def clear(self):
        """Drop every stored snapshot."""
        self._snapshots.clear()

    @property
    def can_undo(self) -> bool:
        """True while at least one snapshot can be undone."""
        return bool(self._snapshots)