aza/AzA march 2026/aza_audio.py

# -*- coding: utf-8 -*-
"""
AudioRecorder – Aufnahme direkt als M4A (AAC via ffmpeg-Pipe).
Kein WAV-Zwischenschritt. Fallback auf WAV nur wenn ffmpeg fehlt.
"""

import os
import shutil
import subprocess
import tempfile
import wave
from datetime import datetime
from typing import List, Optional

import numpy as np

try:
    import sounddevice as sd
except Exception:
    sd = None

# Default maximum length of one chunk in seconds; used as the default for
# the ``max_seconds`` parameter of split_audio_into_chunks().
CHUNK_MAX_SECONDS = 600

# Name of the sub-folder that get_audio_backup_dir() creates below
# "KG_Diktat_Ablage".
_AUDIO_BACKUP_SUBDIR = "Audio_Backup"


def get_audio_backup_dir() -> str:
    """Return the audio backup folder, creating it if it does not exist yet.

    The folder is placed below ``~/Documents`` when that directory exists,
    otherwise directly below the user's home directory.
    """
    home = os.path.expanduser("~")
    documents = os.path.join(home, "Documents")
    base = documents if os.path.isdir(documents) else home
    target = os.path.join(base, "KG_Diktat_Ablage", _AUDIO_BACKUP_SUBDIR)
    os.makedirs(target, exist_ok=True)
    return target


def persist_audio_safe(temp_path: str) -> str:
    """Copy an audio file into the backup folder and return the new path.

    The copy is named ``aufnahme_<YYYYmmdd_HHMMSS><ext>``, where the extension
    is taken from ``temp_path`` (".m4a" when it has none). If a file with
    that name already exists - e.g. two recordings persisted within the same
    second - a numeric suffix (``_1``, ``_2``, ...) is appended so an earlier
    backup is never overwritten.

    Exceptions raised by ``shutil.copy2`` (for example when ``temp_path``
    does not exist) are propagated to the caller.
    """
    backup_dir = get_audio_backup_dir()
    ext = os.path.splitext(temp_path)[1] or ".m4a"
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    stem = f"aufnahme_{ts}"
    safe_path = os.path.join(backup_dir, stem + ext)

    # The timestamp only has one-second resolution and copy2() replaces an
    # existing destination, so pick a free name instead of clobbering it.
    counter = 1
    while os.path.exists(safe_path):
        safe_path = os.path.join(backup_dir, f"{stem}_{counter}{ext}")
        counter += 1

    shutil.copy2(temp_path, safe_path)
    return safe_path


def cleanup_old_audio_backups(max_age_days: int = 30):
    """Delete files in the audio backup folder older than ``max_age_days``.

    Age is judged by the file's modification time. Every regular file in
    the backup folder is considered; this function does not itself check
    whether a recording has been transcribed.

    The cleanup is best effort: a file that cannot be inspected or removed
    (locked, already deleted, ...) is skipped and the remaining files are
    still processed. Filesystem errors are never raised to the caller.
    """
    backup_dir = get_audio_backup_dir()
    cutoff = datetime.now().timestamp() - max_age_days * 86400

    try:
        names = os.listdir(backup_dir)
    except OSError:
        return

    for name in names:
        fp = os.path.join(backup_dir, name)
        # Guard each file separately so one failure (e.g. the file vanished
        # between listing and stat) does not abort the whole cleanup.
        try:
            if os.path.isfile(fp) and os.path.getmtime(fp) < cutoff:
                os.remove(fp)
        except OSError:
            continue


# Process creation flag passed to every ffmpeg subprocess call in this module.
# subprocess.CREATE_NO_WINDOW only exists on Windows; getattr() falls back to
# 0 where the attribute is missing.
_NO_WINDOW = getattr(subprocess, "CREATE_NO_WINDOW", 0)

# Settings path that is inserted into the user-facing microphone error messages.
_WINDOWS_SOUND_SETTINGS = "Einstellungen > System > Sound > Eingabe"

# Stores the most recent check_microphone() result under the key "result";
# emptied by invalidate_mic_cache().
_mic_check_cache: dict = {}


def _fail(msg: str, dev_name=None, dev_index=None) -> dict:
    """Build the result dict of a failed microphone check (``ok`` is False)."""
    return dict(
        ok=False,
        device_name=dev_name,
        device_index=dev_index,
        message=msg,
    )


def check_microphone(force: bool = False) -> dict:
    """Check whether a usable microphone is available.

    The outcome - success or failure - is stored in the module-level cache
    and handed out again on later calls unless ``force`` is true.

    Returns a dict with the keys:
        ok (bool), device_name (str|None), device_index (int|None),
        message (str - German, meant to be shown to the user)
    """
    if not force:
        cached = _mic_check_cache.get("result")
        if cached:
            return cached

    def _remember(outcome):
        _mic_check_cache["result"] = outcome
        return outcome

    if sd is None:
        return _remember(_fail(
            "Audio-Modul nicht verfügbar.\n\n"
            "Das Paket 'sounddevice' konnte nicht geladen werden.\n"
            "Aufnahme und Diktat sind nicht möglich."
        ))

    # --- Step 1: ask for the default input device ---
    mic_name = None
    mic_index = None
    try:
        mic_name = sd.query_devices(kind="input")["name"]
        mic_index = sd.default.device[0]
    except Exception:
        pass

    # --- Step 2: fallback - take the first device that offers input channels ---
    if mic_name is None:
        try:
            for position, device in enumerate(sd.query_devices()):
                try:
                    if not device["max_input_channels"] > 0:
                        continue
                    mic_name = device["name"]
                    mic_index = position
                except (KeyError, TypeError, IndexError):
                    continue
                break
        except Exception:
            pass

    if mic_name is None:
        return _remember(_fail(
            "Kein Mikrofon gefunden.\n\n"
            "Bitte schliessen Sie ein Mikrofon an oder\n"
            "aktivieren Sie es in den Windows-Einstellungen:\n\n"
            f"  {_WINDOWS_SOUND_SETTINGS}"
        ))

    # --- Step 3: make sure the device has at least one input channel ---
    try:
        if mic_index is not None:
            details = sd.query_devices(mic_index)
        else:
            details = sd.query_devices(kind="input")
        channel_count = details["max_input_channels"]
    except Exception:
        channel_count = 0

    if channel_count < 1:
        return _remember(_fail(
            f"Gerät '{mic_name}' hat keine Eingangskanäle.\n\n"
            "Bitte ein anderes Mikrofon auswählen:\n\n"
            f"  {_WINDOWS_SOUND_SETTINGS}",
            mic_name, mic_index,
        ))

    # --- Step 4: open an input stream and close it again right away ---
    try:
        sd.InputStream(
            device=mic_index,
            samplerate=16000,
            channels=1,
            dtype="float32",
            blocksize=1024,
        ).close()
    except Exception as exc:
        detail = str(exc)
        return _remember(_fail(
            f"Mikrofon '{mic_name}' konnte nicht geöffnet werden.\n\n"
            "Mögliche Ursachen:\n"
            "  - Mikrofon ist von einer anderen App belegt\n"
            "  - Zugriff in Windows-Datenschutz blockiert\n"
            "  - Gerät ist deaktiviert oder getrennt\n\n"
            f"Windows-Einstellungen:\n  {_WINDOWS_SOUND_SETTINGS}\n\n"
            f"(Technisch: {detail[:120]})",
            mic_name, mic_index,
        ))

    return _remember({
        "ok": True,
        "device_name": mic_name,
        "device_index": mic_index,
        "message": f"Mikrofon bereit: {mic_name}",
    })


def invalidate_mic_cache():
    """Reset the microphone cache (e.g. after the input device was changed)."""
    _mic_check_cache.clear()


def _find_ffmpeg() -> Optional[str]:
    """Locate the ffmpeg executable.

    ``ffmpeg`` on PATH wins; otherwise ``ffmpeg.exe`` is looked up next to
    this file and then in its ``_internal`` sub-folder. Returns ``None``
    when none of these exists.
    """
    on_path = shutil.which("ffmpeg")
    if on_path:
        return on_path

    here = os.path.dirname(os.path.abspath(__file__))
    bundled = [
        os.path.join(here, "ffmpeg.exe"),
        os.path.join(here, "_internal", "ffmpeg.exe"),
    ]
    return next((path for path in bundled if os.path.isfile(path)), None)


class AudioRecorder:
    """Record audio and stream it directly into ffmpeg (M4A/AAC).

    When ffmpeg is available the audio is encoded to M4A while recording,
    so no intermediate WAV file is written. When ffmpeg is missing or cannot
    be launched, the captured blocks are kept in memory and written as a
    16-bit PCM WAV file when the recording is stopped.
    """

    def __init__(self, samplerate=16000, channels=1):
        self.samplerate = samplerate
        self.channels = channels
        self._stream = None
        self._ffmpeg_proc: Optional[subprocess.Popen] = None
        self._output_path: Optional[str] = None
        self._recording = False
        self._wav_fallback = False
        self._frames: list = []
        # Filled in by start(); initialised here so the attribute always exists.
        self._device_index = None

    def start(self):
        """Start recording from the microphone reported by check_microphone().

        Raises:
            RuntimeError: if the microphone check fails, or if opening the
                input stream fails with a device/PortAudio related error.
                Any other error from opening the stream is re-raised as is.

        If the stream cannot be opened, everything acquired so far (ffmpeg
        process, temporary output file) is released before raising.
        """
        mic = check_microphone()
        if not mic["ok"]:
            raise RuntimeError(mic["message"])

        self._recording = True
        self._wav_fallback = False
        self._frames = []
        self._ffmpeg_proc = None
        # Forget the path of any previous recording so the cleanup below can
        # only ever touch the temp file created by this call.
        self._output_path = None
        self._device_index = mic.get("device_index")

        ffmpeg = _find_ffmpeg()
        if ffmpeg:
            fd, self._output_path = tempfile.mkstemp(suffix=".m4a", prefix="kg_rec_")
            os.close(fd)
            try:
                self._ffmpeg_proc = subprocess.Popen(
                    [ffmpeg, "-y",
                     "-f", "s16le", "-ar", str(self.samplerate),
                     "-ac", str(self.channels), "-i", "pipe:0",
                     "-c:a", "aac", "-b:a", "64k",
                     "-movflags", "+faststart",
                     self._output_path],
                    stdin=subprocess.PIPE,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    creationflags=_NO_WINDOW,
                )
            except Exception:
                # ffmpeg could not be launched: drop the unused temp file and
                # record into memory instead.
                self._ffmpeg_proc = None
                self._wav_fallback = True
                self._discard_output_file()
        else:
            self._wav_fallback = True

        def callback(indata, frames, time_info, status):
            if not self._recording:
                return
            # float32 in [-1, 1] -> signed 16-bit PCM, the format ffmpeg is
            # told to expect ("-f s16le").
            pcm = (np.clip(indata, -1.0, 1.0) * 32767.0).astype(np.int16)
            if self._ffmpeg_proc and self._ffmpeg_proc.stdin:
                try:
                    self._ffmpeg_proc.stdin.write(pcm.tobytes())
                except Exception:
                    # Nothing sensible can be raised from the audio callback;
                    # a failed write is dropped.
                    pass
            else:
                self._frames.append(indata.copy())

        stream = None
        try:
            stream = sd.InputStream(
                device=self._device_index,
                samplerate=self.samplerate,
                channels=self.channels,
                callback=callback,
                dtype="float32",
                blocksize=0,
            )
            stream.start()
        except Exception as e:
            # Release what was acquired above, otherwise the ffmpeg process
            # keeps waiting on its stdin and the temp file is left behind.
            self._recording = False
            if stream is not None:
                try:
                    stream.close()
                except Exception:
                    pass
            self._finish_ffmpeg(timeout=5)
            self._discard_output_file()

            invalidate_mic_cache()
            err = str(e)
            if "device" in err.lower() or "portaudio" in err.lower() or "-1" in err:
                raise RuntimeError(
                    "Mikrofon konnte nicht geöffnet werden.\n\n"
                    "Bitte prüfen Sie:\n"
                    "  - Ist ein Mikrofon angeschlossen?\n"
                    "  - Ist es in Windows aktiviert?\n\n"
                    f"Windows: {_WINDOWS_SOUND_SETTINGS}\n\n"
                    f"(Technisch: {err[:120]})"
                ) from None
            raise
        self._stream = stream

    def stop_and_save(self) -> str:
        """Stop the recording and return the path of the finished audio file.

        Returns the M4A written by ffmpeg when it exists and is non-empty,
        otherwise the WAV file written from the in-memory frames.

        Raises:
            RuntimeError: if the recorder was not started, or if the WAV
                fallback is needed but no frames were captured.
        """
        if not self._stream:
            raise RuntimeError("Recorder wurde nicht gestartet.")

        self._recording = False
        stream, self._stream = self._stream, None
        # A failing stop()/close() must not prevent the audio captured so far
        # from being finalised below.
        for release in (stream.stop, stream.close):
            try:
                release()
            except Exception:
                pass

        if self._ffmpeg_proc is not None:
            self._finish_ffmpeg(timeout=30)

            if (self._output_path
                    and os.path.isfile(self._output_path)
                    and os.path.getsize(self._output_path) > 0):
                return self._output_path

            # ffmpeg produced nothing usable: remove the leftover file and
            # try the in-memory frames.
            self._discard_output_file()
            self._wav_fallback = True

        if self._wav_fallback or not self._output_path:
            return self._save_wav_fallback()

        return self._output_path

    def stop_and_save_wav(self) -> str:
        """Legacy alias for stop_and_save()."""
        return self.stop_and_save()

    def _finish_ffmpeg(self, timeout: float = 30):
        """Close ffmpeg's stdin and wait for it to exit; kill it on timeout.

        Does nothing when no ffmpeg process is active. Afterwards
        ``self._ffmpeg_proc`` is ``None``.
        """
        proc, self._ffmpeg_proc = self._ffmpeg_proc, None
        if proc is None:
            return
        if proc.stdin:
            try:
                # Closing stdin signals end of input so ffmpeg can finalise
                # the output file.
                proc.stdin.close()
            except Exception:
                pass
        try:
            proc.wait(timeout=timeout)
        except Exception:
            try:
                proc.kill()
                # Collect the exit status so the killed process is reaped.
                proc.wait(timeout=5)
            except Exception:
                pass

    def _discard_output_file(self):
        """Delete the temporary output file (if any) and forget its path."""
        path, self._output_path = self._output_path, None
        if path:
            try:
                os.remove(path)
            except OSError:
                pass

    def _save_wav_fallback(self) -> str:
        """Write the in-memory frames to a temporary 16-bit PCM WAV file.

        Returns the path of the new file.

        Raises:
            RuntimeError: if no frames were captured. Frames are only
                buffered while no ffmpeg process is used.
        """
        if not self._frames:
            raise RuntimeError("Keine Audio-Daten aufgenommen (leer).")

        audio = np.concatenate(self._frames, axis=0)
        audio = np.clip(audio, -1.0, 1.0)
        pcm16 = (audio * 32767.0).astype(np.int16)

        fd, path = tempfile.mkstemp(suffix=".wav", prefix="kg_rec_")
        os.close(fd)
        with wave.open(path, "wb") as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(2)
            wf.setframerate(self.samplerate)
            wf.writeframes(pcm16.tobytes())
        return path


# ── Chunking ──────────────────────────────────────────────────────────

def split_audio_into_chunks(audio_path: str, max_seconds: int = CHUNK_MAX_SECONDS) -> List[str]:
    """Split an audio file into chunks of at most ``max_seconds`` seconds.

    Files with an ``.m4a`` extension (compared case-insensitively) are
    handled by ``_split_m4a``; every other file is passed to ``_split_wav``.
    Returns whatever list of paths the chosen splitter returns.
    """
    _, suffix = os.path.splitext(audio_path)
    splitter = _split_m4a if suffix.lower() == ".m4a" else _split_wav
    return splitter(audio_path, max_seconds)


def _split_m4a(m4a_path: str, max_seconds: int) -> List[str]:
    """Split an M4A file into chunks of at most ``max_seconds`` seconds.

    The duration is read from ffmpeg's stderr output; the chunks are then
    cut with stream copy (``-c copy``) into new temporary files.

    Returns:
        The list of chunk paths, or ``[m4a_path]`` (the unsplit file) when
        ``max_seconds`` is not positive, ffmpeg is not available, the
        duration cannot be determined, the file is not longer than
        ``max_seconds``, or any chunk could not be written. In the last
        case the chunks created so far are deleted, so the caller never
        receives a silently truncated recording.
    """
    # A non-positive chunk length would never advance the offset below.
    if max_seconds <= 0:
        return [m4a_path]

    ffmpeg = _find_ffmpeg()
    if not ffmpeg:
        return [m4a_path]

    try:
        probe = subprocess.run(
            [ffmpeg, "-i", m4a_path, "-f", "null", "-"],
            capture_output=True, timeout=30, creationflags=_NO_WINDOW,
        )
        duration_s = None
        for line in (probe.stderr or b"").decode("utf-8", errors="replace").splitlines():
            if "Duration:" in line:
                # Expected shape: "Duration: HH:MM:SS.ss, start: ..."
                parts = line.split("Duration:")[1].split(",")[0].strip()
                h, m, s = parts.split(":")
                duration_s = int(h) * 3600 + int(m) * 60 + float(s)
                break
        if duration_s is None or duration_s <= max_seconds:
            return [m4a_path]
    except Exception:
        return [m4a_path]

    chunks: List[str] = []
    offset = 0.0
    idx = 0
    failed = False
    while offset < duration_s:
        fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.m4a", prefix="kg_rec_")
        os.close(fd)
        # Register the file first so it is cleaned up if this chunk fails.
        chunks.append(chunk_path)
        try:
            result = subprocess.run(
                [ffmpeg, "-y", "-ss", str(offset), "-i", m4a_path,
                 "-t", str(max_seconds), "-c", "copy", chunk_path],
                capture_output=True, timeout=120, creationflags=_NO_WINDOW,
            )
            failed = (
                result.returncode != 0
                or not os.path.isfile(chunk_path)
                or os.path.getsize(chunk_path) == 0
            )
        except (subprocess.SubprocessError, OSError):
            # Timeout or launch failure counts as a failed chunk.
            failed = True
        if failed:
            break
        offset += max_seconds
        idx += 1

    if failed:
        # Returning only the leading chunks would drop the rest of the
        # recording; fall back to the complete file instead.
        for path in chunks:
            try:
                os.remove(path)
            except OSError:
                pass
        return [m4a_path]

    return chunks if chunks else [m4a_path]


def _split_wav(wav_path: str, max_seconds: int) -> List[str]:
    """Split a WAV file into chunks of at most ``max_seconds`` seconds.

    Each chunk is written to a new temporary WAV file with the same channel
    count, sample width and frame rate as the source.

    Returns:
        The list of chunk paths in order, or ``[wav_path]`` when the file is
        not longer than ``max_seconds``, when ``max_seconds`` does not amount
        to at least one frame (zero or negative values included), or when no
        audio data could be read.

    Errors from opening or reading the source file are propagated.
    """
    with wave.open(wav_path, "rb") as wf:
        n_channels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        framerate = wf.getframerate()
        n_frames = wf.getnframes()

        duration_s = n_frames / framerate
        if duration_s <= max_seconds:
            return [wav_path]

        chunk_frames = int(max_seconds * framerate)
        # With a chunk size of zero (or less) the loop below would never
        # consume any frames and run forever.
        if chunk_frames <= 0:
            return [wav_path]

        chunks: List[str] = []
        frames_remaining = n_frames
        idx = 0
        while frames_remaining > 0:
            read_count = min(chunk_frames, frames_remaining)
            data = wf.readframes(read_count)
            if not data:
                # The header announced more frames than the file contains;
                # do not emit empty chunk files.
                break
            fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.wav", prefix="kg_rec_")
            os.close(fd)
            with wave.open(chunk_path, "wb") as cf:
                cf.setnchannels(n_channels)
                cf.setsampwidth(sampwidth)
                cf.setframerate(framerate)
                cf.writeframes(data)
            chunks.append(chunk_path)
            frames_remaining -= read_count
            idx += 1

    return chunks if chunks else [wav_path]


# Alias: makes split_audio_into_chunks() also available under the name
# split_wav_into_chunks (presumably kept for older callers - confirm before
# removing).
split_wav_into_chunks = split_audio_into_chunks


def test_audio_device(duration_sec: float = 1.5) -> dict:
    """Run a short microphone test: record briefly and look for a signal.

    Records ``duration_sec`` seconds of mono audio at 16 kHz and judges the
    result by its peak level.

    Returns a dict with the keys:
        ok (bool), device (str|None), message (str)
    """
    def _report(ok, device, message):
        return {"ok": ok, "device": device, "message": message}

    if sd is None:
        return _report(
            False,
            None,
            "Python-Paket 'sounddevice' ist nicht verfügbar.\n"
            "Audio-Aufnahme nicht möglich.",
        )

    try:
        mic_name = sd.query_devices(kind="input").get("name", "Unbekanntes Gerät")
    except Exception:
        return _report(
            False,
            None,
            "Kein Eingabegerät (Mikrofon) gefunden.\n"
            "Bitte Mikrofon anschliessen und erneut versuchen.",
        )

    sample_rate = 16000
    try:
        recording = sd.rec(
            int(duration_sec * sample_rate),
            samplerate=sample_rate,
            channels=1,
            dtype="float32",
            blocking=True,
        )
    except Exception as exc:
        return _report(False, mic_name, f"Aufnahmetest fehlgeschlagen:\n{exc}")

    if recording is None or len(recording) == 0:
        return _report(
            False,
            mic_name,
            "Keine Audio-Daten empfangen.\n"
            "Bitte Mikrofon-Zugriff in den Windows-Einstellungen prüfen.",
        )

    peak = float(np.max(np.abs(recording)))
    rms = float(np.sqrt(np.mean(recording ** 2)))

    # A peak this low is treated as "no signal at all".
    if peak < 0.001:
        return _report(
            False,
            mic_name,
            f"Gerät: {mic_name}\n\n"
            f"Kein Signal erkannt (Peak={peak:.4f}).\n"
            "Mikrofon ist möglicherweise stummgeschaltet oder defekt.",
        )

    # RMS scaled by 1000 and capped at 100 for display as a percentage.
    level_pct = min(100, int(rms * 1000))
    return _report(
        True,
        mic_name,
        f"Gerät: {mic_name}\n\n"
        f"Audio-Signal erkannt.\n"
        f"Pegel: {level_pct}%  (Peak={peak:.3f}, RMS={rms:.4f})\n\n"
        "Mikrofon funktioniert.",
    )