update

2026-04-22 22:33:46 +02:00
parent 7bf1e0dbb2
commit d4822fc8dc
5156 changed files with 829337 additions and 44 deletions
--- a/(18)/aza_audio.py
+++ b/(18)/aza_audio.py
@@ -0,0 +1,504 @@
+# -*- coding: utf-8 -*-
+"""
+AudioRecorder – Aufnahme direkt als M4A (AAC via ffmpeg-Pipe).
+Kein WAV-Zwischenschritt. Fallback auf WAV nur wenn ffmpeg fehlt.
+"""
+
+import os
+import shutil
+import subprocess
+import tempfile
+import wave
+from datetime import datetime
+from typing import List, Optional
+
+import numpy as np
+
+try:
+    import sounddevice as sd
+except Exception:
+    sd = None
+
+# Default value of the ``max_seconds`` parameter of split_audio_into_chunks().
+CHUNK_MAX_SECONDS = 600
+
+# Last path component of the folder returned by get_audio_backup_dir().
+_AUDIO_BACKUP_SUBDIR = "Audio_Backup"
+
+
+def get_audio_backup_dir() -> str:
+    """Gibt den sicheren Backup-Ordner für Audio zurück und erstellt ihn bei Bedarf."""
+    docs = os.path.join(os.path.expanduser("~"), "Documents")
+    if not os.path.isdir(docs):
+        docs = os.path.expanduser("~")
+    backup_dir = os.path.join(docs, "KG_Diktat_Ablage", _AUDIO_BACKUP_SUBDIR)
+    os.makedirs(backup_dir, exist_ok=True)
+    return backup_dir
+
+
+def persist_audio_safe(temp_path: str) -> str:
+    """Kopiert Audio in den sicheren Backup-Ordner. Gibt neuen Pfad zurück."""
+    backup_dir = get_audio_backup_dir()
+    ext = os.path.splitext(temp_path)[1] or ".m4a"
+    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+    safe_name = f"aufnahme_{ts}{ext}"
+    safe_path = os.path.join(backup_dir, safe_name)
+    shutil.copy2(temp_path, safe_path)
+    return safe_path
+
+
+def cleanup_old_audio_backups(max_age_days: int = 30):
+    """Löscht Audio-Backups älter als max_age_days (nur erfolgreich transkribierte)."""
+    backup_dir = get_audio_backup_dir()
+    cutoff = datetime.now().timestamp() - max_age_days * 86400
+    try:
+        for f in os.listdir(backup_dir):
+            fp = os.path.join(backup_dir, f)
+            if os.path.isfile(fp) and os.path.getmtime(fp) < cutoff:
+                try:
+                    os.remove(fp)
+                except Exception:
+                    pass
+    except Exception:
+        pass
+
+
+# Value passed as ``creationflags`` to the ffmpeg subprocess calls in this module;
+# 0 when this Python's subprocess module does not define CREATE_NO_WINDOW.
+_NO_WINDOW = getattr(subprocess, "CREATE_NO_WINDOW", 0)
+
+# Settings path (German UI text) embedded in the user-facing microphone error messages.
+_WINDOWS_SOUND_SETTINGS = "Einstellungen > System > Sound > Eingabe"
+
+# Holds the last check_microphone() result under the key "result";
+# emptied by invalidate_mic_cache().
+_mic_check_cache: dict = {}
+
+
+def _fail(msg: str, dev_name=None, dev_index=None) -> dict:
+    return {"ok": False, "device_name": dev_name, "device_index": dev_index, "message": msg}
+
+
+def check_microphone(force: bool = False) -> dict:
+    """Prüft ob ein brauchbares Mikrofon verfügbar ist.
+
+    Returns dict:
+        ok (bool), device_name (str|None), device_index (int|None),
+        message (str – deutsch, benutzerfreundlich)
+    """
+    if not force and _mic_check_cache.get("result"):
+        return _mic_check_cache["result"]
+
+    def _cache(r):
+        _mic_check_cache["result"] = r
+        return r
+
+    if sd is None:
+        return _cache(_fail(
+            "Audio-Modul nicht verfügbar.\n\n"
+            "Das Paket 'sounddevice' konnte nicht geladen werden.\n"
+            "Aufnahme und Diktat sind nicht möglich."
+        ))
+
+    # --- Schritt 1: Default-Input-Device abfragen ---
+    dev_index = None
+    dev_name = None
+    try:
+        info = sd.query_devices(kind="input")
+        dev_name = info["name"]
+        dev_index = sd.default.device[0]
+    except Exception:
+        pass
+
+    # --- Schritt 2: Fallback – alle Geräte durchsuchen ---
+    if dev_name is None:
+        try:
+            all_devs = sd.query_devices()
+            for i, d in enumerate(all_devs):
+                try:
+                    if d["max_input_channels"] > 0:
+                        dev_name = d["name"]
+                        dev_index = i
+                        break
+                except (KeyError, TypeError, IndexError):
+                    continue
+        except Exception:
+            pass
+
+    if dev_name is None:
+        return _cache(_fail(
+            "Kein Mikrofon gefunden.\n\n"
+            "Bitte schliessen Sie ein Mikrofon an oder\n"
+            "aktivieren Sie es in den Windows-Einstellungen:\n\n"
+            f"  {_WINDOWS_SOUND_SETTINGS}"
+        ))
+
+    # --- Schritt 3: Kanäle prüfen ---
+    try:
+        info = sd.query_devices(dev_index) if dev_index is not None else sd.query_devices(kind="input")
+        max_ch = info["max_input_channels"]
+    except Exception:
+        max_ch = 0
+
+    if max_ch < 1:
+        return _cache(_fail(
+            f"Gerät '{dev_name}' hat keine Eingangskanäle.\n\n"
+            "Bitte ein anderes Mikrofon auswählen:\n\n"
+            f"  {_WINDOWS_SOUND_SETTINGS}",
+            dev_name, dev_index,
+        ))
+
+    # --- Schritt 4: Kurzer Öffnungstest ---
+    try:
+        test_stream = sd.InputStream(
+            device=dev_index,
+            samplerate=16000,
+            channels=1,
+            dtype="float32",
+            blocksize=1024,
+        )
+        test_stream.close()
+    except Exception as e:
+        err = str(e)
+        return _cache(_fail(
+            f"Mikrofon '{dev_name}' konnte nicht geöffnet werden.\n\n"
+            "Mögliche Ursachen:\n"
+            "  - Mikrofon ist von einer anderen App belegt\n"
+            "  - Zugriff in Windows-Datenschutz blockiert\n"
+            "  - Gerät ist deaktiviert oder getrennt\n\n"
+            f"Windows-Einstellungen:\n  {_WINDOWS_SOUND_SETTINGS}\n\n"
+            f"(Technisch: {err[:120]})",
+            dev_name, dev_index,
+        ))
+
+    result = {
+        "ok": True,
+        "device_name": dev_name,
+        "device_index": dev_index,
+        "message": f"Mikrofon bereit: {dev_name}",
+    }
+    return _cache(result)
+
+
+def invalidate_mic_cache():
+    """Setzt den Mikrofon-Cache zurück (z.B. nach Gerätewechsel)."""
+    _mic_check_cache.clear()
+
+
+def _find_ffmpeg() -> Optional[str]:
+    path = shutil.which("ffmpeg")
+    if path:
+        return path
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    for candidate in (
+        os.path.join(script_dir, "ffmpeg.exe"),
+        os.path.join(script_dir, "_internal", "ffmpeg.exe"),
+    ):
+        if os.path.isfile(candidate):
+            return candidate
+    return None
+
+
+class AudioRecorder:
+    """Nimmt Audio auf und streamt es direkt in ffmpeg (M4A/AAC).
+
+    Wenn ffmpeg verfuegbar: Audio wird waehrend der Aufnahme in Echtzeit
+    als M4A kodiert – kein WAV-Zwischenschritt, sofort kleine Datei.
+    Wenn ffmpeg fehlt: Fallback auf WAV (16kHz mono 16-bit PCM).
+    """
+
+    def __init__(self, samplerate=16000, channels=1):
+        self.samplerate = samplerate
+        self.channels = channels
+        self._stream = None
+        self._ffmpeg_proc: Optional[subprocess.Popen] = None
+        self._output_path: Optional[str] = None
+        self._recording = False
+        self._wav_fallback = False
+        self._frames: list = []
+
+    def start(self):
+        mic = check_microphone()
+        if not mic["ok"]:
+            raise RuntimeError(mic["message"])
+
+        self._recording = True
+        self._wav_fallback = False
+        self._frames = []
+        self._ffmpeg_proc = None
+        self._device_index = mic.get("device_index")
+
+        ffmpeg = _find_ffmpeg()
+        if ffmpeg:
+            fd, self._output_path = tempfile.mkstemp(suffix=".m4a", prefix="kg_rec_")
+            os.close(fd)
+            try:
+                self._ffmpeg_proc = subprocess.Popen(
+                    [ffmpeg, "-y",
+                     "-f", "s16le", "-ar", str(self.samplerate),
+                     "-ac", str(self.channels), "-i", "pipe:0",
+                     "-c:a", "aac", "-b:a", "64k",
+                     "-movflags", "+faststart",
+                     self._output_path],
+                    stdin=subprocess.PIPE,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                    creationflags=_NO_WINDOW,
+                )
+            except Exception:
+                self._ffmpeg_proc = None
+                self._wav_fallback = True
+                self._output_path = None
+        else:
+            self._wav_fallback = True
+
+        def callback(indata, frames, time_info, status):
+            if not self._recording:
+                return
+            pcm = (np.clip(indata, -1.0, 1.0) * 32767.0).astype(np.int16)
+            if self._ffmpeg_proc and self._ffmpeg_proc.stdin:
+                try:
+                    self._ffmpeg_proc.stdin.write(pcm.tobytes())
+                except Exception:
+                    pass
+            else:
+                self._frames.append(indata.copy())
+
+        try:
+            self._stream = sd.InputStream(
+                device=self._device_index,
+                samplerate=self.samplerate,
+                channels=self.channels,
+                callback=callback,
+                dtype="float32",
+                blocksize=0,
+            )
+            self._stream.start()
+        except Exception as e:
+            invalidate_mic_cache()
+            err = str(e)
+            if "device" in err.lower() or "portaudio" in err.lower() or "-1" in err:
+                raise RuntimeError(
+                    "Mikrofon konnte nicht geöffnet werden.\n\n"
+                    "Bitte prüfen Sie:\n"
+                    "  - Ist ein Mikrofon angeschlossen?\n"
+                    "  - Ist es in Windows aktiviert?\n\n"
+                    f"Windows: {_WINDOWS_SOUND_SETTINGS}\n\n"
+                    f"(Technisch: {err[:120]})"
+                ) from None
+            raise
+
+    def stop_and_save(self) -> str:
+        """Stoppt Aufnahme, gibt Pfad zur fertigen Audiodatei zurueck."""
+        if not self._stream:
+            raise RuntimeError("Recorder wurde nicht gestartet.")
+
+        self._recording = False
+        self._stream.stop()
+        self._stream.close()
+        self._stream = None
+
+        if self._ffmpeg_proc and self._ffmpeg_proc.stdin:
+            try:
+                self._ffmpeg_proc.stdin.close()
+            except Exception:
+                pass
+            try:
+                self._ffmpeg_proc.wait(timeout=30)
+            except Exception:
+                try:
+                    self._ffmpeg_proc.kill()
+                except Exception:
+                    pass
+
+            if (self._output_path
+                    and os.path.isfile(self._output_path)
+                    and os.path.getsize(self._output_path) > 0):
+                self._ffmpeg_proc = None
+                return self._output_path
+
+            self._ffmpeg_proc = None
+            self._wav_fallback = True
+
+        if self._wav_fallback or not self._output_path:
+            return self._save_wav_fallback()
+
+        return self._output_path
+
+    def stop_and_save_wav(self) -> str:
+        """Legacy-Alias."""
+        return self.stop_and_save()
+
+    def _save_wav_fallback(self) -> str:
+        if not self._frames:
+            raise RuntimeError("Keine Audio-Daten aufgenommen (leer).")
+
+        audio = np.concatenate(self._frames, axis=0)
+        audio = np.clip(audio, -1.0, 1.0)
+        pcm16 = (audio * 32767.0).astype(np.int16)
+
+        fd, path = tempfile.mkstemp(suffix=".wav", prefix="kg_rec_")
+        os.close(fd)
+        with wave.open(path, "wb") as wf:
+            wf.setnchannels(self.channels)
+            wf.setsampwidth(2)
+            wf.setframerate(self.samplerate)
+            wf.writeframes(pcm16.tobytes())
+        return path
+
+
+# ── Chunking ──────────────────────────────────────────────────────────
+
+def split_audio_into_chunks(audio_path: str, max_seconds: int = CHUNK_MAX_SECONDS) -> List[str]:
+    ext = os.path.splitext(audio_path)[1].lower()
+    if ext == ".m4a":
+        return _split_m4a(audio_path, max_seconds)
+    return _split_wav(audio_path, max_seconds)
+
+
+def _split_m4a(m4a_path: str, max_seconds: int) -> List[str]:
+    ffmpeg = _find_ffmpeg()
+    if not ffmpeg:
+        return [m4a_path]
+
+    try:
+        probe = subprocess.run(
+            [ffmpeg, "-i", m4a_path, "-f", "null", "-"],
+            capture_output=True, timeout=30, creationflags=_NO_WINDOW,
+        )
+        duration_s = None
+        for line in (probe.stderr or b"").decode("utf-8", errors="replace").splitlines():
+            if "Duration:" in line:
+                parts = line.split("Duration:")[1].split(",")[0].strip()
+                h, m, s = parts.split(":")
+                duration_s = int(h) * 3600 + int(m) * 60 + float(s)
+                break
+        if duration_s is None or duration_s <= max_seconds:
+            return [m4a_path]
+    except Exception:
+        return [m4a_path]
+
+    chunks: List[str] = []
+    offset = 0.0
+    idx = 0
+    while offset < duration_s:
+        fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.m4a", prefix="kg_rec_")
+        os.close(fd)
+        result = subprocess.run(
+            [ffmpeg, "-y", "-ss", str(offset), "-i", m4a_path,
+             "-t", str(max_seconds), "-c", "copy", chunk_path],
+            capture_output=True, timeout=120, creationflags=_NO_WINDOW,
+        )
+        if result.returncode == 0 and os.path.isfile(chunk_path) and os.path.getsize(chunk_path) > 0:
+            chunks.append(chunk_path)
+        else:
+            try:
+                os.remove(chunk_path)
+            except Exception:
+                pass
+            break
+        offset += max_seconds
+        idx += 1
+
+    return chunks if chunks else [m4a_path]
+
+
+def _split_wav(wav_path: str, max_seconds: int) -> List[str]:
+    with wave.open(wav_path, "rb") as wf:
+        n_channels = wf.getnchannels()
+        sampwidth = wf.getsampwidth()
+        framerate = wf.getframerate()
+        n_frames = wf.getnframes()
+
+    duration_s = n_frames / framerate
+    if duration_s <= max_seconds:
+        return [wav_path]
+
+    chunk_frames = int(max_seconds * framerate)
+    chunks: List[str] = []
+
+    with wave.open(wav_path, "rb") as wf:
+        frames_remaining = n_frames
+        idx = 0
+        while frames_remaining > 0:
+            read_count = min(chunk_frames, frames_remaining)
+            data = wf.readframes(read_count)
+            fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.wav", prefix="kg_rec_")
+            os.close(fd)
+            with wave.open(chunk_path, "wb") as cf:
+                cf.setnchannels(n_channels)
+                cf.setsampwidth(sampwidth)
+                cf.setframerate(framerate)
+                cf.writeframes(data)
+            chunks.append(chunk_path)
+            frames_remaining -= read_count
+            idx += 1
+
+    return chunks
+
+
+# Second name bound to the same function object as split_audio_into_chunks
+# (presumably kept for callers that still use the older WAV-only name – verify).
+split_wav_into_chunks = split_audio_into_chunks
+
+
+def test_audio_device(duration_sec: float = 1.5) -> dict:
+    """Quick microphone test: records briefly and checks for signal.
+
+    Returns dict with keys:
+        ok (bool), device (str|None), message (str)
+    """
+    if sd is None:
+        return {
+            "ok": False,
+            "device": None,
+            "message": "Python-Paket 'sounddevice' ist nicht verfügbar.\n"
+                       "Audio-Aufnahme nicht möglich.",
+        }
+
+    try:
+        dev_info = sd.query_devices(kind="input")
+        device_name = dev_info.get("name", "Unbekanntes Gerät")
+    except Exception:
+        return {
+            "ok": False,
+            "device": None,
+            "message": "Kein Eingabegerät (Mikrofon) gefunden.\n"
+                       "Bitte Mikrofon anschliessen und erneut versuchen.",
+        }
+
+    try:
+        audio = sd.rec(
+            int(duration_sec * 16000),
+            samplerate=16000,
+            channels=1,
+            dtype="float32",
+            blocking=True,
+        )
+    except Exception as exc:
+        return {
+            "ok": False,
+            "device": device_name,
+            "message": f"Aufnahmetest fehlgeschlagen:\n{exc}",
+        }
+
+    if audio is None or len(audio) == 0:
+        return {
+            "ok": False,
+            "device": device_name,
+            "message": "Keine Audio-Daten empfangen.\n"
+                       "Bitte Mikrofon-Zugriff in den Windows-Einstellungen prüfen.",
+        }
+
+    peak = float(np.max(np.abs(audio)))
+    rms = float(np.sqrt(np.mean(audio ** 2)))
+
+    if peak < 0.001:
+        return {
+            "ok": False,
+            "device": device_name,
+            "message": f"Gerät: {device_name}\n\n"
+                       f"Kein Signal erkannt (Peak={peak:.4f}).\n"
+                       "Mikrofon ist möglicherweise stummgeschaltet oder defekt.",
+        }
+
+    level_pct = min(100, int(rms * 1000))
+    return {
+        "ok": True,
+        "device": device_name,
+        "message": f"Gerät: {device_name}\n\n"
+                   f"Audio-Signal erkannt.\n"
+                   f"Pegel: {level_pct}%  (Peak={peak:.3f}, RMS={rms:.4f})\n\n"
+                   "Mikrofon funktioniert.",
+    }