aza/AzA march 2026 - Kopie (5)/aza_audio.py

# -*- coding: utf-8 -*-
"""
AudioRecorder – Aufnahme direkt als M4A (AAC via ffmpeg-Pipe).
Kein WAV-Zwischenschritt. Fallback auf WAV nur wenn ffmpeg fehlt.
"""

import os
import shutil
import subprocess
import tempfile
import wave
from typing import List, Optional

import numpy as np

try:
    import sounddevice as sd
except Exception:
    sd = None

# Default value of the ``max_seconds`` parameter of split_audio_into_chunks().
CHUNK_MAX_SECONDS = 600
# subprocess.CREATE_NO_WINDOW where the attribute exists (Windows), else 0.
# Passed as ``creationflags`` to every ffmpeg subprocess call in this module.
_NO_WINDOW = getattr(subprocess, "CREATE_NO_WINDOW", 0)


def _find_ffmpeg() -> Optional[str]:
    """Return the path of an ffmpeg executable, or None if none is found.

    The system PATH is consulted first. If that yields nothing, a bundled
    ``ffmpeg.exe`` is looked for next to this module and then inside its
    ``_internal`` sub-directory.
    """
    on_path = shutil.which("ffmpeg")
    if on_path:
        return on_path

    here = os.path.dirname(os.path.abspath(__file__))
    bundled = [
        os.path.join(here, *parts)
        for parts in (("ffmpeg.exe",), ("_internal", "ffmpeg.exe"))
    ]
    # First existing bundled binary wins; None when neither exists.
    return next((path for path in bundled if os.path.isfile(path)), None)


class AudioRecorder:
    """Record microphone audio and stream it straight into ffmpeg (M4A/AAC).

    When ffmpeg is available, the samples are piped to it while recording,
    so the result is an M4A file without an intermediate WAV file.
    When ffmpeg is missing or cannot be started, the samples are buffered in
    memory and written as a 16-bit PCM WAV file when the recording stops.
    """

    def __init__(self, samplerate=16000, channels=1):
        """Store the capture format; no device or process is opened yet."""
        self.samplerate = samplerate
        self.channels = channels
        self._stream = None
        self._ffmpeg_proc: Optional[subprocess.Popen] = None
        self._output_path: Optional[str] = None
        self._recording = False
        self._wav_fallback = False
        self._frames: list = []

    def start(self):
        """Start the encoder (if ffmpeg is found) and open the input stream.

        Raises:
            RuntimeError: if the ``sounddevice`` package could not be imported.
            Exception: whatever ``sd.InputStream`` raises when the stream
                cannot be created or started. In that case the encoder
                process and its temporary file are cleaned up before the
                exception is re-raised.
        """
        if sd is None:
            raise RuntimeError(
                "Python-Paket 'sounddevice' fehlt.\n\n"
                "Installiere es mit:\n"
                "  py -3.11 -m pip install sounddevice"
            )

        self._recording = True
        self._wav_fallback = False
        self._frames = []
        self._ffmpeg_proc = None

        ffmpeg = _find_ffmpeg()
        if ffmpeg:
            fd, self._output_path = tempfile.mkstemp(suffix=".m4a", prefix="kg_rec_")
            os.close(fd)
            try:
                self._ffmpeg_proc = subprocess.Popen(
                    [ffmpeg, "-y",
                     "-f", "s16le", "-ar", str(self.samplerate),
                     "-ac", str(self.channels), "-i", "pipe:0",
                     "-c:a", "aac", "-b:a", "64k",
                     "-movflags", "+faststart",
                     self._output_path],
                    stdin=subprocess.PIPE,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    creationflags=_NO_WINDOW,
                )
            except Exception:
                # Encoder could not be launched: record to memory instead and
                # remove the temp file that was just created for it.
                self._ffmpeg_proc = None
                self._wav_fallback = True
                self._discard_output()
        else:
            self._wav_fallback = True

        def callback(indata, frames, time_info, status):
            if not self._recording:
                return
            pcm = (np.clip(indata, -1.0, 1.0) * 32767.0).astype(np.int16)
            if self._ffmpeg_proc and self._ffmpeg_proc.stdin:
                try:
                    self._ffmpeg_proc.stdin.write(pcm.tobytes())
                except Exception:
                    # Deliberately best-effort: a failed pipe write (e.g. the
                    # encoder exited) must not propagate out of the audio
                    # callback. stop_and_save() checks the encoder result.
                    pass
            else:
                self._frames.append(indata.copy())

        stream = None
        try:
            stream = sd.InputStream(
                samplerate=self.samplerate,
                channels=self.channels,
                callback=callback,
                dtype="float32",
                blocksize=0,
            )
            stream.start()
        except Exception:
            # Do not leave a running encoder, a temp file or a half-open
            # stream behind when the device cannot be opened.
            self._recording = False
            if stream is not None:
                try:
                    stream.close()
                except Exception:
                    pass
            self._abort_ffmpeg()
            raise
        self._stream = stream

    def stop_and_save(self) -> str:
        """Stop recording and return the path of the finished audio file.

        Returns:
            Path of the M4A file when the encoder exited successfully and
            produced a non-empty file, otherwise the path of a WAV file
            written from the in-memory buffer.

        Raises:
            RuntimeError: if the recorder was not started, or if the WAV
                fallback is needed but no samples were buffered.
        """
        if not self._stream:
            raise RuntimeError("Recorder wurde nicht gestartet.")

        self._recording = False
        self._stream.stop()
        self._stream.close()
        self._stream = None

        proc, self._ffmpeg_proc = self._ffmpeg_proc, None
        if proc and proc.stdin:
            try:
                # Closing stdin signals end of input so ffmpeg can finalize.
                proc.stdin.close()
            except Exception:
                pass
            try:
                proc.wait(timeout=30)
            except Exception:
                try:
                    proc.kill()
                    # Reap the killed process so returncode gets set.
                    proc.wait(timeout=5)
                except Exception:
                    pass

            path = self._output_path
            # Only trust the file when the encoder finished cleanly; a killed
            # or failed ffmpeg can leave a non-empty but truncated file.
            if (proc.returncode == 0
                    and path
                    and os.path.isfile(path)
                    and os.path.getsize(path) > 0):
                return path

            self._discard_output()
            self._wav_fallback = True

        if self._wav_fallback or not self._output_path:
            return self._save_wav_fallback()

        return self._output_path

    def stop_and_save_wav(self) -> str:
        """Legacy alias for stop_and_save()."""
        return self.stop_and_save()

    def _abort_ffmpeg(self) -> None:
        """Kill a running encoder process and delete its output file."""
        proc, self._ffmpeg_proc = self._ffmpeg_proc, None
        if proc is None:
            # No encoder was started in this run, so _output_path (if set)
            # does not belong to it and must not be deleted.
            return
        try:
            if proc.stdin:
                proc.stdin.close()
        except Exception:
            pass
        try:
            proc.kill()
            proc.wait(timeout=5)
        except Exception:
            pass
        self._discard_output()

    def _discard_output(self) -> None:
        """Delete the current encoder output file (if any) and forget its path."""
        path, self._output_path = self._output_path, None
        if path:
            try:
                os.remove(path)
            except OSError:
                pass

    def _save_wav_fallback(self) -> str:
        """Write the buffered samples to a temporary 16-bit PCM WAV file.

        Raises:
            RuntimeError: if no samples were buffered.
        """
        if not self._frames:
            raise RuntimeError("Keine Audio-Daten aufgenommen (leer).")

        audio = np.concatenate(self._frames, axis=0)
        audio = np.clip(audio, -1.0, 1.0)
        pcm16 = (audio * 32767.0).astype(np.int16)

        fd, path = tempfile.mkstemp(suffix=".wav", prefix="kg_rec_")
        os.close(fd)
        with wave.open(path, "wb") as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(2)
            wf.setframerate(self.samplerate)
            wf.writeframes(pcm16.tobytes())
        return path


# ── Chunking ──────────────────────────────────────────────────────────

def split_audio_into_chunks(audio_path: str, max_seconds: int = CHUNK_MAX_SECONDS) -> List[str]:
    """Split an audio file into pieces of at most ``max_seconds`` seconds.

    The splitter is chosen by file extension (case-insensitive): ``.m4a``
    files go to ``_split_m4a``, every other extension goes to ``_split_wav``.

    Returns:
        The list of paths returned by the selected splitter.
    """
    _, suffix = os.path.splitext(audio_path)
    splitter = _split_m4a if suffix.lower() == ".m4a" else _split_wav
    return splitter(audio_path, max_seconds)


def _split_m4a(m4a_path: str, max_seconds: int) -> List[str]:
    """Split an M4A file into chunks of at most ``max_seconds`` using ffmpeg.

    The duration is parsed from the ``Duration:`` line ffmpeg prints on
    stderr. Chunks are cut with ``-c copy``, i.e. without re-encoding, into
    new temporary files.

    Returns:
        ``[m4a_path]`` unchanged when ffmpeg is not found, the duration
        cannot be determined, the file is not longer than ``max_seconds``,
        or any chunk could not be written. Otherwise the chunk paths in
        playback order.

    Raises:
        ValueError: if ``max_seconds`` is not positive (the offset would
            never advance and the loop would not terminate).
    """
    if max_seconds <= 0:
        raise ValueError("max_seconds muss groesser als 0 sein.")

    ffmpeg = _find_ffmpeg()
    if not ffmpeg:
        return [m4a_path]

    duration_s = None
    try:
        probe = subprocess.run(
            [ffmpeg, "-i", m4a_path, "-f", "null", "-"],
            capture_output=True, timeout=30, creationflags=_NO_WINDOW,
        )
        stderr_text = (probe.stderr or b"").decode("utf-8", errors="replace")
        for line in stderr_text.splitlines():
            if "Duration:" in line:
                # e.g. "  Duration: 00:12:34.56, start: ..." -> "00:12:34.56"
                stamp = line.split("Duration:")[1].split(",")[0].strip()
                h, m, s = stamp.split(":")
                duration_s = int(h) * 3600 + int(m) * 60 + float(s)
                break
    except (OSError, subprocess.SubprocessError, ValueError):
        # Probe failed or the duration was unparsable (e.g. "N/A").
        return [m4a_path]

    if duration_s is None or duration_s <= max_seconds:
        return [m4a_path]

    chunks: List[str] = []
    offset = 0.0
    idx = 0
    while offset < duration_s:
        fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.m4a", prefix="kg_rec_")
        os.close(fd)
        # Track the file immediately so it is removed on any failure below.
        chunks.append(chunk_path)
        try:
            result = subprocess.run(
                [ffmpeg, "-y", "-ss", str(offset), "-i", m4a_path,
                 "-t", str(max_seconds), "-c", "copy", chunk_path],
                capture_output=True, timeout=120, creationflags=_NO_WINDOW,
            )
            chunk_ok = (
                result.returncode == 0
                and os.path.isfile(chunk_path)
                and os.path.getsize(chunk_path) > 0
            )
        except (OSError, subprocess.SubprocessError):
            chunk_ok = False

        if not chunk_ok:
            # Returning only the chunks written so far would silently drop
            # the rest of the recording; hand back the whole file instead.
            for leftover in chunks:
                try:
                    os.remove(leftover)
                except OSError:
                    pass
            return [m4a_path]

        offset += max_seconds
        idx += 1

    return chunks if chunks else [m4a_path]


def _split_wav(wav_path: str, max_seconds: int) -> List[str]:
    """Split a WAV file into chunks of at most ``max_seconds`` seconds.

    Each chunk is written to a new temporary WAV file with the same channel
    count, sample width and frame rate as the source.

    Returns:
        ``[wav_path]`` unchanged when the file is not longer than
        ``max_seconds``; otherwise the chunk paths in playback order.

    Raises:
        ValueError: if ``max_seconds`` is not positive (no progress could be
            made and the loop would not terminate).
    """
    if max_seconds <= 0:
        raise ValueError("max_seconds muss groesser als 0 sein.")

    with wave.open(wav_path, "rb") as src:
        n_channels = src.getnchannels()
        sampwidth = src.getsampwidth()
        framerate = src.getframerate()
        n_frames = src.getnframes()

        if n_frames / framerate <= max_seconds:
            return [wav_path]

        # At least one frame per chunk, so a tiny fractional max_seconds
        # cannot produce zero-length reads that never consume the file.
        chunk_frames = max(1, int(max_seconds * framerate))
        chunks: List[str] = []
        frames_remaining = n_frames
        idx = 0
        try:
            while frames_remaining > 0:
                read_count = min(chunk_frames, frames_remaining)
                data = src.readframes(read_count)
                fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.wav", prefix="kg_rec_")
                os.close(fd)
                # Track the file before writing so it is cleaned up on error.
                chunks.append(chunk_path)
                with wave.open(chunk_path, "wb") as dst:
                    dst.setnchannels(n_channels)
                    dst.setsampwidth(sampwidth)
                    dst.setframerate(framerate)
                    dst.writeframes(data)
                frames_remaining -= read_count
                idx += 1
        except Exception:
            # Do not leave partial chunk files behind when writing fails.
            for leftover in chunks:
                try:
                    os.remove(leftover)
                except OSError:
                    pass
            raise

    return chunks


# Alias: the older name resolves to the same function object
# (presumably kept for existing callers - verify before removing).
split_wav_into_chunks = split_audio_into_chunks


def test_audio_device(duration_sec: float = 1.5) -> dict:
    """Run a short microphone check and report whether a signal was captured.

    Queries the input device via ``sd.query_devices(kind="input")``, records
    ``duration_sec`` seconds at 16 kHz mono and inspects the peak and RMS
    level of the captured samples.

    Returns:
        dict with the keys ``ok`` (bool), ``device`` (str or None) and
        ``message`` (str, user-facing text).
    """
    def report(ok, device, message):
        # Single place that fixes the shape of the result dict.
        return {"ok": ok, "device": device, "message": message}

    if sd is None:
        return report(
            False,
            None,
            "Python-Paket 'sounddevice' ist nicht verfügbar.\n"
            "Audio-Aufnahme nicht möglich.",
        )

    try:
        device_name = sd.query_devices(kind="input").get("name", "Unbekanntes Gerät")
    except Exception:
        return report(
            False,
            None,
            "Kein Eingabegerät (Mikrofon) gefunden.\n"
            "Bitte Mikrofon anschliessen und erneut versuchen.",
        )

    rate = 16000
    try:
        audio = sd.rec(
            int(duration_sec * rate),
            samplerate=rate,
            channels=1,
            dtype="float32",
            blocking=True,
        )
    except Exception as exc:
        return report(False, device_name, f"Aufnahmetest fehlgeschlagen:\n{exc}")

    if audio is None or len(audio) == 0:
        return report(
            False,
            device_name,
            "Keine Audio-Daten empfangen.\n"
            "Bitte Mikrofon-Zugriff in den Windows-Einstellungen prüfen.",
        )

    peak = float(np.max(np.abs(audio)))
    rms = float(np.sqrt(np.mean(audio ** 2)))
    header = f"Gerät: {device_name}\n\n"

    # A peak below 0.001 is treated as "no signal".
    if peak < 0.001:
        return report(
            False,
            device_name,
            header
            + f"Kein Signal erkannt (Peak={peak:.4f}).\n"
            + "Mikrofon ist möglicherweise stummgeschaltet oder defekt.",
        )

    # RMS scaled by 1000 and capped at 100 for display as a percentage.
    level_pct = min(100, int(rms * 1000))
    return report(
        True,
        device_name,
        header
        + "Audio-Signal erkannt.\n"
        + f"Pegel: {level_pct}%  (Peak={peak:.3f}, RMS={rms:.4f})\n\n"
        + "Mikrofon funktioniert.",
    )