# -*- coding: utf-8 -*-
"""
AudioRecorder – records audio directly as M4A (AAC through an ffmpeg pipe).

There is no intermediate WAV step. WAV is only used as a fallback when
ffmpeg is not available (or the encoder fails).
"""

import os
import shutil
import subprocess
import tempfile
import wave
from typing import List, Optional

import numpy as np

try:
    import sounddevice as sd
except Exception:
    # Deliberately broad: a missing package or a failing native backend must
    # not break the import of this module; callers get a clear error later.
    sd = None

CHUNK_MAX_SECONDS = 600
# CREATE_NO_WINDOW only exists on Windows; 0 is a no-op everywhere else.
_NO_WINDOW = getattr(subprocess, "CREATE_NO_WINDOW", 0)
# If ffmpeg cannot cut a trailing piece shorter than this, the piece is
# dropped instead of discarding all chunks that were already produced.
_MIN_TAIL_SECONDS = 1.0


def _find_ffmpeg() -> Optional[str]:
    """Return the path of an ffmpeg executable, or None if none is found.

    Looks on PATH first, then for a bundled ``ffmpeg.exe`` next to this
    file and in its ``_internal`` sub-directory.
    """
    path = shutil.which("ffmpeg")
    if path:
        return path
    script_dir = os.path.dirname(os.path.abspath(__file__))
    for candidate in (
        os.path.join(script_dir, "ffmpeg.exe"),
        os.path.join(script_dir, "_internal", "ffmpeg.exe"),
    ):
        if os.path.isfile(candidate):
            return candidate
    return None


def _remove_quietly(path: Optional[str]) -> None:
    """Delete *path* if it is set; ignore errors (best-effort cleanup)."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        pass


def _kill_and_reap(proc: subprocess.Popen) -> None:
    """Kill *proc* and wait briefly so no zombie process is left behind."""
    try:
        proc.kill()
    except OSError:
        pass
    try:
        proc.wait(timeout=5)
    except subprocess.TimeoutExpired:
        pass


class AudioRecorder:
    """Record audio and stream it straight into ffmpeg (M4A/AAC).

    If ffmpeg is available, audio is encoded to M4A while recording, so
    there is no intermediate WAV file and the result is small right away.
    If ffmpeg is missing, recording falls back to WAV (16-bit PCM, using
    the configured sample rate and channel count; defaults 16 kHz mono).
    """

    def __init__(self, samplerate=16000, channels=1):
        self.samplerate = samplerate
        self.channels = channels
        self._stream = None
        self._ffmpeg_proc: Optional[subprocess.Popen] = None
        self._output_path: Optional[str] = None
        self._recording = False
        self._wav_fallback = False
        # float32 blocks kept in memory while no encoder is consuming them
        self._frames: List[np.ndarray] = []

    def start(self):
        """Start recording from the default input device.

        Raises:
            RuntimeError: if the ``sounddevice`` package is not available.
            Exception: whatever opening/starting the input stream raises;
                in that case the encoder process and its temp file are
                cleaned up before the error is re-raised.
        """
        if sd is None:
            raise RuntimeError(
                "Python-Paket 'sounddevice' fehlt.\n\n"
                "Installiere es mit:\n"
                " py -3.11 -m pip install sounddevice"
            )

        self._recording = True
        self._wav_fallback = False
        self._frames = []
        self._ffmpeg_proc = None
        self._output_path = None

        try:
            ffmpeg = _find_ffmpeg()
            if ffmpeg:
                self._start_encoder(ffmpeg)
            else:
                self._wav_fallback = True

            self._stream = sd.InputStream(
                samplerate=self.samplerate,
                channels=self.channels,
                callback=self._on_audio,
                dtype="float32",
                blocksize=0,
            )
            self._stream.start()
        except Exception:
            # Do not leak the ffmpeg process / temp file or leave the
            # recorder in a half-started state when the device fails.
            self._abort_start()
            raise

    def _start_encoder(self, ffmpeg: str) -> None:
        """Spawn ffmpeg reading raw s16le PCM on stdin, writing a temp M4A.

        Switches to the WAV fallback when the process cannot be started.
        """
        fd, self._output_path = tempfile.mkstemp(suffix=".m4a", prefix="kg_rec_")
        os.close(fd)
        try:
            self._ffmpeg_proc = subprocess.Popen(
                [ffmpeg, "-y",
                 "-f", "s16le", "-ar", str(self.samplerate),
                 "-ac", str(self.channels),
                 "-i", "pipe:0",
                 "-c:a", "aac", "-b:a", "64k",
                 "-movflags", "+faststart",
                 self._output_path],
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                creationflags=_NO_WINDOW,
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            self._ffmpeg_proc = None
            self._wav_fallback = True
            _remove_quietly(self._output_path)
            self._output_path = None

    def _abort_start(self) -> None:
        """Undo a partially completed start(); never raises."""
        self._recording = False
        stream, self._stream = self._stream, None
        if stream is not None:
            try:
                stream.close()
            except Exception:
                # Best effort only: the original error is re-raised by start().
                pass
        proc, self._ffmpeg_proc = self._ffmpeg_proc, None
        if proc is not None:
            _kill_and_reap(proc)
            if proc.stdin:
                try:
                    proc.stdin.close()
                except (OSError, ValueError):
                    pass
        _remove_quietly(self._output_path)
        self._output_path = None

    def _on_audio(self, indata, frames, time_info, status):
        """Input-stream callback: forward one block to ffmpeg or buffer it."""
        if not self._recording:
            return
        proc = self._ffmpeg_proc
        if proc is not None and proc.stdin and not self._wav_fallback:
            pcm = (np.clip(indata, -1.0, 1.0) * 32767.0).astype("<i2")
            try:
                proc.stdin.write(pcm.tobytes())
                return
            except (OSError, ValueError):
                # The encoder died or its pipe is closed. Keep the rest of
                # the recording in memory so it can still be saved as WAV.
                self._wav_fallback = True
        self._frames.append(indata.copy())

    def stop_and_save(self) -> str:
        """Stop recording and return the path of the finished audio file.

        Returns:
            Path to a temporary ``.m4a`` file (ffmpeg path) or ``.wav``
            file (fallback path).

        Raises:
            RuntimeError: if the recorder was not started, or if no audio
                data is available for the WAV fallback.
        """
        if not self._stream:
            raise RuntimeError("Recorder wurde nicht gestartet.")

        self._recording = False
        stream, self._stream = self._stream, None
        try:
            stream.stop()
        finally:
            # Always release the device, even if stop() fails.
            stream.close()

        proc, self._ffmpeg_proc = self._ffmpeg_proc, None
        if proc is not None:
            encoded_ok = self._finish_encoder(proc)
            out = self._output_path
            has_output = bool(out and os.path.isfile(out) and os.path.getsize(out) > 0)
            # Prefer the M4A when encoding succeeded. A non-empty file from
            # a failed encoder is still returned if nothing was buffered,
            # because there is no other copy of the audio.
            if has_output and (encoded_ok or not self._frames):
                return out
            _remove_quietly(out)
            self._output_path = None
            self._wav_fallback = True

        return self._save_wav_fallback()

    @staticmethod
    def _finish_encoder(proc: subprocess.Popen) -> bool:
        """Close ffmpeg's stdin and wait for it; True if it exited with 0."""
        if proc.stdin:
            try:
                # Closing stdin signals end-of-input so ffmpeg finalises the file.
                proc.stdin.close()
            except (OSError, ValueError):
                pass
        try:
            proc.wait(timeout=30)
        except subprocess.TimeoutExpired:
            _kill_and_reap(proc)
            return False
        return proc.returncode == 0

    def stop_and_save_wav(self) -> str:
        """Legacy alias for stop_and_save()."""
        return self.stop_and_save()

    def _save_wav_fallback(self) -> str:
        """Write the buffered frames to a temporary 16-bit PCM WAV file.

        Raises:
            RuntimeError: if no frames were buffered.
        """
        if not self._frames:
            raise RuntimeError("Keine Audio-Daten aufgenommen (leer).")

        audio = np.clip(np.concatenate(self._frames, axis=0), -1.0, 1.0)
        # WAV stores little-endian samples; be explicit about byte order.
        pcm16 = (audio * 32767.0).astype("<i2")

        fd, path = tempfile.mkstemp(suffix=".wav", prefix="kg_rec_")
        os.close(fd)
        try:
            with wave.open(path, "wb") as wf:
                wf.setnchannels(self.channels)
                wf.setsampwidth(2)
                wf.setframerate(self.samplerate)
                wf.writeframes(pcm16.tobytes())
        except Exception:
            # Do not leave a broken temp file behind; re-raise unchanged.
            _remove_quietly(path)
            raise
        return path


# ── Chunking ──────────────────────────────────────────────────────────


def split_audio_into_chunks(audio_path: str, max_seconds: int = CHUNK_MAX_SECONDS) -> List[str]:
    """Split an audio file into pieces of at most *max_seconds* seconds.

    ``.m4a`` files are cut with ffmpeg (stream copy); everything else is
    treated as WAV. If the file is short enough, or cannot be split, the
    returned list contains just *audio_path* itself.

    Raises:
        ValueError: if *max_seconds* is not positive.
    """
    if max_seconds <= 0:
        raise ValueError("max_seconds must be positive")
    ext = os.path.splitext(audio_path)[1].lower()
    if ext == ".m4a":
        return _split_m4a(audio_path, max_seconds)
    return _split_wav(audio_path, max_seconds)


def _probe_duration_seconds(ffmpeg: str, media_path: str) -> Optional[float]:
    """Read the duration from ffmpeg's input banner; None if unavailable.

    Running ffmpeg with an input but no output prints the ``Duration:``
    line and exits immediately, so the file is not decoded for probing.
    """
    try:
        probe = subprocess.run(
            [ffmpeg, "-hide_banner", "-i", media_path],
            capture_output=True,
            stdin=subprocess.DEVNULL,
            timeout=30,
            creationflags=_NO_WINDOW,
        )
    except (OSError, subprocess.SubprocessError):
        return None

    text = (probe.stderr or b"").decode("utf-8", errors="replace")
    for line in text.splitlines():
        if "Duration:" not in line:
            continue
        stamp = line.split("Duration:")[1].split(",")[0].strip()
        try:
            hours, minutes, seconds = stamp.split(":")
            return int(hours) * 3600 + int(minutes) * 60 + float(seconds)
        except ValueError:
            # e.g. "Duration: N/A"
            return None
    return None


def _cut_m4a_chunk(ffmpeg: str, m4a_path: str, offset: float,
                   max_seconds: int, index: int) -> Optional[str]:
    """Copy one segment into a new temp file; return its path or None."""
    fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{index}.m4a", prefix="kg_rec_")
    os.close(fd)
    try:
        result = subprocess.run(
            [ffmpeg, "-y", "-ss", str(offset), "-i", m4a_path,
             "-t", str(max_seconds), "-c", "copy", chunk_path],
            capture_output=True,
            stdin=subprocess.DEVNULL,
            timeout=120,
            creationflags=_NO_WINDOW,
        )
        ok = (
            result.returncode == 0
            and os.path.isfile(chunk_path)
            and os.path.getsize(chunk_path) > 0
        )
    except (OSError, subprocess.SubprocessError):
        ok = False
    if ok:
        return chunk_path
    _remove_quietly(chunk_path)
    return None


def _split_m4a(m4a_path: str, max_seconds: int) -> List[str]:
    """Split an M4A file with ffmpeg; return [m4a_path] if that fails.

    A failure part-way through discards the chunks produced so far and
    returns the original file, so callers never receive a silently
    truncated recording. Only a trailing piece shorter than
    ``_MIN_TAIL_SECONDS`` may be dropped.
    """
    if max_seconds <= 0:
        raise ValueError("max_seconds must be positive")
    ffmpeg = _find_ffmpeg()
    if not ffmpeg:
        return [m4a_path]

    duration_s = _probe_duration_seconds(ffmpeg, m4a_path)
    if duration_s is None or duration_s <= max_seconds:
        return [m4a_path]

    chunks: List[str] = []
    offset = 0.0
    idx = 0
    while offset < duration_s:
        chunk_path = _cut_m4a_chunk(ffmpeg, m4a_path, offset, max_seconds, idx)
        if chunk_path is None:
            if chunks and duration_s - offset < _MIN_TAIL_SECONDS:
                break
            for stale in chunks:
                _remove_quietly(stale)
            return [m4a_path]
        chunks.append(chunk_path)
        offset += max_seconds
        idx += 1
    return chunks if chunks else [m4a_path]


def _split_wav(wav_path: str, max_seconds: int) -> List[str]:
    """Split a WAV file into temp WAV files of at most *max_seconds* each."""
    if max_seconds <= 0:
        raise ValueError("max_seconds must be positive")

    with wave.open(wav_path, "rb") as wf:
        n_channels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        framerate = wf.getframerate()
        n_frames = wf.getnframes()

        if framerate <= 0 or n_frames / framerate <= max_seconds:
            return [wav_path]

        # At least one frame per chunk, otherwise the loop cannot advance.
        chunk_frames = max(1, int(max_seconds * framerate))
        chunks: List[str] = []
        frames_remaining = n_frames
        idx = 0
        while frames_remaining > 0:
            read_count = min(chunk_frames, frames_remaining)
            data = wf.readframes(read_count)
            if not data:
                # Header announced more frames than the file contains.
                break
            fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.wav", prefix="kg_rec_")
            os.close(fd)
            with wave.open(chunk_path, "wb") as cf:
                cf.setnchannels(n_channels)
                cf.setsampwidth(sampwidth)
                cf.setframerate(framerate)
                cf.writeframes(data)
            chunks.append(chunk_path)
            frames_remaining -= read_count
            idx += 1

    return chunks if chunks else [wav_path]


# Legacy name kept for existing callers.
split_wav_into_chunks = split_audio_into_chunks


def test_audio_device(duration_sec: float = 1.5) -> dict:
    """Quick microphone test: records briefly and checks for signal.

    Records *duration_sec* seconds at 16 kHz mono from the default input
    device and inspects peak and RMS level.

    Returns dict with keys: ok (bool), device (str|None), message (str)
    """
    if sd is None:
        return {
            "ok": False,
            "device": None,
            "message": "Python-Paket 'sounddevice' ist nicht verfügbar.\n"
                       "Audio-Aufnahme nicht möglich.",
        }

    try:
        dev_info = sd.query_devices(kind="input")
        device_name = dev_info.get("name", "Unbekanntes Gerät")
    except Exception:
        return {
            "ok": False,
            "device": None,
            "message": "Kein Eingabegerät (Mikrofon) gefunden.\n"
                       "Bitte Mikrofon anschliessen und erneut versuchen.",
        }

    try:
        audio = sd.rec(
            int(duration_sec * 16000),
            samplerate=16000,
            channels=1,
            dtype="float32",
            blocking=True,
        )
    except Exception as exc:
        return {
            "ok": False,
            "device": device_name,
            "message": f"Aufnahmetest fehlgeschlagen:\n{exc}",
        }

    if audio is None or len(audio) == 0:
        return {
            "ok": False,
            "device": device_name,
            "message": "Keine Audio-Daten empfangen.\n"
                       "Bitte Mikrofon-Zugriff in den Windows-Einstellungen prüfen.",
        }

    peak = float(np.max(np.abs(audio)))
    rms = float(np.sqrt(np.mean(audio ** 2)))

    if peak < 0.001:
        return {
            "ok": False,
            "device": device_name,
            "message": f"Gerät: {device_name}\n\n"
                       f"Kein Signal erkannt (Peak={peak:.4f}).\n"
                       "Mikrofon ist möglicherweise stummgeschaltet oder defekt.",
        }

    # Rough level indicator: RMS scaled by 1000 and capped at 100 %.
    level_pct = min(100, int(rms * 1000))
    return {
        "ok": True,
        "device": device_name,
        "message": f"Gerät: {device_name}\n\n"
                   f"Audio-Signal erkannt.\n"
                   f"Pegel: {level_pct}% (Peak={peak:.3f}, RMS={rms:.4f})\n\n"
                   "Mikrofon funktioniert.",
    }


# The name starts with "test_", so pytest would collect this runtime helper
# as a test case whenever it is imported into a test module. Opt out.
test_audio_device.__test__ = False