Files
aza/AzA march 2026 - Kopie (2)/aza_audio.py

336 lines
11 KiB
Python
Raw Normal View History

2026-03-25 13:42:48 +01:00
# -*- coding: utf-8 -*-
"""
AudioRecorder – records directly to M4A (AAC via an ffmpeg pipe).
No intermediate WAV step. Falls back to WAV only when ffmpeg is missing.
"""
import os
import shutil
import subprocess
import tempfile
import wave
from typing import List, Optional
import numpy as np
try:
import sounddevice as sd
except Exception:
sd = None
# Default upper bound, in seconds, for one chunk produced by
# split_audio_into_chunks().
CHUNK_MAX_SECONDS = 600
# Value passed as ``creationflags`` to every subprocess call in this module:
# subprocess.CREATE_NO_WINDOW when the running Python exposes that attribute
# (it is Windows-only), otherwise 0.
_NO_WINDOW = getattr(subprocess, "CREATE_NO_WINDOW", 0)
def _find_ffmpeg() -> Optional[str]:
    """Return the path of an ffmpeg executable, or None if none is found.

    Lookup order: whatever ``shutil.which("ffmpeg")`` resolves first, then
    ``ffmpeg.exe`` next to this module, then ``ffmpeg.exe`` inside an
    ``_internal`` sub-directory next to this module.
    """
    on_path = shutil.which("ffmpeg")
    if on_path:
        return on_path

    base_dir = os.path.dirname(os.path.abspath(__file__))
    bundled = (
        os.path.join(base_dir, "ffmpeg.exe"),
        os.path.join(base_dir, "_internal", "ffmpeg.exe"),
    )
    # First bundled candidate that exists as a regular file wins.
    return next((path for path in bundled if os.path.isfile(path)), None)
class AudioRecorder:
    """Record audio and stream it straight into ffmpeg (M4A/AAC).

    When ffmpeg is available, every captured block is converted to 16-bit
    PCM and written to ffmpeg's stdin while recording, so the result is an
    M4A file with no intermediate WAV file. In that mode the raw blocks are
    NOT kept in memory, so if the encode fails there is nothing left to
    fall back to and ``stop_and_save`` raises ``RuntimeError``.

    When ffmpeg is missing or cannot be launched, the captured blocks are
    buffered in memory and written as a 16-bit PCM WAV file on stop.
    """

    def __init__(self, samplerate=16000, channels=1):
        """Store the capture format; no device or process is touched here."""
        self.samplerate = samplerate
        self.channels = channels
        self._stream = None
        self._ffmpeg_proc: Optional[subprocess.Popen] = None
        self._output_path: Optional[str] = None
        self._recording = False
        self._wav_fallback = False
        self._frames: list = []

    def start(self):
        """Open the input stream and, if possible, the ffmpeg encoder.

        Raises:
            RuntimeError: if the ``sounddevice`` package is unavailable.
            Exception: whatever ``sounddevice`` raises when the input stream
                cannot be opened or started. In that case the ffmpeg process
                and its temp file are cleaned up before re-raising.
        """
        if sd is None:
            raise RuntimeError(
                "Python-Paket 'sounddevice' fehlt.\n\n"
                "Installiere es mit:\n"
                " py -3.11 -m pip install sounddevice"
            )

        self._recording = True
        self._wav_fallback = False
        self._frames = []
        self._ffmpeg_proc = None
        self._output_path = None

        ffmpeg = _find_ffmpeg()
        if ffmpeg:
            fd, self._output_path = tempfile.mkstemp(suffix=".m4a", prefix="kg_rec_")
            os.close(fd)
            try:
                self._ffmpeg_proc = subprocess.Popen(
                    [ffmpeg, "-y",
                     "-f", "s16le", "-ar", str(self.samplerate),
                     "-ac", str(self.channels), "-i", "pipe:0",
                     "-c:a", "aac", "-b:a", "64k",
                     "-movflags", "+faststart",
                     self._output_path],
                    stdin=subprocess.PIPE,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    creationflags=_NO_WINDOW,
                )
            except Exception:
                # Deliberate best-effort: any launch failure means WAV mode.
                # Remove the temp file created above so it is not orphaned.
                self._ffmpeg_proc = None
                self._wav_fallback = True
                self._discard_output_file()
        else:
            self._wav_fallback = True

        def callback(indata, frames, time_info, status):
            if not self._recording:
                return
            proc = self._ffmpeg_proc
            if proc is not None and proc.stdin:
                pcm = (np.clip(indata, -1.0, 1.0) * 32767.0).astype(np.int16)
                try:
                    proc.stdin.write(pcm.tobytes())
                except (OSError, ValueError):
                    # Pipe broken or already closed: this block is dropped.
                    # stop_and_save() notices the failed encode through
                    # ffmpeg's exit status.
                    pass
            else:
                self._frames.append(indata.copy())

        stream = None
        try:
            stream = sd.InputStream(
                samplerate=self.samplerate,
                channels=self.channels,
                callback=callback,
                dtype="float32",
                blocksize=0,
            )
            stream.start()
        except Exception:
            # Do not leave a running ffmpeg, an open stream or a temp file
            # behind when the audio device cannot be opened.
            self._recording = False
            if stream is not None:
                try:
                    stream.close()
                except Exception:
                    pass
            proc, self._ffmpeg_proc = self._ffmpeg_proc, None
            if proc is not None:
                self._finish_ffmpeg(proc)
            self._discard_output_file()
            raise
        self._stream = stream

    def stop_and_save(self) -> str:
        """Stop recording and return the path of the finished audio file.

        Returns the M4A path when ffmpeg exited with status 0 and produced a
        non-empty file. Otherwise the unusable M4A temp file is removed and
        the buffered frames are written as WAV.

        Raises:
            RuntimeError: if the recorder was not started, or if no audio
                data is available for the WAV fallback.
        """
        if not self._stream:
            raise RuntimeError("Recorder wurde nicht gestartet.")

        self._recording = False
        self._stream.stop()
        self._stream.close()
        self._stream = None

        proc, self._ffmpeg_proc = self._ffmpeg_proc, None
        if proc is not None:
            encoded_ok = self._finish_ffmpeg(proc)
            if (encoded_ok
                    and self._output_path
                    and os.path.isfile(self._output_path)
                    and os.path.getsize(self._output_path) > 0):
                return self._output_path
            # Failed, killed or empty encode: never hand that file out.
            self._discard_output_file()
            self._wav_fallback = True

        if self._wav_fallback or not self._output_path:
            return self._save_wav_fallback()
        return self._output_path

    def stop_and_save_wav(self) -> str:
        """Legacy alias for :meth:`stop_and_save`."""
        return self.stop_and_save()

    @staticmethod
    def _finish_ffmpeg(proc) -> bool:
        """Close ffmpeg's stdin, wait for it to exit, report success.

        Waits up to 30 seconds; on timeout the process is killed and reaped.
        Returns True only when the process exited with status 0.
        """
        try:
            if proc.stdin:
                proc.stdin.close()
        except OSError:
            pass
        try:
            proc.wait(timeout=30)
        except subprocess.TimeoutExpired:
            try:
                proc.kill()
            except OSError:
                pass
            try:
                proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                pass
            return False
        return proc.returncode == 0

    def _discard_output_file(self) -> None:
        """Delete the current M4A temp file (if any) and forget its path."""
        path, self._output_path = self._output_path, None
        if path:
            try:
                os.remove(path)
            except OSError:
                pass

    def _save_wav_fallback(self) -> str:
        """Write the buffered frames to a temp WAV file and return its path.

        Samples are clipped to [-1.0, 1.0] and scaled to 16-bit PCM.

        Raises:
            RuntimeError: if no frames were buffered.
        """
        if not self._frames:
            raise RuntimeError("Keine Audio-Daten aufgenommen (leer).")

        audio = np.clip(np.concatenate(self._frames, axis=0), -1.0, 1.0)
        pcm16 = (audio * 32767.0).astype(np.int16)

        fd, path = tempfile.mkstemp(suffix=".wav", prefix="kg_rec_")
        os.close(fd)
        with wave.open(path, "wb") as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(2)
            wf.setframerate(self.samplerate)
            wf.writeframes(pcm16.tobytes())
        return path
# ── Chunking ──────────────────────────────────────────────────────────
def split_audio_into_chunks(audio_path: str, max_seconds: int = CHUNK_MAX_SECONDS) -> List[str]:
    """Split an audio file into pieces of at most ``max_seconds`` seconds.

    Files whose extension is ``.m4a`` (case-insensitive) are handled by
    ``_split_m4a``; every other path is handed to ``_split_wav``.
    Returns whatever list of paths the selected splitter returns.
    """
    _, suffix = os.path.splitext(audio_path)
    splitter = _split_m4a if suffix.lower() == ".m4a" else _split_wav
    return splitter(audio_path, max_seconds)
def _probe_duration_seconds(ffmpeg: str, media_path: str) -> Optional[float]:
    """Return the container duration ffmpeg reports for a file, or None.

    Runs ``ffmpeg -i <file>`` without an output: ffmpeg prints the input
    description (including the ``Duration:`` line) to stderr and exits
    without decoding anything. Any failure to run or parse yields None.
    """
    try:
        probe = subprocess.run(
            [ffmpeg, "-hide_banner", "-i", media_path],
            capture_output=True, timeout=30, creationflags=_NO_WINDOW,
        )
        stderr_text = (probe.stderr or b"").decode("utf-8", errors="replace")
        for line in stderr_text.splitlines():
            if "Duration:" in line:
                stamp = line.split("Duration:")[1].split(",")[0].strip()
                hours, minutes, seconds = stamp.split(":")
                return int(hours) * 3600 + int(minutes) * 60 + float(seconds)
    except (OSError, ValueError, subprocess.SubprocessError):
        # ffmpeg not runnable, timed out, or "Duration: N/A" and the like.
        return None
    return None


def _split_m4a(m4a_path: str, max_seconds: int) -> List[str]:
    """Split an M4A file into stream-copied chunks of ``max_seconds`` each.

    Returns ``[m4a_path]`` unchanged when ffmpeg is not available, when the
    duration cannot be determined, when the file is not longer than
    ``max_seconds``, or when any chunk fails to be written. In that last
    case the chunks already written are deleted, so the caller gets either
    the complete set of chunks or the original file, never a truncated set.

    Raises:
        ValueError: if ``max_seconds`` is not positive (the split loop could
            never advance).
    """
    if max_seconds <= 0:
        raise ValueError("max_seconds must be positive")

    ffmpeg = _find_ffmpeg()
    if not ffmpeg:
        return [m4a_path]

    duration_s = _probe_duration_seconds(ffmpeg, m4a_path)
    if duration_s is None or duration_s <= max_seconds:
        return [m4a_path]

    chunks: List[str] = []
    complete = True
    offset = 0.0
    idx = 0
    while offset < duration_s:
        fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.m4a", prefix="kg_rec_")
        os.close(fd)
        chunks.append(chunk_path)  # tracked immediately so cleanup sees it
        try:
            result = subprocess.run(
                [ffmpeg, "-y", "-ss", str(offset), "-i", m4a_path,
                 "-t", str(max_seconds), "-c", "copy", chunk_path],
                capture_output=True, timeout=120, creationflags=_NO_WINDOW,
            )
            chunk_ok = (
                result.returncode == 0
                and os.path.isfile(chunk_path)
                and os.path.getsize(chunk_path) > 0
            )
        except (OSError, subprocess.SubprocessError):
            chunk_ok = False
        if not chunk_ok:
            complete = False
            break
        offset += max_seconds
        idx += 1

    if complete and chunks:
        return chunks

    # Partial result: remove what was written and fall back to the whole file.
    for path in chunks:
        try:
            os.remove(path)
        except OSError:
            pass
    return [m4a_path]
def _split_wav(wav_path: str, max_seconds: int) -> List[str]:
    """Split a WAV file into temp WAV chunks of at most ``max_seconds`` each.

    Returns ``[wav_path]`` unchanged when the file is not longer than
    ``max_seconds``. Otherwise returns the chunk paths in order; each chunk
    keeps the source's channel count, sample width and frame rate.

    Raises:
        ValueError: if ``max_seconds`` is not positive.
        wave.Error / OSError: if the source cannot be read or a chunk cannot
            be written. Chunks created so far are removed before re-raising.
    """
    if max_seconds <= 0:
        raise ValueError("max_seconds must be positive")

    chunks: List[str] = []
    with wave.open(wav_path, "rb") as src:
        n_channels = src.getnchannels()
        sampwidth = src.getsampwidth()
        framerate = src.getframerate()
        n_frames = src.getnframes()

        # Same test as duration <= max_seconds, without dividing by the rate.
        if n_frames <= max_seconds * framerate:
            return [wav_path]

        # At least one frame per chunk, otherwise the loop could not advance.
        chunk_frames = max(1, int(max_seconds * framerate))
        frames_remaining = n_frames
        idx = 0
        try:
            while frames_remaining > 0:
                read_count = min(chunk_frames, frames_remaining)
                data = src.readframes(read_count)
                if not data:
                    # Header claimed more frames than the file contains.
                    break
                fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.wav", prefix="kg_rec_")
                os.close(fd)
                chunks.append(chunk_path)
                with wave.open(chunk_path, "wb") as dst:
                    dst.setnchannels(n_channels)
                    dst.setsampwidth(sampwidth)
                    dst.setframerate(framerate)
                    dst.writeframes(data)
                frames_remaining -= read_count
                idx += 1
        except Exception:
            for path in chunks:
                try:
                    os.remove(path)
                except OSError:
                    pass
            raise
    return chunks if chunks else [wav_path]
# Second name bound to the same function object as split_audio_into_chunks;
# presumably kept so callers using the older WAV-specific name keep working.
split_wav_into_chunks = split_audio_into_chunks
def test_audio_device(duration_sec: float = 1.5) -> dict:
    """Run a short microphone check and report whether a signal was seen.

    Records ``duration_sec`` seconds of mono float32 audio at 16 kHz from
    the default input device and inspects its peak and RMS level.

    Returns a dict with the keys ``ok`` (bool), ``device`` (str or None)
    and ``message`` (str). ``ok`` is False when sounddevice is unavailable,
    no input device can be queried, the recording raises, no samples come
    back, or the peak amplitude is below 0.001.
    """
    def report(ok, device, message):
        return {"ok": ok, "device": device, "message": message}

    if sd is None:
        return report(
            False,
            None,
            "Python-Paket 'sounddevice' ist nicht verfügbar.\n"
            "Audio-Aufnahme nicht möglich.",
        )

    try:
        input_info = sd.query_devices(kind="input")
        device_name = input_info.get("name", "Unbekanntes Gerät")
    except Exception:
        return report(
            False,
            None,
            "Kein Eingabegerät (Mikrofon) gefunden.\n"
            "Bitte Mikrofon anschliessen und erneut versuchen.",
        )

    rate = 16000
    try:
        audio = sd.rec(
            int(duration_sec * rate),
            samplerate=rate,
            channels=1,
            dtype="float32",
            blocking=True,
        )
    except Exception as exc:
        return report(False, device_name, f"Aufnahmetest fehlgeschlagen:\n{exc}")

    if audio is None or len(audio) == 0:
        return report(
            False,
            device_name,
            "Keine Audio-Daten empfangen.\n"
            "Bitte Mikrofon-Zugriff in den Windows-Einstellungen prüfen.",
        )

    peak = float(np.max(np.abs(audio)))
    rms = float(np.sqrt(np.mean(audio ** 2)))

    if peak < 0.001:
        silent_msg = (
            f"Gerät: {device_name}\n\n"
            f"Kein Signal erkannt (Peak={peak:.4f}).\n"
            "Mikrofon ist möglicherweise stummgeschaltet oder defekt."
        )
        return report(False, device_name, silent_msg)

    # RMS scaled by 1000 and capped at 100 serves as the displayed level.
    level_pct = min(100, int(rms * 1000))
    ok_msg = (
        f"Gerät: {device_name}\n\n"
        "Audio-Signal erkannt.\n"
        f"Pegel: {level_pct}% (Peak={peak:.3f}, RMS={rms:.4f})\n\n"
        "Mikrofon funktioniert."
    )
    return report(True, device_name, ok_msg)