Files
aza/AzA march 2026 - Kopie (5)/aza_audio.py
2026-03-30 07:59:11 +02:00

336 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
AudioRecorder - records directly to M4A (AAC via an ffmpeg pipe).
No intermediate WAV step. Falls back to WAV only when ffmpeg is missing.
"""
import os
import shutil
import subprocess
import tempfile
import wave
from typing import List, Optional
import numpy as np
# sounddevice is optional at import time: when it cannot be loaded, ``sd`` is
# set to None and the recording entry points in this module report the problem
# to the caller instead of the whole module failing to import.
# NOTE(review): the broad ``except Exception`` presumably also covers failures
# other than ImportError while the audio backend loads - confirm before
# narrowing it.
try:
    import sounddevice as sd
except Exception:
    sd = None

# Default maximum chunk length in seconds for split_audio_into_chunks().
CHUNK_MAX_SECONDS = 600
# Creation flag passed to every ffmpeg subprocess call in this module;
# evaluates to 0 when ``subprocess`` has no CREATE_NO_WINDOW attribute.
_NO_WINDOW = getattr(subprocess, "CREATE_NO_WINDOW", 0)
def _find_ffmpeg() -> Optional[str]:
    """Locate an ffmpeg executable.

    The system PATH is consulted first. If nothing is found there, two
    locations relative to this module's directory are tried in order:
    ``ffmpeg.exe`` and ``_internal/ffmpeg.exe``.

    Returns:
        The path of the first match, or ``None`` when no ffmpeg was found.
    """
    on_path = shutil.which("ffmpeg")
    if on_path:
        return on_path

    base_dir = os.path.dirname(os.path.abspath(__file__))
    bundled = [
        os.path.join(base_dir, "ffmpeg.exe"),
        os.path.join(base_dir, "_internal", "ffmpeg.exe"),
    ]
    return next((path for path in bundled if os.path.isfile(path)), None)
class AudioRecorder:
    """Record microphone audio and stream it straight into ffmpeg (M4A/AAC).

    When ffmpeg is available, the captured audio is encoded to M4A while the
    recording is running: there is no intermediate WAV file and the result is
    small right away. When ffmpeg is missing or cannot be started, the audio
    is buffered in memory and written as a 16-bit PCM WAV file that uses the
    configured sample rate and channel count.
    """

    def __init__(self, samplerate=16000, channels=1):
        """Create an idle recorder.

        Args:
            samplerate: Capture sample rate in Hz.
            channels: Number of input channels.
        """
        self.samplerate = samplerate
        self.channels = channels
        self._stream = None
        self._ffmpeg_proc: Optional[subprocess.Popen] = None
        self._output_path: Optional[str] = None
        self._recording = False
        self._wav_fallback = False
        self._frames: list = []

    def start(self):
        """Open the input stream and begin capturing.

        Raises:
            RuntimeError: If the ``sounddevice`` package is unavailable.
            Exception: Any error raised while opening or starting the input
                stream is re-raised after the encoder process and its
                temporary output file have been cleaned up.
        """
        if sd is None:
            raise RuntimeError(
                "Python-Paket 'sounddevice' fehlt.\n\n"
                "Installiere es mit:\n"
                "  py -3.11 -m pip install sounddevice"
            )

        self._recording = True
        self._wav_fallback = False
        self._frames = []
        self._ffmpeg_proc = None
        # Forget the previous recording's file: it belongs to the caller and
        # must never be touched by the cleanup paths of this recording.
        self._output_path = None

        ffmpeg = _find_ffmpeg()
        if ffmpeg:
            self._start_ffmpeg(ffmpeg)
        else:
            self._wav_fallback = True

        def callback(indata, frames, time_info, status):
            if not self._recording:
                return
            proc = self._ffmpeg_proc
            if proc and proc.stdin:
                pcm = (np.clip(indata, -1.0, 1.0) * 32767.0).astype(np.int16)
                try:
                    proc.stdin.write(pcm.tobytes())
                except (OSError, ValueError):
                    # Best effort: a dead encoder is noticed in
                    # stop_and_save() through its missing/empty output file.
                    pass
            else:
                # WAV fallback: keep the raw float blocks until saving.
                self._frames.append(indata.copy())

        stream = None
        try:
            stream = sd.InputStream(
                samplerate=self.samplerate,
                channels=self.channels,
                callback=callback,
                dtype="float32",
                blocksize=0,
            )
            stream.start()
        except Exception:
            # Without a running stream nothing will ever feed or stop the
            # encoder, so tear everything down before reporting the error.
            self._recording = False
            if stream is not None:
                try:
                    stream.close()
                except Exception:
                    pass
            self._abort_ffmpeg()
            raise
        self._stream = stream

    def _start_ffmpeg(self, ffmpeg: str) -> None:
        """Spawn the ffmpeg encoder; switch to WAV fallback if that fails."""
        fd, path = tempfile.mkstemp(suffix=".m4a", prefix="kg_rec_")
        os.close(fd)
        try:
            self._ffmpeg_proc = subprocess.Popen(
                [ffmpeg, "-y",
                 "-f", "s16le", "-ar", str(self.samplerate),
                 "-ac", str(self.channels), "-i", "pipe:0",
                 "-c:a", "aac", "-b:a", "64k",
                 "-movflags", "+faststart",
                 path],
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                creationflags=_NO_WINDOW,
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            self._ffmpeg_proc = None
            self._wav_fallback = True
            self._output_path = None
            # The placeholder created by mkstemp would otherwise stay behind.
            self._remove_quietly(path)
        else:
            self._output_path = path

    def _abort_ffmpeg(self) -> None:
        """Terminate a running encoder and delete its partial output file."""
        proc, self._ffmpeg_proc = self._ffmpeg_proc, None
        if proc is not None:
            try:
                if proc.stdin:
                    proc.stdin.close()
            except (OSError, ValueError):
                pass
            try:
                proc.kill()
                proc.wait(timeout=5)
            except (OSError, subprocess.SubprocessError):
                pass
        path, self._output_path = self._output_path, None
        self._remove_quietly(path)

    @staticmethod
    def _remove_quietly(path: Optional[str]) -> None:
        """Delete ``path`` if it is set; a failing delete is ignored."""
        if not path:
            return
        try:
            os.remove(path)
        except OSError:
            pass

    def stop_and_save(self) -> str:
        """Stop the recording and return the path of the finished audio file.

        Returns:
            Path of the M4A file written by ffmpeg, or of a WAV file when the
            recorder ran in (or had to switch to) WAV fallback mode.

        Raises:
            RuntimeError: If the recorder was not started, or if the WAV
                fallback has no buffered audio to write.
        """
        if not self._stream:
            raise RuntimeError("Recorder wurde nicht gestartet.")
        self._recording = False
        self._stream.stop()
        self._stream.close()
        self._stream = None

        proc, self._ffmpeg_proc = self._ffmpeg_proc, None
        if proc and proc.stdin:
            # Closing stdin signals end of input so ffmpeg finalises the file.
            try:
                proc.stdin.close()
            except (OSError, ValueError):
                pass
            try:
                proc.wait(timeout=30)
            except Exception:
                try:
                    proc.kill()
                    # Reap the killed process so it does not linger.
                    proc.wait(timeout=5)
                except Exception:
                    pass
            path = self._output_path
            if path and os.path.isfile(path) and os.path.getsize(path) > 0:
                return path
            # The encoder produced nothing usable: drop its empty file and
            # try the in-memory buffer instead.
            self._remove_quietly(path)
            self._output_path = None
            self._wav_fallback = True

        if self._wav_fallback or not self._output_path:
            return self._save_wav_fallback()
        return self._output_path

    def stop_and_save_wav(self) -> str:
        """Legacy alias for :meth:`stop_and_save`."""
        return self.stop_and_save()

    def _save_wav_fallback(self) -> str:
        """Write the buffered float frames to a temporary 16-bit PCM WAV file.

        Returns:
            Path of the newly created WAV file.

        Raises:
            RuntimeError: If no audio frames were buffered.
        """
        if not self._frames:
            raise RuntimeError("Keine Audio-Daten aufgenommen (leer).")
        audio = np.concatenate(self._frames, axis=0)
        audio = np.clip(audio, -1.0, 1.0)
        pcm16 = (audio * 32767.0).astype(np.int16)
        fd, path = tempfile.mkstemp(suffix=".wav", prefix="kg_rec_")
        os.close(fd)
        with wave.open(path, "wb") as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(2)
            wf.setframerate(self.samplerate)
            wf.writeframes(pcm16.tobytes())
        return path
# ── Chunking ──────────────────────────────────────────────────────────
def split_audio_into_chunks(audio_path: str, max_seconds: int = CHUNK_MAX_SECONDS) -> List[str]:
    """Split an audio file into pieces of at most ``max_seconds`` seconds.

    The file extension selects the splitter: ``.m4a`` (case-insensitive) is
    handled by ``_split_m4a``; every other extension goes to ``_split_wav``.

    Args:
        audio_path: Path of the audio file to split.
        max_seconds: Upper bound for the length of one chunk, in seconds.

    Returns:
        The list of file paths produced by the selected splitter.
    """
    _, suffix = os.path.splitext(audio_path)
    splitter = _split_m4a if suffix.lower() == ".m4a" else _split_wav
    return splitter(audio_path, max_seconds)
def _split_m4a(m4a_path: str, max_seconds: int) -> List[str]:
    """Cut an M4A file into stream-copied chunks of at most ``max_seconds``.

    The duration is read from the ``Duration:`` line ffmpeg prints for the
    input. Chunks are written to temporary ``kg_rec_*_chunk<N>.m4a`` files
    with ``-c copy`` (no re-encoding).

    Args:
        m4a_path: Path of the source file.
        max_seconds: Maximum chunk length in seconds; must be positive.

    Returns:
        The chunk paths in order. ``[m4a_path]`` is returned unchanged when
        ffmpeg is unavailable, the duration cannot be determined, the file is
        not longer than ``max_seconds``, or no chunk could be produced.

    Raises:
        ValueError: If ``max_seconds`` is not positive (the chunk offset
            would never advance).
    """
    if max_seconds <= 0:
        raise ValueError("max_seconds must be greater than 0")

    ffmpeg = _find_ffmpeg()
    if not ffmpeg:
        return [m4a_path]

    try:
        # No output is requested on purpose: ffmpeg then only opens the
        # input, prints its summary (including "Duration:") to stderr and
        # exits with an error status, which is ignored here. Decoding the
        # whole file just to learn its length could exceed the timeout on
        # long recordings.
        probe = subprocess.run(
            [ffmpeg, "-i", m4a_path],
            capture_output=True, timeout=30, creationflags=_NO_WINDOW,
        )
        duration_s = None
        stderr_text = (probe.stderr or b"").decode("utf-8", errors="replace")
        for line in stderr_text.splitlines():
            if "Duration:" in line:
                stamp = line.split("Duration:")[1].split(",")[0].strip()
                h, m, s = stamp.split(":")
                duration_s = int(h) * 3600 + int(m) * 60 + float(s)
                break
        if duration_s is None or duration_s <= max_seconds:
            return [m4a_path]
    except (OSError, subprocess.SubprocessError, ValueError):
        # Probe failed or printed an unparsable duration (e.g. "N/A"):
        # hand the file back unsplit.
        return [m4a_path]

    chunks: List[str] = []
    offset = 0.0
    idx = 0
    while offset < duration_s:
        fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.m4a", prefix="kg_rec_")
        os.close(fd)
        try:
            result = subprocess.run(
                [ffmpeg, "-y", "-ss", str(offset), "-i", m4a_path,
                 "-t", str(max_seconds), "-c", "copy", chunk_path],
                capture_output=True, timeout=120, creationflags=_NO_WINDOW,
            )
            produced = (
                result.returncode == 0
                and os.path.isfile(chunk_path)
                and os.path.getsize(chunk_path) > 0
            )
        except (OSError, subprocess.SubprocessError):
            # A timeout or spawn failure counts as a failed chunk instead of
            # escaping with the temporary file left on disk.
            produced = False

        if not produced:
            try:
                os.remove(chunk_path)
            except OSError:
                pass
            # NOTE(review): the chunks written so far are still returned, so
            # the audio after this offset is not covered - confirm callers
            # accept a partial result.
            break

        chunks.append(chunk_path)
        offset += max_seconds
        idx += 1

    return chunks if chunks else [m4a_path]
def _split_wav(wav_path: str, max_seconds: int) -> List[str]:
    """Split a WAV file into chunks of at most ``max_seconds`` seconds.

    Each chunk is written to a temporary ``kg_rec_*_chunk<N>.wav`` file with
    the channel count, sample width and frame rate of the source.

    Args:
        wav_path: Path of the source WAV file.
        max_seconds: Maximum chunk length in seconds; must be positive.

    Returns:
        The chunk paths in order, or ``[wav_path]`` when the file is not
        longer than ``max_seconds``.

    Raises:
        ValueError: If ``max_seconds`` is not positive.
    """
    if max_seconds <= 0:
        raise ValueError("max_seconds must be greater than 0")

    chunks: List[str] = []
    with wave.open(wav_path, "rb") as wf:
        n_channels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        framerate = wf.getframerate()
        n_frames = wf.getnframes()

        if n_frames / framerate <= max_seconds:
            return [wav_path]

        # At least one frame per chunk; a zero-sized chunk would never
        # consume any input and the loop below would not terminate.
        chunk_frames = max(1, int(max_seconds * framerate))
        frames_remaining = n_frames
        idx = 0
        while frames_remaining > 0:
            read_count = min(chunk_frames, frames_remaining)
            data = wf.readframes(read_count)
            if not data:
                # The header announced more frames than the file contains.
                break
            fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.wav", prefix="kg_rec_")
            os.close(fd)
            with wave.open(chunk_path, "wb") as cf:
                cf.setnchannels(n_channels)
                cf.setsampwidth(sampwidth)
                cf.setframerate(framerate)
                cf.writeframes(data)
            chunks.append(chunk_path)
            frames_remaining -= read_count
            idx += 1
    return chunks
# Second name bound to the same function object as split_audio_into_chunks.
# NOTE(review): presumably kept for callers that still use the WAV-only name -
# confirm before removing.
split_wav_into_chunks = split_audio_into_chunks
def test_audio_device(duration_sec: float = 1.5, samplerate: int = 16000) -> dict:
    """Quick microphone test: record briefly and check for a signal.

    Args:
        duration_sec: Length of the test recording in seconds.
        samplerate: Sample rate in Hz used for the test recording.

    Returns:
        A dict with the keys ``ok`` (bool), ``device`` (str or None) and
        ``message`` (str, user-facing text). Problems are reported through
        ``ok=False`` and the message; errors from the device query and from
        the recording call are caught and reported the same way.
    """
    def outcome(ok: bool, device, message: str) -> dict:
        return {"ok": ok, "device": device, "message": message}

    if sd is None:
        return outcome(
            False, None,
            "Python-Paket 'sounddevice' ist nicht verfügbar.\n"
            "Audio-Aufnahme nicht möglich.",
        )

    try:
        dev_info = sd.query_devices(kind="input")
        device_name = dev_info.get("name", "Unbekanntes Gerät")
    except Exception:
        return outcome(
            False, None,
            "Kein Eingabegerät (Mikrofon) gefunden.\n"
            "Bitte Mikrofon anschliessen und erneut versuchen.",
        )

    try:
        audio = sd.rec(
            int(duration_sec * samplerate),
            samplerate=samplerate,
            channels=1,
            dtype="float32",
            blocking=True,
        )
    except Exception as exc:
        return outcome(False, device_name, f"Aufnahmetest fehlgeschlagen:\n{exc}")

    if audio is None or len(audio) == 0:
        return outcome(
            False, device_name,
            "Keine Audio-Daten empfangen.\n"
            "Bitte Mikrofon-Zugriff in den Windows-Einstellungen prüfen.",
        )

    peak = float(np.max(np.abs(audio)))
    rms = float(np.sqrt(np.mean(audio ** 2)))

    # A peak below 0.001 of full scale is treated as "no signal".
    if peak < 0.001:
        return outcome(
            False, device_name,
            f"Gerät: {device_name}\n\n"
            f"Kein Signal erkannt (Peak={peak:.4f}).\n"
            "Mikrofon ist möglicherweise stummgeschaltet oder defekt.",
        )

    # Display level: RMS scaled by 1000 and capped at 100 percent.
    level_pct = min(100, int(rms * 1000))
    return outcome(
        True, device_name,
        f"Gerät: {device_name}\n\n"
        f"Audio-Signal erkannt.\n"
        f"Pegel: {level_pct}% (Peak={peak:.3f}, RMS={rms:.4f})\n\n"
        "Mikrofon funktioniert.",
    )


# The ``test_`` prefix matches pytest's default collection pattern, so pytest
# would collect and call this runtime helper wherever it is imported into a
# test module. ``__test__ = False`` opts it out of collection.
test_audio_device.__test__ = False