1265 lines
49 KiB
Python
1265 lines
49 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
AZA MedWork – Audio-Notiz (Standalone)
|
||
|
||
Eigenstaendige Audio-Notiz (Transkription).
|
||
Gleicher Look wie im Hauptfenster.
|
||
|
||
Start:
|
||
python diktat_app.py
|
||
ODER Doppelklick auf "Audio-Notiz starten.bat"
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
import sys
|
||
import tempfile
|
||
import threading
|
||
import time
|
||
import wave
|
||
import json
|
||
import tkinter as tk
|
||
from tkinter import ttk, messagebox
|
||
from tkinter.scrolledtext import ScrolledText
|
||
from datetime import datetime
|
||
|
||
# Absolute directory of this script; starting point for the project-root
# search and location of the optional local ``.env`` file.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
|
||
|
||
|
||
def _find_project_root(start_dir: str) -> str:
|
||
"""Sucht den Projektordner, der aza_audio.py enthaelt."""
|
||
cur = os.path.abspath(start_dir)
|
||
for _ in range(8):
|
||
if os.path.isfile(os.path.join(cur, "aza_audio.py")):
|
||
return cur
|
||
parent = os.path.dirname(cur)
|
||
if parent == cur:
|
||
break
|
||
cur = parent
|
||
return os.path.normpath(os.path.join(start_dir, "..", ".."))
|
||
|
||
|
||
# Put the project root at the front of sys.path so that project modules
# (e.g. aza_audio) can be imported further below.
_PROJECT_ROOT = _find_project_root(_APP_DIR)
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)

# Collects short descriptions of components that could not be imported;
# shown to the user once the window is up.
_IMPORT_ERRORS = []

try:
    from dotenv import load_dotenv
except Exception:
    load_dotenv = None
    _IMPORT_ERRORS.append("python-dotenv fehlt")

if load_dotenv:
    load_dotenv(os.path.join(_PROJECT_ROOT, ".env"))
    # Fallback: local .env in the add-on folder
    load_dotenv(os.path.join(_APP_DIR, ".env"))
|
||
|
||
|
||
def _apply_proxy_env_from_openai_vars():
|
||
"""Mappt OPENAI_*_PROXY auf Standard-Proxy-Variablen."""
|
||
http_proxy = os.getenv("OPENAI_HTTP_PROXY", "").strip()
|
||
https_proxy = os.getenv("OPENAI_HTTPS_PROXY", "").strip()
|
||
if http_proxy:
|
||
os.environ["HTTP_PROXY"] = http_proxy
|
||
os.environ["http_proxy"] = http_proxy
|
||
if https_proxy:
|
||
os.environ["HTTPS_PROXY"] = https_proxy
|
||
os.environ["https_proxy"] = https_proxy
|
||
|
||
|
||
# Proxy variables must be in place before the OpenAI client library is imported.
_apply_proxy_env_from_openai_vars()

try:
    from openai import OpenAI
except Exception:
    OpenAI = None
    _IMPORT_ERRORS.append("openai fehlt")

try:
    from aza_audio import AudioRecorder
except Exception as _audio_exc:
    AudioRecorder = None
    if isinstance(_audio_exc, ModuleNotFoundError) and _audio_exc.name == "aza_audio":
        _IMPORT_ERRORS.append("aza_audio.py nicht gefunden")
    else:
        # The module exists but failed while loading (for example one of its
        # own dependencies is missing). Reporting "not found" would send the
        # user looking in the wrong place, so include the real cause.
        _IMPORT_ERRORS.append(
            "aza_audio.py konnte nicht geladen werden "
            f"({type(_audio_exc).__name__}: {_audio_exc})"
        )
|
||
|
||
def _get_data_dir() -> str:
|
||
"""Schreibbares Datenverzeichnis (%APPDATA%\\AZA Desktop)."""
|
||
try:
|
||
from aza_config import get_writable_data_dir
|
||
return get_writable_data_dir()
|
||
except Exception:
|
||
return os.path.join(os.path.expanduser("~"), "AppData", "Roaming", "AZA Desktop")
|
||
|
||
# Writable data directory and the JSON file holding this tool's settings.
_DATA_DIR = _get_data_dir()
_SETTINGS_FILE = os.path.join(_DATA_DIR, "audio_notiz_settings.json")

# Transcription model name; can be overridden via the TRANSCRIBE_MODEL
# environment variable.
TRANSCRIBE_MODEL = os.getenv("TRANSCRIBE_MODEL", "gpt-4o-mini-transcribe")
|
||
|
||
# Vocabulary prompt made of German medical (mostly dermatological) terms.
# NOTE(review): presumably passed as the prompt of the transcription request;
# the call site is not visible in this part of the file - confirm.
# NOTE(review): the month entries are hard-coded to the year 2026.
WHISPER_MEDICAL_PROMPT = (
    "Medizinische Dokumentation auf Deutsch. "
    "Capillitium, Fotodynamische Therapie, PDT, Basalzellkarzinom, Plattenepithelkarzinom, "
    "Spinaliom, Spinaliom der Haut, Spinalzellkarzinom, "
    "Melanom, Exzision, Biopsie, Kryotherapie, Kuerettage, Histologie, Dermatoskopie, "
    "Naevus, Naevi, Naevuszellnaevus, dysplastischer Naevus, "
    "Compound-Naevus, junktionaler Naevus, dermaler Naevus, Spitz-Naevus, "
    "Erythem, Papel, Pustel, Makula, Plaque, Nodulus, Nodus, "
    "Vesikel, Bulla, Erosion, Ulkus, Rhagade, Kruste, Squama, "
    "Effloreszenzen, Lichenifikation, Exkoriation, "
    "seborrhoische Keratose, Fibrom, Lipom, Atherom, Epidermoidzyste, "
    "Verruca vulgaris, Verrucae, Kondylome, Molluscum contagiosum, "
    "Haemangiom, Angiom, Keloid, hypertrophe Narbe, "
    "Tinea, Mykose, Onychomykose, Herpes simplex, Herpes zoster, "
    "Erysipel, Impetigo, Abszess, Phlegmone, Skabies, "
    "Pemphigus, Pemphigoid, Lichen ruber, Lichen sclerosus, "
    "Vitiligo, Pruritus, Prurigo, Mykosis fungoides, "
    "Shave-Biopsie, Stanzbiopsie, Inzisionsbiopsie, "
    "Breslow-Dicke, Clark-Level, Sentinel-Lymphknoten, "
    "Auflichtmikroskopie, Phototherapie, UVB, PUVA, "
    "Anamnese, Diagnose, Therapie, Procedere, subjektiv, objektiv, "
    "Abdomen, Thorax, Extremitaeten, zervikal, lumbal, thorakal, sakral, "
    "Sonographie, Roentgen, MRI, CT, EKG, Laborwerte, Blutbild, "
    "Hypertonie, Diabetes mellitus, Hypercholesterinaemie, Hypothyreose, "
    "Antikoagulation, Thrombozytenaggregationshemmer, NSAR, ACE-Hemmer, "
    "Immunsuppression, Kortikosteroide, Biologika, Methotrexat, "
    "Psoriasis, Ekzem, Dermatitis, Urtikaria, Alopezie, Akne, Rosazea, "
    "Aktinische Keratose, Morbus Bowen, Lentigo maligna, "
    "Januar 2026, Februar 2026, Maerz 2026, April 2026, Mai 2026, "
    "Status nach, Z.n., s/p, i.v., p.o., s.c., "
    "ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief."
)

# Leading text of WHISPER_MEDICAL_PROMPT.
# NOTE(review): probably used to recognise the prompt being echoed back in a
# transcript - the usage is not visible here, confirm.
WHISPER_PROMPT_PREFIX = "Medizinische Dokumentation auf Deutsch"
|
||
|
||
# ── Colours (identical to the main window) ──
BG = "#B9ECFA"              # window / frame background
BTN_BG = "#7EC8E3"          # default button fill
BTN_FG = "#1a4d6d"          # default button text
BTN_ACTIVE = "#5AB9E8"      # button fill while hovered
HDR_FG = "#1a4d6d"          # header and caption text
STATUS_BG = "#FFE4CC"       # status bar background
STATUS_FG = "#BD4500"       # status bar text
TXT_BG = "#F5FCFF"          # text field background
MINI_FG = "#5A90B0"         # small label-style controls
MINI_FG_HOVER = "#1a4d6d"   # small controls while hovered
REC_RED = "#D04040"         # stop button / toggle while recording
REC_DOT = "#FF3030"         # blinking recording indicator

# Folder for automatically saved notes and path of the optional logo image.
_SAVE_DIR = os.path.join(_DATA_DIR, "kg_diktat_ablage", "Audio_Notiz")
_LOGO_PATH = os.path.join(_PROJECT_ROOT, "logo.png")
|
||
|
||
# Matches a spoken stop/close command (with optional surrounding punctuation
# or whitespace) at the very end of a text, case-insensitively.
_TRAILING_CMD_RE = re.compile(
    r"[.,;:\s]*(?:"
    r"(?:stopp?|stoppen)\s*dikt(?:at|ad)|"
    r"dikt(?:at|ad)\s*(?:stopp?|stoppen)|"
    r"(?:schliessen|schließen|beenden)\s*(?:dikt(?:at|ad)|app)|"
    r"(?:dikt(?:at|ad)|app)\s*(?:schliessen|schließen|beenden)"
    r")[.,;:\s]*$",
    re.IGNORECASE
)

# Tuning values for voice-command handling.
# NOTE(review): meanings are inferred from the names only (RMS silence
# threshold, silence duration, cooldown and speech window in seconds); the
# code using them is not visible here - confirm.
_SILENCE_RMS = 0.04
_SILENCE_SEC = 1.5
_CMD_COOLDOWN_SEC = 3.0
_CMD_SPEECH_SEC = 5.0

# Prompt listing the supported spoken commands.
_CMD_PROMPT = (
    "Sprachbefehl auf Deutsch. "
    "Stop Diktat, Stopp Diktat, Diktat stoppen, Diktat schliessen, Diktat beenden."
)
|
||
|
||
|
||
def _friendly_error_message(err) -> str:
|
||
raw = str(err or "").strip()
|
||
text = raw.lower()
|
||
|
||
if ("connection error" in text or "api connection" in text or
|
||
"timed out" in text or "timeout" in text or
|
||
"name or service not known" in text or
|
||
"temporary failure in name resolution" in text):
|
||
return (
|
||
"Verbindungsfehler zu OpenAI.\n\n"
|
||
"Bitte pruefen:\n"
|
||
"- Internetverbindung aktiv\n"
|
||
"- VPN/Proxy/Firewall blockiert nicht\n"
|
||
"- OpenAI-Dienst erreichbar\n\n"
|
||
"Optional (.env):\n"
|
||
"- OPENAI_HTTP_PROXY=http://user:pass@proxy:port\n"
|
||
"- OPENAI_HTTPS_PROXY=http://user:pass@proxy:port\n"
|
||
"- OPENAI_BASE_URL=https://... (nur bei Gateway)\n\n"
|
||
f"Technischer Hinweis: {raw[:140]}"
|
||
)
|
||
|
||
if ("invalid_api_key" in text or "incorrect api key" in text or
|
||
"authentication" in text or "401" in text):
|
||
return (
|
||
"API-Key ist ungueltig oder fehlt.\n\n"
|
||
"Bitte OPENAI_API_KEY in der .env pruefen."
|
||
)
|
||
|
||
if ("rate limit" in text or "429" in text or "quota" in text):
|
||
return (
|
||
"OpenAI-Limit erreicht (Rate Limit / Kontingent).\n\n"
|
||
"Bitte kurz warten oder Abrechnung/Kontingent pruefen."
|
||
)
|
||
|
||
return raw[:200] if raw else "Unbekannter Fehler."
|
||
|
||
|
||
def _is_connection_error_text(msg: str) -> bool:
|
||
t = (msg or "").lower()
|
||
return (
|
||
"connection error" in t or
|
||
"api connection" in t or
|
||
"timed out" in t or
|
||
"timeout" in t or
|
||
"name or service not known" in t or
|
||
"temporary failure in name resolution" in t
|
||
)
|
||
|
||
|
||
def _detect_voice_command(text):
|
||
cleaned = (text or "").strip().lower()
|
||
if not cleaned:
|
||
return None
|
||
cleaned = cleaned.replace("ß", "ss")
|
||
cleaned = re.sub(r"[^a-z0-9äöü\s]", " ", cleaned)
|
||
cleaned = re.sub(r"\s+", " ", cleaned).strip()
|
||
|
||
close_patterns = (
|
||
r"\bdikt(?:at|ad)\s*(?:schliessen|beenden)\b",
|
||
r"\b(?:schliessen|beenden)\s*dikt(?:at|ad)\b",
|
||
r"\bapp\s*(?:schliessen|beenden)\b",
|
||
)
|
||
for pat in close_patterns:
|
||
if re.search(pat, cleaned):
|
||
return "close"
|
||
|
||
stop_patterns = (
|
||
r"\bdikt(?:at|ad)\s*(?:stop|stopp|stoppen)\b",
|
||
r"\b(?:stop|stopp|stoppen)\s*dikt(?:at|ad)\b",
|
||
)
|
||
for pat in stop_patterns:
|
||
if re.search(pat, cleaned):
|
||
return "stop"
|
||
|
||
return None
|
||
|
||
|
||
def _win_clipboard_set(text: str) -> bool:
|
||
if sys.platform != "win32":
|
||
return False
|
||
try:
|
||
import ctypes
|
||
from ctypes import wintypes
|
||
CF_UNICODETEXT = 13
|
||
GMEM_DDESHARE = 0x2000
|
||
kernel32 = ctypes.WinDLL("kernel32")
|
||
user32 = ctypes.WinDLL("user32")
|
||
user32.OpenClipboard.argtypes = [wintypes.HWND]
|
||
user32.OpenClipboard.restype = wintypes.BOOL
|
||
user32.CloseClipboard.argtypes = []
|
||
user32.EmptyClipboard.argtypes = []
|
||
user32.SetClipboardData.argtypes = [wintypes.UINT, wintypes.HANDLE]
|
||
user32.SetClipboardData.restype = wintypes.HANDLE
|
||
kernel32.GlobalAlloc.argtypes = [wintypes.UINT, ctypes.c_size_t]
|
||
kernel32.GlobalAlloc.restype = wintypes.HANDLE
|
||
kernel32.GlobalLock.argtypes = [wintypes.HANDLE]
|
||
kernel32.GlobalLock.restype = ctypes.c_void_p
|
||
kernel32.GlobalUnlock.argtypes = [wintypes.HANDLE]
|
||
encoded = text.encode("utf-16-le") + b"\x00\x00"
|
||
hMem = kernel32.GlobalAlloc(GMEM_DDESHARE, len(encoded))
|
||
pMem = kernel32.GlobalLock(hMem)
|
||
ctypes.memmove(pMem, encoded, len(encoded))
|
||
kernel32.GlobalUnlock(hMem)
|
||
user32.OpenClipboard(0)
|
||
user32.EmptyClipboard()
|
||
user32.SetClipboardData(CF_UNICODETEXT, hMem)
|
||
user32.CloseClipboard()
|
||
return True
|
||
except Exception:
|
||
return False
|
||
|
||
|
||
def _sanitize_markdown_for_plain_text(raw_text: str) -> str:
|
||
lines = (raw_text or "").replace("\r\n", "\n").replace("\r", "\n").split("\n")
|
||
out_lines = []
|
||
for raw_line in lines:
|
||
line = raw_line
|
||
line = re.sub(r"^\s*#{1,6}\s+", "", line)
|
||
line = re.sub(r"^\s*\d+\.\s+", "", line)
|
||
line = re.sub(r"^\s*[-*•]\s+", "", line)
|
||
line = re.sub(r"\*\*(.+?)\*\*", r"\1", line)
|
||
line = re.sub(r"__(.+?)__", r"\1", line)
|
||
line = re.sub(r"(?<!\*)\*(?!\s)(.+?)(?<!\s)\*(?!\*)", r"\1", line)
|
||
line = re.sub(r"(?<!_)_(?!\s)(.+?)(?<!\s)_(?!_)", r"\1", line)
|
||
out_lines.append(line)
|
||
return "\n".join(out_lines).strip()
|
||
|
||
|
||
def _load_settings() -> dict:
    """Read the persisted settings from ``_SETTINGS_FILE``.

    Returns:
        The stored settings, or an empty dict when the file is missing,
        unreadable, not valid JSON, or valid JSON that is not an object.
        Callers use ``.get()`` on the result, so only dicts are passed on.
    """
    try:
        with open(_SETTINGS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError, RecursionError):
        # Missing or unreadable file, broken JSON/encoding: use the defaults.
        return {}
    return data if isinstance(data, dict) else {}
|
||
|
||
|
||
def _save_settings(data: dict):
    """Persist ``data`` as JSON in ``_SETTINGS_FILE`` (best effort).

    The target directory is created when missing. The JSON is written to a
    temporary sibling file first and then moved over the real file, so a
    failed or interrupted write never destroys the previous settings.
    Errors are swallowed on purpose: losing a preference must not break
    the UI.
    """
    tmp_path = _SETTINGS_FILE + ".tmp"
    try:
        os.makedirs(os.path.dirname(_SETTINGS_FILE) or ".", exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        os.replace(tmp_path, _SETTINGS_FILE)
    except (OSError, TypeError, ValueError):
        # Do not leave a half-written temporary file behind.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
|
||
|
||
|
||
def _auto_save(text: str) -> str:
    """Save ``text`` as a timestamped ``.txt`` file below ``_SAVE_DIR``.

    The file is named ``AudioNotiz_<YYYY-MM-DD_HH-MM-SS>.txt``. If a file of
    that name already exists (two saves within the same second) a numeric
    suffix ``_1``, ``_2``, ... is appended instead of overwriting the
    earlier note.

    Returns:
        Path of the file that was written.

    Raises:
        OSError: The directory or file could not be created or written.
    """
    os.makedirs(_SAVE_DIR, exist_ok=True)
    ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    attempt = 0
    while True:
        suffix = f"_{attempt}" if attempt else ""
        path = os.path.join(_SAVE_DIR, f"AudioNotiz_{ts}{suffix}.txt")
        try:
            # Mode "x" fails when the file exists, so nothing is overwritten.
            with open(path, "x", encoding="utf-8") as f:
                f.write(text)
        except FileExistsError:
            attempt += 1
            continue
        return path
|
||
|
||
|
||
def apply_punctuation(text: str) -> str:
    """Replace spoken German punctuation and date words by their symbols.

    Processing order (all matching is case-insensitive):

    1. Spoken formatting and punctuation commands ("neuer Absatz",
       "Punkt", "Komma", ...) become line breaks or punctuation marks.
    2. Enumeration words "erstens" .. "zehntens" become "1." .. "10.".
    3. Spelled-out years (e.g. "zweitausendsechsundzwanzig") become digits.
    4. Spelled-out ordinal days directly followed by a month name become
       "<day>. <month>".

    Empty or whitespace-only input (and ``None``) is returned unchanged.
    """
    if not text or not text.strip():
        return text

    # (pattern, replacement) pairs, applied strictly in this order.
    replacements = (
        (r"\s+neuer\s+Absatz\s*", "\n\n"),
        (r"\s+neue\s+Zeile\s*", "\n"),
        (r"\s+Zeilenumbruch\s*", "\n"),
        (r"\s+Absatz\s+", "\n\n"),
        (r"\s+Absatz\s*$", "\n\n"),
        (r"\s+Absatzzeichen\s*", "\n\n"),
        (r"\s+Punkt\s+", ". "),
        (r"\s+Punkt\s*$", "."),
        (r"\s+Komma\s+", ", "),
        (r"\s+Komma\s*$", ","),
        (r"\s+Semikolon\s+", "; "),
        (r"\s+Semikolon\s*$", ";"),
        (r"\s+Strichpunkt\s+", "; "),
        (r"\s+Strichpunkt\s*$", ";"),
        (r"\s+Doppelpunkt\s+", ": "),
        (r"\s+Doppelpunkt\s*$", ":"),
        (r"\s+Fragezeichen\s+", "? "),
        (r"\s+Fragezeichen\s*$", "?"),
        (r"\s+Ausrufezeichen\s+", "! "),
        (r"\s+Ausrufezeichen\s*$", "!"),
        (r"\s+Gedankenstrich\s+", " \u2013 "),
        (r"\s+Gedankenstrich\s*$", " \u2013"),
        (r"\s+Bindestrich\s+", "-"),
        (r"\s+Schr\u00e4gstrich\s+", "/"),
        (r"\s+Klammer\s+auf\s+", " ("),
        (r"\s+Klammer\s+zu\s+", ") "),
        (r"\s+Auslassungspunkte\s+", "\u2026 "),
        (r"\s+Auslassungspunkte\s*$", "\u2026"),
        # Enumeration words.
        (r"\b(erstens)\b", "1."),
        (r"\b(zweitens)\b", "2."),
        (r"\b(drittens)\b", "3."),
        (r"\b(viertens)\b", "4."),
        (r"\b(f\u00fcnftens)\b", "5."),
        (r"\b(sechstens)\b", "6."),
        (r"\b(siebtens)\b", "7."),
        (r"\b(achtens)\b", "8."),
        (r"\b(neuntens)\b", "9."),
        (r"\b(zehntens)\b", "10."),
    )
    result = text
    for pattern, replacement in replacements:
        result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)

    spoken_years = {
        "zweitausendzwanzig": "2020", "zweitausendeinundzwanzig": "2021",
        "zweitausendzweiundzwanzig": "2022", "zweitausenddreiundzwanzig": "2023",
        "zweitausendvierundzwanzig": "2024", "zweitausendf\u00fcnfundzwanzig": "2025",
        "zweitausendsechsundzwanzig": "2026", "zweitausendsiebenundzwanzig": "2027",
        "zweitausendachtundzwanzig": "2028", "zweitausendneunundzwanzig": "2029",
        "zweitausenddreissig": "2030", "zweitausenddrei\u00dfig": "2030",
        "neunzehnhundertneunzig": "1990", "zweitausend": "2000",
    }
    # Longest words first so a longer word is never shadowed by a shorter one.
    years_longest_first = sorted(
        spoken_years.items(), key=lambda item: len(item[0]), reverse=True
    )
    for spoken, digits in years_longest_first:
        result = re.sub(r"\b" + spoken + r"\b", digits, result, flags=re.IGNORECASE)

    spoken_days = {
        "ersten": "1.", "zweiten": "2.", "dritten": "3.", "vierten": "4.",
        "f\u00fcnften": "5.", "sechsten": "6.", "siebten": "7.", "achten": "8.",
        "neunten": "9.", "zehnten": "10.", "elften": "11.", "zw\u00f6lften": "12.",
        "dreizehnten": "13.", "vierzehnten": "14.", "f\u00fcnfzehnten": "15.",
        "sechzehnten": "16.", "siebzehnten": "17.", "achtzehnten": "18.",
        "neunzehnten": "19.", "zwanzigsten": "20.", "einundzwanzigsten": "21.",
        "zweiundzwanzigsten": "22.", "dreiundzwanzigsten": "23.",
        "vierundzwanzigsten": "24.", "f\u00fcnfundzwanzigsten": "25.",
        "sechsundzwanzigsten": "26.", "siebenundzwanzigsten": "27.",
        "achtundzwanzigsten": "28.", "neunundzwanzigsten": "29.",
        "dreissigsten": "30.", "drei\u00dfigsten": "30.",
        "einunddreissigsten": "31.", "einunddrei\u00dfigsten": "31.",
    }
    month_names = (r"(?:Januar|Februar|M\u00e4rz|April|Mai|Juni|Juli|August|"
                   r"September|Oktober|November|Dezember)")
    days_longest_first = sorted(
        spoken_days.items(), key=lambda item: len(item[0]), reverse=True
    )
    for spoken, numeral in days_longest_first:

        def day_with_month(match, numeral=numeral):
            # Keep the month exactly as it was written in the text.
            month = match.group(0).split()[-1]
            return numeral + " " + month

        result = re.sub(r"\b" + spoken + r"\s+" + month_names,
                        day_with_month, result, flags=re.IGNORECASE)
    return result
|
||
|
||
|
||
# ── RoundedButton (gleich wie Hauptfenster) ──
|
||
|
||
class RoundedButton(tk.Canvas):
    """Canvas-drawn button with rounded corners and a hover colour.

    The button is painted from four corner arcs, two rectangles and a
    centred text item. It is redrawn on every resize and on pointer
    enter/leave. Other code in this file changes ``_bg``, ``_fg`` and
    ``_active_bg`` directly and then calls ``_draw()``, so those attributes
    and that method are part of the de-facto interface.
    """

    def __init__(self, parent, text, command=None, bg=BTN_BG, fg=BTN_FG,
                 active_bg=BTN_ACTIVE, radius=8, width=120, height=26,
                 canvas_bg=None, **kw):
        """Create the button.

        Args:
            parent: Parent widget.
            text: Caption drawn in the centre.
            command: Optional callable invoked on left click.
            bg: Fill colour of the button shape.
            fg: Caption colour.
            active_bg: Fill colour while the pointer is over the button.
            radius: Corner radius in pixels.
            width: Requested canvas width in pixels.
            height: Requested canvas height in pixels.
            canvas_bg: Background of the canvas itself (visible outside the
                rounded corners); when given it is passed as the canvas
                ``bg`` option.
            **kw: Further ``tk.Canvas`` options.
        """
        kw.setdefault("highlightthickness", 0)
        if canvas_bg is not None:
            kw["bg"] = canvas_bg
        super().__init__(parent, width=width, height=height, **kw)
        self._command = command
        self._bg = bg
        self._fg = fg
        self._active_bg = active_bg
        self._radius = radius
        self._text = text
        self.bind("<Button-1>", self._on_click)
        self.bind("<Enter>", self._on_enter)
        self.bind("<Leave>", self._on_leave)
        self.bind("<Configure>", lambda e: self._draw())
        self._draw()

    def _draw(self, bg=None):
        """Repaint the button; ``bg`` overrides the fill colour once."""
        self.delete("all")
        w = self.winfo_width()
        h = self.winfo_height()
        # Before the widget is mapped Tk reports a size of 1x1, which is
        # truthy; use the requested size until a real size is known.
        if w <= 1:
            w = int(self["width"])
        if h <= 1:
            h = int(self["height"])
        r = self._radius
        c = bg or self._bg
        self.create_arc(0, 0, 2*r, 2*r, start=90, extent=90, fill=c, outline=c)
        self.create_arc(w-2*r, 0, w, 2*r, start=0, extent=90, fill=c, outline=c)
        self.create_arc(0, h-2*r, 2*r, h, start=180, extent=90, fill=c, outline=c)
        self.create_arc(w-2*r, h-2*r, w, h, start=270, extent=90, fill=c, outline=c)
        self.create_rectangle(r, 0, w-r, h, fill=c, outline=c)
        self.create_rectangle(0, r, w, h-r, fill=c, outline=c)
        self.create_text(w//2, h//2, text=self._text, fill=self._fg,
                         font=("Segoe UI", 9))

    def configure(self, cnf=None, **kw):
        """Configure the widget; additionally understands ``text=...``.

        ``text`` updates the caption and redraws. Everything else is passed
        on to ``tk.Canvas.configure`` and its result is returned, so option
        queries keep working. A call that only sets ``text`` returns
        ``None``.
        """
        if isinstance(cnf, dict):
            kw = {**cnf, **kw}
            cnf = None
        text_given = "text" in kw
        if text_given:
            self._text = kw.pop("text")
            self._draw()
        if text_given and not kw and cnf is None:
            return None
        return super().configure(cnf, **kw)

    # Tk's ``config`` alias is bound to the base implementation; rebind it
    # so that ``config(text=...)`` is handled as well.
    config = configure

    def _on_click(self, e):
        """Run the command, if one was given."""
        if self._command:
            self._command()

    def _on_enter(self, e):
        """Show the hover colour."""
        self._draw(self._active_bg)

    def _on_leave(self, e):
        """Return to the normal colour."""
        self._draw()
|
||
|
||
|
||
class DiktatApp(tk.Tk):
|
||
|
||
def __init__(self, _as_toplevel_of=None):
    """Create the audio-note window, standalone or embedded.

    Args:
        _as_toplevel_of: When given, the UI is built inside a new
            ``tk.Toplevel`` owned by that widget and ``tk.Tk.__init__`` is
            not called for this object. Selected window methods of the
            Toplevel are copied onto ``self`` so the rest of the class can
            keep calling ``self.geometry(...)`` and the like. When ``None``,
            a normal ``tk.Tk`` root window is created.

    In both modes ``_is_embedded`` and ``_proxy_win`` (the real window
    object) are set before ``_init_app()`` runs.
    """
    if _as_toplevel_of is not None:
        # Tk.__init__ is simply skipped on this path. It must not be
        # replaced on the class: that would turn every later tk.Tk()
        # in the same process into a no-op.
        self._toplevel = tk.Toplevel(_as_toplevel_of)
        self._toplevel.title("Audio-Notiz \u2013 nur Transkription")
        self._toplevel.configure(bg=BG)
        self._toplevel.attributes("-topmost", True)
        self._is_embedded = True
        self._proxy_win = self._toplevel
        # "destroy" is deliberately not forwarded; closing goes through
        # _safe_destroy so a running recording is stopped first.
        for attr in ("title", "configure", "attributes", "geometry", "minsize",
                     "protocol", "bind", "after", "update_idletasks",
                     "winfo_screenwidth", "winfo_screenheight", "winfo_width",
                     "winfo_height", "winfo_x", "winfo_y", "winfo_exists",
                     "deiconify", "iconify", "withdraw", "lift", "focus_force",
                     "overrideredirect", "wm_attributes",
                     "winfo_toplevel", "tk"):
            if hasattr(self._toplevel, attr):
                try:
                    setattr(self, attr, getattr(self._toplevel, attr))
                except (AttributeError, TypeError):
                    pass
        self._toplevel.protocol("WM_DELETE_WINDOW", self._safe_destroy)
        self._init_app()
        return

    super().__init__()
    self._is_embedded = False
    self._proxy_win = self
    self.title("Audio-Notiz \u2013 nur Transkription")
    self.configure(bg=BG)
    self.attributes("-topmost", True)
    self._init_app()
|
||
|
||
def _safe_destroy(self):
|
||
try:
|
||
if self._is_recording:
|
||
self._is_recording = False
|
||
self._voice_cmd_active = False
|
||
if self._recorder:
|
||
try:
|
||
self._recorder.stop_and_save_wav()
|
||
except Exception:
|
||
pass
|
||
self._recorder = None
|
||
except Exception:
|
||
pass
|
||
try:
|
||
if hasattr(self, "_toplevel") and self._toplevel.winfo_exists():
|
||
self._toplevel.destroy()
|
||
except Exception:
|
||
pass
|
||
|
||
def _init_app(self):
    """Initialise state, restore the window layout and build the UI.

    Steps: load the logo, read the persisted settings, restore (or centre)
    the window, create the OpenAI client when a key is available, set up
    the recording state, build the widgets, start the global right-click
    paste listener (best effort) and finally report missing components or
    a missing API key through a delayed message box.

    Persisted values are validated: a malformed geometry string or a
    non-integer font size falls back to the defaults instead of aborting
    the start.
    """
    default_geom = "300x290"

    self._logo_photo = None
    self._logo_photo_small = None
    self._load_logo()

    self._settings = _load_settings()
    if not isinstance(self._settings, dict):
        self._settings = {}

    saved_geom = self._settings.get("geometry", default_geom)
    if not isinstance(saved_geom, str) or not saved_geom.strip():
        saved_geom = default_geom
    try:
        self.geometry(saved_geom)
    except tk.TclError:
        # Hand-edited or corrupted value: continue with the default size.
        saved_geom = default_geom
        self.geometry(saved_geom)
    self.minsize(300, 280)
    if "+" not in saved_geom:
        # No stored position: centre the window on the screen.
        self.update_idletasks()
        sw = self.winfo_screenwidth()
        sh = self.winfo_screenheight()
        self.geometry(f"+{(sw - 300) // 2}+{(sh - 290) // 2}")

    api_key = None
    try:
        from openai_runtime_config import get_openai_api_key
        api_key = get_openai_api_key()
    except Exception:
        pass
    if not api_key:
        api_key = os.getenv("OPENAI_API_KEY", "").strip()
    base_url = os.getenv("OPENAI_BASE_URL", "").strip()
    self._client = None
    if OpenAI and api_key:
        client_kwargs = dict(api_key=api_key, timeout=60.0, max_retries=2)
        if base_url:
            client_kwargs["base_url"] = base_url
        self._client = OpenAI(**client_kwargs)

    self._recorder = None
    self._is_recording = False
    self._minimized = False
    self._geom_before = None
    self._restoring = False

    font_size = self._settings.get("font_size", 8)
    if isinstance(font_size, bool) or not isinstance(font_size, int):
        font_size = 8
    # Same bounds as _change_font enforces.
    self._font_size = max(5, min(20, font_size))

    self._build_ui()
    try:
        from aza_global_paste import start_global_right_click_paste_listener
        start_global_right_click_paste_listener()
    except Exception:
        pass
    self._save_geom_after_id = None
    self.bind("<Configure>", self._on_configure)

    if _IMPORT_ERRORS:
        self.after(500, lambda: messagebox.showerror(
            "Start-Fehler",
            "Audio-Notiz konnte nicht vollstaendig starten:\n- "
            + "\n- ".join(_IMPORT_ERRORS)
            + "\n\nTipps:\n"
            "1) Starte die App im Projektordner.\n"
            "2) Installiere fehlende Pakete mit pip.",
            parent=self))
        return

    if not self._client:
        self.after(500, lambda: messagebox.showerror(
            "API-Key fehlt",
            "OPENAI_API_KEY ist nicht gesetzt.\n\n"
            "Lege eine '.env' Datei an:\nOPENAI_API_KEY=sk-...",
            parent=self))
|
||
|
||
def _load_logo(self):
    """Load the logo image and use it as window icon (best effort).

    When ``_LOGO_PATH`` exists, two ``PhotoImage`` sizes are created
    (24x24 and 18x18) and a 57x57 version is written to a temporary
    ``.ico`` file that is handed to ``iconbitmap``. The image file is
    closed after use and the temporary file is removed even when saving or
    setting the icon fails. Any error (for example Pillow missing) leaves
    the window without a logo.
    """
    try:
        if not os.path.exists(_LOGO_PATH):
            return
        from PIL import Image, ImageTk

        with Image.open(_LOGO_PATH) as img:
            self._logo_photo = ImageTk.PhotoImage(
                img.resize((24, 24), Image.Resampling.LANCZOS))
            self._logo_photo_small = ImageTk.PhotoImage(
                img.resize((18, 18), Image.Resampling.LANCZOS))
            img_icon = img.resize((57, 57), Image.Resampling.LANCZOS)

        tmp = tempfile.NamedTemporaryFile(suffix=".ico", delete=False)
        # Only the unique name is needed; close the handle before writing.
        tmp.close()
        try:
            img_icon.save(tmp.name, format="ICO")
            self.iconbitmap(tmp.name)
        finally:
            try:
                os.unlink(tmp.name)
            except OSError:
                pass
    except Exception:
        pass
|
||
|
||
# ── Drag (Fenster verschieben via Header/Logo) ──
|
||
|
||
def _drag_start(self, e):
|
||
self._drag_x = e.x_root - self.winfo_x()
|
||
self._drag_y = e.y_root - self.winfo_y()
|
||
|
||
def _drag_move(self, e):
|
||
x = e.x_root - self._drag_x
|
||
y = e.y_root - self._drag_y
|
||
self.geometry(f"+{x}+{y}")
|
||
|
||
def _make_draggable(self, widget):
|
||
widget.configure(cursor="fleur")
|
||
widget.bind("<Button-1>", self._drag_start)
|
||
widget.bind("<B1-Motion>", self._drag_move)
|
||
|
||
# ── UI ──
|
||
|
||
def _build_ui(self):
    """Create all widgets of the full (non-minimised) window.

    From top to bottom: the draggable header (optional logo, title,
    minimise button, hidden recording dot), a caption row with the
    font-size control, the transcript text field, the status bar, the two
    preference check boxes and the button row. The compact-mode widgets are
    only initialised to ``None`` here.
    """
    ui_font = "Segoe UI"

    def bind_hover(label):
        # Darken a clickable label while the pointer is over it.
        label.bind("<Enter>", lambda e: label.configure(fg=MINI_FG_HOVER))
        label.bind("<Leave>", lambda e: label.configure(fg=MINI_FG))

    def font_arrow(parent, glyph, delta):
        # Small clickable arrow that changes the font size by ``delta``.
        arrow = tk.Label(parent, text=glyph, font=(ui_font, 7),
                         bg=BG, fg=MINI_FG, cursor="hand2")
        arrow.pack(side="left", padx=1)
        arrow.bind("<Button-1>", lambda e: self._change_font(delta))
        bind_hover(arrow)

    self._drag_x = 0
    self._drag_y = 0

    # Header
    header = tk.Frame(self, bg=BG)
    self._header = header
    header.pack(fill="x")

    self._logo_lbl = None
    if self._logo_photo:
        logo = tk.Label(header, image=self._logo_photo, bg=BG)
        self._logo_lbl = logo
        logo.pack(side="left", padx=(8, 0), pady=4)
        self._make_draggable(logo)
    title = tk.Label(header, text="Audio-Notiz", font=(ui_font, 12, "bold"),
                     bg=BG, fg=HDR_FG)
    self._title_lbl = title
    title.pack(side="left", padx=(4, 0), pady=6)
    self._make_draggable(title)
    self._make_draggable(header)

    # Minimise button
    minimize = tk.Label(header, text="\u2014", font=(ui_font, 12, "bold"),
                        bg=BG, fg=MINI_FG, cursor="hand2", padx=6)
    self._btn_minimize = minimize
    minimize.pack(side="right", padx=(0, 8))
    minimize.bind("<Button-1>", lambda e: self._toggle_minimize())
    bind_hover(minimize)

    # Recording indicator (red dot); created here, packed only while recording
    self._rec_dot = tk.Label(header, text="", font=(ui_font, 8),
                             bg=BG, fg=REC_DOT)
    self._rec_blink_id = None

    # Compact-mode controls are created lazily by _minimize
    self._mini_frame = None
    self._mini_status_bar = None

    # Main content
    content = tk.Frame(self, bg=BG, padx=12, pady=12)
    self._main_f = content
    content.pack(fill="both", expand=True)

    # Caption + font-size control
    caption_row = tk.Frame(content, bg=BG)
    caption_row.pack(fill="x", anchor="w")
    tk.Label(caption_row, text="Audio-Notiz (nur Transkription):", bg=BG, fg=HDR_FG,
             font=(ui_font, 9)).pack(side="left")

    # Font size "Aa" with up/down arrows (as in the main window)
    size_ctrl = tk.Frame(caption_row, bg=BG)
    size_ctrl.pack(side="right", padx=4)
    tk.Label(size_ctrl, text="Aa", font=(ui_font, 8), bg=BG,
             fg=MINI_FG).pack(side="left", padx=(0, 1))
    self._size_lbl = tk.Label(size_ctrl, text=str(self._font_size), font=(ui_font, 8),
                              bg=BG, fg=MINI_FG, width=2, anchor="center")
    self._size_lbl.pack(side="left")
    font_arrow(size_ctrl, "\u25B2", 1)
    font_arrow(size_ctrl, "\u25BC", -1)

    # Text field
    self._txt = ScrolledText(content, wrap="word",
                             font=(ui_font, self._font_size),
                             bg=TXT_BG, height=8)
    self._txt.pack(fill="both", expand=True, pady=(4, 4))

    # Status bar
    self._status_var = tk.StringVar(value="Modus: Medizinische Audio-Notiz aktiv")
    status_bar = tk.Frame(content, bg=STATUS_BG, height=24, padx=8, pady=4)
    status_bar.pack(fill="x", pady=(4, 0))
    status_bar.pack_propagate(False)
    tk.Label(status_bar, textvariable=self._status_var, fg=STATUS_FG, bg=STATUS_BG,
             font=(ui_font, 8), anchor="w").pack(side="left", fill="x", expand=True)

    # Preference check boxes
    self._autocopy_var = tk.BooleanVar(value=self._is_autocopy_enabled())
    prefs_row = tk.Frame(content, bg=BG)
    prefs_row.pack(fill="x", pady=(2, 0))
    autocopy_box = ttk.Checkbutton(
        prefs_row, text="Autocopy nach Transkription",
        variable=self._autocopy_var, command=self._save_autocopy_pref,
    )
    autocopy_box.pack(side="left")

    self._rclick_paste_var = tk.BooleanVar(value=self._is_rclick_paste_enabled())
    rclick_box = ttk.Checkbutton(
        prefs_row, text="Rechtsklick = Einfügen",
        variable=self._rclick_paste_var, command=self._save_rclick_pref,
    )
    rclick_box.pack(side="left", padx=(12, 0))

    # Buttons: start/stop toggle, new, copy, connection test
    button_row = tk.Frame(content, bg=BG)
    button_row.pack(fill="x", pady=(4, 0))

    self._btn_record_toggle = RoundedButton(button_row, "\u25b6 Start",
                                            command=self._toggle_recording,
                                            width=80, height=26, canvas_bg=BG)
    self._btn_record_toggle.pack(side="left")

    further_buttons = (
        ("Neu", self._do_neu, 60),
        ("Kopieren", self._do_kopieren, 80),
        ("Verbindung testen", self._do_test_connection, 130),
    )
    for caption, handler, px_width in further_buttons:
        RoundedButton(button_row, caption, command=handler,
                      width=px_width, height=26,
                      canvas_bg=BG).pack(side="left", padx=(6, 0))
|
||
|
||
# ── Schriftgroesse ──
|
||
|
||
def _change_font(self, delta):
    """Change the text font size by ``delta`` and persist it.

    The size is kept between 5 and 20. The text field and the size label
    are updated and the value is stored under ``"font_size"`` in the
    settings.
    """
    requested = self._font_size + delta
    new_size = max(5, min(20, requested))
    self._font_size = new_size
    self._txt.configure(font=("Segoe UI", new_size))
    self._size_lbl.configure(text=str(new_size))
    self._settings["font_size"] = new_size
    _save_settings(self._settings)
|
||
|
||
# ── Recording-Indikator (roter blinkender Punkt) ──
|
||
|
||
def _start_rec_blink(self):
|
||
self._rec_dot.pack(side="left", padx=(2, 0))
|
||
self._blink_visible = True
|
||
self._do_blink()
|
||
|
||
def _do_blink(self):
    """Toggle the recording dot and schedule the next toggle in 500 ms.

    Does nothing (and schedules nothing) once recording has stopped.
    """
    if not self._is_recording:
        return
    now_visible = not self._blink_visible
    self._blink_visible = now_visible
    glyph = "\u25cf" if now_visible else ""
    self._rec_dot.configure(text=glyph, fg=REC_DOT)
    self._rec_blink_id = self.after(500, self._do_blink)
|
||
|
||
def _stop_rec_blink(self):
|
||
if self._rec_blink_id:
|
||
self.after_cancel(self._rec_blink_id)
|
||
self._rec_blink_id = None
|
||
self._rec_dot.pack_forget()
|
||
|
||
def _update_btn_states(self):
    """Make the record toggle button reflect the recording state.

    While recording the button reads "Stop" in red with white text;
    otherwise it reads "Start" in the default button colours.
    """
    button = self._btn_record_toggle
    if self._is_recording:
        caption, fill, ink, hover = "\u25a0 Stop", REC_RED, "white", "#B03030"
    else:
        caption, fill, ink, hover = "\u25b6 Start", BTN_BG, BTN_FG, BTN_ACTIVE
    button.configure(text=caption)
    button._bg = fill
    button._fg = ink
    button._active_bg = hover
    # Repaint with the new colours.
    button._draw()
|
||
|
||
# ── Geometrie speichern ──
|
||
|
||
def _on_configure(self, e):
|
||
if e.widget is not self:
|
||
return
|
||
if self._minimized and not self._restoring and e.height > 110:
|
||
self._restore()
|
||
if not self._minimized:
|
||
if self._save_geom_after_id:
|
||
self.after_cancel(self._save_geom_after_id)
|
||
self._save_geom_after_id = self.after(400, self._save_geom)
|
||
|
||
def _save_geom(self):
    """Store the current window geometry in the settings (best effort)."""
    try:
        current = self.geometry()
        self._settings["geometry"] = current
        _save_settings(self._settings)
    except Exception:
        # For example the window no longer exists.
        pass
|
||
|
||
def _is_autocopy_enabled(self) -> bool:
|
||
try:
|
||
from aza_persistence import is_autocopy_after_diktat_enabled
|
||
return is_autocopy_after_diktat_enabled()
|
||
except Exception:
|
||
return True
|
||
|
||
def _save_autocopy_pref(self):
|
||
try:
|
||
from aza_persistence import save_autocopy_prefs
|
||
save_autocopy_prefs(autocopy=self._autocopy_var.get())
|
||
except Exception:
|
||
pass
|
||
|
||
def _is_rclick_paste_enabled(self) -> bool:
|
||
try:
|
||
from aza_persistence import is_global_right_click_paste_enabled
|
||
return is_global_right_click_paste_enabled()
|
||
except Exception:
|
||
return True
|
||
|
||
def _save_rclick_pref(self):
|
||
try:
|
||
from aza_persistence import save_autocopy_prefs
|
||
save_autocopy_prefs(global_right_click=self._rclick_paste_var.get())
|
||
except Exception:
|
||
pass
|
||
|
||
# ── Minimierung ──
|
||
|
||
def _toggle_minimize(self):
|
||
if self._minimized:
|
||
self._restore()
|
||
if self._geom_before:
|
||
try:
|
||
self.geometry(self._geom_before)
|
||
except Exception:
|
||
pass
|
||
else:
|
||
self._minimize()
|
||
|
||
def _minimize(self):
    """Collapse the window to the compact 100x78 view.

    The current geometry is remembered, the main content is hidden and a
    small control strip (record toggle, recording dot, "Neu") plus a tiny
    status bar are shown. Both compact widgets are created on first use
    and reused afterwards.
    """
    self._geom_before = self.geometry()
    self._main_f.pack_forget()
    self._btn_minimize.configure(text="\u25a1")
    self._minimized = True
    self.minsize(100, 78)
    self.geometry("100x78")

    if not self._mini_frame:
        strip = tk.Frame(self, bg=BG)
        self._mini_frame = strip

        # Start / stop toggle
        toggle = tk.Label(strip, text="\u25b6", font=("Segoe UI", 12, "bold"),
                          bg=BG, fg=MINI_FG, cursor="hand2")
        self._m_toggle = toggle
        toggle.pack(side="left", padx=(4, 0))
        toggle.bind("<Button-1>", lambda e: self._toggle_recording())

        # Recording dot
        dot = tk.Label(strip, text="", font=("Segoe UI", 8),
                       bg=BG, fg=REC_DOT)
        self._m_dot = dot
        dot.pack(side="left", padx=(2, 0))

        # "Neu" (new note)
        new_lbl = tk.Label(strip, text="Neu", font=("Segoe UI", 7),
                           bg=BG, fg=MINI_FG, cursor="hand2")
        self._m_neu = new_lbl
        new_lbl.pack(side="left", padx=(4, 0))
        new_lbl.bind("<Button-1>", lambda e: self._do_neu())
        new_lbl.bind("<Enter>", lambda e: new_lbl.configure(fg=MINI_FG_HOVER))
        new_lbl.bind("<Leave>", lambda e: new_lbl.configure(fg=MINI_FG))

    self._update_mini_states()
    self._mini_frame.pack(fill="x", padx=2)

    if not self._mini_status_bar:
        mini_status = tk.Frame(self, bg=STATUS_BG, height=16, padx=4, pady=1)
        self._mini_status_bar = mini_status
        tk.Label(mini_status, textvariable=self._status_var,
                 fg=STATUS_FG, bg=STATUS_BG,
                 font=("Segoe UI", 6), anchor="w").pack(side="left", fill="x", expand=True)
    self._mini_status_bar.pack(fill="x")
|
||
|
||
def _update_mini_states(self):
    """Sync the mini view's toggle glyph and recording dot with ``_is_recording``."""
    if not self._mini_frame:
        # The mini view has not been built yet; nothing to refresh.
        return
    if self._is_recording:
        glyph, colour, dot = "\u25a0", REC_RED, "\u25cf"
    else:
        glyph, colour, dot = "\u25b6", MINI_FG, ""
    self._m_toggle.configure(text=glyph, fg=colour)
    self._m_dot.configure(text=dot)
|
||
|
||
def _restore(self):
    """Leave the mini view and show the full main frame again.

    Does nothing when the window is not minimized. ``_restoring`` is raised
    for the duration of the switch and cleared 200 ms later by
    ``_finish_restore``.
    """
    if not self._minimized:
        return

    self._restoring = True
    # Hide whichever mini widgets have been created so far.
    for mini_widget in (self._mini_frame, self._mini_status_bar):
        if mini_widget:
            mini_widget.pack_forget()

    self._main_f.pack(fill="both", expand=True)
    self._btn_minimize.configure(text="\u2014")
    self._minimized = False
    self.minsize(300, 280)
    self.after(200, self._finish_restore)
|
||
|
||
def _finish_restore(self):
    """Clear the ``_restoring`` flag; scheduled by ``_restore`` via ``after``."""
    self._restoring = False
|
||
|
||
def _toggle_recording(self):
    """Stop the recording if one is running, otherwise start one."""
    action = self._do_stop if self._is_recording else self._do_start
    action()
|
||
|
||
def _do_test_connection(self):
    """Check in the background whether the OpenAI API can be reached.

    Returns early (with a dialog or a status message) when required imports
    are missing, no client is configured, or a recording is running.
    Otherwise a daemon thread lists the available models and reports the
    outcome through the status bar and a message box.
    """
    if _IMPORT_ERRORS:
        missing = "\n- ".join(_IMPORT_ERRORS)
        messagebox.showerror(
            "Start-Fehler", "Fehlende Komponenten:\n- " + missing, parent=self)
        return
    if not self._client:
        messagebox.showerror(
            "API-Key fehlt", "OPENAI_API_KEY ist nicht gesetzt.", parent=self)
        return
    if self._is_recording:
        self._status_var.set("Verbindungstest waehrend Aufnahme nicht verfuegbar.")
        return

    self._status_var.set("Teste Verbindung zu OpenAI…")

    def _show_ok_dialog():
        messagebox.showinfo(
            "Verbindungstest", "Verbindung zu OpenAI ist in Ordnung.", parent=self)

    def _probe():
        try:
            # Small API call without an audio upload, for a quick check.
            self._client.models.list()
            self.after(0, lambda: self._status_var.set("Verbindung OK."))
            self.after(0, _show_ok_dialog)
        except Exception as exc:
            detail = _friendly_error_message(exc)

            # The message is bound as a default so each callback keeps its
            # own copy when it runs later on the Tk loop.
            def _show_status(m=detail):
                self._status_var.set(f"Fehler: {m.splitlines()[0][:80]}")

            def _show_error_dialog(m=detail):
                messagebox.showerror("Verbindungstest", m, parent=self)

            self.after(0, _show_status)
            self.after(0, _show_error_dialog)

    threading.Thread(target=_probe, daemon=True).start()
|
||
|
||
# ── Sprachbefehl "Stop Diktat" ──
|
||
|
||
def _transcribe_cmd(self, wav_path: str) -> str:
    """Transcribe a short WAV snippet using the command-detection prompt.

    Returns the recognised text, or an empty string when the response
    carries no text.
    """
    request = {
        "model": TRANSCRIBE_MODEL,
        "language": "de",
        "prompt": _CMD_PROMPT,
    }
    # Temperature is only sent for models whose name lacks "gpt-".
    if "gpt-" not in TRANSCRIBE_MODEL:
        request["temperature"] = 0.0

    with open(wav_path, "rb") as audio_file:
        response = self._client.audio.transcriptions.create(
            file=audio_file, **request)
    return getattr(response, "text", "") or ""
|
||
|
||
def _start_voice_cmd_monitor(self):
    """Start a daemon thread that listens for spoken "stop"/"close" commands.

    While a recording is running, the thread polls the recorder's buffered
    frames every 0.3 s. When the newest ``_SILENCE_SEC`` seconds are quiet
    but the audio right before them is not, that preceding snippet (at most
    ``_CMD_SPEECH_SEC`` seconds) is written to a temporary WAV file and
    transcribed via ``_transcribe_cmd``. A recognised command is handed to
    the Tk loop through ``after`` and ends the monitor.
    """
    self._voice_cmd_active = True
    # Identity of the monitor started by THIS call. A monitor left over from
    # an earlier recording (e.g. when _do_neu restarts the recording right
    # away, so both flags are True again before the old thread wakes up)
    # sees a foreign token and exits instead of running next to the new one.
    token = object()
    self._voice_cmd_token = token

    def _still_wanted():
        return (self._voice_cmd_active and self._is_recording
                and self._voice_cmd_token is token)

    def _monitor():
        import numpy as np
        import time as _t
        last_check = 0.0
        print("[VoiceCmd] Monitor gestartet", file=sys.stderr)
        while _still_wanted():
            _t.sleep(0.3)
            if not _still_wanted():
                break
            rec = self._recorder
            if not rec or not rec._frames:
                continue
            sr = rec.samplerate
            try:
                # Snapshot the newest frames; the recorder keeps appending.
                frames_snap = list(rec._frames[-300:])
            except Exception:
                continue
            if len(frames_snap) < 5:
                continue
            try:
                audio = np.concatenate(frames_snap, axis=0).flatten()
            except Exception:
                continue
            silence_n = int(_SILENCE_SEC * sr)
            # Need the silence window plus at least half a second before it.
            if len(audio) < silence_n + int(0.5 * sr):
                continue
            tail = audio[-silence_n:]
            tail_rms = float(np.sqrt(np.mean(tail ** 2)))
            if tail_rms >= _SILENCE_RMS:
                continue  # still speaking
            now = _t.time()
            if now - last_check < _CMD_COOLDOWN_SEC:
                continue
            last_check = now
            speech_end = len(audio) - silence_n
            speech_start = max(0, speech_end - int(_CMD_SPEECH_SEC * sr))
            speech = audio[speech_start:speech_end]
            speech_rms = float(np.sqrt(np.mean(speech ** 2)))
            if speech_rms < _SILENCE_RMS:
                continue  # nothing audible before the pause
            print(f"[VoiceCmd] Stille erkannt (RMS={tail_rms:.4f}), "
                  f"pruefe Sprache (RMS={speech_rms:.4f}, "
                  f"{len(speech)/sr:.1f}s)", file=sys.stderr)
            try:
                # NOTE(review): assumes float samples in [-1, 1] (clipped and
                # scaled to 16-bit PCM here) - confirm against AudioRecorder.
                pcm = (np.clip(speech, -1.0, 1.0) * 32767).astype(np.int16)
                fd, tmp = tempfile.mkstemp(suffix=".wav", prefix="vcmd_")
                os.close(fd)
                try:
                    with wave.open(tmp, "wb") as wf:
                        wf.setnchannels(1)
                        wf.setsampwidth(2)
                        wf.setframerate(sr)
                        wf.writeframes(pcm.tobytes())
                    text = self._transcribe_cmd(tmp)
                finally:
                    # mkstemp leaves deletion to the caller: remove the
                    # snippet even when writing or transcribing failed, so
                    # failed checks do not pile up files in the temp dir.
                    try:
                        os.remove(tmp)
                    except OSError:
                        pass
                print(f"[VoiceCmd] Erkannt: '{text}'", file=sys.stderr)
                if self._voice_cmd_token is not token:
                    # Superseded while the request was in flight; the result
                    # belongs to an earlier recording.
                    break
                if text:
                    cmd = _detect_voice_command(text)
                    if cmd == "close":
                        print("[VoiceCmd] >>> DIKTAT SCHLIESSEN erkannt! <<<",
                              file=sys.stderr)
                        self._voice_cmd_active = False
                        self.after(0, lambda: self._status_var.set(
                            "Sprachbefehl erkannt: Schliessen\u2026"))
                        self.after(100, self._do_close_from_voice)
                        return
                    if cmd == "stop":
                        print("[VoiceCmd] >>> STOP DIKTAT erkannt! <<<",
                              file=sys.stderr)
                        self._voice_cmd_active = False
                        self.after(0, lambda: self._status_var.set(
                            "Sprachbefehl erkannt: Stop\u2026"))
                        self.after(100, self._do_stop)
                        return
            except Exception as exc:
                print(f"[VoiceCmd] Fehler: {exc}", file=sys.stderr)
        print("[VoiceCmd] Monitor beendet", file=sys.stderr)

    threading.Thread(target=_monitor, daemon=True).start()
|
||
|
||
def _stop_voice_cmd_monitor(self):
    """Ask the voice-command monitor to stop by clearing ``_voice_cmd_active``."""
    self._voice_cmd_active = False
|
||
|
||
# ── Aufnahme ──
|
||
|
||
def _do_start(self):
    """Start a recording unless one is running or a prerequisite is missing.

    Missing imports or a missing API client are reported in an error dialog
    and abort the start.
    """
    if self._is_recording:
        return

    # Find the first reason (if any) that prevents starting.
    blocker = None
    if _IMPORT_ERRORS:
        blocker = ("Start-Fehler",
                   "Fehlende Komponenten:\n- " + "\n- ".join(_IMPORT_ERRORS))
    elif not self._client:
        blocker = ("API-Key fehlt", "OPENAI_API_KEY ist nicht gesetzt.")

    if blocker is not None:
        title, body = blocker
        messagebox.showerror(title, body, parent=self)
        return

    self._start_recording()
|
||
|
||
def _do_stop(self, close_after=False):
    """Stop and transcribe the running recording; no-op when none is running.

    ``close_after`` is passed through to ``_stop_and_transcribe``.
    """
    if self._is_recording:
        self._stop_and_transcribe(close_after=close_after)
|
||
|
||
def _do_close_from_voice(self):
    """Handle the spoken "close" command.

    A running recording is stopped with ``close_after=True``; without a
    running recording the window is destroyed right away.
    """
    if not self._is_recording:
        self.destroy()
        return
    self._do_stop(close_after=True)
|
||
|
||
def _start_recording(self):
    """Start the audio recorder and switch the UI into recording mode.

    Creates the ``AudioRecorder`` on demand. Any exception raised while
    starting is shown in an error dialog and the status is reset to
    "Bereit.".
    """
    if AudioRecorder is None:
        messagebox.showerror(
            "Start-Fehler",
            "AudioRecorder ist nicht verfuegbar (aza_audio.py fehlt).",
            parent=self)
        return

    if not self._recorder:
        self._recorder = AudioRecorder()

    try:
        self._recorder.start()
        self._is_recording = True
        # Refresh both button sets, then start the blink and the
        # voice-command monitor.
        for step in (self._update_btn_states,
                     self._update_mini_states,
                     self._start_rec_blink,
                     self._start_voice_cmd_monitor):
            step()
        self._status_var.set(
            "Aufnahme l\u00e4uft\u2026 (\"Diktat stoppen\" oder \"Diktat schliessen\")"
        )
    except Exception as exc:
        messagebox.showerror("Aufnahme-Fehler", str(exc), parent=self)
        self._status_var.set("Bereit.")
|
||
|
||
def _stop_and_transcribe(self, close_after=False):
    """Stop the recording and transcribe it on a background thread.

    The UI is switched out of recording mode immediately. A daemon thread
    then saves the WAV, skips recordings shorter than 0.3 s, transcribes,
    post-processes and auto-saves the text, and finally inserts it into the
    text widget on the Tk loop. With ``close_after`` the window is destroyed
    200 ms after the result (or the "nothing recognised" status) was
    delivered. Errors are routed to ``_on_error``.
    """
    self._is_recording = False
    self._stop_voice_cmd_monitor()
    self._stop_rec_blink()
    try:
        self._update_btn_states()
        self._update_mini_states()
    except Exception:
        pass
    self._status_var.set("Transkribiere\u2026")

    # Detach the recorder from the instance; the worker uses it from here on.
    rec, self._recorder = self._recorder, None

    def _finish_without_text(status):
        # Show the status and honour a pending close request.
        self.after(0, lambda: self._status_var.set(status))
        if close_after:
            self.after(200, self.destroy)

    def _work():
        try:
            if rec is None:
                self.after(0, lambda: self._status_var.set("Kein Recorder aktiv."))
                return
            wav_path = rec.stop_and_save_wav()

            # Best-effort length check; if the file cannot be inspected the
            # transcription is attempted anyway.
            try:
                with wave.open(wav_path, "rb") as wf:
                    duration = wf.getnframes() / float(wf.getframerate())
                if duration < 0.3:
                    if os.path.exists(wav_path):
                        os.remove(wav_path)
                    _finish_without_text("Kein Audio erkannt.")
                    return
            except Exception:
                pass

            # NOTE(review): if _transcribe raises, the WAV file below is not
            # removed - confirm whether keeping it on failure is intended.
            text = self._transcribe(wav_path)

            try:
                if os.path.exists(wav_path):
                    os.remove(wav_path)
            except Exception:
                pass

            if not text or not text.strip():
                _finish_without_text("Kein Text erkannt.")
                return

            text = apply_punctuation(text).replace("\u00df", "ss")
            text = _TRAILING_CMD_RE.sub("", text).rstrip()

            if not text.strip():
                _finish_without_text("Kein Text erkannt.")
                return

            try:
                _auto_save(text)
                save_msg = " Gespeichert \u2713"
            except Exception as save_err:
                save_msg = f" Speichern fehlgeschlagen: {str(save_err)[:50]}"

            def _insert_and_maybe_close(t=text, sm=save_msg):
                self._insert_text(t, sm)
                if close_after:
                    self.after(200, self.destroy)

            self.after(0, _insert_and_maybe_close)

        except Exception as exc:
            self.after(0, lambda err=exc: self._on_error(err))

    threading.Thread(target=_work, daemon=True).start()
|
||
|
||
def _transcribe(self, wav_path: str) -> str:
    """Transcribe the WAV file at *wav_path* and return the recognised text.

    The request is tried up to three times; a retry only happens when
    ``_is_connection_error_text`` classifies the error message as a
    connection problem, with a pause of 1 s and then 2 s in between. Any
    other error, or the error of the last attempt, is re-raised.

    Returns an empty string when the response has no text or when the text
    starts with ``WHISPER_PROMPT_PREFIX``.
    """
    params = dict(model=TRANSCRIBE_MODEL, language="de")
    params["prompt"] = WHISPER_MEDICAL_PROMPT
    # Temperature is only sent for models whose name lacks "gpt-".
    if "gpt-" not in TRANSCRIBE_MODEL:
        params["temperature"] = 0.0

    max_attempts = 3
    with open(wav_path, "rb") as f:
        params["file"] = f
        for attempt in range(max_attempts):
            # Rewind before every attempt: a failed upload may already have
            # read (part of) the stream, and a retry from that position
            # would send truncated or empty audio.
            f.seek(0)
            try:
                resp = self._client.audio.transcriptions.create(**params)
                break
            except Exception as e:
                is_last_attempt = attempt == max_attempts - 1
                if is_last_attempt or not _is_connection_error_text(str(e)):
                    raise
                time.sleep(1.0 + attempt)

    # The loop above either bound ``resp`` and broke out, or raised.
    text = getattr(resp, "text", "")
    if text is None:
        text = ""
    if text.strip().startswith(WHISPER_PROMPT_PREFIX):
        text = ""
    return text
|
||
|
||
def _insert_text(self, text: str, save_msg: str = ""):
    """Insert *text* at the cursor and, if auto-copy is on, copy the whole note.

    ``save_msg`` is appended to the final status message. Copying first
    tries ``_win_clipboard_set`` and falls back to the Tk clipboard.
    """
    editor = self._txt
    editor.configure(state="normal")
    editor.insert(editor.index(tk.INSERT), text)

    whole_note = editor.get("1.0", "end").strip()
    if not (whole_note and self._autocopy_var.get()):
        self._status_var.set(f"Fertig.{save_msg}")
        return

    if not _win_clipboard_set(whole_note):
        # Fallback: Tk clipboard with sanitized plain text.
        try:
            self.clipboard_clear()
            self.clipboard_append(_sanitize_markdown_for_plain_text(whole_note))
        except Exception:
            pass
    self._status_var.set(f"Fertig. Kopiert.{save_msg}")
|
||
|
||
def _on_error(self, err):
    """Show *err* in an error dialog and mirror its first line in the status bar."""
    message = _friendly_error_message(err)
    try:
        # Use ``_proxy_win`` as dialog parent when that attribute exists.
        owner = getattr(self, "_proxy_win", self)
        messagebox.showerror("Fehler", message, parent=owner)
    except Exception:
        # Retry without an explicit parent window.
        messagebox.showerror("Fehler", message)
    try:
        first_line = message.splitlines()[0]
        self._status_var.set(f"Fehler: {first_line[:80]}")
    except Exception:
        pass
|
||
|
||
def _do_neu(self):
    """Clear the note and start a fresh recording.

    If a recording is running, the user is asked for confirmation; on "yes"
    the running recording is stopped and its WAV file discarded, on "no"
    nothing changes.
    """
    def _discard_running_recording():
        self._is_recording = False
        self._stop_rec_blink()
        self._update_btn_states()
        self._update_mini_states()
        try:
            discarded_wav = self._recorder.stop_and_save_wav()
            if os.path.exists(discarded_wav):
                os.remove(discarded_wav)
        except Exception:
            # Best effort: a failed cleanup must not block the restart.
            pass
        self._recorder = None

    if self._is_recording:
        confirmed = messagebox.askyesno(
            "Aufnahme l\u00e4uft",
            "Aktuelle Aufnahme verwerfen und neu starten?",
            parent=self)
        if not confirmed:
            return
        _discard_running_recording()

    self._txt.configure(state="normal")
    self._txt.delete("1.0", "end")
    self._status_var.set("Bereit.")
    self._do_start()
|
||
|
||
def _do_kopieren(self):
    """Copy the whole note to the clipboard and report the result in the status bar."""
    note = self._txt.get("1.0", "end").strip()
    if not note:
        self._status_var.set("Nichts zum Kopieren.")
        return

    if not _win_clipboard_set(note):
        # Fallback: Tk clipboard with sanitized plain text.
        try:
            self.clipboard_clear()
            self.clipboard_append(_sanitize_markdown_for_plain_text(note))
        except Exception:
            pass
    self._status_var.set("Audio-Notiz kopiert.")
|
||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: build the window and hand control to the Tk loop.
    app = DiktatApp()
    app.mainloop()
|