# -*- coding: utf-8 -*- """ AZA MedWork – Audio-Notiz (Standalone) Eigenstaendige Audio-Notiz (Transkription). Gleicher Look wie im Hauptfenster. Start: python diktat_app.py ODER Doppelklick auf "Audio-Notiz starten.bat" """ import os import re import sys import tempfile import threading import time import wave import json import tkinter as tk from tkinter import ttk, messagebox from tkinter.scrolledtext import ScrolledText from datetime import datetime _APP_DIR = os.path.dirname(os.path.abspath(__file__)) def _find_project_root(start_dir: str) -> str: """Sucht den Projektordner, der aza_audio.py enthaelt.""" cur = os.path.abspath(start_dir) for _ in range(8): if os.path.isfile(os.path.join(cur, "aza_audio.py")): return cur parent = os.path.dirname(cur) if parent == cur: break cur = parent return os.path.normpath(os.path.join(start_dir, "..", "..")) _PROJECT_ROOT = _find_project_root(_APP_DIR) if _PROJECT_ROOT not in sys.path: sys.path.insert(0, _PROJECT_ROOT) _IMPORT_ERRORS = [] try: from dotenv import load_dotenv except Exception: load_dotenv = None _IMPORT_ERRORS.append("python-dotenv fehlt") if load_dotenv: load_dotenv(os.path.join(_PROJECT_ROOT, ".env")) # Fallback: lokale .env im Add-on-Ordner load_dotenv(os.path.join(_APP_DIR, ".env")) def _apply_proxy_env_from_openai_vars(): """Mappt OPENAI_*_PROXY auf Standard-Proxy-Variablen.""" http_proxy = os.getenv("OPENAI_HTTP_PROXY", "").strip() https_proxy = os.getenv("OPENAI_HTTPS_PROXY", "").strip() if http_proxy: os.environ["HTTP_PROXY"] = http_proxy os.environ["http_proxy"] = http_proxy if https_proxy: os.environ["HTTPS_PROXY"] = https_proxy os.environ["https_proxy"] = https_proxy _apply_proxy_env_from_openai_vars() try: from openai import OpenAI except Exception: OpenAI = None _IMPORT_ERRORS.append("openai fehlt") try: from aza_audio import AudioRecorder except Exception: AudioRecorder = None _IMPORT_ERRORS.append("aza_audio.py nicht gefunden") def _get_data_dir() -> str: """Schreibbares Datenverzeichnis (%APPDATA%\\AZA Desktop).""" try: from aza_config import get_writable_data_dir return get_writable_data_dir() except Exception: return os.path.join(os.path.expanduser("~"), "AppData", "Roaming", "AZA Desktop") _DATA_DIR = _get_data_dir() _SETTINGS_FILE = os.path.join(_DATA_DIR, "audio_notiz_settings.json") TRANSCRIBE_MODEL = os.getenv("TRANSCRIBE_MODEL", "gpt-4o-mini-transcribe") WHISPER_MEDICAL_PROMPT = ( "Medizinische Dokumentation auf Deutsch. " "Capillitium, Fotodynamische Therapie, PDT, Basalzellkarzinom, Plattenepithelkarzinom, " "Spinaliom, Spinaliom der Haut, Spinalzellkarzinom, " "Melanom, Exzision, Biopsie, Kryotherapie, Kuerettage, Histologie, Dermatoskopie, " "Naevus, Naevi, Naevuszellnaevus, dysplastischer Naevus, " "Compound-Naevus, junktionaler Naevus, dermaler Naevus, Spitz-Naevus, " "Erythem, Papel, Pustel, Makula, Plaque, Nodulus, Nodus, " "Vesikel, Bulla, Erosion, Ulkus, Rhagade, Kruste, Squama, " "Effloreszenzen, Lichenifikation, Exkoriation, " "seborrhoische Keratose, Fibrom, Lipom, Atherom, Epidermoidzyste, " "Verruca vulgaris, Verrucae, Kondylome, Molluscum contagiosum, " "Haemangiom, Angiom, Keloid, hypertrophe Narbe, " "Tinea, Mykose, Onychomykose, Herpes simplex, Herpes zoster, " "Erysipel, Impetigo, Abszess, Phlegmone, Skabies, " "Pemphigus, Pemphigoid, Lichen ruber, Lichen sclerosus, " "Vitiligo, Pruritus, Prurigo, Mykosis fungoides, " "Shave-Biopsie, Stanzbiopsie, Inzisionsbiopsie, " "Breslow-Dicke, Clark-Level, Sentinel-Lymphknoten, " "Auflichtmikroskopie, Phototherapie, UVB, PUVA, " "Anamnese, Diagnose, Therapie, Procedere, subjektiv, objektiv, " "Abdomen, Thorax, Extremitaeten, zervikal, lumbal, thorakal, sakral, " "Sonographie, Roentgen, MRI, CT, EKG, Laborwerte, Blutbild, " "Hypertonie, Diabetes mellitus, Hypercholesterinaemie, Hypothyreose, " "Antikoagulation, Thrombozytenaggregationshemmer, NSAR, ACE-Hemmer, " "Immunsuppression, Kortikosteroide, Biologika, Methotrexat, " "Psoriasis, Ekzem, Dermatitis, Urtikaria, Alopezie, Akne, Rosazea, " "Aktinische Keratose, Morbus Bowen, Lentigo maligna, " "Januar 2026, Februar 2026, Maerz 2026, April 2026, Mai 2026, " "Status nach, Z.n., s/p, i.v., p.o., s.c., " "ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief." ) WHISPER_PROMPT_PREFIX = "Medizinische Dokumentation auf Deutsch" # ── Farben (identisch mit Hauptfenster) ── BG = "#B9ECFA" BTN_BG = "#7EC8E3" BTN_FG = "#1a4d6d" BTN_ACTIVE = "#5AB9E8" HDR_FG = "#1a4d6d" STATUS_BG = "#FFE4CC" STATUS_FG = "#BD4500" TXT_BG = "#F5FCFF" MINI_FG = "#5A90B0" MINI_FG_HOVER = "#1a4d6d" REC_RED = "#D04040" REC_DOT = "#FF3030" _SAVE_DIR = os.path.join(_DATA_DIR, "kg_diktat_ablage", "Audio_Notiz") _LOGO_PATH = os.path.join(_PROJECT_ROOT, "logo.png") _TRAILING_CMD_RE = re.compile( r"[.,;:\s]*(?:" r"(?:stopp?|stoppen)\s*dikt(?:at|ad)|" r"dikt(?:at|ad)\s*(?:stopp?|stoppen)|" r"(?:schliessen|schließen|beenden)\s*(?:dikt(?:at|ad)|app)|" r"(?:dikt(?:at|ad)|app)\s*(?:schliessen|schließen|beenden)" r")[.,;:\s]*$", re.IGNORECASE ) _SILENCE_RMS = 0.04 _SILENCE_SEC = 1.5 _CMD_COOLDOWN_SEC = 3.0 _CMD_SPEECH_SEC = 5.0 _CMD_PROMPT = ( "Sprachbefehl auf Deutsch. " "Stop Diktat, Stopp Diktat, Diktat stoppen, Diktat schliessen, Diktat beenden." ) def _friendly_error_message(err) -> str: raw = str(err or "").strip() text = raw.lower() if ("connection error" in text or "api connection" in text or "timed out" in text or "timeout" in text or "name or service not known" in text or "temporary failure in name resolution" in text): return ( "Verbindungsfehler zu OpenAI.\n\n" "Bitte pruefen:\n" "- Internetverbindung aktiv\n" "- VPN/Proxy/Firewall blockiert nicht\n" "- OpenAI-Dienst erreichbar\n\n" "Optional (.env):\n" "- OPENAI_HTTP_PROXY=http://user:pass@proxy:port\n" "- OPENAI_HTTPS_PROXY=http://user:pass@proxy:port\n" "- OPENAI_BASE_URL=https://... (nur bei Gateway)\n\n" f"Technischer Hinweis: {raw[:140]}" ) if ("invalid_api_key" in text or "incorrect api key" in text or "authentication" in text or "401" in text): return ( "API-Key ist ungueltig oder fehlt.\n\n" "Bitte OPENAI_API_KEY in der .env pruefen." ) if ("rate limit" in text or "429" in text or "quota" in text): return ( "OpenAI-Limit erreicht (Rate Limit / Kontingent).\n\n" "Bitte kurz warten oder Abrechnung/Kontingent pruefen." ) return raw[:200] if raw else "Unbekannter Fehler." def _is_connection_error_text(msg: str) -> bool: t = (msg or "").lower() return ( "connection error" in t or "api connection" in t or "timed out" in t or "timeout" in t or "name or service not known" in t or "temporary failure in name resolution" in t ) def _detect_voice_command(text): cleaned = (text or "").strip().lower() if not cleaned: return None cleaned = cleaned.replace("ß", "ss") cleaned = re.sub(r"[^a-z0-9äöü\s]", " ", cleaned) cleaned = re.sub(r"\s+", " ", cleaned).strip() close_patterns = ( r"\bdikt(?:at|ad)\s*(?:schliessen|beenden)\b", r"\b(?:schliessen|beenden)\s*dikt(?:at|ad)\b", r"\bapp\s*(?:schliessen|beenden)\b", ) for pat in close_patterns: if re.search(pat, cleaned): return "close" stop_patterns = ( r"\bdikt(?:at|ad)\s*(?:stop|stopp|stoppen)\b", r"\b(?:stop|stopp|stoppen)\s*dikt(?:at|ad)\b", ) for pat in stop_patterns: if re.search(pat, cleaned): return "stop" return None def _win_clipboard_set(text: str) -> bool: if sys.platform != "win32": return False try: import ctypes from ctypes import wintypes CF_UNICODETEXT = 13 GMEM_DDESHARE = 0x2000 kernel32 = ctypes.WinDLL("kernel32") user32 = ctypes.WinDLL("user32") user32.OpenClipboard.argtypes = [wintypes.HWND] user32.OpenClipboard.restype = wintypes.BOOL user32.CloseClipboard.argtypes = [] user32.EmptyClipboard.argtypes = [] user32.SetClipboardData.argtypes = [wintypes.UINT, wintypes.HANDLE] user32.SetClipboardData.restype = wintypes.HANDLE kernel32.GlobalAlloc.argtypes = [wintypes.UINT, ctypes.c_size_t] kernel32.GlobalAlloc.restype = wintypes.HANDLE kernel32.GlobalLock.argtypes = [wintypes.HANDLE] kernel32.GlobalLock.restype = ctypes.c_void_p kernel32.GlobalUnlock.argtypes = [wintypes.HANDLE] encoded = text.encode("utf-16-le") + b"\x00\x00" hMem = kernel32.GlobalAlloc(GMEM_DDESHARE, len(encoded)) pMem = kernel32.GlobalLock(hMem) ctypes.memmove(pMem, encoded, len(encoded)) kernel32.GlobalUnlock(hMem) user32.OpenClipboard(0) user32.EmptyClipboard() user32.SetClipboardData(CF_UNICODETEXT, hMem) user32.CloseClipboard() return True except Exception: return False def _sanitize_markdown_for_plain_text(raw_text: str) -> str: lines = (raw_text or "").replace("\r\n", "\n").replace("\r", "\n").split("\n") out_lines = [] for raw_line in lines: line = raw_line line = re.sub(r"^\s*#{1,6}\s+", "", line) line = re.sub(r"^\s*\d+\.\s+", "", line) line = re.sub(r"^\s*[-*•]\s+", "", line) line = re.sub(r"\*\*(.+?)\*\*", r"\1", line) line = re.sub(r"__(.+?)__", r"\1", line) line = re.sub(r"(? dict: try: if os.path.isfile(_SETTINGS_FILE): with open(_SETTINGS_FILE, "r", encoding="utf-8") as f: return json.load(f) except Exception: pass return {} def _save_settings(data: dict): try: with open(_SETTINGS_FILE, "w", encoding="utf-8") as f: json.dump(data, f, indent=2) except Exception: pass def _auto_save(text: str) -> str: """Speichert Text automatisch als .txt mit Timestamp. Gibt Dateipfad zurueck.""" os.makedirs(_SAVE_DIR, exist_ok=True) ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") filename = f"AudioNotiz_{ts}.txt" path = os.path.join(_SAVE_DIR, filename) with open(path, "w", encoding="utf-8") as f: f.write(text) return path def apply_punctuation(text: str) -> str: if not text or not text.strip(): return text t = text t = re.sub(r"\s+neuer\s+Absatz\s*", "\n\n", t, flags=re.IGNORECASE) t = re.sub(r"\s+neue\s+Zeile\s*", "\n", t, flags=re.IGNORECASE) t = re.sub(r"\s+Zeilenumbruch\s*", "\n", t, flags=re.IGNORECASE) t = re.sub(r"\s+Absatz\s+", "\n\n", t, flags=re.IGNORECASE) t = re.sub(r"\s+Absatz\s*$", "\n\n", t, flags=re.IGNORECASE) t = re.sub(r"\s+Absatzzeichen\s*", "\n\n", t, flags=re.IGNORECASE) t = re.sub(r"\s+Punkt\s+", ". ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Punkt\s*$", ".", t, flags=re.IGNORECASE) t = re.sub(r"\s+Komma\s+", ", ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Komma\s*$", ",", t, flags=re.IGNORECASE) t = re.sub(r"\s+Semikolon\s+", "; ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Semikolon\s*$", ";", t, flags=re.IGNORECASE) t = re.sub(r"\s+Strichpunkt\s+", "; ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Strichpunkt\s*$", ";", t, flags=re.IGNORECASE) t = re.sub(r"\s+Doppelpunkt\s+", ": ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Doppelpunkt\s*$", ":", t, flags=re.IGNORECASE) t = re.sub(r"\s+Fragezeichen\s+", "? ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Fragezeichen\s*$", "?", t, flags=re.IGNORECASE) t = re.sub(r"\s+Ausrufezeichen\s+", "! ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Ausrufezeichen\s*$", "!", t, flags=re.IGNORECASE) t = re.sub(r"\s+Gedankenstrich\s+", " \u2013 ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Gedankenstrich\s*$", " \u2013", t, flags=re.IGNORECASE) t = re.sub(r"\s+Bindestrich\s+", "-", t, flags=re.IGNORECASE) t = re.sub(r"\s+Schr\u00e4gstrich\s+", "/", t, flags=re.IGNORECASE) t = re.sub(r"\s+Klammer\s+auf\s+", " (", t, flags=re.IGNORECASE) t = re.sub(r"\s+Klammer\s+zu\s+", ") ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Auslassungspunkte\s+", "\u2026 ", t, flags=re.IGNORECASE) t = re.sub(r"\s+Auslassungspunkte\s*$", "\u2026", t, flags=re.IGNORECASE) ord_map = [ (r"\b(erstens)\b", "1."), (r"\b(zweitens)\b", "2."), (r"\b(drittens)\b", "3."), (r"\b(viertens)\b", "4."), (r"\b(f\u00fcnftens)\b", "5."), (r"\b(sechstens)\b", "6."), (r"\b(siebtens)\b", "7."), (r"\b(achtens)\b", "8."), (r"\b(neuntens)\b", "9."), (r"\b(zehntens)\b", "10."), ] for pat, repl in ord_map: t = re.sub(pat, repl, t, flags=re.IGNORECASE) _year_words = { "zweitausendzwanzig": "2020", "zweitausendeinundzwanzig": "2021", "zweitausendzweiundzwanzig": "2022", "zweitausenddreiundzwanzig": "2023", "zweitausendvierundzwanzig": "2024", "zweitausendf\u00fcnfundzwanzig": "2025", "zweitausendsechsundzwanzig": "2026", "zweitausendsiebenundzwanzig": "2027", "zweitausendachtundzwanzig": "2028", "zweitausendneunundzwanzig": "2029", "zweitausenddreissig": "2030", "zweitausenddrei\u00dfig": "2030", "neunzehnhundertneunzig": "1990", "zweitausend": "2000", } for word, year in sorted(_year_words.items(), key=lambda x: -len(x[0])): t = re.sub(r"\b" + word + r"\b", year, t, flags=re.IGNORECASE) _day_words = { "ersten": "1.", "zweiten": "2.", "dritten": "3.", "vierten": "4.", "f\u00fcnften": "5.", "sechsten": "6.", "siebten": "7.", "achten": "8.", "neunten": "9.", "zehnten": "10.", "elften": "11.", "zw\u00f6lften": "12.", "dreizehnten": "13.", "vierzehnten": "14.", "f\u00fcnfzehnten": "15.", "sechzehnten": "16.", "siebzehnten": "17.", "achtzehnten": "18.", "neunzehnten": "19.", "zwanzigsten": "20.", "einundzwanzigsten": "21.", "zweiundzwanzigsten": "22.", "dreiundzwanzigsten": "23.", "vierundzwanzigsten": "24.", "f\u00fcnfundzwanzigsten": "25.", "sechsundzwanzigsten": "26.", "siebenundzwanzigsten": "27.", "achtundzwanzigsten": "28.", "neunundzwanzigsten": "29.", "dreissigsten": "30.", "drei\u00dfigsten": "30.", "einunddreissigsten": "31.", "einunddrei\u00dfigsten": "31.", } _months = (r"(?:Januar|Februar|M\u00e4rz|April|Mai|Juni|Juli|August|" r"September|Oktober|November|Dezember)") for word, day in sorted(_day_words.items(), key=lambda x: -len(x[0])): t = re.sub(r"\b" + word + r"\s+" + _months, lambda m: day + " " + m.group(0).split()[-1], t, flags=re.IGNORECASE) return t # ── RoundedButton (gleich wie Hauptfenster) ── class RoundedButton(tk.Canvas): def __init__(self, parent, text, command=None, bg=BTN_BG, fg=BTN_FG, active_bg=BTN_ACTIVE, radius=8, width=120, height=26, canvas_bg=None, **kw): kw.setdefault("highlightthickness", 0) if canvas_bg is not None: kw["bg"] = canvas_bg super().__init__(parent, width=width, height=height, **kw) self._command = command self._bg = bg self._fg = fg self._active_bg = active_bg self._radius = radius self._text = text self.bind("", self._on_click) self.bind("", self._on_enter) self.bind("", self._on_leave) self.bind("", lambda e: self._draw()) self._draw() def _draw(self, bg=None): self.delete("all") w = self.winfo_width() or int(self["width"]) h = self.winfo_height() or int(self["height"]) r = self._radius c = bg or self._bg self.create_arc(0, 0, 2*r, 2*r, start=90, extent=90, fill=c, outline=c) self.create_arc(w-2*r, 0, w, 2*r, start=0, extent=90, fill=c, outline=c) self.create_arc(0, h-2*r, 2*r, h, start=180, extent=90, fill=c, outline=c) self.create_arc(w-2*r, h-2*r, w, h, start=270, extent=90, fill=c, outline=c) self.create_rectangle(r, 0, w-r, h, fill=c, outline=c) self.create_rectangle(0, r, w, h-r, fill=c, outline=c) self.create_text(w//2, h//2, text=self._text, fill=self._fg, font=("Segoe UI", 9)) def configure(self, **kw): if "text" in kw: self._text = kw.pop("text") self._draw() if kw: super().configure(**kw) def _on_click(self, e): if self._command: self._command() def _on_enter(self, e): self._draw(self._active_bg) def _on_leave(self, e): self._draw() class DiktatApp(tk.Tk): def __init__(self, _as_toplevel_of=None): if _as_toplevel_of is not None: tk.Tk.__init__ = lambda *a, **k: None self._toplevel = tk.Toplevel(_as_toplevel_of) self._toplevel.title("Audio-Notiz \u2013 nur Transkription") self._toplevel.configure(bg=BG) self._toplevel.attributes("-topmost", True) self._is_embedded = True self._proxy_win = self._toplevel for attr in ("title", "configure", "attributes", "geometry", "minsize", "protocol", "bind", "after", "update_idletasks", "winfo_screenwidth", "winfo_screenheight", "winfo_width", "winfo_height", "winfo_x", "winfo_y", "winfo_exists", "deiconify", "iconify", "withdraw", "lift", "focus_force", "destroy", "overrideredirect", "wm_attributes", "winfo_toplevel", "tk"): if hasattr(self._toplevel, attr) and attr != "destroy": try: setattr(self, attr, getattr(self._toplevel, attr)) except (AttributeError, TypeError): pass self._toplevel.protocol("WM_DELETE_WINDOW", self._safe_destroy) self._init_app() return super().__init__() self._is_embedded = False self._proxy_win = self self.title("Audio-Notiz \u2013 nur Transkription") self.configure(bg=BG) self.attributes("-topmost", True) self._init_app() def _safe_destroy(self): try: if self._is_recording: self._is_recording = False self._voice_cmd_active = False if self._recorder: try: self._recorder.stop_and_save_wav() except Exception: pass self._recorder = None except Exception: pass try: if hasattr(self, "_toplevel") and self._toplevel.winfo_exists(): self._toplevel.destroy() except Exception: pass def _init_app(self): self._logo_photo = None self._logo_photo_small = None self._load_logo() self._settings = _load_settings() saved_geom = self._settings.get("geometry", "300x290") self.geometry(saved_geom) self.minsize(300, 280) if "+" not in saved_geom: self.update_idletasks() sw = self.winfo_screenwidth() sh = self.winfo_screenheight() self.geometry(f"+{(sw - 300) // 2}+{(sh - 290) // 2}") api_key = None try: from openai_runtime_config import get_openai_api_key api_key = get_openai_api_key() except Exception: pass if not api_key: api_key = os.getenv("OPENAI_API_KEY", "").strip() base_url = os.getenv("OPENAI_BASE_URL", "").strip() self._client = None if OpenAI and api_key: client_kwargs = dict(api_key=api_key, timeout=60.0, max_retries=2) if base_url: client_kwargs["base_url"] = base_url self._client = OpenAI(**client_kwargs) self._recorder = None self._is_recording = False self._minimized = False self._geom_before = None self._restoring = False self._font_size = self._settings.get("font_size", 8) self._build_ui() try: from aza_global_paste import start_global_right_click_paste_listener start_global_right_click_paste_listener() except Exception: pass self._save_geom_after_id = None self.bind("", self._on_configure) if _IMPORT_ERRORS: self.after(500, lambda: messagebox.showerror( "Start-Fehler", "Audio-Notiz konnte nicht vollstaendig starten:\n- " + "\n- ".join(_IMPORT_ERRORS) + "\n\nTipps:\n" "1) Starte die App im Projektordner.\n" "2) Installiere fehlende Pakete mit pip.", parent=self)) return if not self._client: self.after(500, lambda: messagebox.showerror( "API-Key fehlt", "OPENAI_API_KEY ist nicht gesetzt.\n\n" "Lege eine '.env' Datei an:\nOPENAI_API_KEY=sk-...", parent=self)) def _load_logo(self): try: if os.path.exists(_LOGO_PATH): from PIL import Image, ImageTk img = Image.open(_LOGO_PATH) self._logo_photo = ImageTk.PhotoImage(img.resize((24, 24), Image.Resampling.LANCZOS)) self._logo_photo_small = ImageTk.PhotoImage(img.resize((18, 18), Image.Resampling.LANCZOS)) img_icon = img.resize((57, 57), Image.Resampling.LANCZOS) import tempfile tmp = tempfile.NamedTemporaryFile(suffix=".ico", delete=False) img_icon.save(tmp.name, format="ICO") tmp.close() self.iconbitmap(tmp.name) try: os.unlink(tmp.name) except Exception: pass except Exception: pass # ── Drag (Fenster verschieben via Header/Logo) ── def _drag_start(self, e): self._drag_x = e.x_root - self.winfo_x() self._drag_y = e.y_root - self.winfo_y() def _drag_move(self, e): x = e.x_root - self._drag_x y = e.y_root - self._drag_y self.geometry(f"+{x}+{y}") def _make_draggable(self, widget): widget.configure(cursor="fleur") widget.bind("", self._drag_start) widget.bind("", self._drag_move) # ── UI ── def _build_ui(self): self._drag_x = 0 self._drag_y = 0 # Header self._header = tk.Frame(self, bg=BG) self._header.pack(fill="x") self._logo_lbl = None if self._logo_photo: self._logo_lbl = tk.Label(self._header, image=self._logo_photo, bg=BG) self._logo_lbl.pack(side="left", padx=(8, 0), pady=4) self._make_draggable(self._logo_lbl) self._title_lbl = tk.Label(self._header, text="Audio-Notiz", font=("Segoe UI", 12, "bold"), bg=BG, fg=HDR_FG) self._title_lbl.pack(side="left", padx=(4, 0), pady=6) self._make_draggable(self._title_lbl) self._make_draggable(self._header) # Minimize button self._btn_minimize = tk.Label(self._header, text="\u2014", font=("Segoe UI", 12, "bold"), bg=BG, fg=MINI_FG, cursor="hand2", padx=6) self._btn_minimize.pack(side="right", padx=(0, 8)) self._btn_minimize.bind("", lambda e: self._toggle_minimize()) self._btn_minimize.bind("", lambda e: self._btn_minimize.configure(fg=MINI_FG_HOVER)) self._btn_minimize.bind("", lambda e: self._btn_minimize.configure(fg=MINI_FG)) # Recording indicator (red dot, hidden initially) self._rec_dot = tk.Label(self._header, text="", font=("Segoe UI", 8), bg=BG, fg=REC_DOT) self._rec_blink_id = None # Mini controls (hidden initially) self._mini_frame = None self._mini_status_bar = None # Main content self._main_f = tk.Frame(self, bg=BG, padx=12, pady=12) self._main_f.pack(fill="both", expand=True) # Label + Schriftgroessen-Steuerung label_frame = tk.Frame(self._main_f, bg=BG) label_frame.pack(fill="x", anchor="w") tk.Label(label_frame, text="Audio-Notiz (nur Transkription):", bg=BG, fg=HDR_FG, font=("Segoe UI", 9)).pack(side="left") # Schriftgroesse Aa ▲▼ (wie Hauptfenster) ctrl = tk.Frame(label_frame, bg=BG) ctrl.pack(side="right", padx=4) tk.Label(ctrl, text="Aa", font=("Segoe UI", 8), bg=BG, fg=MINI_FG).pack(side="left", padx=(0, 1)) self._size_lbl = tk.Label(ctrl, text=str(self._font_size), font=("Segoe UI", 8), bg=BG, fg=MINI_FG, width=2, anchor="center") self._size_lbl.pack(side="left") btn_up = tk.Label(ctrl, text="\u25B2", font=("Segoe UI", 7), bg=BG, fg=MINI_FG, cursor="hand2") btn_up.pack(side="left", padx=1) btn_up.bind("", lambda e: self._change_font(1)) btn_up.bind("", lambda e: btn_up.configure(fg=MINI_FG_HOVER)) btn_up.bind("", lambda e: btn_up.configure(fg=MINI_FG)) btn_dn = tk.Label(ctrl, text="\u25BC", font=("Segoe UI", 7), bg=BG, fg=MINI_FG, cursor="hand2") btn_dn.pack(side="left", padx=1) btn_dn.bind("", lambda e: self._change_font(-1)) btn_dn.bind("", lambda e: btn_dn.configure(fg=MINI_FG_HOVER)) btn_dn.bind("", lambda e: btn_dn.configure(fg=MINI_FG)) # Textfeld self._txt = ScrolledText(self._main_f, wrap="word", font=("Segoe UI", self._font_size), bg=TXT_BG, height=8) self._txt.pack(fill="both", expand=True, pady=(4, 4)) # Statusleiste self._status_var = tk.StringVar(value="Modus: Medizinische Audio-Notiz aktiv") status_bar = tk.Frame(self._main_f, bg=STATUS_BG, height=24, padx=8, pady=4) status_bar.pack(fill="x", pady=(4, 0)) status_bar.pack_propagate(False) tk.Label(status_bar, textvariable=self._status_var, fg=STATUS_FG, bg=STATUS_BG, font=("Segoe UI", 8), anchor="w").pack(side="left", fill="x", expand=True) # Autocopy-Checkbox self._autocopy_var = tk.BooleanVar(value=self._is_autocopy_enabled()) cb_row = tk.Frame(self._main_f, bg=BG) cb_row.pack(fill="x", pady=(2, 0)) cb_autocopy = ttk.Checkbutton( cb_row, text="Autocopy nach Transkription", variable=self._autocopy_var, command=self._save_autocopy_pref, ) cb_autocopy.pack(side="left") self._rclick_paste_var = tk.BooleanVar(value=self._is_rclick_paste_enabled()) cb_rclick = ttk.Checkbutton( cb_row, text="Rechtsklick = Einfügen", variable=self._rclick_paste_var, command=self._save_rclick_pref, ) cb_rclick.pack(side="left", padx=(12, 0)) # Buttons: Icon-basiert (▶ Start / ■ Stop als Toggle, Neu, Kopieren) btn_row = tk.Frame(self._main_f, bg=BG) btn_row.pack(fill="x", pady=(4, 0)) self._btn_record_toggle = RoundedButton(btn_row, "\u25b6 Start", command=self._toggle_recording, width=80, height=26, canvas_bg=BG) self._btn_record_toggle.pack(side="left") RoundedButton(btn_row, "Neu", command=self._do_neu, width=60, height=26, canvas_bg=BG).pack(side="left", padx=(6, 0)) RoundedButton(btn_row, "Kopieren", command=self._do_kopieren, width=80, height=26, canvas_bg=BG).pack(side="left", padx=(6, 0)) RoundedButton(btn_row, "Verbindung testen", command=self._do_test_connection, width=130, height=26, canvas_bg=BG).pack(side="left", padx=(6, 0)) # ── Schriftgroesse ── def _change_font(self, delta): self._font_size = max(5, min(20, self._font_size + delta)) self._txt.configure(font=("Segoe UI", self._font_size)) self._size_lbl.configure(text=str(self._font_size)) self._settings["font_size"] = self._font_size _save_settings(self._settings) # ── Recording-Indikator (roter blinkender Punkt) ── def _start_rec_blink(self): self._rec_dot.pack(side="left", padx=(2, 0)) self._blink_visible = True self._do_blink() def _do_blink(self): if not self._is_recording: return self._blink_visible = not self._blink_visible self._rec_dot.configure(text="\u25cf" if self._blink_visible else "", fg=REC_DOT) self._rec_blink_id = self.after(500, self._do_blink) def _stop_rec_blink(self): if self._rec_blink_id: self.after_cancel(self._rec_blink_id) self._rec_blink_id = None self._rec_dot.pack_forget() def _update_btn_states(self): if self._is_recording: self._btn_record_toggle.configure(text="\u25a0 Stop") self._btn_record_toggle._bg = REC_RED self._btn_record_toggle._fg = "white" self._btn_record_toggle._active_bg = "#B03030" self._btn_record_toggle._draw() else: self._btn_record_toggle.configure(text="\u25b6 Start") self._btn_record_toggle._bg = BTN_BG self._btn_record_toggle._fg = BTN_FG self._btn_record_toggle._active_bg = BTN_ACTIVE self._btn_record_toggle._draw() # ── Geometrie speichern ── def _on_configure(self, e): if e.widget is not self: return if self._minimized and not self._restoring and e.height > 110: self._restore() if not self._minimized: if self._save_geom_after_id: self.after_cancel(self._save_geom_after_id) self._save_geom_after_id = self.after(400, self._save_geom) def _save_geom(self): try: self._settings["geometry"] = self.geometry() _save_settings(self._settings) except Exception: pass def _is_autocopy_enabled(self) -> bool: try: from aza_persistence import is_autocopy_after_diktat_enabled return is_autocopy_after_diktat_enabled() except Exception: return True def _save_autocopy_pref(self): try: from aza_persistence import save_autocopy_prefs save_autocopy_prefs(autocopy=self._autocopy_var.get()) except Exception: pass def _is_rclick_paste_enabled(self) -> bool: try: from aza_persistence import is_global_right_click_paste_enabled return is_global_right_click_paste_enabled() except Exception: return True def _save_rclick_pref(self): try: from aza_persistence import save_autocopy_prefs save_autocopy_prefs(global_right_click=self._rclick_paste_var.get()) except Exception: pass # ── Minimierung ── def _toggle_minimize(self): if self._minimized: self._restore() if self._geom_before: try: self.geometry(self._geom_before) except Exception: pass else: self._minimize() def _minimize(self): self._geom_before = self.geometry() self._main_f.pack_forget() self._btn_minimize.configure(text="\u25a1") self._minimized = True self.minsize(100, 78) self.geometry("100x78") if not self._mini_frame: self._mini_frame = tk.Frame(self, bg=BG) # ▶ / ■ Toggle self._m_toggle = tk.Label(self._mini_frame, text="\u25b6", font=("Segoe UI", 12, "bold"), bg=BG, fg=MINI_FG, cursor="hand2") self._m_toggle.pack(side="left", padx=(4, 0)) self._m_toggle.bind("", lambda e: self._toggle_recording()) # ● Recording dot self._m_dot = tk.Label(self._mini_frame, text="", font=("Segoe UI", 8), bg=BG, fg=REC_DOT) self._m_dot.pack(side="left", padx=(2, 0)) # Neu self._m_neu = tk.Label(self._mini_frame, text="Neu", font=("Segoe UI", 7), bg=BG, fg=MINI_FG, cursor="hand2") self._m_neu.pack(side="left", padx=(4, 0)) self._m_neu.bind("", lambda e: self._do_neu()) self._m_neu.bind("", lambda e: self._m_neu.configure(fg=MINI_FG_HOVER)) self._m_neu.bind("", lambda e: self._m_neu.configure(fg=MINI_FG)) self._update_mini_states() self._mini_frame.pack(fill="x", padx=2) if not self._mini_status_bar: self._mini_status_bar = tk.Frame(self, bg=STATUS_BG, height=16, padx=4, pady=1) tk.Label(self._mini_status_bar, textvariable=self._status_var, fg=STATUS_FG, bg=STATUS_BG, font=("Segoe UI", 6), anchor="w").pack(side="left", fill="x", expand=True) self._mini_status_bar.pack(fill="x") def _update_mini_states(self): if not self._mini_frame: return if self._is_recording: self._m_toggle.configure(text="\u25a0", fg=REC_RED) self._m_dot.configure(text="\u25cf") else: self._m_toggle.configure(text="\u25b6", fg=MINI_FG) self._m_dot.configure(text="") def _restore(self): if not self._minimized: return self._restoring = True if self._mini_frame: self._mini_frame.pack_forget() if self._mini_status_bar: self._mini_status_bar.pack_forget() self._main_f.pack(fill="both", expand=True) self._btn_minimize.configure(text="\u2014") self._minimized = False self.minsize(300, 280) self.after(200, self._finish_restore) def _finish_restore(self): self._restoring = False def _toggle_recording(self): if self._is_recording: self._do_stop() else: self._do_start() def _do_test_connection(self): if _IMPORT_ERRORS: messagebox.showerror( "Start-Fehler", "Fehlende Komponenten:\n- " + "\n- ".join(_IMPORT_ERRORS), parent=self ) return if not self._client: messagebox.showerror( "API-Key fehlt", "OPENAI_API_KEY ist nicht gesetzt.", parent=self ) return if self._is_recording: self._status_var.set("Verbindungstest waehrend Aufnahme nicht verfuegbar.") return self._status_var.set("Teste Verbindung zu OpenAI…") def worker(): try: # Kleiner API-Call ohne Audio-Upload fuer schnellen Verbindungscheck. self._client.models.list() self.after(0, lambda: self._status_var.set("Verbindung OK.")) self.after(0, lambda: messagebox.showinfo( "Verbindungstest", "Verbindung zu OpenAI ist in Ordnung.", parent=self )) except Exception as e: msg = _friendly_error_message(e) self.after(0, lambda m=msg: self._status_var.set(f"Fehler: {m.splitlines()[0][:80]}")) self.after(0, lambda m=msg: messagebox.showerror("Verbindungstest", m, parent=self)) threading.Thread(target=worker, daemon=True).start() # ── Sprachbefehl "Stop Diktat" ── def _transcribe_cmd(self, wav_path: str) -> str: """Kurze Transkription mit einfachem Prompt fuer Befehlserkennung.""" with open(wav_path, "rb") as f: is_gpt = "gpt-" in TRANSCRIBE_MODEL params = dict(model=TRANSCRIBE_MODEL, file=f, language="de") params["prompt"] = _CMD_PROMPT if not is_gpt: params["temperature"] = 0.0 resp = self._client.audio.transcriptions.create(**params) return getattr(resp, "text", "") or "" def _start_voice_cmd_monitor(self): self._voice_cmd_active = True def _monitor(): import numpy as np import time as _t last_check = 0.0 print("[VoiceCmd] Monitor gestartet", file=sys.stderr) while self._voice_cmd_active and self._is_recording: _t.sleep(0.3) if not self._voice_cmd_active or not self._is_recording: break rec = self._recorder if not rec or not rec._frames: continue sr = rec.samplerate try: frames_snap = list(rec._frames[-300:]) except Exception: continue if len(frames_snap) < 5: continue try: audio = np.concatenate(frames_snap, axis=0).flatten() except Exception: continue silence_n = int(_SILENCE_SEC * sr) if len(audio) < silence_n + int(0.5 * sr): continue tail = audio[-silence_n:] tail_rms = float(np.sqrt(np.mean(tail ** 2))) if tail_rms >= _SILENCE_RMS: continue now = _t.time() if now - last_check < _CMD_COOLDOWN_SEC: continue last_check = now speech_end = len(audio) - silence_n speech_start = max(0, speech_end - int(_CMD_SPEECH_SEC * sr)) speech = audio[speech_start:speech_end] speech_rms = float(np.sqrt(np.mean(speech ** 2))) if speech_rms < _SILENCE_RMS: continue print(f"[VoiceCmd] Stille erkannt (RMS={tail_rms:.4f}), " f"pruefe Sprache (RMS={speech_rms:.4f}, " f"{len(speech)/sr:.1f}s)", file=sys.stderr) try: pcm = (np.clip(speech, -1.0, 1.0) * 32767).astype(np.int16) fd, tmp = tempfile.mkstemp(suffix=".wav", prefix="vcmd_") os.close(fd) with wave.open(tmp, "wb") as wf: wf.setnchannels(1) wf.setsampwidth(2) wf.setframerate(sr) wf.writeframes(pcm.tobytes()) text = self._transcribe_cmd(tmp) try: os.remove(tmp) except Exception: pass print(f"[VoiceCmd] Erkannt: '{text}'", file=sys.stderr) if text: cmd = _detect_voice_command(text) if cmd == "close": print("[VoiceCmd] >>> DIKTAT SCHLIESSEN erkannt! <<<", file=sys.stderr) self._voice_cmd_active = False self.after(0, lambda: self._status_var.set( "Sprachbefehl erkannt: Schliessen\u2026")) self.after(100, self._do_close_from_voice) return if cmd == "stop": print("[VoiceCmd] >>> STOP DIKTAT erkannt! <<<", file=sys.stderr) self._voice_cmd_active = False self.after(0, lambda: self._status_var.set( "Sprachbefehl erkannt: Stop\u2026")) self.after(100, self._do_stop) return except Exception as exc: print(f"[VoiceCmd] Fehler: {exc}", file=sys.stderr) print("[VoiceCmd] Monitor beendet", file=sys.stderr) threading.Thread(target=_monitor, daemon=True).start() def _stop_voice_cmd_monitor(self): self._voice_cmd_active = False # ── Aufnahme ── def _do_start(self): if self._is_recording: return if _IMPORT_ERRORS: messagebox.showerror("Start-Fehler", "Fehlende Komponenten:\n- " + "\n- ".join(_IMPORT_ERRORS), parent=self) return if not self._client: messagebox.showerror("API-Key fehlt", "OPENAI_API_KEY ist nicht gesetzt.", parent=self) return self._start_recording() def _do_stop(self, close_after=False): if not self._is_recording: return self._stop_and_transcribe(close_after=close_after) def _do_close_from_voice(self): if self._is_recording: self._do_stop(close_after=True) else: self.destroy() def _start_recording(self): if AudioRecorder is None: messagebox.showerror("Start-Fehler", "AudioRecorder ist nicht verfuegbar (aza_audio.py fehlt).", parent=self) return if not self._recorder: self._recorder = AudioRecorder() try: self._recorder.start() self._is_recording = True self._update_btn_states() self._update_mini_states() self._start_rec_blink() self._start_voice_cmd_monitor() self._status_var.set( "Aufnahme l\u00e4uft\u2026 (\"Diktat stoppen\" oder \"Diktat schliessen\")" ) except Exception as e: messagebox.showerror("Aufnahme-Fehler", str(e), parent=self) self._status_var.set("Bereit.") def _stop_and_transcribe(self, close_after=False): self._is_recording = False self._stop_voice_cmd_monitor() self._stop_rec_blink() try: self._update_btn_states() self._update_mini_states() except Exception: pass self._status_var.set("Transkribiere\u2026") rec = self._recorder self._recorder = None def worker(): try: if rec is None: self.after(0, lambda: self._status_var.set("Kein Recorder aktiv.")) return wav_path = rec.stop_and_save_wav() try: with wave.open(wav_path, "rb") as wf: duration = wf.getnframes() / float(wf.getframerate()) if duration < 0.3: if os.path.exists(wav_path): os.remove(wav_path) self.after(0, lambda: self._status_var.set("Kein Audio erkannt.")) if close_after: self.after(200, self.destroy) return except Exception: pass text = self._transcribe(wav_path) try: if os.path.exists(wav_path): os.remove(wav_path) except Exception: pass if not text or not text.strip(): self.after(0, lambda: self._status_var.set("Kein Text erkannt.")) if close_after: self.after(200, self.destroy) return text = apply_punctuation(text) text = text.replace("\u00df", "ss") text = _TRAILING_CMD_RE.sub("", text).rstrip() if not text or not text.strip(): self.after(0, lambda: self._status_var.set("Kein Text erkannt.")) if close_after: self.after(200, self.destroy) return save_msg = "" try: saved_path = _auto_save(text) save_msg = f" Gespeichert \u2713" except Exception as save_err: save_msg = f" Speichern fehlgeschlagen: {str(save_err)[:50]}" def _insert_and_maybe_close(t=text, sm=save_msg): self._insert_text(t, sm) if close_after: self.after(200, self.destroy) self.after(0, _insert_and_maybe_close) except Exception as e: self.after(0, lambda err=e: self._on_error(err)) threading.Thread(target=worker, daemon=True).start() def _transcribe(self, wav_path: str) -> str: with open(wav_path, "rb") as f: is_gpt = "gpt-" in TRANSCRIBE_MODEL params = dict(model=TRANSCRIBE_MODEL, file=f, language="de") params["prompt"] = WHISPER_MEDICAL_PROMPT if not is_gpt: params["temperature"] = 0.0 last_err = None for attempt in range(3): try: resp = self._client.audio.transcriptions.create(**params) break except Exception as e: last_err = e if attempt < 2 and _is_connection_error_text(str(e)): time.sleep(1.0 + attempt) continue raise if last_err and "resp" not in locals(): raise last_err text = getattr(resp, "text", "") if text is None: text = "" if text.strip().startswith(WHISPER_PROMPT_PREFIX): text = "" return text def _insert_text(self, text: str, save_msg: str = ""): self._txt.configure(state="normal") pos = self._txt.index(tk.INSERT) self._txt.insert(pos, text) full = self._txt.get("1.0", "end").strip() if full and self._autocopy_var.get(): if not _win_clipboard_set(full): try: self.clipboard_clear() self.clipboard_append(_sanitize_markdown_for_plain_text(full)) except Exception: pass self._status_var.set(f"Fertig. Kopiert.{save_msg}") else: self._status_var.set(f"Fertig.{save_msg}") def _on_error(self, err): err_msg = _friendly_error_message(err) try: parent_win = getattr(self, "_proxy_win", self) messagebox.showerror("Fehler", err_msg, parent=parent_win) except Exception: messagebox.showerror("Fehler", err_msg) try: self._status_var.set(f"Fehler: {err_msg.splitlines()[0][:80]}") except Exception: pass def _do_neu(self): if self._is_recording: if not messagebox.askyesno("Aufnahme l\u00e4uft", "Aktuelle Aufnahme verwerfen und neu starten?", parent=self): return self._is_recording = False self._stop_rec_blink() self._update_btn_states() self._update_mini_states() try: wav_path = self._recorder.stop_and_save_wav() if os.path.exists(wav_path): os.remove(wav_path) except Exception: pass self._recorder = None self._txt.configure(state="normal") self._txt.delete("1.0", "end") self._status_var.set("Bereit.") self._do_start() def _do_kopieren(self): text = self._txt.get("1.0", "end").strip() if text: if not _win_clipboard_set(text): try: self.clipboard_clear() self.clipboard_append(_sanitize_markdown_for_plain_text(text)) except Exception: pass self._status_var.set("Audio-Notiz kopiert.") else: self._status_var.set("Nichts zum Kopieren.") if __name__ == "__main__": app = DiktatApp() app.mainloop()