Files
aza/AzA march 2026/apps/diktat/diktat_app.py
2026-03-25 22:03:39 +01:00

1265 lines
49 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
AZA MedWork Audio-Notiz (Standalone)
Eigenstaendige Audio-Notiz (Transkription).
Gleicher Look wie im Hauptfenster.
Start:
python diktat_app.py
ODER Doppelklick auf "Audio-Notiz starten.bat"
"""
import os
import re
import sys
import tempfile
import threading
import time
import wave
import json
import tkinter as tk
from tkinter import ttk, messagebox
from tkinter.scrolledtext import ScrolledText
from datetime import datetime
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
def _find_project_root(start_dir: str) -> str:
"""Sucht den Projektordner, der aza_audio.py enthaelt."""
cur = os.path.abspath(start_dir)
for _ in range(8):
if os.path.isfile(os.path.join(cur, "aza_audio.py")):
return cur
parent = os.path.dirname(cur)
if parent == cur:
break
cur = parent
return os.path.normpath(os.path.join(start_dir, "..", ".."))
_PROJECT_ROOT = _find_project_root(_APP_DIR)
# Put the project root at the front of sys.path so modules located there
# (e.g. aza_audio) can be imported from this add-on folder.
if _PROJECT_ROOT not in sys.path:
    sys.path.insert(0, _PROJECT_ROOT)
# Human-readable notes about optional dependencies that failed to import;
# shown to the user once the window is up.
_IMPORT_ERRORS = []
try:
    from dotenv import load_dotenv
except Exception:
    load_dotenv = None
    _IMPORT_ERRORS.append("python-dotenv fehlt")
if load_dotenv:
    load_dotenv(os.path.join(_PROJECT_ROOT, ".env"))
    # Fallback: local .env in the add-on folder
    load_dotenv(os.path.join(_APP_DIR, ".env"))
def _apply_proxy_env_from_openai_vars():
"""Mappt OPENAI_*_PROXY auf Standard-Proxy-Variablen."""
http_proxy = os.getenv("OPENAI_HTTP_PROXY", "").strip()
https_proxy = os.getenv("OPENAI_HTTPS_PROXY", "").strip()
if http_proxy:
os.environ["HTTP_PROXY"] = http_proxy
os.environ["http_proxy"] = http_proxy
if https_proxy:
os.environ["HTTPS_PROXY"] = https_proxy
os.environ["https_proxy"] = https_proxy
_apply_proxy_env_from_openai_vars()
try:
from openai import OpenAI
except Exception:
OpenAI = None
_IMPORT_ERRORS.append("openai fehlt")
try:
from aza_audio import AudioRecorder
except Exception:
AudioRecorder = None
_IMPORT_ERRORS.append("aza_audio.py nicht gefunden")
def _get_data_dir() -> str:
"""Schreibbares Datenverzeichnis (%APPDATA%\\AZA Desktop)."""
try:
from aza_config import get_writable_data_dir
return get_writable_data_dir()
except Exception:
return os.path.join(os.path.expanduser("~"), "AppData", "Roaming", "AZA Desktop")
# Resolved once at import time; settings and saved notes live below it.
_DATA_DIR = _get_data_dir()
# JSON file holding the window settings read/written by
# _load_settings() and _save_settings().
_SETTINGS_FILE = os.path.join(_DATA_DIR, "audio_notiz_settings.json")
# Transcription model name; can be overridden with the TRANSCRIBE_MODEL
# environment variable.
TRANSCRIBE_MODEL = os.getenv("TRANSCRIBE_MODEL", "gpt-4o-mini-transcribe")
# German medical vocabulary (dermatology terms, general clinical terms, drug
# classes, abbreviations and a few month/year combinations).
# NOTE(review): presumably sent as the transcription prompt so these terms
# are spelled correctly; the call site is not visible in this part of the
# file. The hard-coded "... 2026" month entries will go stale.
WHISPER_MEDICAL_PROMPT = (
    "Medizinische Dokumentation auf Deutsch. "
    "Capillitium, Fotodynamische Therapie, PDT, Basalzellkarzinom, Plattenepithelkarzinom, "
    "Spinaliom, Spinaliom der Haut, Spinalzellkarzinom, "
    "Melanom, Exzision, Biopsie, Kryotherapie, Kuerettage, Histologie, Dermatoskopie, "
    "Naevus, Naevi, Naevuszellnaevus, dysplastischer Naevus, "
    "Compound-Naevus, junktionaler Naevus, dermaler Naevus, Spitz-Naevus, "
    "Erythem, Papel, Pustel, Makula, Plaque, Nodulus, Nodus, "
    "Vesikel, Bulla, Erosion, Ulkus, Rhagade, Kruste, Squama, "
    "Effloreszenzen, Lichenifikation, Exkoriation, "
    "seborrhoische Keratose, Fibrom, Lipom, Atherom, Epidermoidzyste, "
    "Verruca vulgaris, Verrucae, Kondylome, Molluscum contagiosum, "
    "Haemangiom, Angiom, Keloid, hypertrophe Narbe, "
    "Tinea, Mykose, Onychomykose, Herpes simplex, Herpes zoster, "
    "Erysipel, Impetigo, Abszess, Phlegmone, Skabies, "
    "Pemphigus, Pemphigoid, Lichen ruber, Lichen sclerosus, "
    "Vitiligo, Pruritus, Prurigo, Mykosis fungoides, "
    "Shave-Biopsie, Stanzbiopsie, Inzisionsbiopsie, "
    "Breslow-Dicke, Clark-Level, Sentinel-Lymphknoten, "
    "Auflichtmikroskopie, Phototherapie, UVB, PUVA, "
    "Anamnese, Diagnose, Therapie, Procedere, subjektiv, objektiv, "
    "Abdomen, Thorax, Extremitaeten, zervikal, lumbal, thorakal, sakral, "
    "Sonographie, Roentgen, MRI, CT, EKG, Laborwerte, Blutbild, "
    "Hypertonie, Diabetes mellitus, Hypercholesterinaemie, Hypothyreose, "
    "Antikoagulation, Thrombozytenaggregationshemmer, NSAR, ACE-Hemmer, "
    "Immunsuppression, Kortikosteroide, Biologika, Methotrexat, "
    "Psoriasis, Ekzem, Dermatitis, Urtikaria, Alopezie, Akne, Rosazea, "
    "Aktinische Keratose, Morbus Bowen, Lentigo maligna, "
    "Januar 2026, Februar 2026, Maerz 2026, April 2026, Mai 2026, "
    "Status nach, Z.n., s/p, i.v., p.o., s.c., "
    "ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief."
)
# First sentence of WHISPER_MEDICAL_PROMPT.
# NOTE(review): presumably used to detect the prompt being echoed back in a
# transcript - confirm against the code that consumes it.
WHISPER_PROMPT_PREFIX = "Medizinische Dokumentation auf Deutsch"
# ── Colours (identical to the main window) ──
BG = "#B9ECFA"             # window / frame background
BTN_BG = "#7EC8E3"         # RoundedButton fill
BTN_FG = "#1a4d6d"         # RoundedButton text
BTN_ACTIVE = "#5AB9E8"     # RoundedButton fill while hovered
HDR_FG = "#1a4d6d"         # header and label text
STATUS_BG = "#FFE4CC"      # status bar background
STATUS_FG = "#BD4500"      # status bar text
TXT_BG = "#F5FCFF"         # transcript text field background
MINI_FG = "#5A90B0"        # small label-style controls
MINI_FG_HOVER = "#1a4d6d"  # small controls while hovered
REC_RED = "#D04040"        # record button / mini toggle while recording
REC_DOT = "#FF3030"        # blinking recording dot
# Folder that _auto_save() writes the timestamped .txt notes into.
_SAVE_DIR = os.path.join(_DATA_DIR, "kg_diktat_ablage", "Audio_Notiz")
# Logo used for the header image and the window icon (see _load_logo).
_LOGO_PATH = os.path.join(_PROJECT_ROOT, "logo.png")
# Matches a spoken stop/close command ("Stopp Diktat", "Diktat beenden",
# "App schliessen", ...) at the very end of a string, including surrounding
# punctuation and whitespace. "Diktad" is accepted as a spelling variant.
# NOTE(review): presumably used to strip the command from the final
# transcript; the call site is not visible in this part of the file.
_TRAILING_CMD_RE = re.compile(
    r"[.,;:\s]*(?:"
    r"(?:stopp?|stoppen)\s*dikt(?:at|ad)|"
    r"dikt(?:at|ad)\s*(?:stopp?|stoppen)|"
    r"(?:schliessen|schließen|beenden)\s*(?:dikt(?:at|ad)|app)|"
    r"(?:dikt(?:at|ad)|app)\s*(?:schliessen|schließen|beenden)"
    r")[.,;:\s]*$",
    re.IGNORECASE
)
# Voice-command monitor tuning (see DiktatApp._start_voice_cmd_monitor):
# RMS below this value counts as silence.
_SILENCE_RMS = 0.04
# Length of the trailing window (seconds) that must be silent.
_SILENCE_SEC = 1.5
# Minimum time (seconds) between two command transcription attempts.
_CMD_COOLDOWN_SEC = 3.0
# Maximum amount of speech (seconds) before the silence that is transcribed.
_CMD_SPEECH_SEC = 5.0
# Prompt sent along with the short command transcription request.
_CMD_PROMPT = (
    "Sprachbefehl auf Deutsch. "
    "Stop Diktat, Stopp Diktat, Diktat stoppen, Diktat schliessen, Diktat beenden."
)
def _friendly_error_message(err) -> str:
raw = str(err or "").strip()
text = raw.lower()
if ("connection error" in text or "api connection" in text or
"timed out" in text or "timeout" in text or
"name or service not known" in text or
"temporary failure in name resolution" in text):
return (
"Verbindungsfehler zu OpenAI.\n\n"
"Bitte pruefen:\n"
"- Internetverbindung aktiv\n"
"- VPN/Proxy/Firewall blockiert nicht\n"
"- OpenAI-Dienst erreichbar\n\n"
"Optional (.env):\n"
"- OPENAI_HTTP_PROXY=http://user:pass@proxy:port\n"
"- OPENAI_HTTPS_PROXY=http://user:pass@proxy:port\n"
"- OPENAI_BASE_URL=https://... (nur bei Gateway)\n\n"
f"Technischer Hinweis: {raw[:140]}"
)
if ("invalid_api_key" in text or "incorrect api key" in text or
"authentication" in text or "401" in text):
return (
"API-Key ist ungueltig oder fehlt.\n\n"
"Bitte OPENAI_API_KEY in der .env pruefen."
)
if ("rate limit" in text or "429" in text or "quota" in text):
return (
"OpenAI-Limit erreicht (Rate Limit / Kontingent).\n\n"
"Bitte kurz warten oder Abrechnung/Kontingent pruefen."
)
return raw[:200] if raw else "Unbekannter Fehler."
def _is_connection_error_text(msg: str) -> bool:
t = (msg or "").lower()
return (
"connection error" in t or
"api connection" in t or
"timed out" in t or
"timeout" in t or
"name or service not known" in t or
"temporary failure in name resolution" in t
)
def _detect_voice_command(text):
cleaned = (text or "").strip().lower()
if not cleaned:
return None
cleaned = cleaned.replace("ß", "ss")
cleaned = re.sub(r"[^a-z0-9äöü\s]", " ", cleaned)
cleaned = re.sub(r"\s+", " ", cleaned).strip()
close_patterns = (
r"\bdikt(?:at|ad)\s*(?:schliessen|beenden)\b",
r"\b(?:schliessen|beenden)\s*dikt(?:at|ad)\b",
r"\bapp\s*(?:schliessen|beenden)\b",
)
for pat in close_patterns:
if re.search(pat, cleaned):
return "close"
stop_patterns = (
r"\bdikt(?:at|ad)\s*(?:stop|stopp|stoppen)\b",
r"\b(?:stop|stopp|stoppen)\s*dikt(?:at|ad)\b",
)
for pat in stop_patterns:
if re.search(pat, cleaned):
return "stop"
return None
def _win_clipboard_set(text: str) -> bool:
if sys.platform != "win32":
return False
try:
import ctypes
from ctypes import wintypes
CF_UNICODETEXT = 13
GMEM_DDESHARE = 0x2000
kernel32 = ctypes.WinDLL("kernel32")
user32 = ctypes.WinDLL("user32")
user32.OpenClipboard.argtypes = [wintypes.HWND]
user32.OpenClipboard.restype = wintypes.BOOL
user32.CloseClipboard.argtypes = []
user32.EmptyClipboard.argtypes = []
user32.SetClipboardData.argtypes = [wintypes.UINT, wintypes.HANDLE]
user32.SetClipboardData.restype = wintypes.HANDLE
kernel32.GlobalAlloc.argtypes = [wintypes.UINT, ctypes.c_size_t]
kernel32.GlobalAlloc.restype = wintypes.HANDLE
kernel32.GlobalLock.argtypes = [wintypes.HANDLE]
kernel32.GlobalLock.restype = ctypes.c_void_p
kernel32.GlobalUnlock.argtypes = [wintypes.HANDLE]
encoded = text.encode("utf-16-le") + b"\x00\x00"
hMem = kernel32.GlobalAlloc(GMEM_DDESHARE, len(encoded))
pMem = kernel32.GlobalLock(hMem)
ctypes.memmove(pMem, encoded, len(encoded))
kernel32.GlobalUnlock(hMem)
user32.OpenClipboard(0)
user32.EmptyClipboard()
user32.SetClipboardData(CF_UNICODETEXT, hMem)
user32.CloseClipboard()
return True
except Exception:
return False
def _sanitize_markdown_for_plain_text(raw_text: str) -> str:
lines = (raw_text or "").replace("\r\n", "\n").replace("\r", "\n").split("\n")
out_lines = []
for raw_line in lines:
line = raw_line
line = re.sub(r"^\s*#{1,6}\s+", "", line)
line = re.sub(r"^\s*\d+\.\s+", "", line)
line = re.sub(r"^\s*[-*•]\s+", "", line)
line = re.sub(r"\*\*(.+?)\*\*", r"\1", line)
line = re.sub(r"__(.+?)__", r"\1", line)
line = re.sub(r"(?<!\*)\*(?!\s)(.+?)(?<!\s)\*(?!\*)", r"\1", line)
line = re.sub(r"(?<!_)_(?!\s)(.+?)(?<!\s)_(?!_)", r"\1", line)
out_lines.append(line)
return "\n".join(out_lines).strip()
def _load_settings() -> dict:
    """Read the persisted settings from ``_SETTINGS_FILE``.

    Returns an empty dict when the file is missing, unreadable, not valid
    JSON, or when its top-level value is not a JSON object - callers use
    ``.get()`` on the result, so anything but a dict would crash them.
    """
    try:
        with open(_SETTINGS_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file, or broken JSON / bad encoding (both are
        # ValueError subclasses): fall back to defaults.
        return {}
    return loaded if isinstance(loaded, dict) else {}
def _save_settings(data: dict):
    """Persist ``data`` as JSON to ``_SETTINGS_FILE`` (best effort).

    The payload is serialised before the file is opened, so a value that
    cannot be serialised no longer truncates the existing settings file.
    The parent directory is created when missing. I/O and serialisation
    errors are ignored on purpose: failing to save settings must never
    interrupt the UI.
    """
    try:
        payload = json.dumps(data, indent=2)
        folder = os.path.dirname(_SETTINGS_FILE)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(_SETTINGS_FILE, "w", encoding="utf-8") as f:
            f.write(payload)
    except (OSError, TypeError, ValueError):
        pass
def _auto_save(text: str) -> str:
    """Save ``text`` as a timestamped .txt file and return its path.

    The file is created in ``_SAVE_DIR`` (created when missing) and named
    ``AudioNotiz_<YYYY-MM-DD_HH-MM-SS>.txt``. If a file for the same second
    already exists, ``_1``, ``_2``, ... is appended instead of overwriting
    the earlier note.

    Raises:
        OSError: if the directory or file cannot be created or written.
    """
    os.makedirs(_SAVE_DIR, exist_ok=True)
    ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    stem = f"AudioNotiz_{ts}"
    attempt = 0
    while True:
        filename = f"{stem}.txt" if attempt == 0 else f"{stem}_{attempt}.txt"
        path = os.path.join(_SAVE_DIR, filename)
        try:
            # Mode "x" refuses to open an existing file, which makes the
            # existence check and the creation a single atomic step.
            with open(path, "x", encoding="utf-8") as f:
                f.write(text)
        except FileExistsError:
            attempt += 1
            continue
        return path
def apply_punctuation(text: str) -> str:
    """Replace spoken German formatting words with the symbols they name.

    Applied in this order, all case-insensitively:
      1. line/paragraph commands and punctuation words ("neue Zeile",
         "Punkt", "Komma", ...), which must be preceded by whitespace,
      2. enumerators "erstens" .. "zehntens" -> "1." .. "10.",
      3. spelled-out years -> digits,
      4. spelled-out day ordinals directly before a month name -> "<n>.".

    Empty or whitespace-only input (including ``None``) is returned as is.
    """
    if not text or not text.strip():
        return text
    ci = re.IGNORECASE

    spoken_marks = (
        (r"\s+neuer\s+Absatz\s*", "\n\n"),
        (r"\s+neue\s+Zeile\s*", "\n"),
        (r"\s+Zeilenumbruch\s*", "\n"),
        (r"\s+Absatz\s+", "\n\n"),
        (r"\s+Absatz\s*$", "\n\n"),
        (r"\s+Absatzzeichen\s*", "\n\n"),
        (r"\s+Punkt\s+", ". "),
        (r"\s+Punkt\s*$", "."),
        (r"\s+Komma\s+", ", "),
        (r"\s+Komma\s*$", ","),
        (r"\s+Semikolon\s+", "; "),
        (r"\s+Semikolon\s*$", ";"),
        (r"\s+Strichpunkt\s+", "; "),
        (r"\s+Strichpunkt\s*$", ";"),
        (r"\s+Doppelpunkt\s+", ": "),
        (r"\s+Doppelpunkt\s*$", ":"),
        (r"\s+Fragezeichen\s+", "? "),
        (r"\s+Fragezeichen\s*$", "?"),
        (r"\s+Ausrufezeichen\s+", "! "),
        (r"\s+Ausrufezeichen\s*$", "!"),
        (r"\s+Gedankenstrich\s+", " \u2013 "),
        (r"\s+Gedankenstrich\s*$", " \u2013"),
        (r"\s+Bindestrich\s+", "-"),
        (r"\s+Schr\u00e4gstrich\s+", "/"),
        (r"\s+Klammer\s+auf\s+", " ("),
        (r"\s+Klammer\s+zu\s+", ") "),
        (r"\s+Auslassungspunkte\s+", "\u2026 "),
        (r"\s+Auslassungspunkte\s*$", "\u2026"),
    )
    enumerators = (
        (r"\b(erstens)\b", "1."), (r"\b(zweitens)\b", "2."),
        (r"\b(drittens)\b", "3."), (r"\b(viertens)\b", "4."),
        (r"\b(f\u00fcnftens)\b", "5."), (r"\b(sechstens)\b", "6."),
        (r"\b(siebtens)\b", "7."), (r"\b(achtens)\b", "8."),
        (r"\b(neuntens)\b", "9."), (r"\b(zehntens)\b", "10."),
    )
    result = text
    for pattern, replacement in spoken_marks + enumerators:
        result = re.sub(pattern, replacement, result, flags=ci)

    year_by_word = {
        "zweitausendzwanzig": "2020", "zweitausendeinundzwanzig": "2021",
        "zweitausendzweiundzwanzig": "2022", "zweitausenddreiundzwanzig": "2023",
        "zweitausendvierundzwanzig": "2024", "zweitausendf\u00fcnfundzwanzig": "2025",
        "zweitausendsechsundzwanzig": "2026", "zweitausendsiebenundzwanzig": "2027",
        "zweitausendachtundzwanzig": "2028", "zweitausendneunundzwanzig": "2029",
        "zweitausenddreissig": "2030", "zweitausenddrei\u00dfig": "2030",
        "neunzehnhundertneunzig": "1990", "zweitausend": "2000",
    }
    # Longest words first, as in every replacement table of this function.
    for spoken_year in sorted(year_by_word, key=len, reverse=True):
        result = re.sub(r"\b" + spoken_year + r"\b", year_by_word[spoken_year],
                        result, flags=ci)

    day_by_word = {
        "ersten": "1.", "zweiten": "2.", "dritten": "3.", "vierten": "4.",
        "f\u00fcnften": "5.", "sechsten": "6.", "siebten": "7.", "achten": "8.",
        "neunten": "9.", "zehnten": "10.", "elften": "11.", "zw\u00f6lften": "12.",
        "dreizehnten": "13.", "vierzehnten": "14.", "f\u00fcnfzehnten": "15.",
        "sechzehnten": "16.", "siebzehnten": "17.", "achtzehnten": "18.",
        "neunzehnten": "19.", "zwanzigsten": "20.", "einundzwanzigsten": "21.",
        "zweiundzwanzigsten": "22.", "dreiundzwanzigsten": "23.",
        "vierundzwanzigsten": "24.", "f\u00fcnfundzwanzigsten": "25.",
        "sechsundzwanzigsten": "26.", "siebenundzwanzigsten": "27.",
        "achtundzwanzigsten": "28.", "neunundzwanzigsten": "29.",
        "dreissigsten": "30.", "drei\u00dfigsten": "30.",
        "einunddreissigsten": "31.", "einunddrei\u00dfigsten": "31.",
    }
    month_alternatives = (r"(?:Januar|Februar|M\u00e4rz|April|Mai|Juni|Juli|August|"
                          r"September|Oktober|November|Dezember)")
    for spoken_day in sorted(day_by_word, key=len, reverse=True):
        numeral = day_by_word[spoken_day]

        def with_numeral(match, numeral=numeral):
            # Keep the month exactly as it was spoken/written.
            return numeral + " " + match.group(0).split()[-1]

        result = re.sub(r"\b" + spoken_day + r"\s+" + month_alternatives,
                        with_numeral, result, flags=ci)
    return result
# ── RoundedButton (gleich wie Hauptfenster) ──
class RoundedButton(tk.Canvas):
    """Canvas-drawn push button with rounded corners and a hover colour.

    The widget is a ``tk.Canvas`` that paints four corner arcs, two
    rectangles and a centred text label. It understands the extra option
    ``text`` in ``configure``; every other option goes to the canvas.
    """

    def __init__(self, parent, text, command=None, bg=BTN_BG, fg=BTN_FG,
                 active_bg=BTN_ACTIVE, radius=8, width=120, height=26,
                 canvas_bg=None, **kw):
        """Create the button.

        Args:
            parent: Parent widget.
            text: Label drawn in the centre.
            command: Callable invoked without arguments on left click.
            bg, fg: Fill and text colour of the button shape.
            active_bg: Fill colour while the pointer is over the button.
            radius: Corner radius in pixels.
            width, height: Requested canvas size in pixels.
            canvas_bg: Background of the canvas behind the rounded shape;
                when given it overrides any ``bg`` entry in ``kw``.
            **kw: Further ``tk.Canvas`` options.
        """
        kw.setdefault("highlightthickness", 0)
        if canvas_bg is not None:
            kw["bg"] = canvas_bg
        super().__init__(parent, width=width, height=height, **kw)
        self._command = command
        self._bg = bg
        self._fg = fg
        self._active_bg = active_bg
        self._radius = radius
        self._text = text
        self.bind("<Button-1>", self._on_click)
        self.bind("<Enter>", self._on_enter)
        self.bind("<Leave>", self._on_leave)
        self.bind("<Configure>", lambda e: self._draw())
        self._draw()

    def _draw(self, bg=None):
        """Repaint the button, optionally with a different fill colour."""
        self.delete("all")
        w = self.winfo_width()
        h = self.winfo_height()
        # An unmapped widget reports 1x1; use the requested size until the
        # first <Configure> event delivers the real one.
        if w <= 1:
            w = int(self["width"])
        if h <= 1:
            h = int(self["height"])
        r = self._radius
        c = bg or self._bg
        self.create_arc(0, 0, 2*r, 2*r, start=90, extent=90, fill=c, outline=c)
        self.create_arc(w-2*r, 0, w, 2*r, start=0, extent=90, fill=c, outline=c)
        self.create_arc(0, h-2*r, 2*r, h, start=180, extent=90, fill=c, outline=c)
        self.create_arc(w-2*r, h-2*r, w, h, start=270, extent=90, fill=c, outline=c)
        self.create_rectangle(r, 0, w-r, h, fill=c, outline=c)
        self.create_rectangle(0, r, w, h-r, fill=c, outline=c)
        self.create_text(w//2, h//2, text=self._text, fill=self._fg,
                         font=("Segoe UI", 9))

    def configure(self, cnf=None, **kw):
        """Configure the widget; ``text`` updates the drawn label.

        Accepts the same ``cnf``/keyword forms as ``tk.Canvas.configure``
        (tkinter itself calls ``configure({key: value})`` for
        ``widget[key] = value``). Returns what the canvas returns, or
        ``None`` when only ``text`` was given.
        """
        if isinstance(cnf, dict):
            kw = {**cnf, **kw}
            cnf = None
        text_changed = "text" in kw
        if text_changed:
            self._text = kw.pop("text")
            self._draw()
        if text_changed and cnf is None and not kw:
            return None
        return super().configure(cnf, **kw)

    # Keep the tkinter alias in sync with the override above.
    config = configure

    def _on_click(self, e):
        """Invoke the command, if one was supplied."""
        if self._command:
            self._command()

    def _on_enter(self, e):
        """Show the hover colour."""
        self._draw(self._active_bg)

    def _on_leave(self, e):
        """Return to the normal colour."""
        self._draw()
class DiktatApp(tk.Tk):
def __init__(self, _as_toplevel_of=None):
    """Create the window, standalone or embedded in an existing Tk app.

    Args:
        _as_toplevel_of: When given, the app lives in a ``tk.Toplevel`` of
            that widget and ``tk.Tk.__init__`` is never called for this
            object. Selected window methods of the Toplevel are copied
            onto ``self`` so the rest of the class can keep calling
            ``self.geometry(...)``, ``self.after(...)`` and so on.
            When ``None``, a regular ``tk.Tk`` root window is created.
    """
    if _as_toplevel_of is not None:
        # NOTE: this branch used to replace tk.Tk.__init__ with a no-op for
        # the whole process. It never calls Tk.__init__, so the patch did
        # nothing here but broke every later tk.Tk() in the host
        # application; it has been removed.
        self._toplevel = tk.Toplevel(_as_toplevel_of)
        self._toplevel.title("Audio-Notiz \u2013 nur Transkription")
        self._toplevel.configure(bg=BG)
        self._toplevel.attributes("-topmost", True)
        self._is_embedded = True
        self._proxy_win = self._toplevel
        for attr in ("title", "configure", "attributes", "geometry", "minsize",
                     "protocol", "bind", "after", "update_idletasks",
                     "winfo_screenwidth", "winfo_screenheight", "winfo_width",
                     "winfo_height", "winfo_x", "winfo_y", "winfo_exists",
                     "deiconify", "iconify", "withdraw", "lift", "focus_force",
                     "destroy", "overrideredirect", "wm_attributes",
                     "winfo_toplevel", "tk"):
            # "destroy" is listed but deliberately not rebound here.
            if hasattr(self._toplevel, attr) and attr != "destroy":
                try:
                    setattr(self, attr, getattr(self._toplevel, attr))
                except (AttributeError, TypeError):
                    pass
        self._toplevel.protocol("WM_DELETE_WINDOW", self._safe_destroy)
        self._init_app()
        return
    super().__init__()
    self._is_embedded = False
    self._proxy_win = self
    self.title("Audio-Notiz \u2013 nur Transkription")
    self.configure(bg=BG)
    self.attributes("-topmost", True)
    self._init_app()
def _safe_destroy(self):
    """Close handler for the embedded Toplevel (WM_DELETE_WINDOW).

    Stops a running recording and then destroys the Toplevel. Every step is
    best effort: errors are swallowed so closing the window never raises.
    """
    try:
        if self._is_recording:
            self._is_recording = False
            # Also ends the voice-command monitor loop.
            self._voice_cmd_active = False
            if self._recorder:
                try:
                    # Stops the recorder; the resulting WAV path is discarded.
                    self._recorder.stop_and_save_wav()
                except Exception:
                    pass
                self._recorder = None
    except Exception:
        pass
    try:
        if hasattr(self, "_toplevel") and self._toplevel.winfo_exists():
            self._toplevel.destroy()
    except Exception:
        pass
def _init_app(self):
    """Shared initialisation for standalone and embedded mode.

    Loads logo and settings, restores window geometry and font size,
    creates the OpenAI client when a key is available, builds the UI and
    schedules an error dialog when dependencies or the API key are missing.
    Persisted values are validated so a damaged settings file cannot stop
    the app from starting.
    """
    self._logo_photo = None
    self._logo_photo_small = None
    self._load_logo()
    self._settings = _load_settings()
    if not isinstance(self._settings, dict):
        self._settings = {}
    default_geom = "300x290"
    saved_geom = self._settings.get("geometry", default_geom)
    if not isinstance(saved_geom, str) or not saved_geom:
        saved_geom = default_geom
    try:
        self.geometry(saved_geom)
    except tk.TclError:
        # Stored string is not a valid geometry: use the default size.
        saved_geom = default_geom
        self.geometry(saved_geom)
    self.minsize(300, 280)
    if "+" not in saved_geom:
        # No stored position yet: centre the window on the screen.
        self.update_idletasks()
        sw = self.winfo_screenwidth()
        sh = self.winfo_screenheight()
        self.geometry(f"+{(sw - 300) // 2}+{(sh - 290) // 2}")
    api_key = None
    try:
        from openai_runtime_config import get_openai_api_key
        api_key = get_openai_api_key()
    except Exception:
        pass
    if not api_key:
        api_key = os.getenv("OPENAI_API_KEY", "").strip()
    base_url = os.getenv("OPENAI_BASE_URL", "").strip()
    self._client = None
    if OpenAI and api_key:
        client_kwargs = dict(api_key=api_key, timeout=60.0, max_retries=2)
        if base_url:
            client_kwargs["base_url"] = base_url
        self._client = OpenAI(**client_kwargs)
    self._recorder = None
    self._is_recording = False
    self._minimized = False
    self._geom_before = None
    self._restoring = False
    try:
        font_size = int(self._settings.get("font_size", 8))
    except (TypeError, ValueError):
        font_size = 8
    # Same bounds as _change_font().
    self._font_size = max(5, min(20, font_size))
    self._build_ui()
    try:
        from aza_global_paste import start_global_right_click_paste_listener
        start_global_right_click_paste_listener()
    except Exception:
        pass
    self._save_geom_after_id = None
    self.bind("<Configure>", self._on_configure)
    if _IMPORT_ERRORS:
        self.after(500, lambda: messagebox.showerror(
            "Start-Fehler",
            "Audio-Notiz konnte nicht vollstaendig starten:\n- "
            + "\n- ".join(_IMPORT_ERRORS)
            + "\n\nTipps:\n"
            "1) Starte die App im Projektordner.\n"
            "2) Installiere fehlende Pakete mit pip.",
            parent=self))
        return
    if not self._client:
        self.after(500, lambda: messagebox.showerror(
            "API-Key fehlt",
            "OPENAI_API_KEY ist nicht gesetzt.\n\n"
            "Lege eine '.env' Datei an:\nOPENAI_API_KEY=sk-...",
            parent=self))
def _load_logo(self):
    """Load the logo for the header and set it as the window icon.

    Best effort: if the logo file, Pillow or icon support is missing the
    app simply runs without a logo. The image file is closed after use and
    the temporary .ico file is removed even when setting the icon fails.
    """
    try:
        if not os.path.exists(_LOGO_PATH):
            return
        from PIL import Image, ImageTk
        with Image.open(_LOGO_PATH) as img:
            self._logo_photo = ImageTk.PhotoImage(img.resize((24, 24), Image.Resampling.LANCZOS))
            self._logo_photo_small = ImageTk.PhotoImage(img.resize((18, 18), Image.Resampling.LANCZOS))
            img_icon = img.resize((57, 57), Image.Resampling.LANCZOS)
        # Close our handle before Pillow writes to the path; an open
        # handle can block the second open on Windows.
        fd, ico_path = tempfile.mkstemp(suffix=".ico")
        os.close(fd)
        try:
            img_icon.save(ico_path, format="ICO")
            self.iconbitmap(ico_path)
        finally:
            try:
                os.unlink(ico_path)
            except OSError:
                pass
    except Exception:
        pass
# ── Drag (Fenster verschieben via Header/Logo) ──
def _drag_start(self, e):
self._drag_x = e.x_root - self.winfo_x()
self._drag_y = e.y_root - self.winfo_y()
def _drag_move(self, e):
x = e.x_root - self._drag_x
y = e.y_root - self._drag_y
self.geometry(f"+{x}+{y}")
def _make_draggable(self, widget):
widget.configure(cursor="fleur")
widget.bind("<Button-1>", self._drag_start)
widget.bind("<B1-Motion>", self._drag_move)
# ── UI ──
def _build_ui(self):
    """Create and pack all widgets of the full-size window.

    Layout from top to bottom: draggable header (logo, title, minimise
    button), caption row with font-size controls, transcript text field,
    status bar, option checkboxes and the button row.
    """
    self._drag_x = 0
    self._drag_y = 0
    # Header
    self._header = tk.Frame(self, bg=BG)
    self._header.pack(fill="x")
    self._logo_lbl = None
    if self._logo_photo:
        self._logo_lbl = tk.Label(self._header, image=self._logo_photo, bg=BG)
        self._logo_lbl.pack(side="left", padx=(8, 0), pady=4)
        self._make_draggable(self._logo_lbl)
    self._title_lbl = tk.Label(self._header, text="Audio-Notiz", font=("Segoe UI", 12, "bold"),
                               bg=BG, fg=HDR_FG)
    self._title_lbl.pack(side="left", padx=(4, 0), pady=6)
    self._make_draggable(self._title_lbl)
    self._make_draggable(self._header)
    # Minimize button
    self._btn_minimize = tk.Label(self._header, text="\u2014", font=("Segoe UI", 12, "bold"),
                                  bg=BG, fg=MINI_FG, cursor="hand2", padx=6)
    self._btn_minimize.pack(side="right", padx=(0, 8))
    self._btn_minimize.bind("<Button-1>", lambda e: self._toggle_minimize())
    self._btn_minimize.bind("<Enter>", lambda e: self._btn_minimize.configure(fg=MINI_FG_HOVER))
    self._btn_minimize.bind("<Leave>", lambda e: self._btn_minimize.configure(fg=MINI_FG))
    # Recording indicator (red dot, hidden initially); packed by
    # _start_rec_blink() and removed again by _stop_rec_blink().
    self._rec_dot = tk.Label(self._header, text="", font=("Segoe UI", 8),
                             bg=BG, fg=REC_DOT)
    self._rec_blink_id = None
    # Mini controls (hidden initially); created lazily in _minimize().
    self._mini_frame = None
    self._mini_status_bar = None
    # Main content
    self._main_f = tk.Frame(self, bg=BG, padx=12, pady=12)
    self._main_f.pack(fill="both", expand=True)
    # Caption + font-size controls
    label_frame = tk.Frame(self._main_f, bg=BG)
    label_frame.pack(fill="x", anchor="w")
    tk.Label(label_frame, text="Audio-Notiz (nur Transkription):", bg=BG, fg=HDR_FG,
             font=("Segoe UI", 9)).pack(side="left")
    # Font size "Aa" with up/down arrows (same as the main window)
    ctrl = tk.Frame(label_frame, bg=BG)
    ctrl.pack(side="right", padx=4)
    tk.Label(ctrl, text="Aa", font=("Segoe UI", 8), bg=BG, fg=MINI_FG).pack(side="left", padx=(0, 1))
    self._size_lbl = tk.Label(ctrl, text=str(self._font_size), font=("Segoe UI", 8),
                              bg=BG, fg=MINI_FG, width=2, anchor="center")
    self._size_lbl.pack(side="left")
    btn_up = tk.Label(ctrl, text="\u25B2", font=("Segoe UI", 7), bg=BG, fg=MINI_FG, cursor="hand2")
    btn_up.pack(side="left", padx=1)
    btn_up.bind("<Button-1>", lambda e: self._change_font(1))
    btn_up.bind("<Enter>", lambda e: btn_up.configure(fg=MINI_FG_HOVER))
    btn_up.bind("<Leave>", lambda e: btn_up.configure(fg=MINI_FG))
    btn_dn = tk.Label(ctrl, text="\u25BC", font=("Segoe UI", 7), bg=BG, fg=MINI_FG, cursor="hand2")
    btn_dn.pack(side="left", padx=1)
    btn_dn.bind("<Button-1>", lambda e: self._change_font(-1))
    btn_dn.bind("<Enter>", lambda e: btn_dn.configure(fg=MINI_FG_HOVER))
    btn_dn.bind("<Leave>", lambda e: btn_dn.configure(fg=MINI_FG))
    # Transcript text field
    self._txt = ScrolledText(self._main_f, wrap="word",
                             font=("Segoe UI", self._font_size),
                             bg=TXT_BG, height=8)
    self._txt.pack(fill="both", expand=True, pady=(4, 4))
    # Status bar; the same StringVar is shown by the mini status bar.
    self._status_var = tk.StringVar(value="Modus: Medizinische Audio-Notiz aktiv")
    status_bar = tk.Frame(self._main_f, bg=STATUS_BG, height=24, padx=8, pady=4)
    status_bar.pack(fill="x", pady=(4, 0))
    # Keep the fixed height instead of shrinking to the label.
    status_bar.pack_propagate(False)
    tk.Label(status_bar, textvariable=self._status_var, fg=STATUS_FG, bg=STATUS_BG,
             font=("Segoe UI", 8), anchor="w").pack(side="left", fill="x", expand=True)
    # Autocopy checkbox
    self._autocopy_var = tk.BooleanVar(value=self._is_autocopy_enabled())
    cb_row = tk.Frame(self._main_f, bg=BG)
    cb_row.pack(fill="x", pady=(2, 0))
    cb_autocopy = ttk.Checkbutton(
        cb_row, text="Autocopy nach Transkription",
        variable=self._autocopy_var, command=self._save_autocopy_pref,
    )
    cb_autocopy.pack(side="left")
    # Checkbox for the global "right click = paste" preference
    self._rclick_paste_var = tk.BooleanVar(value=self._is_rclick_paste_enabled())
    cb_rclick = ttk.Checkbutton(
        cb_row, text="Rechtsklick = Einfügen",
        variable=self._rclick_paste_var, command=self._save_rclick_pref,
    )
    cb_rclick.pack(side="left", padx=(12, 0))
    # Buttons: start/stop toggle, new, copy, connection test
    btn_row = tk.Frame(self._main_f, bg=BG)
    btn_row.pack(fill="x", pady=(4, 0))
    self._btn_record_toggle = RoundedButton(btn_row, "\u25b6 Start",
                                            command=self._toggle_recording,
                                            width=80, height=26, canvas_bg=BG)
    self._btn_record_toggle.pack(side="left")
    RoundedButton(btn_row, "Neu", command=self._do_neu,
                  width=60, height=26, canvas_bg=BG).pack(side="left", padx=(6, 0))
    RoundedButton(btn_row, "Kopieren", command=self._do_kopieren,
                  width=80, height=26, canvas_bg=BG).pack(side="left", padx=(6, 0))
    RoundedButton(btn_row, "Verbindung testen", command=self._do_test_connection,
                  width=130, height=26, canvas_bg=BG).pack(side="left", padx=(6, 0))
# ── Schriftgroesse ──
def _change_font(self, delta):
    """Change the transcript font size by ``delta`` points (kept in 5..20).

    Updates the text field and the size label, then persists the value.
    """
    requested = self._font_size + delta
    self._font_size = max(5, min(20, requested))
    size = self._font_size
    self._txt.configure(font=("Segoe UI", size))
    self._size_lbl.configure(text=str(size))
    self._settings["font_size"] = size
    _save_settings(self._settings)
# ── Recording-Indikator (roter blinkender Punkt) ──
def _start_rec_blink(self):
self._rec_dot.pack(side="left", padx=(2, 0))
self._blink_visible = True
self._do_blink()
def _do_blink(self):
if not self._is_recording:
return
self._blink_visible = not self._blink_visible
self._rec_dot.configure(text="\u25cf" if self._blink_visible else "",
fg=REC_DOT)
self._rec_blink_id = self.after(500, self._do_blink)
def _stop_rec_blink(self):
if self._rec_blink_id:
self.after_cancel(self._rec_blink_id)
self._rec_blink_id = None
self._rec_dot.pack_forget()
def _update_btn_states(self):
    """Give the record button its Start or Stop look.

    While recording the button shows a stop label in red with white text;
    otherwise it shows the start label in the default button colours.
    """
    if self._is_recording:
        label, face, ink, hover = "\u25a0 Stop", REC_RED, "white", "#B03030"
    else:
        label, face, ink, hover = "\u25b6 Start", BTN_BG, BTN_FG, BTN_ACTIVE
    button = self._btn_record_toggle
    button.configure(text=label)
    button._bg = face
    button._fg = ink
    button._active_bg = hover
    # Repaint with the new colours.
    button._draw()
# ── Geometrie speichern ──
def _on_configure(self, e):
if e.widget is not self:
return
if self._minimized and not self._restoring and e.height > 110:
self._restore()
if not self._minimized:
if self._save_geom_after_id:
self.after_cancel(self._save_geom_after_id)
self._save_geom_after_id = self.after(400, self._save_geom)
def _save_geom(self):
try:
self._settings["geometry"] = self.geometry()
_save_settings(self._settings)
except Exception:
pass
def _is_autocopy_enabled(self) -> bool:
try:
from aza_persistence import is_autocopy_after_diktat_enabled
return is_autocopy_after_diktat_enabled()
except Exception:
return True
def _save_autocopy_pref(self):
try:
from aza_persistence import save_autocopy_prefs
save_autocopy_prefs(autocopy=self._autocopy_var.get())
except Exception:
pass
def _is_rclick_paste_enabled(self) -> bool:
try:
from aza_persistence import is_global_right_click_paste_enabled
return is_global_right_click_paste_enabled()
except Exception:
return True
def _save_rclick_pref(self):
try:
from aza_persistence import save_autocopy_prefs
save_autocopy_prefs(global_right_click=self._rclick_paste_var.get())
except Exception:
pass
# ── Minimierung ──
def _toggle_minimize(self):
if self._minimized:
self._restore()
if self._geom_before:
try:
self.geometry(self._geom_before)
except Exception:
pass
else:
self._minimize()
def _minimize(self):
    """Switch to the compact 100x78 view.

    Remembers the current geometry, hides the main content and shows a
    small control row (record toggle, recording dot, "Neu") plus a tiny
    status bar. Both compact widgets are created on first use and reused
    afterwards.
    """
    self._geom_before = self.geometry()
    self._main_f.pack_forget()
    self._btn_minimize.configure(text="\u25a1")
    self._minimized = True
    self.minsize(100, 78)
    self.geometry("100x78")
    if not self._mini_frame:
        self._mini_frame = tk.Frame(self, bg=BG)
        # Start/stop toggle
        self._m_toggle = tk.Label(self._mini_frame, text="\u25b6", font=("Segoe UI", 12, "bold"),
                                  bg=BG, fg=MINI_FG, cursor="hand2")
        self._m_toggle.pack(side="left", padx=(4, 0))
        self._m_toggle.bind("<Button-1>", lambda e: self._toggle_recording())
        # Recording dot
        self._m_dot = tk.Label(self._mini_frame, text="", font=("Segoe UI", 8),
                               bg=BG, fg=REC_DOT)
        self._m_dot.pack(side="left", padx=(2, 0))
        # "Neu" (new note)
        self._m_neu = tk.Label(self._mini_frame, text="Neu", font=("Segoe UI", 7),
                               bg=BG, fg=MINI_FG, cursor="hand2")
        self._m_neu.pack(side="left", padx=(4, 0))
        self._m_neu.bind("<Button-1>", lambda e: self._do_neu())
        self._m_neu.bind("<Enter>", lambda e: self._m_neu.configure(fg=MINI_FG_HOVER))
        self._m_neu.bind("<Leave>", lambda e: self._m_neu.configure(fg=MINI_FG))
    # Bring the compact controls in line with the recording state.
    self._update_mini_states()
    self._mini_frame.pack(fill="x", padx=2)
    if not self._mini_status_bar:
        # Shows the same status text as the full-size status bar.
        self._mini_status_bar = tk.Frame(self, bg=STATUS_BG, height=16, padx=4, pady=1)
        tk.Label(self._mini_status_bar, textvariable=self._status_var,
                 fg=STATUS_FG, bg=STATUS_BG,
                 font=("Segoe UI", 6), anchor="w").pack(side="left", fill="x", expand=True)
    self._mini_status_bar.pack(fill="x")
def _update_mini_states(self):
if not self._mini_frame:
return
if self._is_recording:
self._m_toggle.configure(text="\u25a0", fg=REC_RED)
self._m_dot.configure(text="\u25cf")
else:
self._m_toggle.configure(text="\u25b6", fg=MINI_FG)
self._m_dot.configure(text="")
def _restore(self):
if not self._minimized:
return
self._restoring = True
if self._mini_frame:
self._mini_frame.pack_forget()
if self._mini_status_bar:
self._mini_status_bar.pack_forget()
self._main_f.pack(fill="both", expand=True)
self._btn_minimize.configure(text="\u2014")
self._minimized = False
self.minsize(300, 280)
self.after(200, self._finish_restore)
def _finish_restore(self):
self._restoring = False
def _toggle_recording(self):
if self._is_recording:
self._do_stop()
else:
self._do_start()
def _do_test_connection(self):
    """Check in a background thread whether the OpenAI API is reachable.

    Refuses to run when dependencies or the API key are missing, or while
    a recording is in progress. The result is reported through the status
    bar and a message box, both scheduled with ``after``.
    """
    if _IMPORT_ERRORS:
        messagebox.showerror(
            "Start-Fehler",
            "Fehlende Komponenten:\n- " + "\n- ".join(_IMPORT_ERRORS),
            parent=self
        )
        return
    if not self._client:
        messagebox.showerror(
            "API-Key fehlt",
            "OPENAI_API_KEY ist nicht gesetzt.",
            parent=self
        )
        return
    if self._is_recording:
        self._status_var.set("Verbindungstest waehrend Aufnahme nicht verfuegbar.")
        return
    self._status_var.set("Teste Verbindung zu OpenAI…")

    def announce_ok_status():
        self._status_var.set("Verbindung OK.")

    def announce_ok_dialog():
        messagebox.showinfo(
            "Verbindungstest",
            "Verbindung zu OpenAI ist in Ordnung.",
            parent=self
        )

    def probe():
        try:
            # Small API call without an audio upload, for a quick check.
            self._client.models.list()
            self.after(0, announce_ok_status)
            self.after(0, announce_ok_dialog)
        except Exception as e:
            msg = _friendly_error_message(e)

            def announce_failure_status(m=msg):
                self._status_var.set(f"Fehler: {m.splitlines()[0][:80]}")

            def announce_failure_dialog(m=msg):
                messagebox.showerror("Verbindungstest", m, parent=self)

            self.after(0, announce_failure_status)
            self.after(0, announce_failure_dialog)

    threading.Thread(target=probe, daemon=True).start()
# ── Sprachbefehl "Stop Diktat" ──
def _transcribe_cmd(self, wav_path: str) -> str:
    """Transcribe a short WAV file using the voice-command prompt.

    ``temperature=0.0`` is only sent for models whose name does not
    contain "gpt-". Returns the recognised text, or "" when the response
    carries none.
    """
    request = {
        "model": TRANSCRIBE_MODEL,
        "language": "de",
        "prompt": _CMD_PROMPT,
    }
    if "gpt-" not in TRANSCRIBE_MODEL:
        request["temperature"] = 0.0
    with open(wav_path, "rb") as audio_file:
        response = self._client.audio.transcriptions.create(file=audio_file, **request)
    return getattr(response, "text", "") or ""
def _start_voice_cmd_monitor(self):
    """Start a daemon thread that listens for spoken stop/close commands.

    While recording, the thread polls the recorder every 0.3 s. Once the
    last ``_SILENCE_SEC`` seconds are silent and the audio before them is
    not, up to ``_CMD_SPEECH_SEC`` seconds of that audio are written to a
    temporary WAV file and transcribed. A recognised command schedules the
    stop or close action with ``after`` and ends the thread. Transcription
    is attempted at most once per ``_CMD_COOLDOWN_SEC`` seconds.

    The temporary WAV contains recorded speech, so it is deleted even when
    writing or transcribing fails.
    """
    self._voice_cmd_active = True

    def _monitor():
        import numpy as np
        import time as _t
        last_check = 0.0
        print("[VoiceCmd] Monitor gestartet", file=sys.stderr)
        while self._voice_cmd_active and self._is_recording:
            _t.sleep(0.3)
            if not self._voice_cmd_active or not self._is_recording:
                break
            rec = self._recorder
            if not rec or not rec._frames:
                continue
            sr = rec.samplerate
            try:
                # Snapshot of the newest frames; the recorder may append
                # to the list while we read it.
                frames_snap = list(rec._frames[-300:])
            except Exception:
                continue
            if len(frames_snap) < 5:
                continue
            try:
                audio = np.concatenate(frames_snap, axis=0).flatten()
            except Exception:
                continue
            silence_n = int(_SILENCE_SEC * sr)
            if len(audio) < silence_n + int(0.5 * sr):
                continue
            tail = audio[-silence_n:]
            tail_rms = float(np.sqrt(np.mean(tail ** 2)))
            if tail_rms >= _SILENCE_RMS:
                # Still speaking.
                continue
            now = _t.time()
            if now - last_check < _CMD_COOLDOWN_SEC:
                continue
            last_check = now
            speech_end = len(audio) - silence_n
            speech_start = max(0, speech_end - int(_CMD_SPEECH_SEC * sr))
            speech = audio[speech_start:speech_end]
            speech_rms = float(np.sqrt(np.mean(speech ** 2)))
            if speech_rms < _SILENCE_RMS:
                # Nothing was said before the pause either.
                continue
            print(f"[VoiceCmd] Stille erkannt (RMS={tail_rms:.4f}), "
                  f"pruefe Sprache (RMS={speech_rms:.4f}, "
                  f"{len(speech)/sr:.1f}s)", file=sys.stderr)
            try:
                # NOTE(review): assumes float samples in [-1, 1] - confirm
                # against AudioRecorder.
                pcm = (np.clip(speech, -1.0, 1.0) * 32767).astype(np.int16)
                fd, tmp = tempfile.mkstemp(suffix=".wav", prefix="vcmd_")
                os.close(fd)
                try:
                    with wave.open(tmp, "wb") as wf:
                        wf.setnchannels(1)
                        wf.setsampwidth(2)
                        wf.setframerate(sr)
                        wf.writeframes(pcm.tobytes())
                    text = self._transcribe_cmd(tmp)
                finally:
                    # Never leave recorded speech behind in the temp folder.
                    try:
                        os.remove(tmp)
                    except OSError:
                        pass
                print(f"[VoiceCmd] Erkannt: '{text}'", file=sys.stderr)
                if text:
                    cmd = _detect_voice_command(text)
                    if cmd == "close":
                        print("[VoiceCmd] >>> DIKTAT SCHLIESSEN erkannt! <<<",
                              file=sys.stderr)
                        self._voice_cmd_active = False
                        self.after(0, lambda: self._status_var.set(
                            "Sprachbefehl erkannt: Schliessen\u2026"))
                        self.after(100, self._do_close_from_voice)
                        return
                    if cmd == "stop":
                        print("[VoiceCmd] >>> STOP DIKTAT erkannt! <<<",
                              file=sys.stderr)
                        self._voice_cmd_active = False
                        self.after(0, lambda: self._status_var.set(
                            "Sprachbefehl erkannt: Stop\u2026"))
                        self.after(100, self._do_stop)
                        return
            except Exception as exc:
                print(f"[VoiceCmd] Fehler: {exc}", file=sys.stderr)
        print("[VoiceCmd] Monitor beendet", file=sys.stderr)

    threading.Thread(target=_monitor, daemon=True).start()
def _stop_voice_cmd_monitor(self):
self._voice_cmd_active = False
# ── Aufnahme ──
def _do_start(self):
    """Start a recording unless one is running or a prerequisite is missing.

    Does nothing while ``_is_recording`` is set. Otherwise an error dialog is
    shown (and no recording is started) when ``_IMPORT_ERRORS`` is non-empty
    or when ``self._client`` is falsy; import errors take precedence.
    """
    if self._is_recording:
        return

    # Collect the first blocking problem as a (title, message) pair.
    failure = None
    if _IMPORT_ERRORS:
        missing = "\n- ".join(_IMPORT_ERRORS)
        failure = ("Start-Fehler", "Fehlende Komponenten:\n- " + missing)
    elif not self._client:
        failure = ("API-Key fehlt", "OPENAI_API_KEY ist nicht gesetzt.")

    if failure is None:
        self._start_recording()
        return

    title, message = failure
    messagebox.showerror(title, message, parent=self)
def _do_stop(self, close_after=False):
if not self._is_recording:
return
self._stop_and_transcribe(close_after=close_after)
def _do_close_from_voice(self):
if self._is_recording:
self._do_stop(close_after=True)
else:
self.destroy()
def _start_recording(self):
    """Start the audio recorder and switch the UI into its recording state.

    Shows an error dialog and returns when ``AudioRecorder`` is ``None``.
    Any exception raised while starting is reported in a dialog and the
    status line is reset to "Bereit.".
    """
    if AudioRecorder is None:
        messagebox.showerror(
            "Start-Fehler",
            "AudioRecorder ist nicht verfuegbar (aza_audio.py fehlt).",
            parent=self,
        )
        return

    # Reuse the current recorder; create one only when none is set.
    recorder = self._recorder or AudioRecorder()
    self._recorder = recorder

    running_hint = (
        "Aufnahme l\u00e4uft\u2026 (\"Diktat stoppen\" oder \"Diktat schliessen\")"
    )
    try:
        recorder.start()
        # NOTE(review): if one of the calls below raises, _is_recording stays
        # True although the status line is reset to "Bereit." - confirm this
        # is acceptable.
        self._is_recording = True
        self._update_btn_states()
        self._update_mini_states()
        self._start_rec_blink()
        self._start_voice_cmd_monitor()
        self._status_var.set(running_hint)
    except Exception as exc:
        messagebox.showerror("Aufnahme-Fehler", str(exc), parent=self)
        self._status_var.set("Bereit.")
def _stop_and_transcribe(self, close_after=False):
self._is_recording = False
self._stop_voice_cmd_monitor()
self._stop_rec_blink()
try:
self._update_btn_states()
self._update_mini_states()
except Exception:
pass
self._status_var.set("Transkribiere\u2026")
rec = self._recorder
self._recorder = None
def worker():
try:
if rec is None:
self.after(0, lambda: self._status_var.set("Kein Recorder aktiv."))
return
wav_path = rec.stop_and_save_wav()
try:
with wave.open(wav_path, "rb") as wf:
duration = wf.getnframes() / float(wf.getframerate())
if duration < 0.3:
if os.path.exists(wav_path):
os.remove(wav_path)
self.after(0, lambda: self._status_var.set("Kein Audio erkannt."))
if close_after:
self.after(200, self.destroy)
return
except Exception:
pass
text = self._transcribe(wav_path)
try:
if os.path.exists(wav_path):
os.remove(wav_path)
except Exception:
pass
if not text or not text.strip():
self.after(0, lambda: self._status_var.set("Kein Text erkannt."))
if close_after:
self.after(200, self.destroy)
return
text = apply_punctuation(text)
text = text.replace("\u00df", "ss")
text = _TRAILING_CMD_RE.sub("", text).rstrip()
if not text or not text.strip():
self.after(0, lambda: self._status_var.set("Kein Text erkannt."))
if close_after:
self.after(200, self.destroy)
return
save_msg = ""
try:
saved_path = _auto_save(text)
save_msg = f" Gespeichert \u2713"
except Exception as save_err:
save_msg = f" Speichern fehlgeschlagen: {str(save_err)[:50]}"
def _insert_and_maybe_close(t=text, sm=save_msg):
self._insert_text(t, sm)
if close_after:
self.after(200, self.destroy)
self.after(0, _insert_and_maybe_close)
except Exception as e:
self.after(0, lambda err=e: self._on_error(err))
threading.Thread(target=worker, daemon=True).start()
def _transcribe(self, wav_path: str) -> str:
    """Transcribe the WAV file at *wav_path* and return the recognised text.

    The request goes to ``self._client.audio.transcriptions.create`` with
    ``TRANSCRIBE_MODEL``, language ``"de"`` and ``WHISPER_MEDICAL_PROMPT``.
    ``temperature=0.0`` is only sent when the model name does not contain
    ``"gpt-"``. Up to three attempts are made; a retry only happens when
    ``_is_connection_error_text`` accepts the error text, with a pause of
    1 s and then 2 s between attempts.

    Args:
        wav_path: Path of the audio file to upload.

    Returns:
        The transcript, or ``""`` when the response carries no text or the
        text starts with ``WHISPER_PROMPT_PREFIX``.

    Raises:
        OSError: If *wav_path* cannot be opened.
        Exception: Whatever the API call raised, when the error is not
            retryable or the last attempt failed.
    """
    max_attempts = 3
    with open(wav_path, "rb") as f:
        params = dict(model=TRANSCRIBE_MODEL, file=f, language="de")
        params["prompt"] = WHISPER_MEDICAL_PROMPT
        if "gpt-" not in TRANSCRIBE_MODEL:
            params["temperature"] = 0.0

        resp = None
        for attempt in range(max_attempts):
            # A failed attempt may have consumed part of the stream; rewind
            # so every attempt uploads the complete file.
            f.seek(0)
            try:
                resp = self._client.audio.transcriptions.create(**params)
                break
            except Exception as e:
                is_last = attempt >= max_attempts - 1
                if is_last or not _is_connection_error_text(str(e)):
                    raise
                time.sleep(1.0 + attempt)

        # The loop exits either via ``break`` (resp is set) or by raising.
        text = getattr(resp, "text", "") or ""
        # presumably guards against the model echoing the prompt back as
        # transcript - verify against WHISPER_PROMPT_PREFIX's definition
        if text.strip().startswith(WHISPER_PROMPT_PREFIX):
            text = ""
        return text
def _insert_text(self, text: str, save_msg: str = ""):
    """Insert *text* at the cursor and optionally copy the whole note.

    Args:
        text: Text inserted at the widget's current insert position.
        save_msg: Suffix appended to the status message.

    When the widget content is non-empty and ``_autocopy_var`` is set, the
    full content is copied via ``_win_clipboard_set``, falling back to the
    Tk clipboard if that returns a falsy value.
    """
    editor = self._txt
    editor.configure(state="normal")
    editor.insert(editor.index(tk.INSERT), text)

    content = editor.get("1.0", "end").strip()
    wants_copy = bool(content) and self._autocopy_var.get()
    if not wants_copy:
        self._status_var.set(f"Fertig.{save_msg}")
        return

    if not _win_clipboard_set(content):
        try:
            self.clipboard_clear()
            self.clipboard_append(_sanitize_markdown_for_plain_text(content))
        except Exception:
            # NOTE(review): a failure here is silent, so the status below
            # reports "Kopiert" even when nothing reached the clipboard.
            pass
    self._status_var.set(f"Fertig. Kopiert.{save_msg}")
def _on_error(self, err):
    """Show *err* in an error dialog and mirror it in the status line.

    The dialog text comes from ``_friendly_error_message``. The dialog is
    parented to ``self._proxy_win`` when that attribute exists, otherwise to
    ``self``; if that fails it is shown without a parent. The status line
    gets the first line of the message, cut to 80 characters.
    """
    message = _friendly_error_message(err)
    try:
        messagebox.showerror(
            "Fehler", message, parent=getattr(self, "_proxy_win", self)
        )
    except Exception:
        messagebox.showerror("Fehler", message)

    try:
        headline = message.splitlines()[0][:80]
        self._status_var.set(f"Fehler: {headline}")
    except Exception:
        # Updating the status line is best effort only.
        pass
def _do_neu(self):
if self._is_recording:
if not messagebox.askyesno("Aufnahme l\u00e4uft",
"Aktuelle Aufnahme verwerfen und neu starten?",
parent=self):
return
self._is_recording = False
self._stop_rec_blink()
self._update_btn_states()
self._update_mini_states()
try:
wav_path = self._recorder.stop_and_save_wav()
if os.path.exists(wav_path):
os.remove(wav_path)
except Exception:
pass
self._recorder = None
self._txt.configure(state="normal")
self._txt.delete("1.0", "end")
self._status_var.set("Bereit.")
self._do_start()
def _do_kopieren(self):
    """Copy the whole note to the clipboard and report it in the status line.

    Uses ``_win_clipboard_set`` first and falls back to the Tk clipboard when
    that returns a falsy value. With an empty note only the status changes.
    """
    content = self._txt.get("1.0", "end").strip()
    if not content:
        self._status_var.set("Nichts zum Kopieren.")
        return

    if not _win_clipboard_set(content):
        try:
            self.clipboard_clear()
            self.clipboard_append(_sanitize_markdown_for_plain_text(content))
        except Exception:
            # Best effort: a failing fallback is not reported.
            pass
    self._status_var.set("Audio-Notiz kopiert.")
if __name__ == "__main__":
    # Script entry point: create the application object and run its main loop.
    app = DiktatApp()
    app.mainloop()