update

2026-05-20 00:09:28 +02:00
parent 968bf7d102
commit 51b5ddc6f2
695 changed files with 999722 additions and 270 deletions
--- a/(28)/aza_note_dictation_text.py
+++ b/(28)/aza_note_dictation_text.py
@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+"""Deterministische Nachbearbeitung fuer Kurz-Diktate (z. B. Empfang-Notizen).
+
+- Keine KI, kein Serverroundtrip, keine Anbindung an die Haupt-KG-/Whisper-Diktatpipeline.
+- Parallele Logik fuer den Browser: ``normalizeNoteDictationText`` /
+  ``normalizeMedicalDictationTerms`` in ``web/empfang.html`` (bei Aenderungen hier
+  bitte JS-Spiegel anpassen).
+
+Wiederverwendung: Desktop oder andere lokale Diktatfelder duerfen diese Funktionen
+importieren, ohne ``basis14``/KG-Code zu beruehren.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import List, Tuple
+
+# Spoken control phrases paired with the text that replaces them.
+# A phrase is only replaced as a whole, delimited by whitespace or the ends of
+# the string (never inside a longer word).
+# The "... bitte" variants come before their shorter forms: the replacement
+# loop applies the entries in this order, so the trailing "bitte" is consumed
+# together with the command instead of being left behind in the text.
+_NOTE_DICTATION_PHRASES: Tuple[Tuple[str, str], ...] = (
+    ("neue zeile bitte", "\n"),
+    ("neuer absatz bitte", "\n\n"),
+    ("neuen absatz bitte", "\n\n"),
+    ("neue zeile", "\n"),
+    ("neuer absatz", "\n\n"),
+    ("neuen absatz", "\n\n"),
+)
+
+# (search term, canonical spelling) pairs; terms are matched case-insensitively.
+# Longer entries first so that shorter terms contained in them cannot split a
+# match.
+# NOTE(review): the list is not strictly sorted by length; with the
+# whitespace-delimited matching used below the order does not appear to
+# change the result for the current entries - confirm before relying on it.
+_MEDICAL_CANONICAL: Tuple[Tuple[str, str], ...] = (
+    ("aktinische keratose", "Aktinische Keratose"),
+    ("seborrhoische keratose", "Seborrhoische Keratose"),
+    ("dermatoskopie", "Dermatoskopie"),
+    ("kryotherapie", "Kryotherapie"),
+    ("onychomykose", "Onychomykose"),
+    ("urtikaria", "Urtikaria"),
+    ("dermatitis", "Dermatitis"),
+    ("histologie", "Histologie"),
+    ("biopsie", "Biopsie"),
+    ("exzision", "Exzision"),
+    ("psoriasis", "Psoriasis"),
+    ("rosazea", "Rosazea"),
+    ("melanom", "Melanom"),
+    ("spinaliom", "Spinaliom"),
+    ("basaliom", "Basaliom"),
+    ("mykose", "Mykose"),
+    ("tinea", "Tinea"),
+    ("ekzem", "Ekzem"),
+    ("naevus", "Nävus"),
+    ("nevus", "Nävus"),
+    # Adjectives are normalized to their lowercase spelling.
+    ("systemisch", "systemisch"),
+    ("topisch", "topisch"),
+)
+
+
+def _phrase_replace_all(text: str) -> str:
+    """Replace every spoken control phrase in *text* with its line-break text.
+
+    The entries of ``_NOTE_DICTATION_PHRASES`` are applied one after another in
+    table order. Matching ignores case and requires the phrase to be
+    surrounded by whitespace or the ends of the string.
+    """
+    result = text
+    for spoken, replacement in _NOTE_DICTATION_PHRASES:
+        # The lookarounds reject a match that touches any non-whitespace
+        # character, so a phrase is never replaced inside a longer token.
+        matcher = re.compile(
+            r"(?<!\S)" + re.escape(spoken) + r"(?!\S)",
+            re.IGNORECASE,
+        )
+        result = matcher.sub(replacement, result)
+    return result
+
+
+def _collapse_inline_spaces_segment(seg: str) -> str:
+    """Squeeze runs of spaces/tabs in *seg* to one space and strip both ends."""
+    squeezed = re.sub(r"[ \t]+", " ", seg)
+    return squeezed.strip()
+
+
+def _finalize_whitespace(text: str) -> str:
+    """Tidy whitespace line by line and limit consecutive blank lines.
+
+    Each ``\\n``-separated line has its inner space/tab runs collapsed and its
+    ends stripped. Runs of three or more newlines shrink to two, and trailing
+    whitespace of the whole text is removed; leading newlines are kept.
+    """
+    tidy_lines = map(_collapse_inline_spaces_segment, text.split("\n"))
+    joined = "\n".join(tidy_lines)
+    # Three or more newlines in a row become exactly one blank line.
+    return re.sub(r"\n{3,}", "\n\n", joined).rstrip()
+
+
+def normalize_note_dictation_text(text: str) -> str:
+    """Replace safe spoken layout commands in a dictation with line breaks.
+
+    Only the complete phrases listed in ``_NOTE_DICTATION_PHRASES`` are
+    replaced, case-insensitively. A lone "Absatz" is not replaced because it
+    is ambiguous, and "Punkt"/"Komma" are not handled.
+
+    Returns an empty string for ``None`` and for empty or whitespace-only
+    input; otherwise the text with commands replaced and whitespace tidied.
+    """
+    stripped = "" if text is None else str(text).strip()
+    if not stripped:
+        return ""
+    return _finalize_whitespace(_phrase_replace_all(stripped))
+
+
+def _compile_medical_patterns() -> List[Tuple[re.Pattern[str], str]]:
+    """Build one compiled, case-insensitive pattern per medical term.
+
+    Returns ``(pattern, canonical spelling)`` pairs in the order of
+    ``_MEDICAL_CANONICAL``. Each pattern only matches the term when it is
+    surrounded by whitespace or the ends of the string.
+    """
+    return [
+        (
+            re.compile(r"(?<!\S)" + re.escape(term) + r"(?!\S)", re.IGNORECASE),
+            spelling,
+        )
+        for term, spelling in _MEDICAL_CANONICAL
+    ]
+
+
+# Compiled once when the module is imported, so every call to
+# normalize_medical_dictation_terms reuses the same pattern objects.
+_MEDICAL_PATTERNS = _compile_medical_patterns()
+
+
+def normalize_medical_dictation_terms(text: str) -> str:
+    """Rewrite known medical terms in *text* to their canonical spelling.
+
+    Only clear, whitespace-delimited matches are replaced (case-insensitive);
+    nothing is interpreted or corrected beyond the spelling table.
+
+    Returns an empty string for ``None``. Empty or whitespace-only input is
+    returned unchanged.
+    """
+    if text is None:
+        return ""
+    result = str(text)
+    if result.strip():
+        for pattern, spelling in _MEDICAL_PATTERNS:
+            result = pattern.sub(spelling, result)
+    return result
+
+
+def normalize_note_dictation_pipeline(text: str) -> str:
+    """Run the full note clean-up on *text*.
+
+    Order: spoken control commands, then medical spelling, then a final
+    whitespace pass.
+    """
+    with_breaks = normalize_note_dictation_text(text)
+    return _finalize_whitespace(normalize_medical_dictation_terms(with_breaks))