update
This commit is contained in:
116
AzA march 2026 - Kopie (28)/aza_note_dictation_text.py
Normal file
116
AzA march 2026 - Kopie (28)/aza_note_dictation_text.py
Normal file
@@ -0,0 +1,116 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Deterministische Nachbearbeitung fuer Kurz-Diktate (z. B. Empfang-Notizen).
|
||||
|
||||
- Keine KI, kein Serverroundtrip, keine Anbindung an die Haupt-KG-/Whisper-Diktatpipeline.
|
||||
- Parallele Logik fuer den Browser: ``normalizeNoteDictationText`` /
|
||||
``normalizeMedicalDictationTerms`` in ``web/empfang.html`` (bei Aenderungen hier
|
||||
bitte JS-Spiegel anpassen).
|
||||
|
||||
Wiederverwendung: Desktop oder andere lokale Diktatfelder duerfen diese Funktionen
|
||||
importieren, ohne ``basis14``/KG-Code zu beruehren.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
|
||||
# Ganze Phrase, durch Whitespace/Wortgrenze begrenzt (kein Ersatz innerhalb Woerter).
|
||||
_NOTE_DICTATION_PHRASES: Tuple[Tuple[str, str], ...] = (
|
||||
("neue zeile bitte", "\n"),
|
||||
("neuer absatz bitte", "\n\n"),
|
||||
("neuen absatz bitte", "\n\n"),
|
||||
("neue zeile", "\n"),
|
||||
("neuer absatz", "\n\n"),
|
||||
("neuen absatz", "\n\n"),
|
||||
)
|
||||
|
||||
# Laenger zuerst, damit Teilstrings nicht zerteilen.
|
||||
_MEDICAL_CANONICAL: Tuple[Tuple[str, str], ...] = (
|
||||
("aktinische keratose", "Aktinische Keratose"),
|
||||
("seborrhoische keratose", "Seborrhoische Keratose"),
|
||||
("dermatoskopie", "Dermatoskopie"),
|
||||
("kryotherapie", "Kryotherapie"),
|
||||
("onychomykose", "Onychomykose"),
|
||||
("urtikaria", "Urtikaria"),
|
||||
("dermatitis", "Dermatitis"),
|
||||
("histologie", "Histologie"),
|
||||
("biopsie", "Biopsie"),
|
||||
("exzision", "Exzision"),
|
||||
("psoriasis", "Psoriasis"),
|
||||
("rosazea", "Rosazea"),
|
||||
("melanom", "Melanom"),
|
||||
("spinaliom", "Spinaliom"),
|
||||
("basaliom", "Basaliom"),
|
||||
("mykose", "Mykose"),
|
||||
("tinea", "Tinea"),
|
||||
("ekzem", "Ekzem"),
|
||||
("naevus", "Nävus"),
|
||||
("nevus", "Nävus"),
|
||||
("systemisch", "systemisch"),
|
||||
("topisch", "topisch"),
|
||||
)
|
||||
|
||||
|
||||
def _phrase_replace_all(text: str) -> str:
    """Replace every spoken control phrase in *text* with its configured text.

    The entries of ``_NOTE_DICTATION_PHRASES`` are applied one after another in
    table order. A phrase only matches when it is preceded and followed by
    whitespace or a string edge; letter case is ignored.
    """
    result = text
    for spoken, replacement in _NOTE_DICTATION_PHRASES:
        # Negative look-arounds on "non-whitespace" act as whole-phrase
        # boundaries without consuming the surrounding whitespace.
        bounded = re.compile(
            r"(?<!\S)" + re.escape(spoken) + r"(?!\S)",
            flags=re.IGNORECASE,
        )
        result = bounded.sub(replacement, result)
    return result
|
||||
|
||||
|
||||
def _collapse_inline_spaces_segment(seg: str) -> str:
|
||||
return re.sub(r"[ \t]+", " ", seg).strip()
|
||||
|
||||
|
||||
def _finalize_whitespace(text: str) -> str:
    """Tidy the whitespace of *text* after all replacements have been made.

    Each line (split on ``"\\n"``) is passed through
    ``_collapse_inline_spaces_segment``, runs of three or more newlines are
    reduced to two (at most one blank line in a row), and trailing whitespace
    at the very end of the text is removed.
    """
    tidy_lines = map(_collapse_inline_spaces_segment, text.split("\n"))
    joined = "\n".join(tidy_lines)
    capped = re.sub(r"\n{3,}", "\n\n", joined)
    return capped.rstrip()
|
||||
|
||||
|
||||
def normalize_note_dictation_text(text: str) -> str:
    """Turn spoken control phrases into line and paragraph breaks.

    Only whole phrases are replaced, ignoring letter case. A lone «Absatz» is
    deliberately left alone because it is ambiguous, and «Punkt»/«Komma» are
    not handled here.

    Returns ``""`` for ``None`` and for input that is empty after stripping;
    otherwise the text with phrases replaced and whitespace tidied.
    """
    trimmed = "" if text is None else str(text).strip()
    if not trimmed:
        return ""
    return _finalize_whitespace(_phrase_replace_all(trimmed))
|
||||
|
||||
|
||||
def _compile_medical_patterns() -> List[Tuple[re.Pattern[str], str]]:
    """Build one compiled, case-insensitive pattern per ``_MEDICAL_CANONICAL`` entry.

    Each pattern matches the escaped term only when it is preceded and followed
    by whitespace or a string edge. The result pairs every pattern with its
    canonical spelling, in table order.
    """
    return [
        (re.compile(rf"(?<!\S){re.escape(term)}(?!\S)", re.IGNORECASE), canonical)
        for term, canonical in _MEDICAL_CANONICAL
    ]


# Compiled once at import time and reused by normalize_medical_dictation_terms.
_MEDICAL_PATTERNS = _compile_medical_patterns()
|
||||
|
||||
|
||||
def normalize_medical_dictation_terms(text: str) -> str:
    """Rewrite known medical terms in *text* to their canonical spelling.

    Only clearly delimited matches from ``_MEDICAL_PATTERNS`` are replaced; no
    interpretation of the text takes place.

    Returns ``""`` for ``None``. Blank or whitespace-only input is returned
    unchanged (as ``str``).
    """
    if text is None:
        return ""
    result = str(text)
    if result.strip():
        for pattern, canonical in _MEDICAL_PATTERNS:
            result = pattern.sub(canonical, result)
    return result
|
||||
|
||||
|
||||
def normalize_note_dictation_pipeline(text: str) -> str:
    """Run the full note clean-up on *text*.

    Stages, in order: control phrases, medical spelling, whitespace.
    """
    with_breaks = normalize_note_dictation_text(text)
    with_terms = normalize_medical_dictation_terms(with_breaks)
    return _finalize_whitespace(with_terms)
|
||||
Reference in New Issue
Block a user