Files
aza/AzA march 2026/workforce_planner/ai/service.py
2026-03-25 22:03:39 +01:00

148 lines
4.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
AI service: the central place for all OpenAI calls.
No client has direct access to the API key.
Desktop and web clients send their requests here.
"""
import os
import tempfile
from typing import Optional
from openai import OpenAI
from dotenv import load_dotenv
# Populate the process environment from a .env file (python-dotenv) at import
# time, before _get_client() reads OPENAI_API_KEY.
load_dotenv()
# Shared OpenAI client; stays None until _get_client() creates it on first use.
_client: Optional[OpenAI] = None
# Model name sent with every transcription request in transcribe_audio().
TRANSCRIBE_MODEL = "whisper-1"
# Default model for summarize_transcript(), merge_kg() and chat_completion().
DEFAULT_SUMMARY_MODEL = "gpt-4o"
# Passed as `prompt` to the transcription request: names the setting (medical
# consultation, Swiss German and Standard German) and lists medical terms.
WHISPER_MEDICAL_PROMPT = (
"Medizinische Konsultation, Arzt-Patient-Gespräch, Schweizerdeutsch und Hochdeutsch. "
"Medizinische Fachbegriffe: Anamnese, Status, Befund, Diagnose, Therapie, Verlauf, "
"Medikation, Dosierung, Labor, Röntgen, MRI, CT, Sonographie, EKG, Spirometrie, "
"ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief."
)
# Fallback system prompt used when a caller passes an empty system_prompt:
# asks for a structured medical history in SOAP format, written in German.
DEFAULT_SYSTEM_PROMPT = (
"Du bist ein medizinischer Dokumentationsassistent. "
"Erstelle aus dem Transkript eine strukturierte Krankengeschichte im SOAP-Format. "
"Verwende medizinische Fachterminologie. Sprache: Deutsch."
)
def _get_client() -> OpenAI:
    """Return the module-wide OpenAI client, building it on first use.

    The API key is read from the ``OPENAI_API_KEY`` environment variable and
    stripped of surrounding whitespace. Once created, the same client object
    is returned by every later call.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is unset or blank.
    """
    global _client
    # Fast path: the client was already created by an earlier call.
    if _client is not None:
        return _client
    key = os.getenv("OPENAI_API_KEY", "").strip()
    if not key:
        raise RuntimeError("OPENAI_API_KEY nicht gesetzt")
    _client = OpenAI(api_key=key)
    return _client
def transcribe_audio(wav_bytes: bytes, language: str = "de") -> dict:
    """Transcribe WAV audio bytes with the configured transcription model.

    The bytes are written to a temporary ``.wav`` file that is uploaded with
    ``WHISPER_MEDICAL_PROMPT`` as the request prompt. The temporary file is
    removed afterwards, whether or not the request succeeded.

    Args:
        wav_bytes: Content of a WAV file.
        language: Language code passed to the transcription request.

    Returns:
        ``{"text": str, "tokens_estimated": int}``. ``text`` is set to ``""``
        when the returned text starts with the prompt's opening sentence (or
        the legacy prefix), which is treated as a prompt echo rather than
        speech. ``tokens_estimated`` is ``len(text) // 4``.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is not set (via ``_get_client``).
    """
    client = _get_client()

    # Prefixes that mark a response as an echo of the prompt. The literal is
    # the prefix the original check used (presumably an earlier prompt
    # wording); the second entry tracks the prompt that is actually sent, so
    # the guard cannot go stale when the prompt text changes.
    prompt_lead = WHISPER_MEDICAL_PROMPT.split(".", 1)[0].strip()
    echo_prefixes = tuple(
        prefix
        for prefix in ("Medizinische Dokumentation auf Deutsch", prompt_lead)
        if prefix  # an empty prefix would match every transcript
    )

    # delete=False: the file is closed and then reopened by path below, and
    # is removed explicitly in the ``finally`` clause.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_path = tmp.name
    try:
        # Writing happens inside the try so a failed write cannot leave the
        # audio file behind on disk.
        with tmp:
            tmp.write(wav_bytes)
        with open(tmp_path, "rb") as f:
            resp = client.audio.transcriptions.create(
                model=TRANSCRIBE_MODEL,
                file=f,
                language=language,
                prompt=WHISPER_MEDICAL_PROMPT,
            )
        text = getattr(resp, "text", "") or ""
        if text.strip().startswith(echo_prefixes):
            text = ""
        tokens_est = len(text) // 4
        return {"text": text, "tokens_estimated": tokens_est}
    finally:
        # Best-effort cleanup: a failure to delete must not mask the result
        # or the original exception.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
def summarize_transcript(
    transcript: str,
    system_prompt: str = "",
    model: str = DEFAULT_SUMMARY_MODEL,
) -> dict:
    """Turn a consultation transcript into a medical history (SOAP).

    Builds a system + user message pair and delegates the request and the
    response handling to ``chat_completion`` instead of duplicating it.

    Args:
        transcript: Transcript text; sent as the user message after a
            ``TRANSKRIPT:`` header line.
        system_prompt: System prompt to use; ``DEFAULT_SYSTEM_PROMPT`` is
            used when this is empty.
        model: Chat model name.

    Returns:
        ``{"text": str, "tokens_used": int, "model": str}`` as produced by
        ``chat_completion``.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is not set (via ``_get_client``).
    """
    messages = [
        {"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT},
        {"role": "user", "content": f"TRANSKRIPT:\n{transcript}"},
    ]
    return chat_completion(messages, model=model)
def merge_kg(
    existing_kg: str,
    full_transcript: str,
    system_prompt: str = "",
    model: str = DEFAULT_SUMMARY_MODEL,
) -> dict:
    """Merge an existing medical history (KG) with a transcript.

    The user message contains the existing history, the full transcript and
    an instruction to insert new information into the matching sections while
    keeping the same headings. The request and the response handling are
    delegated to ``chat_completion`` instead of being duplicated here.

    Args:
        existing_kg: Text of the current medical history.
        full_transcript: Complete transcript (earlier part plus addition).
        system_prompt: System prompt to use; ``DEFAULT_SYSTEM_PROMPT`` is
            used when this is empty.
        model: Chat model name.

    Returns:
        ``{"text": str, "tokens_used": int, "model": str}`` as produced by
        ``chat_completion``.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is not set (via ``_get_client``).
    """
    user_text = (
        f"BESTEHENDE KRANKENGESCHICHTE:\n{existing_kg}\n\n"
        f"VOLLSTÄNDIGES TRANSKRIPT (bisher + Ergänzung):\n{full_transcript}\n\n"
        "Aktualisiere die KG: neue Informationen aus dem Transkript in die "
        "passenden Abschnitte einfügen, gleiche Überschriften beibehalten."
    )
    messages = [
        {"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT},
        {"role": "user", "content": user_text},
    ]
    return chat_completion(messages, model=model)
def chat_completion(
    messages: list[dict],
    model: str = DEFAULT_SUMMARY_MODEL,
) -> dict:
    """Send ``messages`` to the chat completions endpoint and unpack the reply.

    Args:
        messages: Chat messages, forwarded unchanged to the request.
        model: Chat model name.

    Returns:
        ``{"text": str, "tokens_used": int, "model": str}``. ``text`` is the
        content of the first choice (``""`` if it is empty or ``None``);
        ``tokens_used`` is ``usage.total_tokens`` when the response carries
        usage data, otherwise ``0``.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is not set (via ``_get_client``).
    """
    response = _get_client().chat.completions.create(model=model, messages=messages)
    reply_text = response.choices[0].message.content or ""
    # The response may lack usage data; report 0 tokens in that case.
    usage = getattr(response, "usage", None)
    tokens_used = getattr(usage, "total_tokens", 0) if usage else 0
    return {"text": reply_text, "tokens_used": tokens_used, "model": model}