Files
aza/AzA march 2026 - Kopie/transcribe_server.py

205 lines
6.9 KiB
Python
Raw Normal View History

2026-03-25 13:42:48 +01:00
# -*- coding: utf-8 -*-
"""
Minimaler Transkriptions-Server (FastAPI) mit TLS.
Start:
python transcribe_server.py
Oder ohne TLS (nur Entwicklung):
AZA_TLS_REQUIRE=0 uvicorn transcribe_server:app --host 0.0.0.0 --port 8090
Test:
curl https://localhost:8090/health --insecure
curl -X POST https://localhost:8090/v1/transcribe -F "file=@aufnahme.wav" --insecure
"""
import os
import sys
import time
import uuid
import tempfile
from typing import Optional
from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from openai import OpenAI
from aza_tls import check_tls_or_exit, get_uvicorn_ssl_kwargs, has_tls_config
from aza_rate_limit import default_ip_limiter, default_token_limiter
load_dotenv()
# Service version; passed to the FastAPI metadata and returned by /health.
_APP_VERSION = "0.1.0"

# Wall-clock time at import; /health derives the uptime from it.
_START_TIME = time.time()

app = FastAPI(
    title="AZA Transkriptions-Service",
    version=_APP_VERSION,
)

# CORS accepts every origin, method and header.
# NOTE(review): confirm that this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── OpenAI client (singleton) ──────────────────────────────
# Created lazily by _get_openai() on first use; stays None until then.
_client: Optional[OpenAI] = None
# Model name sent to the transcription API and echoed in the response body.
WHISPER_MODEL = "whisper-1"
# Instruction sentences that open both default prompts.
_TRANSCRIBE_ONLY_INSTRUCTION = (
    "Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "
    "Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "
)

# Default prompt for domain != "general": the shared instruction followed by
# a list of German medical vocabulary, abbreviations and month names.
WHISPER_MEDICAL_PROMPT = _TRANSCRIBE_ONLY_INSTRUCTION + (
    "Medizinische Dokumentation auf Deutsch. "
    "Capillitium, Fotodynamische Therapie, PDT, Basalzellkarzinom, Plattenepithelkarzinom, "
    "Melanom, Exzision, Biopsie, Kryotherapie, Kürettage, Histologie, Dermatoskopie, "
    "Anamnese, Diagnose, Therapie, Procedere, subjektiv, objektiv, "
    "Abdomen, Thorax, Extremitäten, zervikal, lumbal, thorakal, sakral, "
    "Sonographie, Röntgen, MRI, CT, EKG, Laborwerte, Blutbild, "
    "Hypertonie, Diabetes mellitus, Hypercholesterinämie, Hypothyreose, "
    "Antikoagulation, Thrombozytenaggregationshemmer, NSAR, ACE-Hemmer, "
    "Immunsuppression, Kortikosteroide, Biologika, Methotrexat, "
    "Psoriasis, Ekzem, Dermatitis, Urtikaria, Alopezie, Akne, Rosazea, "
    "Aktinische Keratose, Morbus Bowen, Lentigo maligna, "
    "Januar 2026, Februar 2026, März 2026, April 2026, Mai 2026, "
    "Status nach, Z.n., s/p, i.v., p.o., s.c., "
    "ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief."
)

# A transcript that starts with this text is discarded by the transcribe
# endpoint.
# NOTE(review): the medical prompt itself starts with the instruction
# sentences, not with this prefix, so a verbatim echo of the whole prompt
# would not match -- confirm this is the intended check.
WHISPER_PROMPT_PREFIX = "Medizinische Dokumentation auf Deutsch"

# Default prompt used when the caller selects domain == "general".
WHISPER_GENERAL_PROMPT = _TRANSCRIBE_ONLY_INSTRUCTION + (
    "Allgemeines Diktat auf Deutsch mit sinnvoller Zeichensetzung."
)
def _get_openai() -> OpenAI:
    """Return the shared OpenAI client, creating it on the first call.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    with surrounding whitespace stripped.

    Raises:
        RuntimeError: if the variable is unset or blank.
    """
    global _client

    # Fast path: the client was already built by an earlier call.
    if _client is not None:
        return _client

    key = os.getenv("OPENAI_API_KEY", "").strip()
    if key == "":
        raise RuntimeError("OPENAI_API_KEY nicht gesetzt")

    _client = OpenAI(api_key=key)
    return _client
# ── Schemas ─────────────────────────────────────────────────
# Response body of POST /v1/transcribe.
class TranscribeResponse(BaseModel):
    # The endpoint sets this to True on its success path.
    success: bool
    # Transcribed text (the endpoint may blank it, see transcribe()).
    transcript: str
    # Elapsed time measured by the endpoint, in milliseconds.
    duration_ms: int
    # Name of the transcription model that was used.
    model: str
    # Server-generated identifier for this request.
    request_id: str
    # Where the transcription was produced; defaults to "backend".
    transcription_source: str = "backend"
# ── Endpoints ───────────────────────────────────────────────
# Liveness probe: reports a fixed "ok" status, the service version, the
# uptime in whole seconds and whether a TLS configuration is present.
@app.get("/health")
def health():
    uptime_seconds = int(time.time() - _START_TIME)
    payload = {
        "status": "ok",
        "version": _APP_VERSION,
        "uptime_s": uptime_seconds,
        "tls": has_tls_config(),
    }
    return payload
# Single upper bound for uploads. The request-size check and the file-size
# check share it so that both agree with each other and with the error text.
_MAX_UPLOAD_BYTES = 25 * 1024 * 1024
_ALLOWED_AUDIO_EXTENSIONS = (".wav", ".mp3", ".m4a")


def _transcribe_blocking(data: bytes, ext: str, language: str, prompt: str) -> tuple:
    """Run one transcription synchronously and return ``(text, duration_ms)``.

    The audio bytes are written to a temporary file carrying the original
    extension, sent to the transcription API, and the file is removed again
    whether or not the call succeeds. ``duration_ms`` covers client lookup
    plus the API call, not the temp-file write.

    This function blocks on disk and network I/O and is meant to be called
    from a worker thread, never directly on the event loop.

    Raises:
        RuntimeError: propagated from ``_get_openai()`` when no API key is set.
        Exception: whatever the API client raises on failure.
    """
    tmp_path = None
    try:
        # delete=False: the file is reopened by name below and removed in
        # the finally clause instead.
        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
            tmp.write(data)
            tmp_path = tmp.name

        started = time.perf_counter()
        client = _get_openai()
        with open(tmp_path, "rb") as audio:
            resp = client.audio.transcriptions.create(
                model=WHISPER_MODEL,
                file=audio,
                language=language,
                prompt=prompt,
            )
        text = getattr(resp, "text", "") or ""
        duration_ms = int((time.perf_counter() - started) * 1000)
        return text, duration_ms
    finally:
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask
                # the real result or error.
                pass


@app.post("/v1/transcribe", response_model=TranscribeResponse)
async def transcribe(
    request: Request,
    file: UploadFile = File(...),
    mode: str = Form("new"),
    language: str = Form("de"),
    prompt: str = Form(""),
    domain: str = Form("medical"),
    client_request_id: Optional[str] = Form(None),
):
    """Transcribe an uploaded audio file.

    Form fields:
        file: audio upload; only .wav, .mp3 and .m4a are accepted.
        mode: accepted but not used by this handler.
        language: language code forwarded to the transcription API.
        prompt: optional prompt; when blank, a default is chosen by ``domain``.
        domain: "general" selects the general prompt, anything else the
            medical prompt.
        client_request_id: accepted but not used by this handler.

    Returns:
        TranscribeResponse with the transcript, the elapsed time in
        milliseconds, the model name and a server-generated request id.
        The transcript is blanked when it starts with WHISPER_PROMPT_PREFIX.

    Raises:
        HTTPException 400: unsupported file extension or empty upload.
        HTTPException 413: declared request size or actual file size above
            the upload limit.
        HTTPException 503: RuntimeError during transcription (e.g. missing
            API key).
        HTTPException 500: any other failure during transcription.
    """
    import asyncio  # local import: keeps this block independent of the file header

    # --- Abuse protection: rate limit + request size limit ---
    # NOTE(review): the limiters are defined in aza_rate_limit; presumably
    # consume() raises when the budget is exhausted -- confirm there.
    ip = request.client.host if request.client else "unknown"
    tok = request.headers.get("X-API-Token", "none")
    default_ip_limiter.consume(f"ip:{ip}", cost=1.0)
    default_token_limiter.consume(f"tok:{tok}", cost=1.0)

    content_length = request.headers.get("content-length")
    if content_length is not None:
        try:
            declared_size = int(content_length)
        except ValueError:
            # Malformed header: ignore it, the body-size check below still applies.
            declared_size = None
        if declared_size is not None and declared_size > _MAX_UPLOAD_BYTES:
            raise HTTPException(status_code=413, detail="Request too large")

    request_id = f"srv_{uuid.uuid4().hex[:12]}"

    ext = os.path.splitext(file.filename or "upload.wav")[1].lower()
    if ext not in _ALLOWED_AUDIO_EXTENSIONS:
        raise HTTPException(400, f"Nur {', '.join(_ALLOWED_AUDIO_EXTENSIONS)} erlaubt")

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large body into memory.
    data = await file.read(_MAX_UPLOAD_BYTES + 1)
    if len(data) > _MAX_UPLOAD_BYTES:
        raise HTTPException(413, "Datei zu gross (max 25 MB)")
    if not data:
        # Reject locally instead of surfacing an upstream failure as a 500.
        raise HTTPException(400, "Leere Datei")

    chosen_prompt = (prompt or "").strip()
    if not chosen_prompt:
        dom = str(domain or "").strip().lower()
        chosen_prompt = WHISPER_GENERAL_PROMPT if dom == "general" else WHISPER_MEDICAL_PROMPT

    # The API client is synchronous; running it in the default executor
    # keeps the event loop free to serve other requests meanwhile.
    loop = asyncio.get_running_loop()
    try:
        text, duration_ms = await loop.run_in_executor(
            None, _transcribe_blocking, data, ext, language, chosen_prompt
        )
    except RuntimeError as e:
        raise HTTPException(503, str(e)) from e
    except Exception as e:
        raise HTTPException(500, f"Transkription fehlgeschlagen: {e}") from e

    # Drop transcripts that start with the prompt prefix instead of speech.
    if text.strip().startswith(WHISPER_PROMPT_PREFIX):
        text = ""

    return TranscribeResponse(
        success=True,
        transcript=text,
        duration_ms=duration_ms,
        model=WHISPER_MODEL,
        request_id=request_id,
    )
# Listening port; read from the TRANSCRIBE_PORT environment variable,
# falling back to 8090.
TRANSCRIBE_PORT = int(os.getenv("TRANSCRIBE_PORT", "8090"))

if __name__ == "__main__":
    import uvicorn

    # TLS check first, then the keyword arguments handed to uvicorn.
    check_tls_or_exit()
    ssl_kwargs = get_uvicorn_ssl_kwargs()

    if has_tls_config():
        scheme = "https"
    else:
        scheme = "http"
    print(f"Starte auf {scheme}://0.0.0.0:{TRANSCRIBE_PORT}")

    tls_state = "AKTIV" if has_tls_config() else "DEAKTIVIERT"
    print(f"TLS: {tls_state}")

    uvicorn.run(app, host="0.0.0.0", port=TRANSCRIBE_PORT, **ssl_kwargs)