# -*- coding: utf-8 -*-
"""
Minimaler Transkriptions-Server (FastAPI) mit TLS.

Start:
    python transcribe_server.py

Oder ohne TLS (nur Entwicklung):
    AZA_TLS_REQUIRE=0 uvicorn transcribe_server:app --host 0.0.0.0 --port 8090

Test:
    curl https://localhost:8090/health --insecure
    curl -X POST https://localhost:8090/v1/transcribe -F "file=@aufnahme.wav" --insecure
"""
# Standard library
import os
import sys
import tempfile
import time
import uuid
from typing import Optional

# Third-party
from dotenv import load_dotenv
from fastapi import FastAPI, File, Form, HTTPException, Request, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from openai import OpenAI
from pydantic import BaseModel

# Local
from aza_rate_limit import default_ip_limiter, default_token_limiter
from aza_tls import check_tls_or_exit, get_uvicorn_ssl_kwargs, has_tls_config

# Load .env before anything reads OPENAI_API_KEY / TRANSCRIBE_PORT / TLS vars.
load_dotenv()
# Reported by /health: service version and process start time (for uptime).
_APP_VERSION = "0.1.0"
_START_TIME = time.time()

app = FastAPI(title="AZA Transkriptions-Service", version=_APP_VERSION)

# NOTE(review): wildcard CORS (any origin/method/header) is wide open —
# acceptable for local development, but tighten allow_origins before
# exposing this service beyond localhost.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── OpenAI client (singleton) ──────────────────────────────

# Created lazily on first use by _get_openai(), so the server can start
# (and serve /health) without an API key configured.
_client: Optional[OpenAI] = None

WHISPER_MODEL = "whisper-1"

# Domain prompt: biases Whisper toward German medical/dermatology vocabulary
# and instructs it to transcribe verbatim rather than answer or summarize.
WHISPER_MEDICAL_PROMPT = (
    "Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "
    "Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "
    "Medizinische Dokumentation auf Deutsch. "
    "Capillitium, Fotodynamische Therapie, PDT, Basalzellkarzinom, Plattenepithelkarzinom, "
    "Melanom, Exzision, Biopsie, Kryotherapie, Kürettage, Histologie, Dermatoskopie, "
    "Anamnese, Diagnose, Therapie, Procedere, subjektiv, objektiv, "
    "Abdomen, Thorax, Extremitäten, zervikal, lumbal, thorakal, sakral, "
    "Sonographie, Röntgen, MRI, CT, EKG, Laborwerte, Blutbild, "
    "Hypertonie, Diabetes mellitus, Hypercholesterinämie, Hypothyreose, "
    "Antikoagulation, Thrombozytenaggregationshemmer, NSAR, ACE-Hemmer, "
    "Immunsuppression, Kortikosteroide, Biologika, Methotrexat, "
    "Psoriasis, Ekzem, Dermatitis, Urtikaria, Alopezie, Akne, Rosazea, "
    "Aktinische Keratose, Morbus Bowen, Lentigo maligna, "
    "Januar 2026, Februar 2026, März 2026, April 2026, Mai 2026, "
    "Status nach, Z.n., s/p, i.v., p.o., s.c., "
    "ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief."
)

# If a transcript *starts* with this phrase, Whisper has echoed the prompt
# instead of transcribing the audio; /v1/transcribe discards such output.
WHISPER_PROMPT_PREFIX = "Medizinische Dokumentation auf Deutsch"

# Fallback prompt for non-medical dictation (domain == "general").
WHISPER_GENERAL_PROMPT = (
    "Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "
    "Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "
    "Allgemeines Diktat auf Deutsch mit sinnvoller Zeichensetzung."
)
def _get_openai() -> OpenAI:
    """Return the module-level OpenAI client, creating it on first call.

    Returns:
        The shared ``OpenAI`` client instance.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is unset or empty.
    """
    global _client
    if _client is None:
        api_key = os.getenv("OPENAI_API_KEY", "").strip()
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY nicht gesetzt")
        _client = OpenAI(api_key=api_key)
    return _client
# ── Schemas ─────────────────────────────────────────────────


class TranscribeResponse(BaseModel):
    """Response payload for ``POST /v1/transcribe``."""

    success: bool
    transcript: str      # empty string when transcription produced no usable text
    duration_ms: int     # wall-clock time of the Whisper call, in milliseconds
    model: str           # Whisper model identifier used for this request
    request_id: str      # server-generated id ("srv_" + 12 hex chars)
    transcription_source: str = "backend"  # fixed marker: transcribed server-side
# ── Endpoints ───────────────────────────────────────────────


@app.get("/health")
def health():
    """Liveness probe: status, version, uptime and whether TLS is configured."""
    return {
        "status": "ok",
        "version": _APP_VERSION,
        "uptime_s": int(time.time() - _START_TIME),
        "tls": has_tls_config(),
    }
@app.post("/v1/transcribe", response_model=TranscribeResponse)
async def transcribe(
    request: Request,
    file: UploadFile = File(...),
    mode: str = Form("new"),
    language: str = Form("de"),
    prompt: str = Form(""),
    domain: str = Form("medical"),
    client_request_id: Optional[str] = Form(None),
):
    """Transcribe an uploaded audio file via OpenAI Whisper.

    Accepts .wav/.mp3/.m4a uploads, applies per-IP and per-token rate
    limiting plus size limits, and returns a TranscribeResponse.

    Raises:
        HTTPException: 400 (bad extension), 413 (too large),
            503 (missing API key), 500 (transcription failure).

    NOTE(review): ``mode`` and ``client_request_id`` are accepted but never
    used below — presumably reserved for clients; confirm before removing.
    """
    # --- Abuse protection: rate limit + request size limit ---
    ip = request.client.host if request.client else "unknown"
    tok = request.headers.get("X-API-Token", "none")

    default_ip_limiter.consume(f"ip:{ip}", cost=1.0)
    default_token_limiter.consume(f"tok:{tok}", cost=1.0)

    # Early reject on the declared Content-Length before reading the body.
    # NOTE(review): this header limit is 25 MB while the actual body check
    # below allows 50 MB — inconsistent; confirm which limit is intended.
    content_length = request.headers.get("content-length")
    if content_length is not None:
        try:
            if int(content_length) > 25 * 1024 * 1024:
                raise HTTPException(status_code=413, detail="Request too large")
        except ValueError:
            # ignore malformed header; downstream may still fail safely
            pass

    request_id = f"srv_{uuid.uuid4().hex[:12]}"

    # Only audio formats Whisper accepts and that we expect from clients.
    allowed_ext = (".wav", ".mp3", ".m4a")
    ext = os.path.splitext(file.filename or "upload.wav")[1].lower()
    if ext not in allowed_ext:
        raise HTTPException(400, f"Nur {', '.join(allowed_ext)} erlaubt")

    data = await file.read()
    if len(data) > 50 * 1024 * 1024:
        raise HTTPException(413, "Datei zu gross (max 50 MB)")

    tmp_path = None
    try:
        # Whisper upload needs a real file on disk; delete=False so the
        # handle can be reopened below, cleanup happens in ``finally``.
        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
            tmp.write(data)
            tmp_path = tmp.name

        t0 = time.perf_counter()

        client = _get_openai()
        with open(tmp_path, "rb") as f:
            # Explicit client prompt wins; otherwise pick by domain.
            chosen_prompt = (prompt or "").strip()
            dom = str(domain or "").strip().lower()
            if not chosen_prompt:
                chosen_prompt = WHISPER_GENERAL_PROMPT if dom == "general" else WHISPER_MEDICAL_PROMPT
            # NOTE(review): this is a blocking (sync) SDK call inside an
            # async handler — it stalls the event loop for the duration of
            # the upload; consider run_in_executor or the async client.
            resp = client.audio.transcriptions.create(
                model=WHISPER_MODEL,
                file=f,
                language=language,
                prompt=chosen_prompt,
            )

        text = getattr(resp, "text", "") or ""
        # Guard against Whisper echoing the prompt back instead of
        # transcribing: such output starts with the known prompt prefix.
        if text.strip().startswith(WHISPER_PROMPT_PREFIX):
            text = ""

        duration_ms = int((time.perf_counter() - t0) * 1000)

        return TranscribeResponse(
            success=True,
            transcript=text,
            duration_ms=duration_ms,
            model=WHISPER_MODEL,
            request_id=request_id,
        )
    except RuntimeError as e:
        # Missing OPENAI_API_KEY → service unavailable, not server error.
        raise HTTPException(503, str(e)) from e
    except Exception as e:
        raise HTTPException(500, f"Transkription fehlgeschlagen: {e}") from e
    finally:
        # Best-effort removal of the temp file; ignore races/permissions.
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
# Listen port; overridable via the TRANSCRIBE_PORT environment variable.
TRANSCRIBE_PORT = int(os.getenv("TRANSCRIBE_PORT", "8090"))


if __name__ == "__main__":
    import uvicorn

    # Aborts startup when TLS is required but not configured
    # (presumably controlled by AZA_TLS_REQUIRE — see module docstring).
    check_tls_or_exit()
    ssl_kwargs = get_uvicorn_ssl_kwargs()
    scheme = "https" if has_tls_config() else "http"
    print(f"Starte auf {scheme}://0.0.0.0:{TRANSCRIBE_PORT}")
    print(f"TLS: {'AKTIV' if has_tls_config() else 'DEAKTIVIERT'}")
    uvicorn.run(app, host="0.0.0.0", port=TRANSCRIBE_PORT, **ssl_kwargs)