# -*- coding: utf-8 -*-
"""
Minimal transcription server (FastAPI) with TLS.

Start:
    python transcribe_server.py

Or without TLS (development only):
    AZA_TLS_REQUIRE=0 uvicorn transcribe_server:app --host 0.0.0.0 --port 8090

Test:
    curl https://localhost:8090/health --insecure
    curl -X POST https://localhost:8090/v1/transcribe -F "file=@aufnahme.wav" --insecure
"""

import logging
import os
import sys
import time
import uuid
import tempfile
from typing import Optional

from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from openai import OpenAI

from aza_tls import check_tls_or_exit, get_uvicorn_ssl_kwargs, has_tls_config
from aza_rate_limit import default_ip_limiter, default_token_limiter

load_dotenv()

logger = logging.getLogger(__name__)

_APP_VERSION = "0.1.0"
_START_TIME = time.time()

# Single upload limit used by both the Content-Length pre-check and the
# body-size check, so the two can no longer disagree.
MAX_UPLOAD_BYTES = 25 * 1024 * 1024

app = FastAPI(title="AZA Transkriptions-Service", version=_APP_VERSION)

# NOTE(review): CORS is fully open (any origin, method and header) — confirm
# this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── OpenAI client (singleton) ──────────────────────────────

_client: Optional[OpenAI] = None

WHISPER_MODEL = "whisper-1"

# Default prompt for domain != "general": transcription instructions followed
# by a vocabulary list of medical terms.
WHISPER_MEDICAL_PROMPT = (
    "Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "
    "Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "
    "Medizinische Dokumentation auf Deutsch. "
    "Capillitium, Fotodynamische Therapie, PDT, Basalzellkarzinom, Plattenepithelkarzinom, "
    "Melanom, Exzision, Biopsie, Kryotherapie, Kürettage, Histologie, Dermatoskopie, "
    "Anamnese, Diagnose, Therapie, Procedere, subjektiv, objektiv, "
    "Abdomen, Thorax, Extremitäten, zervikal, lumbal, thorakal, sakral, "
    "Sonographie, Röntgen, MRI, CT, EKG, Laborwerte, Blutbild, "
    "Hypertonie, Diabetes mellitus, Hypercholesterinämie, Hypothyreose, "
    "Antikoagulation, Thrombozytenaggregationshemmer, NSAR, ACE-Hemmer, "
    "Immunsuppression, Kortikosteroide, Biologika, Methotrexat, "
    "Psoriasis, Ekzem, Dermatitis, Urtikaria, Alopezie, Akne, Rosazea, "
    "Aktinische Keratose, Morbus Bowen, Lentigo maligna, "
    "Januar 2026, Februar 2026, März 2026, April 2026, Mai 2026, "
    "Status nach, Z.n., s/p, i.v., p.o., s.c., "
    "ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief."
)

# Transcripts that start with this text are discarded in _run_whisper().
WHISPER_PROMPT_PREFIX = "Medizinische Dokumentation auf Deutsch"

# Default prompt for domain == "general".
WHISPER_GENERAL_PROMPT = (
    "Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "
    "Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "
    "Allgemeines Diktat auf Deutsch mit sinnvoller Zeichensetzung."
)


def _get_openai() -> OpenAI:
    """Return the shared OpenAI client, creating it on first use.

    Raises:
        RuntimeError: if the OPENAI_API_KEY environment variable is unset
            or blank.
    """
    global _client
    if _client is None:
        api_key = os.getenv("OPENAI_API_KEY", "").strip()
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY nicht gesetzt")
        _client = OpenAI(api_key=api_key)
    return _client


def _choose_prompt(prompt: str, domain: str) -> str:
    """Return the caller's prompt, or the domain default when it is blank."""
    chosen = (prompt or "").strip()
    if chosen:
        return chosen
    dom = str(domain or "").strip().lower()
    return WHISPER_GENERAL_PROMPT if dom == "general" else WHISPER_MEDICAL_PROMPT


def _run_whisper(client: OpenAI, path: str, language: str, prompt: str) -> str:
    """Transcribe the audio file at *path* and return the transcript text.

    This is a blocking call; the endpoint runs it in a worker thread.
    """
    with open(path, "rb") as audio:
        resp = client.audio.transcriptions.create(
            model=WHISPER_MODEL,
            file=audio,
            language=language,
            prompt=prompt,
        )
    text = getattr(resp, "text", "") or ""
    # Presumably guards against the model echoing the prompt instead of
    # transcribing speech — such output is replaced by an empty transcript.
    if text.strip().startswith(WHISPER_PROMPT_PREFIX):
        text = ""
    return text


# ── Schemas ─────────────────────────────────────────────────

class TranscribeResponse(BaseModel):
    """Response body of POST /v1/transcribe."""

    success: bool
    transcript: str
    duration_ms: int  # wall-clock time spent on the transcription step
    model: str
    request_id: str  # server-generated id of the form "srv_<12 hex chars>"
    transcription_source: str = "backend"


# ── Endpoints ───────────────────────────────────────────────

@app.get("/health")
def health():
    """Report service status, version, uptime in seconds and TLS state."""
    return {
        "status": "ok",
        "version": _APP_VERSION,
        "uptime_s": int(time.time() - _START_TIME),
        "tls": has_tls_config(),
    }


@app.post("/v1/transcribe", response_model=TranscribeResponse)
async def transcribe(
    request: Request,
    file: UploadFile = File(...),
    mode: str = Form("new"),
    language: str = Form("de"),
    prompt: str = Form(""),
    domain: str = Form("medical"),
    client_request_id: Optional[str] = Form(None),
):
    """Transcribe an uploaded audio file with the Whisper model.

    Args:
        request: Incoming request; used for client IP and headers.
        file: Audio upload; only .wav, .mp3 and .m4a are accepted.
        mode: Accepted for API compatibility; not used by this handler.
        language: Language code forwarded to the transcription API.
        prompt: Optional custom prompt; when blank, a default is chosen
            from *domain*.
        domain: "general" selects the general prompt, anything else the
            medical prompt.
        client_request_id: Accepted for API compatibility; not used here.

    Raises:
        HTTPException: 400 for a disallowed file extension, 413 when the
            request or file exceeds MAX_UPLOAD_BYTES, 503 on RuntimeError
            (e.g. missing API key), 500 on any other transcription failure.
    """
    # --- Abuse protection: rate limit + request size limit ---
    ip = request.client.host if request.client else "unknown"
    tok = request.headers.get("X-API-Token", "none")
    # NOTE(review): consume() presumably rejects the request once a bucket is
    # exhausted — confirm against aza_rate_limit.
    default_ip_limiter.consume(f"ip:{ip}", cost=1.0)
    default_token_limiter.consume(f"tok:{tok}", cost=1.0)

    content_length = request.headers.get("content-length")
    if content_length is not None:
        try:
            declared_size = int(content_length)
        except ValueError:
            # Malformed header: ignore it, the body-size check below still applies.
            declared_size = None
        if declared_size is not None and declared_size > MAX_UPLOAD_BYTES:
            raise HTTPException(status_code=413, detail="Request too large")

    request_id = f"srv_{uuid.uuid4().hex[:12]}"

    allowed_ext = (".wav", ".mp3", ".m4a")
    ext = os.path.splitext(file.filename or "upload.wav")[1].lower()
    if ext not in allowed_ext:
        raise HTTPException(400, f"Nur {', '.join(allowed_ext)} erlaubt")

    # Read at most one byte beyond the limit: enough to detect an oversized
    # upload without loading an arbitrarily large file into memory.
    data = await file.read(MAX_UPLOAD_BYTES + 1)
    if len(data) > MAX_UPLOAD_BYTES:
        raise HTTPException(
            413, f"Datei zu gross (max {MAX_UPLOAD_BYTES // (1024 * 1024)} MB)"
        )

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
            # Record the path before writing so the file is removed in
            # `finally` even if the write fails.
            tmp_path = tmp.name
            tmp.write(data)

        t0 = time.perf_counter()
        client = _get_openai()
        chosen_prompt = _choose_prompt(prompt, domain)

        # The OpenAI SDK call is synchronous; run it in a worker thread so the
        # event loop keeps serving other requests in the meantime.
        text = await run_in_threadpool(
            _run_whisper, client, tmp_path, language, chosen_prompt
        )

        duration_ms = int((time.perf_counter() - t0) * 1000)

        return TranscribeResponse(
            success=True,
            transcript=text,
            duration_ms=duration_ms,
            model=WHISPER_MODEL,
            request_id=request_id,
        )
    except RuntimeError as e:
        raise HTTPException(503, str(e)) from e
    except Exception as e:
        # Keep the traceback in the server log; the client only gets the message.
        logger.exception("Transcription failed (request_id=%s)", request_id)
        raise HTTPException(500, f"Transkription fehlgeschlagen: {e}") from e
    finally:
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not fail the request.
                pass


TRANSCRIBE_PORT = int(os.getenv("TRANSCRIBE_PORT", "8090"))

if __name__ == "__main__":
    import uvicorn

    check_tls_or_exit()
    ssl_kwargs = get_uvicorn_ssl_kwargs()
    scheme = "https" if has_tls_config() else "http"

    print(f"Starte auf {scheme}://0.0.0.0:{TRANSCRIBE_PORT}")
    print(f"TLS: {'AKTIV' if has_tls_config() else 'DEAKTIVIERT'}")
    uvicorn.run(app, host="0.0.0.0", port=TRANSCRIBE_PORT, **ssl_kwargs)