AzA%20march%202026%20-%20Kopie/transcribe_server.py

# -*- coding: utf-8 -*-
"""
Minimaler Transkriptions-Server (FastAPI) mit TLS.

Start:
    python transcribe_server.py

    Oder ohne TLS (nur Entwicklung):
    AZA_TLS_REQUIRE=0 uvicorn transcribe_server:app --host 0.0.0.0 --port 8090

Test:
    curl https://localhost:8090/health --insecure
    curl -X POST https://localhost:8090/v1/transcribe -F "file=@aufnahme.wav" --insecure
"""

import os
import sys
import time
import uuid
import tempfile
from typing import Optional

from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from openai import OpenAI

from aza_tls import check_tls_or_exit, get_uvicorn_ssl_kwargs, has_tls_config
from aza_rate_limit import default_ip_limiter, default_token_limiter

# Load variables from a local .env file (if present) so that the os.getenv()
# lookups further down in this module see them.
load_dotenv()

# Version string exposed through the FastAPI metadata and the /health payload.
_APP_VERSION = "0.1.0"
# Wall-clock timestamp taken at import time; /health reports uptime relative to it.
_START_TIME = time.time()

app = FastAPI(title="AZA Transkriptions-Service", version=_APP_VERSION)

# NOTE(review): CORS is wide open here (every origin, method and header is
# allowed). Confirm this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── OpenAI-Client (Singleton) ──────────────────────────────

# Lazily created OpenAI client shared by all requests; populated by _get_openai().
_client: Optional[OpenAI] = None

# Model name sent to the transcription API and echoed back in every response.
WHISPER_MODEL = "whisper-1"

# Default prompt used when the caller supplies no prompt and the domain is not
# "general": transcription instructions followed by a vocabulary list of
# medical terms and abbreviations.
# NOTE(review): the month names are hard-coded for 2026 and will need updating.
WHISPER_MEDICAL_PROMPT = (
    "Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "
    "Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "
    "Medizinische Dokumentation auf Deutsch. "
    "Capillitium, Fotodynamische Therapie, PDT, Basalzellkarzinom, Plattenepithelkarzinom, "
    "Melanom, Exzision, Biopsie, Kryotherapie, Kürettage, Histologie, Dermatoskopie, "
    "Anamnese, Diagnose, Therapie, Procedere, subjektiv, objektiv, "
    "Abdomen, Thorax, Extremitäten, zervikal, lumbal, thorakal, sakral, "
    "Sonographie, Röntgen, MRI, CT, EKG, Laborwerte, Blutbild, "
    "Hypertonie, Diabetes mellitus, Hypercholesterinämie, Hypothyreose, "
    "Antikoagulation, Thrombozytenaggregationshemmer, NSAR, ACE-Hemmer, "
    "Immunsuppression, Kortikosteroide, Biologika, Methotrexat, "
    "Psoriasis, Ekzem, Dermatitis, Urtikaria, Alopezie, Akne, Rosazea, "
    "Aktinische Keratose, Morbus Bowen, Lentigo maligna, "
    "Januar 2026, Februar 2026, März 2026, April 2026, Mai 2026, "
    "Status nach, Z.n., s/p, i.v., p.o., s.c., "
    "ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief."
)

# A transcript that starts with this text is discarded by the /v1/transcribe
# handler (treated as the model echoing the prompt instead of transcribing).
# NOTE(review): this is the third sentence of WHISPER_MEDICAL_PROMPT, not its
# beginning, and it does not occur in WHISPER_GENERAL_PROMPT — confirm that the
# echo check covers the cases it is meant to.
WHISPER_PROMPT_PREFIX = "Medizinische Dokumentation auf Deutsch"
# Default prompt used when the caller supplies no prompt and domain == "general".
WHISPER_GENERAL_PROMPT = (
    "Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "
    "Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "
    "Allgemeines Diktat auf Deutsch mit sinnvoller Zeichensetzung."
)


def _get_openai() -> OpenAI:
    """Return the shared OpenAI client, constructing it on the first call.

    The API key is read from the ``OPENAI_API_KEY`` environment variable and
    stripped of surrounding whitespace. The created client is cached in the
    module-level ``_client`` and reused afterwards.

    Raises:
        RuntimeError: if ``OPENAI_API_KEY`` is unset or blank.
    """
    global _client

    # Fast path: a client was already built by an earlier call.
    if _client is not None:
        return _client

    key = os.getenv("OPENAI_API_KEY", "").strip()
    if not key:
        raise RuntimeError("OPENAI_API_KEY nicht gesetzt")

    _client = OpenAI(api_key=key)
    return _client


# ── Schemas ─────────────────────────────────────────────────

# Response body of POST /v1/transcribe.
# (Documented with comments rather than a docstring so the generated schema
# description stays unchanged.)
class TranscribeResponse(BaseModel):
    # The handler sets this to True on the success path; failures are reported
    # as HTTP errors instead of a response with success=False.
    success: bool
    # Transcribed text; empty when the result was discarded as a prompt echo.
    transcript: str
    # Elapsed time of the transcription step in milliseconds.
    duration_ms: int
    # Name of the model that produced the transcript (WHISPER_MODEL).
    model: str
    # Server-generated id of the form "srv_" + 12 hex characters.
    request_id: str
    # Origin marker of the transcript; this server always reports the default.
    transcription_source: str = "backend"


# ── Endpoints ───────────────────────────────────────────────

# Liveness endpoint: reports a fixed "ok" status, the application version,
# the whole seconds elapsed since module import, and whether a TLS
# configuration is present according to has_tls_config().
# (Kept as comments instead of a docstring so the OpenAPI description of the
# route does not change.)
@app.get("/health")
def health():
    uptime_seconds = int(time.time() - _START_TIME)
    payload = {
        "status": "ok",
        "version": _APP_VERSION,
        "uptime_s": uptime_seconds,
        "tls": has_tls_config(),
    }
    return payload


# Largest request body accepted, judged by the Content-Length header.
_MAX_REQUEST_BYTES = 25 * 1024 * 1024
# Largest audio payload accepted, judged by the bytes actually read.
# NOTE(review): this is larger than _MAX_REQUEST_BYTES, so it can only trigger
# for requests without a usable Content-Length header. Confirm which of the two
# limits is the intended one (the user-facing message below says 50 MB).
_MAX_FILE_BYTES = 50 * 1024 * 1024


def _call_whisper(client: OpenAI, audio_path: str, language: str, prompt: str) -> str:
    """Transcribe the audio file at ``audio_path`` and return the raw text.

    This function is synchronous and blocks until the API call returns, so the
    request handler runs it in a worker thread instead of on the event loop.

    Args:
        client: OpenAI client to use for the request.
        audio_path: Path of the audio file to upload.
        language: Language code forwarded to the API.
        prompt: Prompt text forwarded to the API.

    Returns:
        The ``text`` attribute of the API response, or ``""`` if it is missing
        or empty.
    """
    with open(audio_path, "rb") as audio:
        resp = client.audio.transcriptions.create(
            model=WHISPER_MODEL,
            file=audio,
            language=language,
            prompt=prompt,
        )
    return getattr(resp, "text", "") or ""


# POST /v1/transcribe — accepts one audio upload (.wav/.mp3/.m4a) as multipart
# form data and returns its transcript.
#
# Form fields:
#   file      audio upload (required)
#   language  language code passed to the model (default "de")
#   prompt    optional custom prompt; when blank, a built-in prompt is chosen
#   domain    "general" selects the general prompt, anything else the medical one
#   mode, client_request_id   accepted for client compatibility, not used here
#
# Errors: 400 unsupported extension, 413 request/file too large,
# 503 RuntimeError (e.g. missing API key), 500 any other failure.
#
# (Documented with comments instead of a docstring so the OpenAPI description
# of the route does not change.)
@app.post("/v1/transcribe", response_model=TranscribeResponse)
async def transcribe(
    request: Request,
    file: UploadFile = File(...),
    mode: str = Form("new"),
    language: str = Form("de"),
    prompt: str = Form(""),
    domain: str = Form("medical"),
    client_request_id: Optional[str] = Form(None),
):
    # Local import: keeps this block self-contained (fastapi is already a
    # dependency of this module).
    from fastapi.concurrency import run_in_threadpool

    # --- Abuse protection: rate limit + request size limit ---
    ip = request.client.host if request.client else "unknown"
    tok = request.headers.get("X-API-Token", "none")

    # NOTE(review): the return values of consume() are ignored; presumably the
    # limiters raise when the budget is exhausted — confirm in aza_rate_limit.
    default_ip_limiter.consume(f"ip:{ip}", cost=1.0)
    default_token_limiter.consume(f"tok:{tok}", cost=1.0)

    content_length = request.headers.get("content-length")
    if content_length is not None:
        try:
            declared_size = int(content_length)
        except ValueError:
            # Malformed header: skip this early check; the size of the bytes
            # actually read is still enforced below.
            declared_size = None
        if declared_size is not None and declared_size > _MAX_REQUEST_BYTES:
            raise HTTPException(status_code=413, detail="Request too large")

    request_id = f"srv_{uuid.uuid4().hex[:12]}"

    allowed_ext = (".wav", ".mp3", ".m4a")
    ext = os.path.splitext(file.filename or "upload.wav")[1].lower()
    if ext not in allowed_ext:
        raise HTTPException(400, f"Nur {', '.join(allowed_ext)} erlaubt")

    # Read at most one byte beyond the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large file into memory.
    data = await file.read(_MAX_FILE_BYTES + 1)
    if len(data) > _MAX_FILE_BYTES:
        raise HTTPException(413, "Datei zu gross (max 50 MB)")

    # A caller-supplied prompt wins; otherwise pick the built-in prompt by domain.
    chosen_prompt = (prompt or "").strip()
    dom = str(domain or "").strip().lower()
    if not chosen_prompt:
        chosen_prompt = WHISPER_GENERAL_PROMPT if dom == "general" else WHISPER_MEDICAL_PROMPT

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
            # Record the path before writing so the finally-clause removes the
            # file even if the write itself fails.
            tmp_path = tmp.name
            tmp.write(data)

        t0 = time.perf_counter()

        # Resolve the client on the event-loop thread, then run the blocking
        # API call in a worker thread so other requests keep being served.
        client = _get_openai()
        text = await run_in_threadpool(
            _call_whisper, client, tmp_path, language, chosen_prompt
        )

        # Discard output that starts with the prompt marker (prompt echo).
        if text.strip().startswith(WHISPER_PROMPT_PREFIX):
            text = ""

        duration_ms = int((time.perf_counter() - t0) * 1000)

        return TranscribeResponse(
            success=True,
            transcript=text,
            duration_ms=duration_ms,
            model=WHISPER_MODEL,
            request_id=request_id,
        )
    except RuntimeError as e:
        raise HTTPException(503, str(e)) from e
    except Exception as e:
        # NOTE(review): the exception text is returned to the client; consider
        # logging it server-side and returning a generic message instead.
        raise HTTPException(500, f"Transkription fehlgeschlagen: {e}") from e
    finally:
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the
                # actual response or error.
                pass


# TCP port the server listens on; overridable via the TRANSCRIBE_PORT env var.
TRANSCRIBE_PORT = int(os.getenv("TRANSCRIBE_PORT", "8090"))

if __name__ == "__main__":
    # Script entry point: check the TLS setup, announce the listen address,
    # then hand the app to uvicorn.
    import uvicorn

    check_tls_or_exit()
    tls_options = get_uvicorn_ssl_kwargs()

    if has_tls_config():
        url_scheme = "https"
    else:
        url_scheme = "http"
    print(f"Starte auf {url_scheme}://0.0.0.0:{TRANSCRIBE_PORT}")

    if has_tls_config():
        tls_state = "AKTIV"
    else:
        tls_state = "DEAKTIVIERT"
    print(f"TLS: {tls_state}")

    uvicorn.run(app, host="0.0.0.0", port=TRANSCRIBE_PORT, **tls_options)
Initial commit 2026-03-25 13:42:48 +01:00			`# -- coding: utf-8 --`
			`"""`
			`Minimaler Transkriptions-Server (FastAPI) mit TLS.`

			`Start:`
			`python transcribe_server.py`

			`Oder ohne TLS (nur Entwicklung):`
			`AZA_TLS_REQUIRE=0 uvicorn transcribe_server:app --host 0.0.0.0 --port 8090`

			`Test:`
			`curl https://localhost:8090/health --insecure`
			`curl -X POST https://localhost:8090/v1/transcribe -F "file=@aufnahme.wav" --insecure`
			`"""`

			`import os`
			`import sys`
			`import time`
			`import uuid`
			`import tempfile`
			`from typing import Optional`

			`from dotenv import load_dotenv`
			`from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request`
			`from fastapi.middleware.cors import CORSMiddleware`
			`from pydantic import BaseModel`
			`from openai import OpenAI`

			`from aza_tls import check_tls_or_exit, get_uvicorn_ssl_kwargs, has_tls_config`
			`from aza_rate_limit import default_ip_limiter, default_token_limiter`

			`load_dotenv()`

			`_APP_VERSION = "0.1.0"`
			`_START_TIME = time.time()`

			`app = FastAPI(title="AZA Transkriptions-Service", version=_APP_VERSION)`

			`app.add_middleware(`
			`CORSMiddleware,`
			`allow_origins=["*"],`
			`allow_methods=["*"],`
			`allow_headers=["*"],`
			`)`

			`# ── OpenAI-Client (Singleton) ──────────────────────────────`

			`_client: Optional[OpenAI] = None`

			`WHISPER_MODEL = "whisper-1"`

			`WHISPER_MEDICAL_PROMPT = (`
			`"Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "`
			`"Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "`
			`"Medizinische Dokumentation auf Deutsch. "`
			`"Capillitium, Fotodynamische Therapie, PDT, Basalzellkarzinom, Plattenepithelkarzinom, "`
			`"Melanom, Exzision, Biopsie, Kryotherapie, Kürettage, Histologie, Dermatoskopie, "`
			`"Anamnese, Diagnose, Therapie, Procedere, subjektiv, objektiv, "`
			`"Abdomen, Thorax, Extremitäten, zervikal, lumbal, thorakal, sakral, "`
			`"Sonographie, Röntgen, MRI, CT, EKG, Laborwerte, Blutbild, "`
			`"Hypertonie, Diabetes mellitus, Hypercholesterinämie, Hypothyreose, "`
			`"Antikoagulation, Thrombozytenaggregationshemmer, NSAR, ACE-Hemmer, "`
			`"Immunsuppression, Kortikosteroide, Biologika, Methotrexat, "`
			`"Psoriasis, Ekzem, Dermatitis, Urtikaria, Alopezie, Akne, Rosazea, "`
			`"Aktinische Keratose, Morbus Bowen, Lentigo maligna, "`
			`"Januar 2026, Februar 2026, März 2026, April 2026, Mai 2026, "`
			`"Status nach, Z.n., s/p, i.v., p.o., s.c., "`
			`"ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief."`
			`)`

			`WHISPER_PROMPT_PREFIX = "Medizinische Dokumentation auf Deutsch"`
			`WHISPER_GENERAL_PROMPT = (`
			`"Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "`
			`"Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "`
			`"Allgemeines Diktat auf Deutsch mit sinnvoller Zeichensetzung."`
			`)`


			`def _get_openai() -> OpenAI:`
			`global _client`
			`if _client is None:`
			`api_key = os.getenv("OPENAI_API_KEY", "").strip()`
			`if not api_key:`
			`raise RuntimeError("OPENAI_API_KEY nicht gesetzt")`
			`_client = OpenAI(api_key=api_key)`
			`return _client`


			`# ── Schemas ─────────────────────────────────────────────────`

			`class TranscribeResponse(BaseModel):`
			`success: bool`
			`transcript: str`
			`duration_ms: int`
			`model: str`
			`request_id: str`
			`transcription_source: str = "backend"`


			`# ── Endpoints ───────────────────────────────────────────────`

			`@app.get("/health")`
			`def health():`
			`return {`
			`"status": "ok",`
			`"version": _APP_VERSION,`
			`"uptime_s": int(time.time() - _START_TIME),`
			`"tls": has_tls_config(),`
			`}`


			`@app.post("/v1/transcribe", response_model=TranscribeResponse)`
			`async def transcribe(`
			`request: Request,`
			`file: UploadFile = File(...),`
			`mode: str = Form("new"),`
			`language: str = Form("de"),`
			`prompt: str = Form(""),`
			`domain: str = Form("medical"),`
			`client_request_id: Optional[str] = Form(None),`
			`):`
			`# --- Abuse protection: rate limit + request size limit ---`
			`ip = request.client.host if request.client else "unknown"`
			`tok = request.headers.get("X-API-Token", "none")`

			`default_ip_limiter.consume(f"ip:{ip}", cost=1.0)`
			`default_token_limiter.consume(f"tok:{tok}", cost=1.0)`

			`content_length = request.headers.get("content-length")`
			`if content_length is not None:`
			`try:`
			`if int(content_length) > 25 * 1024 * 1024:`
			`raise HTTPException(status_code=413, detail="Request too large")`
			`except ValueError:`
			`# ignore malformed header; downstream may still fail safely`
			`pass`

			`request_id = f"srv_{uuid.uuid4().hex[:12]}"`

			`allowed_ext = (".wav", ".mp3", ".m4a")`
			`ext = os.path.splitext(file.filename or "upload.wav")[1].lower()`
			`if ext not in allowed_ext:`
			`raise HTTPException(400, f"Nur {', '.join(allowed_ext)} erlaubt")`

			`data = await file.read()`
			`if len(data) > 50 * 1024 * 1024:`
			`raise HTTPException(413, "Datei zu gross (max 50 MB)")`

			`tmp_path = None`
			`try:`
			`with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:`
			`tmp.write(data)`
			`tmp_path = tmp.name`

			`t0 = time.perf_counter()`

			`client = _get_openai()`
			`with open(tmp_path, "rb") as f:`
			`chosen_prompt = (prompt or "").strip()`
			`dom = str(domain or "").strip().lower()`
			`if not chosen_prompt:`
			`chosen_prompt = WHISPER_GENERAL_PROMPT if dom == "general" else WHISPER_MEDICAL_PROMPT`
			`resp = client.audio.transcriptions.create(`
			`model=WHISPER_MODEL,`
			`file=f,`
			`language=language,`
			`prompt=chosen_prompt,`
			`)`

			`text = getattr(resp, "text", "") or ""`
			`if text.strip().startswith(WHISPER_PROMPT_PREFIX):`
			`text = ""`

			`duration_ms = int((time.perf_counter() - t0) * 1000)`

			`return TranscribeResponse(`
			`success=True,`
			`transcript=text,`
			`duration_ms=duration_ms,`
			`model=WHISPER_MODEL,`
			`request_id=request_id,`
			`)`
			`except RuntimeError as e:`
			`raise HTTPException(503, str(e))`
			`except Exception as e:`
			`raise HTTPException(500, f"Transkription fehlgeschlagen: {e}")`
			`finally:`
			`if tmp_path:`
			`try:`
			`os.unlink(tmp_path)`
			`except OSError:`
			`pass`


			`TRANSCRIBE_PORT = int(os.getenv("TRANSCRIBE_PORT", "8090"))`

			`if __name__ == "__main__":`
			`import uvicorn`
			`check_tls_or_exit()`
			`ssl_kwargs = get_uvicorn_ssl_kwargs()`
			`scheme = "https" if has_tls_config() else "http"`
			`print(f"Starte auf {scheme}://0.0.0.0:{TRANSCRIBE_PORT}")`
			`print(f"TLS: {'AKTIV' if has_tls_config() else 'DEAKTIVIERT'}")`
			`uvicorn.run(app, host="0.0.0.0", port=TRANSCRIBE_PORT, **ssl_kwargs)`