# -*- coding: utf-8 -*-
"""
Minimaler Transkriptions-Server (FastAPI) mit TLS.

Start:
    python transcribe_server.py

    Oder ohne TLS (nur Entwicklung):
    AZA_TLS_REQUIRE=0 uvicorn transcribe_server:app --host 0.0.0.0 --port 8090

Test:
    curl https://localhost:8090/health --insecure
    curl -X POST https://localhost:8090/v1/transcribe -F "file=@aufnahme.wav" --insecure
"""

import os
import sys
import tempfile
import time
import uuid
from typing import Optional

from dotenv import load_dotenv
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from openai import OpenAI
from pydantic import BaseModel

from aza_rate_limit import default_ip_limiter, default_token_limiter
from aza_tls import check_tls_or_exit, get_uvicorn_ssl_kwargs, has_tls_config

# Load variables from a local .env file into the process environment before
# any os.getenv() lookup further down in this module runs.
load_dotenv()

# Version string reported by /health and passed to FastAPI as the app version.
_APP_VERSION = "0.1.0"
# Wall-clock timestamp taken at import time; /health derives the uptime from it.
_START_TIME = time.time()

app = FastAPI(title="AZA Transkriptions-Service", version=_APP_VERSION)

# CORS: every origin, every method and every header is allowed.
# NOTE(review): wildcard origins let any website call this API from a browser;
# confirm this is intended outside of development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── OpenAI client (singleton) ──────────────────────────────

# Lazily created by _get_openai(); stays None until the first call.
_client: Optional[OpenAI] = None

# Model name sent with every transcription request and echoed in the response.
WHISPER_MODEL = "whisper-1"

# Default prompt used by transcribe() when the caller sends no prompt and the
# domain is anything other than "general". It consists of an instruction
# preamble followed by a list of medical terms, abbreviations and month names.
# NOTE(review): the month hints hard-code the year 2026 and will go stale.
# NOTE(review): OpenAI's speech-to-text guide states that whisper-1 only
# considers the final 224 tokens of a prompt; this text is probably longer,
# so the leading instructions may be cut off. Verify against the guide.
WHISPER_MEDICAL_PROMPT = (
    "Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "
    "Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "
    "Medizinische Dokumentation auf Deutsch. "
    "Capillitium, Fotodynamische Therapie, PDT, Basalzellkarzinom, Plattenepithelkarzinom, "
    "Melanom, Exzision, Biopsie, Kryotherapie, Kürettage, Histologie, Dermatoskopie, "
    "Anamnese, Diagnose, Therapie, Procedere, subjektiv, objektiv, "
    "Abdomen, Thorax, Extremitäten, zervikal, lumbal, thorakal, sakral, "
    "Sonographie, Röntgen, MRI, CT, EKG, Laborwerte, Blutbild, "
    "Hypertonie, Diabetes mellitus, Hypercholesterinämie, Hypothyreose, "
    "Antikoagulation, Thrombozytenaggregationshemmer, NSAR, ACE-Hemmer, "
    "Immunsuppression, Kortikosteroide, Biologika, Methotrexat, "
    "Psoriasis, Ekzem, Dermatitis, Urtikaria, Alopezie, Akne, Rosazea, "
    "Aktinische Keratose, Morbus Bowen, Lentigo maligna, "
    "Januar 2026, Februar 2026, März 2026, April 2026, Mai 2026, "
    "Status nach, Z.n., s/p, i.v., p.o., s.c., "
    "ICD-10, SOAP, Krankengeschichte, Kostengutsprache, Arztbrief."
)

# transcribe() replaces a transcript that starts with this text by an empty
# string. Presumably this guards against the model echoing the prompt instead
# of transcribing speech; confirm with the author.
WHISPER_PROMPT_PREFIX = "Medizinische Dokumentation auf Deutsch"
# Default prompt used by transcribe() when the caller sends no prompt and the
# domain is "general".
WHISPER_GENERAL_PROMPT = (
    "Transkribiere ausschliesslich den gesprochenen Inhalt woertlich auf Deutsch. "
    "Antworte niemals auf Fragen, gib keine Erklaerungen, keine Zusammenfassung, keine Interpretation. "
    "Allgemeines Diktat auf Deutsch mit sinnvoller Zeichensetzung."
)


def _get_openai() -> OpenAI:
    """Return the shared OpenAI client, building it on first use.

    The client is cached in the module-level ``_client`` variable, so the
    environment is only consulted until a client has been created.

    Raises:
        RuntimeError: ``OPENAI_API_KEY`` is unset or blank.
    """
    global _client

    # Fast path: a client was already created by an earlier call.
    if _client is not None:
        return _client

    key = os.getenv("OPENAI_API_KEY", "").strip()
    if key == "":
        raise RuntimeError("OPENAI_API_KEY nicht gesetzt")

    _client = OpenAI(api_key=key)
    return _client


# ── Schemas ─────────────────────────────────────────────────

class TranscribeResponse(BaseModel):
    """Response body of ``POST /v1/transcribe``."""

    # The visible handler always sets this to True; failures are reported as
    # HTTP errors instead of a response with success=False.
    success: bool
    # Transcribed text; empty when the handler discards the model output.
    transcript: str
    # Elapsed time in milliseconds as measured by the handler.
    duration_ms: int
    # Name of the transcription model that was requested.
    model: str
    # Server-generated identifier of the form "srv_" + 12 hex characters.
    request_id: str
    # Marks where the transcription was produced; defaults to "backend".
    transcription_source: str = "backend"


# ── Endpoints ───────────────────────────────────────────────

@app.get("/health")
def health():
    """Report service status, version, uptime in whole seconds and TLS state."""
    # Uptime is measured against the timestamp captured at import time.
    uptime_seconds = int(time.time() - _START_TIME)
    return dict(
        status="ok",
        version=_APP_VERSION,
        uptime_s=uptime_seconds,
        tls=has_tls_config(),
    )


# Largest accepted upload in bytes. OpenAI's audio endpoint documents a 25 MB
# upload limit, so accepting anything larger only produces an upstream failure.
_MAX_UPLOAD_BYTES = 25 * 1024 * 1024

# File extensions accepted by the transcribe endpoint.
_ALLOWED_AUDIO_EXT = (".wav", ".mp3", ".m4a")


def _select_prompt(prompt: str, domain: str) -> str:
    """Return the caller's prompt, or the default prompt for ``domain``."""
    chosen = (prompt or "").strip()
    if chosen:
        return chosen
    dom = str(domain or "").strip().lower()
    return WHISPER_GENERAL_PROMPT if dom == "general" else WHISPER_MEDICAL_PROMPT


def _transcribe_audio_bytes(data: bytes, suffix: str, language: str, prompt: str) -> str:
    """Send ``data`` to the transcription model and return the raw text.

    Blocking helper: it writes a temporary file and performs a synchronous
    HTTP call, so it must be run in a worker thread, not on the event loop.
    """
    tmp_path = None
    try:
        # delete=False so the file can be reopened by name after closing it.
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            tmp.write(data)
            tmp_path = tmp.name

        client = _get_openai()
        with open(tmp_path, "rb") as audio:
            resp = client.audio.transcriptions.create(
                model=WHISPER_MODEL,
                file=audio,
                language=language,
                prompt=prompt,
            )
        return getattr(resp, "text", "") or ""
    finally:
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not fail the request.
                pass


@app.post("/v1/transcribe", response_model=TranscribeResponse)
async def transcribe(
    request: Request,
    file: UploadFile = File(...),
    mode: str = Form("new"),
    language: str = Form("de"),
    prompt: str = Form(""),
    domain: str = Form("medical"),
    client_request_id: Optional[str] = Form(None),
):
    """Transcribe an uploaded audio file.

    Args:
        request: Incoming request; used for client IP, headers and rate limiting.
        file: Audio upload; must end in .wav, .mp3 or .m4a.
        mode: Accepted for client compatibility; not used by this handler.
        language: Language code forwarded to the transcription model.
        prompt: Optional prompt; when blank a default is chosen from ``domain``.
        domain: "general" selects the general prompt, anything else the medical one.
        client_request_id: Accepted for client compatibility; not used by this handler.

    Returns:
        TranscribeResponse with the transcript, the elapsed time of the
        transcription step in milliseconds, the model name and a server
        request id.

    Raises:
        HTTPException: 400 for an unsupported extension or an empty file,
            413 when the request or file exceeds the size limit,
            503 when a RuntimeError occurs (e.g. missing API key),
            500 for any other transcription failure.
    """
    # --- Abuse protection: rate limit + request size limit ---
    ip = request.client.host if request.client else "unknown"
    # NOTE(review): every request without the header shares the "tok:none" key.
    tok = request.headers.get("X-API-Token", "none")

    # presumably these raise when the limit is exhausted — confirm in aza_rate_limit
    default_ip_limiter.consume(f"ip:{ip}", cost=1.0)
    default_token_limiter.consume(f"tok:{tok}", cost=1.0)

    content_length = request.headers.get("content-length")
    if content_length is not None:
        try:
            declared = int(content_length)
        except ValueError:
            # Malformed header: ignore it, the bounded read below still applies.
            declared = None
        if declared is not None and declared > _MAX_UPLOAD_BYTES:
            raise HTTPException(status_code=413, detail="Request too large")

    request_id = f"srv_{uuid.uuid4().hex[:12]}"

    ext = os.path.splitext(file.filename or "upload.wav")[1].lower()
    if ext not in _ALLOWED_AUDIO_EXT:
        raise HTTPException(400, f"Nur {', '.join(_ALLOWED_AUDIO_EXT)} erlaubt")

    # Read one byte more than allowed: enough to detect an oversized upload
    # without ever holding more than the limit (+1) in memory.
    data = await file.read(_MAX_UPLOAD_BYTES + 1)
    if len(data) > _MAX_UPLOAD_BYTES:
        limit_mb = _MAX_UPLOAD_BYTES // (1024 * 1024)
        raise HTTPException(413, f"Datei zu gross (max {limit_mb} MB)")
    if not data:
        # Reject locally instead of spending an upstream call on an empty file.
        raise HTTPException(400, "Leere Datei")

    chosen_prompt = _select_prompt(prompt, domain)

    t0 = time.perf_counter()
    try:
        # The OpenAI SDK call is synchronous; run it in a worker thread so the
        # event loop keeps serving other requests (including /health).
        text = await run_in_threadpool(
            _transcribe_audio_bytes, data, ext, language, chosen_prompt
        )
    except HTTPException:
        raise
    except RuntimeError as e:
        raise HTTPException(503, str(e)) from e
    except Exception as e:
        raise HTTPException(500, f"Transkription fehlgeschlagen: {e}") from e

    # Discard output that merely starts with the known prompt text.
    if text.strip().startswith(WHISPER_PROMPT_PREFIX):
        text = ""

    duration_ms = int((time.perf_counter() - t0) * 1000)

    return TranscribeResponse(
        success=True,
        transcript=text,
        duration_ms=duration_ms,
        model=WHISPER_MODEL,
        request_id=request_id,
    )


# TCP port the server listens on; overridable via the TRANSCRIBE_PORT env var.
TRANSCRIBE_PORT = int(os.environ.get("TRANSCRIBE_PORT", "8090"))

if __name__ == "__main__":
    import uvicorn

    # Validate the TLS setup first, then collect the SSL arguments for uvicorn.
    check_tls_or_exit()
    ssl_kwargs = get_uvicorn_ssl_kwargs()

    if has_tls_config():
        scheme = "https"
    else:
        scheme = "http"
    print(f"Starte auf {scheme}://0.0.0.0:{TRANSCRIBE_PORT}")

    tls_label = "AKTIV" if has_tls_config() else "DEAKTIVIERT"
    print(f"TLS: {tls_label}")

    uvicorn.run(app, host="0.0.0.0", port=TRANSCRIBE_PORT, **ssl_kwargs)