from __future__ import annotations

import json
import os
import re
import urllib.parse
import urllib.request
from dataclasses import dataclass
from typing import Any

from openai import OpenAI

from services.live_event_search import SearchResult


@dataclass(frozen=True)
class EventCandidate:
    """One event extracted by the LLM from search results and page excerpts.

    Instances are created by ``_normalize_candidate``; the per-field notes
    below describe the values as that function produces them. The dataclass
    itself performs no validation.

    NOTE: ``frozen=True`` only blocks attribute re-assignment. The two list
    fields remain mutable, and because lists are unhashable, calling
    ``hash()`` on an instance raises ``TypeError``.
    """

    # Event title; never empty when built by _normalize_candidate.
    name: str
    # ISO ``YYYY-MM-DD`` string, or None when the value was missing/unparseable.
    startDate: str | None
    # ISO ``YYYY-MM-DD`` string; _normalize_candidate falls back to startDate.
    endDate: str | None
    # Stripped free-text location fields; empty string when not provided.
    city: str
    country: str
    # URL the model proposed for the event; never empty when normalized.
    urlCandidate: str
    # Truncated to 600 characters by _normalize_candidate.
    shortDescription: str
    organizer: str
    # Lower-cased tags; defaults to the requested specialty.
    specialtyTags: list[str]
    # Upper-cased tags; defaults to the requested regions.
    regionTags: list[str]
    # Model-provided justification, truncated to 300 characters.
    rationale: str
    # Clamped to the range [0.0, 1.0] by _normalize_candidate.
    confidence: float


def _clean_text(t: str) -> str:
    t = re.sub(r"<[^>]+>", " ", t or "")
    t = re.sub(r"\s+", " ", t).strip()
    return t


def _safe_date(value: Any) -> str | None:
    s = str(value or "").strip()
    if not s:
        return None
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", s):
        return s
    return None


def _extract_page_excerpt(url: str, max_chars: int = 14000) -> str:
    u = (url or "").strip()
    if not u:
        return ""
    try:
        req = urllib.request.Request(
            u,
            headers={"User-Agent": "AZA-LiveEventSearch/1.0"},
            method="GET",
        )
        with urllib.request.urlopen(req, timeout=8) as resp:
            raw = resp.read(max_chars * 2).decode("utf-8", errors="ignore")
        return _clean_text(raw)[:max_chars]
    except Exception:
        return ""


def _make_prompt(
    search_rows: list[SearchResult],
    specialty: str,
    regions: list[str],
    excerpts: list[dict[str, str]],
) -> str:
    search_payload = [
        {"title": r.title, "snippet": r.snippet, "url": r.url}
        for r in search_rows
    ]
    rules = (
        "Du extrahierst medizinische Kongresse/Weiterbildungen als STRICT JSON.\n"
        "NUR aus den gelieferten Daten, NICHT raten, NICHT erfinden.\n"
        "Wenn Datum/Ort unklar: null setzen und confidence reduzieren.\n"
        "urlCandidate MUSS eine gelieferte URL oder eine URL aus pageExcerpts sein.\n"
        "Antwortformat: JSON-Objekt mit key 'events' (Array).\n"
        "Jedes Event: {name,startDate,endDate,city,country,urlCandidate,shortDescription,organizer,specialtyTags,regionTags,rationale,confidence}\n"
        "startDate/endDate als ISO YYYY-MM-DD oder null.\n"
        "confidence zwischen 0 und 1."
    )
    payload = {
        "specialty": specialty,
        "regions": regions,
        "searchResults": search_payload,
        "pageExcerpts": excerpts,
    }
    return rules + "\n\nDATA:\n" + json.dumps(payload, ensure_ascii=False)


def _extract_json_block(text: str) -> dict:
    try:
        data = json.loads(text)
        if isinstance(data, dict):
            return data
    except Exception:
        pass
    match = re.search(r"\{.*\}", text, flags=re.DOTALL)
    if not match:
        return {"events": []}
    try:
        data = json.loads(match.group(0))
        if isinstance(data, dict):
            return data
    except Exception:
        pass
    return {"events": []}


def _call_openai_json(prompt: str) -> dict:
    """Send *prompt* to the OpenAI chat-completions API and parse the reply.

    Configuration is read from the environment:

    * ``OPENAI_API_KEY`` - required.
    * ``EVENT_LLM_MODEL`` - optional; ``gpt-4o-mini`` is used when the
      variable is unset OR blank (a blank value would otherwise be sent as
      an empty model name).

    Args:
        prompt: Complete user prompt.

    Returns:
        The JSON object found in the reply, or ``{"events": []}`` when the
        reply carries no parseable object.

    Raises:
        RuntimeError: ``OPENAI_API_KEY`` is not set.
        Exception: Errors raised by the OpenAI client are propagated.
    """
    key = os.getenv("OPENAI_API_KEY", "").strip()
    if not key:
        raise RuntimeError("OPENAI_API_KEY nicht gesetzt")
    model = os.getenv("EVENT_LLM_MODEL", "").strip() or "gpt-4o-mini"
    client = OpenAI(api_key=key)
    resp = client.chat.completions.create(
        model=model,
        temperature=0,
        messages=[
            {"role": "system", "content": "Gib nur valides JSON zurück."},
            {"role": "user", "content": prompt},
        ],
    )
    try:
        txt = (resp.choices[0].message.content or "").strip()
    except (AttributeError, IndexError, KeyError, TypeError):
        # Reply without choices / message: treat as "no events".
        txt = ""
    return _extract_json_block(txt)


def _call_gemini_json(prompt: str) -> dict:
    """Send *prompt* to the Gemini ``generateContent`` REST endpoint.

    Configuration is read from the environment:

    * ``GEMINI_API_KEY`` - required.
    * ``GEMINI_MODEL`` - optional; ``gemini-1.5-flash`` is used when the
      variable is unset or blank.

    The API key is sent in the ``x-goog-api-key`` request header rather than
    as a ``?key=`` query parameter, so it does not become part of the URL
    (URLs tend to end up in logs and proxy records).

    Args:
        prompt: Complete prompt text.

    Returns:
        The JSON object found in the first candidate's text, or
        ``{"events": []}`` when the reply carries no parseable object.

    Raises:
        RuntimeError: ``GEMINI_API_KEY`` is not set.
        urllib.error.URLError: Network or HTTP failure (propagated).
        json.JSONDecodeError: The HTTP response body is not valid JSON.
    """
    key = os.getenv("GEMINI_API_KEY", "").strip()
    if not key:
        raise RuntimeError("GEMINI_API_KEY nicht gesetzt")
    model = os.getenv("GEMINI_MODEL", "").strip() or "gemini-1.5-flash"
    url = (
        "https://generativelanguage.googleapis.com/v1beta/models/"
        f"{urllib.parse.quote(model)}:generateContent"
    )
    body = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {"temperature": 0},
    }
    req = urllib.request.Request(
        url,
        headers={
            "Content-Type": "application/json",
            "x-goog-api-key": key,
        },
        data=json.dumps(body).encode("utf-8"),
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=20) as resp:
        raw = resp.read().decode("utf-8", errors="ignore")
    data = json.loads(raw)
    try:
        parts = data["candidates"][0]["content"]["parts"]
        # A candidate may be split over several parts; join their text so a
        # JSON document spanning more than one part is not cut off.
        txt = "".join(
            str(part.get("text") or "")
            for part in parts
            if isinstance(part, dict)
        )
    except (KeyError, IndexError, TypeError):
        # Blocked / empty reply without the expected structure.
        txt = ""
    return _extract_json_block(txt)


def _normalize_candidate(row: dict, default_specialty: str, default_regions: list[str]) -> EventCandidate | None:
    if not isinstance(row, dict):
        return None
    name = str(row.get("name") or "").strip()
    url_candidate = str(row.get("urlCandidate") or "").strip()
    if not name or not url_candidate:
        return None
    try:
        confidence = float(row.get("confidence", 0.0))
    except Exception:
        confidence = 0.0
    confidence = max(0.0, min(1.0, confidence))
    specialty_tags = row.get("specialtyTags") if isinstance(row.get("specialtyTags"), list) else [default_specialty]
    region_tags = row.get("regionTags") if isinstance(row.get("regionTags"), list) else list(default_regions)
    return EventCandidate(
        name=name,
        startDate=_safe_date(row.get("startDate")),
        endDate=_safe_date(row.get("endDate")) or _safe_date(row.get("startDate")),
        city=str(row.get("city") or "").strip(),
        country=str(row.get("country") or "").strip(),
        urlCandidate=url_candidate,
        shortDescription=str(row.get("shortDescription") or "").strip()[:600],
        organizer=str(row.get("organizer") or "").strip(),
        specialtyTags=[str(x).strip().lower() for x in specialty_tags if str(x).strip()],
        regionTags=[str(x).strip().upper() for x in region_tags if str(x).strip()],
        rationale=str(row.get("rationale") or "").strip()[:300],
        confidence=confidence,
    )


def extract_event_candidates(
    search_rows: list[SearchResult],
    specialty: str,
    regions: list[str],
    *,
    max_rows: int = 10,
) -> list[EventCandidate]:
    """Extract structured event candidates from search results via an LLM.

    For the first *max_rows* search hits the linked pages are downloaded
    (best effort) and sent to the configured LLM together with the hits. The
    model's ``events`` array is then normalised row by row.

    The provider is chosen through the ``LLM_PROVIDER`` environment variable:
    ``gemini`` selects Gemini, anything else (including unset) selects
    OpenAI.

    Args:
        search_rows: Search hits; only the first *max_rows* are used.
        specialty: Requested medical specialty, also the default tag.
        regions: Requested regions, also the default region tags.
        max_rows: Upper bound on the number of hits processed (default 10,
            the previously hard-coded value). Values ``<= 0`` yield ``[]``.

    Returns:
        The normalised candidates; rows the model returned without a name or
        URL are dropped. Empty when there is nothing to process.

    Raises:
        RuntimeError: The API key of the selected provider is not set.
        Exception: Transport/API errors of the selected provider propagate.
    """
    if not search_rows:
        return []
    top_rows = list(search_rows[: max(0, max_rows)])
    if not top_rows:
        return []

    excerpts = []
    fetched_urls = set()
    for row in top_rows:
        # Search results frequently list the same page more than once;
        # fetching it again costs up to one full timeout and duplicates the
        # excerpt in the prompt.
        if row.url in fetched_urls:
            continue
        fetched_urls.add(row.url)
        text = _extract_page_excerpt(row.url)
        if text:
            excerpts.append({"url": row.url, "excerpt": text})

    prompt = _make_prompt(top_rows, specialty=specialty, regions=regions, excerpts=excerpts)
    provider = os.getenv("LLM_PROVIDER", "openai").strip().lower()
    if provider == "gemini":
        payload = _call_gemini_json(prompt)
    else:
        payload = _call_openai_json(prompt)

    events = payload.get("events")
    rows = events if isinstance(events, list) else []
    out: list[EventCandidate] = []
    for row in rows:
        cand = _normalize_candidate(row, default_specialty=specialty, default_regions=regions)
        if cand is not None:
            out.append(cand)
    return out