Files
aza/AzA march 2026 - Kopie (13)/services/event_llm_direct.py
2026-04-19 20:41:37 +02:00

169 lines
5.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Kongress-Suche via OpenAI web search so simpel wie eine ChatGPT-Anfrage."""
from __future__ import annotations
import json
import os
import re
from dataclasses import dataclass
from datetime import date
from openai import OpenAI
# Specialty key (as passed to query_events_direct) -> German display name
# that is inserted into the search prompt.
SPECIALTY_DE = {
    "dermatology": "Dermatologie",
    "general-medicine": "Allgemeinmedizin",
    "internal-medicine": "Innere Medizin",
    "gynecology": "Gynäkologie",
    "anesthesiology": "Anästhesiologie",
    "cardiology": "Kardiologie",
    "oncology": "Onkologie",
    "pediatrics": "Pädiatrie",
    "neurology": "Neurologie",
    "psychiatry": "Psychiatrie",
    "surgery": "Chirurgie",
    "ophthalmology": "Ophthalmologie",
    "ent": "HNO",
    "urology": "Urologie",
    "orthopedics": "Orthopädie",
    "radiology": "Radiologie",
    "rheumatology": "Rheumatologie",
    "endocrinology": "Endokrinologie",
    "gastroenterology": "Gastroenterologie",
    "pulmonology": "Pneumologie",
    "nephrology": "Nephrologie",
    "infectiology": "Infektiologie",
    "emergency-medicine": "Notfallmedizin",
    "pathology": "Pathologie",
    "allergology": "Allergologie",
}
# Region code -> German wording used in the search prompt.
REGION_DE = {
    "CH": "Schweiz",
    "EU": "Europa",
    "WORLD": "weltweit",
    "US": "USA",
    "CA": "Kanada",
}
# Lower-cased country name (German / English / French spellings) -> two-letter
# country code. Used by _norm_country to normalise free-text country values.
COUNTRY_MAP = {
    "schweiz": "CH",
    "suisse": "CH",
    "switzerland": "CH",
    "deutschland": "DE",
    "germany": "DE",
    "österreich": "AT",
    "austria": "AT",
    "frankreich": "FR",
    "france": "FR",
    "italien": "IT",
    "italy": "IT",
    "spanien": "ES",
    "spain": "ES",
    "grossbritannien": "GB",
    "uk": "GB",
    "united kingdom": "GB",
    "griechenland": "GR",
    "greece": "GR",
    "niederlande": "NL",
    "netherlands": "NL",
    "usa": "US",
    "united states": "US",
    "finnland": "FI",
    "finland": "FI",
    "dänemark": "DK",
    "denmark": "DK",
    "schweden": "SE",
    "sweden": "SE",
    "portugal": "PT",
    "belgien": "BE",
    "belgium": "BE",
    "china": "CN",
    "japan": "JP",
}
# Country codes that receive the "EU" region tag. Despite the name this is a
# "Europe" set: it also contains non-EU countries such as GB, NO and CH.
EU_SET = {
    "AT", "BE", "BG", "CH", "CY", "CZ",
    "DE", "DK", "EE", "ES", "FI", "FR",
    "GB", "GR", "HR", "HU", "IE", "IT",
    "LT", "LU", "LV", "MT", "NL", "NO",
    "PL", "PT", "RO", "SE", "SI", "SK",
}
@dataclass
class EventCandidate:
    """One congress / training event extracted from an LLM answer.

    Every field has a default, so an instance can be built from partial data.
    Field names are camelCase and their order is part of the positional
    constructor interface.
    """

    # Event title.
    name: str = ""

    # Start and end date as strings, or None when unknown.
    startDate: str | None = None
    endDate: str | None = None

    # Location.
    city: str = ""
    country: str = ""

    # Event URL as reported by the source; "candidate" because it is unverified.
    urlCandidate: str = ""

    shortDescription: str = ""
    organizer: str = ""

    # Classification tags; None means "not set".
    specialtyTags: list[str] | None = None
    regionTags: list[str] | None = None

    # Confidence score attached to the candidate.
    confidence: float = 0.9
def _parse_json(text: str) -> list[dict]:
"""Parse JSON aus LLM-Antwort robust gegen ```json ... ``` Wrapper."""
t = text.strip()
t = re.sub(r"^```[a-zA-Z]*\s*", "", t)
t = re.sub(r"\s*```\s*$", "", t)
t = t.strip()
try:
obj = json.loads(t)
if isinstance(obj, dict) and "events" in obj:
return obj["events"]
if isinstance(obj, list):
return obj
except Exception:
pass
m = re.search(r"\{[\s\S]*\}", t)
if m:
try:
obj = json.loads(m.group(0))
if isinstance(obj, dict) and "events" in obj:
return obj["events"]
except Exception:
pass
return []
def _norm_country(raw: str) -> str:
    """Normalise a free-text country value to a two-letter code.

    Resolution order:

    1. a known name or alias in ``COUNTRY_MAP`` (case-insensitive) -- checked
       first so that two-letter aliases such as "uk" map to "GB" instead of
       being passed through as the non-ISO code "UK";
    2. any other two-letter alphabetic value, upper-cased;
    3. otherwise the first two characters, upper-cased. This is a heuristic
       and can produce codes that are not valid for the country (only the
       names listed in ``COUNTRY_MAP`` are resolved reliably).

    Values shorter than two characters (including the empty string) are
    returned stripped but otherwise unchanged.
    """
    s = raw.strip()
    mapped = COUNTRY_MAP.get(s.lower())
    if mapped is not None:
        return mapped
    if len(s) == 2 and s.isalpha():
        return s.upper()
    return s.upper()[:2] if len(s) >= 2 else s
def _region_tags(cc: str) -> list[str]:
    """Derive the sorted region tags for a two-letter country code.

    "CH" yields the "CH" tag, any member of ``EU_SET`` yields "EU", and both
    "US" and "CA" yield "US". A code that matches none of these falls back to
    ``["EU"]``.

    NOTE(review): Canada is tagged "US" and unknown countries default to
    "EU" -- presumably intentional; confirm against the consumers of
    ``regionTags``.
    """
    rules = (
        ("CH", cc == "CH"),
        ("EU", cc in EU_SET),
        ("US", cc in ("US", "CA")),
    )
    matched = [tag for tag, applies in rules if applies]
    if not matched:
        return ["EU"]
    return sorted(matched)
def _safe_date(v) -> str | None:
s = str(v or "").strip()
return s if re.fullmatch(r"\d{4}-\d{2}-\d{2}", s) else None
def query_events_direct(
    specialty: str,
    regions: list[str],
    from_date: date,
    to_date: date,
    lang: str = "de",
    limit: int = 40,
) -> list[EventCandidate]:
    """Ask an OpenAI web-search model for upcoming congresses and parse the answer.

    Args:
        specialty: Specialty key. Translated via ``SPECIALTY_DE`` when known,
            otherwise inserted into the prompt verbatim. Also becomes the
            single entry of every candidate's ``specialtyTags``.
        regions: Region codes, translated via ``REGION_DE`` (looked up
            upper-cased) when known. An empty list falls back to
            "Europa, Schweiz".
        from_date: Accepted for interface compatibility but currently unused:
            the prompt always searches from today's date.
        to_date: End of the search window, inserted into the prompt.
        lang: Accepted for interface compatibility but currently unused: the
            prompt is always German.
        limit: Maximum number of candidates to return. A non-positive value
            leaves the result uncapped.

    Returns:
        Candidates in the order the model listed them. Rows that are not
        JSON objects or have no name are skipped. An unparseable or empty
        answer yields an empty list.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or blank.
        Exceptions raised by the OpenAI client propagate unchanged.
    """
    key = os.getenv("OPENAI_API_KEY", "").strip()
    if not key:
        raise RuntimeError("OPENAI_API_KEY nicht gesetzt")

    spec = SPECIALTY_DE.get(specialty, specialty)
    reg = ", ".join(REGION_DE.get(r.upper(), r) for r in regions) or "Europa, Schweiz"
    today = date.today().isoformat()
    prompt = (
        "Suche im Internet nach den nächsten Kongressen und Weiterbildungen "
        f"für {spec} in {reg} ab heute ({today}) "
        f"bis {to_date.isoformat()}.\n\n"
        "Liste alle wichtigen Kongresse auf die du findest (mindestens 15). "
        "Gib die Antwort als JSON zurück:\n"
        '{"events": [{"name": "...", "startDate": "YYYY-MM-DD", '
        '"endDate": "YYYY-MM-DD", "city": "...", "country": "CH", '
        '"url": "...", "description": "...", "organizer": "..."}]}'
    )

    # A blank EVENT_SEARCH_MODEL would otherwise be sent as an empty model name.
    default_model = "gpt-4o-mini-search-preview"
    model = os.getenv("EVENT_SEARCH_MODEL", default_model).strip() or default_model

    client = OpenAI(api_key=key, timeout=80)
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "Suche im Internet nach echten Kongressen. Antwort NUR als JSON."},
            {"role": "user", "content": prompt},
        ],
    )

    # Treat a response without choices like an empty answer instead of
    # failing with IndexError.
    choices = resp.choices or []
    txt = (choices[0].message.content or "").strip() if choices else ""

    rows = _parse_json(txt)
    if not isinstance(rows, list):
        # Defensive: never iterate a non-list parse result.
        rows = []

    out: list[EventCandidate] = []
    for r in rows:
        # Honour the caller's cap; non-positive means "no cap".
        if limit > 0 and len(out) >= limit:
            break
        if not isinstance(r, dict):
            continue
        name = str(r.get("name") or "").strip()
        if not name:
            continue
        cc = _norm_country(str(r.get("country") or ""))
        start = _safe_date(r.get("startDate"))
        out.append(EventCandidate(
            name=name,
            startDate=start,
            # Single-day events: fall back to the start date when no valid
            # end date was given.
            endDate=_safe_date(r.get("endDate")) or start,
            city=str(r.get("city") or "").strip(),
            country=cc,
            # Accept both the prompt's key names and the dataclass field names.
            urlCandidate=str(r.get("url") or r.get("urlCandidate") or "").strip(),
            shortDescription=str(r.get("description") or r.get("shortDescription") or "").strip()[:600],
            organizer=str(r.get("organizer") or "").strip(),
            specialtyTags=[specialty],
            regionTags=_region_tags(cc),
            confidence=0.9,
        ))
    return out