169 lines
5.9 KiB
Python
169 lines
5.9 KiB
Python
|
|
"""Kongress-Suche via OpenAI web search – so simpel wie eine ChatGPT-Anfrage."""
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import json
|
|||
|
|
import os
|
|||
|
|
import re
|
|||
|
|
from dataclasses import dataclass
|
|||
|
|
from datetime import date
|
|||
|
|
|
|||
|
|
from openai import OpenAI
|
|||
|
|
|
|||
|
|
# Specialty slug (as passed to query_events_direct) -> German label that is
# interpolated into the search prompt. Unknown slugs are used verbatim by the
# caller.
SPECIALTY_DE = {
    "dermatology": "Dermatologie",
    "general-medicine": "Allgemeinmedizin",
    "internal-medicine": "Innere Medizin",
    "gynecology": "Gynäkologie",
    "anesthesiology": "Anästhesiologie",
    "cardiology": "Kardiologie",
    "oncology": "Onkologie",
    "pediatrics": "Pädiatrie",
    "neurology": "Neurologie",
    "psychiatry": "Psychiatrie",
    "surgery": "Chirurgie",
    "ophthalmology": "Ophthalmologie",
    "ent": "HNO",
    "urology": "Urologie",
    "orthopedics": "Orthopädie",
    "radiology": "Radiologie",
    "rheumatology": "Rheumatologie",
    "endocrinology": "Endokrinologie",
    "gastroenterology": "Gastroenterologie",
    "pulmonology": "Pneumologie",
    "nephrology": "Nephrologie",
    "infectiology": "Infektiologie",
    "emergency-medicine": "Notfallmedizin",
    "pathology": "Pathologie",
    "allergology": "Allergologie",
}
|
|||
|
|
|
|||
|
|
# Region code -> German wording used in the search prompt.
REGION_DE = {
    "CH": "Schweiz",
    "EU": "Europa",
    "WORLD": "weltweit",
    "US": "USA",
    "CA": "Kanada",
}
|
|||
|
|
|
|||
|
|
# Lower-cased country name (German / English / French spellings) -> two-letter
# country code. Used by _norm_country to normalise the free-text "country"
# field of a search result.
#
# Every code that appears in EU_SET (plus CA, which _region_tags checks) has
# at least one name here; otherwise _norm_country falls back to the first two
# letters of the name, which yields wrong codes such as "PO" for Poland or
# "IR" for Ireland.
COUNTRY_MAP = {
    # Switzerland
    "schweiz": "CH", "suisse": "CH", "switzerland": "CH",
    # Western / central Europe
    "deutschland": "DE", "germany": "DE",
    "österreich": "AT", "austria": "AT",
    "frankreich": "FR", "france": "FR",
    "italien": "IT", "italy": "IT",
    "spanien": "ES", "spain": "ES",
    "portugal": "PT",
    "niederlande": "NL", "netherlands": "NL",
    "belgien": "BE", "belgium": "BE",
    "luxemburg": "LU", "luxembourg": "LU",
    "irland": "IE", "ireland": "IE",
    "grossbritannien": "GB", "großbritannien": "GB", "great britain": "GB",
    "uk": "GB", "united kingdom": "GB", "vereinigtes königreich": "GB",
    # Northern Europe
    "finnland": "FI", "finland": "FI",
    "dänemark": "DK", "denmark": "DK",
    "schweden": "SE", "sweden": "SE",
    "norwegen": "NO", "norway": "NO",
    "estland": "EE", "estonia": "EE",
    "lettland": "LV", "latvia": "LV",
    "litauen": "LT", "lithuania": "LT",
    # Eastern / south-eastern Europe
    "polen": "PL", "poland": "PL",
    "tschechien": "CZ", "tschechische republik": "CZ",
    "czechia": "CZ", "czech republic": "CZ",
    "slowakei": "SK", "slovakia": "SK",
    "ungarn": "HU", "hungary": "HU",
    "rumänien": "RO", "romania": "RO",
    "bulgarien": "BG", "bulgaria": "BG",
    "kroatien": "HR", "croatia": "HR",
    "slowenien": "SI", "slovenia": "SI",
    "griechenland": "GR", "greece": "GR",
    "zypern": "CY", "cyprus": "CY",
    "malta": "MT",
    # Outside Europe
    "usa": "US", "united states": "US", "vereinigte staaten": "US",
    "kanada": "CA", "canada": "CA",
    "china": "CN", "japan": "JP",
}
|
|||
|
|
|
|||
|
|
# Country codes that _region_tags labels with the "EU" (Europe) region tag.
# The list is geographic rather than political: it also contains CH, GB and NO.
EU_SET = {
    "AT", "BE", "BG", "CH", "CY", "CZ",
    "DE", "DK", "EE", "ES", "FI", "FR",
    "GB", "GR", "HR", "HU", "IE", "IT",
    "LT", "LU", "LV", "MT", "NL", "NO",
    "PL", "PT", "RO", "SE", "SI", "SK",
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
@dataclass
class EventCandidate:
    """A single congress / training event extracted from a search answer.

    Every field has a default, so a bare ``EventCandidate()`` is valid.
    Instances are built by ``query_events_direct`` from the JSON rows the
    model returns.
    """

    # --- identity -----------------------------------------------------
    # Event title; query_events_direct skips rows without one.
    name: str = ""

    # --- schedule -----------------------------------------------------
    # "YYYY-MM-DD" strings, or None when no usable date was supplied.
    startDate: str | None = None
    endDate: str | None = None

    # --- location -----------------------------------------------------
    city: str = ""
    # Normalised country value (see _norm_country).
    country: str = ""

    # --- details ------------------------------------------------------
    # URL as reported by the model; not verified here.
    urlCandidate: str = ""
    shortDescription: str = ""
    organizer: str = ""

    # --- classification -----------------------------------------------
    # None means "not set"; avoids a shared mutable default list.
    specialtyTags: list[str] | None = None
    regionTags: list[str] | None = None

    # Score attached to the candidate; defaults to 0.9.
    confidence: float = 0.9
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _parse_json(text: str) -> list[dict]:
    """Extract the list of event rows from an LLM answer.

    Accepts ``{"events": [...]}`` or a bare JSON array. The payload may be
    wrapped in a Markdown code fence (```json ... ```) and/or surrounded by
    prose.

    Args:
        text: Raw model output.

    Returns:
        The decoded list, or ``[]`` when no usable JSON list is found
        (including when ``"events"`` is present but not a list). Elements are
        not validated here; the caller is expected to skip non-dict rows.
    """
    cleaned = (text or "").strip()
    # Remove a leading ```lang marker and a trailing ``` marker, if present.
    cleaned = re.sub(r"^```[a-zA-Z]*\s*", "", cleaned)
    cleaned = re.sub(r"\s*```\s*$", "", cleaned)
    cleaned = cleaned.strip()

    # Try the whole text first, then the outermost {...} and the outermost
    # [...] for answers that embed the JSON in explanatory prose.
    candidates = [cleaned]
    for pattern in (r"\{[\s\S]*\}", r"\[[\s\S]*\]"):
        found = re.search(pattern, cleaned)
        if found and found.group(0) != cleaned:
            candidates.append(found.group(0))

    for candidate in candidates:
        try:
            obj = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if isinstance(obj, dict):
            obj = obj.get("events")
        # Only ever hand a list back: callers iterate over the result.
        if isinstance(obj, list):
            return obj
    return []
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _norm_country(raw: str) -> str:
    """Normalise a free-text country value to a two-letter code.

    Lookup order:
      1. A known name or alias in ``COUNTRY_MAP`` (case-insensitive). This
         runs first so that two-letter aliases such as "UK" are mapped
         ("UK" -> "GB") instead of being passed through unchanged.
      2. Any other two-letter alphabetic value is treated as a code and
         upper-cased.
      3. Otherwise the first two letters of the upper-cased input are
         returned as a best-effort guess; inputs shorter than two characters
         are returned as-is.

    Args:
        raw: Country name or code as supplied by the model.

    Returns:
        The normalised value; an empty string for empty input.
    """
    s = (raw or "").strip()
    mapped = COUNTRY_MAP.get(s.lower())
    if mapped is not None:
        return mapped
    if len(s) == 2 and s.isalpha():
        return s.upper()
    # NOTE(review): truncating an unknown name is only a guess and can produce
    # a wrong code; kept for backward compatibility with existing callers.
    return s.upper()[:2] if len(s) >= 2 else s
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _region_tags(cc: str) -> list[str]:
    """Return the region tags that apply to country code ``cc``.

    Rules, all evaluated independently:
      * "CH" for Switzerland,
      * "EU" for any code contained in ``EU_SET``,
      * "US" for "US" and "CA".

    Tags come back in alphabetical order. A code that matches none of the
    rules falls back to ``["EU"]``.
    """
    # (tag, applies) pairs, listed alphabetically so no sorting is needed.
    rules = (
        ("CH", cc == "CH"),
        ("EU", cc in EU_SET),
        ("US", cc in ("US", "CA")),
    )
    matched = [tag for tag, applies in rules if applies]
    if not matched:
        return ["EU"]
    return matched
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _safe_date(v) -> str | None:
    """Return ``v`` as a ``YYYY-MM-DD`` string if it is a real calendar date.

    Args:
        v: Any value; it is stringified and stripped. Falsy values are
            treated as empty.

    Returns:
        The stripped string when it has the exact shape ``YYYY-MM-DD`` (ASCII
        digits) and names an existing date, otherwise ``None``. Impossible
        dates such as "2025-02-30" or "2025-13-01" are rejected.
    """
    s = str(v or "").strip()
    # Shape check first: fromisoformat() alone accepts additional ISO 8601
    # spellings on newer Python versions.
    if not re.fullmatch(r"[0-9]{4}-[0-9]{2}-[0-9]{2}", s):
        return None
    try:
        date.fromisoformat(s)
    except ValueError:
        return None
    return s
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _candidate_from_row(row, specialty: str) -> EventCandidate | None:
    """Convert one decoded JSON row into an EventCandidate; None if unusable."""
    if not isinstance(row, dict):
        return None
    name = str(row.get("name") or "").strip()
    if not name:
        return None
    cc = _norm_country(str(row.get("country") or ""))
    start = _safe_date(row.get("startDate"))
    return EventCandidate(
        name=name,
        startDate=start,
        # An event without a usable end date is treated as ending on its
        # start date.
        endDate=_safe_date(row.get("endDate")) or start,
        city=str(row.get("city") or "").strip(),
        country=cc,
        # Accept both the key requested in the prompt and the dataclass name.
        urlCandidate=str(row.get("url") or row.get("urlCandidate") or "").strip(),
        shortDescription=str(
            row.get("description") or row.get("shortDescription") or ""
        ).strip()[:600],
        organizer=str(row.get("organizer") or "").strip(),
        specialtyTags=[specialty],
        regionTags=_region_tags(cc),
        confidence=0.9,
    )


def query_events_direct(
    specialty: str,
    regions: list[str],
    from_date: date,
    to_date: date,
    lang: str = "de",
    limit: int = 40,
) -> list[EventCandidate]:
    """Ask an OpenAI chat model for upcoming congresses and parse the answer.

    Configuration is read from the environment:
      * ``OPENAI_API_KEY`` (required),
      * ``EVENT_SEARCH_MODEL`` (optional, default
        ``gpt-4o-mini-search-preview``).

    Args:
        specialty: Specialty slug; translated through ``SPECIALTY_DE`` when
            known, otherwise used verbatim in the prompt.
        regions: Region codes; translated through ``REGION_DE`` when known.
            An empty or missing list searches "Europa, Schweiz".
        from_date: Accepted for interface compatibility.
            NOTE(review): currently unused; the prompt always starts at today.
        to_date: End of the search window, interpolated into the prompt.
        lang: NOTE(review): currently unused; the prompt is always German.
        limit: Maximum number of candidates to return. Values <= 0 leave the
            result uncapped.

    Returns:
        Candidates built from the rows of the model's JSON answer. Rows that
        are not dicts or have no name are skipped. An answer that cannot be
        parsed yields an empty list.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
        Any exception raised by the OpenAI client propagates unchanged.
    """
    key = os.getenv("OPENAI_API_KEY", "").strip()
    if not key:
        raise RuntimeError("OPENAI_API_KEY nicht gesetzt")

    spec = SPECIALTY_DE.get(specialty, specialty)
    reg = (
        ", ".join(REGION_DE.get(r.upper(), r) for r in (regions or []))
        or "Europa, Schweiz"
    )
    today = date.today().isoformat()

    prompt = (
        "Suche im Internet nach den nächsten Kongressen und Weiterbildungen "
        f"für {spec} in {reg} ab heute ({today}) "
        f"bis {to_date.isoformat()}.\n\n"
        "Liste alle wichtigen Kongresse auf die du findest (mindestens 15). "
        "Gib die Antwort als JSON zurück:\n"
        '{"events": [{"name": "...", "startDate": "YYYY-MM-DD", '
        '"endDate": "YYYY-MM-DD", "city": "...", "country": "CH", '
        '"url": "...", "description": "...", "organizer": "..."}]}'
    )

    default_model = "gpt-4o-mini-search-preview"
    # An empty EVENT_SEARCH_MODEL falls back to the default instead of
    # sending an empty model name to the API.
    model = os.getenv("EVENT_SEARCH_MODEL", default_model).strip() or default_model
    client = OpenAI(api_key=key, timeout=80)
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "Suche im Internet nach echten Kongressen. Antwort NUR als JSON."},
            {"role": "user", "content": prompt},
        ],
    )

    # Guard against a response without choices rather than raising IndexError.
    choices = getattr(resp, "choices", None) or []
    txt = (choices[0].message.content or "").strip() if choices else ""

    out: list[EventCandidate] = []
    for row in _parse_json(txt):
        candidate = _candidate_from_row(row, specialty)
        if candidate is not None:
            out.append(candidate)

    # Honour the caller's limit; previously the parameter was ignored.
    if limit is not None and limit > 0:
        out = out[:limit]
    return out
|