172 lines
5.7 KiB
Python
172 lines
5.7 KiB
Python
"""Live medical news search using OpenAI web search (like ChatGPT)."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
from dataclasses import dataclass, asdict
|
|
from datetime import date
|
|
from typing import Any
|
|
|
|
from openai import OpenAI
|
|
|
|
# Maps the internal specialty slugs (as passed to ``_build_news_prompt``) to
# the German display labels that are written into the search prompt. Slugs
# missing from this table are used verbatim by the prompt builder.
SPECIALTY_LABELS: dict[str, str] = {
    "dermatology": "Dermatologie",
    "general-medicine": "Allgemeinmedizin",
    "internal-medicine": "Innere Medizin",
    "gynecology": "Gynäkologie",
    "anesthesiology": "Anästhesiologie",
    "cardiology": "Kardiologie",
    "oncology": "Onkologie",
    "pediatrics": "Pädiatrie",
    "neurology": "Neurologie",
    "psychiatry": "Psychiatrie",
    "surgery": "Chirurgie",
    "ophthalmology": "Ophthalmologie",
    "ent": "HNO",
    "urology": "Urologie",
    "orthopedics": "Orthopädie",
    "radiology": "Radiologie",
    "rheumatology": "Rheumatologie",
    "endocrinology": "Endokrinologie",
    "gastroenterology": "Gastroenterologie",
    "pulmonology": "Pneumologie",
    "nephrology": "Nephrologie",
    "infectiology": "Infektiologie",
    "emergency-medicine": "Notfallmedizin",
    "pathology": "Pathologie",
    "allergology": "Allergologie",
}
|
|
|
|
|
|
@dataclass(frozen=True)
class NewsCandidate:
    """One news item returned by the web-search model.

    Instances are immutable (``frozen=True``). Within this module they are
    built by ``_normalize_news``, which coerces every field to a stripped
    string and does not validate URLs or dates.
    """

    # Headline of the article; ``_normalize_news`` rejects rows without one.
    title: str
    # Link to the article as reported by the model; may be an empty string.
    url: str
    # Name of the publishing source, e.g. "NEJM".
    source: str
    # Publication date; the prompt asks for YYYY-MM-DD, the value is stored
    # as received without being parsed.
    publishedAt: str
    # Short summary text (``_normalize_news`` cuts it at 800 characters).
    summary: str
    # Specialty tags, lower-cased by ``_normalize_news``.
    tags: list[str]
    # Language code of the original article, e.g. "en" or "de".
    language: str
|
|
|
|
|
|
def _extract_json_block(text: str) -> dict:
|
|
cleaned = text.strip()
|
|
cleaned = re.sub(r"^```[a-zA-Z]*\s*", "", cleaned)
|
|
cleaned = re.sub(r"\s*```\s*$", "", cleaned)
|
|
cleaned = cleaned.strip()
|
|
try:
|
|
data = json.loads(cleaned)
|
|
if isinstance(data, dict):
|
|
return data
|
|
except Exception:
|
|
pass
|
|
match = re.search(r"\{[\s\S]*\}", cleaned)
|
|
if match:
|
|
try:
|
|
data = json.loads(match.group(0))
|
|
if isinstance(data, dict):
|
|
return data
|
|
except Exception:
|
|
pass
|
|
return {"news": []}
|
|
|
|
|
|
def _build_news_prompt(
|
|
specialties: list[str],
|
|
limit: int,
|
|
) -> str:
|
|
spec_labels = [SPECIALTY_LABELS.get(s, s) for s in specialties]
|
|
spec_text = ", ".join(spec_labels) if spec_labels else "Medizin allgemein"
|
|
today_str = date.today().isoformat()
|
|
|
|
return (
|
|
f"Suche im Internet nach den NEUESTEN und WICHTIGSTEN medizinischen News "
|
|
f"und Forschungsergebnissen. Heutiges Datum: {today_str}\n\n"
|
|
f"Fachgebiete: {spec_text}\n\n"
|
|
"Ich brauche aktuelle, relevante Nachrichten aus der Medizin:\n"
|
|
"- Neue Studien und Forschungsergebnisse\n"
|
|
"- Neue Therapien und Medikamente (Zulassungen, Phase-III-Ergebnisse)\n"
|
|
"- Leitlinien-Updates\n"
|
|
"- Wichtige Konferenz-Highlights und Abstracts\n"
|
|
"- Gesundheitspolitische Nachrichten\n"
|
|
"- Sicherheitswarnungen (FDA, EMA, Swissmedic)\n\n"
|
|
"Bevorzuge Quellen wie: NEJM, Lancet, JAMA, BMJ, Nature Medicine, "
|
|
"Deutsches Ärzteblatt, Swiss Medical Weekly, Medical Tribune, "
|
|
"PubMed, Medscape, aerzteblatt.de\n\n"
|
|
f"Liefere mindestens 10, maximal {limit} Ergebnisse.\n\n"
|
|
"WICHTIG: Antwort als REINES JSON, kein anderer Text.\n"
|
|
'{"news": [...]}\n'
|
|
"Felder pro News-Item:\n"
|
|
"title (Titel der Nachricht),\n"
|
|
"url (DIREKTER Link zum Artikel),\n"
|
|
"source (Name der Quelle, z.B. 'NEJM', 'Lancet'),\n"
|
|
"publishedAt (YYYY-MM-DD, Veröffentlichungsdatum),\n"
|
|
"summary (2-4 Sätze Zusammenfassung),\n"
|
|
'tags (Array der Fachgebiete, z.B. ["dermatology", "oncology"]),\n'
|
|
"language (Sprache des Originalartikels, z.B. 'en', 'de')"
|
|
)
|
|
|
|
|
|
def _normalize_news(row: dict, default_tags: list[str]) -> NewsCandidate | None:
|
|
if not isinstance(row, dict):
|
|
return None
|
|
title = str(row.get("title") or "").strip()
|
|
url = str(row.get("url") or "").strip()
|
|
if not title:
|
|
return None
|
|
tags = row.get("tags") if isinstance(row.get("tags"), list) else list(default_tags)
|
|
return NewsCandidate(
|
|
title=title,
|
|
url=url,
|
|
source=str(row.get("source") or "").strip(),
|
|
publishedAt=str(row.get("publishedAt") or "").strip(),
|
|
summary=str(row.get("summary") or row.get("description") or "").strip()[:800],
|
|
tags=[str(t).strip().lower() for t in tags if str(t).strip()],
|
|
language=str(row.get("language") or "en").strip().lower(),
|
|
)
|
|
|
|
|
|
def search_medical_news(
    specialties: list[str],
    limit: int = 30,
) -> list[NewsCandidate]:
    """Search the web for current medical news via an OpenAI search model.

    The model name is read from the ``NEWS_SEARCH_MODEL`` environment
    variable; when that is unset or blank, ``gpt-4o-mini-search-preview`` is
    used.

    Args:
        specialties: Specialty slugs to focus on; also used as default tags
            for rows that carry no ``tags`` list.
        limit: Maximum number of results. It is stated in the prompt and,
            when positive, also enforced on the returned list.

    Returns:
        The normalized news candidates in the order the model returned them.
        The list is empty when the reply has no usable ``news`` array.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or blank.

    Errors raised by the OpenAI client during the request are not caught
    here and propagate to the caller.
    """
    key = os.getenv("OPENAI_API_KEY", "").strip()
    if not key:
        raise RuntimeError("OPENAI_API_KEY nicht gesetzt")

    prompt = _build_news_prompt(specialties=specialties, limit=limit)
    # An env var that is set but empty (common with .env files) must not
    # yield an empty model name, so fall back to the default in that case.
    search_model = (
        os.getenv("NEWS_SEARCH_MODEL") or ""
    ).strip() or "gpt-4o-mini-search-preview"

    client = OpenAI(api_key=key, timeout=80)
    resp = client.chat.completions.create(
        model=search_model,
        messages=[
            {
                "role": "system",
                "content": (
                    "Du bist ein medizinischer Nachrichtenassistent. "
                    "Suche im Internet nach den neuesten medizinischen Nachrichten "
                    "und liefere die Ergebnisse als JSON. "
                    "Gib NUR real existierende Artikel mit funktionierenden Links an."
                ),
            },
            {"role": "user", "content": prompt},
        ],
    )

    # A reply without choices or without message content is treated as empty.
    try:
        txt = (resp.choices[0].message.content or "").strip()
    except (AttributeError, IndexError, TypeError):
        txt = ""

    payload = _extract_json_block(txt)
    rows = payload.get("news")
    if not isinstance(rows, list):
        rows = []

    out: list[NewsCandidate] = []
    for row in rows:
        cand = _normalize_news(row, default_tags=specialties)
        if cand is None:
            continue
        out.append(cand)
        # The model may ignore the requested maximum; enforce it here.
        if limit > 0 and len(out) >= limit:
            break
    return out
|