update
This commit is contained in:
171
backup 24.2.26 - Kopie/services/news_llm_search.py
Normal file
171
backup 24.2.26 - Kopie/services/news_llm_search.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""Live medical news search using OpenAI web search (like ChatGPT)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, asdict
|
||||
from datetime import date
|
||||
from typing import Any
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
# Maps the internal specialty slug (as used in tags and in the ``specialties``
# argument) to the German display label that is inserted into the search prompt.
SPECIALTY_LABELS: dict[str, str] = {
    "dermatology": "Dermatologie",
    "general-medicine": "Allgemeinmedizin",
    "internal-medicine": "Innere Medizin",
    "gynecology": "Gynäkologie",
    "anesthesiology": "Anästhesiologie",
    "cardiology": "Kardiologie",
    "oncology": "Onkologie",
    "pediatrics": "Pädiatrie",
    "neurology": "Neurologie",
    "psychiatry": "Psychiatrie",
    "surgery": "Chirurgie",
    "ophthalmology": "Ophthalmologie",
    "ent": "HNO",
    "urology": "Urologie",
    "orthopedics": "Orthopädie",
    "radiology": "Radiologie",
    "rheumatology": "Rheumatologie",
    "endocrinology": "Endokrinologie",
    "gastroenterology": "Gastroenterologie",
    "pulmonology": "Pneumologie",
    "nephrology": "Nephrologie",
    "infectiology": "Infektiologie",
    "emergency-medicine": "Notfallmedizin",
    "pathology": "Pathologie",
    "allergology": "Allergologie",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class NewsCandidate:
    """Immutable record for one news item returned by the live search.

    Field names mirror the JSON keys requested from the model, which is why
    ``publishedAt`` is camelCase.
    """

    # Headline of the article.
    title: str
    # Link to the article as reported by the model.
    url: str
    # Name of the publishing source.
    source: str
    # Publication date string; the prompt asks for YYYY-MM-DD.
    publishedAt: str
    # Short summary of the article.
    summary: str
    # Specialty tags attached to the item.
    tags: list[str]
    # Language code of the original article.
    language: str
|
||||
|
||||
|
||||
def _extract_json_block(text: str) -> dict:
|
||||
cleaned = text.strip()
|
||||
cleaned = re.sub(r"^```[a-zA-Z]*\s*", "", cleaned)
|
||||
cleaned = re.sub(r"\s*```\s*$", "", cleaned)
|
||||
cleaned = cleaned.strip()
|
||||
try:
|
||||
data = json.loads(cleaned)
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
except Exception:
|
||||
pass
|
||||
match = re.search(r"\{[\s\S]*\}", cleaned)
|
||||
if match:
|
||||
try:
|
||||
data = json.loads(match.group(0))
|
||||
if isinstance(data, dict):
|
||||
return data
|
||||
except Exception:
|
||||
pass
|
||||
return {"news": []}
|
||||
|
||||
|
||||
def _build_news_prompt(
|
||||
specialties: list[str],
|
||||
limit: int,
|
||||
) -> str:
|
||||
spec_labels = [SPECIALTY_LABELS.get(s, s) for s in specialties]
|
||||
spec_text = ", ".join(spec_labels) if spec_labels else "Medizin allgemein"
|
||||
today_str = date.today().isoformat()
|
||||
|
||||
return (
|
||||
f"Suche im Internet nach den NEUESTEN und WICHTIGSTEN medizinischen News "
|
||||
f"und Forschungsergebnissen. Heutiges Datum: {today_str}\n\n"
|
||||
f"Fachgebiete: {spec_text}\n\n"
|
||||
"Ich brauche aktuelle, relevante Nachrichten aus der Medizin:\n"
|
||||
"- Neue Studien und Forschungsergebnisse\n"
|
||||
"- Neue Therapien und Medikamente (Zulassungen, Phase-III-Ergebnisse)\n"
|
||||
"- Leitlinien-Updates\n"
|
||||
"- Wichtige Konferenz-Highlights und Abstracts\n"
|
||||
"- Gesundheitspolitische Nachrichten\n"
|
||||
"- Sicherheitswarnungen (FDA, EMA, Swissmedic)\n\n"
|
||||
"Bevorzuge Quellen wie: NEJM, Lancet, JAMA, BMJ, Nature Medicine, "
|
||||
"Deutsches Ärzteblatt, Swiss Medical Weekly, Medical Tribune, "
|
||||
"PubMed, Medscape, aerzteblatt.de\n\n"
|
||||
f"Liefere mindestens 10, maximal {limit} Ergebnisse.\n\n"
|
||||
"WICHTIG: Antwort als REINES JSON, kein anderer Text.\n"
|
||||
'{"news": [...]}\n'
|
||||
"Felder pro News-Item:\n"
|
||||
"title (Titel der Nachricht),\n"
|
||||
"url (DIREKTER Link zum Artikel),\n"
|
||||
"source (Name der Quelle, z.B. 'NEJM', 'Lancet'),\n"
|
||||
"publishedAt (YYYY-MM-DD, Veröffentlichungsdatum),\n"
|
||||
"summary (2-4 Sätze Zusammenfassung),\n"
|
||||
'tags (Array der Fachgebiete, z.B. ["dermatology", "oncology"]),\n'
|
||||
"language (Sprache des Originalartikels, z.B. 'en', 'de')"
|
||||
)
|
||||
|
||||
|
||||
def _normalize_news(row: dict, default_tags: list[str]) -> NewsCandidate | None:
|
||||
if not isinstance(row, dict):
|
||||
return None
|
||||
title = str(row.get("title") or "").strip()
|
||||
url = str(row.get("url") or "").strip()
|
||||
if not title:
|
||||
return None
|
||||
tags = row.get("tags") if isinstance(row.get("tags"), list) else list(default_tags)
|
||||
return NewsCandidate(
|
||||
title=title,
|
||||
url=url,
|
||||
source=str(row.get("source") or "").strip(),
|
||||
publishedAt=str(row.get("publishedAt") or "").strip(),
|
||||
summary=str(row.get("summary") or row.get("description") or "").strip()[:800],
|
||||
tags=[str(t).strip().lower() for t in tags if str(t).strip()],
|
||||
language=str(row.get("language") or "en").strip().lower(),
|
||||
)
|
||||
|
||||
|
||||
def search_medical_news(
    specialties: list[str],
    limit: int = 30,
) -> list[NewsCandidate]:
    """Run a live web search for medical news through the OpenAI chat API.

    The API key is read from ``OPENAI_API_KEY``. The model name is read from
    ``NEWS_SEARCH_MODEL`` and defaults to ``gpt-4o-mini-search-preview`` when
    the variable is unset or blank.

    Args:
        specialties: Specialty slugs to search for; also used as default tags
            for items that come back without tags.
        limit: Maximum number of results to request and to return. A value
            of 0 or less disables the cap on the returned list.

    Returns:
        The normalized news items, in the order the model returned them.
        Empty if the reply contains no usable JSON.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
    """
    key = os.getenv("OPENAI_API_KEY", "").strip()
    if not key:
        raise RuntimeError("OPENAI_API_KEY nicht gesetzt")

    prompt = _build_news_prompt(specialties=specialties, limit=limit)
    # A variable that is set but blank must not produce an empty model name.
    search_model = (
        os.getenv("NEWS_SEARCH_MODEL", "").strip() or "gpt-4o-mini-search-preview"
    )

    client = OpenAI(api_key=key, timeout=80)
    resp = client.chat.completions.create(
        model=search_model,
        messages=[
            {
                "role": "system",
                "content": (
                    "Du bist ein medizinischer Nachrichtenassistent. "
                    "Suche im Internet nach den neuesten medizinischen Nachrichten "
                    "und liefere die Ergebnisse als JSON. "
                    "Gib NUR real existierende Artikel mit funktionierenden Links an."
                ),
            },
            {"role": "user", "content": prompt},
        ],
    )

    # Best-effort: a reply without choices or without text content counts as
    # an empty answer instead of an error.
    try:
        txt = (resp.choices[0].message.content or "").strip()
    except (AttributeError, IndexError, TypeError):
        txt = ""

    payload = _extract_json_block(txt)
    rows = payload.get("news")
    if not isinstance(rows, list):
        rows = []

    normalized = (_normalize_news(row, default_tags=specialties) for row in rows)
    out = [cand for cand in normalized if cand is not None]

    # The prompt only asks the model to respect the limit; enforce it here.
    return out[:limit] if limit > 0 else out
|
||||
Reference in New Issue
Block a user