update
This commit is contained in:
208
AzA march 2026 - Kopie (18)/services/live_event_search.py
Normal file
208
AzA march 2026 - Kopie (18)/services/live_event_search.py
Normal file
@@ -0,0 +1,208 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
class SearchProviderConfigError(RuntimeError):
    """Raised when the selected web-search provider lacks required
    configuration (missing API keys / environment variables)."""

    pass
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SearchResult:
    """A single normalized web-search hit (immutable, hashable)."""

    # Result title as reported by the provider; may be empty.
    title: str
    # Short descriptive snippet for the hit; may be empty.
    snippet: str
    # Target URL; rows without a URL are dropped during normalization.
    url: str
|
||||
|
||||
|
||||
def _http_get_json(url: str, timeout: int = 10) -> dict:
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={"User-Agent": "AZA-LiveEventSearch/1.0"},
|
||||
method="GET",
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
payload = resp.read().decode("utf-8", errors="ignore")
|
||||
data = json.loads(payload)
|
||||
if not isinstance(data, dict):
|
||||
raise RuntimeError("Ungültige JSON-Antwort der Websuche")
|
||||
return data
|
||||
|
||||
|
||||
def _normalize_results(rows: Iterable[dict]) -> list[SearchResult]:
    """Turn raw provider rows into SearchResult items.

    Non-dict rows and rows without a URL are silently skipped; title,
    snippet and url are coerced to stripped strings.
    """
    results: list[SearchResult] = []
    for entry in rows:
        if not isinstance(entry, dict):
            continue
        link = str(entry.get("url") or "").strip()
        if not link:
            continue
        results.append(
            SearchResult(
                title=str(entry.get("title") or "").strip(),
                snippet=str(entry.get("snippet") or "").strip(),
                url=link,
            )
        )
    return results
|
||||
|
||||
|
||||
def _search_google_cse(query: str, num_results: int) -> list[SearchResult]:
    """Run *query* through the Google Custom Search JSON API.

    Reads GOOGLE_CSE_API_KEY / GOOGLE_CSE_CX from the environment and
    returns normalized results (at most 10 — the API's per-request cap).

    Raises:
        SearchProviderConfigError: If either credential is missing.
    """
    api_key = os.getenv("GOOGLE_CSE_API_KEY", "").strip()
    engine_id = os.getenv("GOOGLE_CSE_CX", "").strip()
    if not (api_key and engine_id):
        raise SearchProviderConfigError("Google CSE nicht konfiguriert (GOOGLE_CSE_API_KEY/GOOGLE_CSE_CX fehlen).")
    query_string = urllib.parse.urlencode(
        {
            "key": api_key,
            "cx": engine_id,
            "q": query,
            "num": max(1, min(int(num_results), 10)),
            "safe": "off",
        }
    )
    data = _http_get_json(f"https://www.googleapis.com/customsearch/v1?{query_string}", timeout=10)
    raw_items = data.get("items")
    if not isinstance(raw_items, list):
        raw_items = []
    rows = [
        {"title": it.get("title", ""), "snippet": it.get("snippet", ""), "url": it.get("link", "")}
        for it in raw_items
        if isinstance(it, dict)
    ]
    return _normalize_results(rows)
|
||||
|
||||
|
||||
def _search_serpapi(query: str, num_results: int) -> list[SearchResult]:
    """Run *query* through SerpAPI's Google engine.

    Reads SERPAPI_API_KEY from the environment and returns normalized
    organic results (at most 10 per request).

    Raises:
        SearchProviderConfigError: If the API key is missing.
    """
    api_key = os.getenv("SERPAPI_API_KEY", "").strip()
    if not api_key:
        raise SearchProviderConfigError("SerpAPI nicht konfiguriert (SERPAPI_API_KEY fehlt).")
    query_string = urllib.parse.urlencode(
        {
            "api_key": api_key,
            "engine": "google",
            "q": query,
            "num": max(1, min(int(num_results), 10)),
        }
    )
    data = _http_get_json(f"https://serpapi.com/search.json?{query_string}", timeout=12)
    organic = data.get("organic_results")
    if not isinstance(organic, list):
        organic = []
    rows = [
        {"title": hit.get("title", ""), "snippet": hit.get("snippet", ""), "url": hit.get("link", "")}
        for hit in organic
        if isinstance(hit, dict)
    ]
    return _normalize_results(rows)
|
||||
|
||||
|
||||
def _search_bing(query: str, num_results: int) -> list[SearchResult]:
    """Run *query* through the Bing Web Search v7 API.

    Reads BING_API_KEY from the environment. The request is built inline
    (not via _http_get_json) because Bing needs the subscription-key header.

    Raises:
        SearchProviderConfigError: If the API key is missing.
    """
    api_key = os.getenv("BING_API_KEY", "").strip()
    if not api_key:
        raise SearchProviderConfigError("Bing Web Search nicht konfiguriert (BING_API_KEY fehlt).")
    query_string = urllib.parse.urlencode(
        {
            "q": query,
            "count": max(1, min(int(num_results), 10)),
            "textDecorations": "false",
            "textFormat": "Raw",
        }
    )
    request = urllib.request.Request(
        f"https://api.bing.microsoft.com/v7.0/search?{query_string}",
        headers={
            "User-Agent": "AZA-LiveEventSearch/1.0",
            "Ocp-Apim-Subscription-Key": api_key,
        },
        method="GET",
    )
    with urllib.request.urlopen(request, timeout=10) as resp:
        body = resp.read().decode("utf-8", errors="ignore")
    data = json.loads(body)
    # Hits live under webPages.value; tolerate a missing/odd-shaped payload.
    hits = []
    if isinstance(data, dict):
        hits = (data.get("webPages") or {}).get("value")
    if not isinstance(hits, list):
        hits = []
    rows = [
        {"title": hit.get("name", ""), "snippet": hit.get("snippet", ""), "url": hit.get("url", "")}
        for hit in hits
        if isinstance(hit, dict)
    ]
    return _normalize_results(rows)
|
||||
|
||||
|
||||
def search_web(query: str, num_results: int = 8) -> list[SearchResult]:
    """Dispatch a web search to the provider named in WEBSEARCH_PROVIDER.

    Supported values: "google_cse", "serpapi", "bing" (case-insensitive,
    surrounding whitespace ignored).

    Raises:
        SearchProviderConfigError: If no supported provider is configured.
    """
    dispatch = {
        "google_cse": _search_google_cse,
        "serpapi": _search_serpapi,
        "bing": _search_bing,
    }
    provider = os.getenv("WEBSEARCH_PROVIDER", "").strip().lower()
    handler = dispatch.get(provider)
    if handler is None:
        raise SearchProviderConfigError(
            "Web Search nicht konfiguriert. Setze WEBSEARCH_PROVIDER auf google_cse, serpapi oder bing."
        )
    return handler(query, num_results=num_results)
|
||||
|
||||
|
||||
def build_queries(
    specialty: str,
    regions: list[str],
    from_date: date,
    to_date: date,
    lang: str = "de",
    max_queries: int = 10,
) -> list[str]:
    """Build a deduplicated list of web-search queries for live medical events.

    Args:
        specialty: Medical specialty to search for; falls back to "medical".
        regions: Region codes ("CH", "EU", "WORLD", "US", "CA"); unknown or
            empty input falls back to Europe/Switzerland tokens.
        from_date / to_date: Date window; its years (at most three) form the
            year hint embedded in each query.
        lang: Kept for interface compatibility; the query templates do not
            currently use it (the original computed an unused lang_hint).
        max_queries: Soft cap on the number of queries, clamped to 6..12.

    Returns:
        Unique, whitespace-normalized query strings, in template order.
    """
    spec = (specialty or "medical").strip()
    # Use at most the first three years of the window as the year hint.
    years = [str(year) for year in range(from_date.year, to_date.year + 1)]
    year_hint = " ".join(years[:3]) if years else str(from_date.year)

    # str(r) throughout: the original mixed r.strip() with str(r).strip(),
    # which crashed on non-string region entries despite the guard.
    region_norm = {str(r).strip().upper() for r in regions if str(r).strip()}
    region_tokens: list[str] = []
    if "CH" in region_norm:
        region_tokens.extend(["Schweiz", "Suisse", "Switzerland"])
    if "EU" in region_norm:
        region_tokens.extend(["Europa", "Europe"])
    if "WORLD" in region_norm:
        region_tokens.extend(["global", "international"])
    if "US" in region_norm:
        region_tokens.extend(["USA", "United States"])
    if "CA" in region_norm:
        region_tokens.extend(["Canada"])
    if not region_tokens:
        region_tokens.extend(["Europe", "Switzerland"])

    # dict.fromkeys removes duplicate tokens while preserving order.
    loc_hint = " ".join(dict.fromkeys(region_tokens))

    base = [
        f"{spec} congress {year_hint} {loc_hint} dates registration",
        f"{spec} conference {year_hint} {loc_hint} official event page",
        f"{spec} course {year_hint} {loc_hint} CME",
        f"{spec} Weiterbildung {year_hint} {loc_hint}",
        f"{spec} Fortbildung {year_hint} {loc_hint}",
        f"{spec} Kongress {year_hint} {loc_hint}",
        f"{spec} congress {year_hint} site:org",
        f"{spec} symposium {year_hint} {loc_hint}",
        f"{spec} annual meeting {year_hint} {loc_hint}",
        # Fixed: the original template ended in a stray " python" token,
        # which poisoned event searches with an unrelated keyword.
        f"{spec} event {year_hint} {loc_hint}",
    ]

    limit = max(6, min(int(max_queries), 12))
    out: list[str] = []
    for q in base:
        q_clean = " ".join(q.split())
        if q_clean and q_clean not in out:
            out.append(q_clean)
        if len(out) >= limit:
            break
    return out
|
||||
|
||||
Reference in New Issue
Block a user