breed_enricher: Wikimedia-Foto-Fetch + alle Rassen in einem Rutsch (limit 2000)
- breed_enricher: _fetch_wikimedia_photo() (de/en Fallback via Wikipedia API)
- breed_enricher: foto_url nach KI-Anreicherung automatisch befüllen
- scheduler: Startup- und Nacht-Job auf limit=2000 (357 ausstehende Rassen)
- scheduler: Status-Report alle 2h statt 2×/Tag
- dogs.py: sitting_access → sitting_subscriptions (SQL-Fix)
This commit is contained in:
parent
988cffcbd4
commit
28cad893d1
3 changed files with 57 additions and 15 deletions
|
|
@ -12,6 +12,8 @@ import re
|
|||
import sys
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
# Pfad zum Backend-Verzeichnis sicherstellen (beim direkten Aufruf)
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
|
@ -168,6 +170,31 @@ def _parse_json(raw: str) -> dict:
|
|||
raise ValueError(f"Kein gültiges JSON in Antwort gefunden: {raw[:200]}")
|
||||
|
||||
|
||||
async def _fetch_wikimedia_photo(name: str) -> str | None:
    """Look up a thumbnail URL for ``name`` via the Wikipedia ``pageimages`` API.

    The German Wikipedia is queried first and the English one second; the
    first page that carries a thumbnail wins. The lookup is best-effort:
    any failure for one language is logged at debug level and the next
    language is tried.

    Args:
        name: Page title to look up. A blank name is rejected without any
            network traffic.

    Returns:
        The thumbnail source URL (requested with ``pithumbsize=800``), or
        ``None`` if neither wiki yields one or every request fails.
    """
    # A blank title can never match a page; skip both HTTP round-trips.
    if not name or not name.strip():
        return None

    query = {
        "action": "query",
        "titles": name,
        "prop": "pageimages",
        "format": "json",
        "pithumbsize": 800,
        "redirects": 1,
    }

    # NOTE(review): httpx's default User-Agent is sent here; Wikimedia's
    # User-Agent policy asks for a descriptive one — confirm whether a custom
    # header should be configured for large batch runs.
    for lang in ("de", "en"):
        try:
            async with httpx.AsyncClient(timeout=8) as client:
                resp = await client.get(
                    f"https://{lang}.wikipedia.org/w/api.php",
                    params=query,
                )
            # Surface 4xx/5xx (e.g. rate limiting) in the log instead of
            # silently treating an error body as "no photo available".
            resp.raise_for_status()
            pages = resp.json().get("query", {}).get("pages", {})
            for page in pages.values():
                # Tolerate a thumbnail entry without "source" so the
                # remaining pages of this language are still inspected.
                source = (page.get("thumbnail") or {}).get("source")
                if source:
                    return source
        except Exception as e:
            # Deliberately broad: a failed photo lookup must never abort the
            # caller's enrichment run; fall through to the next language.
            logger.debug("Wikimedia-Foto (%s) fehlgeschlagen für %s: %s", lang, name, e)
    return None
|
||||
|
||||
|
||||
async def enrich_breeds(limit: int = 10) -> int:
|
||||
"""
|
||||
Reichert bis zu `limit` Rassen an, bei denen ki_enriched = 0.
|
||||
|
|
@ -177,7 +204,7 @@ async def enrich_breeds(limit: int = 10) -> int:
|
|||
"""
|
||||
with db() as conn:
|
||||
rassen = conn.execute(
|
||||
"""SELECT id, name, slug, herkunft FROM wiki_rassen
|
||||
"""SELECT id, name, slug, herkunft, foto_url FROM wiki_rassen
|
||||
WHERE ki_enriched = 0
|
||||
ORDER BY name ASC
|
||||
LIMIT ?""",
|
||||
|
|
@ -242,6 +269,22 @@ async def enrich_breeds(limit: int = 10) -> int:
|
|||
enriched_count += 1
|
||||
except Exception as e:
|
||||
logger.error("DB-Update fehlgeschlagen für %s: %s", name, e)
|
||||
await asyncio.sleep(2)
|
||||
continue
|
||||
|
||||
# Foto von Wikimedia holen, falls noch keins vorhanden
|
||||
if not rasse["foto_url"]:
|
||||
foto_url = await _fetch_wikimedia_photo(name)
|
||||
if foto_url:
|
||||
try:
|
||||
with db() as conn:
|
||||
conn.execute(
|
||||
"UPDATE wiki_rassen SET foto_url=? WHERE id=?",
|
||||
(foto_url, rasse_id),
|
||||
)
|
||||
logger.info("Wikimedia-Foto gesetzt: %s", name)
|
||||
except Exception as e:
|
||||
logger.error("Foto-Update fehlgeschlagen für %s: %s", name, e)
|
||||
|
||||
await asyncio.sleep(2)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue