breed_enricher: Wikimedia-Foto-Fetch + alle Rassen in einem Rutsch (limit 2000)
- breed_enricher: _fetch_wikimedia_photo() (de/en-Fallback via Wikipedia-API)
- breed_enricher: foto_url nach KI-Anreicherung automatisch befüllen
- scheduler: Startup- und Nacht-Job auf limit=2000 (357 ausstehende Rassen)
- scheduler: Status-Report alle 2h statt 2×/Tag
- dogs.py: sitting_access → sitting_subscriptions (SQL-Fix)
This commit is contained in:
parent
988cffcbd4
commit
28cad893d1
3 changed files with 57 additions and 15 deletions
|
|
@ -267,7 +267,7 @@ async def get_dog_skills(dog_id: int, user=Depends(get_current_user)):
|
||||||
uid = user["id"]
|
uid = user["id"]
|
||||||
with db() as conn:
|
with db() as conn:
|
||||||
dog = conn.execute(
|
dog = conn.execute(
|
||||||
"SELECT id, user_id FROM dogs WHERE id=? AND (user_id=? OR id IN (SELECT dog_id FROM sitting_access WHERE friend_id=? AND expires_at > datetime('now')))",
|
"SELECT id, user_id FROM dogs WHERE id=? AND (user_id=? OR id IN (SELECT dog_id FROM sitting_subscriptions WHERE sitter_id=? AND valid_until >= date('now')))",
|
||||||
(dog_id, uid, uid)
|
(dog_id, uid, uid)
|
||||||
).fetchone()
|
).fetchone()
|
||||||
if not dog:
|
if not dog:
|
||||||
|
|
|
||||||
|
|
@ -106,15 +106,14 @@ def start():
|
||||||
replace_existing=True,
|
replace_existing=True,
|
||||||
misfire_grace_time=3600,
|
misfire_grace_time=3600,
|
||||||
)
|
)
|
||||||
# 2× täglich Status-Report per Mail (06:00, 18:00)
|
# Alle 2 Stunden Status-Report per Mail
|
||||||
for _h in [6, 18]:
|
_scheduler.add_job(
|
||||||
_scheduler.add_job(
|
_job_status_report,
|
||||||
_job_status_report,
|
CronTrigger(minute=0, hour="*/2"),
|
||||||
CronTrigger(hour=_h, minute=0),
|
id="status_report",
|
||||||
id=f"status_report_{_h:02d}",
|
replace_existing=True,
|
||||||
replace_existing=True,
|
misfire_grace_time=1800,
|
||||||
misfire_grace_time=1800,
|
)
|
||||||
)
|
|
||||||
# Einmalig beim Start (nach 90s) — erste 50 Rassen sofort anreichern
|
# Einmalig beim Start (nach 90s) — erste 50 Rassen sofort anreichern
|
||||||
_scheduler.add_job(
|
_scheduler.add_job(
|
||||||
_job_wiki_enrich_startup,
|
_job_wiki_enrich_startup,
|
||||||
|
|
@ -634,10 +633,10 @@ def _log_job(job_id: str, status: str, result: str):
|
||||||
# JOB: KI-Anreicherung der Rassen-Daten (nächtlich)
|
# JOB: KI-Anreicherung der Rassen-Daten (nächtlich)
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
async def _job_wiki_enrich():
|
async def _job_wiki_enrich():
|
||||||
"""Reichert 20 noch nicht angereicherte Rassen mit KI-Daten an."""
|
"""Reichert alle noch nicht angereicherten Rassen mit KI-Daten an."""
|
||||||
try:
|
try:
|
||||||
from scraper.breed_enricher import enrich_breeds
|
from scraper.breed_enricher import enrich_breeds
|
||||||
enriched = await enrich_breeds(limit=20)
|
enriched = await enrich_breeds(limit=2000)
|
||||||
msg = f"{enriched} Rassen angereichert"
|
msg = f"{enriched} Rassen angereichert"
|
||||||
logger.info(f"Wiki-KI-Anreicherung (nächtlich): {msg}.")
|
logger.info(f"Wiki-KI-Anreicherung (nächtlich): {msg}.")
|
||||||
_log_job("wiki_enrich_nightly", "ok", msg)
|
_log_job("wiki_enrich_nightly", "ok", msg)
|
||||||
|
|
@ -647,10 +646,10 @@ async def _job_wiki_enrich():
|
||||||
|
|
||||||
|
|
||||||
async def _job_wiki_enrich_startup():
|
async def _job_wiki_enrich_startup():
|
||||||
"""Beim Start: erste 50 Rassen sofort anreichern."""
|
"""Beim Start: alle Rassen sofort anreichern."""
|
||||||
try:
|
try:
|
||||||
from scraper.breed_enricher import enrich_breeds
|
from scraper.breed_enricher import enrich_breeds
|
||||||
enriched = await enrich_breeds(limit=50)
|
enriched = await enrich_breeds(limit=2000)
|
||||||
msg = f"{enriched} Rassen angereichert (Startup)"
|
msg = f"{enriched} Rassen angereichert (Startup)"
|
||||||
logger.info(f"Wiki-KI-Anreicherung (Startup): {msg}.")
|
logger.info(f"Wiki-KI-Anreicherung (Startup): {msg}.")
|
||||||
_log_job("wiki_enrich_startup", "ok", msg)
|
_log_job("wiki_enrich_startup", "ok", msg)
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,8 @@ import re
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
# Pfad zum Backend-Verzeichnis sicherstellen (beim direkten Aufruf)
|
# Pfad zum Backend-Verzeichnis sicherstellen (beim direkten Aufruf)
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
@ -168,6 +170,31 @@ def _parse_json(raw: str) -> dict:
|
||||||
raise ValueError(f"Kein gültiges JSON in Antwort gefunden: {raw[:200]}")
|
raise ValueError(f"Kein gültiges JSON in Antwort gefunden: {raw[:200]}")
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_wikimedia_photo(name: str) -> str | None:
|
||||||
|
"""Sucht ein lizenzfreies Foto via Wikipedia pageimages API (de → en Fallback)."""
|
||||||
|
for lang in ("de", "en"):
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(timeout=8) as client:
|
||||||
|
resp = await client.get(
|
||||||
|
f"https://{lang}.wikipedia.org/w/api.php",
|
||||||
|
params={
|
||||||
|
"action": "query",
|
||||||
|
"titles": name,
|
||||||
|
"prop": "pageimages",
|
||||||
|
"format": "json",
|
||||||
|
"pithumbsize": 800,
|
||||||
|
"redirects": 1,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
pages = resp.json().get("query", {}).get("pages", {})
|
||||||
|
for page in pages.values():
|
||||||
|
if "thumbnail" in page:
|
||||||
|
return page["thumbnail"]["source"]
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("Wikimedia-Foto (%s) fehlgeschlagen für %s: %s", lang, name, e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
async def enrich_breeds(limit: int = 10) -> int:
|
async def enrich_breeds(limit: int = 10) -> int:
|
||||||
"""
|
"""
|
||||||
Reichert bis zu `limit` Rassen an, bei denen ki_enriched = 0.
|
Reichert bis zu `limit` Rassen an, bei denen ki_enriched = 0.
|
||||||
|
|
@ -177,7 +204,7 @@ async def enrich_breeds(limit: int = 10) -> int:
|
||||||
"""
|
"""
|
||||||
with db() as conn:
|
with db() as conn:
|
||||||
rassen = conn.execute(
|
rassen = conn.execute(
|
||||||
"""SELECT id, name, slug, herkunft FROM wiki_rassen
|
"""SELECT id, name, slug, herkunft, foto_url FROM wiki_rassen
|
||||||
WHERE ki_enriched = 0
|
WHERE ki_enriched = 0
|
||||||
ORDER BY name ASC
|
ORDER BY name ASC
|
||||||
LIMIT ?""",
|
LIMIT ?""",
|
||||||
|
|
@ -242,6 +269,22 @@ async def enrich_breeds(limit: int = 10) -> int:
|
||||||
enriched_count += 1
|
enriched_count += 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("DB-Update fehlgeschlagen für %s: %s", name, e)
|
logger.error("DB-Update fehlgeschlagen für %s: %s", name, e)
|
||||||
|
await asyncio.sleep(2)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Foto von Wikimedia holen, falls noch keins vorhanden
|
||||||
|
if not rasse["foto_url"]:
|
||||||
|
foto_url = await _fetch_wikimedia_photo(name)
|
||||||
|
if foto_url:
|
||||||
|
try:
|
||||||
|
with db() as conn:
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE wiki_rassen SET foto_url=? WHERE id=?",
|
||||||
|
(foto_url, rasse_id),
|
||||||
|
)
|
||||||
|
logger.info("Wikimedia-Foto gesetzt: %s", name)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Foto-Update fehlgeschlagen für %s: %s", name, e)
|
||||||
|
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue