"""Fetches breed data from TheDogAPI and seeds the wiki_rassen table.""" import httpx, re, logging, os from database import db MEDIA_DIR = os.getenv("MEDIA_DIR", "/data/media") BREEDS_DIR = os.path.join(MEDIA_DIR, "breeds") logger = logging.getLogger(__name__) def _slug(name: str) -> str: return re.sub(r'[^a-z0-9]+', '-', name.lower()).strip('-') def _derive_groesse(weight_max_kg: float) -> str: if weight_max_kg <= 10: return 'klein' if weight_max_kg <= 25: return 'mittel' if weight_max_kg <= 40: return 'gross' return 'sehr_gross' def _derive_aktivitaet(bred_for: str, temperament: str, group: str) -> str: text = f"{bred_for or ''} {temperament or ''} {group or ''}".lower() high_keywords = ['herding', 'hunting', 'sporting', 'working', 'energetic', 'active', 'agile'] low_keywords = ['companion', 'toy', 'lap', 'gentle', 'calm', 'quiet'] if any(k in text for k in high_keywords): return 'hoch' if any(k in text for k in low_keywords): return 'niedrig' return 'mittel' def _derive_erfahrung(temperament: str, group: str) -> str: text = f"{temperament or ''} {group or ''}".lower() expert = ['stubborn', 'independent', 'dominant', 'terrier', 'herding'] advanced = ['protective', 'reserved', 'working', 'guard'] if any(k in text for k in expert): return 'fortgeschritten' if any(k in text for k in advanced): return 'fortgeschritten' return 'anfaenger' def _derive_kinder(temperament: str) -> int: if not temperament: return 1 bad = ['aggressive', 'aloof', 'reserved with strangers'] return 0 if any(k in temperament.lower() for k in bad) else 1 def _parse_weight_kg(weight_metric: str): """Parse '7 - 14' or '14' -> (min, max) in kg""" try: parts = [p.strip() for p in weight_metric.replace(',', '.').split('-')] nums = [float(p) for p in parts if p] if len(nums) >= 2: return nums[0], nums[1] if len(nums) == 1: return nums[0], nums[0] except Exception: pass return None, None async def mirror_breed_photos(): """Download CDN breed photos to local storage and update foto_url in DB.""" os.makedirs(BREEDS_DIR, exist_ok=True) with db() as conn: rows = conn.execute( "SELECT id, external_id, foto_url FROM wiki_rassen WHERE foto_url LIKE 'http%' AND foto_url NOT LIKE '/media/%'" ).fetchall() if not rows: logger.info("Breed photos: nothing to mirror") return 0 mirrored = 0 async with httpx.AsyncClient(timeout=30, follow_redirects=True) as client: for row_id, ext_id, cdn_url in rows: local_path = os.path.join(BREEDS_DIR, f"{ext_id}.jpg") local_url = f"/media/breeds/{ext_id}.jpg" # Skip if already downloaded if os.path.exists(local_path): with db() as conn: conn.execute("UPDATE wiki_rassen SET foto_url=? WHERE id=?", (local_url, row_id)) mirrored += 1 continue try: r = await client.get(cdn_url) if r.status_code == 200: with open(local_path, "wb") as f: f.write(r.content) with db() as conn: conn.execute("UPDATE wiki_rassen SET foto_url=? WHERE id=?", (local_url, row_id)) mirrored += 1 else: logger.warning(f"Breed photo {ext_id}: HTTP {r.status_code}") except Exception as e: logger.warning(f"Breed photo {ext_id} download failed: {e}") logger.info(f"Breed photos mirrored: {mirrored}/{len(rows)}") return mirrored async def fetch_and_seed_breeds(): """Fetch all breeds from TheDogAPI and upsert into wiki_rassen.""" api_key = os.getenv("THEDOGAPI_KEY", "") try: async with httpx.AsyncClient(timeout=30) as client: r = await client.get('https://api.thedogapi.com/v1/breeds', headers={'x-api-key': api_key}) r.raise_for_status() breeds = r.json() except Exception as e: logger.error(f"TheDogAPI fetch failed: {e}") return 0 seeded = 0 with db() as conn: for b in breeds: try: w_min, w_max = _parse_weight_kg(b.get('weight', {}).get('metric', '') or '') groesse = _derive_groesse(w_max or 20) aktivitaet = _derive_aktivitaet(b.get('bred_for',''), b.get('temperament',''), b.get('breed_group','')) erfahrung = _derive_erfahrung(b.get('temperament',''), b.get('breed_group','')) kinder = _derive_kinder(b.get('temperament','')) wohnung = 1 if groesse == 'klein' and aktivitaet in ('niedrig','mittel') else 0 foto_url = b.get('image', {}).get('url') or None slug = _slug(b['name']) conn.execute(""" INSERT INTO wiki_rassen (external_id, name, gruppe, herkunft, temperament, gewicht_min_kg, gewicht_max_kg, groesse, lebensdauer, foto_url, bred_for, aktivitaet, wohnung_geeignet, kinder_geeignet, erfahrung, slug) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) ON CONFLICT(external_id) DO UPDATE SET foto_url=excluded.foto_url, temperament=excluded.temperament """, ( b['id'], b['name'], b.get('breed_group'), b.get('origin'), b.get('temperament'), w_min, w_max, groesse, b.get('life_span'), foto_url, b.get('bred_for'), aktivitaet, wohnung, kinder, erfahrung, slug )) seeded += 1 except Exception as e: logger.warning(f"Breed {b.get('name')} seed failed: {e}") logger.info(f"Breeds seeded: {seeded}") return seeded