breed_enricher: Wikipedia-grounded via Haiku, ki_source-Spalte, Gemma-Reset; SW by-v337
This commit is contained in:
parent
eb6eaea04e
commit
d90d4f1eeb
6 changed files with 201 additions and 98 deletions
|
|
@ -918,6 +918,7 @@ def _migrate(conn_factory):
|
||||||
("wikipedia_url_de","TEXT"),
|
("wikipedia_url_de","TEXT"),
|
||||||
("ki_enriched", "INTEGER DEFAULT 0"),
|
("ki_enriched", "INTEGER DEFAULT 0"),
|
||||||
("ki_model", "TEXT"),
|
("ki_model", "TEXT"),
|
||||||
|
("ki_source", "TEXT"),
|
||||||
]:
|
]:
|
||||||
try:
|
try:
|
||||||
conn.execute(f"ALTER TABLE wiki_rassen ADD COLUMN {col} {typedef}")
|
conn.execute(f"ALTER TABLE wiki_rassen ADD COLUMN {col} {typedef}")
|
||||||
|
|
|
||||||
|
|
@ -600,6 +600,16 @@ async def wiki_evaluate(sample: int = 20, user=Depends(require_mod)):
|
||||||
return await evaluate_enrichment(sample_size=sample)
|
return await evaluate_enrichment(sample_size=sample)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# POST /api/admin/wiki/reset-gemma — Gemma-Einträge zurücksetzen
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
@router.post("/wiki/reset-gemma")
async def wiki_reset_gemma(user=Depends(require_mod)):
    # Admin endpoint: put every Gemma-enriched breed back into the
    # "not yet processed" state. Access is gated by the require_mod
    # dependency; the resolved user object itself is not used here.
    #
    # The enricher is imported at call time rather than at module import.
    from scraper.breed_enricher import reset_gemma_entries

    # Response body: {"reset": <value returned by reset_gemma_entries()>}
    return {"reset": reset_gemma_entries()}
|
||||||
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# POST /api/admin/wiki/translate-temperament — einmalige Migration
|
# POST /api/admin/wiki/translate-temperament — einmalige Migration
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,15 @@
|
||||||
"""
|
"""
|
||||||
BAN YARO — Rassen-Anreicherung via KI
|
BAN YARO — Rassen-Anreicherung (Wikipedia-grounded)
|
||||||
|
|
||||||
Nutzt ki.complete() um fehlende Rassen-Daten (Beschreibung, Vorkommen, etc.)
|
Strategie:
|
||||||
per Claude API anzureichern und in wiki_rassen zurückzuschreiben.
|
1. Wikipedia-Einleitungstext abrufen (de → en Fallback)
|
||||||
|
2. Claude Haiku extrahiert Fakten NUR aus dem Quelltext
|
||||||
|
3. Kein Wikipedia-Artikel → ki_enriched=2, ki_source='none' (nicht veröffentlichen)
|
||||||
|
|
||||||
|
ki_enriched-Werte:
|
||||||
|
0 = noch nicht verarbeitet
|
||||||
|
1 = angereichert (mit Wikipedia-Quelle)
|
||||||
|
2 = kein Wikipedia-Artikel gefunden, übersprungen
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
@ -14,15 +21,15 @@ import os
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
# Pfad zum Backend-Verzeichnis sicherstellen (beim direkten Aufruf)
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from database import db
|
from database import db
|
||||||
from ki import complete, KIUnavailableError
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_SYSTEM = "Du bist ein Hunde-Experte. Antworte immer auf Deutsch."
|
_HAIKU_MODEL = "claude-haiku-4-5-20251001"
|
||||||
|
|
||||||
|
_WP_HEADERS = {"User-Agent": "Banyaro/1.0 (https://banyaro.de; mail@banyaro.de) httpx"}
|
||||||
|
|
||||||
# Übersetzungstabelle für englische TheDogAPI-Temperamentwörter
|
# Übersetzungstabelle für englische TheDogAPI-Temperamentwörter
|
||||||
_TEMPER_DE: dict[str, str] = {
|
_TEMPER_DE: dict[str, str] = {
|
||||||
|
|
@ -91,18 +98,51 @@ _TEMPER_DE: dict[str, str] = {
|
||||||
"work-focused": "arbeitsorientiert",
|
"work-focused": "arbeitsorientiert",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Datenmüll aus TheDogAPI/Wikidata der aus dem Temperament-Feld entfernt wird
|
|
||||||
_TEMPER_GARBAGE = {
|
_TEMPER_GARBAGE = {
|
||||||
"hunderasse", "dog breed", "breed of dog", "extinct dog breed",
|
"hunderasse", "dog breed", "breed of dog", "extinct dog breed",
|
||||||
"dog", "hund", "rasse",
|
"dog", "hund", "rasse",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_DIRECT_FIELDS = {
|
||||||
|
"beschreibung", "vorkommen_de",
|
||||||
|
"groesse", "gewicht_min_kg", "gewicht_max_kg",
|
||||||
|
"lebensdauer", "aktivitaet", "erfahrung",
|
||||||
|
"kinder_geeignet", "wohnung_geeignet", "temperament",
|
||||||
|
}
|
||||||
|
|
||||||
|
_SYSTEM = (
|
||||||
|
"Du bist ein Datenprozessor für eine Hunderassen-Referenz-Datenbank. "
|
||||||
|
"Extrahiere Informationen AUSSCHLIESSLICH aus dem gegebenen Quelltext. "
|
||||||
|
"Setze null wenn eine Information nicht im Text steht. "
|
||||||
|
"Erfinde keine Werte."
|
||||||
|
)
|
||||||
|
|
||||||
|
_PROMPT = '''\
|
||||||
|
Extrahiere strukturierte Daten für die Hunderasse "{name}" aus diesem Wikipedia-Text.
|
||||||
|
|
||||||
|
--- WIKIPEDIA ({lang}) ---
|
||||||
|
{wiki_text}
|
||||||
|
--- ENDE ---
|
||||||
|
|
||||||
|
Antworte NUR mit einem JSON-Objekt. Fehlende Informationen = null.
|
||||||
|
|
||||||
|
{{
|
||||||
|
"beschreibung": "3-5 informative Sätze über Charakter, Wesen und Verwendung aus dem Text. Schließe mit: Auf banyaro.app findest du weitere Informationen zu dieser Rasse.",
|
||||||
|
"vorkommen_de": "1-2 Sätze zur Verbreitung in Deutschland/DACH, nur wenn im Text erwähnt, sonst null",
|
||||||
|
"groesse": "klein|mittel|gross|sehr_gross oder null",
|
||||||
|
"gewicht_min_kg": Zahl_oder_null,
|
||||||
|
"gewicht_max_kg": Zahl_oder_null,
|
||||||
|
"lebensdauer": "X-Y Jahre oder null",
|
||||||
|
"aktivitaet": "niedrig|mittel|hoch|sehr_hoch oder null",
|
||||||
|
"erfahrung": "anfaenger|fortgeschritten|experte oder null",
|
||||||
|
"kinder_geeignet": true|false|null,
|
||||||
|
"wohnung_geeignet": true|false|null,
|
||||||
|
"temperament": "kommagetrennte Eigenschaften auf Deutsch aus dem Text, oder null"
|
||||||
|
}}
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
def translate_temperament(text: str) -> str | None:
|
def translate_temperament(text: str) -> str | None:
|
||||||
"""
|
|
||||||
Übersetzt englische Temperament-Chips ins Deutsche und entfernt Datenmüll.
|
|
||||||
Gibt None zurück wenn nach Bereinigung nichts übrig bleibt.
|
|
||||||
"""
|
|
||||||
if not text:
|
if not text:
|
||||||
return text
|
return text
|
||||||
parts = [p.strip() for p in text.split(",")]
|
parts = [p.strip() for p in text.split(",")]
|
||||||
|
|
@ -114,70 +154,64 @@ def translate_temperament(text: str) -> str | None:
|
||||||
result.append(_TEMPER_DE.get(low, part))
|
result.append(_TEMPER_DE.get(low, part))
|
||||||
return ", ".join(result) if result else None
|
return ", ".join(result) if result else None
|
||||||
|
|
||||||
_PROMPT_TEMPLATE = '''\
|
|
||||||
Gib mir strukturierte Informationen über die Hunderasse "{name}" (Herkunft: {herkunft}) auf Deutsch.
|
|
||||||
Antworte NUR mit einem JSON-Objekt, keine Erklärung darum.
|
|
||||||
|
|
||||||
Format:
|
|
||||||
{{
|
|
||||||
"beschreibung": "3-5 Sätze über Charakter und Wesen der Rasse. Schließe mit: Auf banyaro.app findest du weitere Informationen zu dieser Rasse.",
|
|
||||||
"vorkommen_de": "1-2 Sätze wie verbreitet die Rasse in Deutschland/DACH ist. Quelle: banyaro.app Hunde-Wiki.",
|
|
||||||
"groesse": "klein|mittel|gross|sehr_gross",
|
|
||||||
"gewicht_min_kg": Zahl_oder_null,
|
|
||||||
"gewicht_max_kg": Zahl_oder_null,
|
|
||||||
"lebensdauer": "X-Y Jahre oder null",
|
|
||||||
"aktivitaet": "niedrig|mittel|hoch|sehr_hoch",
|
|
||||||
"erfahrung": "anfaenger|fortgeschritten|experte",
|
|
||||||
"kinder_geeignet": true_oder_false,
|
|
||||||
"wohnung_geeignet": true_oder_false,
|
|
||||||
"temperament": "kommagetrennte Eigenschaftsliste auf Deutsch, z.B. freundlich, verspielt, loyal"
|
|
||||||
}}
|
|
||||||
'''
|
|
||||||
|
|
||||||
# Felder die direkt in wiki_rassen geschrieben werden (wenn nicht null)
|
|
||||||
_DIRECT_FIELDS = {
|
|
||||||
"beschreibung", "vorkommen_de",
|
|
||||||
"groesse", "gewicht_min_kg", "gewicht_max_kg",
|
|
||||||
"lebensdauer", "aktivitaet", "erfahrung",
|
|
||||||
"kinder_geeignet", "wohnung_geeignet", "temperament",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_json(raw: str) -> dict:
    """Extract a JSON object from a model reply.

    Three candidates are tried in order:

    1. the whole reply,
    2. the contents of the first fenced code block (with or without a
       ``json`` language tag),
    3. the span from the first ``{`` to the last ``}``.

    The first candidate that parses to a JSON *object* is returned. Valid
    JSON of another type (list, number, ``null``) is skipped, because
    callers treat the result as a mapping.

    Args:
        raw: Reply text from the model. ``None`` or an empty string is
            treated as "no JSON present".

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If no candidate decodes to a JSON object. The message
            carries the first 200 characters of the reply.
    """
    text = raw or ""

    candidates = [text]
    fenced = re.search(r"```(?:json)?\s*([\s\S]+?)\s*```", text)
    if fenced:
        candidates.append(fenced.group(1))
    # Greedy on purpose: spans from the first "{" to the last "}" so nested
    # objects stay intact.
    braced = re.search(r"\{[\s\S]+\}", text)
    if braced:
        candidates.append(braced.group(0))

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    raise ValueError(f"Kein gültiges JSON in Antwort gefunden: {text[:200]}")
|
||||||
|
|
||||||
|
|
||||||
|
async def _fetch_wikipedia_text(name: str) -> tuple[str | None, str | None]:
    """Fetch the plain-text intro of the Wikipedia article titled ``name``.

    The German Wikipedia is queried first, the English one second. The
    first extract longer than 150 characters wins and is truncated to
    3000 characters.

    Args:
        name: Article title to look up; the API is asked to follow
            redirects.

    Returns:
        ``(text, lang)`` with ``lang`` being ``"de"`` or ``"en"``, or
        ``(None, None)`` when neither language yields a usable extract.
        Request, HTTP-status and decoding failures are logged and treated
        like a missing article; they are not raised.
    """
    min_chars = 150   # shorter extracts are considered too thin to use
    max_chars = 3000  # upper bound on the text handed on to the caller

    query = {
        "action": "query",
        "titles": name,
        "prop": "extracts",
        "exintro": 1,
        "explaintext": 1,
        "format": "json",
        "redirects": 1,
    }

    for lang in ("de", "en"):
        try:
            async with httpx.AsyncClient(timeout=10, headers=_WP_HEADERS) as client:
                resp = await client.get(
                    f"https://{lang}.wikipedia.org/w/api.php",
                    params=query,
                )
            # Surface 4xx/5xx (e.g. rate limiting) as an error instead of
            # trying to interpret an error body as a query result.
            resp.raise_for_status()
            pages = resp.json().get("query", {}).get("pages", {})
            for page in pages.values():
                if page.get("pageid", -1) == -1:
                    continue
                # "extract" may be absent or null; normalise to a string.
                text = (page.get("extract") or "").strip()
                if len(text) > min_chars:
                    return text[:max_chars], lang
        except Exception as e:
            # Deliberately broad: the lookup is best-effort and must not
            # abort the caller. Logged as a warning because enrich_breeds
            # marks a breed as ki_enriched=2 when this returns (None, None).
            # NOTE(review): a transient outage is therefore recorded the
            # same way as "no article" — consider distinguishing the two.
            logger.warning("Wikipedia-Text (%s) fehlgeschlagen für %s: %s", lang, name, e)
    return None, None
|
||||||
|
|
||||||
|
|
||||||
async def _fetch_wikimedia_photo(name: str) -> str | None:
|
async def _fetch_wikimedia_photo(name: str) -> str | None:
|
||||||
"""Sucht ein lizenzfreies Foto via Wikipedia pageimages API (de → en Fallback)."""
|
"""Sucht ein lizenzfreies Foto via Wikipedia pageimages API (de → en Fallback)."""
|
||||||
for lang in ("de", "en"):
|
for lang in ("de", "en"):
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(
|
async with httpx.AsyncClient(timeout=8, headers=_WP_HEADERS) as client:
|
||||||
timeout=8,
|
|
||||||
headers={"User-Agent": "Banyaro/1.0 (https://banyaro.de; mail@banyaro.de) httpx"},
|
|
||||||
) as client:
|
|
||||||
resp = await client.get(
|
resp = await client.get(
|
||||||
f"https://{lang}.wikipedia.org/w/api.php",
|
f"https://{lang}.wikipedia.org/w/api.php",
|
||||||
params={
|
params={
|
||||||
|
|
@ -198,12 +232,40 @@ async def _fetch_wikimedia_photo(name: str) -> str | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _haiku_complete(prompt: str) -> str:
    """Send ``prompt`` to Claude Haiku and return the stripped reply text.

    The Anthropic API is always called directly (cloud model) using the
    model named in ``_HAIKU_MODEL`` and the system prompt ``_SYSTEM``.

    Args:
        prompt: User message content.

    Returns:
        The text of the first content block of the response, stripped.

    Raises:
        RuntimeError: If ``ANTHROPIC_API_KEY`` is unset or empty, or if the
            response contains no text in its first content block.
        Exception: Whatever the Anthropic client raises for a failed
            request is propagated unchanged.
    """
    key = os.getenv("ANTHROPIC_API_KEY", "")
    if not key:
        raise RuntimeError("ANTHROPIC_API_KEY nicht gesetzt")

    # Imported only after the key check so a missing key fails fast and the
    # SDK is only needed when a request is actually made.
    import anthropic

    def _call():
        client = anthropic.Anthropic(api_key=key)
        return client.messages.create(
            model=_HAIKU_MODEL,
            max_tokens=700,
            system=[{
                "type": "text",
                "text": _SYSTEM,
                "cache_control": {"type": "ephemeral"},
            }],
            messages=[{"role": "user", "content": prompt}],
        )

    # The SDK call is synchronous; run it in a worker thread so the event
    # loop stays responsive.
    resp = await asyncio.to_thread(_call)

    first = resp.content[0] if resp.content else None
    reply = getattr(first, "text", None)
    if reply is None:
        raise RuntimeError("Leere Antwort von Haiku")
    return reply.strip()
|
||||||
|
|
||||||
|
|
||||||
async def enrich_breeds(limit: int = 10) -> int:
|
async def enrich_breeds(limit: int = 10) -> int:
|
||||||
"""
|
"""
|
||||||
Reichert bis zu `limit` Rassen an, bei denen ki_enriched = 0.
|
Reichert bis zu `limit` Rassen an (ki_enriched = 0).
|
||||||
|
|
||||||
Returns:
|
Strategie: Wikipedia-Text holen → Haiku extrahiert Fakten.
|
||||||
Anzahl erfolgreich angereicherter Rassen.
|
Kein Wikipedia-Artikel → ki_enriched=2, ki_source='none'.
|
||||||
|
|
||||||
|
Returns: Anzahl erfolgreich angereicherter Rassen.
|
||||||
"""
|
"""
|
||||||
with db() as conn:
|
with db() as conn:
|
||||||
rassen = conn.execute(
|
rassen = conn.execute(
|
||||||
|
|
@ -215,32 +277,36 @@ async def enrich_breeds(limit: int = 10) -> int:
|
||||||
).fetchall()
|
).fetchall()
|
||||||
|
|
||||||
if not rassen:
|
if not rassen:
|
||||||
logger.info("Keine Rassen zur Anreicherung gefunden (alle ki_enriched=1).")
|
logger.info("Keine Rassen zur Anreicherung gefunden.")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
enriched_count = 0
|
enriched_count = 0
|
||||||
|
|
||||||
for rasse in rassen:
|
for rasse in rassen:
|
||||||
name = rasse["name"]
|
name = rasse["name"]
|
||||||
herkunft = rasse["herkunft"] or "unbekannt"
|
|
||||||
rasse_id = rasse["id"]
|
rasse_id = rasse["id"]
|
||||||
|
|
||||||
prompt = _PROMPT_TEMPLATE.format(name=name, herkunft=herkunft)
|
# 1. Wikipedia-Text holen
|
||||||
|
wiki_text, wiki_lang = await _fetch_wikipedia_text(name)
|
||||||
|
|
||||||
try:
|
if not wiki_text:
|
||||||
raw, used_model = await complete(
|
# Kein Artikel → markieren und überspringen
|
||||||
prompt,
|
with db() as conn:
|
||||||
system=_SYSTEM,
|
conn.execute(
|
||||||
max_tokens=600,
|
"UPDATE wiki_rassen SET ki_enriched=2, ki_source='none' WHERE id=?",
|
||||||
requires_premium=False,
|
(rasse_id,),
|
||||||
return_model=True,
|
|
||||||
)
|
)
|
||||||
except KIUnavailableError as e:
|
logger.info("Kein Wikipedia-Artikel: %s → übersprungen", name)
|
||||||
logger.warning("KI nicht verfügbar, Anreicherung abgebrochen: %s", e)
|
await asyncio.sleep(0.5)
|
||||||
break
|
continue
|
||||||
|
|
||||||
|
# 2. Haiku extrahiert Fakten aus dem Quelltext
|
||||||
|
prompt = _PROMPT.format(name=name, lang=wiki_lang.upper(), wiki_text=wiki_text)
|
||||||
|
try:
|
||||||
|
raw = await _haiku_complete(prompt)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Fehler bei KI-Anfrage für %s: %s", name, e)
|
logger.error("Haiku-Anfrage fehlgeschlagen für %s: %s", name, e)
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(3)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -250,34 +316,32 @@ async def enrich_breeds(limit: int = 10) -> int:
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Nur bekannte Felder mit nicht-None-Wert übernehmen
|
# 3. DB-Update
|
||||||
updates = {
|
updates = {
|
||||||
k: v for k, v in data.items()
|
k: v for k, v in data.items()
|
||||||
if k in _DIRECT_FIELDS and v is not None
|
if k in _DIRECT_FIELDS and v is not None
|
||||||
}
|
}
|
||||||
# Temperament sicherstellen: immer Deutsch
|
|
||||||
if "temperament" in updates:
|
if "temperament" in updates:
|
||||||
updates["temperament"] = translate_temperament(updates["temperament"])
|
updates["temperament"] = translate_temperament(updates["temperament"])
|
||||||
updates["ki_enriched"] = 1
|
updates["ki_enriched"] = 1
|
||||||
updates["ki_model"] = used_model
|
updates["ki_model"] = _HAIKU_MODEL
|
||||||
|
updates["ki_source"] = f"wikipedia_{wiki_lang}"
|
||||||
|
|
||||||
cols = ", ".join(f"{k}=?" for k in updates)
|
cols = ", ".join(f"{k}=?" for k in updates)
|
||||||
values = list(updates.values()) + [rasse_id]
|
values = list(updates.values()) + [rasse_id]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with db() as conn:
|
with db() as conn:
|
||||||
conn.execute(
|
conn.execute(f"UPDATE wiki_rassen SET {cols} WHERE id=?", values)
|
||||||
f"UPDATE wiki_rassen SET {cols} WHERE id=?",
|
logger.info("Rasse angereichert: %s (%d Felder, WP-%s)",
|
||||||
values,
|
name, len(updates) - 2, wiki_lang.upper())
|
||||||
)
|
|
||||||
logger.info("Rasse angereichert: %s (%d Felder)", name, len(updates) - 1)
|
|
||||||
enriched_count += 1
|
enriched_count += 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("DB-Update fehlgeschlagen für %s: %s", name, e)
|
logger.error("DB-Update fehlgeschlagen für %s: %s", name, e)
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Foto von Wikimedia holen, falls noch keins vorhanden
|
# 4. Foto holen wenn noch keins vorhanden
|
||||||
if not rasse["foto_url"]:
|
if not rasse["foto_url"]:
|
||||||
foto_url = await _fetch_wikimedia_photo(name)
|
foto_url = await _fetch_wikimedia_photo(name)
|
||||||
if foto_url:
|
if foto_url:
|
||||||
|
|
@ -291,17 +355,25 @@ async def enrich_breeds(limit: int = 10) -> int:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Foto-Update fehlgeschlagen für %s: %s", name, e)
|
logger.error("Foto-Update fehlgeschlagen für %s: %s", name, e)
|
||||||
|
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(1)
|
||||||
|
|
||||||
return enriched_count
|
return enriched_count
|
||||||
|
|
||||||
|
|
||||||
|
def reset_gemma_entries() -> int:
    """Mark every Gemma-enriched breed as unprocessed again.

    Rows of ``wiki_rassen`` whose ``ki_model`` matches ``gemma%`` get
    ``ki_enriched=0`` and have ``ki_model`` and ``ki_source`` cleared.

    Returns:
        The ``rowcount`` reported by the cursor for the UPDATE.
    """
    statement = (
        "UPDATE wiki_rassen SET ki_enriched=0, ki_model=NULL, ki_source=NULL "
        "WHERE ki_model LIKE 'gemma%'"
    )
    with db() as connection:
        affected = connection.execute(statement).rowcount
    logger.info("Gemma-Reset: %d Rassen zurückgesetzt", affected)
    return affected
|
||||||
|
|
||||||
|
|
||||||
def translate_existing_temperaments() -> int:
|
def translate_existing_temperaments() -> int:
|
||||||
"""
|
"""Übersetzt alle englischen Temperament-Felder in der DB ins Deutsche."""
|
||||||
Übersetzt alle englischen Temperament-Felder in der DB ins Deutsche.
|
|
||||||
Erkennt englische Einträge anhand bekannter Wörter aus der Map.
|
|
||||||
Gibt Anzahl aktualisierter Datensätze zurück.
|
|
||||||
"""
|
|
||||||
_english_words = set(_TEMPER_DE.keys())
|
_english_words = set(_TEMPER_DE.keys())
|
||||||
updated = 0
|
updated = 0
|
||||||
with db() as conn:
|
with db() as conn:
|
||||||
|
|
@ -311,20 +383,17 @@ def translate_existing_temperaments() -> int:
|
||||||
for row in rows:
|
for row in rows:
|
||||||
original = row["temperament"]
|
original = row["temperament"]
|
||||||
parts_lower = [p.strip().lower() for p in original.split(",")]
|
parts_lower = [p.strip().lower() for p in original.split(",")]
|
||||||
# Verarbeiten wenn englisches Wort ODER Datenmüll gefunden
|
|
||||||
has_english = any(p in _english_words for p in parts_lower)
|
has_english = any(p in _english_words for p in parts_lower)
|
||||||
has_garbage = any(
|
has_garbage = any(
|
||||||
any(g in p for g in _TEMPER_GARBAGE)
|
any(g in p for g in _TEMPER_GARBAGE) for p in parts_lower
|
||||||
for p in parts_lower
|
|
||||||
)
|
)
|
||||||
if not has_english and not has_garbage:
|
if not has_english and not has_garbage:
|
||||||
continue
|
continue
|
||||||
translated = translate_temperament(original)
|
translated = translate_temperament(original)
|
||||||
# None = nur Müll → auf NULL setzen; unterschiedlicher Text → übersetzen
|
|
||||||
if translated != original:
|
if translated != original:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"UPDATE wiki_rassen SET temperament=? WHERE id=?",
|
"UPDATE wiki_rassen SET temperament=? WHERE id=?",
|
||||||
(translated, row["id"]), # None wird zu SQL NULL
|
(translated, row["id"]),
|
||||||
)
|
)
|
||||||
updated += 1
|
updated += 1
|
||||||
logger.info("Temperament-Migration: %d Rassen übersetzt", updated)
|
logger.info("Temperament-Migration: %d Rassen übersetzt", updated)
|
||||||
|
|
@ -335,10 +404,15 @@ if __name__ == "__main__":
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
|
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
|
||||||
|
parser = argparse.ArgumentParser(description="Rassen-Anreicherung (Wikipedia-grounded)")
|
||||||
parser = argparse.ArgumentParser(description="Rassen-Anreicherung via KI")
|
parser.add_argument("--limit", type=int, default=10)
|
||||||
parser.add_argument("--limit", type=int, default=10, help="Anzahl Rassen (default: 10)")
|
parser.add_argument("--reset-gemma", action="store_true",
|
||||||
|
help="Gemma-Einträge zurücksetzen bevor angereichert wird")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.reset_gemma:
|
||||||
|
n = reset_gemma_entries()
|
||||||
|
print(f"Reset: {n} Gemma-Einträge zurückgesetzt")
|
||||||
|
|
||||||
count = asyncio.run(enrich_breeds(args.limit))
|
count = asyncio.run(enrich_breeds(args.limit))
|
||||||
print(f"Angereichert: {count} Rassen")
|
print(f"Angereichert: {count} Rassen")
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
Router, State-Management, Navigation, Initialisierung.
|
Router, State-Management, Navigation, Initialisierung.
|
||||||
============================================================ */
|
============================================================ */
|
||||||
|
|
||||||
const APP_VER = '323'; // ← bei jedem Deploy mit Frontend-Änderungen erhöhen
|
const APP_VER = '324'; // ← bei jedem Deploy mit Frontend-Änderungen erhöhen
|
||||||
|
|
||||||
const App = (() => {
|
const App = (() => {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -686,6 +686,9 @@ window.Page_admin = (() => {
|
||||||
<button class="btn btn-secondary btn-sm" id="adm-evaluate-breeds">
|
<button class="btn btn-secondary btn-sm" id="adm-evaluate-breeds">
|
||||||
${UI.icon('chart-bar')} Qualitätsbewertung (20 Rassen)
|
${UI.icon('chart-bar')} Qualitätsbewertung (20 Rassen)
|
||||||
</button>
|
</button>
|
||||||
|
<button class="btn btn-danger btn-sm" id="adm-reset-gemma">
|
||||||
|
${UI.icon('arrow-counter-clockwise')} Gemma-Einträge zurücksetzen
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<div id="adm-maint-result" style="margin-top:var(--space-2);font-size:var(--text-xs);
|
<div id="adm-maint-result" style="margin-top:var(--space-2);font-size:var(--text-xs);
|
||||||
color:var(--c-text-secondary)"></div>
|
color:var(--c-text-secondary)"></div>
|
||||||
|
|
@ -745,6 +748,21 @@ window.Page_admin = (() => {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
el.querySelector('#adm-reset-gemma').addEventListener('click', async (e) => {
|
||||||
|
if (!confirm('Alle Gemma-angereicherten Einträge zurücksetzen? Sie werden beim nächsten Job neu (Wikipedia-grounded) angereichert.')) return;
|
||||||
|
const btn = e.currentTarget;
|
||||||
|
const res = el.querySelector('#adm-maint-result');
|
||||||
|
btn.disabled = true;
|
||||||
|
try {
|
||||||
|
const d = await API.post('/admin/wiki/reset-gemma', {});
|
||||||
|
res.textContent = `✓ ${d.reset} Gemma-Einträge zurückgesetzt`;
|
||||||
|
} catch (err) {
|
||||||
|
res.textContent = '✗ Fehler: ' + (err.message || err);
|
||||||
|
} finally {
|
||||||
|
btn.disabled = false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
el.querySelector('#adm-evaluate-breeds').addEventListener('click', async (e) => {
|
el.querySelector('#adm-evaluate-breeds').addEventListener('click', async (e) => {
|
||||||
const btn = e.currentTarget;
|
const btn = e.currentTarget;
|
||||||
const res = el.querySelector('#adm-maint-result');
|
const res = el.querySelector('#adm-maint-result');
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
Offline-Cache + Push Notifications + Tile-Cache
|
Offline-Cache + Push Notifications + Tile-Cache
|
||||||
============================================================ */
|
============================================================ */
|
||||||
|
|
||||||
const CACHE_VERSION = 'by-v336';
|
const CACHE_VERSION = 'by-v337';
|
||||||
const CACHE_STATIC = `${CACHE_VERSION}-static`;
|
const CACHE_STATIC = `${CACHE_VERSION}-static`;
|
||||||
const CACHE_TILES = 'ban-yaro-tiles-v1'; // bleibt über SW-Updates erhalten
|
const CACHE_TILES = 'ban-yaro-tiles-v1'; // bleibt über SW-Updates erhalten
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue