KI-Vision-Model, Breed-Scraper, Karte/Routen + Release v1292
Parallel entstandene Änderungen (auf Staging mitgetestet): KI-Vision-Model (VISION_MODEL in ki.py/routes, im KI-Status sichtbar); Breed-Scraper-Anpassungen (breed_enricher/breed_evaluator; evaluate_enrichment erhält den neuen optionalen Parameter user_id); Karten-/Routen-Änderungen (map.js, routes.js); kleinere UI-Anpassungen (admin.js, components.css); außerdem Änderungen an docker-compose, MARKETING und dem nav-loop-Test. Version-Bump auf 1292 (VERSION, sw.js, app.js, index.html, landing.html).
This commit is contained in:
parent
51aad6cf1b
commit
f7370028da
17 changed files with 322 additions and 100 deletions
|
|
@ -43,19 +43,23 @@ Aktivität zur Erfahrung)?
|
|||
'''
|
||||
|
||||
|
||||
async def evaluate_enrichment(sample_size: int = 20) -> dict:
|
||||
async def evaluate_enrichment(sample_size: int = 20, user_id: int | None = None) -> dict:
|
||||
"""
|
||||
Bewertet `sample_size` zufällig gewählte angereicherte Rassen via Claude.
|
||||
Bewertet `sample_size` zufällig gewählte angereicherte Rassen als LLM-as-Judge.
|
||||
|
||||
Läuft über die zentrale KI-Abstraktion (ki.complete). Admins/Moderatoren werden
|
||||
dort Cloud-priorisiert (Claude); ist die Cloud nicht erreichbar, fällt die
|
||||
Bewertung sauber auf das lokale Modell zurück, statt hart abzubrechen.
|
||||
|
||||
Returns dict mit aggregierten Scores und Einzelergebnissen.
|
||||
"""
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from database import db
|
||||
import ki
|
||||
|
||||
ANTHROPIC_KEY = os.getenv("ANTHROPIC_API_KEY", "")
|
||||
if not ANTHROPIC_KEY:
|
||||
raise RuntimeError("ANTHROPIC_API_KEY nicht gesetzt — Evaluierung benötigt Cloud.")
|
||||
if ki.KI_MODE == "off":
|
||||
raise RuntimeError("KI ist deaktiviert (KI_MODE=off) — Evaluierung nicht möglich.")
|
||||
|
||||
with db() as conn:
|
||||
rassen = conn.execute(
|
||||
|
|
@ -65,8 +69,7 @@ async def evaluate_enrichment(sample_size: int = 20) -> dict:
|
|||
wohnung_geeignet, temperament, ki_model
|
||||
FROM wiki_rassen
|
||||
WHERE ki_enriched = 1
|
||||
AND ki_model IS NOT NULL
|
||||
AND ki_model NOT LIKE 'claude%'
|
||||
AND (ki_model IS NULL OR ki_model NOT LIKE 'claude%')
|
||||
ORDER BY RANDOM()
|
||||
LIMIT ?""",
|
||||
(sample_size,),
|
||||
|
|
@ -75,10 +78,10 @@ async def evaluate_enrichment(sample_size: int = 20) -> dict:
|
|||
if not rassen:
|
||||
return {"error": "Keine angereicherten Rassen gefunden."}
|
||||
|
||||
import anthropic
|
||||
client = anthropic.Anthropic(api_key=ANTHROPIC_KEY)
|
||||
_EVAL_SYSTEM = "Du bist ein präziser Qualitätsprüfer. Antworte ausschließlich als JSON."
|
||||
|
||||
results = []
|
||||
sources = set()
|
||||
totals = {"vollstaendigkeit": 0, "korrektheit": 0,
|
||||
"sprachqualitaet": 0, "konsistenz": 0, "gesamt": 0}
|
||||
|
||||
|
|
@ -102,22 +105,17 @@ async def evaluate_enrichment(sample_size: int = 20) -> dict:
|
|||
data=json.dumps(data, ensure_ascii=False, indent=2),
|
||||
)
|
||||
try:
|
||||
def _call():
|
||||
return client.messages.create(
|
||||
model="claude-haiku-4-5-20251001",
|
||||
max_tokens=256,
|
||||
system=[{
|
||||
"type": "text",
|
||||
"text": "Du bist ein präziser Qualitätsprüfer. Antworte ausschließlich als JSON.",
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
}],
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
loop = asyncio.get_event_loop()
|
||||
resp = await loop.run_in_executor(None, _call)
|
||||
raw = resp.content[0].text.strip()
|
||||
raw, source = await ki.complete(
|
||||
prompt,
|
||||
system=_EVAL_SYSTEM,
|
||||
max_tokens=256,
|
||||
json_mode=True,
|
||||
user_id=user_id,
|
||||
return_source=True,
|
||||
)
|
||||
sources.add(source)
|
||||
|
||||
# JSON extrahieren
|
||||
# JSON extrahieren (lokale Modelle wrappen gern in ```json … ```)
|
||||
import re
|
||||
match = re.search(r"\{[\s\S]+\}", raw)
|
||||
scores = json.loads(match.group(0)) if match else {}
|
||||
|
|
@ -136,9 +134,12 @@ async def evaluate_enrichment(sample_size: int = 20) -> dict:
|
|||
count = len([r for r in results if "error" not in r])
|
||||
averages = {k: round(v / count, 2) for k, v in totals.items()} if count else {}
|
||||
|
||||
judge_source = "/".join(sorted(sources)) if sources else "unbekannt"
|
||||
|
||||
return {
|
||||
"sample_size": len(rassen),
|
||||
"evaluated": count,
|
||||
"averages": averages,
|
||||
"judge_source": judge_source, # "cloud" (Claude) oder "local" (LM Studio)
|
||||
"results": results,
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue