"""BAN YARO — Import: Synology NoteStation (.nsx) + CSV""" import os, io, uuid, json, zipfile, csv, re, datetime from html.parser import HTMLParser from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form from database import db from auth import get_current_user router = APIRouter() MEDIA_DIR = os.getenv("MEDIA_DIR", "/data/media") MAX_NSX_MB = 200 MAX_CSV_MB = 10 # ------------------------------------------------------------------ # HTML → Plaintext # ------------------------------------------------------------------ class _HTMLStripper(HTMLParser): _SKIP_TAGS = {"video", "audio", "script", "style"} def __init__(self): super().__init__() self.parts = [] self._skip = 0 def handle_starttag(self, tag, attrs): if tag.lower() in self._SKIP_TAGS: self._skip += 1 def handle_endtag(self, tag): if tag.lower() in self._SKIP_TAGS and self._skip > 0: self._skip -= 1 def handle_data(self, data): if not self._skip: self.parts.append(data) def handle_entityref(self, name): if name == "nbsp": self.parts.append(" ") def handle_charref(self, name): if name in ("160", "xa0"): self.parts.append(" ") def _html_to_text(html: str) -> str: if not html: return "" # Replace block-level tags with newlines before stripping html = re.sub(r"]*>", "\n", html, flags=re.IGNORECASE) s = _HTMLStripper() s.feed(html) text = "".join(s.parts) # Collapse excessive blank lines text = re.sub(r"\n{3,}", "\n\n", text).strip() return text # ------------------------------------------------------------------ # Helfer: Bild aus ZIP in Media-Ordner speichern # ------------------------------------------------------------------ def _save_image_from_zip(zf: zipfile.ZipFile, md5: str, mime: str) -> str | None: zip_name = f"file_{md5}" if zip_name not in zf.namelist(): return None ext = mime.split("/")[-1] if mime else "jpg" ext = ext.replace("jpeg", "jpg") dest_dir = os.path.join(MEDIA_DIR, "diary") os.makedirs(dest_dir, exist_ok=True) filename = f"import_{uuid.uuid4().hex}.{ext}" dest = os.path.join(dest_dir, filename) with zf.open(zip_name) as src, open(dest, "wb") as dst: dst.write(src.read()) return f"/media/diary/{filename}" # ------------------------------------------------------------------ # POST /api/import/notestation # ------------------------------------------------------------------ @router.post("/notestation") async def import_notestation( dog_id: int = Form(...), file: UploadFile = File(...), user=Depends(get_current_user), ): if not file.filename.lower().endswith(".nsx"): raise HTTPException(400, "Bitte eine .nsx-Datei hochladen.") raw = await file.read() if len(raw) > MAX_NSX_MB * 1024 * 1024: raise HTTPException(413, f"Datei zu groß (max {MAX_NSX_MB} MB).") with db() as conn: dog = conn.execute( "SELECT id FROM dogs WHERE id=? AND user_id=?", (dog_id, user["id"]) ).fetchone() if not dog: raise HTTPException(404, "Hund nicht gefunden.") try: zf = zipfile.ZipFile(io.BytesIO(raw)) except zipfile.BadZipFile: raise HTTPException(400, "Ungültige .nsx-Datei (kein ZIP).") config = json.loads(zf.read("config.json")) note_ids = config.get("note", []) imported = 0 skipped = 0 errors = [] with db() as conn: for nid in note_ids: try: note = json.loads(zf.read(nid)) except Exception as e: errors.append(f"{nid}: Lesefehler – {e}") continue if note.get("category") == "notebook": skipped += 1 continue # Datum aus ctime (Unix-Timestamp) ctime = note.get("ctime") or note.get("mtime") if ctime: datum = datetime.datetime.fromtimestamp(ctime).strftime("%Y-%m-%d") else: datum = datetime.date.today().isoformat() titel = (note.get("title") or "").strip() or None text = _html_to_text(note.get("content", "")) or None lat = note.get("latitude") or None lon = note.get("longitude") or None # Koordinate (0,0) ist kein echter Ort if lat == 0.0: lat = None if lon == 0.0: lon = None tags = note.get("tag") or [] # Eintrag anlegen conn.execute( """INSERT INTO diary (dog_id, datum, typ, titel, text, tags, gps_lat, gps_lon, is_milestone) VALUES (?, ?, 'eintrag', ?, ?, ?, ?, ?, 0)""", (dog_id, datum, titel, text, json.dumps(tags), lat, lon), ) entry_id = conn.execute( "SELECT last_insert_rowid() AS id" ).fetchone()["id"] conn.execute( "INSERT OR IGNORE INTO diary_dogs (diary_id, dog_id) VALUES (?,?)", (entry_id, dog_id), ) # Anhänge in diary_media speichern (statt veraltetem media_url-Feld) attachments = note.get("attachment") or {} first = True for att in attachments.values(): md5 = att.get("md5", "") mime = att.get("type", "image/jpeg") if not mime.startswith("image/"): continue media_url = _save_image_from_zip(zf, md5, mime) if media_url: conn.execute( "INSERT INTO diary_media (diary_id, url, media_type, sort_order, is_cover) VALUES (?,?,?,?,?)", (entry_id, media_url, "image", 0 if first else 1, 1 if first else 0), ) first = False imported += 1 return {"imported": imported, "skipped": skipped, "errors": errors} # ------------------------------------------------------------------ # POST /api/import/csv # Spalten (Header-Zeile): datum, titel, text, tags, gps_lat, gps_lon, is_milestone # Trenner: Komma oder Semikolon | Encoding: UTF-8 (mit oder ohne BOM) # ------------------------------------------------------------------ @router.post("/csv") async def import_csv( dog_id: int = Form(...), file: UploadFile = File(...), user=Depends(get_current_user), ): if not file.filename.lower().endswith(".csv"): raise HTTPException(400, "Bitte eine .csv-Datei hochladen.") raw = await file.read() if len(raw) > MAX_CSV_MB * 1024 * 1024: raise HTTPException(413, f"Datei zu groß (max {MAX_CSV_MB} MB).") with db() as conn: dog = conn.execute( "SELECT id FROM dogs WHERE id=? AND user_id=?", (dog_id, user["id"]) ).fetchone() if not dog: raise HTTPException(404, "Hund nicht gefunden.") text = raw.decode("utf-8-sig") # utf-8-sig entfernt BOM falls vorhanden # Semikolon oder Komma erkennen first_line = text.split("\n")[0] delimiter = ";" if first_line.count(";") >= first_line.count(",") else "," reader = csv.DictReader(io.StringIO(text), delimiter=delimiter) # Normalisierte Header (lowercase, strip) reader.fieldnames = [h.strip().lower() for h in (reader.fieldnames or [])] REQUIRED = {"datum", "titel", "text"} if not REQUIRED.issubset(set(reader.fieldnames)): missing = REQUIRED - set(reader.fieldnames) raise HTTPException( 400, f"CSV fehlen Pflicht-Spalten: {', '.join(sorted(missing))}. " "Erwartet: datum, titel, text (+ optional: tags, gps_lat, gps_lon, is_milestone)" ) imported = 0 skipped = 0 errors = [] with db() as conn: for i, row in enumerate(reader, start=2): try: datum = (row.get("datum") or "").strip() if not datum: skipped += 1 continue # Datum normalisieren: DD.MM.YYYY → YYYY-MM-DD if re.match(r"^\d{1,2}\.\d{1,2}\.\d{4}$", datum): d, m, y = datum.split(".") datum = f"{y}-{int(m):02d}-{int(d):02d}" titel = (row.get("titel") or "").strip() or None text = (row.get("text") or "").strip() or None raw_tags = (row.get("tags") or "").strip() if raw_tags.startswith("["): tags = json.loads(raw_tags) else: tags = [t.strip() for t in re.split(r"[;,]", raw_tags) if t.strip()] lat = None lon = None try: lat_s = (row.get("gps_lat") or "").strip().replace(",", ".") lon_s = (row.get("gps_lon") or "").strip().replace(",", ".") if lat_s: lat = float(lat_s) if lon_s: lon = float(lon_s) except ValueError: pass is_milestone = (row.get("is_milestone") or "").strip().lower() in ( "1", "true", "ja", "yes" ) conn.execute( """INSERT INTO diary (dog_id, datum, typ, titel, text, tags, gps_lat, gps_lon, is_milestone) VALUES (?, ?, 'eintrag', ?, ?, ?, ?, ?, ?)""", (dog_id, datum, titel, text, json.dumps(tags), lat, lon, int(is_milestone)), ) entry_id = conn.execute( "SELECT last_insert_rowid() AS id" ).fetchone()["id"] conn.execute( "INSERT OR IGNORE INTO diary_dogs (diary_id, dog_id) VALUES (?,?)", (entry_id, dog_id), ) imported += 1 except Exception as e: errors.append(f"Zeile {i}: {e}") return {"imported": imported, "skipped": skipped, "errors": errors}