banyaro/tools/osm-extract/extract_osm_pois.py
rene 4bc7454258 POI-Karte: Offline-Import aus OSM statt Live-Overpass-Scan (Build 4)
- osm.py: Live-Scanner deaktiviert — /pois liest nur noch aus DB,
  /analyze ist No-Op. Behebt wiederholte OSM-Banns (Tile-Load + Scanning).
- tools/osm-extract: Extraktion (pyosmium) + Loader (schützt user_edited)
  + Docker-Refresh-Job mit osmium-tags-filter-Vorstufe (RAM-schonend).
- docker-compose.osm.yml: Refresh-Service (mem_limit 4g), monatlich via
  DSM-Aufgabenplaner.
2026-06-03 20:44:32 +02:00

140 lines
4.4 KiB
Python

#!/usr/bin/env python3
"""
Offline-Extraktion der Ban-Yaro-POIs aus einer OSM-.pbf-Datei.
Ersetzt das Live-Scannen gegen overpass-api.de (backend/routes/osm.py) durch
einen einmaligen/periodischen Batch-Lauf — keine OSM-Live-Last, kein Bann mehr.
Das Kategorie→Tag-Mapping ist 1:1 aus OSM_QUERIES in backend/routes/osm.py
übernommen. Schreibt ins selbe Schema wie die Produktiv-Tabelle `osm_pois`
(osm_id, type, lat, lon, name, opening_hours, phone, website, cached_at).
Aufruf:
python3 extract_osm_pois.py <input.osm.pbf> <output.sqlite>
"""
import sys
import sqlite3
import osmium
# --- Kategorie-Klassifikation (1:1 aus OSM_QUERIES, backend/routes/osm.py) ---
def classify(t) -> list[str]:
"""Gibt alle Ban-Yaro-Typen zurück, die auf dieses OSM-Objekt passen."""
types: list[str] = []
a = t.get("amenity")
shop = t.get("shop")
craft = t.get("craft")
leisure = t.get("leisure")
tourism = t.get("tourism")
dog = t.get("dog")
outdoor = t.get("outdoor_seating")
# "hundefreundlich, breiter gefasst": explizit erlaubt ODER Terrasse
dog_ok = dog in ("yes", "allowed", "leashed")
if a == "waste_basket":
types.append("waste_basket")
if a == "drinking_water":
types.append("drinking_water")
if a == "veterinary":
types.append("tierarzt")
if a == "bench":
types.append("bank")
if leisure == "dog_park" or (leisure == "park" and dog == "yes"):
types.append("dog_park")
if shop == "pet":
types.append("shop")
if shop == "pet_grooming" or craft == "pet_grooming":
types.append("hundesalon")
if (a in ("restaurant", "cafe") and (dog_ok or outdoor == "yes")) or a == "biergarten":
types.append("restaurant")
if tourism in ("hotel", "guest_house", "hostel") and dog_ok:
types.append("hotel")
return types
SCHEMA = """
CREATE TABLE IF NOT EXISTS osm_pois (
osm_id INTEGER NOT NULL,
type TEXT NOT NULL,
lat REAL NOT NULL,
lon REAL NOT NULL,
name TEXT,
opening_hours TEXT,
phone TEXT,
website TEXT,
user_edited INTEGER NOT NULL DEFAULT 0,
cached_at TEXT NOT NULL DEFAULT (datetime('now')),
PRIMARY KEY (osm_id, type)
);
CREATE INDEX IF NOT EXISTS idx_osm_pois_loc ON osm_pois(type, lat, lon);
"""
class PoiHandler(osmium.SimpleHandler):
def __init__(self, conn):
super().__init__()
self.conn = conn
self.rows = 0
self.objs = 0
def _save(self, osm_id, tags, lat, lon):
types = classify(tags)
if not types:
return
self.objs += 1
name = tags.get("name")
oh = tags.get("opening_hours")
phone = tags.get("phone") or tags.get("contact:phone")
web = tags.get("website") or tags.get("contact:website")
for ty in types:
self.conn.execute(
"INSERT OR REPLACE INTO osm_pois "
"(osm_id, type, lat, lon, name, opening_hours, phone, website, cached_at) "
"VALUES (?,?,?,?,?,?,?,?, datetime('now'))",
(osm_id, ty, lat, lon, name, oh, phone, web),
)
self.rows += 1
def node(self, n):
if n.tags:
self._save(n.id, n.tags, n.location.lat, n.location.lon)
def way(self, w):
# Wege (z. B. Tierarzt im Gebäude) → Schwerpunkt aus Knoten ("out center")
if not w.tags:
return
lats, lons = [], []
for nd in w.nodes:
if nd.location.valid():
lats.append(nd.location.lat)
lons.append(nd.location.lon)
if lats:
self._save(w.id, w.tags, sum(lats) / len(lats), sum(lons) / len(lons))
def main():
if len(sys.argv) != 3:
print(__doc__)
sys.exit(1)
src, dst = sys.argv[1], sys.argv[2]
conn = sqlite3.connect(dst)
conn.executescript(SCHEMA)
h = PoiHandler(conn)
# locations=True: Knoten-Koordinaten im Speicher halten, damit Wege einen
# Schwerpunkt bekommen. flex_mem skaliert bis Länder-Extrakte.
h.apply_file(src, locations=True, idx="flex_mem")
conn.commit()
print(f"\nObjekte mit Treffer: {h.objs:,} eingefügte Zeilen: {h.rows:,}")
print("\nPro Typ:")
for ty, cnt in conn.execute(
"SELECT type, COUNT(*) FROM osm_pois GROUP BY type ORDER BY 2 DESC"
):
print(f" {ty:16s} {cnt:>8,}")
conn.close()
if __name__ == "__main__":
main()