- osm.py: Live-Scanner deaktiviert — /pois liest nur noch aus DB, /analyze ist No-Op. Behebt wiederholte OSM-Banns (Tile-Load + Scanning). - tools/osm-extract: Extraktion (pyosmium) + Loader (schützt user_edited) + Docker-Refresh-Job mit osmium-tags-filter-Vorstufe (RAM-schonend). - docker-compose.osm.yml: Refresh-Service (mem_limit 4g), monatlich via DSM-Aufgabenplaner.
140 lines
4.4 KiB
Python
140 lines
4.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Offline-Extraktion der Ban-Yaro-POIs aus einer OSM-.pbf-Datei.
|
|
|
|
Ersetzt das Live-Scannen gegen overpass-api.de (backend/routes/osm.py) durch
|
|
einen einmaligen/periodischen Batch-Lauf — keine OSM-Live-Last, kein Bann mehr.
|
|
|
|
Das Kategorie→Tag-Mapping ist 1:1 aus OSM_QUERIES in backend/routes/osm.py
|
|
übernommen. Schreibt ins selbe Schema wie die Produktiv-Tabelle `osm_pois`
|
|
(osm_id, type, lat, lon, name, opening_hours, phone, website, cached_at).
|
|
|
|
Aufruf:
|
|
python3 extract_osm_pois.py <input.osm.pbf> <output.sqlite>
|
|
"""
|
|
import sys
|
|
import sqlite3
|
|
import osmium
|
|
|
|
|
|
# --- Kategorie-Klassifikation (1:1 aus OSM_QUERIES, backend/routes/osm.py) ---
|
|
def classify(t) -> list[str]:
|
|
"""Gibt alle Ban-Yaro-Typen zurück, die auf dieses OSM-Objekt passen."""
|
|
types: list[str] = []
|
|
a = t.get("amenity")
|
|
shop = t.get("shop")
|
|
craft = t.get("craft")
|
|
leisure = t.get("leisure")
|
|
tourism = t.get("tourism")
|
|
dog = t.get("dog")
|
|
outdoor = t.get("outdoor_seating")
|
|
# "hundefreundlich, breiter gefasst": explizit erlaubt ODER Terrasse
|
|
dog_ok = dog in ("yes", "allowed", "leashed")
|
|
|
|
if a == "waste_basket":
|
|
types.append("waste_basket")
|
|
if a == "drinking_water":
|
|
types.append("drinking_water")
|
|
if a == "veterinary":
|
|
types.append("tierarzt")
|
|
if a == "bench":
|
|
types.append("bank")
|
|
if leisure == "dog_park" or (leisure == "park" and dog == "yes"):
|
|
types.append("dog_park")
|
|
if shop == "pet":
|
|
types.append("shop")
|
|
if shop == "pet_grooming" or craft == "pet_grooming":
|
|
types.append("hundesalon")
|
|
if (a in ("restaurant", "cafe") and (dog_ok or outdoor == "yes")) or a == "biergarten":
|
|
types.append("restaurant")
|
|
if tourism in ("hotel", "guest_house", "hostel") and dog_ok:
|
|
types.append("hotel")
|
|
return types
|
|
|
|
|
|
SCHEMA = """
|
|
CREATE TABLE IF NOT EXISTS osm_pois (
|
|
osm_id INTEGER NOT NULL,
|
|
type TEXT NOT NULL,
|
|
lat REAL NOT NULL,
|
|
lon REAL NOT NULL,
|
|
name TEXT,
|
|
opening_hours TEXT,
|
|
phone TEXT,
|
|
website TEXT,
|
|
user_edited INTEGER NOT NULL DEFAULT 0,
|
|
cached_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
PRIMARY KEY (osm_id, type)
|
|
);
|
|
CREATE INDEX IF NOT EXISTS idx_osm_pois_loc ON osm_pois(type, lat, lon);
|
|
"""
|
|
|
|
|
|
class PoiHandler(osmium.SimpleHandler):
|
|
def __init__(self, conn):
|
|
super().__init__()
|
|
self.conn = conn
|
|
self.rows = 0
|
|
self.objs = 0
|
|
|
|
def _save(self, osm_id, tags, lat, lon):
|
|
types = classify(tags)
|
|
if not types:
|
|
return
|
|
self.objs += 1
|
|
name = tags.get("name")
|
|
oh = tags.get("opening_hours")
|
|
phone = tags.get("phone") or tags.get("contact:phone")
|
|
web = tags.get("website") or tags.get("contact:website")
|
|
for ty in types:
|
|
self.conn.execute(
|
|
"INSERT OR REPLACE INTO osm_pois "
|
|
"(osm_id, type, lat, lon, name, opening_hours, phone, website, cached_at) "
|
|
"VALUES (?,?,?,?,?,?,?,?, datetime('now'))",
|
|
(osm_id, ty, lat, lon, name, oh, phone, web),
|
|
)
|
|
self.rows += 1
|
|
|
|
def node(self, n):
|
|
if n.tags:
|
|
self._save(n.id, n.tags, n.location.lat, n.location.lon)
|
|
|
|
def way(self, w):
|
|
# Wege (z. B. Tierarzt im Gebäude) → Schwerpunkt aus Knoten ("out center")
|
|
if not w.tags:
|
|
return
|
|
lats, lons = [], []
|
|
for nd in w.nodes:
|
|
if nd.location.valid():
|
|
lats.append(nd.location.lat)
|
|
lons.append(nd.location.lon)
|
|
if lats:
|
|
self._save(w.id, w.tags, sum(lats) / len(lats), sum(lons) / len(lons))
|
|
|
|
|
|
def main():
|
|
if len(sys.argv) != 3:
|
|
print(__doc__)
|
|
sys.exit(1)
|
|
src, dst = sys.argv[1], sys.argv[2]
|
|
|
|
conn = sqlite3.connect(dst)
|
|
conn.executescript(SCHEMA)
|
|
|
|
h = PoiHandler(conn)
|
|
# locations=True: Knoten-Koordinaten im Speicher halten, damit Wege einen
|
|
# Schwerpunkt bekommen. flex_mem skaliert bis Länder-Extrakte.
|
|
h.apply_file(src, locations=True, idx="flex_mem")
|
|
|
|
conn.commit()
|
|
print(f"\nObjekte mit Treffer: {h.objs:,} eingefügte Zeilen: {h.rows:,}")
|
|
print("\nPro Typ:")
|
|
for ty, cnt in conn.execute(
|
|
"SELECT type, COUNT(*) FROM osm_pois GROUP BY type ORDER BY 2 DESC"
|
|
):
|
|
print(f" {ty:16s} {cnt:>8,}")
|
|
conn.close()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|