From 4e5a13d9e2f12dfc9b49f8734b8e8ed97d615b91 Mon Sep 17 00:00:00 2001 From: rene Date: Fri, 8 May 2026 13:38:13 +0200 Subject: [PATCH] Fix: VDH-Scraper <br>
in handle_starttag statt handle_endtag (void elements) --- backend/scraper/events_vdh.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/backend/scraper/events_vdh.py b/backend/scraper/events_vdh.py index 020908c..585c570 100644 --- a/backend/scraper/events_vdh.py +++ b/backend/scraper/events_vdh.py @@ -112,6 +112,13 @@ class _SpezialParser(HTMLParser): self._in_b = True self._buf = "" + #
<br> ist void — kein handle_endtag, muss hier behandelt werden + if self._in_span and tag == "br": + part = self._buf.strip() + if part and not self._in_b: + self._parts.append(part) + self._buf = "" + def handle_endtag(self, tag): if self._in_span: if tag == "b" and self._in_b: @@ -119,12 +126,6 @@ class _SpezialParser(HTMLParser): self._title = self._buf.strip() self._buf = "" - elif tag == "br": - part = self._buf.strip() - if part: - self._parts.append(part) - self._buf = "" - elif self._depth <= self._span_d: # Ende der linken span6 — auswerten self._in_span = False @@ -199,6 +200,13 @@ class _SportParser(HTMLParser): if self._in_li and tag == "b": self._in_b = True + #
<br> ist void — kein handle_endtag, muss hier behandelt werden + if self._in_li and tag == "br": + part = self._buf.strip() + if part: + self._parts.append(part) + self._buf = "" + def handle_endtag(self, tag): if tag == "h2" and self._in_h2: self._in_h2 = False @@ -209,12 +217,6 @@ class _SportParser(HTMLParser): if tag == "b": self._in_b = False - elif tag == "br": - part = self._buf.strip() - if part: - self._parts.append(part) - self._buf = "" - elif tag == "li" and self._depth <= self._li_d: self._in_li = False # parts: [date, title, "Ort: Stadt"] oder ähnlich