From 15545098b2ca1c788b4bdd59b20d1fa2e82e1fb0 Mon Sep 17 00:00:00 2001 From: Luca Date: Thu, 30 Mar 2023 01:19:28 +0200 Subject: [PATCH] Fix date range detection pattern --- bin/download_galleries.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bin/download_galleries.py b/bin/download_galleries.py index 500bd60..167457f 100755 --- a/bin/download_galleries.py +++ b/bin/download_galleries.py @@ -15,8 +15,9 @@ GALLERIES_URL = f'{BASE_URL}{GALLERIES_PATH}' TARGET_DIR = Path(__file__).resolve().parent.parent / 'content' / 'galerie' -DATE_PATTERN = r'[DFMS]\w+, \d{1,2}. [ADFJMNOS]\w+ \d{4}' -DATE_RANGE_PATTERN = f'({DATE_PATTERN})(?:| bis ({DATE_PATTERN}))' +DATE_PATTERN = r'([DFMS]\w+, \d{1,2}. [ADFJMNOS]\w+ \d{4})' +DATE_RANGE_SEPARATOR_PATTERN = r'bis\s+<.+>' +DATE_RANGE_PATTERN = f'{DATE_RANGE_SEPARATOR_PATTERN}{DATE_PATTERN}' MONTH_NAMES = ('Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember') MONTHS = {month: str(i+1).zfill(2) for i, month in enumerate(MONTH_NAMES)} @@ -42,7 +43,9 @@ def download_gallery(path): r.raise_for_status() date = search(DATE_RANGE_PATTERN, r.text) - date = convert_date(date[2] if date[2] else date[1]) + if date is None: + date = search(DATE_PATTERN, r.text) + date = convert_date(date[1]) soup = BeautifulSoup(r.text, 'html.parser') minify_whitespace(soup)