Fix date range detection pattern

This commit is contained in:
Luca 2023-03-30 01:19:28 +02:00
parent 5a978d2f90
commit 15545098b2
1 changed file with 6 additions and 3 deletions

View File

@@ -15,8 +15,9 @@ GALLERIES_URL = f'{BASE_URL}{GALLERIES_PATH}'
TARGET_DIR = Path(__file__).resolve().parent.parent / 'content' / 'galerie' TARGET_DIR = Path(__file__).resolve().parent.parent / 'content' / 'galerie'
DATE_PATTERN = r'[DFMS]\w+, \d{1,2}. [ADFJMNOS]\w+ \d{4}' DATE_PATTERN = r'([DFMS]\w+, \d{1,2}. [ADFJMNOS]\w+ \d{4})'
DATE_RANGE_PATTERN = f'({DATE_PATTERN})(?:| bis ({DATE_PATTERN}))' DATE_RANGE_SEPARATOR_PATTERN = r'bis\s+<.+>'
DATE_RANGE_PATTERN = f'{DATE_RANGE_SEPARATOR_PATTERN}{DATE_PATTERN}'
MONTH_NAMES = ('Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember') MONTH_NAMES = ('Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember')
MONTHS = {month: str(i+1).zfill(2) for i, month in enumerate(MONTH_NAMES)} MONTHS = {month: str(i+1).zfill(2) for i, month in enumerate(MONTH_NAMES)}
@@ -42,7 +43,9 @@ def download_gallery(path):
r.raise_for_status() r.raise_for_status()
date = search(DATE_RANGE_PATTERN, r.text) date = search(DATE_RANGE_PATTERN, r.text)
date = convert_date(date[2] if date[2] else date[1]) if date is None:
date = search(DATE_PATTERN, r.text)
date = convert_date(date[1])
soup = BeautifulSoup(r.text, 'html.parser') soup = BeautifulSoup(r.text, 'html.parser')
minify_whitespace(soup) minify_whitespace(soup)