Fix date range detection pattern
This commit is contained in:
parent
5a978d2f90
commit
15545098b2
|
@ -15,8 +15,9 @@ GALLERIES_URL = f'{BASE_URL}{GALLERIES_PATH}'
|
|||
|
||||
# 'content/galerie' beneath the directory that contains this file's own directory.
TARGET_DIR = Path(__file__).resolve().parent.parent.joinpath('content', 'galerie')
|
||||
|
||||
DATE_PATTERN = r'[DFMS]\w+, \d{1,2}. [ADFJMNOS]\w+ \d{4}'
|
||||
DATE_RANGE_PATTERN = f'({DATE_PATTERN})(?:| bis ({DATE_PATTERN}))'
|
||||
# Matches one long-form German date such as "Samstag, 12. Mai 2018" and
# captures the whole date as group 1. The dot after the day number is escaped
# so that only a literal "." is accepted there, not an arbitrary character.
DATE_PATTERN = r'([DFMS]\w+, \d{1,2}\. [ADFJMNOS]\w+ \d{4})'
|
||||
# Matches the word "bis", at least one whitespace character, and then a greedy
# angle-bracketed span on the same line (presumably one or more HTML tags
# between the two dates of a range -- verify against the scraped markup).
# Contains no capture groups.
DATE_RANGE_SEPARATOR_PATTERN = r'bis\s+<.+>'
|
||||
# A range separator immediately followed by a date. The only capture group is
# the one from DATE_PATTERN, so group 1 of a match is the date that follows
# the separator.
DATE_RANGE_PATTERN = f'{DATE_RANGE_SEPARATOR_PATTERN}{DATE_PATTERN}'
|
||||
|
||||
# German month names in calendar order (index 0 is January).
MONTH_NAMES = (
    'Januar',
    'Februar',
    'März',
    'April',
    'Mai',
    'Juni',
    'Juli',
    'August',
    'September',
    'Oktober',
    'November',
    'Dezember',
)
|
||||
# Maps each month name to its two-digit, zero-padded month number ('01'..'12').
MONTHS = {name: f'{number:02d}' for number, name in enumerate(MONTH_NAMES, start=1)}
|
||||
|
@ -42,7 +43,9 @@ def download_gallery(path):
|
|||
r.raise_for_status()
|
||||
|
||||
date = search(DATE_RANGE_PATTERN, r.text)
|
||||
date = convert_date(date[2] if date[2] else date[1])
|
||||
if date is None:
|
||||
date = search(DATE_PATTERN, r.text)
|
||||
date = convert_date(date[1])
|
||||
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
minify_whitespace(soup)
|
||||
|
|
Loading…
Reference in New Issue