Fix date range detection pattern
This commit is contained in:
parent
5a978d2f90
commit
15545098b2
|
@@ -15,8 +15,9 @@ GALLERIES_URL = f'{BASE_URL}{GALLERIES_PATH}'
|
||||||
|
|
||||||
TARGET_DIR = Path(__file__).resolve().parent.parent / 'content' / 'galerie'
|
TARGET_DIR = Path(__file__).resolve().parent.parent / 'content' / 'galerie'
|
||||||
|
|
||||||
DATE_PATTERN = r'[DFMS]\w+, \d{1,2}. [ADFJMNOS]\w+ \d{4}'
|
DATE_PATTERN = r'([DFMS]\w+, \d{1,2}. [ADFJMNOS]\w+ \d{4})'
|
||||||
DATE_RANGE_PATTERN = f'({DATE_PATTERN})(?:| bis ({DATE_PATTERN}))'
|
DATE_RANGE_SEPARATOR_PATTERN = r'bis\s+<.+>'
|
||||||
|
DATE_RANGE_PATTERN = f'{DATE_RANGE_SEPARATOR_PATTERN}{DATE_PATTERN}'
|
||||||
|
|
||||||
MONTH_NAMES = ('Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember')
|
MONTH_NAMES = ('Januar', 'Februar', 'März', 'April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November', 'Dezember')
|
||||||
MONTHS = {month: str(i+1).zfill(2) for i, month in enumerate(MONTH_NAMES)}
|
MONTHS = {month: str(i+1).zfill(2) for i, month in enumerate(MONTH_NAMES)}
|
||||||
|
@@ -42,7 +43,9 @@ def download_gallery(path):
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
|
|
||||||
date = search(DATE_RANGE_PATTERN, r.text)
|
date = search(DATE_RANGE_PATTERN, r.text)
|
||||||
date = convert_date(date[2] if date[2] else date[1])
|
if date is None:
|
||||||
|
date = search(DATE_PATTERN, r.text)
|
||||||
|
date = convert_date(date[1])
|
||||||
|
|
||||||
soup = BeautifulSoup(r.text, 'html.parser')
|
soup = BeautifulSoup(r.text, 'html.parser')
|
||||||
minify_whitespace(soup)
|
minify_whitespace(soup)
|
||||||
|
|
Loading…
Reference in New Issue