Extend script to download images as well
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
This commit is contained in:
parent
61f679eb2d
commit
f5af0f8d85
|
@ -1,7 +1,9 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime, timezone
|
||||
from markdownify import MarkdownConverter
|
||||
from os import utime
|
||||
from pathlib import Path
|
||||
from re import search, sub
|
||||
from sys import stderr
|
||||
|
@ -33,6 +35,33 @@ def minify_whitespace(soup):
|
|||
else:
|
||||
soup.replace_with(sub(r'\s+', ' ', soup))
|
||||
|
||||
def parse_http_datetime(dt):
    """Parse an HTTP date header value into a timezone-aware datetime.

    Args:
        dt: Header value such as ``'Wed, 21 Oct 2015 07:28:00 GMT'``.

    Returns:
        A timezone-aware ``datetime``. Values that carry no usable zone
        information (e.g. ``-0000``) are interpreted as UTC.

    Raises:
        ValueError: If *dt* cannot be parsed as a date.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import list.
    from email.utils import parsedate_to_datetime

    # parsedate_to_datetime is locale-independent, unlike strptime with
    # %a/%b, which only matches English day/month names under a C/English
    # locale.
    try:
        parsed = parsedate_to_datetime(dt)
    except TypeError as e:
        # Python < 3.10 signals unparsable input with TypeError; normalise it
        # to the ValueError that strptime-based parsing would have raised.
        raise ValueError(f'invalid HTTP date: {dt!r}') from e

    # A naive result cannot be compared with aware datetimes; HTTP dates are
    # defined in GMT, so treat it as UTC.
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
|
||||
|
||||
def download_image(url, target, timeout=30):
    """Download the image at *url* into the directory *target*.

    The file name is the last path segment of *url*. If a file with that name
    already exists, a HEAD request is made first and the download is skipped
    when the server's ``Last-Modified`` is not newer than the file's mtime.
    After a download, the file's access and modification times are set to the
    server's ``Last-Modified`` value so the next run can make that comparison.

    Args:
        url: Absolute URL of the image.
        target: Directory (``pathlib.Path``-like, supports ``/``) to store
            the image in.
        timeout: Seconds to wait for the server on each request; without a
            timeout a stalled connection would block the script forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If a ``Last-Modified`` header is present but unparsable.
    """
    name = url.rsplit('/', 1)[-1]
    image = target / name

    if image.exists():
        r = requests.head(url, timeout=timeout)
        r.raise_for_status()

        # The header may be missing (e.g. on a redirect response, which HEAD
        # does not follow by default); in that case fall through and
        # re-download instead of failing with a KeyError.
        header = r.headers.get('last-modified')
        if header is not None:
            last_modified = parse_http_datetime(header)
            mtime = datetime.fromtimestamp(image.stat().st_mtime, tz=timezone.utc)
            if last_modified <= mtime:
                return

    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    image.write_bytes(r.content)

    header = r.headers.get('last-modified')
    if header is None:
        # The image has been stored successfully; only the timestamp sync is
        # impossible, so report the download rather than raising.
        print(f"downloaded image '{name}' (last modified: unknown)", file=stderr)
        return

    last_modified = parse_http_datetime(header)
    timestamp = last_modified.timestamp()
    # Mirror the server's modification time onto the local file.
    utime(image, times=(timestamp, timestamp))

    print(f"downloaded image '{name}' (last modified: {last_modified.strftime('%a, %d %b %Y %H:%M:%S %Z')})", file=stderr)
|
||||
|
||||
def download_gallery(path):
|
||||
slug = path.removeprefix(f'{GALLERIES_PATH}/')
|
||||
|
||||
|
@ -65,6 +94,17 @@ date: {date}
|
|||
{content}
|
||||
''')
|
||||
|
||||
picture = soup.find(class_='picture')
|
||||
preview = search('https?://kontakt-bamberg.de[-./\w]+', picture.get('style'))
|
||||
download_image(preview[0], target)
|
||||
|
||||
for a in soup.select('.field-name-field-pictures > .field-items > .field-item > a'):
|
||||
href = a.get('href')
|
||||
try:
|
||||
download_image(href, target)
|
||||
except Exception as e:
|
||||
print(f"downloading image '{href}' has failed: {str(e)}", file=stderr)
|
||||
|
||||
if __name__ == '__main__':
|
||||
r = requests.get(GALLERIES_URL)
|
||||
r.raise_for_status()
|
||||
|
|
|
@ -3,7 +3,6 @@ certifi==2022.12.7
|
|||
charset-normalizer==3.1.0
|
||||
idna==3.4
|
||||
markdownify==0.11.6
|
||||
pathlib==1.0.1
|
||||
PyYAML==6.0
|
||||
requests==2.28.2
|
||||
six==1.16.0
|
||||
|
|
Loading…
Reference in New Issue