utils: add download_file()

This commit is contained in:
InsanePrawn 2022-12-08 16:19:03 +01:00
parent 4112f5a56e
commit db4fbc083a
2 changed files with 28 additions and 0 deletions

View file

@ -6,3 +6,5 @@ typing_extensions
coloredlogs
munch
setuptools # required by munch
requests
python-dateutil

View file

@ -1,12 +1,15 @@
import atexit
import datetime
import grp
import hashlib
import logging
import os
import pwd
import requests
import subprocess
import tarfile
from dateutil.parser import parse as parsedate
from shutil import which
from typing import Generator, IO, Optional, Union, Sequence
@ -134,6 +137,29 @@ def read_files_from_tar(tar_file: str, files: Sequence[str]) -> Generator[tuple[
yield path, fd
def download_file(path: str, url: str, update: bool = True) -> bool:
    """Download a file over http[s] and store it at `path`.

    With `update`, and if `path` already exists, a HEAD request is sent first and the
    server's `Last-Modified` header is compared against the local file's mtime; the
    download is skipped when both are equal. After a download, the file's mtime is set
    to the server's `Last-Modified` time (if the server sent one) so that this
    comparison can succeed on the next call.

    Returns `True` if the file was downloaded, `False` if the existing file was
    considered up to date.
    Raises `requests.HTTPError` if the server answers the download request with an
    error status; in that case `path` is left untouched.
    """
    # Browser-like user agent, sent with both requests so HEAD and GET are treated alike by the server.
    request_headers = {"User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}

    if os.path.exists(path) and update:
        # requests.head() does not follow redirects by default; without following them a
        # redirected URL would never expose the final resource's Last-Modified header.
        headers = requests.head(url, headers=request_headers, allow_redirects=True).headers
        if 'last-modified' in headers:
            url_time = parsedate(headers['last-modified']).astimezone()
            file_time = datetime.datetime.fromtimestamp(os.path.getmtime(path)).astimezone()
            if url_time == file_time:
                logging.debug(f"{path} seems already up to date")
                return False

    # stream=True keeps the body out of memory so it is really written chunk by chunk;
    # the context manager releases the connection once we are done.
    with requests.get(url, headers=request_headers, stream=True) as download:
        # Fail before opening `path`, so an HTTP error page never overwrites an existing file.
        download.raise_for_status()
        with open(path, 'wb') as fd:
            for chunk in download.iter_content(4096):
                fd.write(chunk)
        last_modified = download.headers.get('last-modified')

    if last_modified:
        # Mirror the server's modification time into the file's mtime (atime is set to now).
        url_time = parsedate(last_modified).astimezone()
        os.utime(path, (datetime.datetime.now().timestamp(), url_time.timestamp()))
    logging.debug(f"{path} downloaded!")
    return True
# stackoverflow magic from https://stackoverflow.com/a/44873382
def sha256sum(filename):
h = hashlib.sha256()