utils: add download_file()

This commit is contained in:
InsanePrawn 2022-12-08 16:19:03 +01:00
parent 4112f5a56e
commit db4fbc083a
2 changed files with 28 additions and 0 deletions

View file

@ -6,3 +6,5 @@ typing_extensions
coloredlogs coloredlogs
munch munch
setuptools # required by munch setuptools # required by munch
requests
python-dateutil

View file

@ -1,12 +1,15 @@
import atexit import atexit
import datetime
import grp import grp
import hashlib import hashlib
import logging import logging
import os import os
import pwd import pwd
import requests
import subprocess import subprocess
import tarfile import tarfile
from dateutil.parser import parse as parsedate
from shutil import which from shutil import which
from typing import Generator, IO, Optional, Union, Sequence from typing import Generator, IO, Optional, Union, Sequence
@ -134,6 +137,29 @@ def read_files_from_tar(tar_file: str, files: Sequence[str]) -> Generator[tuple[
yield path, fd yield path, fd
def download_file(path: str, url: str, update: bool = True) -> bool:
    """
    Download a file over http[s]. With `update`, tries to use mtime timestamps to download only changed files.

    If `update` is set and `path` already exists, a HEAD request is sent first and the server's
    `Last-Modified` header is compared against the local file's mtime; when both are equal the
    download is skipped. After a download the file's mtime is set to the server's `Last-Modified`
    value (if the server sent one) so that the next call can make the same comparison.

    Returns False if the existing file was considered up to date, True if the file was downloaded.
    Raises `requests.HTTPError` if the GET request is answered with an HTTP error status; in that
    case `path` is left untouched.
    """
    user_agent = {"User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}

    if update and os.path.exists(path):
        # requests.head() does not follow redirects unless asked to; without this a redirecting URL
        # yields the redirect's headers and the freshness check would be silently skipped.
        # Send the same User-Agent as the GET below so both requests are answered alike.
        head_headers = requests.head(url, headers=user_agent, allow_redirects=True).headers
        if 'last-modified' in head_headers:
            url_time = parsedate(head_headers['last-modified']).astimezone()
            file_time = datetime.datetime.fromtimestamp(os.path.getmtime(path)).astimezone()
            if url_time == file_time:
                logging.debug(f"{path} seems already up to date")
                return False

    # stream=True keeps the body out of memory until iter_content() pulls it chunk by chunk;
    # the context manager makes sure the connection is released even if writing fails.
    with requests.get(url, headers=user_agent, stream=True) as download:
        # Check the status before opening `path` so an error page never overwrites an existing file.
        download.raise_for_status()
        with open(path, 'wb') as fd:
            for chunk in download.iter_content(4096):
                fd.write(chunk)
        last_modified = download.headers.get('last-modified')

    if last_modified is not None:
        url_time = parsedate(last_modified).astimezone()
        # atime = now, mtime = server's Last-Modified (this is what the freshness check reads back).
        os.utime(path, (datetime.datetime.now().timestamp(), url_time.timestamp()))
    logging.debug(f"{path} downloaded!")
    return True
# stackoverflow magic from https://stackoverflow.com/a/44873382 # stackoverflow magic from https://stackoverflow.com/a/44873382
def sha256sum(filename): def sha256sum(filename):
h = hashlib.sha256() h = hashlib.sha256()