Spaces:
Running
Running
File size: 3,083 Bytes
b0f5083 14e19a5 b0f5083 14e19a5 b0f5083 14e19a5 b0f5083 14e19a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import logging
import os
import hashlib
import tarfile
import urllib.request
import zipfile
from tqdm import tqdm
from pathlib import Path
from logger import logger
from py7zr import SevenZipFile
class TqdmUpTo(tqdm):
    """tqdm progress bar exposing an absolute-position ``update_to`` hook."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar so that its position equals ``b * bsize``.

        Args:
            b: Multiplier for ``bsize``; presumably the number of blocks
                transferred so far when used as a ``urlretrieve`` report hook.
            bsize: Size of one block.
            tsize: Total size; when not ``None`` it replaces ``self.total``.
        """
        if tsize is not None:
            self.total = tsize
        # ``update`` expects an increment, so subtract the current position.
        position = b * bsize
        self.update(position - self.n)
def download_file(url, dest_path):
    """Download ``url`` to ``dest_path`` while displaying a progress bar.

    The bar is labelled with the last ``/``-separated segment of the URL.
    Any exception raised by ``urllib.request.urlretrieve`` propagates to the
    caller.
    """
    logging.info(f"Downloading: {url}")
    label = url.split('/')[-1]
    bar_options = {
        "unit": "B",
        "unit_scale": True,
        "unit_divisor": 1024,
        "miniters": 1,
        "desc": label,
    }
    with TqdmUpTo(**bar_options) as progress:
        urllib.request.urlretrieve(url, dest_path, reporthook=progress.update_to)
def verify_md5(file_path, expected_md5):
    """Check a file's MD5 digest against an expected hexadecimal string.

    The file is hashed in fixed-size chunks so large downloads are never
    loaded into memory in one piece. The expected digest is compared without
    regard to letter case or surrounding whitespace.

    Args:
        file_path: Location of the file to hash (``str`` or ``pathlib.Path``).
        expected_md5: Expected MD5 digest as a hex string.

    Returns:
        ``(True, "")`` when the digests match, otherwise
        ``(False, "MD5 mismatch: <actual> != <expected>")``.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.md5()
    with open(file_path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            hasher.update(chunk)
    md5 = hasher.hexdigest()

    # hexdigest() is lower-case; normalise the caller's value the same way.
    if isinstance(expected_md5, str):
        wanted = expected_md5.strip().lower()
    else:
        wanted = expected_md5
    if md5 != wanted:
        return False, f"MD5 mismatch: {md5} != {expected_md5}"
    return True, ""
def extract_file(file_path, destination=None):
    """
    Extract a compressed file based on its extension.

    Supported extensions are ``.zip``, ``.tar.gz``, ``.tar.bz2`` and ``.7z``.
    Any other extension is reported through ``logging.error`` and nothing is
    extracted.

    Args:
        file_path: Archive to extract (``str`` or ``os.PathLike``).
        destination: Directory to extract into. If not specified, the archive
            is extracted to its parent directory.
    """
    # Normalise to ``str`` so the suffix checks work for Path objects too.
    archive = os.fspath(file_path)
    if destination is None:
        destination = Path(archive).parent
    logging.info(f"Extracting to {destination}")

    if archive.endswith('.zip'):
        with zipfile.ZipFile(archive, 'r') as zip_ref:
            zip_ref.extractall(destination)
    elif archive.endswith(('.tar.gz', '.tar.bz2')):
        mode = 'r:gz' if archive.endswith('.tar.gz') else 'r:bz2'
        with tarfile.open(archive, mode) as tar_ref:
            # Archives may come from the network: where the interpreter
            # supports extraction filters, refuse members that would escape
            # the destination (absolute paths, "..", unsafe links).
            if hasattr(tarfile, 'data_filter'):
                tar_ref.extractall(destination, filter='data')
            else:
                tar_ref.extractall(destination)
    elif archive.endswith('.7z'):
        with SevenZipFile(archive, mode='r') as z:
            z.extractall(destination)
    else:
        logging.error(f"Unsupported compression format for file {file_path}")
def download_and_verify(urls, target_path, expected_md5=None, extract_destination=None):
    """Download a file from the first working URL, then verify and extract it.

    Args:
        urls: Iterable of candidate URLs, tried in order until one download
            succeeds.
        target_path: Where the downloaded file is written (``str`` or
            ``os.PathLike``).
        expected_md5: Expected MD5 hex digest. ``None`` or an empty string
            skips verification.
        extract_destination: Directory handed to ``extract_file`` when the
            target has a supported archive extension.

    Returns:
        A ``(success, message)`` tuple. On a checksum mismatch the downloaded
        file is deleted and the mismatch message is returned. After a
        successful extraction the downloaded archive is deleted.
    """
    # Work with a plain string so the suffix test below also accepts Path.
    target = os.fspath(target_path)

    for url in urls:
        try:
            download_file(url, target)
            break
        except Exception as error:
            # Best effort: record the failure and fall through to the next URL.
            logger.error(f"downloading from URL {url}: {error}")
    else:  # Runs only when the loop never hit ``break``: every URL failed.
        return False, "Error downloading from all provided URLs."

    # An empty checksum means "none supplied"; comparing against it could
    # never succeed and would only delete a good download.
    if expected_md5:
        success, message = verify_md5(Path(target), expected_md5)
        if not success:
            os.remove(target)
            return False, message

    # If it's a compressed file, extract it and drop the archive afterwards.
    if target.endswith(('.zip', '.tar.gz', '.tar.bz2', '.7z')):
        extract_file(target, extract_destination)
        os.remove(target)
    return True, "File downloaded, verified, and extracted successfully!"
if __name__ == "__main__":
    # Placeholder configuration: fill these in before running the script.
    URLS = [
        "YOUR_PRIMARY_URL_HERE",
        "YOUR_FIRST_BACKUP_URL_HERE",
        # ... you can add more backup URLs as needed
    ]
    TARGET_PATH = ""
    EXPECTED_MD5 = ""
    EXTRACT_DESTINATION = ""

    success, message = download_and_verify(
        urls=URLS,
        target_path=TARGET_PATH,
        expected_md5=EXPECTED_MD5,
        extract_destination=EXTRACT_DESTINATION,
    )
    print(message)
|