Spaces:
Runtime error
Runtime error
File size: 983 Bytes
6e89871 be3b0b4 6e89871 be3b0b4 6e89871 be3b0b4 6e89871 be3b0b4 6e89871 be3b0b4 6e89871 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import os
import gzip
import shutil
from urllib.request import urlretrieve
from tqdm import tqdm
def download_large_file(url: str, output_file: str) -> None:
    """Download ``url`` to ``output_file`` unless that file already exists.

    The payload is first written to ``<output_file>.part`` and renamed to
    ``output_file`` only after the transfer finished, so an interrupted
    download is never mistaken for a complete file by the existence check
    on a later run.

    Args:
        url: Location to fetch.
        output_file: Destination path. Missing parent directories are created.

    Raises:
        urllib.error.URLError: If the resource cannot be retrieved.
        OSError: If the destination cannot be written.
    """
    if os.path.exists(output_file):
        return

    # urlretrieve opens the destination for writing and fails when the
    # containing directory does not exist yet.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    partial_file = f"{output_file}.part"
    try:
        urlretrieve(url, partial_file)
        os.replace(partial_file, output_file)
    finally:
        # After a successful rename the partial file is gone; this only
        # removes leftovers from a failed transfer.
        if os.path.exists(partial_file):
            os.remove(partial_file)
def unzip_file(input_file):
    """Decompress a gzip file next to itself unless the output already exists.

    The output path is ``input_file`` with its last extension removed; the
    input is expected to have the format ``xxx.tsv.gz``, giving ``xxx.tsv``.

    Data is decompressed into ``<output>.part`` and renamed only on success,
    so a corrupt archive or an interrupted run does not leave a truncated
    output that a later call would skip as already done.

    Args:
        input_file: Path to the gzip-compressed file.

    Raises:
        OSError: If the input cannot be read, is not valid gzip data, or the
            output cannot be written.
    """
    output_file = os.path.splitext(input_file)[0]
    if os.path.exists(output_file):
        return

    partial_file = f"{output_file}.part"
    try:
        with gzip.open(input_file, "rb") as f_in, open(partial_file, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.replace(partial_file, output_file)
    finally:
        # Only present here when decompression or the rename failed.
        if os.path.exists(partial_file):
            os.remove(partial_file)
if __name__ == "__main__":
    # Base URL the dataset archives are fetched from.
    imdb_url = "https://datasets.imdbws.com"
    filenames = [
        "name.basics.tsv.gz",
        "title.basics.tsv.gz",
        "title.ratings.tsv.gz",
        "title.principals.tsv.gz",
    ]

    # The archives are written into this folder; create it up front because
    # writing a download into a missing directory raises FileNotFoundError.
    data_dir = "data"
    os.makedirs(data_dir, exist_ok=True)

    # Fetch each archive, then decompress it alongside the download.
    for filename in tqdm(filenames):
        url = f"{imdb_url}/{filename}"
        output_file = os.path.join(data_dir, filename)
        download_large_file(url, output_file)
        unzip_file(output_file)
|