Spaces:

3martini
/

Solar-Eyes-Dockerized

Running

Solar-Eyes-Dockerized / download_pdfs.py

Upload folder using huggingface_hub

786d4da verified 9 months ago

958 Bytes

	import os
	from urllib.parse import urljoin

	import requests
	from bs4 import BeautifulSoup

	# Download every PDF linked from the server's listing page into a local
	# directory. Any HTTP error (listing page or an individual PDF) raises
	# requests.HTTPError and stops the script.

	# URL setup
	base_url = "http://74.208.61.158:8888"
	page_url = base_url + "/list-pdfs"  # listing page whose <a> tags point at the PDFs

	# Directory for storing PDFs (created if it does not exist yet)
	pdf_dir = "pdf_downloads"
	os.makedirs(pdf_dir, exist_ok=True)

	# (connect, read) timeouts in seconds. Without a timeout, requests waits
	# forever on a server that accepts the connection but never answers.
	request_timeout = (10, 60)

	# A single session reuses the underlying connection for the listing page
	# and for every PDF request to the same host.
	with requests.Session() as session:
	    # Fetch the webpage
	    response = session.get(page_url, timeout=request_timeout)
	    response.raise_for_status()  # will raise an exception for HTTP error codes

	    # Parse the webpage
	    soup = BeautifulSoup(response.content, 'html.parser')

	    # Find all PDF links
	    for link in soup.find_all('a', href=True):
	        href = link['href']
	        if not href.endswith('.pdf'):
	            continue

	        # Resolve the href against the listing page so root-relative
	        # ("/x.pdf"), page-relative ("x.pdf") and absolute hrefs all produce
	        # a valid URL; plain string concatenation only handled the first.
	        pdf_url = urljoin(page_url, href)
	        pdf_response = session.get(pdf_url, timeout=request_timeout)
	        pdf_response.raise_for_status()

	        # Write the PDF to a file named after the last path segment of the href
	        pdf_filename = os.path.join(pdf_dir, href.split('/')[-1])
	        with open(pdf_filename, 'wb') as file:
	            file.write(pdf_response.content)

	        print(f"Downloaded: {pdf_filename}")