# Spaces:
# Running
# Running
import os
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
# URL setup
base_url = "http://74.208.61.158:8888"
page_url = base_url + "/list-pdfs"  # page whose <a> tags link to the PDFs

# Directory for storing PDFs
pdf_dir = "pdf_downloads"

# Seconds to wait on connect/read. requests applies no timeout by default,
# so without this an unresponsive server would hang the script forever.
request_timeout = 30


def is_pdf_link(href):
    """Return True when *href* points at a ``.pdf`` resource.

    The check is case-insensitive and looks at the URL path as well as the
    raw href, so ``report.PDF`` and ``report.pdf?download=1`` both match.
    Every href that ends in ``.pdf`` (the original rule) still matches.
    """
    if href.lower().endswith(".pdf"):
        return True
    return urlparse(href).path.lower().endswith(".pdf")


def pdf_file_name(href):
    """Return the local file name to use for the PDF linked by *href*.

    When the URL path itself names the PDF, the last path segment is used so
    that query strings and fragments do not leak into the file name.
    Otherwise the last ``/``-separated piece of the raw href is used.
    """
    path = urlparse(href).path
    source = path if path.lower().endswith(".pdf") else href
    return source.rsplit("/", 1)[-1]


def pdf_url_for(href):
    """Resolve *href* against ``page_url`` into an absolute download URL.

    ``urljoin`` handles root-relative (``/files/a.pdf``), bare-relative
    (``a.pdf``) and already-absolute hrefs; plain string concatenation with
    ``base_url`` only worked for the root-relative form.
    """
    return urljoin(page_url, href)


def download_pdfs():
    """Download every PDF linked from ``page_url`` into ``pdf_dir``.

    Creates ``pdf_dir`` if needed, fetches the listing page, and saves each
    linked PDF under its own file name, printing one line per saved file.

    Raises:
        requests.HTTPError: if the listing page or any PDF request returns
            an HTTP error status (via ``raise_for_status``).
        requests.RequestException: on connection failures or timeouts.
    """
    os.makedirs(pdf_dir, exist_ok=True)

    # One session so the connection to the host is reused across downloads.
    with requests.Session() as session:
        # Fetch the webpage
        response = session.get(page_url, timeout=request_timeout)
        response.raise_for_status()  # will raise an exception for HTTP error codes

        # Parse the webpage
        soup = BeautifulSoup(response.content, "html.parser")

        # Find all PDF links
        for link in soup.find_all("a", href=True):
            href = link["href"]
            if not is_pdf_link(href):
                continue

            pdf_response = session.get(pdf_url_for(href), timeout=request_timeout)
            pdf_response.raise_for_status()

            # Write the PDF to a file
            pdf_filename = os.path.join(pdf_dir, pdf_file_name(href))
            with open(pdf_filename, "wb") as file:
                file.write(pdf_response.content)
            print(f"Downloaded: {pdf_filename}")


if __name__ == "__main__":
    download_pdfs()