# Spaces: Running
import pypdfium2 as pdfium
import os

# Render the first page of every PDF in `pdf_dir` to a JPEG in `image_dir`.
# Conversion is best-effort per file: a PDF that cannot be opened, rendered,
# or saved is reported and skipped so the rest of the batch still runs.

# Directory containing PDFs
pdf_dir = './pdf_downloads'
# Directory to store images
image_dir = './pdf_images'
os.makedirs(image_dir, exist_ok=True)

# Process each PDF
for pdf_file in os.listdir(pdf_dir):
    if not pdf_file.endswith('.pdf'):
        continue

    pdf_path = os.path.join(pdf_dir, pdf_file)
    image_path = os.path.join(image_dir, pdf_file.replace('.pdf', '.jpg'))

    print(f"Attempting to convert {pdf_path}")
    # Reset per iteration so a failed open can never fall through to the
    # document left over from the previous file.
    pdf = None
    try:
        # Open the PDF
        pdf = pdfium.PdfDocument(pdf_path)
        # Get just the first page
        page = pdf[0]
        image = page.render(scale=4).to_pil()
        # Save the rendered page as a JPEG image. This happens before the
        # document is closed because the PIL image may share memory with
        # the underlying bitmap.
        image.save(image_path, 'JPEG')
    except Exception as e:
        print(f"Unable to convert {pdf_path} due to {e}")
        continue
    finally:
        # Release the native document handle whether or not conversion worked.
        if pdf is not None:
            pdf.close()

    print(f"Converted {pdf_path} to {image_path}")