pierreguillou
committed on
Commit
•
e21c1b2
1
Parent(s):
6401e6f
Update files/functions.py
Browse files
- files/functions.py +14 -3
files/functions.py
CHANGED
@@ -25,8 +25,7 @@ import pypdf
|
|
25 |
from pypdf import PdfReader
|
26 |
from pypdf.errors import PdfReadError
|
27 |
|
28 |
-
import
|
29 |
-
from pdf2image import convert_from_path
|
30 |
import langdetect
|
31 |
from langdetect import detect_langs
|
32 |
|
@@ -409,7 +408,19 @@ def pdf_to_images(uploaded_pdf):
|
|
409 |
images = [Image.open(image_blank)]
|
410 |
else:
|
411 |
try:
|
412 |
-
images = convert_from_path(path_to_file, last_page=max_imgboxes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
413 |
num_imgs = len(images)
|
414 |
msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
|
415 |
except:
|
|
|
25 |
from pypdf import PdfReader
|
26 |
from pypdf.errors import PdfReadError
|
27 |
|
28 |
+
import pypdfium2 as pdfium
|
|
|
29 |
import langdetect
|
30 |
from langdetect import detect_langs
|
31 |
|
|
|
408 |
images = [Image.open(image_blank)]
|
409 |
else:
|
410 |
try:
|
411 |
+
# images = convert_from_path(path_to_file, last_page=max_imgboxes)
|
412 |
+
|
413 |
+
pdf = pdfium.PdfDocument(str(filename))
|
414 |
+
version = pdf.get_version() # get the PDF standard version
|
415 |
+
n_pages = len(pdf) # get the number of pages in the document
|
416 |
+
last_page = max_imgboxes
|
417 |
+
page_indices = [i for i in range(last_page)] # pages until last_page
|
418 |
+
images = list(pdf.render(
|
419 |
+
pdfium.PdfBitmap.to_pil,
|
420 |
+
page_indices = page_indices,
|
421 |
+
scale = 300/72, # 300dpi resolution
|
422 |
+
))
|
423 |
+
|
424 |
num_imgs = len(images)
|
425 |
msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
|
426 |
except:
|