pierreguillou committed on
Commit
e21c1b2
1 Parent(s): 6401e6f

Update files/functions.py

Browse files
Files changed (1) hide show
  1. files/functions.py +14 -3
files/functions.py CHANGED
@@ -25,8 +25,7 @@ import pypdf
25
  from pypdf import PdfReader
26
  from pypdf.errors import PdfReadError
27
 
28
- import pdf2image
29
- from pdf2image import convert_from_path
30
  import langdetect
31
  from langdetect import detect_langs
32
 
@@ -409,7 +408,19 @@ def pdf_to_images(uploaded_pdf):
409
  images = [Image.open(image_blank)]
410
  else:
411
  try:
412
- images = convert_from_path(path_to_file, last_page=max_imgboxes)
 
 
 
 
 
 
 
 
 
 
 
 
413
  num_imgs = len(images)
414
  msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
415
  except:
 
25
  from pypdf import PdfReader
26
  from pypdf.errors import PdfReadError
27
 
28
+ import pypdfium2 as pdfium
 
29
  import langdetect
30
  from langdetect import detect_langs
31
 
 
408
  images = [Image.open(image_blank)]
409
  else:
410
  try:
411
+ # images = convert_from_path(path_to_file, last_page=max_imgboxes)
412
+
413
+ pdf = pdfium.PdfDocument(str(filename))
414
+ version = pdf.get_version() # get the PDF standard version
415
+ n_pages = len(pdf) # get the number of pages in the document
416
+ last_page = max_imgboxes
417
+ page_indices = [i for i in range(last_page)] # pages until last_page
418
+ images = list(pdf.render(
419
+ pdfium.PdfBitmap.to_pil,
420
+ page_indices = page_indices,
421
+ scale = 300/72, # 300dpi resolution
422
+ ))
423
+
424
  num_imgs = len(images)
425
  msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
426
  except: