Omnibus committed on
Commit
de8ef09
1 Parent(s): b4be601

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -67,21 +67,29 @@ ocr_id = {
67
  def pdf_pil(file_path,page_num):
68
 
69
  pdf = pdfium.PdfDocument("data.pdf")
 
70
  #n_pages = len(pdf)
71
  #for page_number in range(n_pages):
72
  page = pdf.get_page(int(page_num)-1)
 
 
73
  bitmap = page.render(
74
  scale = 1, # 72dpi resolution
75
  rotation = 0, # no additional rotation
76
  # ... further rendering options
77
  )
 
 
78
  pil_image = bitmap.to_pil()
79
- #pil_image.save(f"image_{page_num}.png")
 
 
 
80
 
81
- return pil_image
82
 
83
  def ocrpdf(file_path,pdf_lang,page_num):
84
- img1=pdf_pil(file_path,page_num)
85
  print("DONE 1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
86
  lang=[f"{ocr_id[pdf_lang]}"]
87
  print("DONE 2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
 
67
  def pdf_pil(file_path,page_num):
68
 
69
  pdf = pdfium.PdfDocument("data.pdf")
70
+ print ("\n PDF read !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
71
  #n_pages = len(pdf)
72
  #for page_number in range(n_pages):
73
  page = pdf.get_page(int(page_num)-1)
74
+ print ("\n Page read !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
75
+
76
  bitmap = page.render(
77
  scale = 1, # 72dpi resolution
78
  rotation = 0, # no additional rotation
79
  # ... further rendering options
80
  )
81
+ print ("\n Page rendered !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
82
+
83
  pil_image = bitmap.to_pil()
84
+ print ("\n Page to PIL !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
85
+
86
+ pil_image.save(f"image_{page_num}.png")
87
+ print ("\n Page saved !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")
88
 
89
+ return (f"image_{page_num}.png")
90
 
91
  def ocrpdf(file_path,pdf_lang,page_num):
92
+ img1 = pdf_pil(file_path,page_num)
93
  print("DONE 1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
94
  lang=[f"{ocr_id[pdf_lang]}"]
95
  print("DONE 2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")