Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -67,21 +67,29 @@ ocr_id = {
|
|
67 |
def pdf_pil(file_path, page_num):
    """Render one page of the PDF to a PNG file and return the file name.

    Previously this function built the PIL image and then fell through to a
    bare ``return``, so the caller always received ``None``. The image is now
    written to disk and its name returned.

    Args:
        file_path: Currently unused; the document is always opened from
            ``"data.pdf"``.
            NOTE(review): presumably the caller stores the PDF under that
            name before calling -- confirm, or switch to ``file_path``.
        page_num: Page number as given by the caller. It is converted with
            ``int()`` and decremented by one before being handed to
            ``get_page``, so ``1`` selects the first page.

    Returns:
        The name of the PNG file written, ``image_<page_num>.png``, relative
        to the current working directory.
    """
    pdf = pdfium.PdfDocument("data.pdf")
    page = pdf.get_page(int(page_num) - 1)
    bitmap = page.render(
        scale=1,  # 72dpi resolution
        rotation=0,  # no additional rotation
    )
    pil_image = bitmap.to_pil()

    # Persist the rendered page so the caller gets something usable back
    # instead of None.
    image_path = f"image_{page_num}.png"
    pil_image.save(image_path)
    return image_path
|
82 |
|
83 |
def ocrpdf(file_path,pdf_lang,page_num):
|
84 |
-
img1=pdf_pil(file_path,page_num)
|
85 |
print("DONE 1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
|
86 |
lang=[f"{ocr_id[pdf_lang]}"]
|
87 |
print("DONE 2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
|
|
|
67 |
def pdf_pil(file_path, page_num):
    """Render one page of the PDF to a PNG file and return the file name.

    A progress message is printed after each step (open, page load, render,
    PIL conversion, save).

    Args:
        file_path: Currently unused; the document is always opened from
            ``"data.pdf"``.
            NOTE(review): presumably the caller stores the PDF under that
            name before calling -- confirm, or switch to ``file_path``.
        page_num: Page number as given by the caller. It is converted with
            ``int()`` and decremented by one before being handed to
            ``get_page``, so ``1`` selects the first page.

    Returns:
        The name of the PNG file written, ``image_<page_num>.png``, relative
        to the current working directory.
    """
    document = pdfium.PdfDocument("data.pdf")
    print("\n PDF read !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")

    page_index = int(page_num) - 1
    page = document.get_page(page_index)
    print("\n Page read !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")

    render_options = {
        "scale": 1,  # 72dpi resolution
        "rotation": 0,  # no additional rotation
    }
    rendered = page.render(**render_options)
    print("\n Page rendered !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")

    image = rendered.to_pil()
    print("\n Page to PIL !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")

    output_name = f"image_{page_num}.png"
    image.save(output_name)
    print("\n Page saved !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n")

    return output_name
|
90 |
|
91 |
def ocrpdf(file_path,pdf_lang,page_num):
|
92 |
+
img1 = pdf_pil(file_path,page_num)
|
93 |
print("DONE 1 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
|
94 |
lang=[f"{ocr_id[pdf_lang]}"]
|
95 |
print("DONE 2 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
|