Spaces:
Paused
Paused
import gradio as gr | |
import subprocess | |
# Custom stylesheet passed to gr.Blocks: shows the parsed-output pane as a
# fixed-height, scrollable, bordered box.
# The rule uses an ID selector because the output Markdown component is
# created with elem_id='mkd'; a class selector (.mkd) would not match it.
css = """
#mkd {
    height: 500px;
    overflow: auto;
    border: 1px solid #ccc;
}
"""
def nougat_ocr(file_name):
    """Run the ``nougat`` command-line tool on a PDF file.

    The tool is invoked with ``--out output``, so its result is expected to
    land in the ``output`` folder, where the caller reads it back.

    Args:
        file_name: Path of the PDF file to process.

    Returns:
        None. The function is called for its side effect only.

    Raises:
        RuntimeError: If the ``nougat`` process exits with a non-zero status;
            the captured stderr is included in the message.
        FileNotFoundError: Raised by ``subprocess.run`` when the ``nougat``
            executable cannot be found.
    """
    print('******* inside nougat_ocr *******')

    # CLI command to run.
    # NOTE(review): the literal 'pdf' is passed as its own argument ahead of
    # the file path - confirm this matches the installed nougat CLI.
    cli_command = [
        'nougat',
        '--out', 'output',
        'pdf', f'{file_name}',
        '--checkpoint', 'nougat',
    ]

    # Run the command; output is captured so it can be reported on failure
    # instead of being silently dropped.
    result = subprocess.run(
        cli_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    # Without this check a failed run goes unnoticed and the caller would
    # read a missing (or stale) file from the output folder.
    if result.returncode != 0:
        raise RuntimeError(
            f"nougat exited with status {result.returncode}: "
            f"{(result.stderr or '').strip()}"
        )
def predict(pdf_file):
    """Convert an uploaded PDF to multimarkdown text using nougat.

    Args:
        pdf_file: Uploaded file object. Only its ``name`` attribute is used,
            as the path of the file on disk.

    Returns:
        The text content of ``output/<pdf stem>.mmd``.

    Raises:
        FileNotFoundError: If the expected ``.mmd`` file is not present in
            the ``output`` folder after nougat has run.
    """
    # Local import so this function does not rely on a new file-level import.
    from pathlib import Path

    print('******* inside predict *******')
    print(f"temporary file - {pdf_file.name}")

    # Strip only the final extension. Splitting on the first '.' would turn
    # a name such as '2308.13418.pdf' into '2308' and the lookup below would
    # miss the generated file.
    # NOTE(review): presumes nougat names its output '<stem>.mmd' - confirm.
    pdf_name = Path(pdf_file.name).stem
    print(f"pdf file name - {pdf_name}")

    # Get prediction for the PDF using nougat.
    nougat_ocr(pdf_file.name)
    print("BAACCKKK")

    # Read the multimarkdown (.mmd) file. The encoding is explicit so that
    # non-ASCII characters do not depend on the platform's locale default.
    with open(f'output/{pdf_name}.mmd', 'r', encoding='utf-8') as file:
        content = file.read()

    return content
# Build the Gradio interface: a title, an attribution line, an input row
# (PDF upload / "OR" separator / link textbox), a button, and a Markdown pane
# that displays the text returned by predict().
with gr.Blocks(css=css) as demo:
    # Header markup: the closing tags were previously written as opening tags
    # (<center><h1>), which left the elements unclosed.
    gr.HTML("<h1><center>Nougat: Neural Optical Understanding for Academic Documents</center></h1>")
    gr.HTML("<h3><center>Lukas Blecher et al. <a href='https://arxiv.org/pdf/2308.13418.pdf' target='_blank'>Paper</a>, <a href='https://facebookresearch.github.io/nougat/'>Project</a></center></h3>")

    with gr.Row():
        pdf_file = gr.File(label='Upload a PDF', scale=1)
        mkd = gr.Markdown('<h2><center><i>OR</i></center></h2>', scale=1)
        # NOTE(review): pdf_link is shown to the user but is not passed to any
        # event handler below, so a link entered here is currently ignored.
        pdf_link = gr.Textbox(placeholder='Enter an arxiv link here', label='Provide a link', scale=1)

    btn = gr.Button()
    # elem_id gives the output pane a DOM id that custom CSS can target.
    parsed_output = gr.Markdown(elem_id='mkd')

    # Clicking the button sends the uploaded file to predict() and renders
    # the returned text in the output pane.
    btn.click(predict, pdf_file, parsed_output)

demo.queue()
demo.launch(debug=True)