exchange math delimiters (#1)
Browse files
- exchange math delimiters (3dd4fff646571b17c82a9283b8505662ffcaf546)
Co-authored-by: Lukas Blecher <lukbl@users.noreply.huggingface.co>
app.py
CHANGED
@@ -3,6 +3,7 @@ import subprocess
|
|
3 |
import uuid
|
4 |
import os
|
5 |
import requests
|
|
|
6 |
|
7 |
|
8 |
def get_pdf(pdf_link):
|
@@ -31,7 +32,8 @@ def nougat_ocr(file_name):
|
|
31 |
#'--out', unique_filename,
|
32 |
'--out', 'output',
|
33 |
'pdf', f'{file_name}',
|
34 |
-
'--checkpoint', 'nougat'
|
|
|
35 |
]
|
36 |
|
37 |
# Run the command and capture its output
|
@@ -64,6 +66,8 @@ def predict(pdf_file, pdf_link):
|
|
64 |
file_name = file_name.split('/')[-1][:-4]
|
65 |
with open(f'output/{file_name}.mmd', 'r') as file:
|
66 |
content = file.read()
|
|
|
|
|
67 |
return content
|
68 |
|
69 |
|
@@ -76,7 +80,8 @@ def nougat_ocr1(file_name):
|
|
76 |
'nougat',
|
77 |
'--out', 'output',
|
78 |
'pdf', f'{file_name}',
|
79 |
-
'--checkpoint', 'nougat'
|
|
|
80 |
]
|
81 |
|
82 |
# Run the command and get .mmd file in an output folder
|
|
|
3 |
import uuid
|
4 |
import os
|
5 |
import requests
|
6 |
+
import re
|
7 |
|
8 |
|
9 |
def get_pdf(pdf_link):
|
|
|
32 |
#'--out', unique_filename,
|
33 |
'--out', 'output',
|
34 |
'pdf', f'{file_name}',
|
35 |
+
'--checkpoint', 'nougat',
|
36 |
+
'--markdown'
|
37 |
]
|
38 |
|
39 |
# Run the command and capture its output
|
|
|
66 |
file_name = file_name.split('/')[-1][:-4]
|
67 |
with open(f'output/{file_name}.mmd', 'r') as file:
|
68 |
content = file.read()
|
69 |
+
# switch math delimiters
|
70 |
+
content = content.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
|
71 |
return content
|
72 |
|
73 |
|
|
|
80 |
'nougat',
|
81 |
'--out', 'output',
|
82 |
'pdf', f'{file_name}',
|
83 |
+
'--checkpoint', 'nougat',
|
84 |
+
'--markdown'
|
85 |
]
|
86 |
|
87 |
# Run the command and get .mmd file in an output folder
|