Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,9 +6,19 @@ from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
|
6 |
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
7 |
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
text = ""
|
11 |
-
with open(
|
12 |
reader = PyPDF2.PdfFileReader(f)
|
13 |
for page_num in range(reader.numPages):
|
14 |
text += reader.getPage(page_num).extractText()
|
|
|
6 |
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
|
7 |
model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
|
8 |
|
9 |
+
pdf_file = st.file_uploader("Upload an pdf file", type=["pdf"], accept_multiple_files=False)
|
10 |
+
|
11 |
+
if pdf_file is not None:
|
12 |
+
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
|
13 |
+
tmp_file.write(pdf_file.read())
|
14 |
+
st.success("PDF file successfully uploaded and stored temporally.")
|
15 |
+
file_path = tmp_file.name
|
16 |
+
else:
|
17 |
+
st.markdown('File not found!')
|
18 |
+
|
19 |
+
def extract_text_from_pdf(file_path):
|
20 |
text = ""
|
21 |
+
with open(file_path, "rb") as f:
|
22 |
reader = PyPDF2.PdfFileReader(f)
|
23 |
for page_num in range(reader.numPages):
|
24 |
text += reader.getPage(page_num).extractText()
|