ashok2216 committed on
Commit
e86eadb
1 Parent(s): 9d423d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -6,9 +6,19 @@ from transformers import GPT2LMHeadModel, GPT2Tokenizer
6
  tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
7
  model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
8
 
9
- def extract_text_from_pdf(pdf_path):
 
 
 
 
 
 
 
 
 
 
10
  text = ""
11
- with open(pdf_path, "rb") as f:
12
  reader = PyPDF2.PdfFileReader(f)
13
  for page_num in range(reader.numPages):
14
  text += reader.getPage(page_num).extractText()
 
6
  tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
7
  model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
8
 
9
# Upload step: let the user pick a single PDF in the Streamlit UI and copy
# its bytes to a temporary file so it can later be opened by path.
pdf_file = st.file_uploader("Upload an pdf file", type=["pdf"], accept_multiple_files=False)

# Bind file_path unconditionally so code further down can check it against
# None instead of hitting a NameError when nothing has been uploaded yet.
file_path = None

if pdf_file is not None:
    # delete=False keeps the file on disk after the with-block closes it,
    # which is required because it is reopened by name afterwards.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(pdf_file.read())
        file_path = tmp_file.name
    # Report success only once the temp file has been closed (and flushed).
    st.success("PDF file successfully uploaded and stored temporally.")
else:
    st.markdown('File not found!')
18
+
19
+ def extract_text_from_pdf(file_path):
20
  text = ""
21
+ with open(file_path, "rb") as f:
22
  reader = PyPDF2.PdfFileReader(f)
23
  for page_num in range(reader.numPages):
24
  text += reader.getPage(page_num).extractText()