shripadbhat committed on
Commit 5ad2a61
1 Parent(s): 0dc42fe

Update app.py

Files changed (1)
  1. app.py +5 -56
app.py CHANGED
@@ -1,69 +1,18 @@
 import streamlit as st
-import pysbd
-from transformers import pipeline
-from sentence_transformers import CrossEncoder
-from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline
-
-
-def fetch_answers(question, document ):
-    document_paragraphs = document.splitlines()
-    query_paragraph_list = [(question, para) for para in document_paragraphs if len(para.strip()) > 0 ]
-
-    scores = passage_retreival_model.predict(query_paragraph_list)
-    top_5_indices = scores.argsort()[-5:]
-    top_5_query_paragraph_list = [query_paragraph_list[i] for i in top_5_indices ]
-    top_5_query_paragraph_list.reverse()
-
-    top_5_query_paragraph_answer_list = ""
-    count = 1
-    for query, passage in top_5_query_paragraph_list:
-        passage_sentences = sentence_segmenter.segment(passage)
-        answer = qa_model(question = query, context = passage)['answer']
-        evidence_sentence = ""
-        for i in range(len(passage_sentences)):
-            if answer.startswith('.') or answer.startswith(':'):
-                answer = answer[1:].strip()
-            if answer in passage_sentences[i]:
-                evidence_sentence = evidence_sentence + " " + passage_sentences[i]
-
-
-        model_input = f"question: {query} context: {evidence_sentence}"
-        encoded_input = tokenizer([model_input],
-                                  return_tensors='pt',
-                                  max_length=512,
-                                  truncation=True)
-
-        output = model.generate(input_ids = encoded_input.input_ids,
-                                attention_mask = encoded_input.attention_mask)
-        output_answer = tokenizer.decode(output[0], skip_special_tokens=True)
-
-        result_str = "# ANSWER "+str(count)+": "+ output_answer +"\n"
-        result_str = result_str + "REFERENCE: "+ evidence_sentence + "\n\n"
-        top_5_query_paragraph_answer_list += result_str
-        count+=1
-
-    return top_5_query_paragraph_answer_list
+from question_answering import QuestionAnswering
 
 st.title('Document Question Answering System')
 st.write("Loading the models...")
-my_bar = st.progress(0)
-model_name = "MaRiOrOsSi/t5-base-finetuned-question-answering"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelWithLMHead.from_pretrained(model_name)
-my_bar.progress(25)
-sentence_segmenter = pysbd.Segmenter(language='en',clean=False)
-my_bar.progress(50)
-passage_retreival_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
-my_bar.progress(75)
-qa_model = pipeline("question-answering",'a-ware/bart-squadv2')
+my_bar = st.progress(10)
+qa = QuestionAnswering()
 my_bar.progress(100)
 st.write('Models Loaded')
 
 query = st.text_input("Query")
-document = st.text_area("Document Text", "", height=100)
+document_text = st.text_area("Document Text", "", height=100)
 
 if st.button("Get Answers From Document"):
 
-    st.markdown(fetch_answers(query, document))
+    st.markdown(qa.fetch_answers(query, document_text))
 
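
Note: the rewritten app.py imports QuestionAnswering from a question_answering module that is not part of this diff. Assuming that module simply repackages the code removed above (the same CrossEncoder retrieval, BART extractive QA, pysbd evidence collection, and T5 rephrasing, now behind a fetch_answers method), a rough sketch of question_answering.py could look like the following; everything beyond the QuestionAnswering class name and the fetch_answers(question, document) signature is an assumption, not the actual file from this repository.

```python
# question_answering.py -- hypothetical sketch, not part of this commit.
# It bundles the logic removed from app.py into the class the new app.py imports.
import pysbd
from sentence_transformers import CrossEncoder
from transformers import AutoTokenizer, AutoModelWithLMHead, pipeline


class QuestionAnswering:
    def __init__(self):
        # Same models the old app.py loaded at module level.
        # AutoModelWithLMHead is what the original code used (deprecated in newer transformers).
        model_name = "MaRiOrOsSi/t5-base-finetuned-question-answering"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelWithLMHead.from_pretrained(model_name)
        self.sentence_segmenter = pysbd.Segmenter(language='en', clean=False)
        self.passage_retrieval_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
        self.qa_model = pipeline("question-answering", model='a-ware/bart-squadv2')

    def fetch_answers(self, question, document):
        # Score every non-empty paragraph against the question, keep the top 5 (best first).
        paragraphs = [p for p in document.splitlines() if p.strip()]
        query_paragraph_list = [(question, para) for para in paragraphs]
        scores = self.passage_retrieval_model.predict(query_paragraph_list)
        top_5 = [query_paragraph_list[i] for i in scores.argsort()[-5:]][::-1]

        results = ""
        for count, (query, passage) in enumerate(top_5, start=1):
            # Extractive answer, then gather the passage sentences containing it as evidence.
            answer = self.qa_model(question=query, context=passage)['answer'].lstrip('.:').strip()
            evidence = " ".join(s for s in self.sentence_segmenter.segment(passage) if answer in s)

            # Rephrase the evidence into a fluent answer with the T5 model.
            encoded = self.tokenizer([f"question: {query} context: {evidence}"],
                                     return_tensors='pt', max_length=512, truncation=True)
            output = self.model.generate(input_ids=encoded.input_ids,
                                         attention_mask=encoded.attention_mask)
            output_answer = self.tokenizer.decode(output[0], skip_special_tokens=True)

            results += f"# ANSWER {count}: {output_answer}\nREFERENCE: {evidence}\n\n"
        return results
```

Loading all four components in __init__ is what lets app.py shrink to pure UI wiring, with a single progress-bar update wrapped around the QuestionAnswering() constructor call.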