star_nox committed on
Commit
cf22d4f
1 Parent(s): b4791c9

added context retrieval to UI

Browse files
__pycache__/retrieval.cpython-310.pyc CHANGED
Binary files a/__pycache__/retrieval.cpython-310.pyc and b/__pycache__/retrieval.cpython-310.pyc differ
 
app.py CHANGED
@@ -111,14 +111,16 @@ def predict(
111
  (history[i].strip(), history[i + 1].strip())
112
  for i in range(0, len(history) - 1, 2)
113
  ]
114
- yield chat, history
115
 
116
  # add context retrieval part here
117
  ta = retrieval.Retrieval()
118
  ta._load_pinecone_vectorstore()
119
  question = inputs
120
  top_context_list = ta.retrieve_contexts_from_pinecone(user_question=question, topk=NUM_ANSWERS_GENERATED)
121
- print(top_context_list)
 
 
122
 
123
  def reset_textbox():
124
  return gr.update(value="")
@@ -333,7 +335,7 @@ with gr.Blocks(
333
  chatbot,
334
  state,
335
  ],
336
- [chatbot, state],
337
  )
338
  run_btn.click(
339
  predict,
 
111
  (history[i].strip(), history[i + 1].strip())
112
  for i in range(0, len(history) - 1, 2)
113
  ]
114
+ yield chat, history, None, None, None
115
 
116
  # add context retrieval part here
117
  ta = retrieval.Retrieval()
118
  ta._load_pinecone_vectorstore()
119
  question = inputs
120
  top_context_list = ta.retrieve_contexts_from_pinecone(user_question=question, topk=NUM_ANSWERS_GENERATED)
121
+ print(len(top_context_list))
122
+
123
+ yield chat, history, top_context_list[0], top_context_list[1], top_context_list[2]
124
 
125
  def reset_textbox():
126
  return gr.update(value="")
 
335
  chatbot,
336
  state,
337
  ],
338
+ [chatbot, state, context1, context2, context3],
339
  )
340
  run_btn.click(
341
  predict,
requirements.txt CHANGED
@@ -3,3 +3,5 @@ gradio==3.20.1
3
  pinecone-client
4
  sentence-transformers
5
  pandas
 
 
 
3
  pinecone-client
4
  sentence-transformers
5
  pandas
6
+ langchain
7
+ python-dotenv
retrieval.py CHANGED
@@ -15,7 +15,6 @@ from dotenv import load_dotenv
15
  from PIL import Image
16
  from transformers import (AutoModelForSequenceClassification, AutoTokenizer, GPT2Tokenizer, OPTForCausalLM, T5ForConditionalGeneration)
17
 
18
- PINECONE_API_KEY="insert your pinecone api key here"
19
 
20
  class Retrieval:
21
  def __init__(self,
@@ -35,11 +34,11 @@ class Retrieval:
35
 
36
  def _load_pinecone_vectorstore(self,):
37
  model_name = "intfloat/e5-large" # best text embedding model. 1024 dims.
38
- pincecone_index = pinecone.Index("uiuc-chatbot")
39
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
40
  #pinecone.init(api_key=os.environ['PINECONE_API_KEY'], environment="us-west1-gcp")
41
  pinecone.init(api_key=PINECONE_API_KEY, environment="us-west1-gcp")
42
-
43
  print(pinecone.list_indexes())
44
 
45
  self.vectorstore = Pinecone(index=pincecone_index, embedding_function=embeddings.embed_query, text_key="text")
 
15
  from PIL import Image
16
  from transformers import (AutoModelForSequenceClassification, AutoTokenizer, GPT2Tokenizer, OPTForCausalLM, T5ForConditionalGeneration)
17
 
 
18
 
19
  class Retrieval:
20
  def __init__(self,
 
34
 
35
  def _load_pinecone_vectorstore(self,):
36
  model_name = "intfloat/e5-large" # best text embedding model. 1024 dims.
37
+
38
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
39
  #pinecone.init(api_key=os.environ['PINECONE_API_KEY'], environment="us-west1-gcp")
40
  pinecone.init(api_key=PINECONE_API_KEY, environment="us-west1-gcp")
41
+ pincecone_index = pinecone.Index("uiuc-chatbot")
42
  print(pinecone.list_indexes())
43
 
44
  self.vectorstore = Pinecone(index=pincecone_index, embedding_function=embeddings.embed_query, text_key="text")