Spaces:
Runtime error
Runtime error
Removing UI validations temporarily
Browse files- app.py +34 -17
- grader_qa.py +301 -0
- utils.py +13 -297
app.py
CHANGED
@@ -9,8 +9,9 @@ from langchain.chat_models import ChatOpenAI
|
|
9 |
from langchain.embeddings import OpenAIEmbeddings
|
10 |
|
11 |
from grader import Grader
|
|
|
12 |
from ingest import ingest_canvas_discussions
|
13 |
-
from utils import
|
14 |
|
15 |
load_dotenv()
|
16 |
|
@@ -122,39 +123,52 @@ def get_grading_status(history):
|
|
122 |
grader_qa = GraderQA(grader, embeddings)
|
123 |
if len(history) == 1:
|
124 |
history = history + [(None, 'Grading is already complete. You can now ask questions')]
|
125 |
-
enable_fields(False, False, False, False, True, True, True)
|
126 |
# Check if data is ingested
|
127 |
elif len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")):
|
128 |
if not grader_qa:
|
129 |
grader = Grader(qa_model)
|
130 |
if len(history) == 1:
|
131 |
history = history + [(None, 'Canvas data is already ingested. You can grade discussions now')]
|
132 |
-
enable_fields(False, False, False, True, True, False, False)
|
133 |
else:
|
134 |
history = history + [(None, 'Please ingest data and start grading')]
|
135 |
-
|
136 |
-
enable_fields(True, True, True, True, True, False, False)
|
137 |
return history
|
138 |
|
139 |
|
140 |
# handle enable/disable of fields
|
141 |
def enable_fields(url_status, canvas_api_key_status, submit_status, grade_status,
|
142 |
download_status, chatbot_txt_status, chatbot_btn_status):
|
143 |
-
url.interactive
|
144 |
-
canvas_api_key.interactive
|
145 |
-
submit.interactive
|
146 |
-
grade.interactive
|
147 |
-
download.interactive
|
148 |
-
txt.interactive
|
149 |
-
ask.interactive
|
|
|
150 |
if not chatbot_txt_status:
|
151 |
-
txt.placeholder
|
152 |
else:
|
153 |
-
txt.placeholder
|
154 |
if not url_status:
|
155 |
-
url.placeholder
|
156 |
if not canvas_api_key_status:
|
157 |
-
canvas_api_key.placeholder
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
|
160 |
def bot(history):
|
@@ -210,10 +224,13 @@ with gr.Blocks() as demo:
|
|
210 |
bot, chatbot, chatbot
|
211 |
)
|
212 |
|
213 |
-
ask.click(add_text, inputs=[chatbot, txt], outputs=[chatbot, txt], postprocess=False,).then(
|
214 |
bot, chatbot, chatbot
|
215 |
)
|
216 |
|
|
|
|
|
|
|
217 |
if __name__ == "__main__":
|
218 |
demo.queue()
|
219 |
demo.queue(concurrency_count=5)
|
|
|
9 |
from langchain.embeddings import OpenAIEmbeddings
|
10 |
|
11 |
from grader import Grader
|
12 |
+
from grader_qa import GraderQA
|
13 |
from ingest import ingest_canvas_discussions
|
14 |
+
from utils import reset_folder
|
15 |
|
16 |
load_dotenv()
|
17 |
|
|
|
123 |
grader_qa = GraderQA(grader, embeddings)
|
124 |
if len(history) == 1:
|
125 |
history = history + [(None, 'Grading is already complete. You can now ask questions')]
|
126 |
+
# enable_fields(False, False, False, False, True, True, True)
|
127 |
# Check if data is ingested
|
128 |
elif len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")):
|
129 |
if not grader_qa:
|
130 |
grader = Grader(qa_model)
|
131 |
if len(history) == 1:
|
132 |
history = history + [(None, 'Canvas data is already ingested. You can grade discussions now')]
|
133 |
+
# enable_fields(False, False, False, True, True, False, False)
|
134 |
else:
|
135 |
history = history + [(None, 'Please ingest data and start grading')]
|
136 |
+
# enable_fields(True, True, True, True, True, False, False)
|
|
|
137 |
return history
|
138 |
|
139 |
|
140 |
# handle enable/disable of fields
|
141 |
def enable_fields(url_status, canvas_api_key_status, submit_status, grade_status,
                  download_status, chatbot_txt_status, chatbot_btn_status):
    """Build the Gradio updates that enable or disable the UI controls.

    ``Component.update(...)`` only *builds* an update payload; it does not
    change the component it is called on. The previous version discarded those
    payloads and returned the untouched components, so it had no visible
    effect. This version returns the payloads so an event handler can pass
    them on through its ``outputs``.

    Args:
        url_status: Whether the Canvas URL box accepts input.
        canvas_api_key_status: Whether the Canvas API key box accepts input.
        submit_status: Whether the submit/ingest button is clickable.
        grade_status: Whether the grade button is clickable.
        download_status: Whether the download control is usable.
        chatbot_txt_status: Whether the chat text box accepts input.
        chatbot_btn_status: Whether the ask button is clickable.

    Returns:
        A 7-tuple of update payloads in the order
        ``(url, canvas_api_key, submit, grade, download, txt, ask)``.
    """
    # The placeholder is only overridden when a box is locked, mirroring the
    # original conditions; an unlocked box keeps whatever placeholder it has.
    url_changes = {"interactive": url_status}
    if not url_status:
        url_changes["placeholder"] = "Data already ingested"

    api_key_changes = {"interactive": canvas_api_key_status}
    if not canvas_api_key_status:
        api_key_changes["placeholder"] = "Data already ingested"

    # The chat box always gets a hint that matches its current state.
    if chatbot_txt_status:
        txt_placeholder = "Ask a question"
    else:
        txt_placeholder = "Please grade discussions first"

    return (
        gr.update(**url_changes),
        gr.update(**api_key_changes),
        gr.update(interactive=submit_status),
        gr.update(interactive=grade_status),
        gr.update(interactive=download_status),
        gr.update(interactive=chatbot_txt_status, placeholder=txt_placeholder),
        gr.update(interactive=chatbot_btn_status),
    )
|
160 |
+
|
161 |
+
|
162 |
+
def reset_data(history):
    """Wipe all generated data and start the chat history over.

    Empties the ``output``, ``docs`` and ``vector_stores`` folders through
    ``reset_folder`` and clears the module-level ``grader`` and ``grader_qa``.

    Args:
        history: Current chat history; it is ignored and replaced.

    Returns:
        A fresh history holding a single confirmation message.
    """
    global grader, grader_qa

    for folder_name in ('output', 'docs', 'vector_stores'):
        reset_folder(folder_name)

    # Drop both objects so nothing keeps pointing at the deleted files.
    grader = grader_qa = None
    return [(None, 'Data reset successfully')]
|
172 |
|
173 |
|
174 |
def bot(history):
|
|
|
224 |
bot, chatbot, chatbot
|
225 |
)
|
226 |
|
227 |
+
ask.click(add_text, inputs=[chatbot, txt], outputs=[chatbot, txt], postprocess=False, ).then(
|
228 |
bot, chatbot, chatbot
|
229 |
)
|
230 |
|
231 |
+
reset.click(reset_data, inputs=[chatbot], outputs=[chatbot], postprocess=False, show_progress=True, ).success(
|
232 |
+
bot, chatbot, chatbot)
|
233 |
+
|
234 |
if __name__ == "__main__":
|
235 |
demo.queue()
|
236 |
demo.queue(concurrency_count=5)
|
grader_qa.py
ADDED
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from langchain import FAISS
|
4 |
+
from langchain.chains import ConversationalRetrievalChain
|
5 |
+
from langchain.chat_models import ChatOpenAI
|
6 |
+
from langchain.document_loaders import CSVLoader
|
7 |
+
from langchain.memory import ConversationBufferMemory
|
8 |
+
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
|
9 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
10 |
+
|
11 |
+
|
12 |
+
def search_index_from_docs(source_chunks, embeddings):
    """Create a FAISS index from document chunks.

    Args:
        source_chunks: Documents to index.
        embeddings: Embedding model passed through to ``FAISS.from_documents``.

    Returns:
        The index produced by ``FAISS.from_documents``.
    """
    return FAISS.from_documents(source_chunks, embeddings)
|
17 |
+
|
18 |
+
|
19 |
+
def get_chat_history(inputs) -> str:
    """Render (human, ai) exchanges as one newline-separated transcript.

    Args:
        inputs: Iterable of two-item pairs ``(human_message, ai_message)``.

    Returns:
        One ``Human:...`` line followed by one ``AI:...`` line per pair, joined
        with newlines; an empty string when there are no pairs.
    """
    return "\n".join(f"Human:{human}\nAI:{ai}" for human, ai in inputs)
|
24 |
+
|
25 |
+
|
26 |
+
class GraderQA:
    """Question answering over the grading results produced by a grader.

    Builds (or loads from ``vector_stores/``) a FAISS index over the grader's
    CSV output and wraps it in a map-reduce ``ConversationalRetrievalChain``.
    """

    def __init__(self, grader, embeddings):
        """Prepare the search index and the retrieval chain.

        Args:
            grader: Object providing the ``llm``, ``rubric_text`` and ``csv``
                attributes read by this class.
            embeddings: Embedding model used to create or load the FAISS index.
        """
        self.grader = grader
        self.llm = self.grader.llm
        self.index_file = "vector_stores/canvas-discussions.faiss"
        self.pickle_file = "vector_stores/canvas-discussions.pkl"
        self.rubric_text = grader.rubric_text
        # get_tokens() reads these two attributes. They were never assigned
        # before, which made every run_qa_chain() call raise AttributeError.
        self.docs = []
        self.prompt = None
        self.search_index = self.get_search_index(embeddings)
        self.chain = self.create_chain(embeddings)
        self.tokens = None
        self.question = None

    def get_search_index(self, embeddings):
        """Load the saved index when both files exist, otherwise build it."""
        index_is_saved = (
            os.path.isfile(self.pickle_file)
            and os.path.isfile(self.index_file)
            and os.path.getsize(self.pickle_file) > 0
        )
        if index_is_saved:
            # Reuse the index persisted by an earlier run.
            search_index = self.load_index(embeddings)
        else:
            search_index = self.create_index(embeddings)
            print("Created index")
        return search_index

    def load_index(self, embeddings):
        """Load the persisted FAISS index from ``vector_stores/``."""
        db = FAISS.load_local(
            folder_path="vector_stores/",
            index_name="canvas-discussions", embeddings=embeddings,
        )
        print("Loaded index")
        return db

    def create_index(self, embeddings):
        """Build the index from the grading CSV and save it to ``vector_stores/``."""
        source_chunks = self.create_chunk_documents()
        search_index = search_index_from_docs(source_chunks, embeddings)
        search_index.save_local(folder_path="vector_stores/", index_name="canvas-discussions")
        return search_index

    def create_chunk_documents(self):
        """Split the grading documents into chunks of at most 500 characters."""
        sources = self.fetch_data_for_embeddings()

        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)

        source_chunks = splitter.split_documents(sources)

        print("chunks: " + str(len(source_chunks)))
        print("sources: " + str(len(sources)))

        return source_chunks

    def fetch_data_for_embeddings(self):
        """Return the documents to embed (currently the grading CSV rows)."""
        document_list = self.get_csv_files()
        print("document list: " + str(len(document_list)))
        return document_list

    def get_csv_files(self):
        """Load the grader's CSV with ``student_name`` as each document's source."""
        loader = CSVLoader(file_path=self.grader.csv, source_column="student_name")
        return loader.load()

    def create_chain(self, embeddings):
        """Create the map-reduce conversational retrieval chain.

        Args:
            embeddings: Used to load the index if ``self.search_index`` is falsy.

        Returns:
            The configured ``ConversationalRetrievalChain``.
        """
        if not self.search_index:
            self.search_index = self.load_index(embeddings)

        question_prompt, combine_prompt = self.create_map_reduce_prompt()

        # The map step runs question_prompt on each retrieved chunk; the reduce
        # step merges those per-chunk answers with combine_prompt.
        retriever = self.search_index.as_retriever(
            search_type='mmr',
            search_kwargs={'lambda_mult': 1, 'fetch_k': 50, 'k': 30},
        )
        memory = ConversationBufferMemory(
            memory_key='chat_history',
            return_messages=True,
            output_key='answer',
        )
        chain = ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            chain_type='map_reduce',
            retriever=retriever,
            return_source_documents=True,
            verbose=True,
            memory=memory,
            condense_question_llm=ChatOpenAI(temperature=0, model='gpt-3.5-turbo'),
            combine_docs_chain_kwargs={"question_prompt": question_prompt,
                                       "combine_prompt": combine_prompt},
        )
        return chain

    def create_map_reduce_prompt(self):
        """Build the per-chunk (map) and combining (reduce) chat prompts.

        Returns:
            Tuple ``(question_prompt, combine_prompt)``.
        """
        # NOTE(review): rubric_text is interpolated into the template text, so
        # literal braces in a rubric would be read as template variables.
        system_template = f"""Use the following portion of a long grading results document to answer the question BUT ONLY FOR THE STUDENT MENTIONED. Use the following examples to take guidance on how to answer the question.
Examples:
Question: How many students participated in the discussion?
Answer: This student participated in the discussion./This student did not participate in the discussion.
Question: What was the average score for the discussion?
Answer: This student received a score of 10/10 for the discussion.
Question: How many students received a full score?/How many students did not receive a full score?
Answer: This student received a full score./This student did not receive a full score.
Question: How many students lost marks in X category of the rubric?
Answer: This student lost marks in X category of the rubric./This student did not lose marks in X category of the rubric.
Question: Give me 3 best responses received for the discussion.
Answer: This student gave the following responses for the discussion and received a score of 10/10.


______________________
Grading Result For:
{{context}}
______________________
Following are the instructions and rubric of the discussion post for reference, used to grade the discussion.
----------------
Instructions and Rubric:
{self.rubric_text}
"""
        messages = [
            SystemMessagePromptTemplate.from_template(system_template),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages)
        system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
Use the following answers for each student to answer the users question as accurately as possible.
You are an expert at basic calculations and answering questions on grading results and can answer the following questions with ease.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
______________________
{summaries}"""
        messages = [
            SystemMessagePromptTemplate.from_template(system_template),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        CHAT_COMBINE_PROMPT = ChatPromptTemplate.from_messages(messages)
        return CHAT_QUESTION_PROMPT, CHAT_COMBINE_PROMPT

    def create_prompt(self):
        """Build a single-step chat prompt with ``context`` and ``question`` slots."""
        system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
You are a grading assistant who graded the canvas discussions to create the following grading results and feedback.
Use the following instruction, rubric of the discussion which were used to grade the discussions and refine the answer if needed.
----------------
{self.rubric_text}
----------------
Use the following pieces of the grading results, score, feedback and summary of student responses to answer the users question as accurately as possible.
{{context}}"""
        messages = [
            SystemMessagePromptTemplate.from_template(system_template),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        return ChatPromptTemplate.from_messages(messages)

    def get_tokens(self):
        """Count the prompt tokens needed to ask the current question of each doc.

        Reads ``self.docs`` and ``self.prompt`` if they have been set. With no
        documents the count is 0; with documents but no prompt, the prompt from
        ``create_prompt`` is built once and reused.

        Returns:
            Sum of ``self.llm.get_num_tokens`` over one formatted prompt per
            document.
        """
        docs = getattr(self, "docs", None)
        if not docs:
            return 0

        prompt = getattr(self, "prompt", None)
        if prompt is None:
            # create_prompt() has the same context/question slots used below.
            prompt = self.prompt = self.create_prompt()

        return sum(
            self.llm.get_num_tokens(prompt.format(context=doc, question=self.question))
            for doc in docs
        )

    def run_qa_chain(self, question):
        """Run the retrieval chain for ``question``.

        Stores the question in ``self.question`` and the token estimate from
        ``get_tokens`` in ``self.tokens`` before calling the chain.

        Args:
            question: The user's question.

        Returns:
            Whatever ``self.chain(question)`` returns.
        """
        self.question = question
        # The count used to be computed and thrown away; keep it on the instance.
        self.tokens = self.get_tokens()
        return self.chain(question)
|
185 |
+
|
186 |
+
# system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the following questions as best you can.
|
187 |
+
# You are a grading assistant who graded the canvas discussions to create the following grading results and feedback. Use the following pieces of the grading results and feedback to answer the users question.
|
188 |
+
# Use the following pieces of context to answer the users question.
|
189 |
+
# ----------------
|
190 |
+
# {context}"""
|
191 |
+
#
|
192 |
+
# messages = [
|
193 |
+
# SystemMessagePromptTemplate.from_template(system_template),
|
194 |
+
# HumanMessagePromptTemplate.from_template("{question}"),
|
195 |
+
# ]
|
196 |
+
# CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
|
197 |
+
#
|
198 |
+
#
|
199 |
+
# def get_search_index(embeddings):
|
200 |
+
# global vectorstore_index
|
201 |
+
# if os.path.isfile(pickle_file) and os.path.isfile(index_file) and os.path.getsize(pickle_file) > 0:
|
202 |
+
# # Load index from pickle file
|
203 |
+
# search_index = load_index(embeddings)
|
204 |
+
# else:
|
205 |
+
# search_index = create_index(model)
|
206 |
+
# print("Created index")
|
207 |
+
#
|
208 |
+
# vectorstore_index = search_index
|
209 |
+
# return search_index
|
210 |
+
#
|
211 |
+
#
|
212 |
+
# def create_index(embeddings):
|
213 |
+
# source_chunks = create_chunk_documents()
|
214 |
+
# search_index = search_index_from_docs(source_chunks, embeddings)
|
215 |
+
# # search_index.persist()
|
216 |
+
# FAISS.save_local(search_index, folder_path="vector_stores/", index_name="canvas-discussions")
|
217 |
+
# # Save index to pickle file
|
218 |
+
# # with open(pickle_file, "wb") as f:
|
219 |
+
# # pickle.dump(search_index, f)
|
220 |
+
# return search_index
|
221 |
+
#
|
222 |
+
#
|
223 |
+
# def search_index_from_docs(source_chunks, embeddings):
|
224 |
+
# # print("source chunks: " + str(len(source_chunks)))
|
225 |
+
# # print("embeddings: " + str(embeddings))
|
226 |
+
# search_index = FAISS.from_documents(source_chunks, embeddings)
|
227 |
+
# return search_index
|
228 |
+
#
|
229 |
+
#
|
230 |
+
# def get_html_files():
|
231 |
+
# loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
|
232 |
+
# document_list = loader.load()
|
233 |
+
# for document in document_list:
|
234 |
+
# document.metadata["name"] = document.metadata["source"].split("/")[-1].split(".")[0]
|
235 |
+
# return document_list
|
236 |
+
#
|
237 |
+
#
|
238 |
+
# def get_text_files():
|
239 |
+
# loader = DirectoryLoader('docs', glob="**/*.txt", loader_cls=TextLoader, recursive=True)
|
240 |
+
# document_list = loader.load()
|
241 |
+
# return document_list
|
242 |
+
#
|
243 |
+
#
|
244 |
+
# def create_chunk_documents():
|
245 |
+
# sources = fetch_data_for_embeddings()
|
246 |
+
#
|
247 |
+
# splitter = RecursiveCharacterTextSplitter.from_language(
|
248 |
+
# language=Language.HTML, chunk_size=500, chunk_overlap=0
|
249 |
+
# )
|
250 |
+
#
|
251 |
+
# source_chunks = splitter.split_documents(sources)
|
252 |
+
#
|
253 |
+
# print("chunks: " + str(len(source_chunks)))
|
254 |
+
# print("sources: " + str(len(sources)))
|
255 |
+
#
|
256 |
+
# return source_chunks
|
257 |
+
#
|
258 |
+
#
|
259 |
+
# def create_chain(question, llm, embeddings):
|
260 |
+
# db = load_index(embeddings)
|
261 |
+
#
|
262 |
+
# # Create chain
|
263 |
+
# chain = ConversationalRetrievalChain.from_llm(llm, db.as_retriever(search_type='mmr',
|
264 |
+
# search_kwargs={'lambda_mult': 1, 'fetch_k': 50,
|
265 |
+
# 'k': 30}),
|
266 |
+
# return_source_documents=True,
|
267 |
+
# verbose=True,
|
268 |
+
# memory=ConversationSummaryBufferMemory(memory_key='chat_history',
|
269 |
+
# llm=llm, max_token_limit=40,
|
270 |
+
# return_messages=True,
|
271 |
+
# output_key='answer'),
|
272 |
+
# get_chat_history=get_chat_history,
|
273 |
+
# combine_docs_chain_kwargs={"prompt": CHAT_PROMPT})
|
274 |
+
#
|
275 |
+
# result = chain({"question": question})
|
276 |
+
#
|
277 |
+
# sources = []
|
278 |
+
# print(result)
|
279 |
+
#
|
280 |
+
# for document in result['source_documents']:
|
281 |
+
# sources.append("\n" + str(document.metadata))
|
282 |
+
# print(sources)
|
283 |
+
#
|
284 |
+
# source = ',\n'.join(set(sources))
|
285 |
+
# return result['answer'] + '\nSOURCES: ' + source
|
286 |
+
#
|
287 |
+
#
|
288 |
+
# def load_index(embeddings):
|
289 |
+
# # Load index
|
290 |
+
# db = FAISS.load_local(
|
291 |
+
# folder_path="vector_stores/",
|
292 |
+
# index_name="canvas-discussions", embeddings=embeddings,
|
293 |
+
# )
|
294 |
+
# return db
|
295 |
+
#
|
296 |
+
#
|
297 |
+
# def get_chat_history(inputs) -> str:
|
298 |
+
# res = []
|
299 |
+
# for human, ai in inputs:
|
300 |
+
# res.append(f"Human:{human}\nAI:{ai}")
|
301 |
+
# return "\n".join(res)
|
utils.py
CHANGED
@@ -1,298 +1,14 @@
|
|
1 |
import os
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
search_index = FAISS.from_documents(source_chunks, embeddings)
|
16 |
-
return search_index
|
17 |
-
|
18 |
-
|
19 |
-
def get_chat_history(inputs) -> str:
|
20 |
-
res = []
|
21 |
-
for human, ai in inputs:
|
22 |
-
res.append(f"Human:{human}\nAI:{ai}")
|
23 |
-
return "\n".join(res)
|
24 |
-
|
25 |
-
|
26 |
-
class GraderQA:
|
27 |
-
def __init__(self, grader, embeddings):
|
28 |
-
self.grader = grader
|
29 |
-
self.llm = self.grader.llm
|
30 |
-
self.index_file = "vector_stores/canvas-discussions.faiss"
|
31 |
-
self.pickle_file = "vector_stores/canvas-discussions.pkl"
|
32 |
-
self.rubric_text = grader.rubric_text
|
33 |
-
self.search_index = self.get_search_index(embeddings)
|
34 |
-
self.chain = self.create_chain(embeddings)
|
35 |
-
self.tokens = None
|
36 |
-
self.question = None
|
37 |
-
|
38 |
-
def get_search_index(self, embeddings):
|
39 |
-
if os.path.isfile(self.pickle_file) and os.path.isfile(self.index_file) and os.path.getsize(
|
40 |
-
self.pickle_file) > 0:
|
41 |
-
# Load index from pickle file
|
42 |
-
search_index = self.load_index(embeddings)
|
43 |
-
else:
|
44 |
-
search_index = self.create_index(embeddings)
|
45 |
-
print("Created index")
|
46 |
-
return search_index
|
47 |
-
|
48 |
-
def load_index(self, embeddings):
|
49 |
-
# Load index
|
50 |
-
db = FAISS.load_local(
|
51 |
-
folder_path="vector_stores/",
|
52 |
-
index_name="canvas-discussions", embeddings=embeddings,
|
53 |
-
)
|
54 |
-
print("Loaded index")
|
55 |
-
return db
|
56 |
-
|
57 |
-
def create_index(self, embeddings):
|
58 |
-
source_chunks = self.create_chunk_documents()
|
59 |
-
search_index = search_index_from_docs(source_chunks, embeddings)
|
60 |
-
FAISS.save_local(search_index, folder_path="vector_stores/", index_name="canvas-discussions")
|
61 |
-
return search_index
|
62 |
-
|
63 |
-
def create_chunk_documents(self):
|
64 |
-
sources = self.fetch_data_for_embeddings()
|
65 |
-
|
66 |
-
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
|
67 |
-
|
68 |
-
source_chunks = splitter.split_documents(sources)
|
69 |
-
|
70 |
-
print("chunks: " + str(len(source_chunks)))
|
71 |
-
print("sources: " + str(len(sources)))
|
72 |
-
|
73 |
-
return source_chunks
|
74 |
-
|
75 |
-
def fetch_data_for_embeddings(self):
|
76 |
-
document_list = self.get_csv_files()
|
77 |
-
print("document list: " + str(len(document_list)))
|
78 |
-
return document_list
|
79 |
-
|
80 |
-
def get_csv_files(self):
|
81 |
-
loader = CSVLoader(file_path=self.grader.csv, source_column="student_name")
|
82 |
-
document_list = loader.load()
|
83 |
-
return document_list
|
84 |
-
|
85 |
-
def create_chain(self, embeddings):
|
86 |
-
if not self.search_index:
|
87 |
-
self.search_index = self.load_index(embeddings)
|
88 |
-
|
89 |
-
question_prompt, combine_prompt = self.create_map_reduce_prompt()
|
90 |
-
|
91 |
-
chain = ConversationalRetrievalChain.from_llm(llm=self.llm, chain_type='map_reduce',
|
92 |
-
retriever=self.search_index.as_retriever(search_type='mmr',
|
93 |
-
search_kwargs={
|
94 |
-
'lambda_mult': 1,
|
95 |
-
'fetch_k': 50,
|
96 |
-
'k': 30}),
|
97 |
-
return_source_documents=True,
|
98 |
-
verbose=True,
|
99 |
-
memory=ConversationBufferMemory(memory_key='chat_history',
|
100 |
-
return_messages=True,
|
101 |
-
output_key='answer'),
|
102 |
-
condense_question_llm=ChatOpenAI(temperature=0,
|
103 |
-
model='gpt-3.5-turbo'),
|
104 |
-
combine_docs_chain_kwargs={"question_prompt": question_prompt,
|
105 |
-
"combine_prompt": combine_prompt})
|
106 |
-
return chain
|
107 |
-
|
108 |
-
def create_map_reduce_prompt(self):
|
109 |
-
system_template = f"""Use the following portion of a long grading results document to answer the question BUT ONLY FOR THE STUDENT MENTIONED. Use the following examples to take guidance on how to answer the question.
|
110 |
-
Examples:
|
111 |
-
Question: How many students participated in the discussion?
|
112 |
-
Answer: This student participated in the discussion./This student did not participate in the discussion.
|
113 |
-
Question: What was the average score for the discussion?
|
114 |
-
Answer: This student received a score of 10/10 for the discussion.
|
115 |
-
Question: How many students received a full score?/How many students did not receive a full score?
|
116 |
-
Answer: This student received a full score./This student did not receive a full score.
|
117 |
-
Question: How many students lost marks in X category of the rubric?
|
118 |
-
Answer: This student lost marks in X category of the rubric./This student did not lose marks in X category of the rubric.
|
119 |
-
|
120 |
-
|
121 |
-
______________________
|
122 |
-
Grading Result For:
|
123 |
-
{{context}}
|
124 |
-
______________________
|
125 |
-
Following are the instructions and rubric of the discussion post for reference, used to grade the discussion.
|
126 |
-
----------------
|
127 |
-
Instructions and Rubric:
|
128 |
-
{self.rubric_text}
|
129 |
-
"""
|
130 |
-
messages = [
|
131 |
-
SystemMessagePromptTemplate.from_template(system_template),
|
132 |
-
HumanMessagePromptTemplate.from_template("{question}"),
|
133 |
-
]
|
134 |
-
CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages)
|
135 |
-
system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
|
136 |
-
Use the following answers for each student to answer the users question as accurately as possible.
|
137 |
-
You are an expert at basic calculations and answering questions on grading results and can answer the following questions with ease.
|
138 |
-
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
|
139 |
-
______________________
|
140 |
-
{summaries}"""
|
141 |
-
messages = [
|
142 |
-
SystemMessagePromptTemplate.from_template(system_template),
|
143 |
-
HumanMessagePromptTemplate.from_template("{question}"),
|
144 |
-
]
|
145 |
-
CHAT_COMBINE_PROMPT = ChatPromptTemplate.from_messages(messages)
|
146 |
-
return CHAT_QUESTION_PROMPT, CHAT_COMBINE_PROMPT
|
147 |
-
|
148 |
-
def create_prompt(self):
|
149 |
-
system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
|
150 |
-
You are a grading assistant who graded the canvas discussions to create the following grading results and feedback.
|
151 |
-
Use the following instruction, rubric of the discussion which were used to grade the discussions and refine the answer if needed.
|
152 |
-
----------------
|
153 |
-
{self.rubric_text}
|
154 |
-
----------------
|
155 |
-
Use the following pieces of the grading results, score, feedback and summary of student responses to answer the users question as accurately as possible.
|
156 |
-
{{context}}"""
|
157 |
-
messages = [
|
158 |
-
SystemMessagePromptTemplate.from_template(system_template),
|
159 |
-
HumanMessagePromptTemplate.from_template("{question}"),
|
160 |
-
]
|
161 |
-
return ChatPromptTemplate.from_messages(messages)
|
162 |
-
|
163 |
-
def get_tokens(self):
|
164 |
-
total_tokens = 0
|
165 |
-
for doc in self.docs:
|
166 |
-
chat_prompt = self.prompt.format(context=doc, question=self.question)
|
167 |
-
|
168 |
-
num_tokens = self.llm.get_num_tokens(chat_prompt)
|
169 |
-
total_tokens += num_tokens
|
170 |
-
|
171 |
-
# summary = self.llm(summary_prompt)
|
172 |
-
|
173 |
-
# print (f"Summary: {summary.strip()}")
|
174 |
-
# print ("\n")
|
175 |
-
return total_tokens
|
176 |
-
|
177 |
-
def run_qa_chain(self, question):
|
178 |
-
self.question = question
|
179 |
-
self.get_tokens()
|
180 |
-
answer = self.chain(question)
|
181 |
-
return answer
|
182 |
-
|
183 |
-
# system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the following questions as best you can.
|
184 |
-
# You are a grading assistant who graded the canvas discussions to create the following grading results and feedback. Use the following pieces of the grading results and feedback to answer the users question.
|
185 |
-
# Use the following pieces of context to answer the users question.
|
186 |
-
# ----------------
|
187 |
-
# {context}"""
|
188 |
-
#
|
189 |
-
# messages = [
|
190 |
-
# SystemMessagePromptTemplate.from_template(system_template),
|
191 |
-
# HumanMessagePromptTemplate.from_template("{question}"),
|
192 |
-
# ]
|
193 |
-
# CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
|
194 |
-
#
|
195 |
-
#
|
196 |
-
# def get_search_index(embeddings):
|
197 |
-
# global vectorstore_index
|
198 |
-
# if os.path.isfile(pickle_file) and os.path.isfile(index_file) and os.path.getsize(pickle_file) > 0:
|
199 |
-
# # Load index from pickle file
|
200 |
-
# search_index = load_index(embeddings)
|
201 |
-
# else:
|
202 |
-
# search_index = create_index(model)
|
203 |
-
# print("Created index")
|
204 |
-
#
|
205 |
-
# vectorstore_index = search_index
|
206 |
-
# return search_index
|
207 |
-
#
|
208 |
-
#
|
209 |
-
# def create_index(embeddings):
|
210 |
-
# source_chunks = create_chunk_documents()
|
211 |
-
# search_index = search_index_from_docs(source_chunks, embeddings)
|
212 |
-
# # search_index.persist()
|
213 |
-
# FAISS.save_local(search_index, folder_path="vector_stores/", index_name="canvas-discussions")
|
214 |
-
# # Save index to pickle file
|
215 |
-
# # with open(pickle_file, "wb") as f:
|
216 |
-
# # pickle.dump(search_index, f)
|
217 |
-
# return search_index
|
218 |
-
#
|
219 |
-
#
|
220 |
-
# def search_index_from_docs(source_chunks, embeddings):
|
221 |
-
# # print("source chunks: " + str(len(source_chunks)))
|
222 |
-
# # print("embeddings: " + str(embeddings))
|
223 |
-
# search_index = FAISS.from_documents(source_chunks, embeddings)
|
224 |
-
# return search_index
|
225 |
-
#
|
226 |
-
#
|
227 |
-
# def get_html_files():
|
228 |
-
# loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
|
229 |
-
# document_list = loader.load()
|
230 |
-
# for document in document_list:
|
231 |
-
# document.metadata["name"] = document.metadata["source"].split("/")[-1].split(".")[0]
|
232 |
-
# return document_list
|
233 |
-
#
|
234 |
-
#
|
235 |
-
# def get_text_files():
|
236 |
-
# loader = DirectoryLoader('docs', glob="**/*.txt", loader_cls=TextLoader, recursive=True)
|
237 |
-
# document_list = loader.load()
|
238 |
-
# return document_list
|
239 |
-
#
|
240 |
-
#
|
241 |
-
# def create_chunk_documents():
|
242 |
-
# sources = fetch_data_for_embeddings()
|
243 |
-
#
|
244 |
-
# splitter = RecursiveCharacterTextSplitter.from_language(
|
245 |
-
# language=Language.HTML, chunk_size=500, chunk_overlap=0
|
246 |
-
# )
|
247 |
-
#
|
248 |
-
# source_chunks = splitter.split_documents(sources)
|
249 |
-
#
|
250 |
-
# print("chunks: " + str(len(source_chunks)))
|
251 |
-
# print("sources: " + str(len(sources)))
|
252 |
-
#
|
253 |
-
# return source_chunks
|
254 |
-
#
|
255 |
-
#
|
256 |
-
# def create_chain(question, llm, embeddings):
|
257 |
-
# db = load_index(embeddings)
|
258 |
-
#
|
259 |
-
# # Create chain
|
260 |
-
# chain = ConversationalRetrievalChain.from_llm(llm, db.as_retriever(search_type='mmr',
|
261 |
-
# search_kwargs={'lambda_mult': 1, 'fetch_k': 50,
|
262 |
-
# 'k': 30}),
|
263 |
-
# return_source_documents=True,
|
264 |
-
# verbose=True,
|
265 |
-
# memory=ConversationSummaryBufferMemory(memory_key='chat_history',
|
266 |
-
# llm=llm, max_token_limit=40,
|
267 |
-
# return_messages=True,
|
268 |
-
# output_key='answer'),
|
269 |
-
# get_chat_history=get_chat_history,
|
270 |
-
# combine_docs_chain_kwargs={"prompt": CHAT_PROMPT})
|
271 |
-
#
|
272 |
-
# result = chain({"question": question})
|
273 |
-
#
|
274 |
-
# sources = []
|
275 |
-
# print(result)
|
276 |
-
#
|
277 |
-
# for document in result['source_documents']:
|
278 |
-
# sources.append("\n" + str(document.metadata))
|
279 |
-
# print(sources)
|
280 |
-
#
|
281 |
-
# source = ',\n'.join(set(sources))
|
282 |
-
# return result['answer'] + '\nSOURCES: ' + source
|
283 |
-
#
|
284 |
-
#
|
285 |
-
# def load_index(embeddings):
|
286 |
-
# # Load index
|
287 |
-
# db = FAISS.load_local(
|
288 |
-
# folder_path="vector_stores/",
|
289 |
-
# index_name="canvas-discussions", embeddings=embeddings,
|
290 |
-
# )
|
291 |
-
# return db
|
292 |
-
#
|
293 |
-
#
|
294 |
-
# def get_chat_history(inputs) -> str:
|
295 |
-
# res = []
|
296 |
-
# for human, ai in inputs:
|
297 |
-
# res.append(f"Human:{human}\nAI:{ai}")
|
298 |
-
# return "\n".join(res)
|
|
|
1 |
import os
|
2 |
+
import shutil
|
3 |
+
import time
|
4 |
+
|
5 |
+
|
6 |
+
def reset_folder(destination):
    """Replace ``destination`` with an empty folder.

    Any existing folder at that path is deleted recursively, then the folder
    is created again (missing parent folders are created too).
    ``shutil.rmtree`` and ``os.makedirs`` both return only once they have
    finished, so no polling or sleeping is needed to wait for them.

    Args:
        destination: Path of the folder to reset.

    Raises:
        OSError: If the existing folder cannot be removed, or the path exists
            but is not a folder.
    """
    if os.path.isdir(destination):
        shutil.rmtree(destination)
    os.makedirs(destination, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|