rohan13 committed on
Commit
440deef
1 Parent(s): c1fe6fb

Removing UI validations temporarily

Files changed (3)
  1. app.py +34 -17
  2. grader_qa.py +301 -0
  3. utils.py +13 -297
app.py CHANGED
@@ -9,8 +9,9 @@ from langchain.chat_models import ChatOpenAI
 from langchain.embeddings import OpenAIEmbeddings
 
 from grader import Grader
+from grader_qa import GraderQA
 from ingest import ingest_canvas_discussions
-from utils import GraderQA
+from utils import reset_folder
 
 load_dotenv()
 
@@ -122,39 +123,52 @@ def get_grading_status(history):
         grader_qa = GraderQA(grader, embeddings)
         if len(history) == 1:
             history = history + [(None, 'Grading is already complete. You can now ask questions')]
-        enable_fields(False, False, False, False, True, True, True)
+        # enable_fields(False, False, False, False, True, True, True)
     # Check if data is ingested
     elif len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")):
         if not grader_qa:
             grader = Grader(qa_model)
         if len(history) == 1:
             history = history + [(None, 'Canvas data is already ingested. You can grade discussions now')]
-        enable_fields(False, False, False, True, True, False, False)
+        # enable_fields(False, False, False, True, True, False, False)
     else:
         history = history + [(None, 'Please ingest data and start grading')]
-        url.disabled = True
-        enable_fields(True, True, True, True, True, False, False)
+        # enable_fields(True, True, True, True, True, False, False)
     return history
 
 
 # handle enable/disable of fields
 def enable_fields(url_status, canvas_api_key_status, submit_status, grade_status,
                   download_status, chatbot_txt_status, chatbot_btn_status):
-    url.interactive = url_status
-    canvas_api_key.interactive = canvas_api_key_status
-    submit.interactive = submit_status
-    grade.interactive = grade_status
-    download.interactive = download_status
-    txt.interactive = chatbot_txt_status
-    ask.interactive = chatbot_btn_status
+    url.update(interactive=url_status)
+    canvas_api_key.update(interactive=canvas_api_key_status)
+    submit.update(interactive=submit_status)
+    grade.update(interactive=grade_status)
+    download.update(interactive=download_status)
+    txt.update(interactive=chatbot_txt_status)
+    ask.update(interactive=chatbot_btn_status)
+
     if not chatbot_txt_status:
-        txt.placeholder = "Please grade discussions first"
+        txt.update(placeholder="Please grade discussions first")
     else:
-        txt.placeholder = "Ask a question"
+        txt.update(placeholder="Ask a question")
     if not url_status:
-        url.placeholder = "Data already ingested"
+        url.update(placeholder="Data already ingested")
     if not canvas_api_key_status:
-        canvas_api_key.placeholder = "Data already ingested"
+        canvas_api_key.update(placeholder="Data already ingested")
+    return url, canvas_api_key, submit, grade, download, txt, ask
+
+
+def reset_data(history):
+    # Use reset_folder() (shutil.rmtree under the hood) to delete the output, docs, and
+    # vector_stores folders, reset grader and grader_qa, and return a reset history
+    global grader, grader_qa
+    reset_folder('output')
+    reset_folder('docs')
+    reset_folder('vector_stores')
+    grader = None
+    grader_qa = None
+    history = [(None, 'Data reset successfully')]
+    return history
 
 
 def bot(history):
@@ -210,10 +224,13 @@ with gr.Blocks() as demo:
         bot, chatbot, chatbot
     )
 
-    ask.click(add_text, inputs=[chatbot, txt], outputs=[chatbot, txt], postprocess=False,).then(
+    ask.click(add_text, inputs=[chatbot, txt], outputs=[chatbot, txt], postprocess=False, ).then(
         bot, chatbot, chatbot
    )
 
+    reset.click(reset_data, inputs=[chatbot], outputs=[chatbot], postprocess=False, show_progress=True, ).success(
+        bot, chatbot, chatbot)
+
 if __name__ == "__main__":
     demo.queue()
     demo.queue(concurrency_count=5)
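
A note on the enable_fields change above: in Gradio 3.x, calling Component.update(...) imperatively does not touch the live UI; it returns a dictionary of property changes that only takes effect when an event handler returns it for a component listed in that event's outputs. That is presumably why the validation calls are commented out "temporarily" and why enable_fields now returns the components. A minimal sketch of the pattern that would apply such updates (hypothetical, not part of this commit; component names are illustrative):

import gradio as gr

with gr.Blocks() as demo:
    txt = gr.Textbox(placeholder="Ask a question")
    ask = gr.Button("Ask")

    def lock_input():
        # gr.update() returns a dict of property changes; Gradio applies it to
        # whichever component occupies the same position in the outputs list.
        return gr.update(interactive=False, placeholder="Please grade discussions first")

    # The update takes effect only because txt is declared as an output here.
    ask.click(lock_input, inputs=None, outputs=[txt])

demo.launch()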
grader_qa.py ADDED
@@ -0,0 +1,301 @@
+import os
+
+from langchain import FAISS
+from langchain.chains import ConversationalRetrievalChain
+from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import CSVLoader
+from langchain.memory import ConversationBufferMemory
+from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+
+def search_index_from_docs(source_chunks, embeddings):
+    # print("source chunks: " + str(len(source_chunks)))
+    # print("embeddings: " + str(embeddings))
+    search_index = FAISS.from_documents(source_chunks, embeddings)
+    return search_index
+
+
+def get_chat_history(inputs) -> str:
+    res = []
+    for human, ai in inputs:
+        res.append(f"Human:{human}\nAI:{ai}")
+    return "\n".join(res)
+
+
+class GraderQA:
+    def __init__(self, grader, embeddings):
+        self.grader = grader
+        self.llm = self.grader.llm
+        self.index_file = "vector_stores/canvas-discussions.faiss"
+        self.pickle_file = "vector_stores/canvas-discussions.pkl"
+        self.rubric_text = grader.rubric_text
+        self.search_index = self.get_search_index(embeddings)
+        self.chain = self.create_chain(embeddings)
+        self.tokens = None
+        self.question = None
+
+    def get_search_index(self, embeddings):
+        if os.path.isfile(self.pickle_file) and os.path.isfile(self.index_file) and os.path.getsize(
+                self.pickle_file) > 0:
+            # Load index from pickle file
+            search_index = self.load_index(embeddings)
+        else:
+            search_index = self.create_index(embeddings)
+            print("Created index")
+        return search_index
+
+    def load_index(self, embeddings):
+        # Load index
+        db = FAISS.load_local(
+            folder_path="vector_stores/",
+            index_name="canvas-discussions", embeddings=embeddings,
+        )
+        print("Loaded index")
+        return db
+
+    def create_index(self, embeddings):
+        source_chunks = self.create_chunk_documents()
+        search_index = search_index_from_docs(source_chunks, embeddings)
+        FAISS.save_local(search_index, folder_path="vector_stores/", index_name="canvas-discussions")
+        return search_index
+
+    def create_chunk_documents(self):
+        sources = self.fetch_data_for_embeddings()
+
+        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
+
+        source_chunks = splitter.split_documents(sources)
+
+        print("chunks: " + str(len(source_chunks)))
+        print("sources: " + str(len(sources)))
+
+        return source_chunks
+
+    def fetch_data_for_embeddings(self):
+        document_list = self.get_csv_files()
+        print("document list: " + str(len(document_list)))
+        return document_list
+
+    def get_csv_files(self):
+        loader = CSVLoader(file_path=self.grader.csv, source_column="student_name")
+        document_list = loader.load()
+        return document_list
+
+    def create_chain(self, embeddings):
+        if not self.search_index:
+            self.search_index = self.load_index(embeddings)
+
+        question_prompt, combine_prompt = self.create_map_reduce_prompt()
+        # create agent, 1 chain for summary based question, 2nd chain for semantic retrieval based question
+
+        chain = ConversationalRetrievalChain.from_llm(llm=self.llm, chain_type='map_reduce',
+                                                      retriever=self.search_index.as_retriever(search_type='mmr',
+                                                                                               search_kwargs={
+                                                                                                   'lambda_mult': 1,
+                                                                                                   'fetch_k': 50,
+                                                                                                   'k': 30}),
+                                                      return_source_documents=True,
+                                                      verbose=True,
+                                                      memory=ConversationBufferMemory(memory_key='chat_history',
+                                                                                      return_messages=True,
+                                                                                      output_key='answer'),
+                                                      condense_question_llm=ChatOpenAI(temperature=0,
+                                                                                       model='gpt-3.5-turbo'),
+                                                      combine_docs_chain_kwargs={"question_prompt": question_prompt,
+                                                                                 "combine_prompt": combine_prompt})
+        return chain
+
+    def create_map_reduce_prompt(self):
+        system_template = f"""Use the following portion of a long grading results document to answer the question BUT ONLY FOR THE STUDENT MENTIONED. Use the following examples to take guidance on how to answer the question.
+Examples:
+Question: How many students participated in the discussion?
+Answer: This student participated in the discussion./This student did not participate in the discussion.
+Question: What was the average score for the discussion?
+Answer: This student received a score of 10/10 for the discussion.
+Question: How many students received a full score?/How many students did not receive a full score?
+Answer: This student received a full score./This student did not receive a full score.
+Question: How many students lost marks in X category of the rubric?
+Answer: This student lost marks in X category of the rubric./This student did not lose marks in X category of the rubric.
+Question: Give me 3 best responses received for the discussion.
+Answer: This student gave the following responses for the discussion and received a score of 10/10.
+
+
+______________________
+Grading Result For:
+{{context}}
+______________________
+Following are the instructions and rubric of the discussion post for reference, used to grade the discussion.
+----------------
+Instructions and Rubric:
+{self.rubric_text}
+"""
+        messages = [
+            SystemMessagePromptTemplate.from_template(system_template),
+            HumanMessagePromptTemplate.from_template("{question}"),
+        ]
+        CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages)
+        system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
+Use the following answers for each student to answer the users question as accurately as possible.
+You are an expert at basic calculations and answering questions on grading results and can answer the following questions with ease.
+If you don't know the answer, just say that you don't know. Don't try to make up an answer.
+______________________
+{summaries}"""
+        messages = [
+            SystemMessagePromptTemplate.from_template(system_template),
+            HumanMessagePromptTemplate.from_template("{question}"),
+        ]
+        CHAT_COMBINE_PROMPT = ChatPromptTemplate.from_messages(messages)
+        return CHAT_QUESTION_PROMPT, CHAT_COMBINE_PROMPT
+
+    def create_prompt(self):
+        system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
+You are a grading assistant who graded the canvas discussions to create the following grading results and feedback.
+Use the following instruction, rubric of the discussion which were used to grade the discussions and refine the answer if needed.
+----------------
+{self.rubric_text}
+----------------
+Use the following pieces of the grading results, score, feedback and summary of student responses to answer the users question as accurately as possible.
+{{context}}"""
+        messages = [
+            SystemMessagePromptTemplate.from_template(system_template),
+            HumanMessagePromptTemplate.from_template("{question}"),
+        ]
+        return ChatPromptTemplate.from_messages(messages)
+
+    def get_tokens(self):
+        total_tokens = 0
+        for doc in self.docs:
+            chat_prompt = self.prompt.format(context=doc, question=self.question)
+
+            num_tokens = self.llm.get_num_tokens(chat_prompt)
+            total_tokens += num_tokens
+
+            # summary = self.llm(summary_prompt)
+
+            # print (f"Summary: {summary.strip()}")
+            # print ("\n")
+        return total_tokens
+
+    def run_qa_chain(self, question):
+        self.question = question
+        self.get_tokens()
+        answer = self.chain(question)
+        return answer
+
+# system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the following questions as best you can.
+# You are a grading assistant who graded the canvas discussions to create the following grading results and feedback. Use the following pieces of the grading results and feedback to answer the users question.
+# Use the following pieces of context to answer the users question.
+# ----------------
+# {context}"""
+#
+# messages = [
+#     SystemMessagePromptTemplate.from_template(system_template),
+#     HumanMessagePromptTemplate.from_template("{question}"),
+# ]
+# CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
+#
+#
+# def get_search_index(embeddings):
+#     global vectorstore_index
+#     if os.path.isfile(pickle_file) and os.path.isfile(index_file) and os.path.getsize(pickle_file) > 0:
+#         # Load index from pickle file
+#         search_index = load_index(embeddings)
+#     else:
+#         search_index = create_index(model)
+#         print("Created index")
+#
+#     vectorstore_index = search_index
+#     return search_index
+#
+#
+# def create_index(embeddings):
+#     source_chunks = create_chunk_documents()
+#     search_index = search_index_from_docs(source_chunks, embeddings)
+#     # search_index.persist()
+#     FAISS.save_local(search_index, folder_path="vector_stores/", index_name="canvas-discussions")
+#     # Save index to pickle file
+#     # with open(pickle_file, "wb") as f:
+#     #     pickle.dump(search_index, f)
+#     return search_index
+#
+#
+# def search_index_from_docs(source_chunks, embeddings):
+#     # print("source chunks: " + str(len(source_chunks)))
+#     # print("embeddings: " + str(embeddings))
+#     search_index = FAISS.from_documents(source_chunks, embeddings)
+#     return search_index
+#
+#
+# def get_html_files():
+#     loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
+#     document_list = loader.load()
+#     for document in document_list:
+#         document.metadata["name"] = document.metadata["source"].split("/")[-1].split(".")[0]
+#     return document_list
+#
+#
+# def get_text_files():
+#     loader = DirectoryLoader('docs', glob="**/*.txt", loader_cls=TextLoader, recursive=True)
+#     document_list = loader.load()
+#     return document_list
+#
+#
+# def create_chunk_documents():
+#     sources = fetch_data_for_embeddings()
+#
+#     splitter = RecursiveCharacterTextSplitter.from_language(
+#         language=Language.HTML, chunk_size=500, chunk_overlap=0
+#     )
+#
+#     source_chunks = splitter.split_documents(sources)
+#
+#     print("chunks: " + str(len(source_chunks)))
+#     print("sources: " + str(len(sources)))
+#
+#     return source_chunks
+#
+#
+# def create_chain(question, llm, embeddings):
+#     db = load_index(embeddings)
+#
+#     # Create chain
+#     chain = ConversationalRetrievalChain.from_llm(llm, db.as_retriever(search_type='mmr',
+#                                                                        search_kwargs={'lambda_mult': 1, 'fetch_k': 50,
+#                                                                                       'k': 30}),
+#                                                   return_source_documents=True,
+#                                                   verbose=True,
+#                                                   memory=ConversationSummaryBufferMemory(memory_key='chat_history',
+#                                                                                          llm=llm, max_token_limit=40,
+#                                                                                          return_messages=True,
+#                                                                                          output_key='answer'),
+#                                                   get_chat_history=get_chat_history,
+#                                                   combine_docs_chain_kwargs={"prompt": CHAT_PROMPT})
+#
+#     result = chain({"question": question})
+#
+#     sources = []
+#     print(result)
+#
+#     for document in result['source_documents']:
+#         sources.append("\n" + str(document.metadata))
+#     print(sources)
+#
+#     source = ',\n'.join(set(sources))
+#     return result['answer'] + '\nSOURCES: ' + source
+#
+#
+# def load_index(embeddings):
+#     # Load index
+#     db = FAISS.load_local(
+#         folder_path="vector_stores/",
+#         index_name="canvas-discussions", embeddings=embeddings,
+#     )
+#     return db
+#
+#
+# def get_chat_history(inputs) -> str:
+#     res = []
+#     for human, ai in inputs:
+#         res.append(f"Human:{human}\nAI:{ai}")
+#     return "\n".join(res)
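
For orientation, a minimal usage sketch of the new GraderQA class, mirroring how app.py wires it up. This is an assumption-laden sketch, not part of the commit: it assumes OPENAI_API_KEY is set, that grading has already produced the CSV exposed as grader.csv, and that Grader accepts a model name and provides the llm and rubric_text attributes the constructor reads. Note that run_qa_chain() calls get_tokens(), which iterates self.docs and formats self.prompt, neither of which is assigned anywhere in this file, so the sketch calls the chain directly:

from langchain.embeddings import OpenAIEmbeddings

from grader import Grader
from grader_qa import GraderQA

grader = Grader('gpt-3.5-turbo')  # hypothetical qa_model argument, as in app.py
grader_qa = GraderQA(grader, OpenAIEmbeddings())

# The ConversationalRetrievalChain takes a dict with the question and returns a
# dict whose 'answer' key holds the reply; chat history is kept by the memory.
result = grader_qa.chain({"question": "How many students received a full score?"})
print(result["answer"])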
utils.py CHANGED
@@ -1,298 +1,14 @@
 import os
-
-from langchain import FAISS
-from langchain.chains import ConversationalRetrievalChain
-from langchain.chat_models import ChatOpenAI
-from langchain.document_loaders import CSVLoader
-from langchain.memory import ConversationBufferMemory
-from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-
-
-def search_index_from_docs(source_chunks, embeddings):
-    # print("source chunks: " + str(len(source_chunks)))
-    # print("embeddings: " + str(embeddings))
-    search_index = FAISS.from_documents(source_chunks, embeddings)
-    return search_index
-
-
-def get_chat_history(inputs) -> str:
-    res = []
-    for human, ai in inputs:
-        res.append(f"Human:{human}\nAI:{ai}")
-    return "\n".join(res)
-
-
-class GraderQA:
-    def __init__(self, grader, embeddings):
-        self.grader = grader
-        self.llm = self.grader.llm
-        self.index_file = "vector_stores/canvas-discussions.faiss"
-        self.pickle_file = "vector_stores/canvas-discussions.pkl"
-        self.rubric_text = grader.rubric_text
-        self.search_index = self.get_search_index(embeddings)
-        self.chain = self.create_chain(embeddings)
-        self.tokens = None
-        self.question = None
-
-    def get_search_index(self, embeddings):
-        if os.path.isfile(self.pickle_file) and os.path.isfile(self.index_file) and os.path.getsize(
-                self.pickle_file) > 0:
-            # Load index from pickle file
-            search_index = self.load_index(embeddings)
-        else:
-            search_index = self.create_index(embeddings)
-            print("Created index")
-        return search_index
-
-    def load_index(self, embeddings):
-        # Load index
-        db = FAISS.load_local(
-            folder_path="vector_stores/",
-            index_name="canvas-discussions", embeddings=embeddings,
-        )
-        print("Loaded index")
-        return db
-
-    def create_index(self, embeddings):
-        source_chunks = self.create_chunk_documents()
-        search_index = search_index_from_docs(source_chunks, embeddings)
-        FAISS.save_local(search_index, folder_path="vector_stores/", index_name="canvas-discussions")
-        return search_index
-
-    def create_chunk_documents(self):
-        sources = self.fetch_data_for_embeddings()
-
-        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
-
-        source_chunks = splitter.split_documents(sources)
-
-        print("chunks: " + str(len(source_chunks)))
-        print("sources: " + str(len(sources)))
-
-        return source_chunks
-
-    def fetch_data_for_embeddings(self):
-        document_list = self.get_csv_files()
-        print("document list: " + str(len(document_list)))
-        return document_list
-
-    def get_csv_files(self):
-        loader = CSVLoader(file_path=self.grader.csv, source_column="student_name")
-        document_list = loader.load()
-        return document_list
-
-    def create_chain(self, embeddings):
-        if not self.search_index:
-            self.search_index = self.load_index(embeddings)
-
-        question_prompt, combine_prompt = self.create_map_reduce_prompt()
-
-        chain = ConversationalRetrievalChain.from_llm(llm=self.llm, chain_type='map_reduce',
-                                                      retriever=self.search_index.as_retriever(search_type='mmr',
-                                                                                               search_kwargs={
-                                                                                                   'lambda_mult': 1,
-                                                                                                   'fetch_k': 50,
-                                                                                                   'k': 30}),
-                                                      return_source_documents=True,
-                                                      verbose=True,
-                                                      memory=ConversationBufferMemory(memory_key='chat_history',
-                                                                                      return_messages=True,
-                                                                                      output_key='answer'),
-                                                      condense_question_llm=ChatOpenAI(temperature=0,
-                                                                                       model='gpt-3.5-turbo'),
-                                                      combine_docs_chain_kwargs={"question_prompt": question_prompt,
-                                                                                 "combine_prompt": combine_prompt})
-        return chain
-
-    def create_map_reduce_prompt(self):
-        system_template = f"""Use the following portion of a long grading results document to answer the question BUT ONLY FOR THE STUDENT MENTIONED. Use the following examples to take guidance on how to answer the question.
-Examples:
-Question: How many students participated in the discussion?
-Answer: This student participated in the discussion./This student did not participate in the discussion.
-Question: What was the average score for the discussion?
-Answer: This student received a score of 10/10 for the discussion.
-Question: How many students received a full score?/How many students did not receive a full score?
-Answer: This student received a full score./This student did not receive a full score.
-Question: How many students lost marks in X category of the rubric?
-Answer: This student lost marks in X category of the rubric./This student did not lose marks in X category of the rubric.
-
-
-______________________
-Grading Result For:
-{{context}}
-______________________
-Following are the instructions and rubric of the discussion post for reference, used to grade the discussion.
-----------------
-Instructions and Rubric:
-{self.rubric_text}
-"""
-        messages = [
-            SystemMessagePromptTemplate.from_template(system_template),
-            HumanMessagePromptTemplate.from_template("{question}"),
-        ]
-        CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages)
-        system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
-Use the following answers for each student to answer the users question as accurately as possible.
-You are an expert at basic calculations and answering questions on grading results and can answer the following questions with ease.
-If you don't know the answer, just say that you don't know. Don't try to make up an answer.
-______________________
-{summaries}"""
-        messages = [
-            SystemMessagePromptTemplate.from_template(system_template),
-            HumanMessagePromptTemplate.from_template("{question}"),
-        ]
-        CHAT_COMBINE_PROMPT = ChatPromptTemplate.from_messages(messages)
-        return CHAT_QUESTION_PROMPT, CHAT_COMBINE_PROMPT
-
-    def create_prompt(self):
-        system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
-You are a grading assistant who graded the canvas discussions to create the following grading results and feedback.
-Use the following instruction, rubric of the discussion which were used to grade the discussions and refine the answer if needed.
-----------------
-{self.rubric_text}
-----------------
-Use the following pieces of the grading results, score, feedback and summary of student responses to answer the users question as accurately as possible.
-{{context}}"""
-        messages = [
-            SystemMessagePromptTemplate.from_template(system_template),
-            HumanMessagePromptTemplate.from_template("{question}"),
-        ]
-        return ChatPromptTemplate.from_messages(messages)
-
-    def get_tokens(self):
-        total_tokens = 0
-        for doc in self.docs:
-            chat_prompt = self.prompt.format(context=doc, question=self.question)
-
-            num_tokens = self.llm.get_num_tokens(chat_prompt)
-            total_tokens += num_tokens
-
-            # summary = self.llm(summary_prompt)
-
-            # print (f"Summary: {summary.strip()}")
-            # print ("\n")
-        return total_tokens
-
-    def run_qa_chain(self, question):
-        self.question = question
-        self.get_tokens()
-        answer = self.chain(question)
-        return answer
-
-# system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the following questions as best you can.
-# You are a grading assistant who graded the canvas discussions to create the following grading results and feedback. Use the following pieces of the grading results and feedback to answer the users question.
-# Use the following pieces of context to answer the users question.
-# ----------------
-# {context}"""
-#
-# messages = [
-#     SystemMessagePromptTemplate.from_template(system_template),
-#     HumanMessagePromptTemplate.from_template("{question}"),
-# ]
-# CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
-#
-#
-# def get_search_index(embeddings):
-#     global vectorstore_index
-#     if os.path.isfile(pickle_file) and os.path.isfile(index_file) and os.path.getsize(pickle_file) > 0:
-#         # Load index from pickle file
-#         search_index = load_index(embeddings)
-#     else:
-#         search_index = create_index(model)
-#         print("Created index")
-#
-#     vectorstore_index = search_index
-#     return search_index
-#
-#
-# def create_index(embeddings):
-#     source_chunks = create_chunk_documents()
-#     search_index = search_index_from_docs(source_chunks, embeddings)
-#     # search_index.persist()
-#     FAISS.save_local(search_index, folder_path="vector_stores/", index_name="canvas-discussions")
-#     # Save index to pickle file
-#     # with open(pickle_file, "wb") as f:
-#     #     pickle.dump(search_index, f)
-#     return search_index
-#
-#
-# def search_index_from_docs(source_chunks, embeddings):
-#     # print("source chunks: " + str(len(source_chunks)))
-#     # print("embeddings: " + str(embeddings))
-#     search_index = FAISS.from_documents(source_chunks, embeddings)
-#     return search_index
-#
-#
-# def get_html_files():
-#     loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
-#     document_list = loader.load()
-#     for document in document_list:
-#         document.metadata["name"] = document.metadata["source"].split("/")[-1].split(".")[0]
-#     return document_list
-#
-#
-# def get_text_files():
-#     loader = DirectoryLoader('docs', glob="**/*.txt", loader_cls=TextLoader, recursive=True)
-#     document_list = loader.load()
-#     return document_list
-#
-#
-# def create_chunk_documents():
-#     sources = fetch_data_for_embeddings()
-#
-#     splitter = RecursiveCharacterTextSplitter.from_language(
-#         language=Language.HTML, chunk_size=500, chunk_overlap=0
-#     )
-#
-#     source_chunks = splitter.split_documents(sources)
-#
-#     print("chunks: " + str(len(source_chunks)))
-#     print("sources: " + str(len(sources)))
-#
-#     return source_chunks
-#
-#
-# def create_chain(question, llm, embeddings):
-#     db = load_index(embeddings)
-#
-#     # Create chain
-#     chain = ConversationalRetrievalChain.from_llm(llm, db.as_retriever(search_type='mmr',
-#                                                                        search_kwargs={'lambda_mult': 1, 'fetch_k': 50,
-#                                                                                       'k': 30}),
-#                                                   return_source_documents=True,
-#                                                   verbose=True,
-#                                                   memory=ConversationSummaryBufferMemory(memory_key='chat_history',
-#                                                                                          llm=llm, max_token_limit=40,
-#                                                                                          return_messages=True,
-#                                                                                          output_key='answer'),
-#                                                   get_chat_history=get_chat_history,
-#                                                   combine_docs_chain_kwargs={"prompt": CHAT_PROMPT})
-#
-#     result = chain({"question": question})
-#
-#     sources = []
-#     print(result)
-#
-#     for document in result['source_documents']:
-#         sources.append("\n" + str(document.metadata))
-#     print(sources)
-#
-#     source = ',\n'.join(set(sources))
-#     return result['answer'] + '\nSOURCES: ' + source
-#
-#
-# def load_index(embeddings):
-#     # Load index
-#     db = FAISS.load_local(
-#         folder_path="vector_stores/",
-#         index_name="canvas-discussions", embeddings=embeddings,
-#     )
-#     return db
-#
-#
-# def get_chat_history(inputs) -> str:
-#     res = []
-#     for human, ai in inputs:
-#         res.append(f"Human:{human}\nAI:{ai}")
-#     return "\n".join(res)
+import shutil
+import time
+
+
+def reset_folder(destination):
+    # Synchronously and recursively delete the destination folder and all its contents; do not return until done
+    if os.path.isdir(destination):
+        shutil.rmtree(destination)
+        while os.path.isdir(destination):
+            time.sleep(4)
+    os.mkdir(destination)
+    while not os.path.isdir(destination):
+        time.sleep(4)
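
A short usage sketch of the new helper, matching how reset_data() in app.py calls it (nothing beyond the commit is assumed):

from utils import reset_folder

# Wipe and recreate the working directories before a fresh ingest;
# each call blocks until the folder exists again, empty.
for folder in ('output', 'docs', 'vector_stores'):
    reset_folder(folder)

Since shutil.rmtree() and os.mkdir() are synchronous on local filesystems, the sleep loops act only as a conservative guard, e.g. against a slow network mount reporting stale directory state.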