chat-your-data-MED

Sleeping

App Files Files Community

danielcwq committed on Mar 12, 2023

Commit

6772051

•

0 Parent(s):

Duplicate from danielcwq/chat-your-data-trial

Browse files

Files changed (8) hide show

.gitattributes +34 -0
README.md +14 -0
app.py +102 -0
cli_app.py +17 -0
ingest_data.py +23 -0
query_data.py +34 -0
requirements.txt +5 -0
vectorstore.pkl +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: Chat Your Data H2 Economics
+emoji: 📊
+colorFrom: gray
+colorTo: purple
+sdk: gradio
+sdk_version: 3.17.0
+app_file: app.py
+pinned: false
+license: mit
+duplicated_from: danielcwq/chat-your-data-trial
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import os
+from typing import Optional, Tuple
+import gradio as gr
+import pickle
+from query_data import get_chain
+from threading import Lock
+# Load the pickled vector store once, at import time; the module-level
+# `vectorstore` is what set_openai_api_key() passes to get_chain().
+# NOTE: pickle.load can execute arbitrary code while unpickling, so
+# vectorstore.pkl must come from a trusted source.
+with open("vectorstore.pkl", "rb") as f:
+    vectorstore = pickle.load(f)
+def set_openai_api_key(api_key: str):
+    """Set the api key and return chain.
+    If no api_key, then None is returned.
+    """
+    if api_key:
+        os.environ["OPENAI_API_KEY"] = api_key
+        chain = get_chain(vectorstore)
+        os.environ["OPENAI_API_KEY"] = ""
+        return chain
+class ChatWrapper:
+    def __init__(self):
+        self.lock = Lock()
+    def __call__(
+        self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain
+    ):
+        """Execute the chat functionality."""
+        self.lock.acquire()
+        try:
+            history = history or []
+            # If chain is None, that is because no API key was provided.
+            if chain is None:
+                history.append((inp, "Please paste your OpenAI key to use"))
+                return history, history
+            # Set OpenAI key
+            import openai
+            openai.api_key = api_key
+            # Run chain and append input.
+            output = chain({"question": inp, "chat_history": history})["answer"]
+            history.append((inp, output))
+        except Exception as e:
+            raise e
+        finally:
+            self.lock.release()
+        return history, history
+# Single ChatWrapper instance used as the handler for both submit events below.
+chat = ChatWrapper()
+block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
+with block:
+    # Header row: title plus the password-style textbox for the OpenAI key.
+    with gr.Row():
+        gr.Markdown("<h3><center>Chat-Your-Data (H2 Economics)</center></h3>")
+        openai_api_key_textbox = gr.Textbox(
+            placeholder="Paste your OpenAI API key (sk-...)",
+            show_label=False,
+            lines=1,
+            type="password",
+        )
+    chatbot = gr.Chatbot()
+    # Input row: question textbox and the Send button.
+    with gr.Row():
+        message = gr.Textbox(
+            label="What's your question?",
+            placeholder="Ask questions about anything covered in the H2 Economics syllabus",
+            lines=1,
+        )
+        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
+    # Example questions wired to the message textbox.
+    gr.Examples(
+        examples=[
+            "Explain real wealth effect.",
+            "Use the real wealth effect to explain the negative gradient of the AD curve.",
+            "Explain the multiplier process.",
+        ],
+        inputs=message,
+    )
+    gr.HTML("Demo application of a LangChain chain, built on H2 Economics Data. Many thanks to Jean Chua for giving her notes for this project.")
+    gr.HTML(
+        "<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>"
+    )
+    # `state` is passed to chat() as `history` and receives its second return value;
+    # `agent_state` receives the chain from set_openai_api_key() and is passed to chat() as `chain`.
+    state = gr.State()
+    agent_state = gr.State()
+    # Clicking Send and submitting the textbox both call chat() with the same inputs and outputs.
+    submit.click(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])
+    message.submit(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])
+    # The key textbox's change event calls set_openai_api_key() and stores the result in agent_state.
+    openai_api_key_textbox.change(
+        set_openai_api_key,
+        inputs=[openai_api_key_textbox],
+        outputs=[agent_state],
+    )
+# Runs at module level, so the app is launched as soon as this file is executed.
+block.launch(debug=True)

cli_app.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import pickle
+from query_data import get_chain
+if __name__ == "__main__":
+    with open("vectorstore.pkl", "rb") as f:
+        vectorstore = pickle.load(f)
+    qa_chain = get_chain(vectorstore)
+    chat_history = []
+    print("Chat with your docs!")
+    while True:
+        print("Human:")
+        question = input()
+        result = qa_chain({"question": question, "chat_history": chat_history})
+        chat_history.append((question, result["answer"]))
+        print("AI:")
+        print(result["answer"])

ingest_data.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import UnstructuredFileLoader
+from langchain.vectorstores.faiss import FAISS
+from langchain.embeddings import OpenAIEmbeddings
+import pickle
+# Load Data
+loader = UnstructuredFileLoader("state_of_the_union.txt")
+raw_documents = loader.load()
+# Split text
+text_splitter = RecursiveCharacterTextSplitter()
+documents = text_splitter.split_documents(raw_documents)
+# Load Data to vectorstore
+embeddings = OpenAIEmbeddings()
+vectorstore = FAISS.from_documents(documents, embeddings)
+# Save vectorstore
+with open("vectorstore.pkl", "wb") as f:
+    pickle.dump(vectorstore, f)

query_data.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from langchain.prompts.prompt import PromptTemplate
+from langchain.llms import OpenAI
+from langchain.chains import ChatVectorDBChain
+# Prompt that asks the model to rewrite a follow-up question as a standalone
+# question; it is filled with {chat_history} and {question}.
+# NOTE(review): the second sentence reads "assume the question about" and is
+# probably missing "is"; confirm before editing, since the text is sent to the model.
+_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
+You can assume the question about the syllabus of the H2 Economics A-Level Examination in Singapore.
+Chat History:
+{chat_history}
+Follow Up Input: {question}
+Standalone question:"""
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+# Answering prompt: filled with {question} and {context}; instructs the model
+# to answer in Markdown and to decline questions outside H2 Economics.
+template = """You are an AI assistant for answering questions about economics for the H2 Economics A-Levels.
+You are given the following extracted parts of a long document and a question. Provide a conversational answer.
+If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
+If the question is not about H2 Economics, politely inform them that you are tuned to only answer questions about it.
+Question: {question}
+=========
+{context}
+=========
+Answer in Markdown:"""
+QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
+def get_chain(vectorstore):
+    llm = OpenAI(temperature=0)
+    qa_chain = ChatVectorDBChain.from_llm(
+        llm,
+        vectorstore,
+        qa_prompt=QA_PROMPT,
+        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+    )
+    return qa_chain

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+langchain
+openai
+unstructured
+faiss-cpu
+gradio

vectorstore.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3dbc3a6b561a37e67ed7afd7827808b219706f3daf4d280b2894e9116c43a994
+size 1896857