danielcwq committed on
Commit
6772051
0 Parent(s):

Duplicate from danielcwq/chat-your-data-trial

Browse files
Files changed (8) hide show
  1. .gitattributes +34 -0
  2. README.md +14 -0
  3. app.py +102 -0
  4. cli_app.py +17 -0
  5. ingest_data.py +23 -0
  6. query_data.py +34 -0
  7. requirements.txt +5 -0
  8. vectorstore.pkl +3 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Chat Your Data H2 Economics
3
+ emoji: 📊
4
+ colorFrom: gray
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.17.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: danielcwq/chat-your-data-trial
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Optional, Tuple
3
+
4
+ import gradio as gr
5
+ import pickle
6
+ from query_data import get_chain
7
+ from threading import Lock
8
+
9
+ with open("vectorstore.pkl", "rb") as f:
10
+ vectorstore = pickle.load(f)
11
+
12
+
13
def set_openai_api_key(api_key: str):
    """Build a QA chain using the supplied OpenAI API key.

    The key is written to the ``OPENAI_API_KEY`` environment variable only
    for the duration of the ``get_chain`` call. The variable is blanked again
    afterwards, including when ``get_chain`` raises, so the key is not left
    behind in the process environment on a failure.

    Args:
        api_key: The OpenAI API key. May be empty or ``None``.

    Returns:
        The object returned by ``get_chain(vectorstore)``, or ``None`` when
        no key was provided.
    """
    if not api_key:
        return None

    os.environ["OPENAI_API_KEY"] = api_key
    try:
        return get_chain(vectorstore)
    finally:
        # Reset to the empty string (the value this function has always
        # left behind) whether or not chain construction succeeded.
        os.environ["OPENAI_API_KEY"] = ""
22
+
23
class ChatWrapper:
    """Callable chat handler that processes one request at a time.

    Every call runs entirely under ``self.lock``, so concurrent invocations
    of the same instance are serialized.
    """

    def __init__(self):
        self.lock = Lock()

    def __call__(
        self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain
    ):
        """Execute the chat functionality.

        Args:
            api_key: OpenAI API key; assigned to ``openai.api_key`` before the
                chain is run.
            inp: The user's message.
            history: Earlier ``(question, answer)`` pairs. Although annotated
                as a single tuple, it is used as a list: a falsy value is
                replaced by a new empty list, otherwise the given list is
                appended to in place.
            chain: Callable invoked with a dict holding ``"question"`` and
                ``"chat_history"``; its result is indexed with ``"answer"``.
                ``None`` means no API key has been provided yet.

        Returns:
            A ``(history, history)`` pair: the same updated list twice.

        Raises:
            Any exception raised while running the chain propagates
            unchanged; the lock is released first.
        """
        # The context manager guarantees the lock is released on every exit
        # path, including exceptions and the early return below.
        with self.lock:
            history = history or []
            # If chain is None, that is because no API key was provided.
            if chain is None:
                history.append((inp, "Please paste your OpenAI key to use"))
                return history, history
            # Set OpenAI key
            import openai

            openai.api_key = api_key
            # Run chain and append input.
            output = chain({"question": inp, "chat_history": history})["answer"]
            history.append((inp, output))
            return history, history
49
+
50
+ chat = ChatWrapper()
51
+
52
+ block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
53
+
54
+ with block:
55
+ with gr.Row():
56
+ gr.Markdown("<h3><center>Chat-Your-Data (H2 Economics)</center></h3>")
57
+
58
+ openai_api_key_textbox = gr.Textbox(
59
+ placeholder="Paste your OpenAI API key (sk-...)",
60
+ show_label=False,
61
+ lines=1,
62
+ type="password",
63
+ )
64
+
65
+ chatbot = gr.Chatbot()
66
+
67
+ with gr.Row():
68
+ message = gr.Textbox(
69
+ label="What's your question?",
70
+ placeholder="Ask questions about anything covered in the H2 Economics syllabus",
71
+ lines=1,
72
+ )
73
+ submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
74
+
75
+ gr.Examples(
76
+ examples=[
77
+ "Explain real wealth effect.",
78
+ "Use the real wealth effect to explain the negative gradient of the AD curve.",
79
+ "Explain the multiplier process.",
80
+ ],
81
+ inputs=message,
82
+ )
83
+
84
+ gr.HTML("Demo application of a LangChain chain, built on H2 Economics Data. Many thanks to Jean Chua for giving her notes for this project.")
85
+
86
+ gr.HTML(
87
+ "<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>"
88
+ )
89
+
90
+ state = gr.State()
91
+ agent_state = gr.State()
92
+
93
+ submit.click(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])
94
+ message.submit(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])
95
+
96
+ openai_api_key_textbox.change(
97
+ set_openai_api_key,
98
+ inputs=[openai_api_key_textbox],
99
+ outputs=[agent_state],
100
+ )
101
+
102
+ block.launch(debug=True)
cli_app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from query_data import get_chain
3
+
4
+
5
if __name__ == "__main__":
    # Interactive command-line chat over the pickled vector store.
    # NOTE: unpickling can execute arbitrary code; only load a
    # vectorstore.pkl that you generated yourself.
    with open("vectorstore.pkl", "rb") as f:
        vectorstore = pickle.load(f)
    qa_chain = get_chain(vectorstore)
    # Accumulated (question, answer) pairs passed back to the chain each turn.
    chat_history = []
    print("Chat with your docs!")
    while True:
        print("Human:")
        try:
            question = input()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C / closed stdin: leave the loop instead of
            # terminating with a traceback.
            print()
            break
        result = qa_chain({"question": question, "chat_history": chat_history})
        chat_history.append((question, result["answer"]))
        print("AI:")
        print(result["answer"])
ingest_data.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from langchain.document_loaders import UnstructuredFileLoader
3
+ from langchain.vectorstores.faiss import FAISS
4
+ from langchain.embeddings import OpenAIEmbeddings
5
+ import pickle
6
+
7
+ # Load Data
8
+ loader = UnstructuredFileLoader("state_of_the_union.txt")
9
+ raw_documents = loader.load()
10
+
11
+ # Split text
12
+ text_splitter = RecursiveCharacterTextSplitter()
13
+ documents = text_splitter.split_documents(raw_documents)
14
+
15
+
16
+ # Load Data to vectorstore
17
+ embeddings = OpenAIEmbeddings()
18
+ vectorstore = FAISS.from_documents(documents, embeddings)
19
+
20
+
21
+ # Save vectorstore
22
+ with open("vectorstore.pkl", "wb") as f:
23
+ pickle.dump(vectorstore, f)
query_data.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts.prompt import PromptTemplate
2
+ from langchain.llms import OpenAI
3
+ from langchain.chains import ChatVectorDBChain
4
+
5
+ _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
6
+ You can assume the question about the syllabus of the H2 Economics A-Level Examination in Singapore.
7
+
8
+ Chat History:
9
+ {chat_history}
10
+ Follow Up Input: {question}
11
+ Standalone question:"""
12
+ CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
13
+
14
+ template = """You are an AI assistant for answering questions about economics for the H2 Economics A-Levels.
15
+ You are given the following extracted parts of a long document and a question. Provide a conversational answer.
16
+ If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
17
+ If the question is not about H2 Economics, politely inform them that you are tuned to only answer questions about it.
18
+ Question: {question}
19
+ =========
20
+ {context}
21
+ =========
22
+ Answer in Markdown:"""
23
+ QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
24
+
25
+
26
def get_chain(vectorstore):
    """Return a ``ChatVectorDBChain`` that answers questions from ``vectorstore``.

    The chain is created with ``ChatVectorDBChain.from_llm`` using an
    ``OpenAI`` LLM constructed with ``temperature=0``, ``QA_PROMPT`` as the
    answering prompt and ``CONDENSE_QUESTION_PROMPT`` as the prompt for
    condensing follow-up questions.

    Args:
        vectorstore: The vector store handed to ``ChatVectorDBChain.from_llm``.

    Returns:
        The constructed chain.
    """
    return ChatVectorDBChain.from_llm(
        OpenAI(temperature=0),
        vectorstore,
        qa_prompt=QA_PROMPT,
        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    )
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ unstructured
4
+ faiss-cpu
5
+ gradio
vectorstore.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dbc3a6b561a37e67ed7afd7827808b219706f3daf4d280b2894e9116c43a994
3
+ size 1896857