andreasmartin committed on
Commit
4d4c181
β€’
1 Parent(s): c769598

Add files via upload

Browse files
Files changed (6) hide show
  1. Procfile +1 -0
  2. app.json +14 -0
  3. app.py +190 -0
  4. chainlit.md +7 -0
  5. requirements.txt +5 -0
  6. runtime.txt +1 -0
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: chainlit run app.py --port $PORT --no-cache
app.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "ChEdBot",
3
+ "image": "heroku/python",
4
+ "env": {
5
+ "DIALOGUE_SHEET": {},
6
+ "PROMPT_ENGINEERING_SHEET": {},
7
+ "PERSONA_SHEET": {},
8
+ "AGENT": {},
9
+ "OPENAI_API_KEY": {},
10
+ "OPENAI_API_TYPE": {},
11
+ "OPENAI_API_BASE": {},
12
+ "OPENAI_API_VERSION": {}
13
+ }
14
+ }
app.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import chainlit as cl
4
+ from chainlit import user_session
5
+ from chainlit.types import LLMSettings
6
+ from langchain import LLMChain
7
+ from langchain.prompts import PromptTemplate
8
+ from langchain.llms import AzureOpenAI
9
+ from langchain.document_loaders import DataFrameLoader
10
+ from langchain.embeddings import HuggingFaceEmbeddings
11
+ from langchain.memory import ConversationBufferWindowMemory
12
+ from langchain.vectorstores import Chroma
13
+ from langchain.vectorstores.base import VectorStoreRetriever
14
+
15
+
16
# Name of the active agent; every sheet loader filters its rows to this value.
# Read eagerly at import time, so a missing AGENT env var fails fast with KeyError.
current_agent = os.environ["AGENT"]
17
+
18
+
19
def load_dialogues():
    """Read the dialogue sheet and return the active agent's rows as strings.

    The sheet URL comes from the DIALOGUE_SHEET environment variable;
    everything is cast to str so downstream metadata handling is uniform.
    """
    sheet = pd.read_excel(
        os.environ["DIALOGUE_SHEET"], header=0, keep_default_na=False
    )
    return sheet.loc[sheet["Agent"] == current_agent].astype(str)
23
+
24
+
25
def load_persona():
    """Read the persona sheet and return the active agent's rows as strings.

    The sheet URL comes from the PERSONA_SHEET environment variable.
    """
    sheet = pd.read_excel(
        os.environ["PERSONA_SHEET"], header=0, keep_default_na=False
    )
    return sheet.loc[sheet["Agent"] == current_agent].astype(str)
29
+
30
+
31
def load_prompt_engineering():
    """Read the prompt-engineering sheet and return the active agent's rows.

    The sheet URL comes from the PROMPT_ENGINEERING_SHEET environment
    variable; all values are cast to str for uniform handling.
    """
    sheet = pd.read_excel(
        os.environ["PROMPT_ENGINEERING_SHEET"], header=0, keep_default_na=False
    )
    return sheet.loc[sheet["Agent"] == current_agent].astype(str)
37
+
38
+
39
def load_documents(df, page_content_column: str):
    """Turn a DataFrame into LangChain documents, one per row.

    `page_content_column` names the column used as the document text;
    the remaining columns become document metadata.
    """
    loader = DataFrameLoader(df, page_content_column)
    return loader.load()
41
+
42
+
43
def init_embedding_function():
    """Create the sentence-transformer embedding function.

    Vectors are L2-normalized; the model download is cached locally in
    `.embedding-model` so restarts don't re-fetch it.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        encode_kwargs={"normalize_embeddings": True},
        cache_folder=".embedding-model",
    )
50
+
51
+
52
def load_vectordb(init: bool = False):
    """Open the persisted Chroma store, rebuilding it when needed.

    With `init=False`, the existing collection in `.vectordb` is opened and
    reused unless it is empty. With `init=True` (or an empty collection),
    the store is rebuilt from the dialogue sheet's "Utterance" column and
    persisted to disk.
    """
    VECTORDB_FOLDER = ".vectordb"
    store = (
        None
        if init
        else Chroma(
            embedding_function=init_embedding_function(),
            persist_directory=VECTORDB_FOLDER,
        )
    )
    # Short-circuit: store.get() is only consulted when an existing store
    # was opened above.
    if store is None or not store.get()["ids"]:
        store = Chroma.from_documents(
            documents=load_documents(load_dialogues(), page_content_column="Utterance"),
            embedding=init_embedding_function(),
            persist_directory=VECTORDB_FOLDER,
        )
        store.persist()
    return store
68
+
69
+
70
def get_retriever(context_state: str, vectordb):
    """Build a top-1 similarity retriever scoped to the current context.

    Only utterances whose Context metadata is empty (context-free) or equals
    `context_state` are eligible matches.
    """
    context_filter = {
        "$or": [{"Context": {"$eq": ""}}, {"Context": {"$eq": context_state}}]
    }
    return VectorStoreRetriever(
        vectorstore=vectordb,
        search_type="similarity",
        search_kwargs={"filter": context_filter, "k": 1},
    )
81
+
82
+
83
# Module-level vector store, built once at import time and shared by all
# sessions; refreshed in place by the "/reload" command in run().
vectordb = load_vectordb()
84
+
85
+
86
@cl.langchain_factory(use_async=True)
def factory():
    """Build the per-session LangChain pipeline for Chainlit.

    Reads the prompt-engineering sheet for the active agent, configures an
    Azure OpenAI completion model, and wires two chains that share one
    windowed chat memory:

    - the utterance chain (returned, driven by user input), and
    - the continuation chain (stored in the user session for follow-up
      messages emitted by run()).

    Returns the utterance chain; Chainlit passes it to run() as `agent`.
    """
    df_prompt_engineering = load_prompt_engineering()
    user_session.set("context_state", "")

    # The loaders cast every sheet value to str, so Temperature/History
    # arrive as strings: cast them back to the numeric types the LLM and
    # the window memory expect (a str `k` breaks the memory's slicing).
    llm_settings = LLMSettings(
        model_name="text-davinci-003",
        temperature=float(df_prompt_engineering["Temperature"].values[0]),
    )
    user_session.set("llm_settings", llm_settings)

    llm = AzureOpenAI(
        deployment_name="davinci003",
        model_name=llm_settings.model_name,
        temperature=llm_settings.temperature,
        streaming=True,
    )

    # Prompt templates are authored in the sheet, one per chain.
    utterance_prompt = PromptTemplate.from_template(
        df_prompt_engineering["Utterance-Prompt"].values[0]
    )

    # One shared memory: both chains see the same conversation history.
    chat_memory = ConversationBufferWindowMemory(
        memory_key="History",
        input_key="Utterance",
        k=int(df_prompt_engineering["History"].values[0]),
    )

    utterance_chain = LLMChain(
        prompt=utterance_prompt,
        llm=llm,
        verbose=False,
        memory=chat_memory,
    )

    continuation_prompt = PromptTemplate.from_template(
        df_prompt_engineering["Continuation-Prompt"].values[0]
    )

    continuation_chain = LLMChain(
        prompt=continuation_prompt,
        llm=llm,
        verbose=False,
        memory=chat_memory,
    )

    # Stored per session so run() can emit scripted follow-up messages.
    user_session.set("continuation_chain", continuation_chain)

    return utterance_chain
135
+
136
+
137
@cl.langchain_run
async def run(agent, input_str):
    """Handle one incoming user message.

    "/reload" rebuilds the module-level vector store from the sheets;
    any other input is matched against the dialogue vector store, answered
    via the utterance chain (`agent`), and then any scripted continuation
    messages are emitted via the continuation chain.
    """
    global vectordb
    if input_str == "/reload":
        # Admin command: force a rebuild of the Chroma store from the sheet.
        vectordb = load_vectordb(True)
        await cl.Message(content="Data loaded").send()
    else:
        df_persona = load_persona()

        # Retrieval is scoped to the session's current context state.
        retriever = get_retriever(user_session.get("context_state"), vectordb)

        # k=1 retriever: document[0] is the single best-matching utterance.
        # NOTE(review): an empty result list would raise IndexError here —
        # presumably the sheet always yields a match; confirm.
        document = retriever.get_relevant_documents(query=input_str)

        response = await agent.acall(
            {
                # Persona text is looked up by the AI name attached to the
                # matched utterance's metadata.
                "Persona": df_persona.loc[
                    df_persona["AI"] == document[0].metadata["AI"]
                ]["Persona"].values[0],
                "Utterance": input_str,
                "Response": document[0].metadata["Response"],
            },
            callbacks=[cl.AsyncLangchainCallbackHandler()],
        )
        await cl.Message(
            content=response["text"],
            author=document[0].metadata["AI"],
            llm_settings=user_session.get("llm_settings"),
        ).send()
        # The matched row steers the next turn's retrieval context.
        user_session.set("context_state", document[0].metadata["Contextualisation"])
        continuation = document[0].metadata["Continuation"]

        # Follow the Continuation chain of intents until a row with an
        # empty Continuation terminates the sequence.
        while continuation != "":
            # Direct metadata lookup (no similarity search) by intent name.
            document_continuation = vectordb.get(where={"Intent": continuation})
            continuation_chain = user_session.get("continuation_chain")
            response = await continuation_chain.acall(
                {
                    "Persona": df_persona.loc[
                        df_persona["AI"] == document_continuation["metadatas"][0]["AI"]
                    ]["Persona"].values[0],
                    # Continuations are agent-initiated: no user utterance.
                    "Utterance": "",
                    "Response": document_continuation["metadatas"][0]["Response"],
                },
                callbacks=[cl.AsyncLangchainCallbackHandler()],
            )
            await cl.Message(
                content=response["text"],
                author=document_continuation["metadatas"][0]["AI"],
                llm_settings=user_session.get("llm_settings"),
            ).send()
            user_session.set(
                "context_state",
                document_continuation["metadatas"][0]["Contextualisation"],
            )
            continuation = document_continuation["metadatas"][0]["Continuation"]
chainlit.md ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Welcome to ChEdBot 🦜!
2
+
3
+ ## Useful Links πŸ”—
4
+
5
+ - **Documentation:** [ChEdBot Documentation]() πŸ“š
6
+
7
+ We can't wait to see what you create with ChEdBot!
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ chainlit
2
+ langchain
3
+ sentence_transformers
4
+ chromadb
5
+ openpyxl
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.10