Spaces:
Runtime error
Runtime error
andreasmartin
commited on
Commit
•
4d4c181
1
Parent(s):
c769598
Add files via upload
Browse files- Procfile +1 -0
- app.json +14 -0
- app.py +190 -0
- chainlit.md +7 -0
- requirements.txt +5 -0
- runtime.txt +1 -0
Procfile
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
web: chainlit run app.py --port $PORT --no-cache
|
app.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "ChEdBot",
|
3 |
+
"image": "heroku/python",
|
4 |
+
"env": {
|
5 |
+
"DIALOGUE_SHEET": {},
|
6 |
+
"PROMPT_ENGINEERING_SHEET": {},
|
7 |
+
"PERSONA_SHEET": {},
|
8 |
+
"AGENT": {},
|
9 |
+
"OPENAI_API_KEY": {},
|
10 |
+
"OPENAI_API_TYPE": {},
|
11 |
+
"OPENAI_API_BASE": {},
|
12 |
+
"OPENAI_API_VERSION": {}
|
13 |
+
}
|
14 |
+
}
|
app.py
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import chainlit as cl
|
4 |
+
from chainlit import user_session
|
5 |
+
from chainlit.types import LLMSettings
|
6 |
+
from langchain import LLMChain
|
7 |
+
from langchain.prompts import PromptTemplate
|
8 |
+
from langchain.llms import AzureOpenAI
|
9 |
+
from langchain.document_loaders import DataFrameLoader
|
10 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
11 |
+
from langchain.memory import ConversationBufferWindowMemory
|
12 |
+
from langchain.vectorstores import Chroma
|
13 |
+
from langchain.vectorstores.base import VectorStoreRetriever
|
14 |
+
|
15 |
+
|
16 |
+
# Name of the active agent; every sheet loader below filters its rows to this
# value, so one deployment serves exactly one agent configuration.
current_agent = os.environ["AGENT"]
|
17 |
+
|
18 |
+
|
19 |
+
def load_dialogues():
    """Load the dialogue sheet, keeping only rows for the active agent.

    Returns a DataFrame with every column cast to ``str`` (downstream code
    compares cell values as strings).
    """
    sheet = pd.read_excel(
        os.environ["DIALOGUE_SHEET"], header=0, keep_default_na=False
    )
    sheet = sheet[sheet["Agent"] == current_agent]
    return sheet.astype(str)
|
23 |
+
|
24 |
+
|
25 |
+
def load_persona():
    """Load the persona sheet filtered to the active agent, all columns as str."""
    sheet = pd.read_excel(
        os.environ["PERSONA_SHEET"], header=0, keep_default_na=False
    )
    return sheet[sheet["Agent"] == current_agent].astype(str)
|
29 |
+
|
30 |
+
|
31 |
+
def load_prompt_engineering():
    """Load the prompt-engineering sheet filtered to the active agent.

    Like the other loaders, the result has every column cast to ``str``.
    """
    sheet_url = os.environ["PROMPT_ENGINEERING_SHEET"]
    sheet = pd.read_excel(sheet_url, header=0, keep_default_na=False)
    return sheet[sheet["Agent"] == current_agent].astype(str)
|
37 |
+
|
38 |
+
|
39 |
+
def load_documents(df, page_content_column: str):
    """Turn DataFrame rows into langchain Documents.

    *page_content_column* becomes each Document's text; the remaining
    columns travel along as metadata (per DataFrameLoader's contract).
    """
    loader = DataFrameLoader(df, page_content_column)
    return loader.load()
|
41 |
+
|
42 |
+
|
43 |
+
def init_embedding_function():
    """Create the sentence-transformers embedding function.

    Embeddings are L2-normalized; model weights are cached on disk so the
    dyno does not re-download them on every boot.
    """
    cache_dir = ".embedding-model"
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        encode_kwargs={"normalize_embeddings": True},
        cache_folder=cache_dir,
    )
|
50 |
+
|
51 |
+
|
52 |
+
def load_vectordb(init: bool = False):
    """Open the persisted Chroma store, rebuilding it when needed.

    A rebuild (re-embedding every dialogue utterance) happens when *init* is
    True or when the persisted store turns out to be empty; otherwise the
    existing on-disk collection is reused as-is.
    """
    VECTORDB_FOLDER = ".vectordb"
    store = None
    if not init:
        store = Chroma(
            embedding_function=init_embedding_function(),
            persist_directory=VECTORDB_FOLDER,
        )
    # Rebuild on explicit request, or when the opened store holds no entries.
    if init or not store.get()["ids"]:
        documents = load_documents(load_dialogues(), page_content_column="Utterance")
        store = Chroma.from_documents(
            documents=documents,
            embedding=init_embedding_function(),
            persist_directory=VECTORDB_FOLDER,
        )
        store.persist()
    return store
|
68 |
+
|
69 |
+
|
70 |
+
def get_retriever(context_state: str, vectordb):
    """Build a similarity retriever returning the single best match.

    The metadata filter admits rows whose ``Context`` is empty (valid in any
    state) or equal to the caller's *context_state*.
    """
    context_filter = {
        "$or": [
            {"Context": {"$eq": ""}},
            {"Context": {"$eq": context_state}},
        ]
    }
    return VectorStoreRetriever(
        vectorstore=vectordb,
        search_type="similarity",
        search_kwargs={"filter": context_filter, "k": 1},
    )
|
81 |
+
|
82 |
+
|
83 |
+
# Module-level vector store, built once at import; the run() handler rebinds
# it when the user issues "/reload".
vectordb = load_vectordb()
|
84 |
+
|
85 |
+
|
86 |
+
@cl.langchain_factory(use_async=True)
def factory():
    """Build the per-session chains.

    Returns the utterance chain (chainlit drives it with user input) and
    stashes a continuation chain in the user session for scripted follow-up
    turns. Both chains share one sliding-window memory so they see the same
    recent history.

    Fix: the sheet loaders cast every column to ``str``, yet ``Temperature``
    and ``History`` feed parameters expecting float/int. Convert explicitly
    instead of relying on implicit pydantic string coercion.
    """
    df_prompt_engineering = load_prompt_engineering()
    user_session.set("context_state", "")

    llm_settings = LLMSettings(
        model_name="text-davinci-003",
        # Sheet cells are strings (loaders call .astype(str)); coerce to float.
        temperature=float(df_prompt_engineering["Temperature"].values[0]),
    )
    user_session.set("llm_settings", llm_settings)

    llm = AzureOpenAI(
        deployment_name="davinci003",
        model_name=llm_settings.model_name,
        temperature=llm_settings.temperature,
        streaming=True,
    )

    utterance_prompt = PromptTemplate.from_template(
        df_prompt_engineering["Utterance-Prompt"].values[0]
    )

    chat_memory = ConversationBufferWindowMemory(
        memory_key="History",
        input_key="Utterance",
        # Window size comes from the sheet as a string; coerce to int.
        k=int(df_prompt_engineering["History"].values[0]),
    )

    utterance_chain = LLMChain(
        prompt=utterance_prompt,
        llm=llm,
        verbose=False,
        memory=chat_memory,
    )

    continuation_prompt = PromptTemplate.from_template(
        df_prompt_engineering["Continuation-Prompt"].values[0]
    )

    continuation_chain = LLMChain(
        prompt=continuation_prompt,
        llm=llm,
        verbose=False,
        memory=chat_memory,
    )

    # run() fetches this from the session to play "Continuation" turns.
    user_session.set("continuation_chain", continuation_chain)

    return utterance_chain
|
135 |
+
|
136 |
+
|
137 |
+
@cl.langchain_run
async def run(agent, input_str):
    """Handle one incoming chat message.

    "/reload" rebuilds the vector store from the dialogue sheet. Any other
    input is matched against the store (restricted to the session's current
    context state), answered through the utterance chain, and then any
    scripted "Continuation" rows are played through the continuation chain
    until a row's Continuation field is empty.
    """
    global vectordb
    if input_str == "/reload":
        # Force a full rebuild of the Chroma store (init=True).
        vectordb = load_vectordb(True)
        await cl.Message(content="Data loaded").send()
    else:
        df_persona = load_persona()

        # Only utterances valid in the current context state are candidates.
        retriever = get_retriever(user_session.get("context_state"), vectordb)

        # k=1 retriever: document[0] is the single best-matching dialogue row.
        document = retriever.get_relevant_documents(query=input_str)

        response = await agent.acall(
            {
                # Persona text of the AI character named in the matched row.
                "Persona": df_persona.loc[
                    df_persona["AI"] == document[0].metadata["AI"]
                ]["Persona"].values[0],
                "Utterance": input_str,
                "Response": document[0].metadata["Response"],
            },
            callbacks=[cl.AsyncLangchainCallbackHandler()],
        )
        await cl.Message(
            content=response["text"],
            author=document[0].metadata["AI"],
            llm_settings=user_session.get("llm_settings"),
        ).send()
        # The matched row decides the next context state and an optional
        # follow-up intent to play automatically.
        user_session.set("context_state", document[0].metadata["Contextualisation"])
        continuation = document[0].metadata["Continuation"]

        # Play scripted follow-up turns until a row has no Continuation.
        while continuation != "":
            # Direct metadata lookup by Intent (no similarity search here).
            document_continuation = vectordb.get(where={"Intent": continuation})
            continuation_chain = user_session.get("continuation_chain")
            response = await continuation_chain.acall(
                {
                    "Persona": df_persona.loc[
                        df_persona["AI"] == document_continuation["metadatas"][0]["AI"]
                    ]["Persona"].values[0],
                    # Continuation turns carry no user utterance.
                    "Utterance": "",
                    "Response": document_continuation["metadatas"][0]["Response"],
                },
                callbacks=[cl.AsyncLangchainCallbackHandler()],
            )
            await cl.Message(
                content=response["text"],
                author=document_continuation["metadatas"][0]["AI"],
                llm_settings=user_session.get("llm_settings"),
            ).send()
            user_session.set(
                "context_state",
                document_continuation["metadatas"][0]["Contextualisation"],
            )
            continuation = document_continuation["metadatas"][0]["Continuation"]
|
chainlit.md
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Welcome to ChEdBot! 🤖
|
2 |
+
|
3 |
+
## Useful Links 🔗
|
4 |
+
|
5 |
+
- **Documentation:** [ChEdBot Documentation]() 📚
|
6 |
+
|
7 |
+
We can't wait to see what you create with ChEdBot!
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
chainlit
|
2 |
+
langchain
|
3 |
+
sentence_transformers
|
4 |
+
chromadb
|
5 |
+
openpyxl
|
runtime.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python-3.10
|