Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,7 @@ from langchain_community.retrievers import PineconeHybridSearchRetriever
|
|
17 |
from langchain_groq import ChatGroq
|
18 |
|
19 |
# Load environment variables
|
20 |
-
|
21 |
USER_AGENT = os.getenv("USER_AGENT")
|
22 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
23 |
SECRET_KEY = os.getenv("SECRET_KEY")
|
@@ -47,10 +47,36 @@ def initialize_pinecone(index_name: str):
|
|
47 |
print(f"Error initializing Pinecone: {e}")
|
48 |
raise
|
49 |
|
|
|
|
|
|
|
|
|
|
|
50 |
# Initialize Pinecone index and BM25 encoder
|
51 |
pinecone_index = initialize_pinecone("traveler-demo-website-vectorstore")
|
52 |
bm25 = BM25Encoder().load("./bm25_traveler_website.json")
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
# Initialize models and retriever
|
55 |
embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-large-en-v1.5", model_kwargs={"trust_remote_code":True})
|
56 |
retriever = PineconeHybridSearchRetriever(
|
@@ -62,7 +88,7 @@ retriever = PineconeHybridSearchRetriever(
|
|
62 |
)
|
63 |
|
64 |
# Initialize LLM
|
65 |
-
llm = ChatGroq(model="llama-3.1-
|
66 |
|
67 |
# Contextualization prompt and retriever
|
68 |
contextualize_q_system_prompt = """Given a chat history and the latest user question \
|
@@ -82,6 +108,7 @@ history_aware_retriever = create_history_aware_retriever(llm, retriever, context
|
|
82 |
# QA system prompt and chain
|
83 |
qa_system_prompt = """You are a highly skilled information retrieval assistant. Use the following context to answer questions effectively. \
|
84 |
If you don't know the answer, simply state that you don't know. \
|
|
|
85 |
Provide answers in proper HTML format and keep them concise. \
|
86 |
|
87 |
When responding to queries, follow these guidelines: \
|
@@ -96,6 +123,8 @@ When responding to queries, follow these guidelines: \
|
|
96 |
|
97 |
3. Formatting for Readability: \
|
98 |
- The answer should be in a proper HTML format with appropriate tags. \
|
|
|
|
|
99 |
- Use bullet points or numbered lists where applicable to present information clearly. \
|
100 |
- Highlight key details using bold or italics. \
|
101 |
- Provide proper and meaningful abbreviations for urls. Do not include naked urls. \
|
@@ -134,6 +163,7 @@ conversational_rag_chain = RunnableWithMessageHistory(
|
|
134 |
get_session_history,
|
135 |
input_messages_key="input",
|
136 |
history_messages_key="chat_history",
|
|
|
137 |
output_messages_key="answer",
|
138 |
)
|
139 |
|
@@ -153,12 +183,17 @@ def handle_disconnect():
|
|
153 |
@socketio.on('message')
|
154 |
def handle_message(data):
|
155 |
question = data.get('question')
|
|
|
|
|
|
|
|
|
|
|
156 |
session_id = data.get('session_id', SESSION_ID_DEFAULT)
|
157 |
chain = conversational_rag_chain.pick("answer")
|
158 |
|
159 |
try:
|
160 |
for chunk in chain.stream(
|
161 |
-
{"input": question},
|
162 |
config={"configurable": {"session_id": session_id}},
|
163 |
):
|
164 |
emit('response', chunk, room=request.sid)
|
@@ -174,4 +209,5 @@ def index_view():
|
|
174 |
|
175 |
# Main function to run the app
|
176 |
if __name__ == '__main__':
|
177 |
-
|
|
|
|
17 |
from langchain_groq import ChatGroq
|
18 |
|
19 |
# Load environment variables
|
20 |
+
load_dotenv(".env")
|
21 |
USER_AGENT = os.getenv("USER_AGENT")
|
22 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
23 |
SECRET_KEY = os.getenv("SECRET_KEY")
|
|
|
47 |
print(f"Error initializing Pinecone: {e}")
|
48 |
raise
|
49 |
|
50 |
+
|
51 |
+
##################################################
|
52 |
+
## Select the data source below: keep exactly one pinecone_index / bm25 pair uncommented
|
53 |
+
##################################################
|
54 |
+
|
55 |
# Initialize Pinecone index and BM25 encoder
|
56 |
pinecone_index = initialize_pinecone("traveler-demo-website-vectorstore")
|
57 |
bm25 = BM25Encoder().load("./bm25_traveler_website.json")
|
58 |
|
59 |
+
### This is for UAE Legislation Website
|
60 |
+
# pinecone_index = initialize_pinecone("uae-legislation-site-data")
|
61 |
+
# bm25 = BM25Encoder().load("./bm25_uae_legislation_data.json")
|
62 |
+
|
63 |
+
|
64 |
+
### This is for u.ae Website
|
65 |
+
# pinecone_index = initialize_pinecone("vector-store-index")
|
66 |
+
# bm25 = BM25Encoder().load("./bm25_u.ae.json")
|
67 |
+
|
68 |
+
|
69 |
+
### This is for UAE Economic Department Website
|
70 |
+
# pinecone_index = initialize_pinecone("uae-department-of-economics-site-data")
|
71 |
+
# bm25 = BM25Encoder().load("./bm25_uae_department_of_economics_data.json")
|
72 |
+
|
73 |
+
|
74 |
+
|
75 |
+
##################################################
|
76 |
+
##################################################
|
77 |
+
|
78 |
+
old_embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
79 |
+
|
80 |
# Initialize models and retriever
|
81 |
embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-large-en-v1.5", model_kwargs={"trust_remote_code":True})
|
82 |
retriever = PineconeHybridSearchRetriever(
|
|
|
88 |
)
|
89 |
|
90 |
# Initialize LLM
|
91 |
+
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0, max_tokens=1024, max_retries=2)
|
92 |
|
93 |
# Contextualization prompt and retriever
|
94 |
contextualize_q_system_prompt = """Given a chat history and the latest user question \
|
|
|
108 |
# QA system prompt and chain
|
109 |
qa_system_prompt = """You are a highly skilled information retrieval assistant. Use the following context to answer questions effectively. \
|
110 |
If you don't know the answer, simply state that you don't know. \
|
111 |
+
Your answer should be in {language} language. \
|
112 |
Provide answers in proper HTML format and keep them concise. \
|
113 |
|
114 |
When responding to queries, follow these guidelines: \
|
|
|
123 |
|
124 |
3. Formatting for Readability: \
|
125 |
- The answer should be in a proper HTML format with appropriate tags. \
|
126 |
+
- For arabic language response align the text to right and convert numbers also.
|
127 |
+
- Double check if the language of answer is correct or not.
|
128 |
- Use bullet points or numbered lists where applicable to present information clearly. \
|
129 |
- Highlight key details using bold or italics. \
|
130 |
- Provide proper and meaningful abbreviations for urls. Do not include naked urls. \
|
|
|
163 |
get_session_history,
|
164 |
input_messages_key="input",
|
165 |
history_messages_key="chat_history",
|
166 |
+
language_message_key="language",
|
167 |
output_messages_key="answer",
|
168 |
)
|
169 |
|
|
|
183 |
@socketio.on('message')
|
184 |
def handle_message(data):
|
185 |
question = data.get('question')
|
186 |
+
language = data.get('language')
|
187 |
+
if "en" in language:
|
188 |
+
language = "English"
|
189 |
+
else:
|
190 |
+
language = "Arabic"
|
191 |
session_id = data.get('session_id', SESSION_ID_DEFAULT)
|
192 |
chain = conversational_rag_chain.pick("answer")
|
193 |
|
194 |
try:
|
195 |
for chunk in chain.stream(
|
196 |
+
{"input": question, 'language': language},
|
197 |
config={"configurable": {"session_id": session_id}},
|
198 |
):
|
199 |
emit('response', chunk, room=request.sid)
|
|
|
209 |
|
210 |
# Main function to run the app
|
211 |
if __name__ == '__main__':
|
212 |
+
print("Hello world")
|
213 |
+
socketio.run(app, debug=True)
|