Ritesh-hf committed on
Commit
4c6d98a
1 Parent(s): 32ba13a

initial commit

Files changed (6)
  1. .env +4 -0
  2. __pycache__/app.cpython-38.pyc +0 -0
  3. app.py +138 -0
  4. bm25_traveler_website.json +0 -0
  5. requirements.txt +98 -0
  6. temp.py +176 -0
.env ADDED
@@ -0,0 +1,4 @@
+ USER_AGENT='myagent'
+ GROQ_API_KEY="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"
+ PINECONE_API_KEY="ca8e6a33-7355-453f-ad4b-80c8a1c6a9c7"
+ SECRET_KEY="b0*1x^y@9$)w%v+k=p!8xp@4bkt37s&b8+uf%1=mh+v1=@ybsh"
__pycache__/app.cpython-38.pyc ADDED
Binary file (4.55 kB)
app.py ADDED
@@ -0,0 +1,138 @@
+ import os
+ from dotenv import load_dotenv
+ load_dotenv(".env")
+
+ os.environ['USER_AGENT'] = os.getenv("USER_AGENT")
+ os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
+ os.environ["TOKENIZERS_PARALLELISM"] = 'true'
+
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain_community.chat_message_histories import ChatMessageHistory
+ from langchain_community.document_loaders import WebBaseLoader
+ from langchain_core.chat_history import BaseChatMessageHistory
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain_core.runnables.history import RunnableWithMessageHistory
+
+ from pinecone import Pinecone
+ from pinecone_text.sparse import BM25Encoder
+
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.retrievers import PineconeHybridSearchRetriever
+
+ from langchain_groq import ChatGroq
+
+ import gradio as gr
+ import spaces
+ import torch
+
+
+ # Connect to the Pinecone index, retrying once in case of a transient failure.
+ try:
+     pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
+     index_name = "traveler-demo-website-vectorstore"
+     pinecone_index = pc.Index(index_name)
+ except Exception:
+     pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
+     index_name = "traveler-demo-website-vectorstore"
+     pinecone_index = pc.Index(index_name)
+
+ # Pre-fitted sparse BM25 encoder for the sparse half of hybrid search.
+ bm25 = BM25Encoder().load("./bm25_traveler_website.json")
+
+ # Dense embeddings for the dense half of hybrid search.
+ embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-large-en-v1.5", model_kwargs={"trust_remote_code": True})
+
+ retriever = PineconeHybridSearchRetriever(
+     embeddings=embed_model,
+     sparse_encoder=bm25,
+     index=pinecone_index,
+     top_k=20,
+     alpha=0.5,  # equal weighting of dense and sparse scores
+ )
+
+ llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.1, max_tokens=1024, max_retries=2)
+
+ ### Contextualize question ###
+ contextualize_q_system_prompt = """Given a chat history and the latest user question \
+ which might reference context in the chat history, formulate a standalone question \
+ which can be understood without the chat history. Do NOT answer the question, \
+ just reformulate it if needed and otherwise return it as is.
+ """
+ contextualize_q_prompt = ChatPromptTemplate.from_messages(
+     [
+         ("system", contextualize_q_system_prompt),
+         MessagesPlaceholder("chat_history"),
+         ("human", "{input}")
+     ]
+ )
+
+ history_aware_retriever = create_history_aware_retriever(
+     llm, retriever, contextualize_q_prompt
+ )
+
+
+ qa_system_prompt = """You are a highly skilled information retrieval assistant. Use the following pieces of retrieved context to answer the question. \
+ Provide links to sources provided in the answer. \
+ If you don't know the answer, just say that you don't know. \
+ Do not give extra long answers. \
+ When responding to queries, your responses should be comprehensive and well-organized. For each response: \
+ 1. Provide Clear Answers \
+ 2. Include Detailed References: \
+    - Include links to any sources or sites that are mentioned in the answer. \
+    - Links to Sources: Provide URLs to credible sources where users can verify the information or explore further. \
+    - Downloadable Materials: Include links to any relevant downloadable resources if applicable. \
+    - Reference Sites: Mention specific websites or platforms that offer additional information. \
+ 3. Formatting for Readability: \
+    - Bullet Points or Lists: Where applicable, use bullet points or numbered lists to present information clearly. \
+    - Emphasize Important Information: Use bold or italics to highlight key details. \
+ 4. Organize Content Logically \
+ Do not include anything about context in the answer. \
+ {context}
+ """
+ qa_prompt = ChatPromptTemplate.from_messages(
+     [
+         ("system", qa_system_prompt),
+         MessagesPlaceholder("chat_history"),
+         ("human", "{input}")
+     ]
+ )
+ question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
+
+ rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
+
+ ### Statefully manage chat history ###
+ store = {}
+
+ def get_session_history(session_id: str) -> BaseChatMessageHistory:
+     if session_id not in store:
+         store[session_id] = ChatMessageHistory()
+     return store[session_id]
+
+
+ conversational_rag_chain = RunnableWithMessageHistory(
+     rag_chain,
+     get_session_history,
+     input_messages_key="input",
+     history_messages_key="chat_history",
+     output_messages_key="answer",
+ )
+
+ @spaces.GPU
+ def handle_message(question, history=None):  # None instead of a mutable default
+     zero = torch.Tensor([0]).cuda()
+     print("With GPU: ", zero.device)
+     response = ''
+     # Stream only the "answer" key of the chain output, yielding the
+     # accumulated text so Gradio renders a growing response.
+     chain = conversational_rag_chain.pick("answer")
+     for chunk in chain.stream(
+         {"input": question},
+         config={"configurable": {"session_id": "abc123"}},
+     ):
+         response += chunk
+         yield response
+
+ if __name__ == '__main__':
+     demo = gr.ChatInterface(fn=handle_message)
+     demo.launch()
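
Since handle_message is a generator, gr.ChatInterface streams the answer as it grows: each yielded value is the accumulated text so far. A minimal local smoke test might look like the following sketch (the question text is a made-up placeholder, and it assumes the .env values and Pinecone index above are reachable):

# Hypothetical quick test of app.py's streaming handler.
final_answer = None
for partial in handle_message("What are the visa requirements?"):
    final_answer = partial  # each value is the accumulated answer so far
print(final_answer)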
bm25_traveler_website.json ADDED
The diff for this file is too large to render.
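The fitted BM25 weights in this JSON are not rendered, but a file like this is typically produced by fitting pinecone-text's BM25Encoder on the scraped site corpus and serializing it. A minimal sketch, assuming a placeholder corpus (the real page texts are not part of this commit):

# Hypothetical reconstruction of how bm25_traveler_website.json could be built.
from pinecone_text.sparse import BM25Encoder

corpus = ["First page text...", "Second page text..."]  # placeholder documents
encoder = BM25Encoder()
encoder.fit(corpus)  # learn term statistics (TF/IDF) from the corpus
encoder.dump("bm25_traveler_website.json")  # later re-loaded in app.py via load()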
requirements.txt ADDED
@@ -0,0 +1,98 @@
+ aiohttp==3.9.5
+ aiosignal==1.3.1
+ annotated-types==0.7.0
+ anyio==4.4.0
+ async-timeout==4.0.3
+ attrs==23.2.0
+ bidict==0.23.1
+ blinker==1.8.2
+ certifi==2024.7.4
+ charset-normalizer==3.3.2
+ click==8.1.7
+ dataclasses-json==0.6.7
+ distro==1.9.0
+ exceptiongroup==1.2.2
+ filelock==3.15.4
+ flask==3.0.3
+ Flask-Cors==4.0.1
+ Flask-SocketIO==5.3.6
+ frozenlist==1.4.1
+ fsspec==2024.6.1
+ greenlet==3.0.3
+ groq==0.9.0
+ h11==0.14.0
+ httpcore==1.0.5
+ httpx==0.27.0
+ huggingface-hub==0.24.2
+ idna==3.7
+ importlib-metadata==8.2.0
+ itsdangerous==2.2.0
+ jinja2==3.1.4
+ joblib==1.4.2
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ langchain==0.2.11
+ langchain-community==0.2.10
+ langchain-core==0.2.24
+ langchain-groq==0.1.6
+ langchain-huggingface==0.0.3
+ langchain-text-splitters==0.2.2
+ langsmith==0.1.93
+ MarkupSafe==2.1.5
+ marshmallow==3.21.3
+ mmh3==4.1.0
+ mpmath==1.3.0
+ multidict==6.0.5
+ mypy-extensions==1.0.0
+ networkx==3.1
+ nltk==3.8.1
+ numpy==1.24.4
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==9.1.0.70
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.20.5
+ nvidia-nvjitlink-cu12==12.5.82
+ nvidia-nvtx-cu12==12.1.105
+ orjson==3.10.6
+ packaging==24.1
+ pillow==10.4.0
+ pinecone==4.0.0
+ pinecone-text==0.9.0
+ pydantic==2.8.2
+ pydantic-core==2.20.1
+ python-dotenv==1.0.1
+ python-engineio==4.9.1
+ python-socketio==5.11.3
+ PyYAML==6.0.1
+ regex==2024.7.24
+ requests==2.32.3
+ safetensors==0.4.3
+ scikit-learn==1.3.2
+ scipy==1.10.1
+ sentence-transformers==3.0.1
+ simple-websocket==1.0.0
+ sniffio==1.3.1
+ SQLAlchemy==2.0.31
+ sympy==1.13.1
+ tenacity==8.5.0
+ threadpoolctl==3.5.0
+ tokenizers==0.19.1
+ torch==2.4.0
+ tqdm==4.66.4
+ transformers==4.43.3
+ triton==3.0.0
+ types-requests==2.32.0.20240712
+ typing-extensions==4.12.2
+ typing-inspect==0.9.0
+ urllib3==2.2.2
+ werkzeug==3.0.3
+ wget==3.2
+ wsproto==1.2.0
+ yarl==1.9.4
+ zipp==3.19.2
temp.py ADDED
@@ -0,0 +1,176 @@
+ import os
+ from dotenv import load_dotenv
+ load_dotenv(".env")
+
+ os.environ['USER_AGENT'] = os.getenv("USER_AGENT")
+ os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
+ os.environ["TOKENIZERS_PARALLELISM"] = 'true'
+
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
+ from langchain.chains.combine_documents import create_stuff_documents_chain
+ from langchain_community.chat_message_histories import ChatMessageHistory
+ from langchain_community.document_loaders import WebBaseLoader
+ from langchain_core.chat_history import BaseChatMessageHistory
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain_core.runnables.history import RunnableWithMessageHistory
+
+ from pinecone import Pinecone
+ from pinecone_text.sparse import BM25Encoder
+
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.retrievers import PineconeHybridSearchRetriever
+
+ from langchain_groq import ChatGroq
+
+ # These imports were commented out, but the @socketio.on/@app.route handlers
+ # below depend on them, so they are restored to make the module importable.
+ from flask import Flask, request, render_template
+ from flask_cors import CORS
+ from flask_socketio import SocketIO, emit
+
+ import gradio as gr
+ import spaces
+ import torch
+
+ zero = torch.Tensor([0]).cuda()
+ print(zero.device)  # <-- 'cpu' 🤔
+
+ @spaces.GPU
+ def greet(n):
+     print(zero.device)  # <-- 'cuda:0' 🤗
+     return f"Hello {zero + n} Tensor"
+
+
+ app = Flask(__name__)
+ CORS(app)
+ socketio = SocketIO(app, cors_allowed_origins="*")
+ app.config['SESSION_COOKIE_SECURE'] = True  # Use HTTPS
+ app.config['SESSION_COOKIE_HTTPONLY'] = True
+ app.config['SESSION_COOKIE_SAMESITE'] = 'Lax'
+ app.config['SECRET_KEY'] = os.getenv('SECRET_KEY')
+
+ # Connect to the Pinecone index, retrying once in case of a transient failure.
+ try:
+     pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
+     index_name = "traveler-demo-website-vectorstore"
+     pinecone_index = pc.Index(index_name)
+ except Exception:
+     pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
+     index_name = "traveler-demo-website-vectorstore"
+     pinecone_index = pc.Index(index_name)
+
+ bm25 = BM25Encoder().load("./bm25_traveler_website.json")
+
+ embed_model = HuggingFaceEmbeddings(model_name="Alibaba-NLP/gte-large-en-v1.5", model_kwargs={"trust_remote_code": True})
+
+ retriever = PineconeHybridSearchRetriever(
+     embeddings=embed_model,
+     sparse_encoder=bm25,
+     index=pinecone_index,
+     top_k=20,
+     alpha=0.5,
+ )
+
+ llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0.1, max_tokens=1024, max_retries=2)
+
+ ### Contextualize question ###
+ contextualize_q_system_prompt = """Given a chat history and the latest user question \
+ which might reference context in the chat history, formulate a standalone question \
+ which can be understood without the chat history. Do NOT answer the question, \
+ just reformulate it if needed and otherwise return it as is.
+ """
+ contextualize_q_prompt = ChatPromptTemplate.from_messages(
+     [
+         ("system", contextualize_q_system_prompt),
+         MessagesPlaceholder("chat_history"),
+         ("human", "{input}")
+     ]
+ )
+
+ history_aware_retriever = create_history_aware_retriever(
+     llm, retriever, contextualize_q_prompt
+ )
+
+
+ qa_system_prompt = """You are a highly skilled information retrieval assistant. Use the following pieces of retrieved context to answer the question. \
+ Provide links to sources provided in the answer. \
+ If you don't know the answer, just say that you don't know. \
+ Do not give extra long answers. \
+ When responding to queries, your responses should be comprehensive and well-organized. For each response: \
+ 1. Provide Clear Answers \
+ 2. Include Detailed References: \
+    - Include links to any sources or sites that are mentioned in the answer. \
+    - Links to Sources: Provide URLs to credible sources where users can verify the information or explore further. \
+    - Downloadable Materials: Include links to any relevant downloadable resources if applicable. \
+    - Reference Sites: Mention specific websites or platforms that offer additional information. \
+ 3. Formatting for Readability: \
+    - Bullet Points or Lists: Where applicable, use bullet points or numbered lists to present information clearly. \
+    - Emphasize Important Information: Use bold or italics to highlight key details. \
+ 4. Organize Content Logically \
+ Do not include anything about context in the answer. \
+ {context}
+ """
+ qa_prompt = ChatPromptTemplate.from_messages(
+     [
+         ("system", qa_system_prompt),
+         MessagesPlaceholder("chat_history"),
+         ("human", "{input}")
+     ]
+ )
+ question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
+
+ rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
+
+ ### Statefully manage chat history ###
+ store = {}
+
+ def clean_temporary_data():
+     # Clear the shared dict in place; rebinding a local `store` would be a no-op.
+     store.clear()
+
+ def get_session_history(session_id: str) -> BaseChatMessageHistory:
+     if session_id not in store:
+         store[session_id] = ChatMessageHistory()
+     return store[session_id]
+
+
+ conversational_rag_chain = RunnableWithMessageHistory(
+     rag_chain,
+     get_session_history,
+     input_messages_key="input",
+     history_messages_key="chat_history",
+     output_messages_key="answer",
+ )
+
+ # Stream response to client
+ @socketio.on('message')
+ def handle_message(data):
+     question = data.get('question')
+     session_id = data.get('session_id', 'abc123')
+     chain = conversational_rag_chain.pick("answer")
+
+     # Retry the stream once if the first attempt fails.
+     try:
+         for chunk in chain.stream(
+             {"input": question},
+             config={"configurable": {"session_id": session_id}},
+         ):
+             emit('response', chunk, room=request.sid)
+     except Exception:
+         for chunk in chain.stream(
+             {"input": question},
+             config={"configurable": {"session_id": session_id}},
+         ):
+             emit('response', chunk, room=request.sid)
+
+ @app.route("/")
+ def index_view():
+     return render_template('chat.html')
+
+ if __name__ == '__main__':
+     socketio.run(app, debug=True)
+
+
+ # Leftover ZeroGPU smoke test; when run as a script this only executes after
+ # the SocketIO server exits.
+ demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
+ demo.launch()