Commit 811a1e3 · Parent(s): 1187c2e
souravmighty committed

Changed to snowflake embedding

Files changed:
- .chainlit/config.toml +1 -1
- Dockerfile +1 -0
- app.py +50 -33
- assets/conversational_rag_architecture.gif +0 -0
- chainlit.md +6 -0
- requirements.txt +0 -1
.chainlit/config.toml
CHANGED
@@ -49,7 +49,7 @@ auto_tag_thread = true
 name = "Chatbot"
 
 # Show the readme while the thread is empty.
-show_readme_as_default = true
+show_readme_as_default = false
 
 # Description of the app and chatbot. This is used for HTML tags.
 # description = ""
Dockerfile
CHANGED
@@ -7,5 +7,6 @@ WORKDIR $HOME/app
 COPY --chown=user . $HOME/app
 COPY ./requirements.txt ~/app/requirements.txt
 RUN pip install -r requirements.txt
+RUN pip install git+https://github.com/UKPLab/sentence-transformers.git
 COPY . .
 CMD ["chainlit", "run", "app.py", "--port", "7860"]
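Note: the added GitHub install layers the development build of sentence-transformers over the release install, presumably because the Snowflake arctic-embed models were newer than the PyPI release at the time (this same commit drops the `sentence-transformers` line from requirements.txt). A minimal sanity check, as a hedged sketch assumed to run inside the built image (this snippet is not part of the repo):

```python
# Hedged sketch: confirm the git-installed sentence-transformers can load the
# embedding model this commit switches to. Assumption: run inside the image.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Snowflake/snowflake-arctic-embed-m")
embedding = model.encode("Hello, GroqDoc!")
print(embedding.shape)  # (768,): arctic-embed-m produces 768-dimensional vectors
```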
app.py
CHANGED
@@ -10,6 +10,7 @@ from chainlit.input_widget import Select
 import os
 
 
+
 @cl.cache
 def get_memory():
     # Initialize message history for conversation
@@ -41,26 +42,6 @@ async def on_chat_start():
         ]
     ).send()
 
-    await setup_agent(settings)
-
-
-@cl.on_settings_update
-async def setup_agent(settings):
-
-    user_env = cl.user_session.get("env")
-    os.environ["GROQ_API_KEY"] = user_env.get("GROQ_API_KEY")
-
-    # embeddings = OllamaEmbeddings(model="nomic-embed-text")
-    # embeddings = SentenceTransformerEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")
-    # memory=get_memory()
-
-    # docsearch = await cl.make_async(Chroma)(
-    #     persist_directory="./chroma_db",
-    #     embedding_function=embeddings
-    # )
-
-    msg = cl.Message(content = f"You are using '{settings['Model']}' as LLM.")
-    await msg.send()
 
     files = None #Initialize variable to store uploaded files
 
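Review note: `setup_agent` is not deleted here; it moves below the document-indexing code (see the fourth hunk) so that the Chroma store exists in the session before the chain is built. The resulting pattern, as a minimal sketch assuming the `docsearch` and `chain` session keys used in app.py:

```python
# Hedged sketch of the rebuild-on-settings-change pattern this commit adopts.
# Assumes the "docsearch" and "chain" session keys, matching app.py.
import chainlit as cl
from langchain.chains import ConversationalRetrievalChain
from langchain_groq import ChatGroq

@cl.on_settings_update
async def setup_agent(settings):
    # Reuse the vector store built while processing the PDFs; only the LLM changes.
    docsearch = cl.user_session.get("docsearch")
    chain = ConversationalRetrievalChain.from_llm(
        llm=ChatGroq(model=settings["Model"]),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
    )
    cl.user_session.set("chain", chain)
```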
@@ -71,23 +52,25 @@ async def setup_agent(settings):
         accept=["application/pdf"],
         max_size_mb=100,
         timeout=180,
+        max_files = 10,
     ).send()
 
-    file = files[0] # Get the first uploaded file
-
-    # Inform the user that processing has started
-    msg = cl.Message(content=f"Processing `{file.name}`...")
-    await msg.send()
 
-    # Read the PDF file
-    pdf = PyPDF2.PdfReader(file.path)
     pdf_text = ""
-    for page in pdf.pages:
-        pdf_text += page.extract_text()
+    for file in files:
+        # Inform the user that processing has started
+        msg = cl.Message(content=f"Processing `{file.name}`...")
+        await msg.send()
+
+        # Read the PDF file
+        pdf = PyPDF2.PdfReader(file.path)
+        for page in pdf.pages:
+            pdf_text += page.extract_text()
+
 
 
     # Split the text into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    texts = text_splitter.split_text(pdf_text)
 
     # Create a metadata for each chunk
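The upload flow now accepts up to 10 PDFs and concatenates all pages into a single string before splitting. For reference, a standalone sketch of the new splitter settings (chunk_size=500 and chunk_overlap=0 are the values this hunk adds; the sample text is illustrative):

```python
# Hedged sketch of the chunking added in this hunk, runnable outside Chainlit.
from langchain.text_splitter import RecursiveCharacterTextSplitter

pdf_text = "Lorem ipsum dolor sit amet. " * 200  # stand-in for extracted PDF text
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_text(pdf_text)
print(len(texts), max(len(t) for t in texts))  # every chunk is at most 500 chars
```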
@@ -95,20 +78,40 @@ async def setup_agent(settings):
 
     # Create a Chroma vector store
     # embeddings = OllamaEmbeddings(model="nomic-embed-text")
-    embeddings = SentenceTransformerEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")
+    # embeddings = SentenceTransformerEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")
+    embeddings = SentenceTransformerEmbeddings(model_name = "Snowflake/snowflake-arctic-embed-m")
+
+
     #embeddings = OllamaEmbeddings(model="llama2:7b")
     docsearch = await cl.make_async(Chroma.from_texts)(
         texts, embeddings, metadatas=metadatas
     )
+    cl.user_session.set("docsearch", docsearch)
 
     # Let the user know that the system is ready
     msg.content = f"Processing `{file.name}` done. You can now ask questions!"
     await msg.update()
 
+    await setup_agent(settings)
+
+
+@cl.on_settings_update
+async def setup_agent(settings):
+
+    user_env = cl.user_session.get("env")
+    os.environ["GROQ_API_KEY"] = user_env.get("GROQ_API_KEY")
+
+    memory=get_memory()
+    docsearch = cl.user_session.get("docsearch")
+
+    msg = cl.Message(content = f"You are using `{settings['Model']}` as LLM. You can change model in `Settings Panel` in the chat box.")
+    await msg.send()
+
+
     memory=get_memory()
 
 
-    # Create a chain that uses the Chroma vector
+    # Create a chain that uses the Chroma vector stores
     chain = ConversationalRetrievalChain.from_llm(
         llm = ChatGroq(model=settings["Model"]),
         chain_type="stuff",
@@ -158,4 +161,18 @@ async def main(message: cl.Message):
     else:
         answer += "\nNo sources found"
     #return results
-    await cl.Message(content=answer, elements=text_elements).send()
+    await cl.Message(content=answer, elements=text_elements).send()
+
+
+@cl.on_stop
+def on_stop():
+    print("The user wants to stop the task!")
+    docsearch = cl.user_session.get("docsearch")
+    docsearch.delete_collection()
+
+
+@cl.on_chat_end
+def on_chat_end():
+    print("The user disconnected!")
+    docsearch = cl.user_session.get("docsearch")
+    docsearch.delete_collection()
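The two new lifecycle handlers drop the Chroma collection when the user stops a task or disconnects, so one session's embeddings do not leak into the next. One caveat, sketched below with a guard that is an editorial suggestion rather than part of this commit: if the chat ends before any PDF is processed, `docsearch` is None and `delete_collection()` would raise.

```python
# Hedged sketch: the same cleanup as the new handlers, plus a None guard
# (the guard is a suggestion, not part of this commit).
import chainlit as cl

@cl.on_chat_end
def on_chat_end():
    print("The user disconnected!")
    docsearch = cl.user_session.get("docsearch")
    if docsearch is not None:  # skip cleanup if no PDF was ever indexed
        docsearch.delete_collection()
```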
assets/conversational_rag_architecture.gif
ADDED
chainlit.md
ADDED
@@ -0,0 +1,6 @@
+# Welcome to GroqDoc!
+
+## Useful Links
+
+- **Groq API KEY:** Generate Groq API Key for free [Groq API Key](https://console.groq.com/keys)
+
requirements.txt
CHANGED
@@ -5,4 +5,3 @@ PyPDF2
 chromadb
 groq
 langchain-groq
-sentence-transformers