Spaces:
Build error
Build error
improvements and docs
Browse files- README.md +14 -0
- code/.chainlit/config.toml +0 -84
- code/config.yml +1 -0
- code/modules/llm_tutor.py +4 -3
- code/vectorstores/db_FAISS_sentence-transformers/all-MiniLM-L6-v2/index.faiss +0 -0
- code/vectorstores/db_FAISS_sentence-transformers/all-MiniLM-L6-v2/index.pkl +0 -0
- code/vectorstores/db_FAISS_text-embedding-ada-002/index.faiss +0 -0
- code/vectorstores/db_FAISS_text-embedding-ada-002/index.pkl +0 -0
- docs/README.md +41 -1
README.md
CHANGED
@@ -12,3 +12,17 @@ DL4DS Tutor
|
|
12 |
===========
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
===========
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
15 |
+
|
16 |
+
To run locally,
|
17 |
+
|
18 |
+
Clone the repository from: https://github.com/DL4DS/dl4ds_tutor
|
19 |
+
|
20 |
+
Put your data under the `storage/data` directory. Note: You can add urls in the urls.txt file, and other pdf files in the `storage/data` directory.
|
21 |
+
|
22 |
+
To create the Vector Database, run the following command:
|
23 |
+
```python code/modules/vector_db.py```
|
24 |
+
|
25 |
+
To run the chainlit app, run the following command:
|
26 |
+
```chainlit run code/main.py```
|
27 |
+
|
28 |
+
See the [docs](https://github.com/DL4DS/dl4ds_tutor/tree/main/docs) for more information.
|
code/.chainlit/config.toml
DELETED
@@ -1,84 +0,0 @@
|
|
1 |
-
[project]
|
2 |
-
# Whether to enable telemetry (default: true). No personal data is collected.
|
3 |
-
enable_telemetry = true
|
4 |
-
|
5 |
-
# List of environment variables to be provided by each user to use the app.
|
6 |
-
user_env = []
|
7 |
-
|
8 |
-
# Duration (in seconds) during which the session is saved when the connection is lost
|
9 |
-
session_timeout = 3600
|
10 |
-
|
11 |
-
# Enable third parties caching (e.g LangChain cache)
|
12 |
-
cache = false
|
13 |
-
|
14 |
-
# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
|
15 |
-
# follow_symlink = false
|
16 |
-
|
17 |
-
[features]
|
18 |
-
# Show the prompt playground
|
19 |
-
prompt_playground = true
|
20 |
-
|
21 |
-
# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
|
22 |
-
unsafe_allow_html = false
|
23 |
-
|
24 |
-
# Process and display mathematical expressions. This can clash with "$" characters in messages.
|
25 |
-
latex = false
|
26 |
-
|
27 |
-
# Authorize users to upload files with messages
|
28 |
-
multi_modal = true
|
29 |
-
|
30 |
-
# Allows user to use speech to text
|
31 |
-
[features.speech_to_text]
|
32 |
-
enabled = false
|
33 |
-
# See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
|
34 |
-
# language = "en-US"
|
35 |
-
|
36 |
-
[UI]
|
37 |
-
# Name of the app and chatbot.
|
38 |
-
name = "LLM Tutor"
|
39 |
-
|
40 |
-
# Show the readme while the conversation is empty.
|
41 |
-
show_readme_as_default = true
|
42 |
-
|
43 |
-
# Description of the app and chatbot. This is used for HTML tags.
|
44 |
-
# description = ""
|
45 |
-
|
46 |
-
# Large size content are by default collapsed for a cleaner ui
|
47 |
-
default_collapse_content = true
|
48 |
-
|
49 |
-
# The default value for the expand messages settings.
|
50 |
-
default_expand_messages = false
|
51 |
-
|
52 |
-
# Hide the chain of thought details from the user in the UI.
|
53 |
-
hide_cot = false
|
54 |
-
|
55 |
-
# Link to your github repo. This will add a github button in the UI's header.
|
56 |
-
# github = "https://github.com/DL4DS/dl4ds_tutor"
|
57 |
-
|
58 |
-
# Specify a CSS file that can be used to customize the user interface.
|
59 |
-
# The CSS file can be served from the public directory or via an external link.
|
60 |
-
# custom_css = "/public/test.css"
|
61 |
-
|
62 |
-
# Override default MUI light theme. (Check theme.ts)
|
63 |
-
[UI.theme.light]
|
64 |
-
#background = "#FAFAFA"
|
65 |
-
#paper = "#FFFFFF"
|
66 |
-
|
67 |
-
[UI.theme.light.primary]
|
68 |
-
#main = "#F80061"
|
69 |
-
#dark = "#980039"
|
70 |
-
#light = "#FFE7EB"
|
71 |
-
|
72 |
-
# Override default MUI dark theme. (Check theme.ts)
|
73 |
-
[UI.theme.dark]
|
74 |
-
#background = "#FAFAFA"
|
75 |
-
#paper = "#FFFFFF"
|
76 |
-
|
77 |
-
[UI.theme.dark.primary]
|
78 |
-
#main = "#F80061"
|
79 |
-
#dark = "#980039"
|
80 |
-
#light = "#FFE7EB"
|
81 |
-
|
82 |
-
|
83 |
-
[meta]
|
84 |
-
generated_by = "0.7.700"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
code/config.yml
CHANGED
@@ -10,6 +10,7 @@ embedding_options:
|
|
10 |
search_top_k : 3 # int
|
11 |
llm_params:
|
12 |
use_history: False # bool
|
|
|
13 |
llm_loader: 'local_llm' # str [local_llm, openai]
|
14 |
openai_params:
|
15 |
model: 'gpt-4' # str [gpt-3.5-turbo-1106, gpt-4]
|
|
|
10 |
search_top_k : 3 # int
|
11 |
llm_params:
|
12 |
use_history: False # bool
|
13 |
+
memory_window: 3 # int
|
14 |
llm_loader: 'local_llm' # str [local_llm, openai]
|
15 |
openai_params:
|
16 |
model: 'gpt-4' # str [gpt-3.5-turbo-1106, gpt-4]
|
code/modules/llm_tutor.py
CHANGED
@@ -5,7 +5,7 @@ from langchain_community.embeddings import OpenAIEmbeddings
|
|
5 |
from langchain.vectorstores import FAISS
|
6 |
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
|
7 |
from langchain.llms import CTransformers
|
8 |
-
from langchain.memory import
|
9 |
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
|
10 |
import os
|
11 |
|
@@ -35,8 +35,9 @@ class LLMTutor:
|
|
35 |
# Retrieval QA Chain
|
36 |
def retrieval_qa_chain(self, llm, prompt, db):
|
37 |
if self.config["llm_params"]["use_history"]:
|
38 |
-
memory =
|
39 |
-
|
|
|
40 |
)
|
41 |
qa_chain = ConversationalRetrievalChain.from_llm(
|
42 |
llm=llm,
|
|
|
5 |
from langchain.vectorstores import FAISS
|
6 |
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
|
7 |
from langchain.llms import CTransformers
|
8 |
+
from langchain.memory import ConversationBufferWindowMemory
|
9 |
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
|
10 |
import os
|
11 |
|
|
|
35 |
# Retrieval QA Chain
|
36 |
def retrieval_qa_chain(self, llm, prompt, db):
|
37 |
if self.config["llm_params"]["use_history"]:
|
38 |
+
memory = ConversationBufferWindowMemory(
|
39 |
+
k = self.config["llm_params"]["memory_window"],
|
40 |
+
memory_key="chat_history", return_messages=True, output_key="answer"
|
41 |
)
|
42 |
qa_chain = ConversationalRetrievalChain.from_llm(
|
43 |
llm=llm,
|
code/vectorstores/db_FAISS_sentence-transformers/all-MiniLM-L6-v2/index.faiss
DELETED
Binary file (6.19 kB)
|
|
code/vectorstores/db_FAISS_sentence-transformers/all-MiniLM-L6-v2/index.pkl
DELETED
Binary file (9.21 kB)
|
|
code/vectorstores/db_FAISS_text-embedding-ada-002/index.faiss
DELETED
Binary file (24.6 kB)
|
|
code/vectorstores/db_FAISS_text-embedding-ada-002/index.pkl
DELETED
Binary file (9.21 kB)
|
|
docs/README.md
CHANGED
@@ -1,3 +1,43 @@
|
|
1 |
# Documentation
|
2 |
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Documentation
|
2 |
|
3 |
+
## File Structure:
|
4 |
+
- `docs/` - Documentation files
|
5 |
+
- `code/` - Code files
|
6 |
+
- `storage/` - Storage files
|
7 |
+
- `vectorstores/` - Vector Databases
|
8 |
+
- `.env` - Environment Variables
|
9 |
+
- `Dockerfile` - Dockerfile for Hugging Face
|
10 |
+
- `.chainlit` - Chainlit Configuration
|
11 |
+
- `chainlit.md` - Chainlit README
|
12 |
+
- `README.md` - Repository README
|
13 |
+
- `.gitignore` - Gitignore file
|
14 |
+
- `requirements.txt` - Python Requirements
|
15 |
+
- `.gitattributes` - Gitattributes file
|
16 |
+
|
17 |
+
## Code Structure
|
18 |
+
|
19 |
+
- `code/main.py` - Main Chainlit App
|
20 |
+
- `code/config.yml` - Configuration File to set Embedding related, Vector Database related, and Chat Model related parameters.
|
21 |
+
- `code/modules/vector_db.py` - Vector Database Creation
|
22 |
+
- `code/modules/chat_model_loader.py` - Chat Model Loader (Creates the Chat Model)
|
23 |
+
- `code/modules/constants.py` - Constants (Loads the Environment Variables, Prompts, Model Paths, etc.)
|
24 |
+
- `code/modules/data_loader.py` - Loads and Chunks the Data
|
25 |
+
- `code/modules/embedding_model.py` - Creates the Embedding Model to Embed the Data
|
26 |
+
- `code/modules/llm_tutor.py` - Creates the RAG LLM Tutor
|
27 |
+
- The Function `qa_bot()` loads the vector database and the chat model, and sets the prompt to pass to the chat model.
|
28 |
+
- `code/modules/helpers.py` - Helper Functions
|
29 |
+
|
30 |
+
## Storage and Vectorstores
|
31 |
+
|
32 |
+
- `storage/data/` - Data Storage (Put your pdf files under this directory, and urls in the urls.txt file)
|
33 |
+
- `storage/models/` - Model Storage (Put your local LLMs under this directory)
|
34 |
+
|
35 |
+
- `vectorstores/` - Vector Databases (Stores the Vector Databases generated from `code/modules/vector_db.py`)
|
36 |
+
|
37 |
+
|
38 |
+
## Useful Configurations
|
39 |
+
set these in `code/config.yml`:
|
40 |
+
* ``["embedding_options"]["expand_urls"]`` - If set to True, gets and reads the data from all the links under the url provided. If set to False, only reads the data in the url provided.
|
41 |
+
* ``["embedding_options"]["search_top_k"]`` - Number of sources that the retriever returns
|
42 |
+
* ``["llm_params"]["use_history"]`` - Whether to use history in the prompt or not
|
43 |
+
* ``["llm_params"]["memory_window"]`` - Number of interactions to keep track of in the history
|