Farid Karimli committed
Commit 1052297
2 Parent(s): c26167a ccfbb8c

Merge branch 'dev_branch' into text_extraction

.github/workflows/push_to_hf_space_prototype.yml CHANGED
@@ -1,20 +1,21 @@
 name: Push Prototype to HuggingFace
 
 on:
-  pull_request:
-    branches:
-      - dev_branch
-
+  push:
+    branches: [dev_branch]
+
+  # run this workflow manually from the Actions tab
+  workflow_dispatch:
 
 jobs:
-  build:
+  sync-to-hub:
     runs-on: ubuntu-latest
     steps:
-      - name: Deploy Prototype to HuggingFace
-        uses: nateraw/huggingface-sync-action@v0.0.4
-        with:
-          github_repo_id: DL4DS/dl4ds_tutor
-          huggingface_repo_id: dl4ds/tutor_dev
-          repo_type: space
-          space_sdk: static
-          hf_token: ${{ secrets.HF_TOKEN }}
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Deploy Prototype to HuggingFace
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push https://trgardos:$HF_TOKEN@huggingface.co/spaces/dl4ds/tutor_dev dev_branch:main
.vscode/launch.json ADDED
@@ -0,0 +1,35 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python Debugger: Chainlit run main.py",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${workspaceFolder}/.venv/bin/chainlit",
+            "console": "integratedTerminal",
+            "args": ["run", "main.py"],
+            "cwd": "${workspaceFolder}/code",
+            "justMyCode": true
+        },
+        { "name":"Python Debugger: Module store_manager",
+            "type":"debugpy",
+            "request":"launch",
+            "module":"modules.vectorstore.store_manager",
+            "env": {"PYTHONPATH": "${workspaceFolder}/code"},
+            "cwd": "${workspaceFolder}/code",
+            "justMyCode": true
+        },
+        {
+            "name": "Python Debugger: Module data_loader",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "modules.dataloader.data_loader",
+            "env": {"PYTHONPATH": "${workspaceFolder}/code"},
+            "cwd": "${workspaceFolder}/code",
+            "justMyCode": true
+        }
+    ]
+}
.vscode/tasks.json ADDED
@@ -0,0 +1,13 @@
+{
+    // See https://go.microsoft.com/fwlink/?LinkId=733558
+    // for the documentation about the tasks.json format
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "echo",
+            "type": "shell",
+            "command": "echo ${workspaceFolder}; ls ${workspaceFolder}/code",
+            "problemMatcher": []
+        }
+    ]
+}
code/.chainlit/translations/en-US.json DELETED
@@ -1,229 +0,0 @@
-{
-    "components": {
-        "atoms": {
-            "buttons": {
-                "userButton": {
-                    "menu": {
-                        "settings": "Settings",
-                        "settingsKey": "S",
-                        "APIKeys": "API Keys",
-                        "logout": "Logout"
-                    }
-                }
-            }
-        },
-        "molecules": {
-            "newChatButton": {
-                "newChat": "New Chat"
-            },
-            "tasklist": {
-                "TaskList": {
-                    "title": "\ud83d\uddd2\ufe0f Task List",
-                    "loading": "Loading...",
-                    "error": "An error occurred"
-                }
-            },
-            "attachments": {
-                "cancelUpload": "Cancel upload",
-                "removeAttachment": "Remove attachment"
-            },
-            "newChatDialog": {
-                "createNewChat": "Create new chat?",
-                "clearChat": "This will clear the current messages and start a new chat.",
-                "cancel": "Cancel",
-                "confirm": "Confirm"
-            },
-            "settingsModal": {
-                "settings": "Settings",
-                "expandMessages": "Expand Messages",
-                "hideChainOfThought": "Hide Chain of Thought",
-                "darkMode": "Dark Mode"
-            },
-            "detailsButton": {
-                "using": "Using",
-                "used": "Used"
-            },
-            "auth": {
-                "authLogin": {
-                    "title": "Login to access the app.",
-                    "form": {
-                        "email": "Email address",
-                        "password": "Password",
-                        "noAccount": "Don't have an account?",
-                        "alreadyHaveAccount": "Already have an account?",
-                        "signup": "Sign Up",
-                        "signin": "Sign In",
-                        "or": "OR",
-                        "continue": "Continue",
-                        "forgotPassword": "Forgot password?",
-                        "passwordMustContain": "Your password must contain:",
-                        "emailRequired": "email is a required field",
-                        "passwordRequired": "password is a required field"
-                    },
-                    "error": {
-                        "default": "Unable to sign in.",
-                        "signin": "Try signing in with a different account.",
-                        "oauthsignin": "Try signing in with a different account.",
-                        "redirect_uri_mismatch": "The redirect URI is not matching the oauth app configuration.",
-                        "oauthcallbackerror": "Try signing in with a different account.",
-                        "oauthcreateaccount": "Try signing in with a different account.",
-                        "emailcreateaccount": "Try signing in with a different account.",
-                        "callback": "Try signing in with a different account.",
-                        "oauthaccountnotlinked": "To confirm your identity, sign in with the same account you used originally.",
-                        "emailsignin": "The e-mail could not be sent.",
-                        "emailverify": "Please verify your email, a new email has been sent.",
-                        "credentialssignin": "Sign in failed. Check the details you provided are correct.",
-                        "sessionrequired": "Please sign in to access this page."
-                    }
-                },
-                "authVerifyEmail": {
-                    "almostThere": "You're almost there! We've sent an email to ",
-                    "verifyEmailLink": "Please click on the link in that email to complete your signup.",
-                    "didNotReceive": "Can't find the email?",
-                    "resendEmail": "Resend email",
-                    "goBack": "Go Back",
-                    "emailSent": "Email sent successfully.",
-                    "verifyEmail": "Verify your email address"
-                },
-                "providerButton": {
-                    "continue": "Continue with {{provider}}",
-                    "signup": "Sign up with {{provider}}"
-                },
-                "authResetPassword": {
-                    "newPasswordRequired": "New password is a required field",
-                    "passwordsMustMatch": "Passwords must match",
-                    "confirmPasswordRequired": "Confirm password is a required field",
-                    "newPassword": "New password",
-                    "confirmPassword": "Confirm password",
-                    "resetPassword": "Reset Password"
-                },
-                "authForgotPassword": {
-                    "email": "Email address",
-                    "emailRequired": "email is a required field",
-                    "emailSent": "Please check the email address {{email}} for instructions to reset your password.",
-                    "enterEmail": "Enter your email address and we will send you instructions to reset your password.",
-                    "resendEmail": "Resend email",
-                    "continue": "Continue",
-                    "goBack": "Go Back"
-                }
-            }
-        },
-        "organisms": {
-            "chat": {
-                "history": {
-                    "index": {
-                        "showHistory": "Show history",
-                        "lastInputs": "Last Inputs",
-                        "noInputs": "Such empty...",
-                        "loading": "Loading..."
-                    }
-                },
-                "inputBox": {
-                    "input": {
-                        "placeholder": "Type your message here..."
-                    },
-                    "speechButton": {
-                        "start": "Start recording",
-                        "stop": "Stop recording"
-                    },
-                    "SubmitButton": {
-                        "sendMessage": "Send message",
-                        "stopTask": "Stop Task"
-                    },
-                    "UploadButton": {
-                        "attachFiles": "Attach files"
-                    },
-                    "waterMark": {
-                        "text": "Built with"
-                    }
-                },
-                "Messages": {
-                    "index": {
-                        "running": "Running",
-                        "executedSuccessfully": "executed successfully",
-                        "failed": "failed",
-                        "feedbackUpdated": "Feedback updated",
-                        "updating": "Updating"
-                    }
-                },
-                "dropScreen": {
-                    "dropYourFilesHere": "Drop your files here"
-                },
-                "index": {
-                    "failedToUpload": "Failed to upload",
-                    "cancelledUploadOf": "Cancelled upload of",
-                    "couldNotReachServer": "Could not reach the server",
-                    "continuingChat": "Continuing previous chat"
-                },
-                "settings": {
-                    "settingsPanel": "Settings panel",
-                    "reset": "Reset",
-                    "cancel": "Cancel",
-                    "confirm": "Confirm"
-                }
-            },
-            "threadHistory": {
-                "sidebar": {
-                    "filters": {
-                        "FeedbackSelect": {
-                            "feedbackAll": "Feedback: All",
-                            "feedbackPositive": "Feedback: Positive",
-                            "feedbackNegative": "Feedback: Negative"
-                        },
-                        "SearchBar": {
-                            "search": "Search"
-                        }
-                    },
-                    "DeleteThreadButton": {
-                        "confirmMessage": "This will delete the thread as well as it's messages and elements.",
-                        "cancel": "Cancel",
-                        "confirm": "Confirm",
-                        "deletingChat": "Deleting chat",
-                        "chatDeleted": "Chat deleted"
-                    },
-                    "index": {
-                        "pastChats": "Past Chats"
-                    },
-                    "ThreadList": {
-                        "empty": "Empty...",
-                        "today": "Today",
-                        "yesterday": "Yesterday",
-                        "previous7days": "Previous 7 days",
-                        "previous30days": "Previous 30 days"
-                    },
-                    "TriggerButton": {
-                        "closeSidebar": "Close sidebar",
-                        "openSidebar": "Open sidebar"
-                    }
-                },
-                "Thread": {
-                    "backToChat": "Go back to chat",
-                    "chatCreatedOn": "This chat was created on"
-                }
-            },
-            "header": {
-                "chat": "Chat",
-                "readme": "Readme"
-            }
-        }
-    },
-    "hooks": {
-        "useLLMProviders": {
-            "failedToFetchProviders": "Failed to fetch providers:"
-        }
-    },
-    "pages": {
-        "Design": {},
-        "Env": {
-            "savedSuccessfully": "Saved successfully",
-            "requiredApiKeys": "Required API Keys",
-            "requiredApiKeysInfo": "To use this app, the following API keys are required. The keys are stored on your device's local storage."
-        },
-        "Page": {
-            "notPartOfProject": "You are not part of this project."
-        },
-        "ResumeButton": {
-            "resumeChat": "Resume Chat"
-        }
-    }
-}
code/main.py CHANGED
@@ -17,6 +17,7 @@ from modules.chat.helpers import (
     get_sources,
     get_history_chat_resume,
     get_history_setup_llm,
+    get_last_config,
 )
 import copy
 from typing import Optional
@@ -55,7 +56,7 @@ class Chatbot:
         """
         self.config = config
 
-    def _load_config(self):
+    async def _load_config(self):
         """
         Load the configuration from a YAML file.
         """
@@ -277,7 +278,7 @@ class Chatbot:
         rename_dict = {"Chatbot": "AI Tutor"}
         return rename_dict.get(orig_author, orig_author)
 
-    async def start(self):
+    async def start(self, config=None):
         """
         Start the chatbot, initialize settings widgets,
         and display and load previous conversation if chat logging is enabled.
@@ -285,6 +286,12 @@ class Chatbot:
 
         start_time = time.time()
 
+        self.config = (
+            await self._load_config() if config is None else config
+        ) # Reload the configuration on chat resume
+
+        await self.make_llm_settings_widgets(self.config) # Reload the settings widgets
+
         await self.make_llm_settings_widgets(self.config)
         user = cl.user_session.get("user")
         self.user = {
@@ -370,25 +377,6 @@ class Chatbot:
 
         answer = res.get("answer", res.get("result"))
 
-        if cl_data._data_layer is not None:
-            with cl_data._data_layer.client.step(
-                type="run",
-                name="step_info",
-                thread_id=cl.context.session.thread_id,
-                # tags=self.tags,
-            ) as step:
-
-                step.input = {"question": user_query_dict["input"]}
-
-                step.output = {
-                    "chat_history": res.get("chat_history"),
-                    "context": res.get("context"),
-                    "answer": answer,
-                    "rephrase_prompt": res.get("rephrase_prompt"),
-                    "qa_prompt": res.get("qa_prompt"),
-                }
-                step.metadata = self.config
-
         answer_with_sources, source_elements, sources_dict = get_sources(
             res, answer, stream=stream, view_sources=view_sources
         )
@@ -425,14 +413,21 @@ class Chatbot:
             elements=source_elements,
            author=LLM,
             actions=actions,
+            metadata=self.config,
         ).send()
 
     async def on_chat_resume(self, thread: ThreadDict):
+        thread_config = None
         steps = thread["steps"]
-        k = self.config["llm_params"]["memory_window"]
+        k = self.config["llm_params"][
+            "memory_window"
+        ] # on resume, always use the default memory window
         conversation_list = get_history_chat_resume(steps, k, SYSTEM, LLM)
+        thread_config = get_last_config(
+            steps
+        ) # TODO: Returns None for now - which causes config to be reloaded with default values
         cl.user_session.set("memory", conversation_list)
+        await self.start(config=thread_config)
 
 @cl.oauth_callback
 def auth_callback(
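The hunks above make `_load_config` a coroutine and have `start()` fall back to it whenever a resumed thread supplies no config. The body of `_load_config` is not shown in this diff; the following is only a minimal sketch of what an async YAML-based loader could look like, assuming the file lives at `modules/config/config.yml` under the `code/` working directory (both the path and the use of PyYAML are assumptions, not taken from this commit).

```python
# Hypothetical sketch of the config reload step -- not part of this commit.
import asyncio
from pathlib import Path

import yaml  # PyYAML


async def load_config(path: str = "modules/config/config.yml") -> dict:
    """Read the YAML config file and return it as a dict (assumed layout)."""
    return yaml.safe_load(Path(path).read_text())


if __name__ == "__main__":
    cfg = asyncio.run(load_config())
    print(cfg["llm_params"]["llm_loader"])
```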
code/modules/chat/chat_model_loader.py CHANGED
@@ -5,6 +5,8 @@ from langchain_community.llms import LlamaCpp
 import torch
 import transformers
 import os
+from pathlib import Path
+from huggingface_hub import hf_hub_download
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from modules.config.constants import LLAMA_PATH
@@ -15,6 +17,14 @@ class ChatModelLoader:
         self.config = config
         self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
+    def _verify_model_cache(self, model_cache_path):
+        hf_hub_download(
+            repo_id=self.config["llm_params"]["local_llm_params"]["repo_id"],
+            filename=self.config["llm_params"]["local_llm_params"]["filename"],
+            cache_dir=model_cache_path,
+        )
+        return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])
+
     def load_chat_model(self):
         if self.config["llm_params"]["llm_loader"] in [
             "gpt-3.5-turbo-1106",
@@ -24,6 +34,9 @@ class ChatModelLoader:
             llm = ChatOpenAI(model_name=self.config["llm_params"]["llm_loader"])
         elif self.config["llm_params"]["llm_loader"] == "local_llm":
            n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
+            model_path = self._verify_model_cache(
+                self.config["llm_params"]["local_llm_params"]["model"]
+            )
             llm = LlamaCpp(
                 model_path=LLAMA_PATH,
                 n_batch=n_batch,
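The new `_verify_model_cache` downloads the GGUF file named in `config.yml` into the given cache directory and then globs the Hugging Face cache layout (`*/snapshots/*/*.gguf`) to recover its path. For reference, `hf_hub_download` itself returns the path of the cached file, so an equivalent standalone sketch can use the return value directly; the `cache_dir` below mirrors the new `LLAMA_PATH` and is an assumption about where the cache lives.

```python
# Standalone sketch of the model-cache step -- illustrative, not the commit's exact code.
from huggingface_hub import hf_hub_download

# Values added to config.yml in this commit:
repo_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
filename = "tinyllama-1.1b-chat-v1.0.Q5_0.gguf"

# hf_hub_download returns the absolute path of the cached file, which could be
# passed straight to LlamaCpp(model_path=...).
model_path = hf_hub_download(
    repo_id=repo_id,
    filename=filename,
    cache_dir="../storage/models/tinyllama",  # assumed cache directory
)
print(model_path)
```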
code/modules/chat/helpers.py CHANGED
@@ -162,3 +162,8 @@ def get_history_setup_llm(memory_list):
         raise ValueError("Invalid message type")
 
     return conversation_list
+
+
+def get_last_config(steps):
+    # TODO: Implement this function
+    return None
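`get_last_config` is left as a stub that returns `None`, so resumed threads currently fall back to the default configuration. Since `main.py` now attaches `metadata=self.config` to outgoing messages, one hypothetical implementation (not what this commit does) would be to walk the thread's steps in reverse and return the most recently saved config:

```python
# Hypothetical sketch for get_last_config -- the commit's version simply returns None.
def get_last_config(steps):
    """Return the most recent config dict stored in a thread's step metadata, if any."""
    for step in reversed(steps):
        metadata = step.get("metadata") or {}
        # Assumes the dict saved via cl.Message(metadata=self.config) surfaces here;
        # the exact key layout on resumed steps may differ in practice.
        if "llm_params" in metadata:
            return metadata
    return None
```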
code/modules/config/config.yml CHANGED
@@ -35,6 +35,9 @@ llm_params:
     temperature: 0.7 # float
   local_llm_params:
     temperature: 0.7 # float
+    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' # HuggingFace repo id
+    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
+    pdf_reader: 'pymupdf' # str [llama, pymupdf, gpt]
   stream: False # bool
   pdf_reader: 'gpt' # str [llama, pymupdf, gpt]
 
@@ -54,4 +57,4 @@ splitter_options:
   chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
   front_chunks_to_remove : null # int or None
   last_chunks_to_remove : null # int or None
-  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
+  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
code/modules/config/constants.py CHANGED
@@ -18,6 +18,6 @@ opening_message = f"Hey, What Can I Help You With?\n\nYou can me ask me question
 
 # Model Paths
 
-LLAMA_PATH = "../storage/models/tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
+LLAMA_PATH = "../storage/models/tinyllama"
 
 RETRIEVER_HF_PATHS = {"RAGatouille": "XThomasBU/Colbert_Index"}
code/modules/dataloader/data_loader.py CHANGED
@@ -98,7 +98,6 @@ class FileReader:
         self.web_reader = HTMLReader()
         self.logger.info(f"Initialized FileReader with {kind} PDF reader and HTML reader")
 
-
     def extract_text_from_pdf(self, pdf_path):
         text = ""
         with open(pdf_path, "rb") as file:
@@ -315,6 +314,7 @@ class ChunkProcessor:
             return
 
         try:
+
             if file_path in self.document_data:
                 self.logger.warning(f"File {file_name} already processed")
                 documents = [Document(page_content=content) for content in self.document_data[file_path].values()]