diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000000000000000000000000000000000000..0d679e7528e74d3314fad16c0a87b39f611f3e6e --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,27 @@ +name: Publish to PyPI + +on: + push: # release tags only: a plain push to main would re-upload an already published version and fail + tags: + - "v*" + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.x" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} # from pypi + run: | + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.github/workflows/push_to_hf_space.yml b/.github/workflows/push_to_hf_space.yml index 7ede88475859295c5232ca6e3d55cae28ac221cf..a58a5bef43ae8685424b1cafcdc452b286d4fa45 100644 --- a/.github/workflows/push_to_hf_space.yml +++ b/.github/workflows/push_to_hf_space.yml @@ -18,4 +18,4 @@ jobs: - name: Deploy Production (main) to HuggingFace env: HF_TOKEN: ${{ secrets.HF_TOKEN }} - run: git push --force https://trgardos:$HF_TOKEN@huggingface.co/spaces/dl4ds/dl4ds_tutor main:main + run: git push --force https://trgardos:$HF_TOKEN@huggingface.co/spaces/edubotics/dl4ds_tutor main:main diff --git a/.vscode/launch.json b/.vscode/launch.json index 519df5be8a3481b69aa147c9fd083ce0470d4494..4cd291155f8a1255252f67255d2a326f1e55b1a3 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -17,7 +17,7 @@ { "name":"Python Debugger: Module store_manager", "type":"debugpy", "request":"launch", - "module":"modules.vectorstore.store_manager", + "module":"edubotics_core.vectorstore.store_manager", "env": {"PYTHONPATH": "${workspaceFolder}/code"}, "cwd": "${workspaceFolder}/code", "justMyCode": true @@ -26,7 +26,7 @@ "name": "Python Debugger: Module data_loader", "type": "debugpy", 
"request": "launch", - "module": "modules.dataloader.data_loader", + "module": "edubotics_core.dataloader.data_loader", "env": {"PYTHONPATH": "${workspaceFolder}/code"}, "cwd": "${workspaceFolder}/code", "justMyCode": true diff --git a/Dockerfile b/Dockerfile index cc7b53b213e0afe5d067b679e8e39e5a7540a56e..3cf5430acc91d9fe94c12a73b4061727200c34f8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,4 +40,4 @@ RUN --mount=type=secret,id=LITERAL_API_KEY_LOGGING,mode=0444,required=true RUN --mount=type=secret,id=CHAINLIT_AUTH_SECRET,mode=0444,required=true # Default command to run the application -CMD python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860 \ No newline at end of file +CMD python -m edubotics_core.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860 \ No newline at end of file diff --git a/Dockerfile.dev b/Dockerfile.dev index c63abdafe2434209a44ac26ddef5794c456a207a..31e4c0cf434e0f2752529d546088a2355ee6c023 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -35,4 +35,4 @@ RUN ls -R /code EXPOSE 7860 # Default command to run the application -CMD python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860 \ No newline at end of file +CMD python -m edubotics_core.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml && python -m uvicorn app:app --host 0.0.0.0 --port 7860 \ No newline at end of file diff --git a/README.md b/README.md index 13334f42e14510989b771a2f342ffbf749be08eb..80b3aed95e11ab07c74bbf82c02944d0abf12a4d 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,9 @@ app_port: 7860 --- # DL4DS Tutor 🏃 -![Build 
Status](https://github.com/DL4DS/dl4ds_tutor/actions/workflows/push_to_hf_space.yml/badge.svg) -![License](https://img.shields.io/github/license/DL4DS/dl4ds_tutor) -![GitHub stars](https://img.shields.io/github/stars/DL4DS/dl4ds_tutor) +![Build Status](https://github.com/edubotics-ai/edubot-core/actions/workflows/push_to_hf_space.yml/badge.svg) +![License](https://img.shields.io/github/license/edubotics-ai/edubot-core) +![GitHub stars](https://img.shields.io/github/stars/edubotics-ai/edubot-core) ![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square) @@ -33,7 +33,7 @@ Please visit [setup](https://dl4ds.github.io/dl4ds_tutor/guide/setup/) for more 1. **Clone the Repository** ```bash - git clone https://github.com/DL4DS/dl4ds_tutor + git clone https://github.com/edubotics-ai/edubot-core ``` 2. Create your app in the apps folder. (An example is the `apps/ai_tutor` app) @@ -49,13 +49,13 @@ Please visit [setup](https://dl4ds.github.io/dl4ds_tutor/guide/setup/) for more 3. **To test Data Loading (Optional)** ```bash cd apps/your_app - python -m modules.dataloader.data_loader --links "your_pdf_link" --config_file config/config.yml --project_config_file config/project_config.yml + python -m edubotics_core.dataloader.data_loader --links "your_pdf_link" --config_file config/config.yml --project_config_file config/project_config.yml ``` 4. **Create the Vector Database** ```bash cd apps/your_app - python -m modules.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml + python -m edubotics_core.vectorstore.store_manager --config_file config/config.yml --project_config_file config/project_config.yml ``` 6. 
**Run the FastAPI App** diff --git a/apps/ai_tutor/.chainlit/config.toml b/apps/ai_tutor/.chainlit/config.toml index 4ee8911d7816e23f173acaf01b98f158bbc62d1e..e1ecdb7c9767c341423545a17f3b74aa6731176c 100644 --- a/apps/ai_tutor/.chainlit/config.toml +++ b/apps/ai_tutor/.chainlit/config.toml @@ -65,7 +65,7 @@ default_collapse_content = true cot = "hidden" # Link to your github repo. This will add a github button in the UI's header. -github = "https://github.com/DL4DS/dl4ds_tutor" +github = "https://github.com/edubotics-ai/edubot-core" # Specify a CSS file that can be used to customize the user interface. # The CSS file can be served from the public directory or via an external link. diff --git a/apps/ai_tutor/app.py b/apps/ai_tutor/app.py index e26100ec24d6f064eb5915289c8d152ac925a744..9d0a2a04686fe4cb770ae514249352721a6ef2e8 100644 --- a/apps/ai_tutor/app.py +++ b/apps/ai_tutor/app.py @@ -21,7 +21,7 @@ from helpers import ( reset_tokens_for_user, check_user_cooldown, ) -from modules.chat_processor.helpers import get_user_details, update_user_info +from edubotics_core.chat_processor.helpers import get_user_details, update_user_info from config.config_manager import config_manager import hashlib diff --git a/apps/ai_tutor/chainlit_app.py b/apps/ai_tutor/chainlit_app.py index 800b6a5a2428cb39119f728da77c25441b5a364b..ba8768a4fa710e379b6acd0e69e30732b958fa1e 100644 --- a/apps/ai_tutor/chainlit_app.py +++ b/apps/ai_tutor/chainlit_app.py @@ -4,18 +4,18 @@ from config.constants import ( LITERAL_API_KEY_LOGGING, LITERAL_API_URL, ) -from modules.chat_processor.literal_ai import CustomLiteralDataLayer +from edubotics_core.chat_processor.literal_ai import CustomLiteralDataLayer import json from typing import Any, Dict, no_type_check import chainlit as cl -from modules.chat.llm_tutor import LLMTutor -from modules.chat.helpers import ( +from edubotics_core.chat.llm_tutor import LLMTutor +from edubotics_core.chat.helpers import ( get_sources, get_history_chat_resume, 
get_history_setup_llm, # get_last_config, ) -from modules.chat_processor.helpers import ( +from edubotics_core.chat_processor.helpers import ( update_user_info, get_user_details, ) diff --git a/apps/ai_tutor/config/config_manager.py b/apps/ai_tutor/config/config_manager.py index df540f9d82c05eef3809ac92421eade5a8ff0516..6cc5edfe0c96efe98ab9edecceac51e2b41725b9 100644 --- a/apps/ai_tutor/config/config_manager.py +++ b/apps/ai_tutor/config/config_manager.py @@ -104,7 +104,7 @@ class TokenConfig(BaseModel): class MiscConfig(BaseModel): - github_repo: HttpUrl = "https://github.com/DL4DS/dl4ds_tutor" + github_repo: HttpUrl = "https://github.com/edubotics-ai/edubot-core" docs_website: HttpUrl = "https://dl4ds.github.io/dl4ds_tutor/" diff --git a/apps/ai_tutor/config/project_config.yml b/apps/ai_tutor/config/project_config.yml index cc593aed17ac55608408302c18ed84129bc5efe3..9dbcf1dc9b64c2f96114ee51116996d10069ed7e 100644 --- a/apps/ai_tutor/config/project_config.yml +++ b/apps/ai_tutor/config/project_config.yml @@ -13,7 +13,7 @@ token_config: all_time_tokens_allocated: 1000000 misc: - github_repo: "https://github.com/DL4DS/dl4ds_tutor" + github_repo: "https://github.com/edubotics-ai/edubot-core" docs_website: "https://dl4ds.github.io/dl4ds_tutor/" api_config: diff --git a/apps/ai_tutor/helpers.py b/apps/ai_tutor/helpers.py index 32ff5d99c51dd7b3a699ddf090cddc4a8aeafcd8..a47d81ce7f6cee0635588d0b2b5a095915321f6e 100644 --- a/apps/ai_tutor/helpers.py +++ b/apps/ai_tutor/helpers.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta, timezone import tiktoken -from modules.chat_processor.helpers import update_user_info, convert_to_dict +from edubotics_core.chat_processor.helpers import update_user_info, convert_to_dict def get_time(): diff --git a/apps/chainlit_base/chainlit_base.py b/apps/chainlit_base/chainlit_base.py index 7fa537f5be273e6c10f0e93796531e2279217164..740015fe0c5a2a75b6eff26de820b8f671513bc6 100644 --- a/apps/chainlit_base/chainlit_base.py +++ 
b/apps/chainlit_base/chainlit_base.py @@ -2,8 +2,8 @@ import chainlit.data as cl_data import asyncio from typing import Any, Dict, no_type_check import chainlit as cl -from modules.chat.llm_tutor import LLMTutor -from modules.chat.helpers import ( +from edubotics_core.chat.llm_tutor import LLMTutor +from edubotics_core.chat.helpers import ( get_sources, get_history_setup_llm, ) diff --git a/modules/__init__.py b/edubotics_core/__init__.py similarity index 100% rename from modules/__init__.py rename to edubotics_core/__init__.py diff --git a/modules/chat/__init__.py b/edubotics_core/chat/__init__.py similarity index 100% rename from modules/chat/__init__.py rename to edubotics_core/chat/__init__.py diff --git a/modules/chat/base.py b/edubotics_core/chat/base.py similarity index 100% rename from modules/chat/base.py rename to edubotics_core/chat/base.py diff --git a/modules/chat/chat_model_loader.py b/edubotics_core/chat/chat_model_loader.py similarity index 100% rename from modules/chat/chat_model_loader.py rename to edubotics_core/chat/chat_model_loader.py diff --git a/modules/chat/helpers.py b/edubotics_core/chat/helpers.py similarity index 100% rename from modules/chat/helpers.py rename to edubotics_core/chat/helpers.py diff --git a/modules/chat/langchain/__init__.py b/edubotics_core/chat/langchain/__init__.py similarity index 100% rename from modules/chat/langchain/__init__.py rename to edubotics_core/chat/langchain/__init__.py diff --git a/modules/chat/langchain/langchain_rag.py b/edubotics_core/chat/langchain/langchain_rag.py similarity index 98% rename from modules/chat/langchain/langchain_rag.py rename to edubotics_core/chat/langchain/langchain_rag.py index 5f469b516807333876d35c80a450b68bb2743bbd..70b3120c2ed2eda7f253044f2f9dccc1ebfb9354 100644 --- a/modules/chat/langchain/langchain_rag.py +++ b/edubotics_core/chat/langchain/langchain_rag.py @@ -1,8 +1,8 @@ from langchain_core.prompts import ChatPromptTemplate -# from modules.chat.langchain.utils import +# 
from edubotics_core.chat.langchain.utils import from langchain_community.chat_message_histories import ChatMessageHistory -from modules.chat.base import BaseRAG +from edubotics_core.chat.base import BaseRAG from langchain_core.prompts import PromptTemplate from langchain.memory import ConversationBufferWindowMemory from langchain_core.runnables.utils import ConfigurableFieldSpec diff --git a/modules/chat/langchain/utils.py b/edubotics_core/chat/langchain/utils.py similarity index 100% rename from modules/chat/langchain/utils.py rename to edubotics_core/chat/langchain/utils.py diff --git a/modules/chat/llm_tutor.py b/edubotics_core/chat/llm_tutor.py similarity index 94% rename from modules/chat/llm_tutor.py rename to edubotics_core/chat/llm_tutor.py index 3177e1dba2cf0ab259ab9af245d6f8ebb72751aa..4499887acb62c0430af43ae0e3d5180d8af9fe21 100644 --- a/modules/chat/llm_tutor.py +++ b/edubotics_core/chat/llm_tutor.py @@ -1,8 +1,8 @@ -from modules.chat.helpers import get_prompt -from modules.chat.chat_model_loader import ChatModelLoader -from modules.vectorstore.store_manager import VectorStoreManager -from modules.retriever.retriever import Retriever -from modules.chat.langchain.langchain_rag import ( +from edubotics_core.chat.helpers import get_prompt +from edubotics_core.chat.chat_model_loader import ChatModelLoader +from edubotics_core.vectorstore.store_manager import VectorStoreManager +from edubotics_core.retriever.retriever import Retriever +from edubotics_core.chat.langchain.langchain_rag import ( Langchain_RAG_V2, QuestionGenerator, ) diff --git a/modules/chat_processor/__init__.py b/edubotics_core/chat_processor/__init__.py similarity index 100% rename from modules/chat_processor/__init__.py rename to edubotics_core/chat_processor/__init__.py diff --git a/modules/chat_processor/helpers.py b/edubotics_core/chat_processor/helpers.py similarity index 100% rename from modules/chat_processor/helpers.py rename to edubotics_core/chat_processor/helpers.py diff --git 
a/modules/chat_processor/literal_ai.py b/edubotics_core/chat_processor/literal_ai.py similarity index 100% rename from modules/chat_processor/literal_ai.py rename to edubotics_core/chat_processor/literal_ai.py diff --git a/modules/config/__init__.py b/edubotics_core/config/__init__.py similarity index 100% rename from modules/config/__init__.py rename to edubotics_core/config/__init__.py diff --git a/modules/config/constants.py b/edubotics_core/config/constants.py similarity index 100% rename from modules/config/constants.py rename to edubotics_core/config/constants.py diff --git a/modules/dataloader/__init__.py b/edubotics_core/dataloader/__init__.py similarity index 100% rename from modules/dataloader/__init__.py rename to edubotics_core/dataloader/__init__.py diff --git a/modules/dataloader/data_loader.py b/edubotics_core/dataloader/data_loader.py similarity index 98% rename from modules/dataloader/data_loader.py rename to edubotics_core/dataloader/data_loader.py index c2a2fe47848ff41a688e44098c2fabf58cbb7f62..8c24ea97c0bf8b867dc078e34380532d06b2ff6a 100644 --- a/modules/dataloader/data_loader.py +++ b/edubotics_core/dataloader/data_loader.py @@ -18,11 +18,11 @@ from urllib.parse import urljoin import html2text import bs4 import PyPDF2 -from modules.dataloader.pdf_readers.base import PDFReader -from modules.dataloader.pdf_readers.llama import LlamaParser -from modules.dataloader.pdf_readers.gpt import GPTParser -from modules.dataloader.helpers import get_metadata -from modules.config.constants import TIMEOUT +from edubotics_core.dataloader.pdf_readers.base import PDFReader +from edubotics_core.dataloader.pdf_readers.llama import LlamaParser +from edubotics_core.dataloader.pdf_readers.gpt import GPTParser +from edubotics_core.dataloader.helpers import get_metadata +from edubotics_core.config.constants import TIMEOUT logger = logging.getLogger(__name__) BASE_DIR = os.getcwd() diff --git a/modules/dataloader/helpers.py b/edubotics_core/dataloader/helpers.py 
similarity index 99% rename from modules/dataloader/helpers.py rename to edubotics_core/dataloader/helpers.py index c7219e04fd10eb2ec3c6ff0041766eaef6dacc4a..00cd2c2d86bfbdf21d2a6e46bf1c13b3ad13aa73 100644 --- a/modules/dataloader/helpers.py +++ b/edubotics_core/dataloader/helpers.py @@ -2,7 +2,7 @@ import requests from bs4 import BeautifulSoup from urllib.parse import urlparse import tempfile -from modules.config.constants import ( +from edubotics_core.config.constants import ( TIMEOUT, ) # TODO: MOVE THIS TO APP SPECIFIC DIRECTORY diff --git a/modules/dataloader/pdf_readers/__init__.py b/edubotics_core/dataloader/pdf_readers/__init__.py similarity index 100% rename from modules/dataloader/pdf_readers/__init__.py rename to edubotics_core/dataloader/pdf_readers/__init__.py diff --git a/modules/dataloader/pdf_readers/base.py b/edubotics_core/dataloader/pdf_readers/base.py similarity index 100% rename from modules/dataloader/pdf_readers/base.py rename to edubotics_core/dataloader/pdf_readers/base.py diff --git a/modules/dataloader/pdf_readers/gpt.py b/edubotics_core/dataloader/pdf_readers/gpt.py similarity index 98% rename from modules/dataloader/pdf_readers/gpt.py rename to edubotics_core/dataloader/pdf_readers/gpt.py index 9d839ae8db526dadfc62adeadc73d441c4b4ee32..67af0ddd1eebfa9dcae5503835e503db2886cd5b 100644 --- a/modules/dataloader/pdf_readers/gpt.py +++ b/edubotics_core/dataloader/pdf_readers/gpt.py @@ -6,7 +6,7 @@ from io import BytesIO from openai import OpenAI from pdf2image import convert_from_path from langchain.schema import Document -from modules.config.constants import TIMEOUT +from edubotics_core.config.constants import TIMEOUT class GPTParser: diff --git a/modules/dataloader/pdf_readers/llama.py b/edubotics_core/dataloader/pdf_readers/llama.py similarity index 95% rename from modules/dataloader/pdf_readers/llama.py rename to edubotics_core/dataloader/pdf_readers/llama.py index 
6136c96eab41b7316b85d931a9e5edf2cf0428f5..2b21fa35d76cc7812c53cd6b1649c8fc0d4b9aa2 100644 --- a/modules/dataloader/pdf_readers/llama.py +++ b/edubotics_core/dataloader/pdf_readers/llama.py @@ -2,8 +2,8 @@ import os import requests from llama_parse import LlamaParse from langchain.schema import Document -from modules.config.constants import OPENAI_API_KEY, LLAMA_CLOUD_API_KEY, TIMEOUT -from modules.dataloader.helpers import download_pdf_from_url +from edubotics_core.config.constants import OPENAI_API_KEY, LLAMA_CLOUD_API_KEY, TIMEOUT +from edubotics_core.dataloader.helpers import download_pdf_from_url class LlamaParser: diff --git a/modules/dataloader/webpage_crawler.py b/edubotics_core/dataloader/webpage_crawler.py similarity index 98% rename from modules/dataloader/webpage_crawler.py rename to edubotics_core/dataloader/webpage_crawler.py index 7138b724af4d7e35da95f50f7946ee455d1441e2..5ecb3c628cd939a364f54db025f3d4aaa7c386a6 100644 --- a/modules/dataloader/webpage_crawler.py +++ b/edubotics_core/dataloader/webpage_crawler.py @@ -4,7 +4,7 @@ import asyncio import requests from bs4 import BeautifulSoup from urllib.parse import urljoin, urldefrag -from modules.config.constants import TIMEOUT +from edubotics_core.config.constants import TIMEOUT class WebpageCrawler: diff --git a/modules/retriever/__init__.py b/edubotics_core/retriever/__init__.py similarity index 100% rename from modules/retriever/__init__.py rename to edubotics_core/retriever/__init__.py diff --git a/modules/retriever/base.py b/edubotics_core/retriever/base.py similarity index 100% rename from modules/retriever/base.py rename to edubotics_core/retriever/base.py diff --git a/modules/retriever/chroma_retriever.py b/edubotics_core/retriever/chroma_retriever.py similarity index 100% rename from modules/retriever/chroma_retriever.py rename to edubotics_core/retriever/chroma_retriever.py diff --git a/modules/retriever/colbert_retriever.py b/edubotics_core/retriever/colbert_retriever.py similarity index 
100% rename from modules/retriever/colbert_retriever.py rename to edubotics_core/retriever/colbert_retriever.py diff --git a/modules/retriever/faiss_retriever.py b/edubotics_core/retriever/faiss_retriever.py similarity index 100% rename from modules/retriever/faiss_retriever.py rename to edubotics_core/retriever/faiss_retriever.py diff --git a/modules/retriever/helpers.py b/edubotics_core/retriever/helpers.py similarity index 100% rename from modules/retriever/helpers.py rename to edubotics_core/retriever/helpers.py diff --git a/modules/retriever/raptor_retriever.py b/edubotics_core/retriever/raptor_retriever.py similarity index 100% rename from modules/retriever/raptor_retriever.py rename to edubotics_core/retriever/raptor_retriever.py diff --git a/modules/retriever/retriever.py b/edubotics_core/retriever/retriever.py similarity index 71% rename from modules/retriever/retriever.py rename to edubotics_core/retriever/retriever.py index 1ae6370a513e3f478ca37e1e326178189e21bc49..328353c4902fdc5e4a4011d4dbdd53ed2df923f7 100644 --- a/modules/retriever/retriever.py +++ b/edubotics_core/retriever/retriever.py @@ -1,7 +1,7 @@ -from modules.retriever.faiss_retriever import FaissRetriever -from modules.retriever.chroma_retriever import ChromaRetriever -from modules.retriever.colbert_retriever import ColbertRetriever -from modules.retriever.raptor_retriever import RaptorRetriever +from edubotics_core.retriever.faiss_retriever import FaissRetriever +from edubotics_core.retriever.chroma_retriever import ChromaRetriever +from edubotics_core.retriever.colbert_retriever import ColbertRetriever +from edubotics_core.retriever.raptor_retriever import RaptorRetriever class Retriever: diff --git a/modules/vectorstore/__init__.py b/edubotics_core/vectorstore/__init__.py similarity index 100% rename from modules/vectorstore/__init__.py rename to edubotics_core/vectorstore/__init__.py diff --git a/modules/vectorstore/base.py b/edubotics_core/vectorstore/base.py similarity index 100% 
rename from modules/vectorstore/base.py rename to edubotics_core/vectorstore/base.py diff --git a/modules/vectorstore/chroma.py b/edubotics_core/vectorstore/chroma.py similarity index 95% rename from modules/vectorstore/chroma.py rename to edubotics_core/vectorstore/chroma.py index d502d9e9ce4138ff4850d9c25c498b176943f3d1..1a5d44b341dfea2d86468ef506d28d3529fe78ec 100644 --- a/modules/vectorstore/chroma.py +++ b/edubotics_core/vectorstore/chroma.py @@ -1,5 +1,5 @@ from langchain_community.vectorstores import Chroma -from modules.vectorstore.base import VectorStoreBase +from edubotics_core.vectorstore.base import VectorStoreBase import os diff --git a/modules/vectorstore/colbert.py b/edubotics_core/vectorstore/colbert.py similarity index 98% rename from modules/vectorstore/colbert.py rename to edubotics_core/vectorstore/colbert.py index 67ab9f1d9cb30ce6ddf9015e782292e41f41ca49..fcbbbd433d9f8a878b72ffb573cf2ddc0534ab30 100644 --- a/modules/vectorstore/colbert.py +++ b/edubotics_core/vectorstore/colbert.py @@ -1,5 +1,5 @@ from ragatouille import RAGPretrainedModel -from modules.vectorstore.base import VectorStoreBase +from edubotics_core.vectorstore.base import VectorStoreBase from langchain_core.retrievers import BaseRetriever from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun from langchain_core.documents import Document diff --git a/modules/vectorstore/embedding_model_loader.py b/edubotics_core/vectorstore/embedding_model_loader.py similarity index 92% rename from modules/vectorstore/embedding_model_loader.py rename to edubotics_core/vectorstore/embedding_model_loader.py index 76504dbd02237d425daeb350d47a001e4913474b..bb59910977087d4b898be0d8269c8a872b4e04d7 100644 --- a/modules/vectorstore/embedding_model_loader.py +++ b/edubotics_core/vectorstore/embedding_model_loader.py @@ -1,6 +1,6 @@ from langchain_community.embeddings import OpenAIEmbeddings from langchain_community.embeddings import HuggingFaceEmbeddings -from 
modules.config.constants import OPENAI_API_KEY, HUGGINGFACE_TOKEN +from edubotics_core.config.constants import OPENAI_API_KEY, HUGGINGFACE_TOKEN class EmbeddingModelLoader: diff --git a/modules/vectorstore/faiss.py b/edubotics_core/vectorstore/faiss.py similarity index 95% rename from modules/vectorstore/faiss.py rename to edubotics_core/vectorstore/faiss.py index 319241eaac6a617f946773379f2b5efebcf9f971..b1076946561a18edebf3d23cbe10f8ed43d899fa 100644 --- a/modules/vectorstore/faiss.py +++ b/edubotics_core/vectorstore/faiss.py @@ -1,5 +1,5 @@ from langchain_community.vectorstores import FAISS -from modules.vectorstore.base import VectorStoreBase +from edubotics_core.vectorstore.base import VectorStoreBase import os diff --git a/modules/vectorstore/helpers.py b/edubotics_core/vectorstore/helpers.py similarity index 100% rename from modules/vectorstore/helpers.py rename to edubotics_core/vectorstore/helpers.py diff --git a/modules/vectorstore/raptor.py b/edubotics_core/vectorstore/raptor.py similarity index 99% rename from modules/vectorstore/raptor.py rename to edubotics_core/vectorstore/raptor.py index 4659571cd7712a9b93f7d31a65b253e95762b303..4db12496c99f21c929bbe68ff048694040c228bb 100644 --- a/modules/vectorstore/raptor.py +++ b/edubotics_core/vectorstore/raptor.py @@ -11,7 +11,7 @@ from sklearn.mixture import GaussianMixture from langchain_community.chat_models import ChatOpenAI from langchain_community.vectorstores import FAISS from langchain.text_splitter import RecursiveCharacterTextSplitter -from modules.vectorstore.base import VectorStoreBase +from edubotics_core.vectorstore.base import VectorStoreBase RANDOM_SEED = 42 diff --git a/modules/vectorstore/store_manager.py b/edubotics_core/vectorstore/store_manager.py similarity index 94% rename from modules/vectorstore/store_manager.py rename to edubotics_core/vectorstore/store_manager.py index a3f9bd237c66f440769e59d779212203176fc200..54d447937206d32de79671d520b8df095964a344 100644 --- 
a/modules/vectorstore/store_manager.py +++ b/edubotics_core/vectorstore/store_manager.py @@ -1,12 +1,14 @@ -from modules.vectorstore.vectorstore import VectorStore -from modules.dataloader.helpers import get_urls_from_file -from modules.dataloader.webpage_crawler import WebpageCrawler -from modules.dataloader.data_loader import DataLoader -from modules.vectorstore.embedding_model_loader import EmbeddingModelLoader +from edubotics_core.vectorstore.vectorstore import VectorStore +from edubotics_core.dataloader.helpers import get_urls_from_file +from edubotics_core.dataloader.webpage_crawler import WebpageCrawler +from edubotics_core.dataloader.data_loader import DataLoader +from edubotics_core.vectorstore.embedding_model_loader import EmbeddingModelLoader import logging import os import time import asyncio +import yaml +import argparse class VectorStoreManager: @@ -137,7 +139,7 @@ class VectorStoreManager: self.loaded_vector_db = self.vector_db._load_database(self.embedding_model) except Exception as e: raise ValueError( - f"Error loading database, check if it exists. if not run python -m modules.vectorstore.store_manager / Resteart the HF Space: {e}" + f"Error loading database, check if it exists. 
if not run python -m edubotics_core.vectorstore.store_manager / Resteart the HF Space: {e}" ) # print(f"Creating database") # self.create_database() @@ -162,10 +164,7 @@ class VectorStoreManager: return len(self.vector_db) -if __name__ == "__main__": - import yaml - import argparse - +def main(): # Add argument parsing for config files parser = argparse.ArgumentParser(description="Load configuration files.") parser.add_argument( @@ -216,3 +215,7 @@ if __name__ == "__main__": print("Loaded database") print(f"View the logs at {config['log_dir']}/vector_db.log") + + +if __name__ == "__main__": + main() diff --git a/modules/vectorstore/vectorstore.py b/edubotics_core/vectorstore/vectorstore.py similarity index 91% rename from modules/vectorstore/vectorstore.py rename to edubotics_core/vectorstore/vectorstore.py index fef003f66b428eebb739da607723f368171c1ebc..81d801e23e4188afd07817710cf51cc16d039a13 100644 --- a/modules/vectorstore/vectorstore.py +++ b/edubotics_core/vectorstore/vectorstore.py @@ -1,7 +1,7 @@ -from modules.vectorstore.faiss import FaissVectorStore -from modules.vectorstore.chroma import ChromaVectorStore -from modules.vectorstore.colbert import ColbertVectorStore -from modules.vectorstore.raptor import RAPTORVectoreStore +from edubotics_core.vectorstore.faiss import FaissVectorStore +from edubotics_core.vectorstore.chroma import ChromaVectorStore +from edubotics_core.vectorstore.colbert import ColbertVectorStore +from edubotics_core.vectorstore.raptor import RAPTORVectoreStore from huggingface_hub import snapshot_download import os import shutil diff --git a/setup.py b/setup.py index 1ac55d5706e43e8d363984b377f5f64215a34e47..8feaa857f0d24560ee0af3b854153f707e70a85e 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,25 @@ from setuptools import setup, find_packages with open("requirements.txt") as f: requirements = f.read().splitlines() +with open("README.md", encoding="utf-8") as f: # README contains non-ASCII characters; do not rely on the locale encoding + readme = f.read() + setup( - name="dl4ds_tutor", + name="edubotics-core", 
 version="0.1.0", packages=find_packages(), - package_dir={"modules": "modules"}, + package_dir={"edubotics_core": "edubotics_core"}, # key is the importable package name, not the distribution name python_requires=">=3.7", install_requires=requirements, - description="A Deep Learning for Data Science Tutor application", + description="Core modules for edubotics-based LLM AI chatbots", + author="Xavier Thomas, Farid Karimli, Tom Gardos", + url="https://github.com/edubotics-ai/edubot-core", + license="MIT", + long_description=readme, + long_description_content_type="text/markdown", + entry_points={ + "console_scripts": [ + "vectorstore_creator=edubotics_core.vectorstore.store_manager:main", + ], + }, )