import os
from typing import Literal, Optional
from pydantic import Field
from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    ENVIRONMENT: str
    PORT: int = 8000
    VECTOR_DATABASE: Literal["weaviate"] = "weaviate"

    # OpenAI backend
    OPENAI_API_KEY: Optional[str] = None
    OPENAI_MODEL: str = "gpt-3.5-turbo"

    WEAVIATE_CLIENT_URL: str = "http://localhost:8080"

    # Backend selection: "openai" calls the OpenAI API, "local" uses the
    # Hugging Face models configured below, and "mock" avoids external calls.
    LLM_MODE: Literal["openai", "mock", "local"] = "mock"
    EMBEDDING_MODE: Literal["openai", "mock", "local"] = "mock"

    LOCAL_DATA_FOLDER: str = "local_data/test"
    DEFAULT_QUERY_SYSTEM_PROMPT: str = (
        "You can only answer questions about the provided context. "
        "If you know the answer but it is not based in the provided context, "
        "don't provide the answer, just state the answer is not in the context provided."
    )

    # Local Hugging Face models (used when a mode is set to "local")
    LOCAL_HF_EMBEDDING_MODEL_NAME: str = "BAAI/bge-small-en-v1.5"
    LOCAL_HF_LLM_REPO_ID: str = "TheBloke/Llama-2-7B-Chat-GGUF"
    LOCAL_HF_LLM_MODEL_FILE: str = "llama-2-7b-chat.Q4_K_M.gguf"

    # LLM config
    LLM_TEMPERATURE: float = Field(
        default=0.1, description="The temperature to use for sampling."
    )
    LLM_MAX_NEW_TOKENS: int = Field(
        default=256,
        description="The maximum number of tokens to generate.",
    )
    LLM_CONTEXT_WINDOW: int = Field(
        default=3900,
        description="The maximum number of context tokens for the model.",
    )

    # UI
    IS_UI_ENABLED: bool = True
    UI_PATH: str = "/"

    # Rerank
    IS_RERANK_ENABLED: bool = True
    RERANK_TOP_N: int = 3
    RERANK_MODEL_NAME: str = "cross-encoder/ms-marco-MiniLM-L-2-v2"

    class Config:
        # Env var names must match the field names exactly (case-sensitive).
        case_sensitive = True
        env_file_encoding = "utf-8"
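

# The active environment ("local" by default) selects an extra dotenv file
# that is layered on top of the shared ".env". An illustrative ".env.local"
# (the file name and values below are examples, not part of this file) might
# contain:
#
#     LLM_MODE=local
#     EMBEDDING_MODE=local
#     WEAVIATE_CLIENT_URL=http://localhost:8080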
environment = os.environ.get("ENVIRONMENT", "local")
settings = Settings(
    ENVIRONMENT=environment,
    # ".env.{environment}" takes priority over ".env": when pydantic-settings
    # is given a list of env files, later entries override earlier ones.
    _env_file=[".env", f".env.{environment}"],
)
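
# Minimal sanity check (an illustrative sketch, not part of the original
# module): dump the resolved configuration. `model_dump()` is the pydantic v2
# API; on pydantic v1, use `settings.dict()` instead.
if __name__ == "__main__":
    import json

    print(json.dumps(settings.model_dump(), indent=2, default=str))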