# NOTE: removed page-scrape residue ("Spaces: / Paused / Paused") that was not
# part of this module — it came from the hosting UI, not the source code.
import os

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.tracers import LangChainTracer
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_anthropic import ChatAnthropic
from langchain_cohere import CohereRerank
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache
from langchain_experimental.text_splitter import SemanticChunker
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore, Qdrant
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient

import constants
# --- LangSmith tracing and global LLM cache ----------------------------------
# Credentials and endpoint come from the local `constants` module; LangChain's
# tracer reads them from the process environment, so export them here.
os.environ["LANGCHAIN_API_KEY"] = constants.LANGCHAIN_API_KEY
os.environ["LANGCHAIN_TRACING_V2"] = str(constants.LANGCHAIN_TRACING_V2)
os.environ["LANGCHAIN_ENDPOINT"] = constants.LANGCHAIN_ENDPOINT

# Cache identical LLM calls in-process to cut latency and token spend.
set_llm_cache(InMemoryCache())

# One tracer shared by every model below through this callback manager.
tracer = LangChainTracer()
callback_manager = CallbackManager([tracer])
########################
###   Chat Models    ###
########################
# Anthropic models kept for reference; re-enable by uncommenting.
# NOTE(review): `constants.ANTRHOPIC_API_KEY` looks misspelled ("ANTRHOPIC") —
# confirm the attribute name in constants.py before re-enabling.
#opus3 = ChatAnthropic(
#    api_key=constants.ANTRHOPIC_API_KEY,
#    temperature=0,
#    model='claude-3-opus-20240229',
#    callbacks=callback_manager
#)
#
#sonnet35 = ChatAnthropic(
#    api_key=constants.ANTRHOPIC_API_KEY,
#    temperature=0,
#    model='claude-3-5-sonnet-20240620',
#    max_tokens=4096,
#    callbacks=callback_manager
#)
def _openai_chat(model: str) -> ChatOpenAI:
    """Build a deterministic OpenAI chat model wired into the shared tracer.

    The three module-level models below differ only by model name, so the
    common configuration lives here.

    Args:
        model: OpenAI model identifier (e.g. "gpt-4o").

    Returns:
        A configured ``ChatOpenAI`` instance.
    """
    return ChatOpenAI(
        model=model,
        temperature=0,      # deterministic output for reproducible runs
        max_tokens=None,    # no explicit cap; use the API's default
        timeout=None,
        max_retries=2,
        api_key=constants.OPENAI_API_KEY,
        callbacks=callback_manager,
    )


gpt4 = _openai_chat("gpt-4")
gpt4o = _openai_chat("gpt-4o")
gpt4o_mini = _openai_chat("gpt-4o-mini")
########################
### Embedding Models ###
########################
#basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
# Snowflake-arctic embedding model fine-tuned for marketing text; fetched from
# the Hugging Face hub on first use.
tuned_embeddings = HuggingFaceEmbeddings(model_name="CoExperiences/snowflake-l-marketing-tuned")
#te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
#######################
###  Text Splitters ###
#######################
#semanticChunker = SemanticChunker(
#    te3_small,
#    breakpoint_threshold_type="percentile"
#)
# Split wherever the embedding-similarity gap between consecutive sentences
# exceeds the 85th percentile of all gaps (higher amount => fewer, larger chunks).
semanticChunker_tuned = SemanticChunker(
    tuned_embeddings,
    breakpoint_threshold_type="percentile",
    breakpoint_threshold_amount=85,
)
#RCTS = RecursiveCharacterTextSplitter(
#    # Set a really small chunk size, just to show.
#    chunk_size=500,
#    chunk_overlap=25,
#    length_function=len,
#)
#######################
###  Vector Stores  ###
#######################
# Remote Qdrant instance; endpoint and key come from the constants module.
qdrant_client = QdrantClient(url=constants.QDRANT_ENDPOINT, api_key=constants.QDRANT_API_KEY)
#semantic_Qdrant_vs = QdrantVectorStore(
#    client=qdrant_client,
#    collection_name="docs_from_ripped_urls",
#    embedding=te3_small
#)
#
#rcts_Qdrant_vs = QdrantVectorStore(
#    client=qdrant_client,
#    collection_name="docs_from_ripped_urls_recursive",
#    embedding=te3_small
#)
collection_name = "docs_from_ripped_urls_semantic_tuned"
semantic_tuned_Qdrant_vs = QdrantVectorStore(
    client=qdrant_client,
    collection_name=collection_name,
    # Must be the same embedding model the collection was indexed with.
    embedding=tuned_embeddings,
)
#######################
###   Retrievers    ###
#######################
# First stage: top-10 similarity search over the tuned-embedding collection.
semantic_tuned_retriever = semantic_tuned_Qdrant_vs.as_retriever(search_kwargs={"k": 10})
# Second stage: rerank those 10 candidates with Cohere's cross-encoder.
# NOTE(review): CohereRerank reads COHERE_API_KEY from the environment — this
# module never exports it (unlike the LangChain vars above); confirm it is set.
compressor = CohereRerank(model="rerank-english-v3.0")
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=semantic_tuned_retriever
)