Spaces:
Paused
Paused
angry-meow
committed on
Commit
•
b2f993e
1
Parent(s):
05201cc
commenting out unused stuff
Browse files
agents.py
CHANGED
@@ -55,7 +55,7 @@ voice_editor_agent = create_team_agent(
|
|
55 |
|
56 |
simple_rag_chain = (
|
57 |
{
|
58 |
-
"context": itemgetter("question") | models.
|
59 |
"question": itemgetter("question"),
|
60 |
"writing_style_guide": lambda _: prompts.style_guide_text
|
61 |
}
|
|
|
55 |
|
56 |
simple_rag_chain = (
|
57 |
{
|
58 |
+
"context": itemgetter("question") | models.semantic_tuned_retriever,
|
59 |
"question": itemgetter("question"),
|
60 |
"writing_style_guide": lambda _: prompts.style_guide_text
|
61 |
}
|
models.py
CHANGED
@@ -28,20 +28,20 @@ callback_manager = CallbackManager([tracer])
|
|
28 |
### Chat Models ###
|
29 |
########################
|
30 |
|
31 |
-
opus3 = ChatAnthropic(
|
32 |
-
api_key=constants.ANTRHOPIC_API_KEY,
|
33 |
-
temperature=0,
|
34 |
-
model='claude-3-opus-20240229',
|
35 |
-
callbacks=callback_manager
|
36 |
-
)
|
37 |
-
|
38 |
-
sonnet35 = ChatAnthropic(
|
39 |
-
api_key=constants.ANTRHOPIC_API_KEY,
|
40 |
-
temperature=0,
|
41 |
-
model='claude-3-5-sonnet-20240620',
|
42 |
-
max_tokens=4096,
|
43 |
-
callbacks=callback_manager
|
44 |
-
)
|
45 |
|
46 |
gpt4 = ChatOpenAI(
|
47 |
model="gpt-4",
|
@@ -77,20 +77,20 @@ gpt4o_mini = ChatOpenAI(
|
|
77 |
### Embedding Models ###
|
78 |
########################
|
79 |
|
80 |
-
basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
|
81 |
|
82 |
tuned_embeddings = HuggingFaceEmbeddings(model_name="CoExperiences/snowflake-l-marketing-tuned")
|
83 |
|
84 |
-
te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
|
85 |
|
86 |
#######################
|
87 |
### Text Splitters ###
|
88 |
#######################
|
89 |
|
90 |
-
semanticChunker = SemanticChunker(
|
91 |
-
te3_small,
|
92 |
-
breakpoint_threshold_type="percentile"
|
93 |
-
)
|
94 |
|
95 |
semanticChunker_tuned = SemanticChunker(
|
96 |
tuned_embeddings,
|
@@ -98,12 +98,12 @@ semanticChunker_tuned = SemanticChunker(
|
|
98 |
breakpoint_threshold_amount=85
|
99 |
)
|
100 |
|
101 |
-
RCTS = RecursiveCharacterTextSplitter(
|
102 |
-
# Set a really small chunk size, just to show.
|
103 |
-
chunk_size=500,
|
104 |
-
chunk_overlap=25,
|
105 |
-
length_function=len,
|
106 |
-
)
|
107 |
|
108 |
#######################
|
109 |
### Vector Stores ###
|
@@ -111,17 +111,17 @@ RCTS = RecursiveCharacterTextSplitter(
|
|
111 |
|
112 |
qdrant_client = QdrantClient(url=constants.QDRANT_ENDPOINT, api_key=constants.QDRANT_API_KEY)
|
113 |
|
114 |
-
semantic_Qdrant_vs = QdrantVectorStore(
|
115 |
-
client=qdrant_client,
|
116 |
-
collection_name="docs_from_ripped_urls",
|
117 |
-
embedding=te3_small
|
118 |
-
)
|
119 |
-
|
120 |
-
rcts_Qdrant_vs = QdrantVectorStore(
|
121 |
-
client=qdrant_client,
|
122 |
-
collection_name="docs_from_ripped_urls_recursive",
|
123 |
-
embedding=te3_small
|
124 |
-
)
|
125 |
|
126 |
semantic_tuned_Qdrant_vs = QdrantVectorStore(
|
127 |
client=qdrant_client,
|
|
|
28 |
### Chat Models ###
|
29 |
########################
|
30 |
|
31 |
+
#opus3 = ChatAnthropic(
|
32 |
+
# api_key=constants.ANTRHOPIC_API_KEY,
|
33 |
+
# temperature=0,
|
34 |
+
# model='claude-3-opus-20240229',
|
35 |
+
# callbacks=callback_manager
|
36 |
+
#)
|
37 |
+
#
|
38 |
+
#sonnet35 = ChatAnthropic(
|
39 |
+
# api_key=constants.ANTRHOPIC_API_KEY,
|
40 |
+
# temperature=0,
|
41 |
+
# model='claude-3-5-sonnet-20240620',
|
42 |
+
# max_tokens=4096,
|
43 |
+
# callbacks=callback_manager
|
44 |
+
#)
|
45 |
|
46 |
gpt4 = ChatOpenAI(
|
47 |
model="gpt-4",
|
|
|
77 |
### Embedding Models ###
|
78 |
########################
|
79 |
|
80 |
+
#basic_embeddings = HuggingFaceEmbeddings(model_name="snowflake/snowflake-arctic-embed-l")
|
81 |
|
82 |
tuned_embeddings = HuggingFaceEmbeddings(model_name="CoExperiences/snowflake-l-marketing-tuned")
|
83 |
|
84 |
+
#te3_small = OpenAIEmbeddings(api_key=constants.OPENAI_API_KEY, model="text-embedding-3-small")
|
85 |
|
86 |
#######################
|
87 |
### Text Splitters ###
|
88 |
#######################
|
89 |
|
90 |
+
#semanticChunker = SemanticChunker(
|
91 |
+
# te3_small,
|
92 |
+
# breakpoint_threshold_type="percentile"
|
93 |
+
#)
|
94 |
|
95 |
semanticChunker_tuned = SemanticChunker(
|
96 |
tuned_embeddings,
|
|
|
98 |
breakpoint_threshold_amount=85
|
99 |
)
|
100 |
|
101 |
+
#RCTS = RecursiveCharacterTextSplitter(
|
102 |
+
# # Set a really small chunk size, just to show.
|
103 |
+
# chunk_size=500,
|
104 |
+
# chunk_overlap=25,
|
105 |
+
# length_function=len,
|
106 |
+
#)
|
107 |
|
108 |
#######################
|
109 |
### Vector Stores ###
|
|
|
111 |
|
112 |
qdrant_client = QdrantClient(url=constants.QDRANT_ENDPOINT, api_key=constants.QDRANT_API_KEY)
|
113 |
|
114 |
+
#semantic_Qdrant_vs = QdrantVectorStore(
|
115 |
+
# client=qdrant_client,
|
116 |
+
# collection_name="docs_from_ripped_urls",
|
117 |
+
# embedding=te3_small
|
118 |
+
#)
|
119 |
+
#
|
120 |
+
#rcts_Qdrant_vs = QdrantVectorStore(
|
121 |
+
# client=qdrant_client,
|
122 |
+
# collection_name="docs_from_ripped_urls_recursive",
|
123 |
+
# embedding=te3_small
|
124 |
+
#)
|
125 |
|
126 |
semantic_tuned_Qdrant_vs = QdrantVectorStore(
|
127 |
client=qdrant_client,
|
tools.py
CHANGED
@@ -1,20 +1,34 @@
|
|
1 |
from pathlib import Path
|
2 |
-
from typing import Annotated, Optional
|
3 |
from langchain_community.tools.tavily_search import TavilySearchResults
|
4 |
from langchain_core.tools import tool
|
5 |
-
|
|
|
|
|
|
|
6 |
|
7 |
WORKING_DIRECTORY = Path("/tmp/content/data")
|
8 |
WORKING_DIRECTORY.mkdir(parents=True, exist_ok=True)
|
9 |
|
10 |
tavily_tool = TavilySearchResults(max_results=5)
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
@tool
|
13 |
def retrieve_information(
|
14 |
query: Annotated[str, "query to ask the retrieve information tool"]
|
15 |
):
|
16 |
"""Use Retrieval Augmented Generation to retrieve information about the 'Extending Llama-3’s Context Ten-Fold Overnight' paper."""
|
17 |
-
return
|
18 |
|
19 |
@tool
|
20 |
def create_outline(points: List[str], file_name: str) -> str:
|
|
|
1 |
from pathlib import Path
|
2 |
+
from typing import Annotated, Dict, List, Optional
|
3 |
from langchain_community.tools.tavily_search import TavilySearchResults
|
4 |
from langchain_core.tools import tool
|
5 |
+
import prompts
|
6 |
+
import models
|
7 |
+
from operator import itemgetter
|
8 |
+
from langchain_core.runnables.passthrough import RunnablePassthrough
|
9 |
|
10 |
WORKING_DIRECTORY = Path("/tmp/content/data")
|
11 |
WORKING_DIRECTORY.mkdir(parents=True, exist_ok=True)
|
12 |
|
13 |
tavily_tool = TavilySearchResults(max_results=5)
|
14 |
|
15 |
+
tool_chain = (
|
16 |
+
{
|
17 |
+
"context": itemgetter("question") | models.semantic_tuned_retriever,
|
18 |
+
"question": itemgetter("question"),
|
19 |
+
"writing_style_guide": lambda _: prompts.style_guide_text
|
20 |
+
}
|
21 |
+
| RunnablePassthrough.assign(context=itemgetter("context"))
|
22 |
+
| prompts.chat_prompt
|
23 |
+
| models.gpt4o
|
24 |
+
)
|
25 |
+
|
26 |
@tool
|
27 |
def retrieve_information(
|
28 |
query: Annotated[str, "query to ask the retrieve information tool"]
|
29 |
):
|
30 |
"""Use Retrieval Augmented Generation to retrieve information about the 'Extending Llama-3’s Context Ten-Fold Overnight' paper."""
|
31 |
+
return tool_chain.invoke({"question" : query})
|
32 |
|
33 |
@tool
|
34 |
def create_outline(points: List[str], file_name: str) -> str:
|