import functools
import os
import tempfile
from typing import List

import requests
from chainlit.types import AskFileResponse
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader, UnstructuredURLLoader, WebBaseLoader
from langchain_community.vectorstores import Qdrant
from langchain_core.documents import Document
from langchain_core.language_models import BaseLanguageModel
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

def process_file(uploaded_file: AskFileResponse):
    if uploaded_file.name.endswith(".pdf"):
        suffix, loader_cls = ".pdf", PyMuPDFLoader
    elif uploaded_file.name.endswith(".txt"):
        suffix, loader_cls = ".txt", TextLoader
    else:
        raise ValueError("Unsupported file format. Only PDF and TXT are supported.")

    # Write the uploaded bytes to a temporary file so the loader can read it from disk.
    with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.content)
        temp_file_path = temp_file.name

    return loader_cls(temp_file_path).load()

def load_documents_from_url(url):
    try:
        # Check if it's a PDF
        if url.endswith(".pdf"):
            try:
                loader = PyMuPDFLoader(url)
                return loader.load()
            except Exception as e:
                print(f"Error loading PDF from {url}: {e}")
                return None
        
        # Fetch the content and check for video pages
        try:
            response = requests.head(url, timeout=10)  # Timeout for fetching headers
            content_type = response.headers.get('Content-Type', '')
        except Exception as e:
            print(f"Error fetching headers from {url}: {e}")
            return None
        
        # Ignore video content (flagged for now)
        if 'video' in content_type:
            return None
        if 'youtube' in url:
            return None
        
        # Otherwise, treat it as an HTML page
        try:
            loader = UnstructuredURLLoader([url])
            return loader.load()
        except Exception as e:
            print(f"Error loading HTML from {url}: {e}")
            return None
    except Exception as e:
        print(f"General error loading from {url}: {e}")
        return None

def add_to_qdrant(documents, embeddings, qdrant_client, collection_name):
    # Embed the documents and upsert them into the given Qdrant collection. Only the
    # client's `.url` is read here; from_documents builds its own connection from it.
    Qdrant.from_documents(
        documents,
        embeddings,
        url=qdrant_client.url,
        prefer_grpc=True,
        collection_name=collection_name,
    )
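
# Illustrative only (not part of the original module): a minimal ingestion sketch chaining
# load_documents_from_url into add_to_qdrant. The embedding model, splitter settings, Qdrant URL,
# and collection name are assumptions, and the client argument is reduced to a SimpleNamespace
# because add_to_qdrant only reads its `.url` attribute.
def _example_ingest_url(url: str) -> None:
    from types import SimpleNamespace

    from langchain_openai import OpenAIEmbeddings
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    documents = load_documents_from_url(url)
    if not documents:
        return
    # Split long pages into overlapping chunks before embedding them.
    chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(documents)
    add_to_qdrant(
        chunks,
        OpenAIEmbeddings(model="text-embedding-3-small"),  # assumed embedding model
        SimpleNamespace(url="http://localhost:6333"),      # assumed local Qdrant instance
        collection_name="example_collection",              # assumed collection name
    )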

def agent_node(state, agent, name):
    # Run the agent executor on the current state and wrap its final answer as a named
    # message for the supervisor. AgentExecutor returns its answer under the "output" key
    # (not as a "messages" list), so read it from there.
    result = agent.invoke(state)
    return {"messages": [HumanMessage(content=result["output"], name=name)]}

def create_team_agent(llm, tools, system_prompt, agent_name, team_members):
    # agent_name and team_members are accepted for call-site symmetry but are not used
    # here; create_agent appends the shared team-member instructions itself.
    return create_agent(
        llm,
        tools,
        f"{system_prompt}\nBelow are files currently in your directory:\n{{current_files}}",
    )

def create_agent_node(agent, name):
    return functools.partial(agent_node, agent=agent, name=name)

def add_agent_to_graph(graph, agent_name, agent_node):
    graph.add_node(agent_name, agent_node)
    graph.add_edge(agent_name, "supervisor")

def create_team_supervisor(llm, team_description, team_members):
    # The original body called itself recursively and never terminated. This sketch follows the
    # standard LangGraph hierarchical-teams supervisor pattern: an LLM router that selects the
    # next worker (or FINISH) via OpenAI function calling.
    options = ["FINISH"] + list(team_members)
    route_fn = {"name": "route", "description": "Select the next role.",
                "parameters": {"type": "object", "properties": {"next": {"enum": options}},
                               "required": ["next"]}}
    prompt = ChatPromptTemplate.from_messages([
        ("system",
         "You are a supervisor tasked with managing a conversation between the"
         f" following workers: {', '.join(team_members)}. {team_description}"
         " When all workers are finished, you must respond with FINISH."),
        MessagesPlaceholder(variable_name="messages"),
        ("system", "Given the conversation above, who should act next?"
                   " Or should we FINISH? Select one of: {options}"),
    ]).partial(options=str(options))
    return prompt | llm.bind_functions(functions=[route_fn], function_call="route") | JsonOutputFunctionsParser()

def enter_chain(message: str, members: List[str]):
    results = {
        "messages": [HumanMessage(content=message)],
        "team_members": ", ".join(members),
    }
    return results

def create_team_chain(graph, team_members):
    return (
        functools.partial(enter_chain, members=team_members)
        | graph.compile()
    )

def create_agent(
    llm: BaseLanguageModel,
    tools: list,
    system_prompt: str,
) -> AgentExecutor:
    """Create a function-calling agent executor for a single team member."""
    # Note: "{{team_members}}" is escaped, so the rendered system prompt contains the
    # literal text "{team_members}" rather than the actual member names.
    system_prompt += (
        "\nWork autonomously according to your specialty, using the tools available to you."
        " Do not ask for clarification."
        " Your other team members (and other teams) will collaborate with you with their own specialties."
        " You are chosen for a reason! You are one of the following team members: {{team_members}}."
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            MessagesPlaceholder(variable_name="messages"),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
    )
    agent = create_openai_functions_agent(llm, tools, prompt)
    return AgentExecutor(agent=agent, tools=tools)

def format_docs(docs: List[Document]) -> str:
    return "\n\n".join(
        f"Content: {doc.page_content}\nSource: {doc.metadata.get('source', 'Unknown')}" for doc in docs
    )
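
# Illustrative only (not part of the original module): wiring one worker agent and a supervisor
# into a LangGraph StateGraph using the helpers above. The state schema, the word_count tool, and
# the "gpt-4o-mini" model name are assumptions made for the sake of a self-contained sketch.
def _example_build_team_chain():
    import operator
    from typing import Annotated, TypedDict

    from langchain_core.tools import tool
    from langchain_openai import ChatOpenAI
    from langgraph.graph import END, StateGraph

    class TeamState(TypedDict):
        messages: Annotated[List[BaseMessage], operator.add]
        team_members: str
        next: str

    @tool
    def word_count(text: str) -> int:
        """Count the words in a piece of text."""
        return len(text.split())

    llm = ChatOpenAI(model="gpt-4o-mini")  # assumed model
    graph = StateGraph(TeamState)

    # Supervisor node decides which worker acts next (it writes state["next"]).
    graph.add_node("supervisor", create_team_supervisor(llm, "The team answers counting questions.", ["Counter"]))

    # Worker node: a function-calling agent wrapped so its answer re-enters the conversation.
    counter_agent = create_agent(llm, [word_count], "You are a careful assistant who counts words.")
    add_agent_to_graph(graph, "Counter", create_agent_node(counter_agent, "Counter"))

    graph.add_conditional_edges("supervisor", lambda state: state["next"], {"Counter": "Counter", "FINISH": END})
    graph.set_entry_point("supervisor")

    # enter_chain converts a plain question string into the graph's expected initial state.
    return create_team_chain(graph, ["Counter"])

# Example use (requires OPENAI_API_KEY and the langgraph package):
#   chain = _example_build_team_chain()
#   chain.invoke("How many words are in 'the quick brown fox'?")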