achuthc1298 committed on
Commit
ce4e6ee
1 Parent(s): 5e0cd4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -117
app.py CHANGED
@@ -1,158 +1,100 @@
1
- #This app is running
2
  import streamlit as st
3
- from IPython.display import display, Markdown
4
  import os
5
  from pathlib import Path
6
  from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
7
  from llama_index.core.selectors import LLMSingleSelector
8
  from llama_index.core.tools import QueryEngineTool
9
  from llama_index.core import SummaryIndex, VectorStoreIndex
10
- from llama_index.core import VectorStoreIndex
11
- from llama_index.core import Settings
12
- from llama_index.llms.openai import OpenAI
13
- #from llama_index.embeddings.openai import OpenAIEmbedding
14
- #from llama_index.core.node_parser import SentenceSplitter
15
  from llama_index.core import SimpleDirectoryReader
16
  from llama_index.llms.groq import Groq
17
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
18
- #from llama_index.embeddings.fastembed import FastEmbedEmbedding
19
- #from llama_index.llms.mistralai import MistralAI
20
- # from llama_index.embeddings.huggingface_api import (
21
- # HuggingFaceInferenceAPIEmbedding,
22
- # )
23
  from typing import Tuple
24
  from llama_index.core import StorageContext, load_index_from_storage
25
  from llama_index.core.objects import ObjectIndex
26
  from llama_index.core.agent import ReActAgent
27
 
28
- #LOAD FROM INDEX
29
- async def create_doc_tools(
30
- document_fp: str,
31
- doc_name: str,
32
- verbose: bool = True,
33
- ) -> Tuple[QueryEngineTool]:
34
- # load lora_paper.pdf documents
35
  documents = SimpleDirectoryReader(input_files=[document_fp]).load_data()
36
 
37
- # # chunk_size of 1024 is a good default value
38
- # splitter = SentenceSplitter(chunk_size=1024)
39
- # # Create nodes from documents
40
- # nodes = splitter.get_nodes_from_documents(documents)
41
-
42
- # LLM model
43
  Settings.llm = Groq(model="mixtral-8x7b-32768")
44
- # embedding model
45
  Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
46
-
47
- load_dir_path = f"/home/achuthchandrasekhar/Documents/AMGPT/agentic_index/{doc_name}"
48
 
49
- # rebuild storage context
50
  storage_context = StorageContext.from_defaults(persist_dir=load_dir_path)
51
-
52
- # load index
53
  vector_index = load_index_from_storage(storage_context)
54
-
55
-
56
- # vector query engine
57
  vector_query_engine = vector_index.as_query_engine()
58
 
59
-
60
-
61
  vector_tool = QueryEngineTool.from_defaults(
62
  name=f"{doc_name}_vector_query_engine_tool",
63
  query_engine=vector_query_engine,
64
- description=(
65
- f"Useful for retrieving specific context from the the {doc_name}."
66
- ),
67
  )
68
 
69
  return vector_tool
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- # Define the directory containing the .tex files
73
- directory = '/home/achuthchandrasekhar/Documents/AMGPT/advanced_rag_code/rag_docs_final_review_tex_merged'
74
-
75
- # Initialize an empty list to store the file paths
76
- tex_files = []
77
-
78
- # Walk through the directory and find all .tex files
79
- for root, dirs, files in os.walk(directory):
80
- for file in files:
81
- if file.endswith(('.tex', '.txt')):
82
- # Get the absolute path of the file
83
- file_path = os.path.abspath(os.path.join(root, file))
84
- tex_files.append(file_path)
85
-
86
- # Sort the list of file paths in alphabetical order
87
- tex_files.sort()
88
-
89
- # Create the desired output format
90
- output = 'tex_files = [\n'
91
- for file_path in tex_files:
92
- output += f' "{file_path}",\n'
93
- output += ']'
94
-
95
- # Print the output
96
- print(output)
97
-
98
-
99
-
100
-
101
- paper_to_tools_dict = {}
102
-
103
-
104
- for paper in tex_files:
105
- print(f"Creating {paper} tool")
106
- path = Path(paper)
107
- vector_tool = await create_doc_tools(doc_name=path.stem, document_fp=path)
108
- paper_to_tools_dict[path.stem] = [vector_tool]
109
-
110
-
111
- initial_tools = [t for paper in tex_files for t in paper_to_tools_dict[Path(paper).stem]]
112
-
113
-
114
- st.title("PDF Question Answering with LangChain")
115
 
 
116
 
117
-
118
- # API Key input
119
- api_key = st.text_input("Enter your Groq API Key", type="password")
120
 
121
- if api_key:
122
-
123
- obj_index = ObjectIndex.from_objects(
124
- initial_tools,
125
- index_cls=VectorStoreIndex,
126
  )
127
 
128
- obj_retriever = obj_index.as_retriever(similarity_top_k=6)
129
-
130
- # Define LLM
131
- llm = Groq(model="mixtral-8x7b-32768")
132
-
133
- # Add Context
134
- context = """You are an agent designed to answer scientific queries over a set of given documents.
135
- Please always use the tools provided to answer a question. Do not rely on prior knowledge.
136
- """
137
-
138
- agent = ReActAgent.from_tools(
139
- tool_retriever=obj_retriever,
140
- llm=llm,
141
- verbose=True,
142
- context=context
143
- )
144
-
145
-
146
- # User prompt input
147
- user_prompt = st.text_input("Enter your question")
148
-
149
- if user_prompt:
150
- with st.spinner("Processing..."):
151
- response = agent.query(user_prompt)
152
- markdown_response = f"""
153
  ### Query Response:
154
 
155
  {response}
156
- """
157
- st.write(markdown_response)
158
 
 
 
 
 
1
  import streamlit as st
 
2
  import os
3
  from pathlib import Path
4
  from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
5
  from llama_index.core.selectors import LLMSingleSelector
6
  from llama_index.core.tools import QueryEngineTool
7
  from llama_index.core import SummaryIndex, VectorStoreIndex
8
+ from llama_index.core import VectorStoreIndex, Settings
 
 
 
 
9
  from llama_index.core import SimpleDirectoryReader
10
  from llama_index.llms.groq import Groq
11
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 
 
 
 
 
12
  from typing import Tuple
13
  from llama_index.core import StorageContext, load_index_from_storage
14
  from llama_index.core.objects import ObjectIndex
15
  from llama_index.core.agent import ReActAgent
16
 
17
# Function to process files and create document tools

# Guards the one-time global llama_index configuration performed below.
_settings_configured = False


def _configure_settings() -> None:
    """Install the Groq LLM and HuggingFace embedding model on ``Settings`` once.

    The models are identical for every document, so they are constructed a
    single time instead of once per ``create_doc_tools`` call.
    """
    global _settings_configured
    if _settings_configured:
        return
    Settings.llm = Groq(model="mixtral-8x7b-32768")
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
    _settings_configured = True


async def create_doc_tools(
    document_fp: str,
    doc_name: str,
    verbose: bool = True,
    index_root: str = "/home/user/app/agentic_index",
) -> QueryEngineTool:
    """Build a vector query tool backed by the persisted index of one document.

    Args:
        document_fp: Path of the source document. Accepted for interface
            compatibility; the tool is built from the persisted index, so the
            document itself is not read here.
        doc_name: Document identifier. Selects the ``<index_root>/<doc_name>``
            persist directory and is used in the tool name and description.
        verbose: Currently unused; kept for interface compatibility.
        index_root: Directory that holds one persisted index per document.

    Returns:
        A ``QueryEngineTool`` named ``"<doc_name>_vector_query_engine_tool"``
        wrapping the loaded index's default query engine.
    """
    _configure_settings()

    # Rebuild the storage context from disk and load the index stored there.
    load_dir_path = str(Path(index_root) / doc_name)
    storage_context = StorageContext.from_defaults(persist_dir=load_dir_path)
    vector_index = load_index_from_storage(storage_context)

    return QueryEngineTool.from_defaults(
        name=f"{doc_name}_vector_query_engine_tool",
        query_engine=vector_index.as_query_engine(),
        description=f"Useful for retrieving specific context from the {doc_name}.",
    )
36
 
37
# Function to find and sort .tex files
def find_tex_files(directory: str):
    """Return the sorted absolute paths of all ``.tex``/``.txt`` files under *directory*.

    The tree is walked recursively with ``os.walk``; a directory that does not
    exist simply yields an empty list.
    """
    wanted_suffixes = ('.tex', '.txt')
    return sorted(
        os.path.abspath(os.path.join(parent, filename))
        for parent, _subdirs, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith(wanted_suffixes)
    )
47
+
48
# Main app function
async def _build_tools(tex_files):
    """Create one vector query tool per document, in the order given."""
    tools = []
    for paper in tex_files:
        tools.append(
            await create_doc_tools(doc_name=Path(paper).stem, document_fp=paper)
        )
    return tools


def main():
    """Render the Streamlit page: ask for a Groq key, build the agent, answer a question.

    Nothing is built until an API key has been entered. The agent retrieves
    its tools from an ``ObjectIndex`` over one query tool per document found
    in the documents directory.
    """
    # Local import so this block does not depend on the file's import header.
    import asyncio

    st.title("PDF Question Answering with LangChain")

    # API Key input
    api_key = st.text_input("Enter your Groq API Key", type="password")
    if not api_key:
        return

    # The document tools construct their Groq client without an explicit key;
    # per the Groq integration docs it then falls back to GROQ_API_KEY, so
    # expose the entered key there unless the deployment already provides one.
    # NOTE(review): the environment is process-wide, i.e. shared by all sessions.
    os.environ.setdefault("GROQ_API_KEY", api_key)

    directory = '/home/user/app/rag_docs_final_review_tex_merged'
    tex_files = find_tex_files(directory)
    if not tex_files:
        st.error(f"No .tex or .txt documents found under {directory}.")
        return

    # create_doc_tools is a coroutine function and main() is synchronous, so
    # the coroutines are driven to completion with asyncio.run.
    initial_tools = asyncio.run(_build_tools(tex_files))

    obj_index = ObjectIndex.from_objects(
        initial_tools,
        index_cls=VectorStoreIndex,
    )
    obj_retriever = obj_index.as_retriever(similarity_top_k=6)

    # Use the key the user actually entered for the agent's own LLM.
    llm = Groq(model="mixtral-8x7b-32768", api_key=api_key)

    context = (
        "You are an agent designed to answer scientific queries over a set of given documents.\n"
        "Please always use the tools provided to answer a question. Do not rely on prior knowledge.\n"
    )

    agent = ReActAgent.from_tools(
        tool_retriever=obj_retriever,
        llm=llm,
        verbose=True,
        context=context,
    )

    user_prompt = st.text_input("Enter your question")
    if not user_prompt:
        return

    with st.spinner("Processing..."):
        response = agent.query(user_prompt)
        markdown_response = f"\n### Query Response:\n\n{response}\n"
        st.write(markdown_response)


if __name__ == "__main__":
    main()