mjbuehler committed on
Commit f207b40
1 Parent(s): be70e85

Update README.md

Files changed (1)
  1. README.md +57 -22
README.md CHANGED
@@ -75,27 +75,9 @@ The figure below shows results from knowledge recall evaluation experiments of B
75
 
76
  ### Retrieval Augmented Generation (RAG)
77
 
78
- ```
79
- import chromadb
80
- from llama_index import VectorStoreIndex, SimpleDirectoryReader
81
- from chromadb.config import Settings
82
- from llama_index.vector_stores import ChromaVectorStore
83
- from llama_index.storage.storage_context import StorageContext
84
 
85
- coll_name="Bioinspired"
86
- coll_path='./Bioinspired_Chroma' ## PATH TO CHROMA DATABASE
87
-
88
- client = chromadb.PersistentClient(path=coll_path)
89
- collection = client.get_collection (name=coll_name,)
90
-
91
- db2 = chromadb.PersistentClient(path=coll_path)
92
- chroma_collection = db2.get_or_create_collection(coll_name)
93
- vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
94
-
95
- chroma_collection.count()
96
- ```
97
-
98
- Set up BioinspiredLMM as custom LLM:
99
 
100
 
101
  ```
@@ -124,6 +106,29 @@ llm_custom = HuggingFaceLLM(context_window=2048,
124
  tokenizer=tokenizer)
125
  llm_custom.model_name='BioinspiredLLM'
126
  ```
127
 Set up the custom LLM service context and vector store index:
128
  ```
129
  from llama_index.llms import LlamaCPP
@@ -134,12 +139,12 @@ from llama_index.llms.llama_utils import (
134
  )
135
 
136
  service_context = ServiceContext.from_defaults(
137
- llm=llm_custom,
138
  chunk_size=1024,
139
  embed_model="local:BAAI/bge-large-en"
140
  )
141
  index = VectorStoreIndex.from_vector_store(
142
- vector_store,
143
  service_context=service_context,
144
  )
145
  ```
@@ -158,3 +163,33 @@ question = "Which horn does not have tubules? A) big horn sheep B) pronghorn C)
158
  response = query_engine.query(question)
159
  display(Markdown(f"<b>{response}</b>"))
160
  ```
 
75
 
76
  ### Retrieval Augmented Generation (RAG)
77
 
78
+ The example below is based on LlamaIndex.
79
 
80
+ First, set up BioinspiredLLM as a custom LLM:
81
 
82
 
83
  ```
 
106
  tokenizer=tokenizer)
107
  llm_custom.model_name='BioinspiredLLM'
108
  ```
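Before wiring the model into a retrieval pipeline, it can be useful to sanity-check the wrapper directly. A minimal sketch (the prompt is only illustrative; `complete()` is the standard completion call on llama_index's `HuggingFaceLLM`):

```
# Quick check that the wrapped model generates text as expected
# (the prompt below is an arbitrary example).
response = llm_custom.complete("What is the role of collagen in bone?")
print(response)
```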
109
+
110
+ Load the Chroma database collection (for the purposes of this example, it has already been created):
111
+
112
+ ```
113
+ import chromadb
114
+ from llama_index import VectorStoreIndex, SimpleDirectoryReader
115
+ from chromadb.config import Settings
116
+ from llama_index.vector_stores import ChromaVectorStore
117
+ from llama_index.storage.storage_context import StorageContext
118
+
119
+ coll_name="Bioinspired"
120
+ coll_path='./Bioinspired_Chroma' ## PATH TO CHROMA DATABASE
121
+
122
+ client = chromadb.PersistentClient(path=coll_path)
123
+ collection = client.get_collection(name=coll_name)
124
+
125
+ db2 = chromadb.PersistentClient(path=coll_path)
126
+ chroma_collection = db2.get_or_create_collection(coll_name)
127
+ vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
128
+
129
+ chroma_collection.count()
130
+ ```
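If the collection has not been created yet, it could be built from source documents roughly as follows. This is only a sketch: it reuses the imports above, relies on the `service_context` defined in the next step, and the input path and the `initial_index` name are illustrative placeholders.

```
# Read source PDFs and index them into the Chroma-backed vector store
# (path is a placeholder; service_context is defined in the next step).
documents = SimpleDirectoryReader(input_files=["./path/to/paper.pdf"]).load_data()
storage_context = StorageContext.from_defaults(vector_store=vector_store)
initial_index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    service_context=service_context,
)
```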
131
+
132
 Set up the custom LLM service context and vector store index:
133
  ```
134
  from llama_index.llms import LlamaCPP
 
139
  )
140
 
141
  service_context = ServiceContext.from_defaults(
142
+ llm=llm_custom,
143
  chunk_size=1024,
144
  embed_model="local:BAAI/bge-large-en"
145
  )
146
  index = VectorStoreIndex.from_vector_store(
147
+ vector_store,
148
  service_context=service_context,
149
  )
150
  ```
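The query example below assumes a `query_engine` built from this index; its construction falls in the README lines omitted from this hunk. A minimal sketch of one way to build it, mirroring the retrieval settings used in the last example of this README:

```
# Build a query engine over the Chroma-backed index
# (response_mode and similarity_top_k mirror the later example).
query_engine = index.as_query_engine(
    service_context=service_context,
    response_mode="tree_summarize",
    similarity_top_k=5,
)
```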
 
163
  response = query_engine.query(question)
164
  display(Markdown(f"<b>{response}</b>"))
165
  ```
166
+
167
+ Alternatively, load and index new documents, here with all-mpnet-base-v2 embeddings:
168
+ ```
169
+ from langchain.embeddings import HuggingFaceEmbeddings
170
+ embeddings = HuggingFaceEmbeddings(
171
+ model_name="sentence-transformers/all-mpnet-base-v2",
172
+ )
173
+ documents_graph = SimpleDirectoryReader(
174
+ input_files=[
175
+ "./XXXXXXXXXX/XXXXX.pdf",
176
+ ]
177
+ ).load_data()
178
+ index_doc = VectorStoreIndex.from_documents(documents_graph,
179
+ service_context=service_context,
180
+ show_progress=True,
181
+ embeddings=embeddings,
182
+ )
183
+
184
+ ```
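Note that in this llama_index API the embedding model is normally drawn from the `ServiceContext` rather than from a keyword argument to `from_documents`. A sketch of making the all-mpnet-base-v2 embeddings explicit, assuming the 0.9.x-era llama_index API used throughout this README (`embed_model` and `service_context_mpnet` are illustrative names):

```
from llama_index import ServiceContext
from llama_index.embeddings import LangchainEmbedding

# Wrap the LangChain embeddings so llama_index can use them as its embed model.
embed_model = LangchainEmbedding(embeddings)
service_context_mpnet = ServiceContext.from_defaults(
    llm=llm_custom,
    chunk_size=1024,
    embed_model=embed_model,
)
index_doc = VectorStoreIndex.from_documents(
    documents_graph,
    service_context=service_context_mpnet,
    show_progress=True,
)
```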
185
+ Query:
186
+ ```
187
+ question="Which rapid prototyping techniques would be useful for creating hierarchical, bio-inspired materials?"
188
+
189
+ response = index_doc.as_query_engine(service_context=service_context,
190
+ response_mode="tree_summarize",
191
+ similarity_top_k=5,
192
+ ).query(question,
193
+ )
194
+ print(response)
195
+ ```