heikowagner committed
Commit: 8d717c1
1 Parent(s): 81be58e
app/VectorStore/index/id_to_uuid_3c194f90-478a-4f8e-a5ac-67776218c783.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30970b5f5b747f17da7471033fc01cda622aeb236966628f64285f507247c30e
-size 7594
+oid sha256:b3fd923d38dbc7773fa8ddd035a3a12b35b36c0596120795d5441fa2631aa500
+size 7657
app/VectorStore/index/index_3c194f90-478a-4f8e-a5ac-67776218c783.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c220ae8125b8343264f932baba1c2abb62767acb593271c3745d43e50094eefb
-size 773296
+oid sha256:e8012c468a836e45dec5264f07e79a82dd9b0cfbd57b7db82ab3e5f87659e004
+size 779728
app/VectorStore/index/index_metadata_3c194f90-478a-4f8e-a5ac-67776218c783.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70331079a9c91229b14c8d52b03b769cc42a7753a3e28272be2dfc41323d1af7
+oid sha256:fe883ac5dc1e9c3d5b56fe942e1fef13b990df4e9b32e59c5eb7b12bba00e7c0
 size 73
app/VectorStore/index/uuid_to_id_3c194f90-478a-4f8e-a5ac-67776218c783.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0e3ec8fd73d4a0139cfbc5a75be1534a545f7d6885200397eb3397dc792a60e
-size 8896
+oid sha256:d94d83b22ad6a388ffd24e1151e31ff2b22aaee250d0a8e442f0744bc00cffda
+size 8970
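
The four index files above are Git LFS pointers rather than the binary artifacts themselves: each change swaps the sha256 oid and byte size recorded for the re-serialized vector-store files. As a hedged aside, such a pointer can be checked against a local object with a few lines of Python (the verify_lfs_pointer helper below is illustrative and not part of this repo):

import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_path: str, object_path: str) -> bool:
    """Check a local file against the oid/size recorded in a Git LFS pointer."""
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if " " in line
    )
    data = Path(object_path).read_bytes()
    return (
        hashlib.sha256(data).hexdigest() == fields["oid"].removeprefix("sha256:")
        and len(data) == int(fields["size"])
    )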
app/app.py CHANGED
@@ -21,7 +21,7 @@ else:
 
     model_type = st.selectbox(
        'Select the Documents to be used to answer your question',
-        ('OpenAI', 'local_model') )
+        ('OpenAI', 'Load local model') )
 
     if model_type=='OpenAI':
         if 'openai_key' not in st.session_state:
@@ -33,6 +33,7 @@ else:
         os.environ["OPENAI_API_KEY"] = st.session_state.openai_key
         llm = load_model.load_openai_model()
     else:
+        # Add more models here
         llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
 
 
@@ -43,7 +44,7 @@ else:
 
     st.write('You selected:', option)
 
-    chain = load_model.create_chain(llm, collection=option)
+    chain = load_model.create_chain(llm, collection=option, model_name="hkunlp/instructor-large")
     try:
         query = st.text_area('Ask a question:', 'Hello, how are you today?')
         result = chain({"query": query})
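
The new "# Add more models here" comment marks the branch where further local backends would go. A sketch of one possible extension, assuming a third selectbox option and reusing the "chavinlo/gpt4-x-alpaca" checkpoint that was previously the load_gpu_model default (none of this is in the commit itself):

# Hypothetical extension of the selectbox branch; the third option is illustrative.
model_type = st.selectbox(
    'Select the Documents to be used to answer your question',
    ('OpenAI', 'Load local model', 'Load local model (Alpaca)'))

if model_type == 'OpenAI':
    llm = load_model.load_openai_model()
elif model_type == 'Load local model':
    llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
else:
    # Add more models here
    llm = load_model.load_gpu_model("chavinlo/gpt4-x-alpaca")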
app/load_model.py CHANGED
@@ -44,7 +44,7 @@ def load_cpu_model():
     return llm
 
 @st.cache_resource(max_entries=1)
-def load_gpu_model(used_model = "chavinlo/gpt4-x-alpaca"):
+def load_gpu_model(used_model):
     torch.cuda.empty_cache()
     tokenizer = LlamaTokenizer.from_pretrained(used_model)
 
@@ -113,10 +113,7 @@ def load_vectorstore(model_name, collection):
     )
     return vectorstore
 
-def add_document_to_vectorstore(vectorstore, docs):
-    pass
-
-def create_chain(_llm, collection, model_name = "hkunlp/instructor-large"):
+def create_chain(_llm, collection, model_name):
     vectorstore = load_vectorstore(model_name, collection)
     retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
     chain = RetrievalQA.from_chain_type(llm=_llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
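
Both cached loaders lose their defaults in this commit, so every call site now names its checkpoint and embedding model explicitly, which keeps the query-time embeddings in sync with the ones used at indexing. The calls as app.py and run.py now make them:

# Explicit arguments are now required; the values below are the ones used in this commit.
llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
chain = load_model.create_chain(llm, collection="heikospaper",
                                model_name="hkunlp/instructor-large")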
app/load_vectors.py CHANGED
@@ -12,7 +12,7 @@ from hashlib import sha256
 import cloudpickle
 import logging
 import os
-from load_model import load_embedding
+from load_model import load_embedding, load_vectorstore
 import torch
 import re
 import pathlib
@@ -42,34 +42,20 @@ def create_collection(collection_name, model_name, client):
     return True
 
 def create_and_add(collection_name, sub_docs, model_name):
-    client_settings = chromadb.config.Settings(
-        chroma_db_impl="duckdb+parquet",
-        persist_directory=persist_directory,
-        anonymized_telemetry=False
-    )
-
-    client = chromadb.Client(client_settings)
-    collection_name = collection_name # + "_" + re.sub('[^A-Za-z0-9]+', '', model_name)
-
-    embeddings = load_embedding(model_name)
     logging.info(f"Adding documents to {collection_name}")
-    vectorstore = Chroma(
-        collection_name=collection_name,
-        embedding_function=embeddings,
-        client_settings=client_settings,
-        persist_directory=persist_directory,
-    )
+    embeddings = load_embedding(model_name)
+    vectorstore = load_vectorstore(model_name, collection_name)
     vectorstore.add_documents(documents=sub_docs, embedding=embeddings)
     vectorstore.persist()
 
     # Test Vectorstore
-    vectorstore2 = Chroma(
-        collection_name=collection_name,
-        embedding_function=embeddings,
-        client_settings=client_settings,
-        persist_directory=persist_directory,
-    )
-    print( vectorstore2.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )
+    #vectorstore2 = Chroma(
+    #    collection_name=collection_name,
+    #    embedding_function=embeddings,
+    #    client_settings=client_settings,
+    #    persist_directory=persist_directory,
+    #)
+    #print( vectorstore2.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )
 
     return vectorstore
 
@@ -113,7 +99,7 @@ def load_from_web(urls, cache=True):
     #update metadata
     i=0
     for doc in docs:
-        doc.metadata = {'source': docs_list[i], 'url': docs_list[i], 'company':'AXA'}
+        doc.metadata = {'source': docs_list[i], 'url': docs_list[i], 'owner':'Heiko Wagner'}
         i=i+1
     return docs
 
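After this refactor, create_and_add delegates all Chroma client and collection setup to load_model.load_vectorstore, so indexing and querying share a single construction path. A minimal usage sketch, assuming sub_docs are LangChain Document objects as elsewhere in the repo:

from langchain.docstore.document import Document

# Illustrative only: index a tiny document list through the refactored helper,
# which now builds the Chroma store via load_vectorstore internally.
sub_docs = [Document(page_content="Example text", metadata={"source": "demo"})]
vectorstore = create_and_add("heikospaper", sub_docs, "hkunlp/instructor-large")
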
app/run.py CHANGED
@@ -12,6 +12,6 @@ import cloudpickle
 llm = load_model.load_openai_model()
 
 # %%
-chain = load_model.create_chain(llm, collection="heikospaper")
+chain = load_model.create_chain(llm, collection="heikospaper", model_name="hkunlp/instructor-large")
 result = chain({"query": "What are AXAs green Goals?"})
 print(result)
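
Because create_chain builds the RetrievalQA chain with return_source_documents=True, the result printed here is a dict holding both the generated answer and the retrieved chunks; a short sketch of unpacking it:

# RetrievalQA returns the answer under "result" and, with
# return_source_documents=True, the retrieved chunks under "source_documents".
print(result["result"])
for doc in result["source_documents"]:
    print(doc.metadata.get("source"), doc.page_content[:80])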