heikowagner committed
Commit 0cd40c7
1 Parent(s): 60a70a9

add cpu model

Dockerfile CHANGED
@@ -1,11 +1,34 @@
+ #Navigate to your user folder cd $env:USERPROFILE\AppData\Local\Docker\wsl\data
+ #Enter the following command resize-vhd -Path .\ext4.vhdx -SizeBytes 300GB, after that I was able to continue building with docker-compose!
+
+ FROM python:latest AS builder
+ RUN apt update -y
+ RUN apt install -y git git-lfs make gcc g++ libgmp-dev libmpfr-dev libmpc-dev
+ RUN git lfs install
+ RUN git clone https://github.com/ggerganov/llama.cpp
+ RUN cd llama.cpp && make
+ RUN git clone https://huggingface.co/nyanko7/LLaMA-7B
+ RUN ls -la
+ RUN cp -r ./LLaMA-7B ./llama.cpp/models
+ RUN ls -la ./llama.cpp/models/LLaMA-7B
+ # convert the 7B model to ggml FP16 format
+ WORKDIR llama.cpp
+ RUN python3 -m pip install -r requirements.txt
+ RUN python3 convert.py ./models/LLaMA-7B
+ # quantize the model to 4-bits (using q4_0 method)
+ RUN mkdir ./models/7B/
+ RUN ./quantize ./models/LLaMA-7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0
+
  FROM tensorflow/tensorflow:latest-gpu
  WORKDIR /app
+ COPY --from=builder /llama.cpp//models/7B/ ./mymodels/LLaMA-7B
  # RUN apt-get upgrade -y
- RUN apt-get update -y
- RUN apt-get install -y git
+ RUN apt update -y
+ RUN apt install -y git git-lfs
  RUN apt install -y make wget git gcc g++ lhasa libgmp-dev libmpfr-dev libmpc-dev flex bison gettext texinfo ncurses-dev autoconf rsync
- COPY ./app .
+ COPY ./requirements.txt requirements.txt
  RUN pip install -r requirements.txt
+ COPY ./app .
  #RUN python load_docs.py
  RUN --mount=type=secret,id=OPENAI_API_KEY \
      cat /run/secrets/OPENAI_API_KEY > .openaiapikey
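
The builder stage clones llama.cpp, converts the LLaMA-7B checkpoint to ggml FP16 and quantizes it to q4_0; the runtime stage then copies the result to ./mymodels/LLaMA-7B, which is the path the new load_model.load_cpu_model() expects. The secret mount requires BuildKit, with the key supplied at build time via docker build --secret id=OPENAI_API_KEY,src=<file>. Below is a minimal sketch (not part of the commit) of a start-up check for the baked-in model; the size threshold is a rough assumption.

# Sketch only: verify the quantized model copied in from the builder stage exists
# before the app tries to load it. The path mirrors load_model.load_cpu_model().
import os

MODEL_PATH = "./mymodels/LLaMA-7B/ggml-model-q4_0.bin"

def check_cpu_model(path: str = MODEL_PATH) -> None:
    if not os.path.isfile(path):
        raise FileNotFoundError(f"quantized model not found at {path}")
    size_gb = os.path.getsize(path) / 1e9
    # a q4_0 7B ggml file is roughly 4 GB; a much smaller file suggests a failed conversion
    if size_gb < 3:
        print(f"warning: {path} is only {size_gb:.1f} GB")

if __name__ == "__main__":
    check_cpu_model()
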
app/app.py CHANGED
@@ -19,7 +19,7 @@ else:

  model_type = st.selectbox(
      'Select the Documents to be used to answer your question',
-     ('OpenAI', 'Load local model') )
+     ('OpenAI', 'decapoda-research/llama-7b-hf (gpu+cpu)', 'llama-7b 4bit (cpu only)',) )

  if model_type=='OpenAI':
      if 'openai_key' not in st.session_state:
@@ -30,9 +30,11 @@ else:
      else:
          os.environ["OPENAI_API_KEY"] = st.session_state.openai_key
          llm= load_model.load_openai_model()
- else:
+ elif model_type=='decapoda-research/llama-7b-hf (gpu+cpu)':
      # Add more models here
      llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
+ else:
+     llm = load_model.load_cpu_model()


  collections = ut.retrieve_collections()
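
The selectbox now offers three choices, and the if/elif/else chain maps each one to a loader in load_model. The same dispatch can be sketched as a lookup table; this is an illustration using the loader names from the diff, not code from the commit (the OpenAI branch still assumes the API-key handling above has run).

# Sketch only: option-to-loader dispatch equivalent to the new branch in app.py.
import load_model

LOADERS = {
    'OpenAI': load_model.load_openai_model,
    'decapoda-research/llama-7b-hf (gpu+cpu)':
        lambda: load_model.load_gpu_model("decapoda-research/llama-7b-hf"),
    'llama-7b 4bit (cpu only)': load_model.load_cpu_model,
}

def pick_llm(model_type: str):
    # model_type is the string returned by st.selectbox above
    return LOADERS[model_type]()
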
app/exploration.py CHANGED
@@ -20,4 +20,32 @@ client.create_collection(collection, embedding_function=ef, metadata={"loaded_do


  # %%
- client.list_collections()
+ my_col = client.list_collections()
+
+ # %%
+ my_col.embedding_function
+
+ # %%
+ from langchain.vectorstores import Chroma
+ import load_model
+
+ from load_model import load_embedding
+
+ persist_directory = load_model.persist_directory
+
+ ef = load_embedding("hkunlp/instructor-large")
+ vectorstore = Chroma(
+     collection_name="papers",
+     embedding_function=ef,
+     persist_directory=persist_directory,
+ )
+
+ # %%
+ query = "What did the president say about Ketanji Brown Jackson"
+ docs = vectorstore.similarity_search(query)
+
+
+ # %%
+ docs
+ # %%
+ vectorstore.similarity_search_with_score(query)
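
The new notebook cells rebuild the Chroma store from the persisted directory with the hkunlp/instructor-large embedding and run the same query twice: similarity_search returns only the matching documents, while similarity_search_with_score also returns a distance per document (smaller means closer). A small follow-up cell, assuming the vectorstore and query defined above, might look like this (sketch, not part of the commit):

# %%
# Sketch only: inspect the (Document, score) pairs from the query above.
results = vectorstore.similarity_search_with_score(query)
for doc, score in results:
    # Chroma reports a distance here, so lower scores are better matches
    print(f"{score:.4f}  {doc.metadata.get('source', '?')}  {doc.page_content[:80]!r}")
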
app/load_model.py CHANGED
@@ -16,6 +16,7 @@ import os
  from langchain.chains import RetrievalQA
  from langchain.indexes import VectorstoreIndexCreator
  from langchain.llms import OpenAI
+ import multiprocessing

  from chromadb.config import Settings
  import chromadb
@@ -30,12 +31,12 @@ persist_directory = current_path + "/VectorStore"
  @st.cache_resource
  def load_cpu_model():
      """Does not work atm, bc cpu model is not persisted"""
-     model_path= "./llama.cpp/models/LLaMA-7B/ggml-model-q4_0.bin"
+     model_path= "./mymodels/LLaMA-7B/ggml-model-q4_0.bin"
      device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
      llm = LlamaCpp(
          model_path=model_path,
          n_ctx=6000,
-         n_threads=16,
+         n_threads=multiprocessing.cpu_count(),
          temperature=0.6,
          top_p=0.95
      )
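
With the path pointed at the ./mymodels copy produced by the Dockerfile and n_threads taken from multiprocessing.cpu_count(), the CPU loader can be smoke-tested on its own. A minimal sketch, assuming load_cpu_model() returns the LlamaCpp instance it constructs (the diff does not show the function's return) and that the quantized file is present:

# Sketch only: exercise the CPU model loader added in this commit.
import load_model

llm = load_model.load_cpu_model()
# LangChain LLM objects of this vintage are callable with a plain prompt string
print(llm("Q: Name one advantage of 4-bit quantization. A:"))
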
app/requirements.txt → requirements.txt RENAMED
File without changes