herMaster committed on
Commit bbd68c6 • 1 Parent(s): 1a8b103

update app.py

Files changed (1)
  1. app.py +18 -207

app.py CHANGED
@@ -1,200 +1,20 @@
- # import gradio as gr
- # from qdrant_client import models, QdrantClient
- # from sentence_transformers import SentenceTransformer
- # from PyPDF2 import PdfReader
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
- # from langchain.callbacks.manager import CallbackManager
- # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
- # # from langchain.llms import LlamaCpp
- # from langchain.vectorstores import Qdrant
- # from qdrant_client.http import models
- # # from langchain.llms import CTransformers
- # from ctransformers import AutoModelForCausalLM
-
-
-
-
-
- # # loading the embedding model -
-
- # encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
-
- # print("embedding model loaded.............................")
- # print("####################################################")
-
- # # loading the LLM
-
- # callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-
- # print("loading the LLM......................................")
-
- # # llm = LlamaCpp(
- # #     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
- # #     n_ctx=2048,
- # #     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
- # #     callback_manager=callback_manager,
- # #     verbose=True,
- # # )
-
- # llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
- #     model_file="llama-2-7b-chat.Q8_0.gguf",
- #     model_type="llama",
- #     # config = ctransformers.hub.AutoConfig,
- #     # hf = True
- #     temperature = 0.2,
- #     max_new_tokens = 1024,
- #     stop = ['\n']
- # )
-
-
-
- # print("LLM loaded........................................")
- # print("################################################################")
-
- # def get_chunks(text):
- #     text_splitter = RecursiveCharacterTextSplitter(
- #         # seperator = "\n",
- #         chunk_size = 500,
- #         chunk_overlap = 100,
- #         length_function = len,
- #     )
-
- #     chunks = text_splitter.split_text(text)
- #     return chunks
-
-
- # pdf_path = './100 Weird Facts About the Human Body.pdf'
-
-
- # reader = PdfReader(pdf_path)
- # text = ""
- # num_of_pages = len(reader.pages)
- # for page in range(num_of_pages):
- #     current_page = reader.pages[page]
- #     text += current_page.extract_text()
-
-
- # chunks = get_chunks(text)
-
- # print("Chunks are ready.....................................")
- # print("######################################################")
-
- # qdrant = QdrantClient(path = "./db")
- # print("db created................................................")
- # print("#####################################################################")
-
- # qdrant.recreate_collection(
- #     collection_name="my_facts",
- #     vectors_config=models.VectorParams(
- #         size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
- #         distance=models.Distance.COSINE,
- #     ),
- # )
-
- # print("Collection created........................................")
- # print("#########################################################")
-
-
-
- # li = []
- # for i in range(len(chunks)):
- #     li.append(i)
-
- # dic = zip(li, chunks)
- # dic = dict(dic)
-
- # qdrant.upload_records(
- #     collection_name="my_facts",
- #     records=[
- #         models.Record(
- #             id=idx,
- #             vector=encoder.encode(dic[idx]).tolist(),
- #             payload= {dic[idx][:5] : dic[idx]}
- #         ) for idx in dic.keys()
- #     ],
- # )
-
- # print("Records uploaded........................................")
- # print("###########################################################")
-
- # def chat(question):
- #     # question = input("ask question from pdf.....")
-
-
- #     hits = qdrant.search(
- #         collection_name="my_facts",
- #         query_vector=encoder.encode(question).tolist(),
- #         limit=3
- #     )
- #     context = []
- #     for hit in hits:
- #         context.append(list(hit.payload.values())[0])
-
- #     context = context[0] + context[1] + context[2]
-
- #     system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
- #     Read the given context before answering questions and think step by step. If you can not answer a user question based on
- #     the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""
-
-
- #     B_INST, E_INST = "[INST]", "[/INST]"
-
- #     B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
-
- #     SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS
-
- #     instruction = f"""
- #     Context: {context}
- #     User: {question}"""
-
- #     prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
-
- #     result = llm(prompt_template)
- #     return result
-
- # gr.Interface(
- #     fn = chat,
- #     inputs = gr.Textbox(lines = 10, placeholder = "Enter your question here 👉"),
- #     outputs = gr.Textbox(lines = 10, placeholder = "Your answer will be here soon 🚀"),
- #     title="Q&N with PDF 👩🏻‍💻📓✍🏻💡",
- #     description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf💡",
- #     theme="soft",
- #     examples=["Hello", "what is the speed of human nerve impulses?"],
- #     # cache_examples=True,
- # ).launch()
-
-
  import gradio as gr
- from threading import Thread
- from queue import SimpleQueue
- from typing import Any, Dict, List, Union
- from langchain.callbacks.base import BaseCallbackHandler
- from langchain.schema import LLMResult
  from qdrant_client import models, QdrantClient
  from sentence_transformers import SentenceTransformer
  from PyPDF2 import PdfReader
  from langchain.text_splitter import RecursiveCharacterTextSplitter
- from qdrant_client.models import PointStruct
- import os
  from langchain.callbacks.manager import CallbackManager
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
- # from qdrant_client import QdrantClient
- # from langchain import VectorDBQA - This is obsolete
- from langchain.chains import RetrievalQA
- from langchain.llms import LlamaCpp
- # from PyPDF2 import PdfReader
+ # from langchain.llms import LlamaCpp
  from langchain.vectorstores import Qdrant
- # from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.embeddings import HuggingFaceBgeEmbeddings
- from transformers import AutoModel
  from qdrant_client.http import models
- # from sentence_transformers import SentenceTransformer
- from langchain.prompts import PromptTemplate
+ # from langchain.llms import CTransformers
  from ctransformers import AutoModelForCausalLM
  
+ 
  # loading the embedding model -
  
- encoder = SentenceTransformer("all-MiniLM-L6-v2")
+ encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
  
  print("embedding model loaded.............................")
  print("####################################################")
@@ -206,9 +26,7 @@ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
  print("loading the LLM......................................")
  
  # llm = LlamaCpp(
- #     model_path="/home/devangpagare/llm/models/llama-2-7b-chat.Q3_K_S.gguf",
- #     # n_gpu_layers=n_gpu_layers,
- #     # n_batch=n_batch,
+ #     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
  #     n_ctx=2048,
  #     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
  #     callback_manager=callback_manager,
@@ -216,16 +34,17 @@ print("loading the LLM......................................")
  # )
  
  llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
-     model_file="llama-2-7b-chat.Q3_K_S.gguf",
+     model_file="llama-2-7b-chat.Q8_0.gguf",
      model_type="llama",
      # config = ctransformers.hub.AutoConfig,
      # hf = True
-     temperature = 0.2,
+     # temperature = 0.2,
      # max_new_tokens = 1024,
      # stop = ['\n']
  )
  
  
+ 
  print("LLM loaded........................................")
  print("################################################################")
  
@@ -257,11 +76,11 @@ print(chunks)
  print("Chunks are ready.....................................")
  print("######################################################")
  
- qdrant = QdrantClient(path = "./db")
+ client = QdrantClient(path = "./db")
  print("db created................................................")
  print("#####################################################################")
  
- qdrant.recreate_collection(
+ client.recreate_collection(
      collection_name="my_facts",
      vectors_config=models.VectorParams(
          size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
@@ -273,23 +92,21 @@ print("Collection created........................................")
  print("#########################################################")
  
  
- # starting a list of same size as chunks
+ 
  li = []
  for i in range(len(chunks)):
      li.append(i)
- # concantinating the li and chunks to create a dcitionary
+ 
  dic = zip(li, chunks)
  dic= dict(dic)
  
- qdrant.upload_records(
+ client.upload_records(
      collection_name="my_facts",
      records=[
          models.Record(
              id=idx,
              vector=encoder.encode(dic[idx]).tolist(),
              payload= {dic[idx][:5] : dic[idx]}
-             ## payload is always suppose to be a dictionary with both keys and values as strings. To do this, I used first 5 chars of
-             ## every value as key to make the payload.
          ) for idx in dic.keys()
      ],
  )
@@ -298,21 +115,16 @@ print("Records uploaded........................................")
  print("###########################################################")
  
  def chat(question):
-     # question = input("ask question from pdf.....")
  
- 
-     hits = qdrant.search(
+     hits = client.search(
          collection_name="my_facts",
          query_vector=encoder.encode(question).tolist(),
          limit=3
      )
      context = []
      for hit in hits:
-         # print(hit.payload, "score:", hit.score)
          context.append(list(hit.payload.values())[0])
-         # context += str(hit.payload[hit.payload.values()[:5]])
-         # print("##################################################################")
- 
+ 
      context = context[0] + context[1] + context[2]
  
      system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
@@ -335,7 +147,7 @@ def chat(question):
      result = llm(prompt_template)
      return result
  
- gr.Interface(
+ screen = gr.Interface(
      fn = chat,
      inputs = gr.Textbox(lines = 10, placeholder = "Enter your question here 👉"),
      outputs = gr.Textbox(lines = 10, placeholder = "Your answer will be here soon 🚀"),
@@ -343,7 +155,6 @@ gr.Interface(
      description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf💡",
      theme="soft",
      examples=["Hello", "what is the speed of human nerve impulses?"],
-     # cache_examples=True,
- ).launch()
- 
+ )
  
+ screen.launch()
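Second, retrieval and prompting. chat() concatenates exactly three hits (context[0] + context[1] + context[2]), which raises IndexError whenever the search returns fewer, and assembles the Llama-2 chat format by hand. Here is a sketch of the same flow that joins however many hits come back; it assumes the client, encoder, and llm objects defined above, plus the "text" payload key from the previous sketch.

def chat(question: str) -> str:
    # top-3 nearest chunks for the question embedding
    hits = client.search(
        collection_name="my_facts",
        query_vector=encoder.encode(question).tolist(),
        limit=3,
    )
    # join whatever came back instead of indexing hits 0..2 unconditionally
    context = " ".join(hit.payload["text"] for hit in hits)

    system_prompt = (
        "You are a helpful assistant, you will use the provided context to "
        "answer user questions. If you can not answer a user question based "
        "on the provided context, inform the user."
    )
    # Llama-2 chat template: [INST] <<SYS>> system <</SYS>> instruction [/INST]
    prompt = (
        f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
        f"Context: {context}\nUser: {question} [/INST]"
    )
    return llm(prompt)

Note that the new AutoModelForCausalLM.from_pretrained() call leaves temperature, max_new_tokens, and stop commented out, so generation runs with the ctransformers defaults.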