Spaces:

herMaster
/

chat-with-a-pdf

Runtime error

App Files Files Community

herMaster committed on Jan 1

Commit

1a8b103

•

1 Parent(s): f005fdc

using complete local code and loading llm through ctransformers.

Browse files

Files changed (1) hide show

app.py +201 -16

app.py CHANGED Viewed

@@ -1,23 +1,200 @@
 import gradio as gr
 from qdrant_client import models, QdrantClient
 from sentence_transformers import SentenceTransformer
 from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-# from langchain.llms import LlamaCpp
 from langchain.vectorstores import Qdrant
 from qdrant_client.http import models
-# from langchain.llms import CTransformers
 from ctransformers import AutoModelForCausalLM
 # loading the embedding model -
-encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
 print("embedding model loaded.............................")
 print("####################################################")
@@ -29,7 +206,9 @@ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 print("loading the LLM......................................")
 # llm = LlamaCpp(
-#     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
 #     n_ctx=2048,
 #     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
 #     callback_manager=callback_manager,
@@ -37,17 +216,16 @@ print("loading the LLM......................................")
 # )
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
-                                           model_file="llama-2-7b-chat.Q8_0.gguf",
                                            model_type="llama",
                                           #  config = ctransformers.hub.AutoConfig,
                                            # hf = True
                                            temperature = 0.2,
-                                           max_new_tokens = 1024,
-                                           stop = ['\n']
                                            )
 print("LLM loaded........................................")
 print("################################################################")
@@ -75,7 +253,7 @@ for page in range(num_of_pages):
 chunks = get_chunks(text)
 print("Chunks are ready.....................................")
 print("######################################################")
@@ -95,11 +273,11 @@ print("Collection created........................................")
 print("#########################################################")
 li = []
 for i in range(len(chunks)):
     li.append(i)
 dic = zip(li, chunks)
 dic= dict(dic)
@@ -110,6 +288,8 @@ qdrant.upload_records(
             id=idx,
             vector=encoder.encode(dic[idx]).tolist(),
             payload= {dic[idx][:5] : dic[idx]}
         ) for idx in dic.keys()
     ],
 )
@@ -128,8 +308,11 @@ def chat(question):
     )
     context = []
     for hit in hits:
       context.append(list(hit.payload.values())[0])
     context = context[0] + context[1] + context[2]
     system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
@@ -162,3 +345,5 @@ gr.Interface(
     examples=["Hello", "what is the speed of human nerve impulses?"],
     # cache_examples=True,
 ).launch()

+# import gradio as gr
+# from qdrant_client import models, QdrantClient
+# from sentence_transformers import SentenceTransformer
+# from PyPDF2 import PdfReader
+# from langchain.text_splitter import RecursiveCharacterTextSplitter
+# from langchain.callbacks.manager import CallbackManager
+# from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+# # from langchain.llms import LlamaCpp
+# from langchain.vectorstores import Qdrant
+# from qdrant_client.http import models
+# # from langchain.llms import CTransformers
+# from ctransformers import AutoModelForCausalLM
+# # loading the embedding model -
+# encoder = SentenceTransformer('jinaai/jina-embedding-b-en-v1')
+# print("embedding model loaded.............................")
+# print("####################################################")
+# # loading the LLM
+# callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+# print("loading the LLM......................................")
+# # llm = LlamaCpp(
+# #     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
+# #     n_ctx=2048,
+# #     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
+# #     callback_manager=callback_manager,
+# #     verbose=True,
+# # )
+# llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+#                                            model_file="llama-2-7b-chat.Q8_0.gguf",
+#                                            model_type="llama",
+#                                           #  config = ctransformers.hub.AutoConfig,
+#                                            # hf = True
+#                                            temperature = 0.2,
+#                                            max_new_tokens = 1024,
+#                                            stop = ['\n']
+#                                            )
+# print("LLM loaded........................................")
+# print("################################################################")
+# def get_chunks(text):
+#     text_splitter = RecursiveCharacterTextSplitter(
+#         # seperator = "\n",
+#         chunk_size = 500,
+#         chunk_overlap = 100,
+#         length_function = len,
+#     )
+#     chunks = text_splitter.split_text(text)
+#     return chunks
+# pdf_path = './100 Weird Facts About the Human Body.pdf'
+# reader = PdfReader(pdf_path)
+# text = ""
+# num_of_pages = len(reader.pages)
+# for page in range(num_of_pages):
+#     current_page = reader.pages[page]
+#     text += current_page.extract_text()
+# chunks = get_chunks(text)
+# print("Chunks are ready.....................................")
+# print("######################################################")
+# qdrant = QdrantClient(path = "./db")
+# print("db  created................................................")
+# print("#####################################################################")
+# qdrant.recreate_collection(
+#     collection_name="my_facts",
+#     vectors_config=models.VectorParams(
+#         size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
+#         distance=models.Distance.COSINE,
+#     ),
+# )
+# print("Collection created........................................")
+# print("#########################################################")
+# li = []
+# for i in range(len(chunks)):
+#     li.append(i)
+# dic = zip(li, chunks)
+# dic= dict(dic)
+# qdrant.upload_records(
+#     collection_name="my_facts",
+#     records=[
+#         models.Record(
+#             id=idx,
+#             vector=encoder.encode(dic[idx]).tolist(),
+#             payload= {dic[idx][:5] : dic[idx]}
+#         ) for idx in dic.keys()
+#     ],
+# )
+# print("Records uploaded........................................")
+# print("###########################################################")
+# def chat(question):
+#     # question = input("ask question from pdf.....")
+#     hits = qdrant.search(
+#         collection_name="my_facts",
+#         query_vector=encoder.encode(question).tolist(),
+#         limit=3
+#     )
+#     context = []
+#     for hit in hits:
+#       context.append(list(hit.payload.values())[0])
+#     context = context[0] + context[1] + context[2]
+#     system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
+#     Read the given context before answering questions and think step by step. If you can not answer a user question based on
+#     the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""
+#     B_INST, E_INST = "[INST]", "[/INST]"
+#     B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+#     SYSTEM_PROMPT = B_SYS + system_prompt + E_SYS
+#     instruction = f"""
+#     Context: {context}
+#     User: {question}"""
+#     prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
+#     result = llm(prompt_template)
+#     return result
+# gr.Interface(
+#     fn = chat,
+#     inputs = gr.Textbox(lines = 10, placeholder = "Enter your question here 👉"),
+#     outputs = gr.Textbox(lines = 10, placeholder = "Your answer will be here soon 🚀"),
+#     title="Q&N with PDF 👩🏻‍💻📓✍🏻💡",
+#     description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf💡",
+#     theme="soft",
+#     examples=["Hello", "what is the speed of human nerve impulses?"],
+#     # cache_examples=True,
+# ).launch()
 import gradio as gr
+from threading import Thread
+from queue import SimpleQueue
+from typing import Any, Dict, List, Union
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.schema import LLMResult
 from qdrant_client import models, QdrantClient
 from sentence_transformers import SentenceTransformer
 from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from qdrant_client.models import PointStruct
+import os
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+# from qdrant_client import QdrantClient
+# from langchain import VectorDBQA - This is obsolete
+from langchain.chains import RetrievalQA
+from langchain.llms import LlamaCpp
+# from PyPDF2 import PdfReader
 from langchain.vectorstores import Qdrant
+# from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceBgeEmbeddings
+from transformers import AutoModel
 from qdrant_client.http import models
+# from sentence_transformers import SentenceTransformer
+from langchain.prompts import PromptTemplate
 from ctransformers import AutoModelForCausalLM
 # loading the embedding model -
+encoder = SentenceTransformer("all-MiniLM-L6-v2")
 print("embedding model loaded.............................")
 print("####################################################")
 print("loading the LLM......................................")
 # llm = LlamaCpp(
+#     model_path="/home/devangpagare/llm/models/llama-2-7b-chat.Q3_K_S.gguf",
+#     # n_gpu_layers=n_gpu_layers,
+#     # n_batch=n_batch,
 #     n_ctx=2048,
 #     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
 #     callback_manager=callback_manager,
 # )
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+                                           model_file="llama-2-7b-chat.Q3_K_S.gguf",
                                            model_type="llama",
                                           #  config = ctransformers.hub.AutoConfig,
                                            # hf = True
                                            temperature = 0.2,
+                                           # max_new_tokens = 1024,
+                                           # stop = ['\n']
                                            )
 print("LLM loaded........................................")
 print("################################################################")
 chunks = get_chunks(text)
+print(chunks)
 print("Chunks are ready.....................................")
 print("######################################################")
 print("#########################################################")
+# build a list of integer indices, one per chunk
 li = []
 for i in range(len(chunks)):
     li.append(i)
+# zipping li and chunks together to create a dictionary (index -> chunk)
 dic = zip(li, chunks)
 dic= dict(dic)
             id=idx,
             vector=encoder.encode(dic[idx]).tolist(),
             payload= {dic[idx][:5] : dic[idx]}
+## The payload is supposed to be a dictionary with both keys and values as strings. To do this, I used the first 5 chars of
+## every value as the key to build the payload.
         ) for idx in dic.keys()
     ],
 )
     )
     context = []
     for hit in hits:
+    #   print(hit.payload, "score:", hit.score)
       context.append(list(hit.payload.values())[0])
+    #   context += str(hit.payload[hit.payload.values()[:5]])
+    # print("##################################################################")
     context = context[0] + context[1] + context[2]
     system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
     examples=["Hello", "what is the speed of human nerve impulses?"],
     # cache_examples=True,
 ).launch()