"""Gradio question-answering demo over a single PDF.

At import time the script:

1. loads a sentence-embedding model and a Llama-2 chat model,
2. extracts the PDF text and splits it into overlapping chunks,
3. (re)creates a local Qdrant collection and uploads one vector per chunk,
4. launches a Gradio UI whose handler (`chat`) retrieves the closest chunks
   and asks the LLM to answer using them as context.
"""
import os

import gradio as gr
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# from langchain.llms import LlamaCpp
from langchain.vectorstores import Qdrant
from qdrant_client.http import models
# from langchain.llms import CTransformers
from ctransformers import AutoModelForCausalLM

# Name of the Qdrant collection that holds the PDF chunks.
COLLECTION_NAME = "my_facts"
# Payload key under which each chunk's text is stored and read back.
PAYLOAD_TEXT_KEY = "text"
# Maximum number of chunks retrieved per question.
TOP_K = 3

# loading the embedding model
encoder = SentenceTransformer("all-MiniLM-L6-v2")
print("embedding model loaded.............................")
print("####################################################")

# loading the LLM
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
print("loading the LLM......................................")
# Alternative loader kept for reference:
# llm = LlamaCpp(
#     model_path="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
#     n_ctx=2048,
#     f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
#     callback_manager=callback_manager,
#     verbose=True,
# )
# NOTE: `hf=True` is intentionally not passed. With it, ctransformers returns
# a transformers-style model that needs a tokenizer/pipeline, whereas `chat`
# calls `llm(prompt)` with a plain string and expects generated text back.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q8_0.gguf",
    model_type="llama",
)
print("LLM loaded........................................")
print("################################################################")


def get_chunks(text):
    """Split *text* into chunks of up to 500 characters with 100 of overlap.

    Args:
        text: The full document text.

    Returns:
        The list of chunk strings produced by the recursive splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
        length_function=len,
    )
    return text_splitter.split_text(text)


pdf_path = '/home/devangpagare/llm/qdrant-cloud-rag-main/100 Weird Facts About the Human Body.pdf'
reader = PdfReader(pdf_path)
# `or ""` guards against pages for which no text could be extracted.
text = "".join(page.extract_text() or "" for page in reader.pages)
chunks = get_chunks(text)
print("Chunks are ready.....................................")
print("######################################################")

qdrant = QdrantClient(path="/home/devangpagare/llm/qdrant-cloud-rag-main/gradio/db")
print("db created................................................")
print("#####################################################################")

# The collection is rebuilt from scratch on every start.
qdrant.recreate_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(),  # Vector size is defined by used model
        distance=models.Distance.COSINE,
    ),
)
print("Collection created........................................")
print("#########################################################")

# One record per chunk; the chunk's position in the list is its id.
qdrant.upload_records(
    collection_name=COLLECTION_NAME,
    records=[
        models.Record(
            id=idx,
            vector=encoder.encode(chunk).tolist(),
            payload={PAYLOAD_TEXT_KEY: chunk},
        )
        for idx, chunk in enumerate(chunks)
    ],
)
print("Records uploaded........................................")
print("###########################################################")


def _build_prompt(context, question):
    """Assemble the Llama-2 chat prompt from retrieved context and question."""
    system_prompt = (
        "You are a helpful assistant, you will use the provided context to "
        "answer user questions. Read the given context before answering "
        "questions and think step by step. If you can not answer a user "
        "question based on the provided context, inform the user. Do not use "
        "any other information for answering user.\n"
        "Provide a detailed answer to the question."
    )
    # Llama-2 chat format: the system prompt is wrapped in <<SYS>> tags
    # inside the first [INST] block.
    b_inst, e_inst = "[INST]", "[/INST]"
    b_sys, e_sys = "<<SYS>>\n", "\n<</SYS>>\n\n"
    instruction = f"\nContext: {context}\nUser: {question}"
    return b_inst + b_sys + system_prompt + e_sys + instruction + e_inst


def chat(question):
    """Answer *question* using the most similar PDF chunks as context.

    Args:
        question: The user's question as plain text.

    Returns:
        Whatever `llm(prompt)` returns for the assembled prompt.
    """
    hits = qdrant.search(
        collection_name=COLLECTION_NAME,
        query_vector=encoder.encode(question).tolist(),
        limit=TOP_K,
    )
    # Join however many hits came back; a small collection can yield fewer
    # than TOP_K, which fixed indexing would turn into an IndexError.
    context = "\n".join(hit.payload[PAYLOAD_TEXT_KEY] for hit in hits)
    return llm(_build_prompt(context, question))


# Credentials can be overridden through the environment; the defaults keep the
# previous behaviour. The app is publicly shared, so set real values.
_auth = (
    os.environ.get("GRADIO_USERNAME", "username"),
    os.environ.get("GRADIO_PASSWORD", "password"),
)

gr.Interface(
    fn=chat,
    inputs=gr.Textbox(lines=10, placeholder="Enter your question here 👉"),
    outputs=gr.Textbox(lines=10, placeholder="Your answer will be here soon 🚀"),
    title="Q&N with PDF 👩🏻‍💻📓✍🏻💡",
    description="This app facilitates a conversation with PDFs available on https://www.delo.si/assets/media/other/20110728/100%20Weird%20Facts%20About%20the%20Human%20Body.pdf💡",
    theme="soft",
    examples=["Hello", "what is the speed of human nerve impulses?"],
    cache_examples=True,
).launch(share=True, auth=_auth)