from torch import cuda, bfloat16
import os
import transformers
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import gradio as gr

# Expects an HF_KEY environment variable holding a Hugging Face access token
# (needed to download the gated Llama 2 weights).
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ["HF_KEY"]
os.environ['HF_TOKEN'] = os.environ["HF_KEY"]

model_id = 'meta-llama/Llama-2-7b-chat-hf'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# 4-bit NF4 quantization via bitsandbytes so the 7B model fits on a single GPU.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

model_config = transformers.AutoConfig.from_pretrained(model_id)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
)

tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

model.eval()
print(f"Model loaded on {device}")

# Wrap the model in a text-generation pipeline. The low temperature keeps
# answers focused; note that recent transformers releases may warn that
# temperature is ignored unless do_sample=True is also set.
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    task='text-generation',
    temperature=0.1,
    max_new_tokens=512,
    repetition_penalty=1.1,
)

llm = HuggingFacePipeline(pipeline=generate_text)

# One-time document ingestion, kept for reference: these chunks were embedded
# into the "faiss_index" directory that is loaded from disk below.
# loader = PyPDFLoader("/content/CELEX%3A32023R1115%3AEN%3ATXT.pdf")
# pdf_documents = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
# pdf_document_chunks = text_splitter.split_documents(pdf_documents)

model_name = "sentence-transformers/all-mpnet-base-v2"
# model_kwargs = {'device': 'cpu'}
embeddings = HuggingFaceEmbeddings(model_name=model_name)  # , model_kwargs=model_kwargs)

# Load the prebuilt FAISS index. Note: newer LangChain releases may also
# require passing allow_dangerous_deserialization=True here.
vectorstore = FAISS.load_local("faiss_index", embeddings)

# "stuff" chain type: all retrieved chunks are stuffed into a single prompt.
retrievalQA = RetrievalQA.from_chain_type(
    llm,
    chain_type='stuff',
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)

print("Setup complete; let's start answering questions")


def question_answer(query):
    """Answer a question and return the top source chunk it was grounded on."""
    response = retrievalQA.invoke(query)
    return response['result'], response['source_documents'][0].page_content


iface = gr.Interface(fn=question_answer, inputs='text', outputs=['text', 'text'])
iface.launch()
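
# ------------------------------------------------------------------
# Sketch: building "faiss_index" in the first place. The script loads a
# prebuilt index from disk; a minimal one-time build step, assuming the
# same PDF and chunking parameters as the commented-out loader code
# above, could look like this (FAISS.from_documents and save_local are
# the standard LangChain APIs for embedding documents and persisting
# the resulting index):
#
#   loader = PyPDFLoader("/content/CELEX%3A32023R1115%3AEN%3ATXT.pdf")
#   pdf_documents = loader.load()
#   text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
#   pdf_document_chunks = text_splitter.split_documents(pdf_documents)
#   index = FAISS.from_documents(pdf_document_chunks, embeddings)
#   index.save_local("faiss_index")
# ------------------------------------------------------------------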