# RAG_EUDR / app.py
# Author: DANISHFAYAZNAJAR — commit "adding files" (726afa6)
from torch import cuda , bfloat16
import os
import transformers
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import gradio as gr
# Mirror the Space secret HF_KEY into the environment variables read by the
# Hugging Face Hub / transformers for gated-model authentication (Llama-2 is
# gated). NOTE(review): this raises KeyError at startup if HF_KEY is unset.
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ["HF_KEY"]
os.environ['HF_TOKEN'] = os.environ["HF_KEY"]
# ---------------------------------------------------------------------------
# Load Llama-2-7b-chat in 4-bit NF4 quantization and wrap it in a LangChain
# HuggingFacePipeline so it can drive the RetrievalQA chain built below.
# ---------------------------------------------------------------------------
model_id = 'meta-llama/Llama-2-7b-chat-hf'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# 4-bit NF4 weights with double quantization; matmuls computed in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

model_config = transformers.AutoConfig.from_pretrained(model_id)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',  # let accelerate place layers across available devices
)

tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

model.eval()  # inference only — disable dropout etc.
print(f"Model loaded on {device}")

generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # LangChain's prompt handling expects the full text
    task='text-generation',
    # FIX: `temperature` is ignored (with a warning) unless sampling is
    # enabled — without do_sample=True the original ran greedy decoding and
    # the configured temperature had no effect.
    do_sample=True,
    temperature=0.1,
    max_new_tokens=512,
    repetition_penalty=1.1,
)
llm = HuggingFacePipeline(pipeline=generate_text)
# ---------------------------------------------------------------------------
# Build the retrieval chain: embed queries with MPNet, retrieve from a
# prebuilt FAISS index shipped with the Space, and answer with the LLM.
#
# The "faiss_index" directory was built offline from the EUDR regulation PDF
# roughly as:
#   loader = PyPDFLoader("CELEX%3A32023R1115%3AEN%3ATXT.pdf")
#   splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
#   chunks = splitter.split_documents(loader.load())
#   FAISS.from_documents(chunks, embeddings).save_local("faiss_index")
# ---------------------------------------------------------------------------
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
# NOTE(review): newer LangChain releases require
# allow_dangerous_deserialization=True here — confirm against the pinned
# langchain version before upgrading.
vectorstore = FAISS.load_local("faiss_index", embeddings)
retrievalQA = RetrievalQA.from_chain_type(
    llm,
    chain_type='stuff',
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,  # the UI shows the top source chunk
)
print("setup complete lets start answering questions")
def question_answer(query):
    """Answer *query* with the RetrievalQA chain.

    Returns a ``(answer, source_excerpt)`` tuple feeding the two Gradio text
    outputs: the chain's result string and the page content of the top
    retrieved document (empty string when retrieval returned no documents).
    """
    # Renamed parameter: the original `input` shadowed the builtin. Gradio
    # calls the fn positionally, so callers are unaffected.
    response = retrievalQA.invoke(query)
    sources = response['source_documents']
    # Guard: indexing [0] on an empty result list raised IndexError before.
    source_excerpt = sources[0].page_content if sources else ''
    return response['result'], source_excerpt
# Two-output Gradio UI: the model's answer plus the top retrieved passage.
iface = gr.Interface(
    fn=question_answer,
    inputs='text',
    outputs=['text', 'text'],
)
iface.launch()