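"""Gradio demo: retrieval-augmented question answering over a pre-built FAISS
index (built from the EU regulation PDF referenced below) using a 4-bit
quantized Llama-2-7b-chat model via LangChain's RetrievalQA chain."""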

import os

import gradio as gr
import transformers
from torch import cuda, bfloat16
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
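
# NOTE: these are the classic `langchain` import paths; newer (>= 0.1)
# releases moved most of them (HuggingFacePipeline, PyPDFLoader,
# HuggingFaceEmbeddings, FAISS) into the `langchain_community` package.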
# Mirror the HF_KEY secret into both env vars that Hugging Face libraries
# read, so the gated Llama-2 weights can be downloaded.
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ["HF_KEY"]
os.environ['HF_TOKEN'] = os.environ["HF_KEY"]

model_id = 'meta-llama/Llama-2-7b-chat-hf'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
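
# Quantize the 7B weights to 4-bit NF4 with nested (double) quantization,
# computing activations in bfloat16. This should fit the model in roughly
# 4-5 GB of VRAM (a rough estimate, not measured here).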
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)
model_config = transformers.AutoConfig.from_pretrained(model_id)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',  # let accelerate place the quantized weights
)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
model.eval()  # inference only; disable dropout
print(f"Model loaded on {device}")

generate_text = transformers.pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # LangChain's wrapper expects the prompt echoed in the output
    temperature=0.1,        # near-deterministic answers
    max_new_tokens=512,
    repetition_penalty=1.1,
)
llm = HuggingFacePipeline(pipeline=generate_text)  # wrap the pipeline as a LangChain LLM
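
# One-time ingestion, kept commented out for reference: the "faiss_index"
# folder loaded below was presumably built offline from the EU regulation PDF
# with these steps plus a save_local call (see the reconstructed sketch below).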
# loader = PyPDFLoader("/content/CELEX%3A32023R1115%3AEN%3ATXT.pdf")
# pdf_documents = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
# pdf_document_chunks = text_splitter.split_documents(pdf_documents)
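# Assumed build step (not in the original source) -- embed the chunks with the
# same embedding model as below, then persist the index:
# FAISS.from_documents(pdf_document_chunks, embeddings).save_local("faiss_index")
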
model_name = "sentence-transformers/all-mpnet-base-v2" | |
# model_kwargs = {'device':'cpu'} | |
embeddings = HuggingFaceEmbeddings(model_name = model_name)#,model_kwargs=model_kwargs) | |
vectorstore = FAISS.load_local("faiss_index", embeddings)
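# NOTE: newer langchain releases also require
# allow_dangerous_deserialization=True here, since load_local unpickles the
# docstore from disk.
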
retrievalQA = RetrievalQA.from_chain_type(
    llm,
    chain_type='stuff',  # stuff all retrieved chunks into a single prompt
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)
print("setup complete lets start answering questions") | |
def question_answer(query):
    response = retrievalQA.invoke(query)
    # Return the answer plus the top-ranked source chunk for transparency.
    return response['result'], response['source_documents'][0].page_content
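
# Illustrative call (the question is hypothetical):
#   question_answer("Which products fall under the regulation?")
#   -> (answer_text, top_source_chunk_text)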
iface = gr.Interface(fn=question_answer, inputs='text', outputs=['text', 'text'])
iface.launch()