"""Gradio app that answers a question about an uploaded PDF document.

The PDF is split into pages, the pages are embedded with OpenAI embeddings
and stored in a Chroma vector store, and a map-reduce retrieval chain is
run over the retrieved pages to answer the question.

The OpenAI API key is read from the ``OPENAI_API_KEY`` environment variable.
"""

import os

import chromadb
import gradio as gr
import openai
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import (
    DirectoryLoader,
    PyPDFLoader,
    TextLoader,
    UnstructuredFileLoader,
)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# SECURITY: a secret key used to be hard-coded here. Credentials must never be
# committed to source control; the previously committed key should be revoked
# and a new one supplied through the environment.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "The OPENAI_API_KEY environment variable must be set before "
        "starting this application."
    )

# Message returned to the user when the upload is missing or is not a PDF.
NOT_A_PDF_MESSAGE = "Le fichier doit être un document PDF"

# Prompt applied to each retrieved page (the "map" step).
QUESTION_TEMPLATE = (
    "{context} Precise the number starting the above text in your answer. "
    "It corresponds to its page number in the document it is from. "
    'Label this number as "page". '
    "Also make sure to answer in the same langage than the following "
    "question. "
    "QUESTION : {question} ANSWER : "
)

# Prompt applied to the per-page answers (the "reduce" step).
COMBINE_TEMPLATE = (
    "{summaries} Note that the above text is based on transient extracts "
    "from one source document. "
    "So make sure to not mention different documents or extracts or "
    "passages or portions or texts. "
    "There is only one, entire document. "
    "Also make sure to answer in the same langage than the following "
    "question. "
    "QUESTION : {question}. \n"
    "ANSWER : "
)


def question_document(Document, Question):
    """Answer ``Question`` using the content of the uploaded PDF ``Document``.

    Args:
        Document: The uploaded file object handed over by the Gradio "file"
            input; only its ``name`` attribute (a file path) is used. May be
            ``None`` when nothing was uploaded.
        Question: The question to ask about the document.

    Returns:
        An error message (``str``) when no file was provided or the file is
        not a PDF; otherwise the value returned by the retrieval chain call.
    """
    # Guard against a missing upload and accept any casing of the extension.
    if Document is None or not Document.name.lower().endswith(".pdf"):
        return NOT_A_PDF_MESSAGE

    # Load the PDF; every returned document carries a "page" metadata entry
    # that is used below to build its identifier.
    pages = PyPDFLoader(Document.name).load()

    # Embed the pages and store them in the vector database.
    embeddings = OpenAIEmbeddings()
    page_ids = [f"page{page.metadata['page']}" for page in pages]
    docsearch = Chroma.from_documents(pages, embeddings, ids=page_ids)

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.8)

    # Customised map_reduce prompts.
    question_prompt = PromptTemplate(
        template=QUESTION_TEMPLATE,
        input_variables=["context", "question"],
    )
    combine_prompt = PromptTemplate(
        template=COMBINE_TEMPLATE,
        input_variables=["summaries", "question"],
    )
    chain_type_kwargs = {
        "combine_prompt": combine_prompt,
        "question_prompt": question_prompt,
    }

    qa = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm,
        chain_type="map_reduce",
        chain_type_kwargs=chain_type_kwargs,
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
    )

    # NOTE(review): the whole chain output is returned and rendered by the
    # text output as-is; confirm whether only the answer text should be shown.
    answer = qa({"question": Question}, return_only_outputs=True)
    return answer


iface = gr.Interface(
    fn=question_document,
    # "question" is not a Gradio component shortcut; a plain text box is used
    # for the question instead.
    inputs=["file", "text"],
    outputs=gr.outputs.Textbox(label="Réponse"),
    title="Interrogateur de PDF",
    description="par Nicolas \nPermet d'interroger un document PDF",
    allow_flagging="never",
)

iface.launch()