# LongDocumentQuestioner / document_questioner_app.py
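"""Question-answering app for long PDF documents.

Loads a PDF, embeds each page with OpenAI embeddings into a Chroma vector
store, then answers questions about it through a LangChain
ConversationalRetrievalChain served by a Gradio interface.
"""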
import os

import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate  # used by the commented-out custom prompts below
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI  # used by the commented-out davinci alternative below
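# NOTE: these import paths follow the legacy (pre-0.1) LangChain package
# layout; on current releases most of these classes live under
# langchain_community instead (e.g. langchain_community.document_loaders).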
def load_document(Document):
    """Loads a PDF document, embeds its pages, and indexes them in Chroma."""
    if not Document:
        return "Please provide a PDF document"
    if not Document.name.endswith('.pdf'):
        return "Please provide a PDF document"
    loader = PyPDFLoader(Document.name)
    docs = loader.load()  # one Document per PDF page
    global k
    k = len(docs)  # number of pages; used as the retriever's k at query time
    # Create embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OpenaiKey'])
    # Write the embedded pages to the vector store, one id per page
    global docsearch
    docsearch = Chroma.from_documents(docs, embeddings, ids=["page" + str(d.metadata["page"]) for d in docs])
    global chat_history
    chat_history = []
    return "Encoding created"
def get_chat_history(inputs) -> str:
    """Formats (question, answer) pairs into a single string for the prompt."""
    res = []
    for human, ai in inputs:
        res.append(f"Question: {human}\nAnswer: {ai}")
    return "\n".join(res)
def question_document(Question):
    if "docsearch" not in globals():
        return "Please encode a PDF document first"

    # Define the LLM
    turbo = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, openai_api_key=os.environ['OpenaiKey'])
    # davinci = OpenAI(model_name="text-davinci-003", openai_api_key=os.environ['OpenaiKey'])  # unused alternative LLM

    # Customized map_reduce prompts (kept for reference)
    #question_template = """{context}
    #State the number at the beginning of the above text in your answer; it is the page number of this extract in the source document. Label this number as "page".
    #Also make sure to answer in the same language as the following question.
    #QUESTION: {question}
    #ANSWER:
    #"""
    #combine_template = """{summaries}
    #Note that the above text is based on transient extracts from a single source document.
    #So make sure not to mention different documents or extracts or passages or portions or texts. There is only one, entire document.
    #Also make sure to answer in the same language as the following question.
    #QUESTION: {question}.
    #ANSWER:
    #"""
    #question_prompt = PromptTemplate(template=question_template, input_variables=['context', 'question'])
    #combine_prompt = PromptTemplate(template=combine_template, input_variables=['summaries', 'question'])

    # Define the chain: condense the question with the chat history, retrieve
    # the pages, then map_reduce them into a single answer
    #chain_type_kwargs = {"combine_prompt": combine_prompt, "question_prompt": question_prompt}  #, "return_intermediate_steps": True}
    #qa = RetrievalQAWithSourcesChain.from_chain_type(llm=turbo, chain_type="map_reduce", chain_type_kwargs=chain_type_kwargs, retriever=docsearch.as_retriever(), return_source_documents=True)
    vectordbkwargs = {"search_distance": 10}
    search_kwargs = {"k": k}  # retrieve every page so map_reduce sees the whole document
    qa = ConversationalRetrievalChain.from_llm(llm=turbo, chain_type="map_reduce", retriever=docsearch.as_retriever(search_kwargs=search_kwargs), get_chat_history=get_chat_history, return_source_documents=True)
    answer = qa({"question": Question, "chat_history": chat_history, "vectordbkwargs": vectordbkwargs}, return_only_outputs=True)
    chat_history.append((Question, answer["answer"]))
    print(answer)  # log the full chain output, including source documents
    return get_chat_history(chat_history)
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # PDF Questioner
        by Nicolas and Alex
        """)
    with gr.Row():
        with gr.Column():
            input_file = gr.File(label="Load a document")
            encode_btn = gr.Button("Encode the document")
            output_words = gr.Textbox(label="Encoding")
            encode_btn.click(fn=load_document, inputs=input_file, outputs=output_words)
        with gr.Column():
            text = gr.Textbox(label="Question")
            ask_btn = gr.Button("Ask a question")
            answer = gr.Textbox(label="Answer", lines=8)
            ask_btn.click(fn=question_document, inputs=text, outputs=answer)

demo.launch()
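# To run locally (assuming an OpenAI API key and the legacy LangChain/Gradio
# versions these imports target):
#   export OpenaiKey=<your-openai-api-key>
#   python document_questioner_app.py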