LongDocumentSummarizer / summarizer_app.py
NicolasGaudemet's picture
Update summarizer_app.py
6dd3f45
raw
history blame
2.46 kB
import os
import json
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
import gradio as gr
# Load the application settings from the JSON file next to the script.
with open("parametres.json", "r") as settings_file:
    params = json.load(settings_file)

# Pull out the three settings the rest of the script relies on.
taille_max, modele, summary_length = (
    params[key] for key in ("taille_max", "modele", "summary_length")
)

# Upper bound on the number of chunks kept by summarize_text:
# one chunk per 4000 units of taille_max (integer division).
# NOTE(review): 4000 is presumably a per-chunk token budget — confirm.
chunks_max = taille_max // 4000

# Define the LLM; the API key is read from the "OpenaiKey" environment
# variable, and max_tokens is set from summary_length.
llm = OpenAI(
    model_name=modele,
    max_tokens=summary_length,
    temperature=0,
    openai_api_key=os.environ['OpenaiKey'],
)
# Summarize a text
def summarize_text(text_to_summarize, llm):
    """Summarize a long text with a LangChain map-reduce summarization chain.

    The text is split by ``CharacterTextSplitter(chunk_size=3000)``; only the
    first ``chunks_max`` chunks (module-level limit) are wrapped in
    ``Document`` objects and passed to the chain. The same prompt is used for
    the map step and for the combine step.

    Args:
        text_to_summarize: Full text to summarize.
        llm: Language model handed to ``load_summarize_chain``.

    Returns:
        The chain's ``output_text`` (the final summary), or an empty string
        when there is nothing to summarize.
    """
    # Nothing to summarize: return before touching the splitter or the LLM.
    if not text_to_summarize or not text_to_summarize.strip():
        return ""

    # Text preparation
    text_splitter = CharacterTextSplitter(chunk_size=3000)
    texts = text_splitter.split_text(text_to_summarize)
    print(len(texts))
    docs = [Document(page_content=t) for t in texts[:chunks_max]]
    print(len(docs))
    if not docs:
        # No chunk left after splitting/truncation (e.g. chunks_max == 0):
        # avoid invoking the chain with no documents at all.
        return ""

    # Summary
    prompt_template = (
        "Write a summary of the following, as long as possible in your "
        "context maximum size, in the langage of the original text:\n"
        "{text}\n"
        "SUMMARY:"
    )
    summary_langage_prompt = PromptTemplate(
        template=prompt_template, input_variables=['text']
    )
    chain = load_summarize_chain(
        llm,
        chain_type="map_reduce",
        return_intermediate_steps=True,
        map_prompt=summary_langage_prompt,
        combine_prompt=summary_langage_prompt,
    )
    steps = chain({"input_documents": docs}, return_only_outputs=True)

    # Diagnostics: number of per-chunk summaries, then the summaries themselves.
    print(len(steps['intermediate_steps']))
    print(steps['intermediate_steps'])
    return steps['output_text']
# Read and summarize an uploaded text file
def summarize_uploaded_file(file):
    """Read an uploaded ``.txt`` file and return its summary.

    Args:
        file: Upload object; only its ``name`` attribute is used, as the path
            of the file to open. ``None`` is tolerated and treated like a
            file with the wrong extension.

    Returns:
        The summary returned by ``summarize_text``, or the French error
        message when no ``.txt`` file was provided.
    """
    # Case-insensitive extension check so that e.g. "NOTES.TXT" is accepted.
    if file is None or not file.name.lower().endswith('.txt'):
        return ("Le fichier doit être un fichier texte (.txt)")

    try:
        # Try UTF-8 first ("utf-8-sig" also strips a leading BOM) so that
        # UTF-8 files are not mis-decoded into mojibake.
        with open(file.name, "r", encoding="utf-8-sig") as f:
            text = f.read()
    except UnicodeDecodeError:
        # Fall back to the historical latin-1 decoding, which accepts any
        # byte sequence and therefore cannot fail.
        with open(file.name, "r", encoding="latin-1") as f:
            text = f.read()

    return summarize_text(text, llm)
# Build the Gradio interface: one file input, one text output.
# NOTE(review): gr.outputs.* looks like the legacy Gradio component
# namespace — confirm it exists in the pinned Gradio version.
summary_output = gr.outputs.Textbox(label="Résumé")
interface_description = (
    f"Résume un long fichier texte — jusqu'à {taille_max} tokens"
)

iface = gr.Interface(
    fn=summarize_uploaded_file,
    inputs="file",
    outputs=summary_output,
    title="Long Text Summarizer",
    description=interface_description,
    allow_flagging="never",
)

# Start the interface
iface.launch()