Spaces:

NicolasGaudemet
/

LongDocumentSummarizer

Runtime error

File size: 2,460 Bytes

a23dda1
 
5e73cde
 
 
 
 
 
 
 
 
 
 
 
9ff205d
 
6e668ce
5e73cde
9ff205d
 
 
5e73cde
 
 
 
 
ba29514
5e73cde
 
 
 
 
 
94e0df5
77feb10
4fabace
94e0df5
 
6dd3f45
5e73cde
4fabace
5e73cde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e74ca2c
 
5e73cde

import os
import json
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
import gradio as gr

# Load the runtime parameters.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default encoding.
with open("parametres.json", "r", encoding="utf-8") as p:
    params = json.load(p)

# Input size limit; the UI description presents it as a token count.
taille_max = params["taille_max"]
# Model name handed to the OpenAI wrapper below.
modele = params["modele"]
# Passed to the LLM as its max_tokens setting.
summary_length = params["summary_length"]
# Maximum number of chunks kept per document by summarize_text.
# NOTE(review): 4000 looks like a per-chunk budget, while summarize_text
# splits on chunk_size=3000 — confirm the intended unit.
chunks_max = taille_max // 4000

# Define the LLM.
# The API key is read from the "OpenaiKey" environment variable; if it is
# missing, os.environ raises KeyError when this module is loaded.
llm = OpenAI(
    model_name=modele,
    max_tokens=summary_length,
    temperature=0,
    openai_api_key=os.environ["OpenaiKey"],
)

# Summarize a text

def summarize_text(text_to_summarize, llm):
    """Summarize a long text with a map-reduce summarization chain.

    The text is split into chunks, at most ``chunks_max`` (module-level) of
    them are kept, and the chain is run on those chunks. The same prompt is
    used for the map step and for the combine step.

    Args:
        text_to_summarize: The raw text to summarize.
        llm: The LangChain LLM handed to the summarization chain.

    Returns:
        The chain's ``output_text``, or an empty string when there is
        nothing to summarize (empty / whitespace-only / ``None`` input, or
        no chunk kept).
    """
    # Nothing to summarize: skip the chain entirely instead of invoking it
    # with no content.
    if not text_to_summarize or not text_to_summarize.strip():
        return ""

    # Prepare the text
    text_splitter = CharacterTextSplitter(chunk_size=3000)
    texts = text_splitter.split_text(text_to_summarize)
    print(len(texts))  # debug trace: number of chunks produced
    docs = [Document(page_content=t) for t in texts[:chunks_max]]
    print(len(docs))  # debug trace: number of chunks actually kept
    if not docs:
        # Either the splitter produced nothing or chunks_max is 0.
        return ""

    # Summarize
    prompt_template = """Write a summary of the following, as long as possible in your context maximum size, in the langage of the original text:
    {text}
    SUMMARY:"""
    summary_language_prompt = PromptTemplate(
        template=prompt_template,
        input_variables=['text'],
    )
    chain = load_summarize_chain(
        llm,
        chain_type="map_reduce",
        return_intermediate_steps=True,
        map_prompt=summary_language_prompt,
        combine_prompt=summary_language_prompt,
    )
    steps = chain({"input_documents": docs}, return_only_outputs=True)

    # Debug traces: count and content of the per-chunk results.
    print(len(steps['intermediate_steps']))
    print(steps['intermediate_steps'])

    return steps['output_text']
    
# Read and summarize a text file

def _read_text_file(path):
    """Return the content of *path*, decoded as UTF-8 with a Latin-1 fallback."""
    try:
        # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM.
        with open(path, "r", encoding="utf-8-sig") as f:
            return f.read()
    except UnicodeDecodeError:
        # Not valid UTF-8: fall back to the historical Latin-1 decoding,
        # which accepts any byte sequence.
        with open(path, "r", encoding="latin-1") as f:
            return f.read()


def summarize_uploaded_file(file):
    """Read an uploaded ``.txt`` file and return its summary.

    Args:
        file: The uploaded file as handed over by the Gradio "file" input.
            Accepted forms are an object exposing the file path as
            ``.name``, a plain path string, or ``None`` when nothing was
            uploaded.

    Returns:
        The summary produced by ``summarize_text`` with the module-level
        ``llm``, or a user-facing error message when no ``.txt`` file was
        provided.
    """
    # Accept both an object carrying the path in `.name` and a bare path.
    path = getattr(file, "name", file)
    # Rejects None / non-string values as well as any other extension; the
    # extension check is case-insensitive so "NOTES.TXT" is accepted too.
    if not isinstance(path, str) or not path.lower().endswith('.txt'):
        return "Le fichier doit être un fichier texte (.txt)"
    text = _read_text_file(path)
    return summarize_text(text, llm)

# Build the Gradio interface

iface = gr.Interface(
    fn=summarize_uploaded_file,
    inputs="file",
    # Use the top-level component class: the legacy `gr.outputs` namespace
    # was deprecated in Gradio 3 and removed in Gradio 4.
    outputs=gr.Textbox(label="Résumé"),
    title="Long Text Summarizer",
    description=f"Résume un long fichier texte — jusqu'à {taille_max} tokens",
    allow_flagging="never",
)

# Launch the interface
iface.launch()