import os
import json
from langchain import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
import gradio as gr

# Load the parameters
with open("parametres.json", "r") as p:
    params = json.load(p)
    taille_max = params["taille_max"]
    modele = params["modele"]
    summary_length = params["summary_length"]
    chunks_max = taille_max // 4000  # cap on the number of chunks sent to the summarization chain

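# For reference, a minimal parametres.json might look like this
# (illustrative values; only the key names come from the loading code above):
# {
#     "taille_max": 12000,
#     "modele": "text-davinci-003",
#     "summary_length": 500
# }
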
# Define the LLM
llm = OpenAI(model_name=modele, max_tokens=summary_length, temperature=0, openai_api_key=os.environ['OpenaiKey'])
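# Note: the key is read from the 'OpenaiKey' environment variable, so it must be set
# before launching the app (e.g. export OpenaiKey=<your key>).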

# Summarize a text

def summarize_text(text_to_summarize, llm):

    # Prepare the text: split it into chunks and keep at most chunks_max of them
    text_splitter = CharacterTextSplitter(chunk_size=3000)
    texts = text_splitter.split_text(text_to_summarize)
    print(len(texts))
    docs = [Document(page_content=t) for t in texts[:chunks_max]]
    print(len(docs))

    # Summarize with a map-reduce chain, keeping the language of the original text
    prompt_template = """Write a summary of the following, as long as possible within your maximum context size, in the language of the original text:
    {text}
    SUMMARY:"""
    summary_langage_prompt = PromptTemplate(template=prompt_template, input_variables=['text'])
    chain = load_summarize_chain(llm, chain_type="map_reduce", return_intermediate_steps=True,
                                 map_prompt=summary_langage_prompt, combine_prompt=summary_langage_prompt)
    steps = chain({"input_documents": docs}, return_only_outputs=True)
    
    # Debug output: number and content of the intermediate (per-chunk) summaries
    print(len(steps['intermediate_steps']))
    print(steps['intermediate_steps'])

    return steps['output_text']
    
# Read and summarize a text file

def summarize_uploaded_file(file):
    if not file.name.endswith('.txt'):
        return "Le fichier doit être un fichier texte (.txt)"
    with open(file.name, "r", encoding="latin-1") as f:
        text = f.read()
    summary = summarize_text(text, llm)
    return summary     

# Create the Gradio interface

iface = gr.Interface(
    fn=summarize_uploaded_file,
    inputs="file",
    outputs=gr.outputs.Textbox(label="Résumé"),
    title="Long Text Summarizer",
    description=f"Résume un long fichier texte — jusqu'à {taille_max} tokens",
    allow_flagging="never")

# Launch the interface
iface.launch()
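
# When launched, Gradio serves the interface at its default local URL
# (typically http://127.0.0.1:7860).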