File size: 1,748 Bytes
4142a7d
1457f73
eb51c13
4142a7d
2e9079e
de301cb
 
 
 
 
 
 
2e9079e
724a5a1
20be358
 
 
724a5a1
2e9079e
eb51c13
2e9079e
de301cb
eb51c13
 
 
 
 
 
 
20be358
 
 
 
 
17ad421
eb51c13
 
2e9079e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import streamlit as st
from transformers import pipeline
import textwrap

# Streamlit app: upload a UTF-8 .txt file, pick a Hugging Face summarization
# model, and summarize the text chunk by chunk.

# Width of each chunk handed to the model, measured in CHARACTERS:
# ``textwrap.wrap`` counts characters, not words.
CHUNK_WIDTH_CHARS = 500

# Half-width, in percentage points, of the [min, max] band placed around the
# "Scale %" slider value.
LENGTH_MARGIN_PERCENT = 10


def _summary_length_bounds(word_count, scale_percentage):
    """Return ``(min_length, max_length)`` for a chunk of ``word_count`` words.

    The bounds are ``scale_percentage`` -/+ ``LENGTH_MARGIN_PERCENT`` percent
    of the chunk's word count, with the percentages clamped to [1, 100].

    Args:
        word_count: Number of whitespace-separated words in the chunk.
        scale_percentage: Target summary size as a percentage (1-100) of the
            chunk length.

    Returns:
        A ``(min_length, max_length)`` tuple with
        ``1 <= min_length <= max_length``.
    """
    min_percentage = max(scale_percentage - LENGTH_MARGIN_PERCENT, 1)
    max_percentage = min(scale_percentage + LENGTH_MARGIN_PERCENT, 100)
    min_length = max(word_count * min_percentage // 100, 1)
    # For very short chunks the raw maximum can round down to 0 or fall below
    # min_length, which gives the generator infeasible constraints; clamp it.
    max_length = max(word_count * max_percentage // 100, min_length)
    return min_length, max_length


st.title('Hugging Face BERT Summarizer')

# List of models
models = ["sshleifer/distilbart-cnn-12-6", "facebook/bart-large-cnn", "t5-base", "t5-large", "google/pegasus-newsroom"]

# Dropdown model selector
model = st.sidebar.selectbox("Choose a model", models)

uploaded_file = st.file_uploader("Choose a .txt file", type="txt")

# Add slider to the sidebar for the scale value
scale_percentage = st.sidebar.slider('Scale %', min_value=1, max_value=100, value=50)

if uploaded_file is not None:
    try:
        user_input = uploaded_file.read().decode('utf-8')
    except UnicodeDecodeError:
        # A ".txt" extension does not guarantee UTF-8 content; report the
        # problem in the UI instead of crashing the script run.
        user_input = None
        st.error("The uploaded file is not valid UTF-8 text.")

    if user_input is not None and st.button('Summarize'):
        # Split the text into chunks of at most CHUNK_WIDTH_CHARS characters,
        # breaking on whitespace.
        chunks = textwrap.wrap(user_input, CHUNK_WIDTH_CHARS)

        if not chunks:
            # Empty or whitespace-only upload: nothing to summarize, so skip
            # loading the model.
            st.warning("The uploaded file contains no text to summarize.")
        else:
            # NOTE(review): the pipeline is rebuilt on every click; consider
            # caching it per model if the installed Streamlit supports that.
            summarizer = pipeline('summarization', model=model)

            # Summarize each chunk. The word count is only a proxy for the
            # length limits passed to the summarizer.
            summaries = []
            for chunk in chunks:
                min_length, max_length = _summary_length_bounds(
                    len(chunk.split()), scale_percentage
                )
                summarized = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=False)
                summaries.append(summarized[0]['summary_text'])

            st.text_area('Summarized Text', " ".join(summaries), height=200)