Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
from transformers import pipeline, AutoTokenizer
|
3 |
import nltk
|
4 |
from nltk.tokenize import sent_tokenize
|
|
|
5 |
|
6 |
# Download NLTK data
|
7 |
nltk.download('punkt')
|
@@ -23,8 +24,9 @@ summarization_models = {
|
|
23 |
# Initialize tokenizer
|
24 |
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
|
25 |
|
26 |
-
#
|
27 |
-
|
|
|
28 |
|
29 |
# Initialize translation pipeline
|
30 |
def get_translator(language):
|
@@ -60,11 +62,12 @@ def summarize_text(text, model_name):
|
|
60 |
if len(text) < 200: # Adjust the threshold as needed
|
61 |
print("Input text is too short for summarization. Please provide longer text.")
|
62 |
return ""
|
|
|
63 |
chunks = split_text(text)
|
64 |
summaries = []
|
65 |
for chunk in chunks:
|
66 |
try:
|
67 |
-
summary =
|
68 |
summaries.append(summary)
|
69 |
except Exception as e:
|
70 |
print(f"Error summarizing chunk: {chunk}\nError: {e}")
|
@@ -83,6 +86,7 @@ def translate_text(text, language):
|
|
83 |
return text
|
84 |
|
85 |
def process_text(input_text, model, language):
|
|
|
86 |
print(f"Input text: {input_text[:500]}...") # Show only the first 500 characters for brevity
|
87 |
summary = summarize_text(input_text, model)
|
88 |
if not summary:
|
@@ -96,6 +100,8 @@ def process_text(input_text, model, language):
|
|
96 |
print(f"Bullet Points: {bullet_points}")
|
97 |
translated_text = translate_text(bullet_points, language)
|
98 |
print(f"Translated Text: {translated_text}")
|
|
|
|
|
99 |
return bullet_points, translated_text
|
100 |
|
101 |
def generate_bullet_points(summary):
|
@@ -148,4 +154,5 @@ iface.launch()
|
|
148 |
|
149 |
|
150 |
|
|
|
151 |
|
|
|
2 |
from transformers import pipeline, AutoTokenizer
|
3 |
import nltk
|
4 |
from nltk.tokenize import sent_tokenize
|
5 |
+
import time
|
6 |
|
7 |
# Download NLTK data
|
8 |
nltk.download('punkt')
|
|
|
24 |
# Initialize tokenizer
|
25 |
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
|
26 |
|
27 |
+
# Helper function to initialize summarization pipeline
|
28 |
+
def get_summarizer(model_name):
    """Return a summarization pipeline for ``model_name``, building it once.

    The first call for a given model constructs the pipeline via
    ``transformers.pipeline``; later calls with the same name return the
    already-built object instead of instantiating the model again on every
    request.

    Args:
        model_name: Model identifier passed to ``pipeline`` as ``model``.

    Returns:
        The summarization pipeline for ``model_name``.

    Note:
        Each distinct model requested stays referenced for the lifetime of
        the process. This presumably stays small because callers pick from a
        fixed set of models -- TODO confirm against the model selection UI.
    """
    # Keep the cache on the function object so the block needs no extra
    # import or module-level name.
    loaded = get_summarizer.__dict__.setdefault("_loaded", {})
    if model_name not in loaded:
        loaded[model_name] = pipeline("summarization", model=model_name)
    return loaded[model_name]
|
30 |
|
31 |
# Initialize translation pipeline
|
32 |
def get_translator(language):
|
|
|
62 |
if len(text) < 200: # Adjust the threshold as needed
|
63 |
print("Input text is too short for summarization. Please provide longer text.")
|
64 |
return ""
|
65 |
+
summarizer = get_summarizer(model_name)
|
66 |
chunks = split_text(text)
|
67 |
summaries = []
|
68 |
for chunk in chunks:
|
69 |
try:
|
70 |
+
summary = summarizer(chunk, max_length=150, min_length=20, do_sample=False)[0]['summary_text']
|
71 |
summaries.append(summary)
|
72 |
except Exception as e:
|
73 |
print(f"Error summarizing chunk: {chunk}\nError: {e}")
|
|
|
86 |
return text
|
87 |
|
88 |
def process_text(input_text, model, language):
|
89 |
+
start_time = time.time()
|
90 |
print(f"Input text: {input_text[:500]}...") # Show only the first 500 characters for brevity
|
91 |
summary = summarize_text(input_text, model)
|
92 |
if not summary:
|
|
|
100 |
print(f"Bullet Points: {bullet_points}")
|
101 |
translated_text = translate_text(bullet_points, language)
|
102 |
print(f"Translated Text: {translated_text}")
|
103 |
+
end_time = time.time()
|
104 |
+
print(f"Processing time: {end_time - start_time} seconds")
|
105 |
return bullet_points, translated_text
|
106 |
|
107 |
def generate_bullet_points(summary):
|
|
|
154 |
|
155 |
|
156 |
|
157 |
+
|
158 |
|