Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import pipeline
|
3 |
from sentence_transformers import SentenceTransformer, util
|
4 |
import math
|
5 |
|
@@ -12,7 +12,9 @@ translation_models = {
|
|
12 |
}
|
13 |
|
14 |
# Initialize summarization pipeline with a specified model
|
15 |
-
|
|
|
|
|
16 |
|
17 |
# Initialize translation pipeline
|
18 |
def get_translator(language):
|
@@ -38,23 +40,15 @@ def generate_bullet_points(text):
|
|
38 |
|
39 |
# Helper function to split text into chunks
|
40 |
def split_text(text, max_tokens=1024):
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
current_tokens = 0
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
current_tokens = sentence_tokens
|
52 |
-
else:
|
53 |
-
current_chunk += sentence + ". "
|
54 |
-
current_tokens += sentence_tokens
|
55 |
-
|
56 |
-
if current_chunk:
|
57 |
-
chunks.append(current_chunk.strip())
|
58 |
|
59 |
return chunks
|
60 |
|
@@ -82,12 +76,12 @@ def process_text(input_text, language):
|
|
82 |
iface = gr.Interface(
|
83 |
fn=process_text,
|
84 |
inputs=[
|
85 |
-
gr.Textbox(label="Input Text", placeholder="Paste your text here..."),
|
86 |
gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
|
87 |
],
|
88 |
outputs=[
|
89 |
-
gr.Textbox(label="Bullet Points"),
|
90 |
-
gr.Textbox(label="Translated Bullet Points")
|
91 |
],
|
92 |
title="Text to Bullet Points and Translation",
|
93 |
description="Paste any text, and the program will summarize it into bullet points. Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."
|
@@ -97,3 +91,4 @@ iface.launch()
|
|
97 |
|
98 |
|
99 |
|
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline, AutoTokenizer
|
3 |
from sentence_transformers import SentenceTransformer, util
|
4 |
import math
|
5 |
|
|
|
12 |
}
|
13 |
|
14 |
# Initialize summarization pipeline with a specified model
|
15 |
+
# Checkpoint used for summarization; distilbart-cnn is a distilled BART model.
model_name = "sshleifer/distilbart-cnn-12-6"

# Build the summarization pipeline once at import time so every request reuses it.
summarizer = pipeline("summarization", model=model_name)

# Reuse the tokenizer the pipeline already loaded instead of loading a second
# copy: this avoids a duplicate load and guarantees that token counts computed
# while chunking match what the model actually sees.
tokenizer = summarizer.tokenizer
|
18 |
|
19 |
# Initialize translation pipeline
|
20 |
def get_translator(language):
|
|
|
40 |
|
41 |
# Helper function to split text into chunks
|
42 |
def split_text(text, max_tokens=1024):
    """Split *text* into chunks that each fit within a token budget.

    The text is tokenized with the module-level ``tokenizer``, the token ids
    are cut into consecutive windows, and every window is decoded back to a
    string. Cuts fall on token boundaries, not sentence boundaries.

    Args:
        text: The input string to split.
        max_tokens: Maximum number of tokens a chunk may occupy, *including*
            the special tokens the tokenizer adds when a chunk is encoded
            again (presumably by the summarizer -- verify against the caller).

    Returns:
        A list of decoded text chunks in original order. Text that produces
        no tokens (e.g. an empty string) yields an empty list.

    Raises:
        ValueError: If ``max_tokens`` is less than 1.
    """
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")

    # Tokenize without BOS/EOS markers so only real content tokens are counted
    # and no special token ends up inside a chunk. Plain Python lists are
    # enough here; there is no need to build a tensor just to slice it.
    token_ids = tokenizer(
        text, add_special_tokens=False, truncation=False
    )["input_ids"]

    # Reserve room for the special tokens that are added back when a chunk is
    # re-encoded; otherwise a "full" chunk would exceed max_tokens by that many.
    # NOTE(review): decode -> re-encode is not guaranteed to reproduce the exact
    # same token count at chunk borders; lower max_tokens if that matters.
    budget = max(1, max_tokens - tokenizer.num_special_tokens_to_add())

    return [
        tokenizer.decode(token_ids[start:start + budget], skip_special_tokens=True)
        for start in range(0, len(token_ids), budget)
    ]
|
54 |
|
|
|
76 |
iface = gr.Interface(
|
77 |
fn=process_text,
|
78 |
inputs=[
|
79 |
+
gr.Textbox(label="Input Text", placeholder="Paste your text here...", lines=10),
|
80 |
gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
|
81 |
],
|
82 |
outputs=[
|
83 |
+
gr.Textbox(label="Bullet Points", lines=10),
|
84 |
+
gr.Textbox(label="Translated Bullet Points", lines=10)
|
85 |
],
|
86 |
title="Text to Bullet Points and Translation",
|
87 |
description="Paste any text, and the program will summarize it into bullet points. Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."
|
|
|
91 |
|
92 |
|
93 |
|
94 |
+
|