Spaces:

codeteach
/

bullet

Runtime error

App Files Files Community

codeteach committed on May 20

Commit

c81d8ab

•

1 Parent(s): b451def

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -21

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import gradio as gr
-from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
 import math
@@ -12,7 +12,9 @@ translation_models = {
 }
 # Initialize summarization pipeline with a specified model
-summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 # Initialize translation pipeline
 def get_translator(language):
@@ -38,23 +40,15 @@ def generate_bullet_points(text):
 # Helper function to split text into chunks
 def split_text(text, max_tokens=1024):
-    sentences = text.split('. ')
-    chunks = []
-    current_chunk = ""
-    current_tokens = 0
-    for sentence in sentences:
-        sentence_tokens = len(sentence.split())
-        if current_tokens + sentence_tokens > max_tokens:
-            chunks.append(current_chunk.strip())
-            current_chunk = sentence
-            current_tokens = sentence_tokens
-        else:
-            current_chunk += sentence + ". "
-            current_tokens += sentence_tokens
-    if current_chunk:
-        chunks.append(current_chunk.strip())
     return chunks
@@ -82,12 +76,12 @@ def process_text(input_text, language):
 iface = gr.Interface(
     fn=process_text,
     inputs=[
-        gr.Textbox(label="Input Text", placeholder="Paste your text here..."),
         gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
     ],
     outputs=[
-        gr.Textbox(label="Bullet Points"),
-        gr.Textbox(label="Translated Bullet Points")
     ],
     title="Text to Bullet Points and Translation",
     description="Paste any text, and the program will summarize it into bullet points. Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."
@@ -97,3 +91,4 @@ iface.launch()

 import gradio as gr
+from transformers import pipeline, AutoTokenizer
 from sentence_transformers import SentenceTransformer, util
 import math
 }
 # Initialize summarization pipeline with a specified model
+model_name = "sshleifer/distilbart-cnn-12-6"
+summarizer = pipeline("summarization", model=model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 # Initialize translation pipeline
 def get_translator(language):
 # Helper function to split text into chunks
 def split_text(text, max_tokens=1024):
+    inputs = tokenizer(text, return_tensors='pt', truncation=False)
+    input_ids = inputs['input_ids'][0]
+    total_tokens = len(input_ids)
+    chunks = []
+    for i in range(0, total_tokens, max_tokens):
+        chunk_ids = input_ids[i:i+max_tokens]
+        chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=True)
+        chunks.append(chunk_text)
     return chunks
 iface = gr.Interface(
     fn=process_text,
     inputs=[
+        gr.Textbox(label="Input Text", placeholder="Paste your text here...", lines=10),
         gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
     ],
     outputs=[
+        gr.Textbox(label="Bullet Points", lines=10),
+        gr.Textbox(label="Translated Bullet Points", lines=10)
     ],
     title="Text to Bullet Points and Translation",
     description="Paste any text, and the program will summarize it into bullet points. Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."