codeteach committed on
Commit
c81d8ab
1 Parent(s): b451def

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -21
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  from sentence_transformers import SentenceTransformer, util
4
  import math
5
 
@@ -12,7 +12,9 @@ translation_models = {
12
  }
13
 
14
  # Initialize summarization pipeline with a specified model
15
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 
 
16
 
17
  # Initialize translation pipeline
18
  def get_translator(language):
@@ -38,23 +40,15 @@ def generate_bullet_points(text):
38
 
39
  # Helper function to split text into chunks
40
  def split_text(text, max_tokens=1024):
41
- sentences = text.split('. ')
42
- chunks = []
43
- current_chunk = ""
44
- current_tokens = 0
45
 
46
- for sentence in sentences:
47
- sentence_tokens = len(sentence.split())
48
- if current_tokens + sentence_tokens > max_tokens:
49
- chunks.append(current_chunk.strip())
50
- current_chunk = sentence
51
- current_tokens = sentence_tokens
52
- else:
53
- current_chunk += sentence + ". "
54
- current_tokens += sentence_tokens
55
-
56
- if current_chunk:
57
- chunks.append(current_chunk.strip())
58
 
59
  return chunks
60
 
@@ -82,12 +76,12 @@ def process_text(input_text, language):
82
  iface = gr.Interface(
83
  fn=process_text,
84
  inputs=[
85
- gr.Textbox(label="Input Text", placeholder="Paste your text here..."),
86
  gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
87
  ],
88
  outputs=[
89
- gr.Textbox(label="Bullet Points"),
90
- gr.Textbox(label="Translated Bullet Points")
91
  ],
92
  title="Text to Bullet Points and Translation",
93
  description="Paste any text, and the program will summarize it into bullet points. Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."
@@ -97,3 +91,4 @@ iface.launch()
97
 
98
 
99
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoTokenizer
3
  from sentence_transformers import SentenceTransformer, util
4
  import math
5
 
 
12
  }
13
 
14
  # Initialize summarization pipeline with a specified model
15
+ model_name = "sshleifer/distilbart-cnn-12-6"
16
+ summarizer = pipeline("summarization", model=model_name)
17
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
18
 
19
  # Initialize translation pipeline
20
  def get_translator(language):
 
40
 
41
  # Helper function to split text into chunks
42
  def split_text(text, max_tokens=1024):
43
+ inputs = tokenizer(text, return_tensors='pt', truncation=False)
44
+ input_ids = inputs['input_ids'][0]
45
+ total_tokens = len(input_ids)
 
46
 
47
+ chunks = []
48
+ for i in range(0, total_tokens, max_tokens):
49
+ chunk_ids = input_ids[i:i+max_tokens]
50
+ chunk_text = tokenizer.decode(chunk_ids, skip_special_tokens=True)
51
+ chunks.append(chunk_text)
 
 
 
 
 
 
 
52
 
53
  return chunks
54
 
 
76
  iface = gr.Interface(
77
  fn=process_text,
78
  inputs=[
79
+ gr.Textbox(label="Input Text", placeholder="Paste your text here...", lines=10),
80
  gr.Dropdown(choices=["Vietnamese", "Japanese", "Thai", "Spanish"], label="Translate to", value="Vietnamese")
81
  ],
82
  outputs=[
83
+ gr.Textbox(label="Bullet Points", lines=10),
84
+ gr.Textbox(label="Translated Bullet Points", lines=10)
85
  ],
86
  title="Text to Bullet Points and Translation",
87
  description="Paste any text, and the program will summarize it into bullet points. Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."
 
91
 
92
 
93
 
94
+