|
import gradio as gr |
|
from transformers import pipeline |
|
from sentence_transformers import SentenceTransformer, util |
|
|
|
|
|
# Display name of each supported target language -> Hugging Face model id of
# the English-to-<language> Helsinki-NLP OPUS-MT checkpoint used for it.
# NOTE(review): confirm every id below actually resolves on the Hub (the Thai
# entry in particular) -- TODO verify.
translation_models = {
    language: f"Helsinki-NLP/opus-mt-en-{suffix}"
    for language, suffix in (
        ("Vietnamese", "vi"),
        ("Japanese", "jap"),
        ("Thai", "tha"),
        ("Spanish", "es"),
    )
}
|
|
|
|
|
# Summarization pipeline, constructed once when the module is loaded and then
# shared by every call that needs a summary.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
|
|
|
|
|
# Translation pipelines that have already been built, keyed by model id.
_translator_cache = {}


def get_translator(language):
    """Return the translation pipeline for ``language``, if it is supported.

    Constructing a pipeline loads the model, so each pipeline is built the
    first time its language is requested and reused on later calls instead of
    being rebuilt for every request.

    Args:
        language: Display name of the target language; supported names are
            the keys of ``translation_models``.

    Returns:
        A ``transformers`` translation pipeline, or ``None`` when ``language``
        has no entry in ``translation_models``.
    """
    model_name = translation_models.get(language)
    if not model_name:
        return None

    translator = _translator_cache.get(model_name)
    if translator is None:
        translator = pipeline("translation", model=model_name)
        _translator_cache[model_name] = translator
    return translator
|
|
|
|
|
# Sentence-embedding model, created on first use and shared by later calls.
_sentence_model = None


def _get_sentence_model():
    """Load the sentence-embedding model once and return the shared instance."""
    global _sentence_model
    if _sentence_model is None:
        _sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    return _sentence_model


def generate_bullet_points(text):
    """Condense ``text`` into a dash-prefixed bullet list, one line per cluster.

    The text is split into sentences on ``'. '``, the sentences are embedded,
    and similar sentences are grouped with ``util.community_detection``. The
    first sentence the library lists for each group becomes one bullet.

    Args:
        text: The text to turn into bullet points.

    Returns:
        A string with one ``"- <sentence>"`` line per cluster, joined with
        newlines, or an empty string when ``text`` contains no sentences.
    """
    # Drop empty fragments (e.g. from blank input or repeated separators) so
    # they are never embedded or emitted as empty bullets.
    sentences = [part.strip() for part in text.split('. ')]
    sentences = [sentence for sentence in sentences if sentence]
    if not sentences:
        return ""

    embeddings = _get_sentence_model().encode(sentences, convert_to_tensor=True)

    # community_detection defaults to min_community_size=10, which is larger
    # than a short summary has sentences; a minimum of 1 lets a sentence with
    # no close neighbours form its own cluster instead of being dropped.
    clusters = util.community_detection(
        embeddings,
        threshold=0.75,
        min_community_size=1,
    )

    bullet_points = [sentences[cluster[0]] for cluster in clusters if cluster]
    return "\n".join(f"- {point}" for point in bullet_points)
|
|
|
|
|
def summarize_text(text):
    """Return an abstractive summary of ``text`` from the shared summarizer.

    Args:
        text: The text to summarize.

    Returns:
        The ``summary_text`` of the first result returned by ``summarizer``,
        or an empty string when ``text`` is empty or only whitespace.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""

    summary = summarizer(
        text,
        max_length=150,
        min_length=40,
        do_sample=False,
        # Cut over-long input down to the model's maximum input size instead
        # of letting the call fail on it.
        truncation=True,
    )
    return summary[0]['summary_text']
|
|
|
|
|
def translate_text(text, language):
    """Translate ``text`` into ``language``, keeping its line structure.

    Each non-blank line is translated on its own and the results are joined
    with newlines again, so a multi-line bullet list comes back with the same
    number of lines rather than relying on the model to reproduce newlines.

    Args:
        text: The text to translate; may span several lines.
        language: Display name of the target language, as accepted by
            ``get_translator``.

    Returns:
        The translated text, or ``text`` unchanged when it is blank or when
        ``get_translator`` has no translator for ``language``.
    """
    # Blank input needs no translation; return before loading any model.
    if not text.strip():
        return text

    translator = get_translator(language)
    if translator is None:
        return text

    translated_lines = [
        translator(line)[0]['translation_text'] if line.strip() else line
        for line in text.split("\n")
    ]
    return "\n".join(translated_lines)
|
|
|
def process_text(input_text, language):
    """Summarize ``input_text``, turn it into bullets, and translate them.

    Args:
        input_text: The raw text to process.
        language: Display name of the language to translate the bullets into.

    Returns:
        A ``(bullet_points, translated_text)`` tuple of strings. Both are
        empty strings when ``input_text`` is empty or only whitespace.
    """
    # Blank input: answer immediately instead of running the models on it.
    if not input_text or not input_text.strip():
        return "", ""

    summary = summarize_text(input_text)
    bullet_points = generate_bullet_points(summary)
    translated_text = translate_text(bullet_points, language)
    return bullet_points, translated_text
|
|
|
|
|
# Input widgets: the text to process and the language to translate into.
input_components = [
    gr.Textbox(label="Input Text", placeholder="Paste your text here..."),
    gr.Dropdown(
        choices=["Vietnamese", "Japanese", "Thai", "Spanish"],
        label="Translate to",
        value="Vietnamese",
    ),
]

# Output widgets, in the order process_text returns its two values.
output_components = [
    gr.Textbox(label="Bullet Points"),
    gr.Textbox(label="Translated Bullet Points"),
]

# Gradio interface that routes the inputs through process_text.
iface = gr.Interface(
    fn=process_text,
    inputs=input_components,
    outputs=output_components,
    title="Text to Bullet Points and Translation",
    description=(
        "Paste any text, and the program will summarize it into bullet points. "
        "Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish."
    ),
)

# Launch the interface as soon as this module is executed.
iface.launch()
|
|
|
|
|
|