"""Gradio app: summarize pasted text into bullet points and translate them."""

import functools
import math
import re

import gradio as gr
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Translation models, keyed by the language names shown in the UI dropdown.
translation_models = {
    'Vietnamese': "Helsinki-NLP/opus-mt-en-vi",
    'Japanese': "Helsinki-NLP/opus-mt-en-jap",
    'Thai': "Helsinki-NLP/opus-mt-en-tha",
    'Spanish': "Helsinki-NLP/opus-mt-en-es",
}

# Initialize summarization pipeline with a specified model.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# A sentence ends at '.', '!' or '?' followed by whitespace; the lookbehind
# keeps the punctuation attached to the sentence it terminates.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")

# Marker that generate_bullet_points() puts in front of every bullet.
_BULLET_PREFIX = "- "

# split_text() budgets by whitespace-separated words, which undercounts the
# sub-word tokens the summarizer actually sees.  Staying well below the
# 1024-token input limit of BART-style summarizers leaves headroom for that.
_CHUNK_WORD_BUDGET = 600


def _split_sentences(text):
    """Return the non-empty sentences of *text*, punctuation preserved."""
    return [part.strip() for part in _SENTENCE_BOUNDARY.split(text) if part.strip()]


@functools.lru_cache(maxsize=None)
def _load_translator(model_name):
    """Build the translation pipeline for *model_name* once and reuse it."""
    return pipeline("translation", model=model_name)


@functools.lru_cache(maxsize=1)
def _get_sentence_model():
    """Load the sentence-embedding model on first use and reuse it afterwards."""
    return SentenceTransformer('paraphrase-MiniLM-L6-v2')


def get_translator(language):
    """Return a translation pipeline for *language*, or None if unsupported.

    Pipelines are cached per model name, so repeated requests for the same
    language do not reload the model.
    """
    model_name = translation_models.get(language)
    if not model_name:
        return None
    return _load_translator(model_name)


def generate_bullet_points(text):
    """Condense *text* into a newline-separated list of "- " bullets.

    Sentences are embedded and grouped with community detection; one
    sentence per group (the first index the library lists for it) becomes a
    bullet.  Returns an empty string when *text* contains no sentences.
    """
    sentences = _split_sentences(text)
    if not sentences:
        return ""

    embeddings = _get_sentence_model().encode(sentences, convert_to_tensor=True)
    # min_community_size defaults to 10, which silently drops every sentence
    # of a short summary.  A size of 1 lets unique sentences form their own
    # group so nothing is lost.
    clusters = util.community_detection(
        embeddings, threshold=0.75, min_community_size=1
    )

    bullet_points = [sentences[cluster[0]] for cluster in clusters if cluster]
    return "\n".join(f"{_BULLET_PREFIX}{point}" for point in bullet_points)


def split_text(text, max_tokens=1024):
    """Split *text* into chunks of whole sentences.

    Args:
        text: The text to split.
        max_tokens: Maximum number of whitespace-separated words per chunk
            (a rough stand-in for model tokens).

    Returns:
        A list of chunk strings; empty when *text* has no sentences.  A
        single sentence longer than *max_tokens* is kept as its own chunk
        rather than being cut mid-sentence.
    """
    chunks = []
    current_sentences = []
    current_tokens = 0

    for sentence in _split_sentences(text):
        sentence_tokens = len(sentence.split())
        # Only flush when there is something to flush; otherwise an oversized
        # first sentence would produce an empty leading chunk.
        if current_sentences and current_tokens + sentence_tokens > max_tokens:
            chunks.append(" ".join(current_sentences))
            current_sentences = []
            current_tokens = 0
        current_sentences.append(sentence)
        current_tokens += sentence_tokens

    if current_sentences:
        chunks.append(" ".join(current_sentences))
    return chunks


def summarize_text(text):
    """Summarize *text* chunk by chunk and join the partial summaries."""
    chunks = split_text(text, max_tokens=_CHUNK_WORD_BUDGET)
    summaries = [
        # truncation=True is a safety net for a chunk that still exceeds the
        # model's input limit (e.g. one extremely long sentence).
        summarizer(
            chunk, max_length=150, min_length=40, do_sample=False, truncation=True
        )[0]['summary_text']
        for chunk in chunks
    ]
    return " ".join(summaries)


def translate_text(text, language):
    """Translate *text* into *language*, one line at a time.

    Translating line by line keeps the line structure intact; a leading
    "- " bullet marker is set aside and re-attached to the translated line.
    Returns *text* unchanged when no translator exists for *language*.
    """
    translator = get_translator(language)
    if translator is None:
        return text

    translated_lines = []
    for line in text.splitlines():
        prefix = _BULLET_PREFIX if line.startswith(_BULLET_PREFIX) else ""
        content = line[len(prefix):].strip()
        if not content:
            # Nothing to translate; keep blank/marker-only lines as they are.
            translated_lines.append(line)
            continue
        translated = translator(content)[0]['translation_text']
        translated_lines.append(f"{prefix}{translated}")
    return "\n".join(translated_lines)


def process_text(input_text, language):
    """Summarize *input_text* into bullets and translate them.

    Returns:
        A ``(bullet_points, translated_bullet_points)`` tuple of strings.
        Both are empty when *input_text* is empty or only whitespace.
    """
    if not input_text or not input_text.strip():
        return "", ""

    summary = summarize_text(input_text)
    bullet_points = generate_bullet_points(summary)
    translated_text = translate_text(bullet_points, language)
    return bullet_points, translated_text


# Create Gradio interface.
iface = gr.Interface(
    fn=process_text,
    inputs=[
        gr.Textbox(label="Input Text", placeholder="Paste your text here..."),
        gr.Dropdown(
            choices=["Vietnamese", "Japanese", "Thai", "Spanish"],
            label="Translate to",
            value="Vietnamese",
        ),
    ],
    outputs=[
        gr.Textbox(label="Bullet Points"),
        gr.Textbox(label="Translated Bullet Points"),
    ],
    title="Text to Bullet Points and Translation",
    description="Paste any text, and the program will summarize it into bullet points. Optionally, translate the bullet points into Vietnamese, Japanese, Thai, or Spanish.",
)

iface.launch()