File size: 1,268 Bytes
317aed8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import streamlit as st
from transformers import T5ForConditionalGeneration, T5Tokenizer
import opencc
# Summarization checkpoint (used here for Chinese articles).
local_path = 'utrobinmv/t5_summary_en_ru_zh_base_2048'


@st.cache_resource
def _load_model_and_tokenizer(path):
    """Load the T5 model and its tokenizer for *path*.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the loaded objects as a resource keeps them from
    being re-created on each rerun; the same instances are returned for
    subsequent calls with the same *path*.

    Args:
        path: Model identifier or local directory passed to
            ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = T5ForConditionalGeneration.from_pretrained(path)
    loaded_tokenizer = T5Tokenizer.from_pretrained(path)
    return loaded_model, loaded_tokenizer


# Keep the original module-level names so the rest of the script is unchanged.
model, tokenizer = _load_model_and_tokenizer(local_path)
# Page header (rendered first, so it stays above the input widget).
st.title("中文文章摘要工具")

# OpenCC converter using the 's2t' (Simplified -> Traditional Chinese) config;
# applied to the generated summary before it is displayed.
converter = opencc.OpenCC('s2t')

# Multi-line input box for the article text; starts out empty.
article = st.text_area(label="請輸入文章", value="")
# Function to generate summary
@st.cache_data
def generate_summary(article, *, prefix="summary: "):
    """Return a model-generated summary of *article*.

    The result is memoized by ``st.cache_data``, so repeated calls with the
    same arguments do not re-run generation.

    Args:
        article: Text to summarize.
        prefix: Task prefix prepended to the article before tokenization.
            The default is the summarization prefix documented on the
            checkpoint's model card; the previous value ("摘要:") is not
            one of the documented task prefixes.

    Returns:
        The decoded summary string with special tokens removed. It is
        returned as produced by the model (no script conversion here).
    """
    # Calling the tokenizer (rather than ``encode``) also yields the
    # attention mask, which is forwarded to ``generate`` below.
    # Inputs longer than 1024 tokens are truncated.
    encoded = tokenizer(
        prefix + article,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    summary_ids = model.generate(
        **encoded,
        max_length=180,
        min_length=60,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # A single input was given, so only the first sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# Summarize only when the button was clicked on this run.
if st.button("生成摘要"):
    if article.strip():
        # Generate first, then convert the text to Traditional Chinese,
        # and only afterwards render the heading and the result.
        traditional_summary = converter.convert(generate_summary(article))
        st.subheader("摘要:")
        st.write(traditional_summary)
    else:
        # Nothing but whitespace was entered: ask the user for an article.
        st.error("請輸入文章。")
|