JennySHT committed on
Commit
317aed8
1 Parent(s): 5c50536

Upload 2 files

Browse files
Files changed (2) hide show
  1. requirements.txt +7 -0
  2. summary.py +36 -0
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ opencc-python-reimplemented
2
+ streamlit
3
+ transformers
4
+ sentencepiece
5
+ torch
6
+ torchvision
7
+ torchaudio
summary.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
3
+ import opencc
4
+
5
# Hugging Face Hub id of the T5 summarization checkpoint. To load a local copy
# instead, set this to the checkpoint directory (for example "./LLM").
local_path = 'utrobinmv/t5_summary_en_ru_zh_base_2048'


@st.cache_resource
def _load_summarizer(path):
    """Load the T5 model and its tokenizer once and reuse them across reruns.

    Streamlit re-executes this script on every widget interaction; without a
    resource cache the weights would be loaded again on each rerun.

    Args:
        path: Hub model id or local directory accepted by ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = T5ForConditionalGeneration.from_pretrained(path)
    loaded_tokenizer = T5Tokenizer.from_pretrained(path)
    return loaded_model, loaded_tokenizer


# Keep the module-level names the rest of the script relies on.
model, tokenizer = _load_summarizer(local_path)
10
+
11
+
12
+
13
# --- Page layout ---
st.title("中文文章摘要工具")

# OpenCC converter built with the 's2t' (Simplified -> Traditional Chinese)
# configuration.
converter = opencc.OpenCC('s2t')

# Multi-line input box for the article text; starts out empty.
article = st.text_area(label="請輸入文章", value="")
19
+
20
@st.cache_data
def generate_summary(article):
    """Summarize *article* with the module-level T5 model and tokenizer.

    The function is wrapped in Streamlit's data cache, so calling it again
    with the same text returns the stored result.

    Args:
        article: Source text to summarize.

    Returns:
        The decoded summary string with special tokens removed.
    """
    # NOTE(review): the checkpoint's model card appears to document English
    # task prefixes (e.g. 'summary: ') -- confirm this prompt prefix is intended.
    prompt = "摘要:" + article

    # Inputs longer than 1024 tokens are truncated by the tokenizer.
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )

    # Beam search with 4 beams; output length bounded to 60..180 tokens.
    generation_options = dict(
        max_length=180,
        min_length=60,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    generated = model.generate(input_ids, **generation_options)

    # Only the first returned sequence is decoded.
    return tokenizer.decode(generated[0], skip_special_tokens=True)
27
+
28
# Summarize only when the user clicks the button.
if st.button("生成摘要"):
    if article.strip():
        summary = generate_summary(article)
        # Convert the model output to Traditional Chinese before showing it.
        traditional_summary = converter.convert(summary)
        st.subheader("摘要:")
        st.write(traditional_summary)
    else:
        # Blank or whitespace-only input: show an error instead of summarizing.
        st.error("請輸入文章。")