import gradio as gr
from transformers import AutoTokenizer, AutoModel
from transformers import GPT2Tokenizer, GPT2LMHeadModel


def chat(prompt: str) -> str:
    """Generate a text continuation for *prompt* with the loaded GPT-2 model.

    Relies on the module-level ``tokenizer`` and ``model`` objects, which are
    created in the ``__main__`` section below before the UI is launched.

    Args:
        prompt: Text entered by the user in the Gradio text box.

    Returns:
        The decoded generated sequence, cut at the first ``<|endoftext|>``
        marker. An empty string is returned for an empty prompt.
    """
    # An empty prompt gives the tokenizer nothing to encode, so skip
    # generation instead of handing generate() an empty input.
    if not prompt:
        return ''

    inputs = tokenizer(prompt, return_tensors='pt')

    # Sampling (do_sample + top_p) with a single returned sequence.
    # NOTE(review): max_length is understood to bound prompt + generated
    # tokens together, so very long prompts leave little or no room for new
    # text; max_new_tokens may be the better knob -- confirm before changing.
    sequences = model.generate(
        **inputs,
        max_length=150,
        do_sample=True,
        top_p=0.6,
        eos_token_id=50256,
        pad_token_id=0,
        num_return_sequences=1,
    )

    # Only one sequence is requested, so decode the first (and only) row and
    # drop everything from the end-of-text marker onwards.
    return tokenizer.decode(sequences[0]).split('<|endoftext|>')[0]


if __name__ == '__main__':
    hf_model_path = 'IDEA-CCNL/Yuyuan-GPT2-110M-SciFi-Chinese'

    # Module-level globals read by chat().
    tokenizer = GPT2Tokenizer.from_pretrained(hf_model_path)
    model = GPT2LMHeadModel.from_pretrained(hf_model_path)
    model.eval()  # switch the model to evaluation mode before serving

    # Simple text-in / text-out web UI around chat().
    iface = gr.Interface(fn=chat, inputs="text", outputs="text")
    iface.launch()