"""Gradio demo that generates Japanese text with a causal language model."""
from datetime import datetime

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Hugging Face model id shared by the tokenizer and the model so the two can
# never drift apart. The larger 'line-corporation/japanese-large-lm-3.6b'
# checkpoint can be swapped in here instead.
MODEL_NAME = 'line-corporation/japanese-large-lm-1.7b'


def _timestamp():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


print(f'{_timestamp()}:loading...')
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
if torch.cuda.is_available():
    # Use half precision only when the model is moved to the GPU.
    model.half()
    model = model.to('cuda')
generator = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device=model.device,
)
print(f'{_timestamp()}:done.')


def generate(input_text, maxlen):
    """Generate one sampled continuation of ``input_text``.

    Args:
        input_text: Prompt string handed to the text-generation pipeline.
        maxlen: Value forwarded as ``max_length``; coerced to ``int`` because
            it originates from a UI slider.

    Returns:
        The ``generated_text`` field of the single returned sequence.
    """
    # NOTE(review): per the transformers generation docs, ``max_length``
    # counts prompt tokens as well as generated ones, so a prompt longer than
    # the slider value leaves no room for new tokens. Consider
    # ``max_new_tokens`` if that is not the intended behaviour -- confirm.
    output = generator(
        input_text,
        max_length=int(maxlen),
        do_sample=True,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    return output[0]['generated_text']


with gr.Blocks(title='text generation ja') as app:
    gr.Markdown('# Text Generation JA')
    chatbot = gr.Chatbot(label='generated text')
    msg = gr.Textbox(label='text')
    maxlen = gr.Slider(minimum=30, maximum=100, value=30, step=1, label='max length')
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, maxlen, chat_history):
        """Handle a textbox submit: generate text and append it to the chat.

        Args:
            message: Text submitted from the textbox.
            maxlen: Current slider value, forwarded to ``generate``.
            chat_history: List of ``(user, bot)`` pairs shown in the chatbot;
                ``None`` is treated as an empty history.

        Returns:
            A tuple ``('', chat_history)``: the empty string clears the
            textbox and the list becomes the new chatbot content.
        """
        if chat_history is None:
            chat_history = []
        # An empty submit leaves the conversation untouched.
        if not message:
            return '', chat_history
        bot_message = generate(message, maxlen)
        chat_history.append((message, bot_message))
        return '', chat_history

    msg.submit(respond, [msg, maxlen, chatbot], [msg, chatbot], concurrency_limit=20)

app.launch()