"""Gradio playground that streams chat completions from an LMDeploy engine.

The script runs two shell setup scripts, builds an ``AsyncEngine`` for the
model directory ``turbomind-internlm-chat-20b-w4``, defines the Gradio
callbacks and finally launches the web UI.
"""
import os
import random
import sys


def _run_setup_script(script: str) -> None:
    """Run ``sh <script>`` and warn on stderr if it exits non-zero.

    The script is best-effort: a failure is reported but does not abort the
    program, so start-up behaves as before apart from the extra message.

    Args:
        script (str): file name of the shell script to execute
    """
    status = os.system(f'sh {script}')
    if status != 0:
        print(
            f'WARNING: `sh {script}` exited with status {status}; '
            'continuing anyway.',
            file=sys.stderr)


# The install script must run before the imports below, which is why these
# imports cannot be moved to the top of the file.
_run_setup_script('install_lmdeploy.sh')

import gradio as gr  # noqa: E402

# InterFace, AsyncEngine, Sequence, CSS, THEME, enable_btn and disable_btn are
# not defined in this file, so they have to come from this wildcard import.
from lmdeploy.serve.gradio.app import *  # noqa: E402,F401,F403

_run_setup_script('download.sh')

InterFace.async_engine = AsyncEngine(
    model_path='turbomind-internlm-chat-20b-w4',
    instance_num=2,
    tp=1)


async def reset_local_demo(instruction_txtbox: gr.Textbox,
                           state_chatbot: gr.State, request: gr.Request):
    """Reset the session.

    Args:
        instruction_txtbox (str): user's prompt (ignored)
        state_chatbot (Sequence): the chatting history (discarded)
        request (gr.Request): the request from a user (unused)

    Returns:
        tuple: the new empty history for the state, the same empty history
            for the chatbot widget, and an update that clears the textbox.
    """
    cleared_history = []
    return (
        cleared_history,
        cleared_history,
        gr.Textbox.update(value=''),
    )


async def cancel_local_demo(state_chatbot: gr.State, cancel_btn: gr.Button,
                            reset_btn: gr.Button, request: gr.Request):
    """Stop the session.

    The history is passed through untouched; only the button updates change.

    Args:
        state_chatbot (Sequence): the chatting history
        cancel_btn (gr.Button): the cancel button (unused)
        reset_btn (gr.Button): the reset button (unused)
        request (gr.Request): the request from a user (unused)

    Returns:
        tuple: the unchanged history, ``disable_btn`` for the cancel button
            and ``enable_btn`` for the reset button.
    """
    return (state_chatbot, disable_btn, enable_btn)


async def chat_stream_demo(
    instruction: str,
    state_chatbot: Sequence,
    cancel_btn: gr.Button,
    reset_btn: gr.Button,
    request: gr.Request,
):
    """Chat with AI assistant.

    Args:
        instruction (str): user's prompt
        state_chatbot (Sequence): the chatting history
        cancel_btn (gr.Button): the cancel button (unused)
        reset_btn (gr.Button): the reset button (unused)
        request (gr.Request): the request from a user (unused)

    Yields:
        tuple: ``(history, history, cancel_update, reset_update)``, one value
            per output component wired to this callback. The history's last
            entry grows as response pieces arrive.
    """
    # NOTE(review): random ids can collide between concurrent requests;
    # confirm whether the engine tolerates two live sessions with one id.
    session_id = random.randint(0, 100000)

    # Build a new list so the caller's history object is not mutated.
    state_chatbot = state_chatbot + [(instruction, None)]

    # Flatten the (user, assistant) pairs into role-tagged messages; the
    # pending turn has no assistant reply yet and contributes only the prompt.
    messages = []
    for user_text, assistant_text in state_chatbot:
        messages.append(dict(role='user', content=user_text))
        if assistant_text is not None:
            messages.append(dict(role='assistant', content=assistant_text))

    yield (state_chatbot, state_chatbot, disable_btn, enable_btn)

    # The whole history is re-sent with sequence_start and sequence_end both
    # set; presumably each request is a self-contained one-shot session.
    # TODO confirm against the lmdeploy AsyncEngine.generate documentation.
    async for outputs in InterFace.async_engine.generate(
            messages,
            session_id,
            stream_response=True,
            sequence_start=True,
            sequence_end=True):
        response = outputs.response
        if outputs.finish_reason == 'length':
            gr.Warning('WARNING: exceed session max length.'
                       ' Please restart the session by reset button.')
        if outputs.generate_token_len < 0:
            gr.Warning('WARNING: running on the old session.'
                       ' Please restart the session by reset button.')

        # Append this piece to the reply accumulated so far.
        prompt, partial_reply = state_chatbot[-1]
        if partial_reply is None:
            state_chatbot[-1] = (prompt, response)
        else:
            state_chatbot[-1] = (prompt, partial_reply + response)

        # NOTE(review): both button updates are disable_btn while streaming,
        # so this callback never enables Cancel. Confirm this is intended.
        yield (state_chatbot, state_chatbot, disable_btn, disable_btn)

    yield (state_chatbot, state_chatbot, disable_btn, enable_btn)


with gr.Blocks(css=CSS, theme=THEME) as demo:
    state_chatbot = gr.State([])

    with gr.Column(elem_id='container'):
        gr.Markdown('## LMDeploy Playground')

        chatbot = gr.Chatbot(
            elem_id='chatbot',
            label=InterFace.async_engine.tm_model.model_name)
        instruction_txtbox = gr.Textbox(
            placeholder='Please input the instruction',
            label='Instruction')
        with gr.Row():
            cancel_btn = gr.Button(value='Cancel', interactive=False)
            reset_btn = gr.Button(value='Reset')

    # Submitting the textbox starts the streaming chat ...
    send_event = instruction_txtbox.submit(
        chat_stream_demo,
        [instruction_txtbox, state_chatbot, cancel_btn, reset_btn],
        [state_chatbot, chatbot, cancel_btn, reset_btn])
    # ... and, through a second listener, clears the textbox.
    instruction_txtbox.submit(
        lambda: gr.Textbox.update(value=''),
        [],
        [instruction_txtbox],
    )

    # Both buttons cancel the in-flight streaming event.
    cancel_btn.click(cancel_local_demo,
                     [state_chatbot, cancel_btn, reset_btn],
                     [state_chatbot, cancel_btn, reset_btn],
                     cancels=[send_event])
    reset_btn.click(reset_local_demo,
                    [instruction_txtbox, state_chatbot],
                    [state_chatbot, chatbot, instruction_txtbox],
                    cancels=[send_event])

demo.queue(concurrency_count=4, max_size=100).launch()