"""Gradio demo where a "writer" and an "editor" chat model take turns.

The writer produces text about a topic, the editor critiques it, and the
writer is then asked to rewrite.  Both conversations are kept in module-level
message lists that are reset whenever a new topic is submitted.
"""
import os
import re
import time

import gradio as gr
from openai import OpenAI

# The API key is read from an environment variable literally named "openai".
client = OpenAI(api_key=os.environ.get("openai"))
model = "gpt-3.5-turbo"

# NOTE: an earlier variant of this script ran a local model instead of OpenAI;
# the token-counting helpers below still expect such an object as ``llm``.
# llm = Llama(model_path="./snorkel-mistral-pairrm-dpo.Q4_K_M.gguf",
#             chat_format="chatml",
#             n_gpu_layers=0,  # cpu only
#             n_ctx=6000)

# Number of writer/editor turns generated for each chat request.
_NUM_TURNS = 4
# Pause after each streamed chunk, in seconds.  Lower it for faster output.
_STREAM_DELAY_SECONDS = 1


def split_text(text, llm, chunk_size):
    """Split *text* into chunks of roughly *chunk_size* tokens.

    The text is split on newlines and empty lines are dropped.  Lines are
    accumulated until the running token count exceeds *chunk_size* (the line
    that crosses the limit stays in the chunk), then a new chunk is started.
    The remaining lines always form the final chunk.

    Args:
        text: The text to split.
        llm: Object passed through to ``get_num_tokens``.
        chunk_size: Token threshold after which a chunk is closed.

    Returns:
        A list of chunks, each with its lines joined by a blank line.
    """
    lines = [line for line in text.split('\n') if line]
    last_index = len(lines) - 1
    summary_list = []
    current = []
    for i, line in enumerate(lines):
        current.append(line)
        # Counting uses a three-newline join while the emitted chunk uses two;
        # this mirrors the original behaviour.
        n_tokens = get_num_tokens('\n\n\n'.join(current), llm)
        if i == last_index:
            summary_list.append('\n\n'.join(current))
        elif n_tokens > chunk_size:
            summary_list.append('\n\n'.join(current))
            current = []
    return summary_list


def all_to_list(all_sum, llm, chunk_size):
    """Chunk *all_sum* with ``split_text`` and print per-chunk token counts.

    Returns:
        The list of chunks produced by ``split_text``.
    """
    summary_list = split_text(all_sum, llm, chunk_size)
    len_chunks = [get_num_tokens(chunk, llm) for chunk in summary_list]
    print(f'len_chunks: {len_chunks}')
    print(f'total parts: {len(summary_list)}')
    return summary_list


def clean_output(text):
    """Strip common Markdown list/emphasis markers from *text*.

    Removes backticks, ``*`` and ``+``, replaces ``"- "`` with a space, and
    deletes every "digits followed by a dot" sequence (numbered bullets; note
    that this also matches the integer part of a decimal such as ``3.14``).
    """
    text = text.replace('`', '')
    text = re.sub(r'\d+\.', '', text)  # removes numeric bullet points
    text = text.replace('- ', ' ')
    text = text.replace('*', '')
    text = text.replace('+', '')
    return text


def get_content_length(messages, llm):
    """Return the token count of the conversation in *messages*.

    The content of the first ``system`` message is concatenated with the
    content of every message after index 0 (so the system message is assumed
    to sit at index 0) and the result is measured with ``get_num_tokens``.

    Raises:
        IndexError: If *messages* contains no ``system`` message.
    """
    system_list = [m for m in messages if m['role'] == 'system']
    content_total = system_list[0]['content']
    content_total += ''.join(m['content'] for m in messages[1:])
    return get_num_tokens(content_total, llm)


def pop_first_user_assistant(messages):
    """Return a copy of *messages* without the entries at indices 1 and 2.

    With the system message at index 0 these are the oldest user/assistant
    exchange.  The input list is not modified.
    """
    return [entry for i, entry in enumerate(messages) if i not in (1, 2)]


def get_num_tokens(text, llm):
    """Return how many tokens ``llm.tokenize`` yields for UTF-8 encoded *text*."""
    tokens = llm.tokenize(text.encode('utf-8'))
    return len(tokens)


def response_stream():
    """Placeholder generator that yields ``None`` once.

    NOTE(review): nothing in this file calls it and both original branches
    were identical, so they are collapsed here -- confirm before removing.
    """
    yield


def _record_response(speaker, response):
    """Append *response* to both conversations from *speaker*'s point of view.

    Each model sees its own output as ``assistant`` and the other model's
    output as ``user``.
    """
    if speaker == 'writer':
        writer_role, editor_role = 'assistant', 'user'
    else:  # editor
        writer_role, editor_role = 'user', 'assistant'
    writer_messages.append({'role': writer_role, 'content': response})
    editor_messages.append({'role': editor_role, 'content': response})


def adverse(message, history):
    """Run writer/editor turns and stream the combined transcript.

    Used as the ``fn`` of the Gradio chat interface.  *message* is only added
    to the writer conversation when that conversation holds nothing but its
    system prompt (i.e. on the first call after a topic is set); *history* is
    not used because state lives in the module-level message lists.

    Yields:
        The transcript accumulated so far, after each streamed chunk.
    """
    global writer_messages, editor_messages, turn
    total_response = ''
    for _ in range(_NUM_TURNS):
        if len(writer_messages) == 1:
            # First call: seed the writer conversation with the user's request.
            writer_messages.append({
                'role': 'user',
                'content': message,
            })

        # The writer speaks whenever its conversation has an even length
        # (system + user, then after each editor reply).
        turn = 'writer' if len(writer_messages) % 2 == 0 else 'editor'
        list_of_messages = writer_messages if turn == 'writer' else editor_messages
        print(f'turn: {turn}\n\nlist_of_messages: {list_of_messages}')
        total_response += f'\n\n\nturn: {turn}\n'

        response_iter = client.chat.completions.create(
            model=model,
            messages=list_of_messages,
            stream=True,
        )

        response = ''
        for chunk in response_iter:
            # Some stream chunks carry no choices or no text (for example the
            # final chunk); skip them instead of failing on ``str + None``.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece is None:
                continue
            response += piece
            total_response += piece
            time.sleep(_STREAM_DELAY_SECONDS)
            yield total_response

        total_response += '\n\n'
        if turn == 'editor':
            # Stored with the editor's feedback so the writer is told to revise.
            response += '\nNow rewrite your response keeping my suggestions in mind.\n'

        _record_response(turn, response)


max_tokens = 4_000
chunk_size = 1000

max_words = 10_000
print(f'max_words: {max_words}')
# llm = Llama(model_path="E:\\yt\\bookSummary/Snorkel-Mistral-PairRM-DPO/snorkel-mistral-pairrm-dpo.Q4_K_M.gguf", chat_format="chatml", n_gpu_layers=-1, n_ctx=6000)

writer_system_prompt_unformatted = (
    'You are a writer. \n'
    'The topic is {topic}.'
)

editor_system_prompt_unformatted = (
    'You are an editor. You give feedback about my writing. '
    'The topic is {topic}. '
    'You should reinforce me to adjust my content and improve it. '
    'You should push me to make my response as close as possible to the topic.'
)

# Until a topic is submitted the prompts are the raw templates, so the test
# button below has something to display instead of raising NameError.
writer_system_prompt = writer_system_prompt_unformatted
editor_system_prompt = editor_system_prompt_unformatted

writer_messages = [{'role': 'system', 'content': writer_system_prompt_unformatted}]
editor_messages = [{'role': 'system', 'content': editor_system_prompt_unformatted}]

turn = 'writer'


def set_system_prompts(x):
    """Format both system prompts for topic *x* and reset both conversations.

    Returns:
        A human-readable summary of the two system prompts now in use.
    """
    global writer_system_prompt, editor_system_prompt, writer_messages, editor_messages
    writer_system_prompt = writer_system_prompt_unformatted.format(topic=x)
    editor_system_prompt = editor_system_prompt_unformatted.format(topic=x)

    # Starting from a fresh system message discards any previous exchange.
    writer_messages = [{'role': 'system', 'content': writer_system_prompt}]
    editor_messages = [{'role': 'system', 'content': editor_system_prompt}]
    return f'writer system prompt:\n{writer_system_prompt}\n\neditor system prompt:\n{editor_system_prompt}'


with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Multi Agent LLMs for End-to-End Story Generation
    Start typing below to see the output.
    """)

    gr.Interface(
        fn=set_system_prompts,
        inputs=gr.Textbox(placeholder="What is the topic?", label='Topic', lines=4),
        outputs=gr.Textbox(label='System prompt to use', lines=4),
    )

    out_test = gr.Textbox(lines=4)
    button = gr.Button("test")
    # Shows the system prompts currently in effect.
    button.click(
        lambda: f"{writer_system_prompt} \n\n\n{editor_system_prompt}",
        outputs=out_test,
    )

    chat = gr.ChatInterface(
        fn=adverse,
        examples=["Start the story", "Write a poem", 'The funniest joke ever!'],
        title="Multi-Agent Bot",
        autofocus=False,
        fill_height=True,
    ).queue()

if __name__ == "__main__":
    demo.launch()