OOOOOOHMYYYYYYYYGOOOOOOOOOOOOOOOOOOOOOO
app.py CHANGED
@@ -1,10 +1,9 @@
 import gradio as gr
+import requests
 import json
 import os
 import datetime
-import asyncio
-import aiohttp
-from aiohttp import ClientSession, ClientTimeout
+from requests.exceptions import RequestException

 API_URL = os.environ.get('API_URL')
 API_KEY = os.environ.get('API_KEY')
@@ -27,7 +26,7 @@ DEFAULT_PARAMS = {
 def get_timestamp():
     return datetime.datetime.now().strftime("%H:%M:%S")

-async def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens):
+def predict(message, history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
     history_format = [{"role": "system", "content": system_prompt}]
     for human, assistant in history:
         history_format.append({"role": "user", "content": human})
@@ -69,36 +68,31 @@ async def predict(message, history, system_prompt, temperature, top_p, top_k, fr
     }

     try:
-        async with ClientSession(timeout=ClientTimeout(total=60)) as session:
-            async with session.post(API_URL, headers=headers, json=data) as response:
-                partial_message = ""
-                async for line in response.content:
-                    if line:
-                        line = line.decode('utf-8').strip()
-                        if line.startswith("data: "):
-                            if line == "data: [DONE]":
-                                break
-                            try:
-                                json_data = json.loads(line[6:])
-                                if 'choices' in json_data and json_data['choices']:
-                                    content = json_data['choices'][0]['delta'].get('content', '')
-                                    if content:
-                                        partial_message += content
-                                        yield partial_message
-                            except json.JSONDecodeError:
-                                continue
+        with requests.post(API_URL, headers=headers, data=json.dumps(data), stream=True) as response:
+            partial_message = ""
+            for line in response.iter_lines():
+                if stop_flag[0]:
+                    response.close()
+                    break
+                if line:
+                    line = line.decode('utf-8')
+                    if line.startswith("data: "):
+                        if line.strip() == "data: [DONE]":
+                            break
+                        try:
+                            json_data = json.loads(line[6:])
+                            if 'choices' in json_data and json_data['choices']:
+                                content = json_data['choices'][0]['delta'].get('content', '')
+                                if content:
+                                    partial_message += content
+                                    yield partial_message
+                        except json.JSONDecodeError:
+                            continue

         if partial_message:
             yield partial_message

-    except asyncio.TimeoutError:
-        print("Request timed out")
-        yield "Request timed out. Please try again."
-    except Exception as e:
+    except RequestException as e:
         print(f"Request error: {e}")
         yield f"An error occurred: {str(e)}"

@@ -136,71 +130,12 @@ def export_chat(history, system_prompt):
         export_data += f"<|assistant|> {assistant_msg}\n\n"
     return export_data

-def sanitize_chatbot_history(history):
-    return [tuple(item) for item in history]
-
-async def user(user_message, history):
-    history = sanitize_chatbot_history(history or [])
-    return "", history + [(user_message, None)]
-
-async def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info):
-    history = sanitize_chatbot_history(history or [])
-    if not history:
-        yield history
-        return
-    user_message = history[-1][0]
-    bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens)
-    history[-1] = (history[-1][0], "")
-    task = asyncio.current_task()
-    task_info['task'] = task
-    task_info['stop_requested'] = False
-    try:
-        async for chunk in bot_message:
-            if task_info.get('stop_requested', False):
-                print("Stop requested, breaking the loop")
-                break
-            history[-1] = (history[-1][0], chunk)
-            yield history
-    except asyncio.CancelledError:
-        print("Bot generation cancelled")
-    except GeneratorExit:
-        print("Generator exited")
-    except Exception as e:
-        print(f"Error in bot generation: {e}")
-    finally:
-        if history[-1][1] == "":
-            history[-1] = (history[-1][0], " [Generation stopped]")
-        task_info['task'] = None
-        task_info['stop_requested'] = False
-        yield history
-
-async def regenerate_response(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info):
-    if 'task' in task_info and task_info['task']:
-        print("Cancelling previous task")
-        task_info['stop_requested'] = True
-        task_info['task'].cancel()
-
-        await asyncio.sleep(0.1)
-
-    history = sanitize_chatbot_history(history or [])
-    if history:
-        history[-1] = (history[-1][0], None)
-        try:
-            async for new_history in bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info):
-                yield sanitize_chatbot_history(new_history)
-        except Exception as e:
-            print(f"Error in regenerate_response: {e}")
-            yield history
-    else:
-        yield []
-
-def import_chat_wrapper(custom_format_string):
-    imported_history, imported_system_prompt = import_chat(custom_format_string)
-    return sanitize_chatbot_history(imported_history), imported_system_prompt
+def stop_generation_func(stop_flag):
+    stop_flag[0] = True
+    return stop_flag

 with gr.Blocks(theme='gradio/monochrome') as demo:
-    task_info = gr.State({})
+    stop_flag = gr.State([False])

     with gr.Row():
         with gr.Column(scale=2):
@@ -227,43 +162,59 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
            repetition_penalty = gr.Slider(0.01, 5, value=1.1, step=0.01, label="Repetition Penalty")
            max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Max Output (max_tokens)")

-    submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        bot, [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info], chatbot,
-        concurrency_limit=10
+    def user(user_message, history):
+        history = history or []
+        return "", history + [[user_message, None]]
+
+    def bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+        stop_flag[0] = False
+        history = history or []
+        if not history:
+            return history
+        user_message = history[-1][0]
+        bot_message = predict(user_message, history[:-1], system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag)
+        history[-1][1] = ""
+        for chunk in bot_message:
+            if stop_flag[0]:
+                history[-1][1] += " [Generation stopped]"
+                break
+            history[-1][1] = chunk
+            yield history
+
+    def regenerate_response(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+        if history and len(history) > 0:
+            last_user_message = history[-1][0]
+            history[-1][1] = None
+            for new_history in bot(history, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag):
+                yield new_history
+        else:
+            yield []
+
+    def import_chat_wrapper(custom_format_string):
+        imported_history, imported_system_prompt = import_chat(custom_format_string)
+        return imported_history, imported_system_prompt
+
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag], chatbot
     )

-    clear.click(lambda:
+    clear.click(lambda: None, None, chatbot, queue=False)

-    regenerate_event = regenerate.click(
+    regenerate.click(
         regenerate_response,
-        [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, task_info],
-        chatbot,
-        concurrency_limit=10
+        [chatbot, system_prompt, temperature, top_p, top_k, frequency_penalty, presence_penalty, repetition_penalty, max_tokens, stop_flag],
+        chatbot
     )

-    import_button.click(import_chat_wrapper, inputs=[import_textbox], outputs=[chatbot, system_prompt], concurrency_limit=10)
+    import_button.click(import_chat_wrapper, inputs=[import_textbox], outputs=[chatbot, system_prompt])

     export_button.click(
         export_chat,
         inputs=[chatbot, system_prompt],
-        outputs=[import_textbox],
-        concurrency_limit=10
+        outputs=[import_textbox]
     )

-    def stop_generation(task_info):
-        if 'task' in task_info and task_info['task']:
-            print("Stop requested")
-            task_info['stop_requested'] = True
-            task_info['task'].cancel()
-        return task_info
-
-    stop_btn.click(
-        stop_generation,
-        inputs=[task_info],
-        outputs=[task_info],
-        cancels=[submit_event, regenerate_event],
-        queue=False
-    )
+    stop_btn.click(stop_generation_func, inputs=[stop_flag], outputs=[stop_flag])

 if __name__ == "__main__":
-    demo.
+    demo.queue(max_size=20, default_concurrency_limit=20).launch(debug=True)