Spaces:

Intel
/

powered_by_intel_llm_leaderboard

Running

eduardo-alvarez committed on Mar 9

Commit

be7b294

•

1 Parent(s): 24086b0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -50,33 +50,35 @@ with demo:
         #chat_model_selection = chat_model_dropdown.value
         chat_model_selection = 'Intel/neural-chat-7b-v1-1'
-        def call_api_and_stream_response(query, chat_model):
-            """
-            Call the API endpoint and yield characters as they are received.
-            This function simulates streaming by yielding characters one by one.
-            """
-            url = inference_endpoint_url
-            params = {"query": query,"selected_model":chat_model}
-            with requests.get(url, json=params, stream=True) as r:
-                for chunk in r.iter_content(chunk_size=1):
-                    if chunk:
-                        yield chunk.decode()
-        def get_response(query, history):
-            """
-            Wrapper function to call the streaming API and compile the response.
-            """
-            response = ''
-            global chat_model_selection
-            for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
-                if char == '<':
-                    break
-                response += char
-                yield response
-        gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):

         #chat_model_selection = chat_model_dropdown.value
         chat_model_selection = 'Intel/neural-chat-7b-v1-1'
+        gr.ChatInterface()
+        #def call_api_and_stream_response(query, chat_model):
+        #    """
+        #    Call the API endpoint and yield characters as they are received.
+        #    This function simulates streaming by yielding characters one by one.
+        #    """
+        #    url = inference_endpoint_url
+        #    params = {"query": query,"selected_model":chat_model}
+        #    with requests.get(url, json=params, stream=True) as r:
+        #        for chunk in r.iter_content(chunk_size=1):
+        #            if chunk:
+        #                yield chunk.decode()
+        #def get_response(query, history):
+        #    """
+        #    Wrapper function to call the streaming API and compile the response.
+        #    """
+        #    response = ''
+        #
+        #    global chat_model_selection
+        #
+        #    for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
+        #        if char == '<':
+        #            break
+        #        response += char
+        #        yield response
+        #
+        #gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):