Spaces:

Intel
/

powered_by_intel_llm_leaderboard

Running

App Files Files Community

eduardo-alvarez committed on Mar 9

Commit

a75abaf

•

1 Parent(s): e51fe0f

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -30

app.py CHANGED Viewed

@@ -48,36 +48,35 @@ with demo:
                         interactive=True,
                     )
-   #    #chat_model_selection = chat_model_dropdown.value
-   #    chat_model_selection = 'Intel/neural-chat-7b-v1-1'
-   #
-   #    def call_api_and_stream_response(query, chat_model):
-   #        """
-   #        Call the API endpoint and yield characters as they are received.
-   #        This function simulates streaming by yielding characters one by one.
-   #        """
-   #        url = inference_endpoint_url
-   #        params = {"query": query,"selected_model":chat_model}
-   #        with requests.get(url, json=params, stream=True) as r:
-   #            for chunk in r.iter_content(chunk_size=1):
-   #                if chunk:
-   #                    yield chunk.decode()
-#
-   #    def get_response(query, history):
-   #        """
-   #        Wrapper function to call the streaming API and compile the response.
-   #        """
-   #        response = ''
-   #
-   #        global chat_model_selection
-   #
-   #        for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
-   #            if char == '<':
-   #                break
-   #            response += char
-   #            yield response
-   #
-   #    gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):

                         interactive=True,
                     )
+        #chat_model_selection = chat_model_dropdown.value
+        chat_model_selection = 'Intel/neural-chat-7b-v1-1'
+        def call_api_and_stream_response(query, chat_model):
+            """
+            Call the API endpoint and yield characters as they are received.
+            This function simulates streaming by yielding characters one by one.
+            """
+            url = inference_endpoint_url
+            params = {"query": query,"selected_model":chat_model}
+            with requests.get(url, json=params, stream=True) as r:
+                for chunk in r.iter_content(chunk_size=1):
+                    if chunk:
+                        yield chunk.decode()
+        def get_response(query, history):
+            """
+            Wrapper function to call the streaming API and compile the response.
+            """
+            response = ''
+            global chat_model_selection
+            for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
+                if char == '<':
+                    break
+                response += char
+                yield response
+        gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):