eduardo-alvarez committed
Commit be7b294 • 1 Parent(s): 24086b0

Update app.py

Files changed (1)
  1. app.py +28 -26
app.py CHANGED
@@ -50,33 +50,35 @@ with demo:
 
     #chat_model_selection = chat_model_dropdown.value
     chat_model_selection = 'Intel/neural-chat-7b-v1-1'
+
+    gr.ChatInterface()
 
-    def call_api_and_stream_response(query, chat_model):
-        """
-        Call the API endpoint and yield characters as they are received.
-        This function simulates streaming by yielding characters one by one.
-        """
-        url = inference_endpoint_url
-        params = {"query": query,"selected_model":chat_model}
-        with requests.get(url, json=params, stream=True) as r:
-            for chunk in r.iter_content(chunk_size=1):
-                if chunk:
-                    yield chunk.decode()
-    def get_response(query, history):
-        """
-        Wrapper function to call the streaming API and compile the response.
-        """
-        response = ''
-
-        global chat_model_selection
-
-        for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
-            if char == '<':
-                break
-            response += char
-            yield response
-
-    gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
+    #def call_api_and_stream_response(query, chat_model):
+    #    """
+    #    Call the API endpoint and yield characters as they are received.
+    #    This function simulates streaming by yielding characters one by one.
+    #    """
+    #    url = inference_endpoint_url
+    #    params = {"query": query,"selected_model":chat_model}
+    #    with requests.get(url, json=params, stream=True) as r:
+    #        for chunk in r.iter_content(chunk_size=1):
+    #            if chunk:
+    #                yield chunk.decode()
+    #def get_response(query, history):
+    #    """
+    #    Wrapper function to call the streaming API and compile the response.
+    #    """
+    #    response = ''
+    #
+    #    global chat_model_selection
+    #
+    #    for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
+    #        if char == '<':
+    #            break
+    #        response += char
+    #        yield response
+    #
+    #gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, concurrency_limit=inference_concurrency_limit).launch()
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):
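
This commit swaps the custom streaming backend for a bare gr.ChatInterface() stub, leaving the original implementation commented out. Note that gr.ChatInterface normally requires a fn callable, so the stub would likely need a handler wired back in before launch. For reference, here is a minimal standalone sketch of the streaming client being disabled; the endpoint URL is a hypothetical placeholder for inference_endpoint_url, which app.py defines elsewhere, and the '<' sentinel check mirrors the original get_response logic.

# Minimal standalone sketch of the streaming client this commit disables.
# The URL below is a hypothetical placeholder for inference_endpoint_url,
# which is defined elsewhere in app.py.
import requests

inference_endpoint_url = "http://localhost:8000/generate"  # hypothetical

def call_api_and_stream_response(query, chat_model):
    """Call the endpoint and yield characters as they are received."""
    params = {"query": query, "selected_model": chat_model}
    with requests.get(inference_endpoint_url, json=params, stream=True) as r:
        r.raise_for_status()
        for chunk in r.iter_content(chunk_size=1):
            if chunk:
                yield chunk.decode()

if __name__ == "__main__":
    response = ""
    for char in call_api_and_stream_response("Hi there", "Intel/neural-chat-7b-v1-1"):
        if char == "<":  # the app treats '<' as an end-of-output sentinel
            break
        response += char
    print(response)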