meg-huggingface
committed on
Commit
•
66621a9
1
Parent(s):
74d59fa
Updating with new approach to inference endpoint
Browse files- app.py +1 -1
- main_backend_toxicity.py +7 -0
- src/backend/inference_endpoint.py +2 -1
app.py
CHANGED
@@ -37,7 +37,7 @@ def button_auto_eval():
|
|
37 |
run_auto_eval()
|
38 |
|
39 |
|
40 |
-
reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=
|
41 |
|
42 |
with gr.Blocks(js=dark_mode_gradio_js) as demo:
|
43 |
gr.Markdown(intro_md)
|
|
|
37 |
run_auto_eval()
|
38 |
|
39 |
|
40 |
+
reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=False)
|
41 |
|
42 |
with gr.Blocks(js=dark_mode_gradio_js) as demo:
|
43 |
gr.Markdown(intro_md)
|
main_backend_toxicity.py
CHANGED
@@ -73,7 +73,14 @@ def run_auto_eval():
|
|
73 |
logger.info("Created an endpoint url at %s" % endpoint_url)
|
74 |
results = main(endpoint_url, eval_request)
|
75 |
logger.debug("FINISHED!")
|
|
|
76 |
logger.info(f'Completed Evaluation of {eval_request.json_filepath}')
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
|
79 |
if __name__ == "__main__":
|
|
|
73 |
logger.info("Created an endpoint url at %s" % endpoint_url)
|
74 |
results = main(endpoint_url, eval_request)
|
75 |
logger.debug("FINISHED!")
|
76 |
+
logger.debug(results)
|
77 |
logger.info(f'Completed Evaluation of {eval_request.json_filepath}')
|
78 |
+
set_eval_request(api=API,
|
79 |
+
eval_request=eval_request,
|
80 |
+
set_to_status=FINISHED_STATUS,
|
81 |
+
hf_repo=QUEUE_REPO,
|
82 |
+
local_dir=EVAL_REQUESTS_PATH_BACKEND,
|
83 |
+
)
|
84 |
|
85 |
|
86 |
if __name__ == "__main__":
|
src/backend/inference_endpoint.py
CHANGED
@@ -9,7 +9,8 @@ logging.basicConfig(level=logging.DEBUG)
|
|
9 |
logger = setup_logger(__name__)
|
10 |
TIMEOUT=20
|
11 |
|
12 |
-
|
|
|
13 |
logger.info("Creating endpoint %s..." % endpoint_name)
|
14 |
# TODO(mm): Handle situation where it's paused
|
15 |
try:
|
|
|
9 |
logger = setup_logger(__name__)
|
10 |
TIMEOUT=20
|
11 |
|
12 |
+
# TODO: Handle case where endpoint returns an error (for example because of flash attention or not fitting into memory)
|
13 |
+
def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-generation", accelerator="gpu", vendor="aws", region="us-east-1", type="protected", instance_size="x4", instance_type="nvidia-l4"):
|
14 |
logger.info("Creating endpoint %s..." % endpoint_name)
|
15 |
# TODO(mm): Handle situation where it's paused
|
16 |
try:
|