meg-huggingface
committed on
Commit
•
66621a9
1
Parent(s):
74d59fa
Updating with new approach to inference endpoint
Browse files- app.py +1 -1
- main_backend_toxicity.py +7 -0
- src/backend/inference_endpoint.py +2 -1
app.py
CHANGED
@@ -37,7 +37,7 @@ def button_auto_eval():
|
|
37 |
run_auto_eval()
|
38 |
|
39 |
|
40 |
-
reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=
|
41 |
|
42 |
with gr.Blocks(js=dark_mode_gradio_js) as demo:
|
43 |
gr.Markdown(intro_md)
|
|
|
37 |
run_auto_eval()
|
38 |
|
39 |
|
40 |
+
reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=False)
|
41 |
|
42 |
with gr.Blocks(js=dark_mode_gradio_js) as demo:
|
43 |
gr.Markdown(intro_md)
|
main_backend_toxicity.py
CHANGED
@@ -73,7 +73,14 @@ def run_auto_eval():
|
|
73 |
logger.info("Created an endpoint url at %s" % endpoint_url)
|
74 |
results = main(endpoint_url, eval_request)
|
75 |
logger.debug("FINISHED!")
|
|
|
76 |
logger.info(f'Completed Evaluation of {eval_request.json_filepath}')
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
|
79 |
if __name__ == "__main__":
|
|
|
73 |
logger.info("Created an endpoint url at %s" % endpoint_url)
|
74 |
results = main(endpoint_url, eval_request)
|
75 |
logger.debug("FINISHED!")
|
76 |
+
logger.debug(results)
|
77 |
logger.info(f'Completed Evaluation of {eval_request.json_filepath}')
|
78 |
+
set_eval_request(api=API,
|
79 |
+
eval_request=eval_request,
|
80 |
+
set_to_status=FINISHED_STATUS,
|
81 |
+
hf_repo=QUEUE_REPO,
|
82 |
+
local_dir=EVAL_REQUESTS_PATH_BACKEND,
|
83 |
+
)
|
84 |
|
85 |
|
86 |
if __name__ == "__main__":
|
src/backend/inference_endpoint.py
CHANGED
@@ -9,7 +9,8 @@ logging.basicConfig(level=logging.DEBUG)
|
|
9 |
logger = setup_logger(__name__)
|
10 |
TIMEOUT=20
|
11 |
|
12 |
-
|
|
|
13 |
logger.info("Creating endpoint %s..." % endpoint_name)
|
14 |
# TODO(mm): Handle situation where it's paused
|
15 |
try:
|
|
|
9 |
logger = setup_logger(__name__)
|
10 |
TIMEOUT=20
|
11 |
|
12 |
+
# TODO: Handle case where endpoint returns an error (for example because of flash attention or not fitting into memory)
|
13 |
+
def create_endpoint(endpoint_name, repository, framework="pytorch", task="text-generation", accelerator="gpu", vendor="aws", region="us-east-1", type="protected", instance_size="x4", instance_type="nvidia-l4"):
|
14 |
logger.info("Creating endpoint %s..." % endpoint_name)
|
15 |
# TODO(mm): Handle situation where it's paused
|
16 |
try:
|