# MobiLlama / tests / launch_openai_api_test_server.py
# (uploaded via huggingface_hub; commit 5472531)
"""
Launch an OpenAI API server with multiple model workers.
"""
import argparse
import os
import subprocess
import time
def launch_process(cmd: str) -> subprocess.Popen:
    """Launch *cmd* as a background shell process and return its handle.

    The original implementation used ``os.popen`` and discarded the pipe,
    which leaves no way to wait on the child or detect launch failure.
    ``subprocess.Popen`` with ``shell=True`` preserves the shell semantics
    the callers rely on (env-var prefixes like ``CUDA_VISIBLE_DEVICES=0``)
    while returning a process object callers MAY wait on or poll.

    Args:
        cmd: Full shell command line to run.

    Returns:
        The started ``subprocess.Popen`` handle (previously ignored by
        all callers, so returning it is backward-compatible).
    """
    # NOTE: shell=True is required here because callers pass a single
    # command string that includes env-var assignments; the commands are
    # built from trusted, hard-coded model names — not untrusted input.
    return subprocess.Popen(cmd, shell=True)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # When set, launch a single multimodal (LLaVA) worker instead of the
    # default text-model fleet.
    parser.add_argument("--multimodal", action="store_true", default=False)
    args = parser.parse_args()

    # Controller and OpenAI-compatible API front-end must be up before the
    # workers register with them.
    launch_process("python3 -m fastchat.serve.controller")
    launch_process("python3 -m fastchat.serve.openai_api_server")

    if args.multimodal:
        models = [
            ("liuhaotian/llava-v1.5-7b", "sglang_worker"),
        ]
    else:
        models = [
            ("lmsys/vicuna-7b-v1.5", "model_worker"),
            ("lmsys/fastchat-t5-3b-v1.0", "model_worker"),
            ("THUDM/chatglm-6b", "model_worker"),
            ("mosaicml/mpt-7b-chat", "model_worker"),
            ("meta-llama/Llama-2-7b-chat-hf", "vllm_worker"),
        ]

    # One worker per GPU: worker i is pinned to GPU i and serves on port
    # 40000 + i.
    for i, (model_path, worker_name) in enumerate(models):
        cmd = (
            f"CUDA_VISIBLE_DEVICES={i} python3 -m fastchat.serve.{worker_name} "
            f"--model-path {model_path} --port {40000 + i} "
            f"--worker-address http://localhost:{40000 + i} "
        )
        # Trailing spaces on each appended fragment so that multiple
        # conditional flags never fuse into one malformed token (the
        # original omitted the space after --tokenizer-path's value).
        if "llava" in model_path.lower():
            cmd += "--tokenizer-path llava-hf/llava-1.5-7b-hf "
        if worker_name == "vllm_worker":
            cmd += "--tokenizer hf-internal-testing/llama-tokenizer "
        launch_process(cmd)

    # Keep the parent process alive so the launched servers stay attached
    # to it; sleep instead of the original `while True: pass` busy-wait,
    # which pinned a CPU core at 100%.
    while True:
        time.sleep(60)