Spaces:
Running
Running
Jae-Won Chung
committed on
Commit
•
b3e31f5
1
Parent(s):
e795d0f
Update docker-compose files
Browse files
deployment/docker-compose-0.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
services:
|
2 |
MPT-7B:
|
3 |
container_name: worker0
|
4 |
-
image: mlenergy/tgi:
|
5 |
command: ["--model-id", "mosaicml/mpt-7b-chat", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
|
6 |
shm_size: 1g
|
7 |
networks:
|
@@ -19,7 +19,7 @@ services:
|
|
19 |
capabilities: [gpu]
|
20 |
Llama2-7B:
|
21 |
container_name: worker1
|
22 |
-
image: mlenergy/tgi:
|
23 |
command: ["--model-id", "/weights/metaai/Llama-2-7b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
|
24 |
shm_size: 1g
|
25 |
networks:
|
@@ -38,7 +38,7 @@ services:
|
|
38 |
capabilities: [gpu]
|
39 |
Vicuna-13B:
|
40 |
container_name: worker2
|
41 |
-
image: mlenergy/tgi:
|
42 |
command: ["--model-id", "lmsys/vicuna-13b-v1.5", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
|
43 |
shm_size: 1g
|
44 |
networks:
|
@@ -56,7 +56,7 @@ services:
|
|
56 |
capabilities: [gpu]
|
57 |
Llama2-13B:
|
58 |
container_name: worker3
|
59 |
-
image: mlenergy/tgi:
|
60 |
command: ["--model-id", "/weights/metaai/Llama-2-13b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
|
61 |
shm_size: 1g
|
62 |
networks:
|
|
|
1 |
services:
|
2 |
MPT-7B:
|
3 |
container_name: worker0
|
4 |
+
image: mlenergy/tgi:v1.0.0
|
5 |
command: ["--model-id", "mosaicml/mpt-7b-chat", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
|
6 |
shm_size: 1g
|
7 |
networks:
|
|
|
19 |
capabilities: [gpu]
|
20 |
Llama2-7B:
|
21 |
container_name: worker1
|
22 |
+
image: mlenergy/tgi:v1.0.0
|
23 |
command: ["--model-id", "/weights/metaai/Llama-2-7b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
|
24 |
shm_size: 1g
|
25 |
networks:
|
|
|
38 |
capabilities: [gpu]
|
39 |
Vicuna-13B:
|
40 |
container_name: worker2
|
41 |
+
image: mlenergy/tgi:v1.0.0
|
42 |
command: ["--model-id", "lmsys/vicuna-13b-v1.5", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
|
43 |
shm_size: 1g
|
44 |
networks:
|
|
|
56 |
capabilities: [gpu]
|
57 |
Llama2-13B:
|
58 |
container_name: worker3
|
59 |
+
image: mlenergy/tgi:v1.0.0
|
60 |
command: ["--model-id", "/weights/metaai/Llama-2-13b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
|
61 |
shm_size: 1g
|
62 |
networks:
|
deployment/docker-compose-1.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
services:
|
2 |
Llama2-70B-INT8:
|
3 |
container_name: worker4
|
4 |
-
image: mlenergy/tgi:
|
5 |
command: ["--model-id", "meta-llama/Llama-2-70b-chat-hf", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317", "--quantize", "bitsandbytes"]
|
6 |
shm_size: 1g
|
7 |
environment:
|
@@ -21,7 +21,7 @@ services:
|
|
21 |
capabilities: [gpu]
|
22 |
MPT-30B:
|
23 |
container_name: worker5
|
24 |
-
image: mlenergy/tgi:
|
25 |
command: ["--model-id", "mosaicml/mpt-30b-chat", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317"]
|
26 |
shm_size: 1g
|
27 |
networks:
|
|
|
1 |
services:
|
2 |
Llama2-70B-INT8:
|
3 |
container_name: worker4
|
4 |
+
image: mlenergy/tgi:v1.0.0
|
5 |
command: ["--model-id", "meta-llama/Llama-2-70b-chat-hf", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317", "--quantize", "bitsandbytes"]
|
6 |
shm_size: 1g
|
7 |
environment:
|
|
|
21 |
capabilities: [gpu]
|
22 |
MPT-30B:
|
23 |
container_name: worker5
|
24 |
+
image: mlenergy/tgi:v1.0.0
|
25 |
command: ["--model-id", "mosaicml/mpt-30b-chat", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317"]
|
26 |
shm_size: 1g
|
27 |
networks:
|