# Page header captured with this file (not part of the Compose definition): Space status "Running"; file size 2,098 bytes; revision 8ff63e4.
# Docker Compose definition for four model-serving workers. Every service runs
# the same image (mlenergy/tgi:latest) with a different --model-id, a single
# shard, and one reserved NVIDIA GPU selected by device id.
#
# Shared settings across all services:
#   - "--otlp-endpoint http://jaeger:4317" is passed to the container command.
#     NOTE(review): presumably "jaeger" is a host reachable on the
#     "leaderboard" network; it is not defined in this file -- confirm.
#   - The host directory /data/leaderboard/tgi-data is mounted at /data.
#   - shm_size is set to 1g.
services:
  # worker0: serves tiiuae/falcon-7b-instruct on GPU device "0".
  Falcon-7B:
    container_name: worker0
    image: mlenergy/tgi:latest
    command:
      - "--model-id"
      - "tiiuae/falcon-7b-instruct"
      - "--num-shard"
      - "1"
      - "--otlp-endpoint"
      - "http://jaeger:4317"
    shm_size: 1g
    networks:
      - leaderboard
    volumes:
      - /data/leaderboard/tgi-data:/data
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]

  # worker1: serves Llama-2-7b-chat-hf on GPU device "1". The model id is a
  # path under /weights, which is provided by the second volume mount below.
  Llama2-7B:
    container_name: worker1
    image: mlenergy/tgi:latest
    command:
      - "--model-id"
      - "/weights/metaai/Llama-2-7b-chat-hf"
      - "--num-shard"
      - "1"
      - "--otlp-endpoint"
      - "http://jaeger:4317"
    shm_size: 1g
    networks:
      - leaderboard
    volumes:
      - /data/leaderboard/tgi-data:/data
      - /data/leaderboard/weights:/weights
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["1"]
              capabilities: [gpu]

  # worker2: serves lmsys/fastchat-t5-3b-v1.0 on GPU device "2".
  FastChat-T5-3B:
    container_name: worker2
    image: mlenergy/tgi:latest
    command:
      - "--model-id"
      - "lmsys/fastchat-t5-3b-v1.0"
      - "--num-shard"
      - "1"
      - "--otlp-endpoint"
      - "http://jaeger:4317"
    environment:
      # Only this service sets this variable.
      # NOTE(review): the reason is not recorded in this file -- confirm why
      # the pure-Python protobuf implementation is needed for this model.
      PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python
    shm_size: 1g
    networks:
      - leaderboard
    volumes:
      - /data/leaderboard/tgi-data:/data
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["2"]
              capabilities: [gpu]

  # worker3: serves Llama-2-13b-chat-hf on GPU device "3". The model id is a
  # path under /weights, which is provided by the second volume mount below.
  Llama2-13B:
    container_name: worker3
    image: mlenergy/tgi:latest
    command:
      - "--model-id"
      - "/weights/metaai/Llama-2-13b-chat-hf"
      - "--num-shard"
      - "1"
      - "--otlp-endpoint"
      - "http://jaeger:4317"
    shm_size: 1g
    networks:
      - leaderboard
    volumes:
      - /data/leaderboard/tgi-data:/data
      - /data/leaderboard/weights:/weights
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["3"]
              capabilities: [gpu]

# The workers join a network named "leaderboard" that is declared external,
# i.e. Compose expects it to exist already and does not create it.
networks:
  leaderboard:
    name: leaderboard
    external: true