Spaces:
Running
Running
File size: 1,193 Bytes
8ff63e4 b3e31f5 8ff63e4 9d117bb 8ff63e4 f0ad70f 8ff63e4 9d117bb 8ff63e4 b3e31f5 9d117bb 8ff63e4 f0ad70f 8ff63e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
services:
  # Worker serving meta-llama/Llama-2-70b-chat-hf from the mlenergy/tgi image.
  # The launcher is started with 2 shards and bitsandbytes quantization, and
  # the container reserves NVIDIA GPU device IDs 0 and 1.
  Llama2-70B-INT8:
    image: mlenergy/tgi:v1.0.0
    container_name: worker4
    command:
      - "--model-id"
      - "meta-llama/Llama-2-70b-chat-hf"
      - "--num-shard"
      - "2"
      # NOTE(review): "jaeger" is presumably a host on the shared
      # "leaderboard" network; it is not defined in this file - confirm.
      - "--otlp-endpoint"
      - "http://jaeger:4317"
      - "--quantize"
      - "bitsandbytes"
    shm_size: "1g"
    environment:
      # Filled in by Compose variable interpolation from HF_TOKEN.
      HUGGING_FACE_HUB_TOKEN: "${HF_TOKEN}"
    volumes:
      - /data/leaderboard/tgi-data:/data
    networks:
      - leaderboard
    deploy:
      restart_policy:
        condition: any
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids:
                - "0"
                - "1"
              capabilities:
                - gpu

  # Worker serving mosaicml/mpt-30b-chat from the same image, started with
  # 2 shards and no quantization flag. Reserves NVIDIA GPU device IDs 2 and 3
  # and mounts the same host data directory as the worker above.
  MPT-30B:
    image: mlenergy/tgi:v1.0.0
    container_name: worker5
    command:
      - "--model-id"
      - "mosaicml/mpt-30b-chat"
      - "--num-shard"
      - "2"
      - "--otlp-endpoint"
      - "http://jaeger:4317"
    shm_size: "1g"
    volumes:
      - /data/leaderboard/tgi-data:/data
    networks:
      - leaderboard
    deploy:
      restart_policy:
        condition: any
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids:
                - "2"
                - "3"
              capabilities:
                - gpu
networks:
  # Marked external: Compose attaches to an already-existing Docker network
  # called "leaderboard" instead of creating one for this project.
  leaderboard:
    external: true
    name: leaderboard
|