# File: gomoku/DI-engine/ding/scripts/dijob-qbert.yaml
# Last commit: "init space" (079c32c); uploader shown as zjowowen
# Viewer metadata: raw / history / blame; virus scan: no virus; size: 7.78 kB
---
# DIJob custom resource describing a distributed DQN run on Atari Qbert.
# It declares three roles (coordinator, collector, learner), each with its own
# pod template, all using the same image and sharing the `work-dir` volume.
apiVersion: diengine.opendilab.org/v1alpha1
kind: DIJob
metadata:
  name: qbert-dqn
spec:
  group: xxx
  priorityClassName: ""
  cleanPodPolicy: "Running"
  volumes:
    # Memory-backed scratch volume; mounted at /dev/shm by the learner only.
    - name: cache-volume
      emptyDir:
        medium: Memory
        sizeLimit: 128Mi
    # Host directory mounted at /ding by every role.
    - name: work-dir
      hostPath:
        path: /data/nfs/ding/qbert
  coordinator:
    template:
      spec:
        containers:
          - name: coordinator
            image: diorchestrator/ding:v0.1.0-df39b81c
            imagePullPolicy: Always
            env:
              - name: PYTHONUNBUFFERED
                value: "1"
            resources:
              requests:
                cpu: 3
                memory: "10Gi"
              limits:
                cpu: 3
                memory: "10Gi"
            command: ["/bin/bash", "-c"]
            # The script below (1) writes qbert_dqn_config_k8s.py from a
            # heredoc, (2) runs the `config` module on it, and (3) starts the
            # coordinator from qbert_dqn_config_k8s.py.pkl.
            # NOTE(review): step 2 presumably produces the .pkl file that the
            # coordinator, collector and learner commands read - confirm.
            # The heredoc delimiter is quoted ('EOF') so Bash copies the Python
            # text verbatim instead of expanding `$`, backticks or backslashes.
            # NOTE(review): $COORDINATOR_PORT is not defined in this manifest;
            # presumably it is injected at runtime - confirm.
            args:
              - |
                cat <<'EOF' > qbert_dqn_config_k8s.py
                from easydict import EasyDict
                qbert_dqn_config = dict(
                    env=dict(
                        collector_env_num=16,
                        collector_episode_num=2,
                        evaluator_env_num=8,
                        evaluator_episode_num=1,
                        stop_value=30000,
                        env_id='QbertNoFrameskip-v4',
                        frame_stack=4,
                        manager=dict(
                            shared_memory=False,
                        ),
                    ),
                    policy=dict(
                        cuda=False,
                        priority=True,
                        model=dict(
                            obs_shape=[4, 84, 84],
                            action_shape=6,
                            encoder_hidden_size_list=[128, 128, 512],
                        ),
                        nstep=3,
                        discount_factor=0.99,
                        learn=dict(
                            batch_size=32,
                            learning_rate=0.0001,
                            learner=dict(
                                learner_num=1,
                                send_policy_freq=1,
                            ),
                        ),
                        collect=dict(
                            n_sample=16,
                            collector=dict(
                                collector_num=2,
                                update_policy_second=3,
                            ),
                        ),
                        eval=dict(evaluator=dict(eval_freq=500, )),
                        other=dict(
                            eps=dict(
                                type='exp',
                                start=1.,
                                end=0.05,
                                decay=250000,
                            ),
                            replay_buffer=dict(
                                replay_buffer_size=400000,
                                enable_track_used_data=True,
                            ),
                            commander=dict(
                                collector_task_space=0,
                                learner_task_space=1,
                                eval_interval=30,
                            ),
                        ),
                    ),
                )
                qbert_dqn_config = EasyDict(qbert_dqn_config)
                main_config = qbert_dqn_config
                qbert_dqn_create_config = dict(
                    env=dict(
                        type='atari',
                        import_names=['dizoo.atari.envs.atari_env'],
                    ),
                    env_manager=dict(type='subprocess'),
                    policy=dict(type='dqn_command'),
                    learner=dict(type='base', import_names=['ding.worker.learner.base_learner']),
                    collector=dict(
                        type='zergling',
                        import_names=['ding.worker.collector.zergling_parallel_collector'],
                    ),
                    commander=dict(
                        type='solo',
                        import_names=['ding.worker.coordinator.solo_parallel_commander'],
                    ),
                    comm_learner=dict(
                        type='flask_fs',
                        import_names=['ding.worker.learner.comm.flask_fs_learner'],
                    ),
                    comm_collector=dict(
                        type='flask_fs',
                        import_names=['ding.worker.collector.comm.flask_fs_collector'],
                    ),
                )
                qbert_dqn_create_config = EasyDict(qbert_dqn_create_config)
                create_config = qbert_dqn_create_config
                qbert_dqn_system_config = dict(
                    coordinator=dict(
                        operator_server=dict(
                            system_addr='ding-server.ding-system:8080',
                            api_version='/v1alpha1',
                            init_replicas_request=dict(
                                collectors={
                                    "replicas": 2,
                                },
                                learners={
                                    "gpus": "0",
                                    "replicas": 1,
                                },
                            ),
                            collector_target_num=2,
                            learner_target_num=1,
                        ),
                    ),
                    path_data='./data',
                    path_policy='./policy',
                    communication_mode='auto',
                    learner_gpu_num=1,
                )
                qbert_dqn_system_config = EasyDict(qbert_dqn_system_config)
                system_config = qbert_dqn_system_config
                EOF
                # if code has been changed in the mount path, we have to reinstall ding cli
                # pip install --no-cache-dir -e .;
                ding -m dist --module config -P k8s -c qbert_dqn_config_k8s.py -s 0;
                ding -m dist --module coordinator -c qbert_dqn_config_k8s.py.pkl -s 0 --disable-flask-log 0 -cdp $COORDINATOR_PORT
            ports:
              # NOTE(review): presumably must equal $COORDINATOR_PORT - confirm.
              - name: coordinator
                containerPort: 22273
            volumeMounts:
              - name: work-dir
                mountPath: /ding
  collector:
    template:
      spec:
        containers:
          - name: collector
            image: diorchestrator/ding:v0.1.0-df39b81c
            imagePullPolicy: Always
            env:
              - name: PYTHONUNBUFFERED
                value: "1"
            resources:
              requests:
                cpu: 6
                memory: "10Gi"
              limits:
                cpu: 6
                memory: "10Gi"
            command: ["/bin/bash", "-c"]
            # Starts the collector from qbert_dqn_config_k8s.py.pkl, a file
            # this container does not create itself.
            # NOTE(review): $COLLECTOR_PORT is not defined in this manifest;
            # presumably it is injected at runtime - confirm.
            args:
              - |
                # if code has been changed in the mount path, we have to reinstall ding cli
                # pip install --no-cache-dir -e .;
                ding -m dist --module collector -c qbert_dqn_config_k8s.py.pkl -s 0 -clp $COLLECTOR_PORT --disable-flask-log 0
            ports:
              - name: collector
                containerPort: 22270
            volumeMounts:
              - name: work-dir
                mountPath: /ding
  learner:
    template:
      spec:
        containers:
          - name: learner
            image: diorchestrator/ding:v0.1.0-df39b81c
            imagePullPolicy: Always
            env:
              - name: PYTHONUNBUFFERED
                value: "1"
            resources:
              requests:
                cpu: 3
                memory: "30Gi"
              limits:
                cpu: 3
                memory: "30Gi"
            command: ["/bin/bash", "-c"]
            # Starts the learner from qbert_dqn_config_k8s.py.pkl, a file this
            # container does not create itself.
            # NOTE(review): $LEARNER_PORT is not defined in this manifest;
            # presumably it is injected at runtime - confirm.
            args:
              - |
                # if code has been changed in the mount path, we have to reinstall ding cli
                # pip install --no-cache-dir -e .;
                ding -m dist --module spawn_learner -c qbert_dqn_config_k8s.py.pkl -s 0 -lp $LEARNER_PORT --disable-flask-log 0
            ports:
              - name: learner
                containerPort: 22271
            volumeMounts:
              # Memory-backed emptyDir (128Mi limit) used as /dev/shm.
              - name: cache-volume
                mountPath: /dev/shm
              - name: work-dir
                mountPath: /ding