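# NOTE(review): the lines below look like repository web-page residue rather
# than Python source; they are kept here as comments so the module parses.
# zjowowen's picture
# init space
# 079c32c
# raw
# history blame
# 2.91 kB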
from easydict import EasyDict
# Environment counts referenced by the env, collect and eval sections of the
# main config: (collector environments, evaluator environments).
collector_env_num, evaluator_env_num = 8, 8
# Sequence-length settings, named once so that collect.unroll_len below is
# always burnin_step + learn_unroll_len instead of a repeated pair of literals.
burnin_step = 2
learn_unroll_len = 40

# Main experiment config for R2D2 on CartPole (env, policy, learner, collector,
# evaluator, exploration schedule and replay buffer settings).
cartpole_r2d2_config = dict(
    exp_name='cartpole_r2d2_seed0',
    env=dict(
        collector_env_num=collector_env_num,
        evaluator_env_num=evaluator_env_num,
        n_evaluator_episode=evaluator_env_num,
        stop_value=195,
    ),
    policy=dict(
        cuda=False,
        priority=False,
        priority_IS_weight=False,
        model=dict(
            obs_shape=4,
            action_shape=2,
            encoder_hidden_size_list=[128, 128, 64],
        ),
        discount_factor=0.995,
        nstep=5,
        burnin_step=burnin_step,
        # (int) the whole sequence length to unroll the RNN network minus
        # the timesteps of burnin part,
        # i.e., <the whole sequence length> = <unroll_len> = <burnin_step> + <learn_unroll_len>
        learn_unroll_len=learn_unroll_len,
        learn=dict(
            # According to the R2D2 paper, the actor parameter update interval
            # is 400 environment timesteps. Each collect phase gathers 32
            # sequence samples of length <burnin_step> + <learn_unroll_len>;
            # with a sequence length of 100 that gives 32*100/400=8, which is
            # why update_per_collect=8 is used in most environments.
            # NOTE: this config uses a sequence length of 2 + 40 = 42 and sets
            # update_per_collect=5 instead.
            update_per_collect=5,
            batch_size=64,
            learning_rate=0.0005,
            target_update_theta=0.001,
        ),
        collect=dict(
            # NOTE: It is important to set traj_len_inf=True here,
            # to make sure self._traj_len=INF in serial_sample_collector.py.
            # In R2D2 policy, for each collect_env, we want to collect data of
            # length self._traj_len=INF unless the episode enters the 'done' state.
            # In each collect phase, we collect a total of <n_sample> sequence samples.
            n_sample=32,
            # Whole sequence length: burn-in part plus learning part (2 + 40 = 42).
            unroll_len=burnin_step + learn_unroll_len,
            traj_len_inf=True,
            env_num=collector_env_num,
        ),
        eval=dict(env_num=evaluator_env_num, evaluator=dict(eval_freq=30)),
        other=dict(
            eps=dict(
                type='exp',
                start=0.95,
                end=0.05,
                decay=10000,
            ),
            replay_buffer=dict(replay_buffer_size=100000),
        ),
    ),
)
# Wrap in EasyDict and expose it under the generic name `main_config`.
cartpole_r2d2_config = EasyDict(cartpole_r2d2_config)
main_config = cartpole_r2d2_config
# Creation config: carries the `type` string for the env, the env manager and
# the policy, plus the module path listed under the env's `import_names`.
# Both names are bound to the same EasyDict instance.
create_config = cartpole_r2d2_create_config = EasyDict(
    {
        'env': {
            'type': 'cartpole',
            'import_names': ['dizoo.classic_control.cartpole.envs.cartpole_env'],
        },
        'env_manager': {'type': 'base'},
        'policy': {'type': 'r2d2'},
    }
)
# Script entry point: when this file is executed directly, run the serial
# pipeline with the (main_config, create_config) pair and seed 0.
if __name__ == "__main__":
    # or you can enter `ding -m serial -c cartpole_r2d2_config.py -s 0`
    from ding.entry import serial_pipeline

    serial_pipeline((main_config, create_config), seed=0)