File size: 2,887 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
from easydict import EasyDict
# Experiment settings for the "halfcheetah_medium_expert_pd_seed0" run, kept as
# a single nested plain dict with two top-level sections: `env` and `policy`.
main_config = {
    "exp_name": "halfcheetah_medium_expert_pd_seed0",
    "env": {
        "env_id": "halfcheetah-medium-expert-v2",
        "collector_env_num": 1,
        "evaluator_env_num": 8,
        "use_act_scale": True,
        "n_evaluator_episode": 8,
        "returns_scale": 1.0,
        "termination_penalty": -100,
        "max_path_length": 1000,
        "use_padding": True,
        "include_returns": True,
        "normed": False,
        "stop_value": 12000,
        # horizon / obs_dim / action_dim are repeated verbatim inside both
        # model sub-configs under `policy.model`; edit them together.
        "horizon": 4,
        "obs_dim": 17,
        "action_dim": 6,
    },
    "policy": {
        "cuda": True,
        "model": {
            "diffuser_model": "GaussianDiffusion",
            "diffuser_model_cfg": {
                "model": "DiffusionUNet1d",
                "model_cfg": {
                    # 23 == obs_dim (17) + action_dim (6); presumably these
                    # must stay in sync -- confirm against the model code.
                    "transition_dim": 23,
                    "dim": 32,
                    "dim_mults": [1, 4, 8],
                    "returns_condition": False,
                    "kernel_size": 5,
                    "attention": True,
                },
                "horizon": 4,
                "obs_dim": 17,
                "action_dim": 6,
                "n_timesteps": 20,
                "predict_epsilon": False,
                "loss_discount": 1,
                "action_weight": 10,
            },
            "value_model": "ValueDiffusion",
            "value_model_cfg": {
                "model": "TemporalValue",
                "model_cfg": {
                    "horizon": 4,
                    # Same 17 + 6 total as in the diffuser model_cfg.
                    "transition_dim": 23,
                    "dim": 32,
                    "dim_mults": [1, 4, 8],
                    "kernel_size": 5,
                },
                "horizon": 4,
                "obs_dim": 17,
                "action_dim": 6,
                "n_timesteps": 20,
                # NOTE: differs from the diffuser config, which sets False.
                "predict_epsilon": True,
                "loss_discount": 1,
            },
            "n_guide_steps": 2,
            "scale": 0.001,
            "t_stopgrad": 4,
            "scale_grad_by_std": True,
        },
        "normalizer": "GaussianNormalizer",
        "learn": {
            "data_path": None,
            "train_epoch": 60000,
            "gradient_accumulate_every": 2,
            "batch_size": 32,
            "learning_rate": 2e-4,
            "discount_factor": 0.99,
            "plan_batch_size": 64,
            # Very large iteration threshold -- presumably meant to switch off
            # periodic checkpoint saving in practice; confirm with the hook.
            "learner": {"hook": {"save_ckpt_after_iter": 1000000000}},
        },
        "collect": {"data_type": "diffuser_traj"},
        "eval": {
            "evaluator": {"eval_freq": 500},
            "test_ret": 0.9,
        },
        "other": {"replay_buffer": {"replay_buffer_size": 2000000}},
    },
}
# Re-bind `main_config` to an EasyDict built from the plain dict, so entries
# can also be read as attributes (e.g. main_config.env.env_id) in addition to
# normal key lookup. The former `main_config = main_config` line was a
# self-assignment with no effect and has been dropped.
main_config = EasyDict(main_config)
# Component selection for this experiment: each section carries a `type`
# string (plus, for the env, the module path listed in `import_names`).
# NOTE(review): presumably the framework resolves these `type` strings through
# its registries after importing `import_names` -- confirm against the caller.
create_config = {
    "env": {
        "type": "d4rl",
        "import_names": ["dizoo.d4rl.envs.d4rl_env"],
    },
    "env_manager": {"type": "subprocess"},
    "policy": {"type": "pd"},
    "replay_buffer": {"type": "naive"},
}
# Re-bind `create_config` to an EasyDict built from the plain dict, so entries
# can also be read as attributes (e.g. create_config.env.type) in addition to
# normal key lookup. The former `create_config = create_config` line was a
# self-assignment with no effect and has been dropped.
create_config = EasyDict(create_config)
|