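# Repository page metadata captured with this file (not part of the YAML document):
#   Quentin Gallouédec - "Initial commit" (3ed8b9a)
#   Page links: raw, history, blame
#   File size: 562 Bytes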
# Hyperparameters stored as an ordered list of [name, value] pairs that is
# passed as the single positional argument to collections.OrderedDict.
#
# SECURITY NOTE: the `!!python/object/apply:` tag is PyYAML-specific. It is
# rejected by `yaml.safe_load` and requires an unsafe loader, which can invoke
# arbitrary Python callables. Only load this file from a trusted source.
!!python/object/apply:collections.OrderedDict
- - - batch_size
    - 2048
  - - buffer_size
    - 1000000
  # Dotted Python path kept as a plain string; presumably imported by the
  # consumer of this file -- confirm against the loading code.
  - - env_wrapper
    - sb3_contrib.common.wrappers.TimeFeatureWrapper
  - - gamma
    - 0.95
  - - learning_rate
    - 0.001
  # Stored as a float (1000000.0), not an integer.
  - - n_timesteps
    - 1000000.0
  - - policy
    - MultiInputPolicy
  # The value is a string containing a Python `dict(...)` expression, not a
  # YAML mapping; presumably evaluated by the consumer -- confirm.
  - - policy_kwargs
    - dict(net_arch=[512, 512, 512], n_critics=2)
  - - replay_buffer_class
    - HerReplayBuffer
  # Multi-line plain scalar: the closing `)` on the continuation line is folded
  # into the same string as the `dict(` line.
  # NOTE(review): `online_sampling` appears to have been dropped from
  # HerReplayBuffer in newer Stable-Baselines3 releases -- confirm the target
  # library version before reusing these kwargs.
  - - replay_buffer_kwargs
    - dict( online_sampling=True, goal_selection_strategy='future', n_sampled_goal=4,
      )
  - - tau
    - 0.05