# Hyperparameter set stored as an ordered mapping.
#
# The tag below makes the loader call collections.OrderedDict with a single
# argument: a list of [key, value] pairs, kept in the order written here.
# NOTE(review): python/object/apply tags are rejected by yaml.safe_load; this
# file needs a loader that can construct Python objects, so only load it from
# a trusted source.
!!python/object/apply:collections.OrderedDict
- - - batch_size
    - 64
  - - gamma
    - 0.99
  - - learning_rate
    - 0.0001
  # Written as a float (1000000.0), not an integer.
  # NOTE(review): presumably the consumer casts this to int -- confirm.
  - - n_timesteps
    - 1000000.0
  - - noise_std
    - 0.3
  - - noise_type
    - ornstein-uhlenbeck
  - - policy
    - MlpPolicy
  # Stored as a plain string, not a nested mapping.
  # NOTE(review): looks like a Python expression the consumer evaluates -- verify.
  - - policy_kwargs
    - dict(net_arch=dict(pi=[300, 200], qf=[400, 300]))