OpenDILabCommunity
/

Hopper-v3-PPO

@@ -8,17 +8,20 @@ exp_config = {
     'epoch_per_collect': 10,
     'batch_size': 320,
     'learning_rate': 0.0003,
     'weight_decay': 0,
     'value_weight': 0.5,
     'entropy_weight': 0.01,
     'clip_ratio': 0.2,
     'adv_norm': True,
-    'value_norm': 'symlog',
     'ppo_param_init': True,
     'grad_norm': 0.5,
     'n_sample': 3200,
     'unroll_len': 1,
     'deterministic_eval': True,
     'model': {},
-    'cfg_type': 'PPOFPolicyDict'
 }

     'epoch_per_collect': 10,
     'batch_size': 320,
     'learning_rate': 0.0003,
+    'lr_scheduler': None,
     'weight_decay': 0,
     'value_weight': 0.5,
     'entropy_weight': 0.01,
     'clip_ratio': 0.2,
     'adv_norm': True,
+    'value_norm': 'baseline',
     'ppo_param_init': True,
     'grad_norm': 0.5,
     'n_sample': 3200,
     'unroll_len': 1,
     'deterministic_eval': True,
     'model': {},
+    'cfg_type': 'PPOFPolicyDict',
+    'env_id': 'Hopper-v3',
+    'exp_name': 'Hopper-v3-PPO'
 }