# File size: 2,993 Bytes
# f540c0e
---
# CartPole-v1 preset. Anchored as `cartpole-defaults` so that CartPole-v0 can
# merge it and override only the values that differ.
# Floats use the canonical "d.de+N" spelling, which resolves as a float without
# an explicit !!float tag; algo_hyperparams keys are listed alphabetically.
CartPole-v1: &cartpole-defaults
  n_timesteps: 5.0e+4
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes:
      - 256
      - 256
  algo_hyperparams:
    batch_size: 64
    buffer_size: 100000
    exploration_final_eps: 0.04
    exploration_fraction: 0.16
    gamma: 0.99
    gradient_steps: 128
    learning_rate: 2.3e-3
    learning_starts: 1000
    target_update_interval: 10
    train_freq: 256
  eval_params:
    step_freq: 1.0e+4

# CartPole-v0 takes every setting from `cartpole-defaults` (CartPole-v1) via
# the merge key and overrides n_timesteps only.
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: 4.0e+4

# MountainCar-v0 preset (standalone: no anchor, no merge, no eval_params).
# Floats use the canonical "d.de+N" spelling, which resolves as a float without
# an explicit !!float tag; algo_hyperparams keys are listed alphabetically.
MountainCar-v0:
  n_timesteps: 1.2e+5
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes:
      - 256
      - 256
  algo_hyperparams:
    batch_size: 128
    buffer_size: 10000
    exploration_final_eps: 0.07
    exploration_fraction: 0.2
    gamma: 0.98
    gradient_steps: 8
    learning_rate: 4.0e-3
    learning_starts: 1000
    target_update_interval: 600
    train_freq: 16

# Acrobot-v1 preset (standalone: no anchor, no merge, no eval_params).
# Floats use the canonical "d.de+N" spelling, which resolves as a float without
# an explicit !!float tag; algo_hyperparams keys are listed alphabetically.
Acrobot-v1:
  n_timesteps: 1.0e+5
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes:
      - 256
      - 256
  algo_hyperparams:
    batch_size: 128
    buffer_size: 50000
    exploration_final_eps: 0.1
    exploration_fraction: 0.12
    gamma: 0.99
    # NOTE(review): -1 looks like a sentinel rather than a literal step
    # count — confirm its meaning against the training loop.
    gradient_steps: -1
    learning_rate: 6.3e-4
    # Zero here, unlike the other classic-control presets in this file.
    learning_starts: 0
    target_update_interval: 250
    train_freq: 4

# LunarLander-v2 preset (standalone: no anchor, no merge).
# It is the only preset in this file that sets max_grad_norm.
LunarLander-v2:
  n_timesteps: !!float 5e5
  env_hyperparams:
    rolling_length: 50
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    learning_rate: !!float 1e-4
    batch_size: 256
    buffer_size: 100000
    learning_starts: 10000
    gamma: 0.99
    target_update_interval: 250
    train_freq: 8
    # NOTE(review): -1 looks like a sentinel rather than a literal step
    # count — confirm its meaning against the training loop.
    gradient_steps: -1
    exploration_fraction: 0.12
    exploration_final_eps: 0.1
    max_grad_norm: 0.5
  eval_params:
    # Written without a digit-group underscore: `25_000` is an integer only
    # under YAML 1.1 resolvers, while YAML 1.2 core-schema parsers load it as
    # the string "25_000". The plain form is the same int everywhere.
    step_freq: 25000

# Shared Atari settings, anchored as `atari-defaults`. Merged by
# PongNoFrameskip-v4 and by the `_impala-atari` template via `<<`.
# NOTE(review): the leading underscore suggests this entry is a template and
# not a runnable environment id — confirm how the loader treats such keys.
# Merge keys are shallow: an entry that redefines env_hyperparams,
# algo_hyperparams or eval_params replaces that whole sub-mapping.
_atari: &atari-defaults
  n_timesteps: !!float 1e7
  env_hyperparams:
    frame_stack: 4
    # Written without a digit-group underscore: `1_000` is an integer only
    # under YAML 1.1 resolvers, while YAML 1.2 core-schema parsers load it as
    # the string "1_000". The plain form is the same int everywhere.
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    n_envs: 8
    vec_env_class: "subproc"
  algo_hyperparams:
    buffer_size: 100000
    learning_rate: !!float 1e-4
    batch_size: 32
    # Same value as buffer_size above.
    learning_starts: 100000
    target_update_interval: 1000
    train_freq: 8
    gradient_steps: 2
    exploration_fraction: 0.1
    exploration_final_eps: 0.01
  eval_params:
    deterministic: false

# Pong takes every `atari-defaults` value through the merge key; only
# n_timesteps is overridden (2.5e6 here versus 1e7 in the defaults).
PongNoFrameskip-v4:
  <<: *atari-defaults
  n_timesteps: 2.5e+6

# Template layered on `atari-defaults`: merges all of its keys and adds a
# policy_hyperparams mapping, which atari-defaults does not define. Anchored
# as `impala-atari-defaults` for the impala-* entries that follow.
_impala-atari: &impala-atari-defaults
  <<: *atari-defaults
  policy_hyperparams:
    cnn_feature_dim: 256
    cnn_layers_init_orthogonal: false
    cnn_style: 'impala'
    init_layers_orthogonal: true

# Pong with the impala-atari template; overrides n_timesteps (2.5e6).
# env_id is given explicitly — presumably because the preset key carries an
# "impala-" prefix and is not itself an environment id; confirm with the loader.
impala-PongNoFrameskip-v4:
  <<: *impala-atari-defaults
  n_timesteps: 2.5e+6
  env_id: 'PongNoFrameskip-v4'

# Breakout with the impala-atari template; nothing overridden besides env_id.
impala-BreakoutNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: 'BreakoutNoFrameskip-v4'

# Space Invaders with the impala-atari template; nothing overridden besides
# env_id.
impala-SpaceInvadersNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: 'SpaceInvadersNoFrameskip-v4'

# Q*bert with the impala-atari template; nothing overridden besides env_id.
impala-QbertNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: 'QbertNoFrameskip-v4'