Initial commit
- README.md +1 -1
- args.yml +8 -8
- ppo-Pendulum-v1.zip +2 -2
- ppo-Pendulum-v1/data +18 -18
- ppo-Pendulum-v1/policy.optimizer.pth +2 -2
- ppo-Pendulum-v1/policy.pth +1 -1
- replay.mp4 +2 -2
- results.json +1 -1
- train_eval_metrics.zip +2 -2
README.md
CHANGED
@@ -10,7 +10,7 @@ model-index:
  results:
  - metrics:
    - type: mean_reward
-      value: -336.
+      value: -336.89 +/- 406.36
      name: mean_reward
    task:
      type: reinforcement-learning
args.yml
CHANGED
@@ -2,7 +2,7 @@
- - - algo
    - ppo
  - - device
-    -
+    - cpu
  - - env
    - Pendulum-v1
  - - env_kwargs
@@ -12,11 +12,11 @@
  - - eval_freq
    - 25000
  - - gym_packages
-    -
+    - - seals
  - - hyperparams
    - null
  - - log_folder
-    -
+    - pendulum_experts/seed_8/
  - - log_interval
    - -1
  - - max_total_trials
@@ -36,7 +36,7 @@
  - - no_optim_plots
    - false
  - - num_threads
-    -
+    - 2
  - - optimization_log_path
    - null
  - - optimize_hyperparameters
@@ -50,15 +50,15 @@
  - - save_replay_buffer
    - false
  - - seed
-    -
+    - 8
  - - storage
    - null
  - - study_name
    - null
  - - tensorboard_log
-    -
+    - runs/Pendulum-v1__ppo__8__1659105255
  - - track
-    -
+    - true
  - - trained_agent
    - ''
  - - truncate_last_trajectory
@@ -72,4 +72,4 @@
  - - wandb_entity
    - null
  - - wandb_project_name
-    -
+    - pendulum_experts
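args.yml is the argument dump rl-baselines3-zoo writes for a run: PPO on Pendulum-v1 with seed 8 on CPU, logs under pendulum_experts/seed_8/, tracked to the pendulum_experts Weights & Biases project. A minimal sketch for reading it back into a plain dict, assuming the body is the nested list of [name, value] pairs shown above (some zoo versions also prepend a Python OrderedDict tag, which is stripped here before parsing):

```python
# Sketch: recover the recorded training arguments from args.yml.
# Assumption: the body is the nested list of [name, value] pairs shown above;
# a leading "!!python/object/apply:collections.OrderedDict" tag line, if
# present, is dropped so that yaml.safe_load can parse the rest.
import yaml

with open("args.yml") as f:
    text = f.read()

if text.lstrip().startswith("!!python"):
    text = text.split("\n", 1)[1]

parsed = yaml.safe_load(text)
pairs = parsed[0] if len(parsed) == 1 else parsed  # unwrap the "- - - algo" nesting
args = dict(pairs)

print(args["algo"], args["env"], args["seed"], args["device"])  # ppo Pendulum-v1 8 cpu
print(args["log_folder"])                                       # pendulum_experts/seed_8/
```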
ppo-Pendulum-v1.zip
CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3cbe8f9c58a0397a68c46a17fe7e7df5ea9f72f7bf8d24bf6c3cdf90a793ed6b
+size 142033
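ppo-Pendulum-v1.zip is the complete stable-baselines3 save file; only its git-lfs pointer (oid and size) is stored in git. A minimal sketch for loading the agent and rolling it out, assuming stable-baselines3 and the classic Gym API this model was trained against (reset() returning just the observation):

```python
# Sketch: load the saved PPO agent and run a short deterministic rollout.
# Assumptions: stable-baselines3 is installed and Pendulum-v1 is available
# under the classic Gym step/reset API.
import gym
from stable_baselines3 import PPO

model = PPO.load("ppo-Pendulum-v1.zip")
env = gym.make("Pendulum-v1")

obs = env.reset()
for _ in range(200):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()
env.close()
```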
ppo-Pendulum-v1/data
CHANGED
@@ -4,19 +4,19 @@
":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==",
"__module__": "stable_baselines3.common.policies",
"__doc__": "\n Policy class for actor-critic algorithms (has both policy and value prediction).\n Used by A2C, PPO and the likes.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param ortho_init: Whether to use or not orthogonal initialization\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n :param sde_net_arch: Network architecture for extracting features\n when using gSDE. If None, the latent features from the policy will be used.\n Pass an empty list to use the states as features.\n :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
-"__init__": "<function ActorCriticPolicy.__init__ at
+"__init__": "<function ActorCriticPolicy.__init__ at 0x7f2e52529820>",
-"_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at
+"_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x7f2e525298b0>",
-"reset_noise": "<function ActorCriticPolicy.reset_noise at
+"reset_noise": "<function ActorCriticPolicy.reset_noise at 0x7f2e52529940>",
-"_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at
+"_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x7f2e525299d0>",
-"_build": "<function ActorCriticPolicy._build at
+"_build": "<function ActorCriticPolicy._build at 0x7f2e52529a60>",
-"forward": "<function ActorCriticPolicy.forward at
+"forward": "<function ActorCriticPolicy.forward at 0x7f2e52529af0>",
-"_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at
+"_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x7f2e52529b80>",
-"_predict": "<function ActorCriticPolicy._predict at
+"_predict": "<function ActorCriticPolicy._predict at 0x7f2e52529c10>",
-"evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at
+"evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x7f2e52529ca0>",
-"get_distribution": "<function ActorCriticPolicy.get_distribution at
+"get_distribution": "<function ActorCriticPolicy.get_distribution at 0x7f2e52529d30>",
-"predict_values": "<function ActorCriticPolicy.predict_values at
+"predict_values": "<function ActorCriticPolicy.predict_values at 0x7f2e52529dc0>",
"__abstractmethods__": "frozenset()",
-"_abc_impl": "<_abc_data object at
+"_abc_impl": "<_abc_data object at 0x7f2e5251ed50>"
},
"verbose": 1,
"policy_kwargs": {},
@@ -52,15 +52,15 @@
"_num_timesteps_at_start": 0,
"seed": 0,
"action_noise": null,
-"start_time":
+"start_time": 1659105256.5056021,
"learning_rate": {
":type:": "<class 'function'>",
-":serialized:": "
+
":serialized:": "gAWVhQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsBSwBLAEsBSwFLE0MEiABTAJROhZQpjAFflIWUjFUvaG9tZS9tYXhpbWlsaWFuLy5sb2NhbC9saWIvcHl0aG9uMy44L3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLgEMCAAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flGgNdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoH32UfZQoaBdoDowMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBiMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHP1BiTdLxqfyFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="
},
-"tensorboard_log":
+"tensorboard_log": "runs/Pendulum-v1__ppo__8__1659105255/Pendulum-v1",
"lr_schedule": {
":type:": "<class 'function'>",
-":serialized:": "
+
":serialized:": "gAWVhQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsBSwBLAEsBSwFLE0MEiABTAJROhZQpjAFflIWUjFUvaG9tZS9tYXhpbWlsaWFuLy5sb2NhbC9saWIvcHl0aG9uMy44L3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLgEMCAAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flGgNdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoH32UfZQoaBdoDowMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBiMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHP1BiTdLxqfyFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="
},
"_last_obs": null,
"_last_episode_starts": {
@@ -74,7 +74,7 @@
"_current_progress_remaining": -0.02400000000000002,
"ep_info_buffer": {
":type:": "<class 'collections.deque'>",
-":serialized:": "gAWVHRAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////
+
":serialized:": "gAWVHRAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIxRwEHa1accCUhpRSlIwBbJRLyIwBdJRHQEvB6ciGFi91fZQoaAZoCWgPQwhcPpKSHgYDwJSGlFKUaBVLyGgWR0BLwbr1M/QjdX2UKGgGaAloD0MIN3Fyv0PTcMCUhpRSlGgVS8hoFkdAS8Gkep4r0HV9lChoBmgJaA9DCMoXtJAA03DAlIaUUpRoFUvIaBZHQEvBjxTbWVh1fZQoaAZoCWgPQwio34Wt2SdxwJSGlFKUaBVLyGgWR0BL1FAeJYT1dX2UKGgGaAloD0MIrDyBsFOsA8CUhpRSlGgVS8hoFkdAS9QhB7eEZnV9lChoBmgJaA9DCCodrP9z22DAlIaUUpRoFUvIaBZHQEvUCvovBad1fZQoaAZoCWgPQwjvVpbo7Il6wJSGlFKUaBVLyGgWR0BL0/V7Qb++dX2UKGgGaAloD0MI9l0R/K9lecCUhpRSlGgVS8hoFkdAS+bQzDXOGHV9lChoBmgJaA9DCFLTLqYZj3jAlIaUUpRoFUvIaBZHQEvmoegctGx1fZQoaAZoCWgPQwiE9X8Oc4hgwJSGlFKUaBVLyGgWR0BL5ow/PgNxdX2UKGgGaAloD0MIdELooOuVgcCUhpRSlGgVS8hoFkdAS+Z3HJcPfHV9lChoBmgJaA9DCF95kJ4iwnDAlIaUUpRoFUvIaBZHQEv5QSi/O+t1fZQoaAZoCWgPQwgKuVLPgnBgwJSGlFKUaBVLyGgWR0BL+RJNCZ4OdX2UKGgGaAloD0MIfy4aMt69eMCUhpRSlGgVS8hoFkdAS/j8WKuSwHV9lChoBmgJaA9DCHu8kA7Pd3DAlIaUUpRoFUvIaBZHQEv45wwTM7l1fZQoaAZoCWgPQwiX/brTXRV4wJSGlFKUaBVLyGgWR0BNAfg75mAcdX2UKGgGaAloD0MImFEst7Qa+b+UhpRSlGgVS8hoFkdATQHNX5nDi3V9lChoBmgJaA9DCMjT8gPXJ3DAlIaUUpRoFUvIaBZHQE0Budf9gnd1fZQoaAZoCWgPQwgAPKJCdTP/v5SGlFKUaBVLyGgWR0BNAadlNDc/dX2UKGgGaAloD0MINbVsrS+QYMCUhpRSlGgVS8hoFkdATR+GfwqiGnV9lChoBmgJaA9DCNREn48y72DAlIaUUpRoFUvIaBZHQE0fWy1NQCV1fZQoaAZoCWgPQwjusfShC+hwwJSGlFKUaBVLyGgWR0BNH0gKWszVdX2UKGgGaAloD0MIqmG/Jxanf8CUhpRSlGgVS8hoFkdATR82DQJHAnV9lChoBmgJaA9DCKlqgqg7GnrAlIaUUpRoFUvIaBZHQE09SSeRPoF1fZQoaAZoCWgPQwgJNxlVRnh6wJSGlFKUaBVLyGgWR0BNPR33YcvNdX2UKGgGaAloD0MIb38uGrKYb8CUhpRSlGgVS8hoFkdATT0KPXCj13V9lChoBmgJaA9DCFneVQ+YFmDAlIaUUpRoFUvIaBZHQE0892X9itt1fZQoaAZoCWgPQwjbFI+LatH/v5SGlFKUaBVLyGgWR0BNWxNATqSpdX2UKGgGaAloD0MI24toOyY1dsCUhpRSlGgVS8hoFkdATVrnX/YJ3XV9lChoBmgJaA9DCEH0pEwq2HDAlIaUUpRoFUvIaBZHQE1a0+C9RJp1fZQoaAZoCWgPQwg7b2Ozo/RgwJSGlFKUaBVLyGgWR0BNWsIu5BkadX2UKGgGaAloD0MI9PqT+JwhccCUhpRSlGgVS8hoFkdATXjYXfqHGnV9lChoBmgJaA9DCDV+4ZWkiWDAlIaUUpRoFUvIaBZHQE14rJ8v25B1fZQoaAZoCWgPQwhsPq4NFedewJSGlFKUaBVLyGgWR0BNeJj+aScLdX2UKGgGaAloD0MIP/89eG33YMCUhpRSlGgVS8hoFkdATXiGUOd5IHV9lChoBmgJaA9DCJXXSuguZWDAlIaUUpRoFUvIaBZHQE7P49ovi991fZQoaAZoCWgPQwgCmggbnmZvwJSGlFKUaBVLyGgWR0BOz7UXpGF0dX2UKGgGaAloD0MIJJur5nlPdsCUhpRSlGgVS8hoFkdATs+fI0ZWJnV9lChoBmgJaA9DCNV6v9FOKnzAlIaUUpRoFUvIaBZHQE7PiZv1lGx1fZQoaAZoCWgPQwg7GLFPgMFgwJSGlFKUaBVLyGgWR0BO4s+3Ytg8dX2UKGgGaAloD0MIjWDj+vfOcMCUhpRSlGgVS8hoFkdATuKg7HQyAXV9lChoBmgJaA9DCLHAV3TrFQDAlIaUUpRoFUvIaBZHQE7iipNsWO91fZQoaAZoCWgPQwgm4NdIkgN5wJSGlFKUaBVLyGgWR0BO4nUc4o7WdX2UKGgGaAloD0MIOQoQBTMm9L+UhpRSlGgVS8hoFkdATvX2GqPwNXV9lChoBmgJaA9DCGEcXDrmvAjAlIaUUpRoFUvIaBZHQE71x1gYxcp1fZQoaAZoCWgPQwj7zcR0IcdgwJSGlFKUaBVLyGgWR0BO9bDVH4GmdX2UKGgGaAloD0MIP/7Sor71YMCUhpRSlGgVS8hoFkdATvWbd8Aq/nV9lChoBmgJaA9DCDiHa7UHyG7AlIaUUpRoFUvIaBZHQE8IeOn2qT91fZQoaAZoCWgPQwiTcCGP4JpgwJSGlFKUaBVLyGgWR0BPCEo4MnZ1dX2UKGgGaAloD0MIM1Naf0ssf8CUhpRSlGgVS8hoFkdATwgz1schknV9lChoBmgJaA9DCIgq/BnebAbAlIaUUpRoFUvIaBZHQE8IHnlnyup1fZQoaAZoCWgPQwioc0UpIf1fwJSGlFKUaBVLyGgWR0BPGvSc9W6tdX2UKGgGaAloD0MIXHaIf9jyAsCUhpRSlGgVS8hoFkdATxrF6zE74nV9lChoBmgJaA9DCEm6ZvIN1XfAlIaUUpRoFUvIaBZHQE8ar6LwWnF1fZQoaAZoCWgPQwiOOjquxi18wJSGlFKUaBVLyGgWR0BPGposZpBYdX2UKGgGaAloD0MIUirhCb1+97+UhpRSlGgVS8hoFkdAT/PvWpZOi3V9lChoBmgJaA9DCCVBuAKKMGDAlIaUUpRoFUvIaBZHQE/zwBHTZxt1fZQoaAZoCWgPQwhu+x7118sJwJSGlFKUaBVLyGgWR0BP86kAPuohdX2UKGgGaAloD0MIeqhtwyiIAsCUhpRSlGgVS8hoFkdAT/OTV2A5JnV9lChoBmgJaA9DCOZbH9abnmDAlIaUUpRoFUvIaBZHQFADattALRd1fZQoaAZoCWgPQwjdQlciUMBgwJSGlFKUaBVLyGgWR0BQA1NlAeJYdX2UKGgGaAloD0MIptO6DWpPcMCUhpRSlGgVS8hoFkdAUANIVdonKH
V9lChoBmgJaA9DCOjc7XrpBWHAlIaUUpRoFUvIaBZHQFADPatcOb11fZQoaAZoCWgPQwiEKjV7oOBgwJSGlFKUaBVLyGgWR0BQDKu0TlDGdX2UKGgGaAloD0MI1uWUgJgkAsCUhpRSlGgVS8hoFkdAUAyUNayKN3V9lChoBmgJaA9DCOVFJuBXvmDAlIaUUpRoFUvIaBZHQFAMiXY150N1fZQoaAZoCWgPQwjpSC7/IelgwJSGlFKUaBVLyGgWR0BQDH8CPp6hdX2UKGgGaAloD0MI5DEDlXHHYMCUhpRSlGgVS8hoFkdAUBY1Gb1AaHV9lChoBmgJaA9DCOyi6IGP0WDAlIaUUpRoFUvIaBZHQFAWHaN+9al1fZQoaAZoCWgPQwjVPh2PmQFgwJSGlFKUaBVLyGgWR0BQFhLbpNbkdX2UKGgGaAloD0MI51HxfwcYcMCUhpRSlGgVS8hoFkdAUBYILPUrkXV9lChoBmgJaA9DCP27PnOWKXfAlIaUUpRoFUvIaBZHQFAfbBXS0Bx1fZQoaAZoCWgPQwgB/FOqRIVwwJSGlFKUaBVLyGgWR0BQH1S88La3dX2UKGgGaAloD0MIZ+22C82McMCUhpRSlGgVS8hoFkdAUB9J+UhV2nV9lChoBmgJaA9DCOY9zjThIGDAlIaUUpRoFUvIaBZHQFAfP3BYV7B1fZQoaAZoCWgPQwinBMQkXAgQwJSGlFKUaBVLyGgWR0BQjlUdaMaTdX2UKGgGaAloD0MIGm7A54ftXcCUhpRSlGgVS8hoFkdAUI49r433pXV9lChoBmgJaA9DCBGMg0tHKGDAlIaUUpRoFUvIaBZHQFCOMrmQr+Z1fZQoaAZoCWgPQwgMQKN0abRgwJSGlFKUaBVLyGgWR0BQjigK4QSSdX2UKGgGaAloD0MILBGo/kH8bsCUhpRSlGgVS8hoFkdAUJef5DZ13nV9lChoBmgJaA9DCNSbUfMVsHzAlIaUUpRoFUvIaBZHQFCXiF0xM391fZQoaAZoCWgPQwjrrBbY45RuwJSGlFKUaBVLyGgWR0BQl30PH1e0dX2UKGgGaAloD0MIWdx/ZLpKb8CUhpRSlGgVS8hoFkdAUJdyimEXcnV9lChoBmgJaA9DCN/eNejLSG7AlIaUUpRoFUvIaBZHQFCg1yeZof11fZQoaAZoCWgPQwiq7/yiRMl3wJSGlFKUaBVLyGgWR0BQoL9/BnBddX2UKGgGaAloD0MI7xtfe+Z2YMCUhpRSlGgVS8hoFkdAUKC0pmVZ93V9lChoBmgJaA9DCMN/uoGC9WDAlIaUUpRoFUvIaBZHQFCgqhDgIhR1fZQoaAZoCWgPQwj7dhIR/hRhwJSGlFKUaBVLyGgWR0BQwm1+iJwbdX2UKGgGaAloD0MIF4IclHCMcMCUhpRSlGgVS8hoFkdAUMJV3ljmS3V9lChoBmgJaA9DCCUgJuFCnvO/lIaUUpRoFUvIaBZHQFDCSrYGt6p1fZQoaAZoCWgPQwg25QrvcspewJSGlFKUaBVLyGgWR0BQwkADJU5udX2UKGgGaAloD0MInkFD/4Tvc8CUhpRSlGgVS8hoFkdAUMvUlRgqmXV9lChoBmgJaA9DCLPsSWAzOXfAlIaUUpRoFUvIaBZHQFDLvUjLSu11fZQoaAZoCWgPQwjjxi3mp296wJSGlFKUaBVLyGgWR0BQy7JCBwuNdX2UKGgGaAloD0MIBTHQta+8ccCUhpRSlGgVS8hoFkdAUMunl4keIXV9lChoBmgJaA9DCNhHp668SHbAlIaUUpRoFUvIaBZHQFDVE/jbSJF1fZQoaAZoCWgPQwh2qRH6mQ1xwJSGlFKUaBVLyGgWR0BQ1Pxc3VCpdX2UKGgGaAloD0MIob5lThc+ccCUhpRSlGgVS8hoFkdAUNTxSYPXkHV9lChoBmgJaA9DCOmdCrjns2DAlIaUUpRoFUvIaBZHQFDU5ooNNJx1ZS4="
},
"ep_success_buffer": {
":type:": "<class 'collections.deque'>",
@@ -91,7 +91,7 @@
"n_epochs": 10,
"clip_range": {
":type:": "<class 'function'>",
-":serialized:": "
+
":serialized:": "gAWVhQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwNX2J1aWx0aW5fdHlwZZSTlIwKTGFtYmRhVHlwZZSFlFKUKGgCjAhDb2RlVHlwZZSFlFKUKEsBSwBLAEsBSwFLE0MEiABTAJROhZQpjAFflIWUjFUvaG9tZS9tYXhpbWlsaWFuLy5sb2NhbC9saWIvcHl0aG9uMy44L3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLgEMCAAGUjAN2YWyUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flGgNdU5OaACMEF9tYWtlX2VtcHR5X2NlbGyUk5QpUpSFlHSUUpSMHGNsb3VkcGlja2xlLmNsb3VkcGlja2xlX2Zhc3SUjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoH32UfZQoaBdoDowMX19xdWFsbmFtZV9flIwZY29uc3RhbnRfZm4uPGxvY2Fscz4uZnVuY5SMD19fYW5ub3RhdGlvbnNfX5R9lIwOX19rd2RlZmF1bHRzX1+UTowMX19kZWZhdWx0c19flE6MCl9fbW9kdWxlX1+UaBiMB19fZG9jX1+UTowLX19jbG9zdXJlX1+UaACMCl9tYWtlX2NlbGyUk5RHP8mZmZmZmZqFlFKUhZSMF19jbG91ZHBpY2tsZV9zdWJtb2R1bGVzlF2UjAtfX2dsb2JhbHNfX5R9lHWGlIZSMC4="
},
"clip_range_vf": null,
"normalize_advantage": true,
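The learning_rate, lr_schedule and clip_range entries in ppo-Pendulum-v1/data are cloudpickle dumps of constant schedule functions (the blobs reference constant_fn.<locals>.func from stable_baselines3.common.utils). A minimal sketch of what such a schedule is; the numeric values below are illustrative SB3 defaults, not decoded from the blobs:

```python
# Sketch of the constant schedules the serialized entries above encode.
# In stable-baselines3 a schedule maps "progress remaining" (1.0 at the start
# of training, 0.0 at the end) to a value; a constant schedule ignores it.
# The values below are common SB3 defaults, not values decoded from the blobs.
def constant_fn(val):
    def func(_progress_remaining):
        return val
    return func

lr_schedule = constant_fn(3e-4)  # SB3's default PPO learning rate
clip_range = constant_fn(0.2)    # SB3's default PPO clip range

print(lr_schedule(1.0), lr_schedule(0.0))  # same value at every point in training
print(clip_range(0.5))
```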
ppo-Pendulum-v1/policy.optimizer.pth
CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5f35140a14d394d2df394225b62bcac4e8f0baf3c5e7b6bee35b6a8f07481327
+size 82096
ppo-Pendulum-v1/policy.pth
CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c45e5dbe7d88b14202ead17e017d9e6b39f128b089f3b3ae5840a18d9412aeca
size 40254
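policy.pth and policy.optimizer.pth are plain PyTorch checkpoints: the actor-critic weights and the corresponding optimizer state. A minimal sketch for inspecting the weights directly, assuming policy.pth is a regular state_dict as stable-baselines3 saves it:

```python
# Sketch: inspect the saved actor-critic weights without stable-baselines3.
# Assumption: policy.pth is an ordinary torch state_dict (tensor name -> tensor).
import torch

state_dict = torch.load("ppo-Pendulum-v1/policy.pth", map_location="cpu")
for name, tensor in state_dict.items():
    print(f"{name:40s} {tuple(tensor.shape)}")
```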
replay.mp4
CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a8aff0086945e50d3cb3a42e8a95051e42022f276ab13fdd7114ec2fc3a2d2e7
+size 140831
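replay.mp4 is a rendered evaluation rollout of the agent. A sketch of producing a similar clip with stable-baselines3's VecVideoRecorder, assuming ffmpeg is installed and Pendulum-v1 supports rgb_array rendering under the classic Gym API:

```python
# Sketch: record a short rollout of the loaded agent to an mp4, similar in
# spirit to replay.mp4. Assumptions: ffmpeg is available and the classic Gym
# rgb_array render mode works for Pendulum-v1.
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder

model = PPO.load("ppo-Pendulum-v1.zip")
venv = DummyVecEnv([lambda: gym.make("Pendulum-v1")])
venv = VecVideoRecorder(
    venv,
    "videos/",
    record_video_trigger=lambda step: step == 0,
    video_length=500,
    name_prefix="ppo-Pendulum-v1",
)

obs = venv.reset()
for _ in range(500):
    action, _ = model.predict(obs, deterministic=True)
    obs, rewards, dones, infos = venv.step(action)
venv.close()
```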
results.json
CHANGED
@@ -1 +1 @@
-{"mean_reward": -336.
+{"mean_reward": -336.8904653, "std_reward": 406.3584272686389, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2022-07-29T16:39:51.603484"}
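results.json records the 10-episode deterministic evaluation that also backs the README's mean_reward metric. A minimal sketch of how such numbers are typically computed; the exact figures depend on the evaluation seed, so this is not the script that produced the file:

```python
# Sketch: compute mean/std episode reward over 10 deterministic episodes,
# the quantities stored in results.json. Exact figures vary with the
# evaluation seed.
import json
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

model = PPO.load("ppo-Pendulum-v1.zip")
env = gym.make("Pendulum-v1")

mean_reward, std_reward = evaluate_policy(
    model, env, n_eval_episodes=10, deterministic=True
)
print(json.dumps({"mean_reward": mean_reward, "std_reward": std_reward}))
```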
train_eval_metrics.zip
CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:6168a606f3d16b969a034c584d66d5fc4628639ef9e1a564f7c944a5681b27f9
+size 15560
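train_eval_metrics.zip holds the training-time evaluation logs. A sketch for reading them, under the assumption (not visible in this diff) that the archive contains the evaluations.npz written by the zoo's EvalCallback:

```python
# Sketch: read periodic evaluation results from train_eval_metrics.zip.
# Assumption (not confirmed by this diff): the archive contains the
# rl-baselines3-zoo "evaluations.npz" produced by EvalCallback.
import io
import zipfile

import numpy as np

with zipfile.ZipFile("train_eval_metrics.zip") as zf:
    data = np.load(io.BytesIO(zf.read("evaluations.npz")))

print(data["timesteps"])
print(data["results"].mean(axis=1))  # mean evaluation return at each checkpoint
```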