Upload . with huggingface_hub
- .gitattributes +1 -0
- .summary/0/events.out.tfevents.1673493103.brain2.usc.edu +3 -0
- README.md +56 -0
- checkpoint_p0/best_000015162_99418112_reward_22253.053.pth +3 -0
- checkpoint_p0/checkpoint_000012502_81985536.pth +3 -0
- checkpoint_p0/checkpoint_000015262_100073472.pth +3 -0
- config.json +147 -0
- git.diff +0 -0
- replay.mp4 +3 -0
- sf_log.txt +308 -0
.gitattributes
CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+replay.mp4 filter=lfs diff=lfs merge=lfs -text
.summary/0/events.out.tfevents.1673493103.brain2.usc.edu
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34d1922d46eb48594eaf4559516e4769aaa45abc44a9708ed74d71a293f67fc7
+size 125917
README.md
ADDED
@@ -0,0 +1,56 @@
+---
+library_name: sample-factory
+tags:
+- deep-reinforcement-learning
+- reinforcement-learning
+- sample-factory
+model-index:
+- name: APPO
+  results:
+  - task:
+      type: reinforcement-learning
+      name: reinforcement-learning
+    dataset:
+      name: halfcheetah
+      type: halfcheetah
+    metrics:
+    - type: mean_reward
+      value: 22298.35 +/- 1882.48
+      name: mean_reward
+      verified: false
+---
+
+An **APPO** model trained on the **halfcheetah** environment.
+
+This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
+Documentation on how to use Sample-Factory can be found at https://www.samplefactory.dev/
+
+
+## Downloading the model
+
+After installing Sample-Factory, download the model with:
+```
+python -m sample_factory.huggingface.load_from_hub -r apetrenko/sample_factory_brax_halfcheetah
+```
+
+
+## Using the model
+
+To run the model after download, use the `enjoy` script corresponding to this environment:
+```
+python -m sf_examples.brax.enjoy_brax --algo=APPO --env=halfcheetah --train_dir=./train_dir --experiment=sample_factory_brax_halfcheetah
+```
+
+
+You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
+See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.
+
+## Training with this model
+
+To continue training with this model, use the `train` script corresponding to this environment:
+```
+python -m sf_examples.brax.train_brax --algo=APPO --env=halfcheetah --train_dir=./train_dir --experiment=sample_factory_brax_halfcheetah --restart_behavior=resume --train_for_env_steps=10000000000
+```
+
+Note that you may need to set `--train_for_env_steps` to a suitably high number, since the experiment resumes from the step count at which it previously concluded.
+
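
As a supplement to the README's download command above, the same files can be fetched directly with the `huggingface_hub` Python API. This is a minimal sketch, assuming a recent `huggingface_hub` release; the `local_dir` path is an arbitrary example:
```
from huggingface_hub import snapshot_download

# Download every file in this repo (checkpoints, config.json, replay.mp4,
# sf_log.txt) into a local directory that can serve as a train_dir.
snapshot_download(
    repo_id="apetrenko/sample_factory_brax_halfcheetah",
    local_dir="./train_dir/sample_factory_brax_halfcheetah",
)
```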
checkpoint_p0/best_000015162_99418112_reward_22253.053.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:958d830194bb3a76f926a88f26a3854b1b6061fc21ebe9c6b6f8c9ec2c81cfce
+size 573943
checkpoint_p0/checkpoint_000012502_81985536.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02b5ccd0f162ea965c7be784649d2f9c5ce842fce885e4638124817f562cdb60
+size 574255
checkpoint_p0/checkpoint_000015262_100073472.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fbb08b5ebb4a59eda008ef1ba5e3a89a299c3422dff80f3bd656fb32b40729e
+size 574255
config.json
ADDED
@@ -0,0 +1,147 @@
+{
+  "help": false,
+  "algo": "APPO",
+  "env": "halfcheetah",
+  "experiment": "04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5",
+  "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+  "restart_behavior": "resume",
+  "device": "gpu",
+  "seed": 2322090,
+  "num_policies": 1,
+  "async_rl": false,
+  "serial_mode": true,
+  "batched_sampling": true,
+  "num_batches_to_accumulate": 2,
+  "worker_num_splits": 1,
+  "policy_workers_per_policy": 1,
+  "max_policy_lag": 1000,
+  "num_workers": 1,
+  "num_envs_per_worker": 1,
+  "batch_size": 32768,
+  "num_batches_per_epoch": 2,
+  "num_epochs": 5,
+  "rollout": 32,
+  "recurrence": 1,
+  "shuffle_minibatches": false,
+  "gamma": 0.99,
+  "reward_scale": 0.01,
+  "reward_clip": 1000.0,
+  "value_bootstrap": true,
+  "normalize_returns": true,
+  "exploration_loss_coeff": 0.0,
+  "value_loss_coeff": 2.0,
+  "kl_loss_coeff": 0.0,
+  "exploration_loss": "entropy",
+  "gae_lambda": 0.95,
+  "ppo_clip_ratio": 0.2,
+  "ppo_clip_value": 1.0,
+  "with_vtrace": false,
+  "vtrace_rho": 1.0,
+  "vtrace_c": 1.0,
+  "optimizer": "adam",
+  "adam_eps": 1e-06,
+  "adam_beta1": 0.9,
+  "adam_beta2": 0.999,
+  "max_grad_norm": 1.0,
+  "learning_rate": 0.0003,
+  "lr_schedule": "kl_adaptive_epoch",
+  "lr_schedule_kl_threshold": 0.008,
+  "lr_adaptive_min": 1e-06,
+  "lr_adaptive_max": 0.002,
+  "obs_subtract_mean": 0.0,
+  "obs_scale": 1.0,
+  "normalize_input": true,
+  "normalize_input_keys": null,
+  "decorrelate_experience_max_seconds": 0,
+  "decorrelate_envs_on_one_worker": true,
+  "actor_worker_gpus": [
+    0
+  ],
+  "set_workers_cpu_affinity": true,
+  "force_envs_single_thread": false,
+  "default_niceness": 0,
+  "log_to_file": true,
+  "experiment_summaries_interval": 10,
+  "flush_summaries_interval": 30,
+  "stats_avg": 100,
+  "summaries_use_frameskip": true,
+  "heartbeat_interval": 20,
+  "heartbeat_reporting_interval": 180,
+  "train_for_env_steps": 100000000,
+  "train_for_seconds": 10000000000,
+  "save_every_sec": 120,
+  "keep_checkpoints": 2,
+  "load_checkpoint_kind": "latest",
+  "save_milestones_sec": -1,
+  "save_best_every_sec": 5,
+  "save_best_metric": "reward",
+  "save_best_after": 5000000,
+  "benchmark": false,
+  "encoder_mlp_layers": [
+    256,
+    128,
+    64
+  ],
+  "encoder_conv_architecture": "convnet_simple",
+  "encoder_conv_mlp_layers": [
+    512
+  ],
+  "use_rnn": false,
+  "rnn_size": 512,
+  "rnn_type": "gru",
+  "rnn_num_layers": 1,
+  "decoder_mlp_layers": [],
+  "nonlinearity": "elu",
+  "policy_initialization": "torch_default",
+  "policy_init_gain": 1.0,
+  "actor_critic_share_weights": true,
+  "adaptive_stddev": false,
+  "continuous_tanh_scale": 0.0,
+  "initial_stddev": 1.0,
+  "use_env_info_cache": false,
+  "env_gpu_actions": true,
+  "env_gpu_observations": true,
+  "env_frameskip": 1,
+  "env_framestack": 1,
+  "pixel_format": "CHW",
+  "use_record_episode_statistics": false,
+  "with_wandb": true,
+  "wandb_user": null,
+  "wandb_project": "sample_factory",
+  "wandb_group": null,
+  "wandb_job_type": "SF",
+  "wandb_tags": [],
+  "with_pbt": false,
+  "pbt_mix_policies_in_one_env": true,
+  "pbt_period_env_steps": 5000000,
+  "pbt_start_mutation": 20000000,
+  "pbt_replace_fraction": 0.3,
+  "pbt_mutation_rate": 0.15,
+  "pbt_replace_reward_gap": 0.1,
+  "pbt_replace_reward_gap_absolute": 1e-06,
+  "pbt_optimize_gamma": false,
+  "pbt_target_objective": "true_objective",
+  "pbt_perturb_min": 1.1,
+  "pbt_perturb_max": 1.5,
+  "env_agents": 2048,
+  "clamp_actions": false,
+  "clamp_rew_obs": false,
+  "command_line": "--actor_worker_gpus 0 --wandb_project=sample_factory --with_wandb=True --seed=2322090 --env=halfcheetah --use_rnn=False --num_epochs=5 --experiment=04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5 --train_dir=./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+  "cli_args": {
+    "env": "halfcheetah",
+    "experiment": "04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5",
+    "train_dir": "./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm",
+    "seed": 2322090,
+    "num_epochs": 5,
+    "actor_worker_gpus": [
+      0
+    ],
+    "use_rnn": false,
+    "with_wandb": true,
+    "wandb_project": "sample_factory"
+  },
+  "git_hash": "6aa87f2d416b9fad874b299d864a522c887c238a",
+  "git_repo_name": "git@github.com:alex-petrenko/sample-factory.git",
+  "train_script": "sf_examples.brax.train_brax",
+  "wandb_unique_id": "04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5_20230111_191136_435706"
+}
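
The config above is what Sample-Factory reads back when resuming with `--restart_behavior=resume`. A minimal sketch, standard library only, for inspecting the key PPO hyperparameters from a downloaded copy of `config.json`:
```
import json

with open("config.json") as f:
    cfg = json.load(f)

# A few of the settings that matter most for reproducing this run.
for key in ("algo", "env", "batch_size", "num_epochs", "rollout",
            "learning_rate", "gamma", "gae_lambda", "env_agents"):
    print(key, "=", cfg[key])
```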
git.diff
ADDED
File without changes
replay.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a92f3be64482403cef08b53ea7eefecd2b0b39dc53e754ef9918a2531c6e384
+size 1043582
sf_log.txt
ADDED
@@ -0,0 +1,308 @@
+[2023-01-11 19:11:48,464][457818] Saving configuration to ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5/config.json...
+[2023-01-11 19:11:48,641][457818] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:11:48,642][457818] Rollout worker 0 uses device cuda:0
+[2023-01-11 19:11:48,643][457818] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
+[2023-01-11 19:11:48,675][457818] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:11:48,676][457818] InferenceWorker_p0-w0: min num requests: 1
+[2023-01-11 19:11:48,677][457818] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:11:48,679][457818] WARNING! It is generally recommended to enable Fixed KL loss (https://arxiv.org/pdf/1707.06347.pdf) for continuous action tasks to avoid potential numerical issues. I.e. set --kl_loss_coeff=0.1
+[2023-01-11 19:11:48,679][457818] Setting fixed seed 2322090
+[2023-01-11 19:11:48,680][457818] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:11:48,680][457818] Initializing actor-critic model on device cuda:0
+[2023-01-11 19:11:48,681][457818] RunningMeanStd input shape: (18,)
+[2023-01-11 19:11:48,682][457818] RunningMeanStd input shape: (1,)
+[2023-01-11 19:11:48,763][457818] Created Actor Critic model with architecture:
+[2023-01-11 19:11:48,764][457818] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): MultiInputEncoder(
+    (encoders): ModuleDict(
+      (obs): MlpEncoder(
+        (mlp_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Linear)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Linear)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreIdentity()
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=64, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
+    (distribution_linear): Linear(in_features=64, out_features=6, bias=True)
+  )
+)
+[2023-01-11 19:11:48,767][457818] Using optimizer <class 'torch.optim.adam.Adam'>
+[2023-01-11 19:11:48,770][457818] No checkpoints found
+[2023-01-11 19:11:48,771][457818] Did not load from checkpoint, starting from scratch!
+[2023-01-11 19:11:48,772][457818] Initialized policy 0 weights for model version 0
+[2023-01-11 19:11:48,772][457818] LearnerWorker_p0 finished initialization!
+[2023-01-11 19:11:48,773][457818] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-01-11 19:11:48,778][457818] Inference worker 0-0 is ready!
+[2023-01-11 19:11:48,779][457818] All inference workers are ready! Signal rollout workers to start!
+[2023-01-11 19:11:48,780][457818] EnvRunner 0-0 uses policy 0
+[2023-01-11 19:11:50,292][457818] Resetting env <VectorGymWrapper instance> with 2048 parallel agents...
+[2023-01-11 19:11:56,260][457818] reset() done, obs.shape=torch.Size([2048, 18])!
+[2023-01-11 19:11:56,271][457818] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-01-11 19:12:09,903][457818] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 150.2. Samples: 2048. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-01-11 19:12:09,914][457818] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 300.2. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-01-11 19:12:09,917][457818] Heartbeat connected on Batcher_0
+[2023-01-11 19:12:09,917][457818] Heartbeat connected on LearnerWorker_p0
+[2023-01-11 19:12:09,918][457818] Heartbeat connected on InferenceWorker_p0-w0
+[2023-01-11 19:12:09,918][457818] Heartbeat connected on RolloutWorker_w0
+[2023-01-11 19:12:09,918][457818] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 300.1. Samples: 4096. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-01-11 19:12:13,067][457818] Fps is (10 sec: 145474.4, 60 sec: 27312.7, 300 sec: 27312.7). Total num frames: 458752. Throughput: 0: 23410.9. Samples: 393216. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
+[2023-01-11 19:12:18,043][457818] Fps is (10 sec: 209709.3, 60 sec: 78261.0, 300 sec: 78261.0). Total num frames: 1703936. Throughput: 0: 52769.7. Samples: 1148928. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:12:23,041][457818] Fps is (10 sec: 249685.6, 60 sec: 110163.8, 300 sec: 110163.8). Total num frames: 2949120. Throughput: 0: 100142.0. Samples: 2680832. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+[2023-01-11 19:12:23,042][457818] Avg episode reward: [(0, '125.668')]
+[2023-01-11 19:12:28,039][457818] Fps is (10 sec: 249141.6, 60 sec: 132028.1, 300 sec: 132028.1). Total num frames: 4194304. Throughput: 0: 131383.4. Samples: 4173824. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:12:28,040][457818] Avg episode reward: [(0, '2973.944')]
+[2023-01-11 19:12:33,087][457818] Fps is (10 sec: 254425.7, 60 sec: 149527.7, 300 sec: 149527.7). Total num frames: 5505024. Throughput: 0: 133673.7. Samples: 4921344. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:12:33,088][457818] Avg episode reward: [(0, '2973.944')]
+[2023-01-11 19:12:33,097][457818] Saving new best policy, reward=2973.944!
+[2023-01-11 19:12:38,040][457818] Fps is (10 sec: 249028.2, 60 sec: 160040.5, 300 sec: 160040.5). Total num frames: 6684672. Throughput: 0: 154058.6. Samples: 6434816. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+[2023-01-11 19:12:38,040][457818] Avg episode reward: [(0, '6932.740')]
+[2023-01-11 19:12:38,102][457818] Saving new best policy, reward=6932.740!
+[2023-01-11 19:12:43,044][457818] Fps is (10 sec: 243537.0, 60 sec: 169539.9, 300 sec: 169539.9). Total num frames: 7929856. Throughput: 0: 239154.4. Samples: 7927808. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:12:43,044][457818] Avg episode reward: [(0, '6932.740')]
+[2023-01-11 19:12:48,040][457818] Fps is (10 sec: 249025.8, 60 sec: 177230.2, 300 sec: 177230.2). Total num frames: 9175040. Throughput: 0: 226951.2. Samples: 8656896. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:12:48,041][457818] Avg episode reward: [(0, '9671.154')]
+[2023-01-11 19:12:48,048][457818] Saving new best policy, reward=9671.154!
+[2023-01-11 19:12:53,043][457818] Fps is (10 sec: 249045.5, 60 sec: 183543.6, 300 sec: 183543.6). Total num frames: 10420224. Throughput: 0: 235453.5. Samples: 10158080. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:12:53,044][457818] Avg episode reward: [(0, '10904.311')]
+[2023-01-11 19:12:53,052][457818] Saving new best policy, reward=10904.311!
+[2023-01-11 19:12:58,043][457818] Fps is (10 sec: 248972.3, 60 sec: 242324.6, 300 sec: 188847.3). Total num frames: 11665408. Throughput: 0: 250311.6. Samples: 11651072. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:12:58,043][457818] Avg episode reward: [(0, '10904.311')]
+[2023-01-11 19:13:03,043][457818] Fps is (10 sec: 249054.8, 60 sec: 243004.9, 300 sec: 193354.1). Total num frames: 12910592. Throughput: 0: 249723.5. Samples: 12386304. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:13:03,044][457818] Avg episode reward: [(0, '11910.594')]
+[2023-01-11 19:13:03,051][457818] Saving new best policy, reward=11910.594!
+[2023-01-11 19:13:08,044][457818] Fps is (10 sec: 249012.5, 60 sec: 243538.6, 300 sec: 197230.8). Total num frames: 14155776. Throughput: 0: 248932.9. Samples: 13883392. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:13:08,044][457818] Avg episode reward: [(0, '11910.594')]
+[2023-01-11 19:13:13,043][457818] Fps is (10 sec: 249041.0, 60 sec: 249139.4, 300 sec: 200607.6). Total num frames: 15400960. Throughput: 0: 249428.1. Samples: 15398912. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+[2023-01-11 19:13:13,043][457818] Avg episode reward: [(0, '12797.121')]
+[2023-01-11 19:13:13,055][457818] Saving new best policy, reward=12797.121!
+[2023-01-11 19:13:18,043][457818] Fps is (10 sec: 249047.4, 60 sec: 249037.9, 300 sec: 203567.3). Total num frames: 16646144. Throughput: 0: 249143.2. Samples: 16121856. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:13:18,044][457818] Avg episode reward: [(0, '13501.586')]
+[2023-01-11 19:13:18,051][457818] Saving new best policy, reward=13501.586!
+[2023-01-11 19:13:23,042][457818] Fps is (10 sec: 249049.9, 60 sec: 249033.7, 300 sec: 206190.1). Total num frames: 17891328. Throughput: 0: 248750.3. Samples: 17629184. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:13:23,043][457818] Avg episode reward: [(0, '13501.586')]
+[2023-01-11 19:13:28,040][457818] Fps is (10 sec: 249120.1, 60 sec: 249034.4, 300 sec: 208529.5). Total num frames: 19136512. Throughput: 0: 248421.5. Samples: 19105792. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+[2023-01-11 19:13:28,040][457818] Avg episode reward: [(0, '14243.232')]
+[2023-01-11 19:13:28,047][457818] Saving new best policy, reward=14243.232!
+[2023-01-11 19:13:33,039][457818] Fps is (10 sec: 249112.8, 60 sec: 248143.4, 300 sec: 210624.3). Total num frames: 20381696. Throughput: 0: 248906.1. Samples: 19857408. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+[2023-01-11 19:13:33,040][457818] Avg episode reward: [(0, '14243.232')]
+[2023-01-11 19:13:38,042][457818] Fps is (10 sec: 248974.0, 60 sec: 249025.3, 300 sec: 212504.5). Total num frames: 21626880. Throughput: 0: 248860.7. Samples: 21356544. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+[2023-01-11 19:13:38,043][457818] Avg episode reward: [(0, '14880.855')]
+[2023-01-11 19:13:38,045][457818] Saving new best policy, reward=14880.855!
+[2023-01-11 19:13:43,040][457818] Fps is (10 sec: 249005.7, 60 sec: 249051.6, 300 sec: 214219.6). Total num frames: 22872064. Throughput: 0: 249277.7. Samples: 22867968. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+[2023-01-11 19:13:43,041][457818] Avg episode reward: [(0, '15424.260')]
+[2023-01-11 19:13:43,053][457818] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003490_22872064.pth...
+[2023-01-11 19:13:43,503][457818] Saving new best policy, reward=15424.260!
+[2023-01-11 19:13:48,043][457818] Fps is (10 sec: 242479.9, 60 sec: 247934.4, 300 sec: 215186.4). Total num frames: 24051712. Throughput: 0: 246899.0. Samples: 23496704. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+[2023-01-11 19:13:48,044][457818] Avg episode reward: [(0, '15424.260')]
+[2023-01-11 19:13:53,041][457818] Fps is (10 sec: 242469.0, 60 sec: 247955.4, 300 sec: 216639.0). Total num frames: 25296896. Throughput: 0: 247186.2. Samples: 25006080. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
+[2023-01-11 19:13:53,041][457818] Avg episode reward: [(0, '16100.578')]
+[2023-01-11 19:13:53,054][457818] Saving new best policy, reward=16100.578!
+[2023-01-11 19:13:58,044][457818] Fps is (10 sec: 249005.3, 60 sec: 247939.8, 300 sec: 217964.0). Total num frames: 26542080. Throughput: 0: 246163.0. Samples: 26476544. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+[2023-01-11 19:13:58,044][457818] Avg episode reward: [(0, '16100.578')]
+[2023-01-11 19:14:03,044][457818] Fps is (10 sec: 242413.1, 60 sec: 246848.2, 300 sec: 218672.7). Total num frames: 27721728. Throughput: 0: 246394.3. Samples: 27209728. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+[2023-01-11 19:14:03,044][457818] Avg episode reward: [(0, '16658.027')]
+[2023-01-11 19:14:03,058][457818] Saving new best policy, reward=16658.027!
+[2023-01-11 19:14:08,042][457818] Fps is (10 sec: 242525.3, 60 sec: 246858.8, 300 sec: 219827.6). Total num frames: 28966912. Throughput: 0: 245851.0. Samples: 28692480. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:14:08,043][457818] Avg episode reward: [(0, '17339.150')]
+[2023-01-11 19:14:08,050][457818] Saving new best policy, reward=17339.150!
+[2023-01-11 19:14:13,043][457818] Fps is (10 sec: 249044.6, 60 sec: 246848.8, 300 sec: 220893.2). Total num frames: 30212096. Throughput: 0: 246605.2. Samples: 30203904. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:14:13,044][457818] Avg episode reward: [(0, '17339.150')]
+[2023-01-11 19:14:18,042][457818] Fps is (10 sec: 249024.9, 60 sec: 246855.1, 300 sec: 221887.2). Total num frames: 31457280. Throughput: 0: 246104.8. Samples: 30932992. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:14:18,043][457818] Avg episode reward: [(0, '17867.812')]
+[2023-01-11 19:14:18,050][457818] Saving new best policy, reward=17867.812!
+[2023-01-11 19:14:23,043][457818] Fps is (10 sec: 249051.9, 60 sec: 246849.2, 300 sec: 222811.6). Total num frames: 32702464. Throughput: 0: 246303.8. Samples: 32440320. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:14:23,043][457818] Avg episode reward: [(0, '17867.812')]
+[2023-01-11 19:14:28,041][457818] Fps is (10 sec: 249081.8, 60 sec: 246848.8, 300 sec: 223678.7). Total num frames: 33947648. Throughput: 0: 245985.0. Samples: 33937408. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:14:28,041][457818] Avg episode reward: [(0, '18219.049')]
+[2023-01-11 19:14:28,048][457818] Saving new best policy, reward=18219.049!
+[2023-01-11 19:14:33,043][457818] Fps is (10 sec: 249041.6, 60 sec: 246837.4, 300 sec: 224484.7). Total num frames: 35192832. Throughput: 0: 248262.6. Samples: 34668544. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:14:33,043][457818] Avg episode reward: [(0, '18429.346')]
+[2023-01-11 19:14:33,049][457818] Saving new best policy, reward=18429.346!
+[2023-01-11 19:14:38,076][457818] Fps is (10 sec: 254685.5, 60 sec: 247804.7, 300 sec: 225601.8). Total num frames: 36503552. Throughput: 0: 248204.4. Samples: 36184064. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:14:38,077][457818] Avg episode reward: [(0, '18429.346')]
+[2023-01-11 19:14:43,040][457818] Fps is (10 sec: 249103.3, 60 sec: 246853.5, 300 sec: 225960.5). Total num frames: 37683200. Throughput: 0: 248876.0. Samples: 37675008. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:14:43,041][457818] Avg episode reward: [(0, '18644.486')]
+[2023-01-11 19:14:43,052][457818] Saving new best policy, reward=18644.486!
+[2023-01-11 19:14:48,042][457818] Fps is (10 sec: 243307.2, 60 sec: 247945.1, 300 sec: 226629.1). Total num frames: 38928384. Throughput: 0: 248907.7. Samples: 38410240. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:14:48,043][457818] Avg episode reward: [(0, '18897.219')]
+[2023-01-11 19:14:48,050][457818] Saving new best policy, reward=18897.219!
+[2023-01-11 19:14:51,825][457818] Early stopping after 4 epochs (8 sgd steps), loss delta 0.0000010
+[2023-01-11 19:14:53,040][457818] Fps is (10 sec: 249043.9, 60 sec: 247949.4, 300 sec: 227266.4). Total num frames: 40173568. Throughput: 0: 249323.1. Samples: 39911424. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:14:53,040][457818] Avg episode reward: [(0, '18897.219')]
+[2023-01-11 19:14:58,096][457818] Fps is (10 sec: 254218.6, 60 sec: 248818.9, 300 sec: 228154.6). Total num frames: 41484288. Throughput: 0: 248926.1. Samples: 41418752. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:14:58,097][457818] Avg episode reward: [(0, '19037.414')]
+[2023-01-11 19:14:58,099][457818] Saving new best policy, reward=19037.414!
+[2023-01-11 19:15:03,042][457818] Fps is (10 sec: 242436.1, 60 sec: 247953.3, 300 sec: 228078.7). Total num frames: 42598400. Throughput: 0: 247722.0. Samples: 42080256. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+[2023-01-11 19:15:03,042][457818] Avg episode reward: [(0, '19037.414')]
+[2023-01-11 19:15:08,042][457818] Fps is (10 sec: 237224.6, 60 sec: 247945.8, 300 sec: 228625.0). Total num frames: 43843584. Throughput: 0: 247540.8. Samples: 43579392. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+[2023-01-11 19:15:08,042][457818] Avg episode reward: [(0, '19141.273')]
+[2023-01-11 19:15:08,049][457818] Saving new best policy, reward=19141.273!
+[2023-01-11 19:15:13,043][457818] Fps is (10 sec: 242457.3, 60 sec: 246855.3, 300 sec: 228809.5). Total num frames: 45023232. Throughput: 0: 245112.2. Samples: 44967936. Policy #0 lag: (min: 8.0, avg: 8.0, max: 8.0)
+[2023-01-11 19:15:13,043][457818] Avg episode reward: [(0, '19141.273')]
+[2023-01-11 19:15:18,042][457818] Fps is (10 sec: 242477.9, 60 sec: 246854.6, 300 sec: 229311.6). Total num frames: 46268416. Throughput: 0: 245672.5. Samples: 45723648. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+[2023-01-11 19:15:18,043][457818] Avg episode reward: [(0, '19317.938')]
+[2023-01-11 19:15:18,049][457818] Saving new best policy, reward=19317.938!
+[2023-01-11 19:15:23,041][457818] Fps is (10 sec: 242515.3, 60 sec: 245766.0, 300 sec: 229472.3). Total num frames: 47448064. Throughput: 0: 243582.3. Samples: 47136768. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
+[2023-01-11 19:15:23,042][457818] Avg episode reward: [(0, '19508.840')]
+[2023-01-11 19:15:23,057][457818] Saving new best policy, reward=19508.840!
+[2023-01-11 19:15:28,044][457818] Fps is (10 sec: 235891.0, 60 sec: 244656.0, 300 sec: 229622.3). Total num frames: 48627712. Throughput: 0: 242008.4. Samples: 48566272. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+[2023-01-11 19:15:28,044][457818] Avg episode reward: [(0, '19508.840')]
+[2023-01-11 19:15:33,041][457818] Fps is (10 sec: 242488.1, 60 sec: 244673.7, 300 sec: 230072.7). Total num frames: 49872896. Throughput: 0: 242853.9. Samples: 49338368. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+[2023-01-11 19:15:33,042][457818] Avg episode reward: [(0, '19688.805')]
+[2023-01-11 19:15:33,053][457818] Saving new best policy, reward=19688.805!
+[2023-01-11 19:15:38,040][457818] Fps is (10 sec: 242563.4, 60 sec: 242628.6, 300 sec: 230205.6). Total num frames: 51052544. Throughput: 0: 241524.0. Samples: 50780160. Policy #0 lag: (min: 6.0, avg: 6.0, max: 6.0)
+[2023-01-11 19:15:38,041][457818] Avg episode reward: [(0, '19688.805')]
+[2023-01-11 19:15:43,039][457818] Fps is (10 sec: 242522.6, 60 sec: 243577.2, 300 sec: 230621.6). Total num frames: 52297728. Throughput: 0: 241787.2. Samples: 52285440. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:15:43,040][457818] Avg episode reward: [(0, '19800.465')]
+[2023-01-11 19:15:43,052][457818] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007978_52297728.pth...
+[2023-01-11 19:15:43,291][457818] Saving new best policy, reward=19800.465!
+[2023-01-11 19:15:48,043][457818] Fps is (10 sec: 235874.7, 60 sec: 241389.9, 300 sec: 230450.3). Total num frames: 53411840. Throughput: 0: 240156.7. Samples: 52887552. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:15:48,043][457818] Avg episode reward: [(0, '20117.086')]
+[2023-01-11 19:15:48,050][457818] Saving new best policy, reward=20117.086!
+[2023-01-11 19:15:53,072][457818] Fps is (10 sec: 235160.0, 60 sec: 241259.9, 300 sec: 230813.9). Total num frames: 54657024. Throughput: 0: 238862.5. Samples: 54335488. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:15:53,073][457818] Avg episode reward: [(0, '20117.086')]
+[2023-01-11 19:15:58,041][457818] Fps is (10 sec: 249066.5, 60 sec: 240518.8, 300 sec: 231220.2). Total num frames: 55902208. Throughput: 0: 241670.7. Samples: 55842816. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:15:58,042][457818] Avg episode reward: [(0, '20120.244')]
+[2023-01-11 19:15:58,049][457818] Saving new best policy, reward=20120.244!
+[2023-01-11 19:16:02,787][457818] Early stopping after 2 epochs (4 sgd steps), loss delta 0.0000008
+[2023-01-11 19:16:03,048][457818] Fps is (10 sec: 249643.5, 60 sec: 242457.6, 300 sec: 231575.1). Total num frames: 57147392. Throughput: 0: 241358.9. Samples: 56586240. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:16:03,049][457818] Avg episode reward: [(0, '20120.244')]
+[2023-01-11 19:16:08,086][457818] Fps is (10 sec: 247928.1, 60 sec: 242303.9, 300 sec: 231886.7). Total num frames: 58392576. Throughput: 0: 242742.2. Samples: 58071040. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:16:08,087][457818] Avg episode reward: [(0, '20412.082')]
+[2023-01-11 19:16:08,089][457818] Saving new best policy, reward=20412.082!
+[2023-01-11 19:16:13,060][457818] Fps is (10 sec: 248745.2, 60 sec: 243506.5, 300 sec: 232244.5). Total num frames: 59637760. Throughput: 0: 244489.4. Samples: 59572224. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:16:13,060][457818] Avg episode reward: [(0, '20515.316')]
+[2023-01-11 19:16:13,069][457818] Saving new best policy, reward=20515.316!
+[2023-01-11 19:16:18,091][457818] Fps is (10 sec: 248923.0, 60 sec: 243377.7, 300 sec: 232537.7). Total num frames: 60882944. Throughput: 0: 243534.7. Samples: 60309504. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:16:18,092][457818] Avg episode reward: [(0, '20515.316')]
+[2023-01-11 19:16:23,042][457818] Fps is (10 sec: 249467.3, 60 sec: 244663.4, 300 sec: 232889.0). Total num frames: 62128128. Throughput: 0: 245293.3. Samples: 61818880. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:16:23,044][457818] Avg episode reward: [(0, '20655.256')]
+[2023-01-11 19:16:23,058][457818] Saving new best policy, reward=20655.256!
+[2023-01-11 19:16:28,043][457818] Fps is (10 sec: 250227.3, 60 sec: 245761.9, 300 sec: 233185.5). Total num frames: 63373312. Throughput: 0: 245285.2. Samples: 63324160. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:16:28,044][457818] Avg episode reward: [(0, '20655.256')]
+[2023-01-11 19:16:33,041][457818] Fps is (10 sec: 249067.2, 60 sec: 245759.8, 300 sec: 233473.5). Total num frames: 64618496. Throughput: 0: 248680.6. Samples: 64077824. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:16:33,042][457818] Avg episode reward: [(0, '20727.250')]
+[2023-01-11 19:16:33,053][457818] Saving new best policy, reward=20727.250!
+[2023-01-11 19:16:38,043][457818] Fps is (10 sec: 249038.3, 60 sec: 246840.8, 300 sec: 233748.1). Total num frames: 65863680. Throughput: 0: 250200.2. Samples: 65587200. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:16:38,044][457818] Avg episode reward: [(0, '20896.125')]
+[2023-01-11 19:16:38,050][457818] Saving new best policy, reward=20896.125!
+[2023-01-11 19:16:43,039][457818] Fps is (10 sec: 249082.6, 60 sec: 246853.0, 300 sec: 234017.7). Total num frames: 67108864. Throughput: 0: 249776.5. Samples: 67082240. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:16:43,040][457818] Avg episode reward: [(0, '20896.125')]
+[2023-01-11 19:16:48,042][457818] Fps is (10 sec: 249052.9, 60 sec: 249037.6, 300 sec: 234272.6). Total num frames: 68354048. Throughput: 0: 249841.1. Samples: 67827712. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:16:48,043][457818] Avg episode reward: [(0, '21059.682')]
+[2023-01-11 19:16:48,050][457818] Saving new best policy, reward=21059.682!
+[2023-01-11 19:16:53,087][457818] Fps is (10 sec: 247850.0, 60 sec: 248974.6, 300 sec: 245773.7). Total num frames: 69599232. Throughput: 0: 249030.8. Samples: 69277696. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:16:53,088][457818] Avg episode reward: [(0, '21059.682')]
+[2023-01-11 19:16:58,042][457818] Fps is (10 sec: 242488.8, 60 sec: 247941.3, 300 sec: 245650.5). Total num frames: 70778880. Throughput: 0: 248769.3. Samples: 70762496. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:16:58,043][457818] Avg episode reward: [(0, '21069.305')]
+[2023-01-11 19:16:58,049][457818] Saving new best policy, reward=21069.305!
+[2023-01-11 19:17:03,042][457818] Fps is (10 sec: 243572.6, 60 sec: 247967.0, 300 sec: 245711.7). Total num frames: 72024064. Throughput: 0: 248939.4. Samples: 71499776. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:17:03,043][457818] Avg episode reward: [(0, '21291.115')]
+[2023-01-11 19:17:03,050][457818] Saving new best policy, reward=21291.115!
+[2023-01-11 19:17:08,045][457818] Fps is (10 sec: 248975.5, 60 sec: 248116.1, 300 sec: 246834.2). Total num frames: 73269248. Throughput: 0: 247841.1. Samples: 72972288. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+[2023-01-11 19:17:08,045][457818] Avg episode reward: [(0, '21291.115')]
+[2023-01-11 19:17:13,042][457818] Fps is (10 sec: 249056.6, 60 sec: 248018.8, 300 sec: 246816.7). Total num frames: 74514432. Throughput: 0: 247770.4. Samples: 74473472. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+[2023-01-11 19:17:13,042][457818] Avg episode reward: [(0, '21271.732')]
+[2023-01-11 19:17:18,041][457818] Fps is (10 sec: 249125.4, 60 sec: 248149.7, 300 sec: 246815.4). Total num frames: 75759616. Throughput: 0: 247717.4. Samples: 75225088. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
+[2023-01-11 19:17:18,042][457818] Avg episode reward: [(0, '21271.732')]
+[2023-01-11 19:17:23,040][457818] Fps is (10 sec: 249083.2, 60 sec: 247955.1, 300 sec: 246814.8). Total num frames: 77004800. Throughput: 0: 247188.6. Samples: 76709888. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:17:23,040][457818] Avg episode reward: [(0, '21196.262')]
+[2023-01-11 19:17:28,043][457818] Fps is (10 sec: 248985.7, 60 sec: 247944.4, 300 sec: 246629.8). Total num frames: 78249984. Throughput: 0: 246831.4. Samples: 78190592. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:17:28,044][457818] Avg episode reward: [(0, '21287.096')]
+[2023-01-11 19:17:33,079][457818] Fps is (10 sec: 248060.4, 60 sec: 247787.5, 300 sec: 246782.1). Total num frames: 79495168. Throughput: 0: 246605.2. Samples: 78934016. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:17:33,080][457818] Avg episode reward: [(0, '21287.096')]
+[2023-01-11 19:17:38,091][457818] Fps is (10 sec: 247857.5, 60 sec: 247747.7, 300 sec: 246776.0). Total num frames: 80740352. Throughput: 0: 247561.1. Samples: 80418816. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:17:38,091][457818] Avg episode reward: [(0, '21565.305')]
+[2023-01-11 19:17:38,094][457818] Saving new best policy, reward=21565.305!
+[2023-01-11 19:17:43,044][457818] Fps is (10 sec: 249925.5, 60 sec: 247926.8, 300 sec: 246812.2). Total num frames: 81985536. Throughput: 0: 247936.6. Samples: 81920000. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:17:43,044][457818] Avg episode reward: [(0, '21661.523')]
+[2023-01-11 19:17:43,053][457818] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000012502_81985536.pth...
+[2023-01-11 19:17:43,067][457818] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000003490_22872064.pth
+[2023-01-11 19:17:43,069][457818] Saving new best policy, reward=21661.523!
+[2023-01-11 19:17:48,043][457818] Fps is (10 sec: 250231.8, 60 sec: 247942.2, 300 sec: 246815.6). Total num frames: 83230720. Throughput: 0: 248306.0. Samples: 82673664. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:17:48,044][457818] Avg episode reward: [(0, '21661.523')]
+[2023-01-11 19:17:53,076][457818] Fps is (10 sec: 248241.7, 60 sec: 247992.3, 300 sec: 246787.6). Total num frames: 84475904. Throughput: 0: 248546.9. Samples: 84164608. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:17:53,076][457818] Avg episode reward: [(0, '21755.340')]
+[2023-01-11 19:17:53,085][457818] Saving new best policy, reward=21755.340!
+[2023-01-11 19:17:58,039][457818] Fps is (10 sec: 242578.1, 60 sec: 247957.5, 300 sec: 246596.2). Total num frames: 85655552. Throughput: 0: 248368.7. Samples: 85649408. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:17:58,040][457818] Avg episode reward: [(0, '21755.340')]
+[2023-01-11 19:18:03,090][457818] Fps is (10 sec: 248681.0, 60 sec: 248839.9, 300 sec: 246776.5). Total num frames: 86966272. Throughput: 0: 247721.0. Samples: 86384640. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:18:03,091][457818] Avg episode reward: [(0, '21714.219')]
+[2023-01-11 19:18:08,040][457818] Fps is (10 sec: 249000.9, 60 sec: 247961.7, 300 sec: 246594.8). Total num frames: 88145920. Throughput: 0: 248304.9. Samples: 87883776. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:18:08,041][457818] Avg episode reward: [(0, '21863.023')]
+[2023-01-11 19:18:08,048][457818] Saving new best policy, reward=21863.023!
+[2023-01-11 19:18:13,042][457818] Fps is (10 sec: 243639.5, 60 sec: 247941.2, 300 sec: 246593.7). Total num frames: 89391104. Throughput: 0: 248130.1. Samples: 89356288. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:18:13,043][457818] Avg episode reward: [(0, '21863.023')]
+[2023-01-11 19:18:18,043][457818] Fps is (10 sec: 248970.5, 60 sec: 247936.0, 300 sec: 246592.1). Total num frames: 90636288. Throughput: 0: 248689.8. Samples: 90116096. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:18:18,044][457818] Avg episode reward: [(0, '21893.676')]
+[2023-01-11 19:18:18,046][457818] Saving new best policy, reward=21893.676!
+[2023-01-11 19:18:23,078][457818] Fps is (10 sec: 254683.4, 60 sec: 248878.0, 300 sec: 246783.2). Total num frames: 91947008. Throughput: 0: 249061.0. Samples: 91623424. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
+[2023-01-11 19:18:23,079][457818] Avg episode reward: [(0, '21893.676')]
+[2023-01-11 19:18:28,043][457818] Fps is (10 sec: 249050.8, 60 sec: 247946.8, 300 sec: 246590.1). Total num frames: 93126656. Throughput: 0: 249042.5. Samples: 93126656. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:18:28,043][457818] Avg episode reward: [(0, '21788.742')]
+[2023-01-11 19:18:33,041][457818] Fps is (10 sec: 243376.3, 60 sec: 248100.7, 300 sec: 246593.9). Total num frames: 94371840. Throughput: 0: 248317.1. Samples: 93847552. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:18:33,042][457818] Avg episode reward: [(0, '22082.066')]
+[2023-01-11 19:18:33,054][457818] Saving new best policy, reward=22082.066!
+[2023-01-11 19:18:38,042][457818] Fps is (10 sec: 249041.1, 60 sec: 248144.3, 300 sec: 246591.2). Total num frames: 95617024. Throughput: 0: 248856.5. Samples: 95354880. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:18:38,043][457818] Avg episode reward: [(0, '22082.066')]
+[2023-01-11 19:18:43,040][457818] Fps is (10 sec: 249063.0, 60 sec: 247958.0, 300 sec: 246817.0). Total num frames: 96862208. Throughput: 0: 248893.0. Samples: 96849920. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:18:43,041][457818] Avg episode reward: [(0, '22204.691')]
+[2023-01-11 19:18:43,055][457818] Saving new best policy, reward=22204.691!
+[2023-01-11 19:18:48,044][457818] Fps is (10 sec: 255562.0, 60 sec: 249034.4, 300 sec: 247035.1). Total num frames: 98172928. Throughput: 0: 249977.4. Samples: 97622016. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:18:48,044][457818] Avg episode reward: [(0, '22204.691')]
+[2023-01-11 19:18:53,041][457818] Fps is (10 sec: 255574.3, 60 sec: 249180.7, 300 sec: 247039.7). Total num frames: 99418112. Throughput: 0: 249625.6. Samples: 99117056. Policy #0 lag: (min: 9.0, avg: 9.0, max: 9.0)
+[2023-01-11 19:18:53,042][457818] Avg episode reward: [(0, '22253.053')]
+[2023-01-11 19:18:53,054][457818] Saving new best policy, reward=22253.053!
+[2023-01-11 19:18:55,695][457818] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015262_100073472.pth...
+[2023-01-11 19:18:55,729][457818] Removing ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000007978_52297728.pth
+[2023-01-11 19:18:55,731][457818] Stopping Batcher_0...
+[2023-01-11 19:18:55,732][457818] Stopping InferenceWorker_p0-w0...
+[2023-01-11 19:18:55,732][457818] Stopping RolloutWorker_w0...
+[2023-01-11 19:18:55,732][457818] Component Batcher_0 stopped!
+[2023-01-11 19:18:55,733][457818] Saving ./train_dir/v083_brax_basic_benchmark/v083_brax_basic_benchmark_slurm/04_v083_brax_basic_benchmark_see_2322090_env_halfcheetah_u.rnn_False_n.epo_5/checkpoint_p0/checkpoint_000015262_100073472.pth...
+[2023-01-11 19:18:55,748][457818] Stopping LearnerWorker_p0...
+[2023-01-11 19:18:55,749][457818] Component InferenceWorker_p0-w0 stopped!
+[2023-01-11 19:18:55,749][457818] Component RolloutWorker_w0 stopped!
+[2023-01-11 19:18:55,749][457818] Component LearnerWorker_p0 stopped!
+[2023-01-11 19:18:55,749][457818] Batcher 0 profile tree view:
+batching: 0.3653, releasing_batches: 0.0719
+[2023-01-11 19:18:55,749][457818] InferenceWorker_p0-w0 profile tree view:
+update_model: 0.4851
+one_step: 0.0012
+  handle_policy_step: 60.9075
+    deserialize: 0.5112, stack: 0.0665, obs_to_device_normalize: 10.8553, forward: 38.7097, prepare_outputs: 6.6728, send_messages: 0.8327
+[2023-01-11 19:18:55,750][457818] Learner 0 profile tree view:
+misc: 0.0056, prepare_batch: 5.7227
+train: 89.4918
+  epoch_init: 0.0671, minibatch_init: 1.0447, losses_postprocess: 2.4470, kl_divergence: 5.8758, after_optimizer: 0.3590
+  calculate_losses: 18.5979
+    losses_init: 0.0364, forward_head: 3.0156, bptt_initial: 0.1332, bptt: 0.1403, tail: 9.1234, advantages_returns: 1.1915, losses: 3.6083
+  update: 59.1597
+    clip: 8.9046
+[2023-01-11 19:18:55,750][457818] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 0.0901, enqueue_policy_requests: 5.6020, process_policy_outputs: 3.4572, env_step: 223.0124, finalize_trajectories: 0.1451, complete_rollouts: 0.0697
+post_env_step: 14.4411
+process_env_step: 2.8677
+[2023-01-11 19:18:55,750][457818] Loop Runner_EvtLoop terminating...
+[2023-01-11 19:18:55,750][457818] Runner profile tree view:
+main_loop: 427.0721
+[2023-01-11 19:18:55,751][457818] Collected {0: 100073472}, FPS: 234324.5
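
The `Avg episode reward` entries in sf_log.txt above trace the learning curve from roughly 125 up to the final best of 22253.053. A minimal sketch, assuming a downloaded copy of the log, that recovers (timestamp, reward) pairs using only the standard library:
```
import re

# Matches lines like:
# [2023-01-11 19:12:23,042][457818] Avg episode reward: [(0, '125.668')]
pattern = re.compile(r"\[(.+?)\]\[\d+\] Avg episode reward: \[\(0, '([\d.]+)'\)\]")

with open("sf_log.txt") as f:
    curve = [(m.group(1), float(m.group(2)))
             for line in f if (m := pattern.search(line))]

print(curve[0])   # first logged reward
print(curve[-1])  # last logged reward
```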