andrewzhang505 committed
Commit • 206942c
Parent(s): b16d53e
Upload . with huggingface_hub
- .summary/0/events.out.tfevents.1670186873.andrew-gpu +3 -0
- README.md +22 -0
- checkpoint_p0/best_000000928_475136_reward_321.313.pth +3 -0
- checkpoint_p0/checkpoint_000000928_475136.pth +3 -0
- checkpoint_p0/checkpoint_000000952_487424.pth +3 -0
- config.json +130 -0
- git.diff +66 -0
- sf_log.txt +263 -0
.summary/0/events.out.tfevents.1670186873.andrew-gpu
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e43b5413e94b50c334b2b093667bd2d720ba516a657964401af2294afa3c992b
size 70933
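Note: this file is a Git LFS pointer to the TensorBoard event file written during training. As a minimal sketch (assuming the repository has been downloaded, the LFS object fetched, and the `tensorboard` package installed; the local path is hypothetical), the recorded scalars can be inspected like this:

```python
# Sketch: inspect the uploaded TensorBoard summaries.
# Assumes the repo is downloaded locally and `tensorboard` is installed; the path is hypothetical.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

event_file = "ant_test2/.summary/0/events.out.tfevents.1670186873.andrew-gpu"  # hypothetical local path
acc = EventAccumulator(event_file)
acc.Reload()  # parse the event file into memory

# List available scalar tags and print the most recent value of each.
for tag in acc.Tags()["scalars"]:
    last_event = acc.Scalars(tag)[-1]
    print(f"{tag}: step={last_event.step}, value={last_event.value:.3f}")
```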
README.md
ADDED
@@ -0,0 +1,22 @@
---
library_name: sample-factory
tags:
- deep-reinforcement-learning
- reinforcement-learning
- sample-factory
---

An **APPO** model trained on the **mujoco_ant** environment.

This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/

## Downloading the model

After installing Sample-Factory, download the model with:
```
python -m sample_factory.huggingface.load_from_hub -r andrewzhang505/ant_test2
```
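As an alternative to the Sample-Factory CLI shown in the README, the repository can also be fetched directly with `huggingface_hub`. This is only a sketch of an equivalent download, not the project's documented workflow; it assumes nothing beyond `huggingface_hub` being installed:

```python
# Sketch: download the whole model repository into the local Hugging Face cache.
from huggingface_hub import snapshot_download

# Fetches all files in the repo (checkpoints, config.json, TensorBoard summaries, logs)
# and returns the local directory they were placed in.
local_dir = snapshot_download(repo_id="andrewzhang505/ant_test2")
print("Model files downloaded to:", local_dir)
```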
checkpoint_p0/best_000000928_475136_reward_321.313.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:78a270ae0aaa4d62d1cc85c340c7bc1f5917af859419a227346a58323c755e74
size 89730
checkpoint_p0/checkpoint_000000928_475136.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:920e8e287f9e8564ad2da100a86f8050bdfa28428635072a4dc315076a25a387
size 89730
checkpoint_p0/checkpoint_000000952_487424.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:df941a254fb9b62986160dad41587b474e099476b2f7e76299e09883dd0142dd
size 89730
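The three `.pth` files above are Git LFS pointers to PyTorch checkpoints saved by the learner: the best-reward checkpoint plus the two most recent ones (consistent with `keep_checkpoints: 2` in config.json). A minimal inspection sketch, assuming PyTorch is installed, the LFS objects have been fetched, and using a hypothetical local path:

```python
# Sketch: inspect a downloaded checkpoint without assuming its internal layout.
import torch

ckpt_path = "ant_test2/checkpoint_p0/best_000000928_475136_reward_321.313.pth"  # hypothetical local path
checkpoint = torch.load(ckpt_path, map_location="cpu")  # load onto CPU for inspection

# If the checkpoint is a plain dict (typical for torch.save of a state dict bundle),
# print its top-level keys and value types.
if isinstance(checkpoint, dict):
    for key, value in checkpoint.items():
        print(key, type(value).__name__)
else:
    print(type(checkpoint))
```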
config.json
ADDED
@@ -0,0 +1,130 @@
{
    "help": false,
    "algo": "APPO",
    "env": "mujoco_ant",
    "experiment": "ant_test",
    "train_dir": "/home/andrew_huggingface_co/sample-factory/train_dir",
    "restart_behavior": "resume",
    "device": "gpu",
    "seed": null,
    "num_policies": 1,
    "async_rl": false,
    "serial_mode": false,
    "batched_sampling": false,
    "num_batches_to_accumulate": 2,
    "worker_num_splits": 2,
    "policy_workers_per_policy": 1,
    "max_policy_lag": 1000,
    "num_workers": 8,
    "num_envs_per_worker": 8,
    "batch_size": 1024,
    "num_batches_per_epoch": 4,
    "num_epochs": 2,
    "rollout": 64,
    "recurrence": 1,
    "shuffle_minibatches": false,
    "gamma": 0.99,
    "reward_scale": 1,
    "reward_clip": 1000.0,
    "value_bootstrap": true,
    "normalize_returns": true,
    "exploration_loss_coeff": 0.0,
    "value_loss_coeff": 1.3,
    "kl_loss_coeff": 0.1,
    "exploration_loss": "entropy",
    "gae_lambda": 0.95,
    "ppo_clip_ratio": 0.2,
    "ppo_clip_value": 1.0,
    "with_vtrace": false,
    "vtrace_rho": 1.0,
    "vtrace_c": 1.0,
    "optimizer": "adam",
    "adam_eps": 1e-06,
    "adam_beta1": 0.9,
    "adam_beta2": 0.999,
    "max_grad_norm": 3.5,
    "learning_rate": 0.00295,
    "lr_schedule": "linear_decay",
    "lr_schedule_kl_threshold": 0.008,
    "obs_subtract_mean": 0.0,
    "obs_scale": 1.0,
    "normalize_input": true,
    "normalize_input_keys": null,
    "decorrelate_experience_max_seconds": 0,
    "decorrelate_envs_on_one_worker": true,
    "actor_worker_gpus": [],
    "set_workers_cpu_affinity": true,
    "force_envs_single_thread": false,
    "default_niceness": 0,
    "log_to_file": true,
    "experiment_summaries_interval": 3,
    "flush_summaries_interval": 30,
    "stats_avg": 100,
    "summaries_use_frameskip": true,
    "heartbeat_interval": 20,
    "heartbeat_reporting_interval": 180,
    "train_for_env_steps": 10000000,
    "train_for_seconds": 10000000000,
    "save_every_sec": 15,
    "keep_checkpoints": 2,
    "load_checkpoint_kind": "latest",
    "save_milestones_sec": -1,
    "save_best_every_sec": 5,
    "save_best_metric": "reward",
    "save_best_after": 100000,
    "benchmark": false,
    "encoder_mlp_layers": [
        64,
        64
    ],
    "encoder_conv_architecture": "convnet_simple",
    "encoder_conv_mlp_layers": [
        512
    ],
    "use_rnn": false,
    "rnn_size": 512,
    "rnn_type": "gru",
    "rnn_num_layers": 1,
    "decoder_mlp_layers": [],
    "nonlinearity": "tanh",
    "policy_initialization": "torch_default",
    "policy_init_gain": 1.0,
    "actor_critic_share_weights": true,
    "adaptive_stddev": false,
    "continuous_tanh_scale": 0.0,
    "initial_stddev": 1.0,
    "use_env_info_cache": false,
    "env_gpu_actions": false,
    "env_gpu_observations": true,
    "env_frameskip": 1,
    "env_framestack": 1,
    "pixel_format": "CHW",
    "use_record_episode_statistics": false,
    "with_wandb": false,
    "wandb_user": null,
    "wandb_project": "sample_factory",
    "wandb_group": null,
    "wandb_job_type": "SF",
    "wandb_tags": [],
    "with_pbt": false,
    "pbt_mix_policies_in_one_env": true,
    "pbt_period_env_steps": 5000000,
    "pbt_start_mutation": 20000000,
    "pbt_replace_fraction": 0.3,
    "pbt_mutation_rate": 0.15,
    "pbt_replace_reward_gap": 0.1,
    "pbt_replace_reward_gap_absolute": 1e-06,
    "pbt_optimize_gamma": false,
    "pbt_target_objective": "true_objective",
    "pbt_perturb_min": 1.1,
    "pbt_perturb_max": 1.5,
    "command_line": "--algo=APPO --env=mujoco_ant --experiment=ant_test",
    "cli_args": {
        "algo": "APPO",
        "env": "mujoco_ant",
        "experiment": "ant_test"
    },
    "git_hash": "162ce7329569a2a3abaa26ecb8162a74f833b63e",
    "git_repo_name": "https://github.com/andrewzhang505/sample-factory.git",
    "train_script": "sf_examples.mujoco.train_mujoco"
}
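A short sketch that reads this recorded configuration and derives a couple of quantities implied by it (samples produced per rollout sweep, samples consumed per training epoch). This is plain arithmetic on the stored values, not a claim about Sample-Factory internals; the local path is hypothetical:

```python
# Sketch: load the uploaded config.json and derive a few quantities from it.
import json

with open("ant_test2/config.json") as f:  # hypothetical local path
    cfg = json.load(f)

# 8 workers * 8 envs per worker * 64-step rollout = 4096 transitions per sweep
samples_per_rollout = cfg["num_workers"] * cfg["num_envs_per_worker"] * cfg["rollout"]
# 1024 batch size * 4 batches per epoch = 4096 samples per epoch
samples_per_epoch = cfg["batch_size"] * cfg["num_batches_per_epoch"]

print(f"algo={cfg['algo']} env={cfg['env']} lr={cfg['learning_rate']}")
print(f"samples per rollout sweep: {samples_per_rollout}")
print(f"samples per training epoch: {samples_per_epoch}")
```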
git.diff
ADDED
@@ -0,0 +1,66 @@
diff --git a/sample_factory/huggingface/huggingface_utils.py b/sample_factory/huggingface/huggingface_utils.py
index b6b10fc2..88ebd2c4 100644
--- a/sample_factory/huggingface/huggingface_utils.py
+++ b/sample_factory/huggingface/huggingface_utils.py
@@ -117,27 +117,40 @@ def push_to_hf(dir_path: str, repo_name: str, num_policies: int = 1):
         exist_ok=True,
     )
 
-    # Upload folders
-    folders = [".summary"]
-    for policy_id in range(num_policies):
-        folders.append(f"checkpoint_p{policy_id}")
-    for f in folders:
-        if os.path.exists(os.path.join(dir_path, f)):
-            upload_folder(
-                repo_id=repo_name,
-                folder_path=os.path.join(dir_path, f),
-                path_in_repo=f,
-            )
-
-    # Upload files
-    files = ["config.json", "README.md", "replay.mp4"]
-    for f in files:
-        if os.path.exists(os.path.join(dir_path, f)):
-            upload_file(
-                repo_id=repo_name,
-                path_or_fileobj=os.path.join(dir_path, f),
-                path_in_repo=f,
-            )
+    upload_folder(
+        repo_id=repo_name,
+        folder_path=dir_path,
+        path_in_repo=f,
+        allow_patterns=[
+            ".summary/*",
+            "config.json",
+            "README.md",
+            "replay.mp4",
+        ]
+        + [f"checkpoint_p{policy_id}/*" for policy_id in range(num_policies)],
+    )
+
+    # # Upload folders
+    # folders = [".summary"]
+    # for policy_id in range(num_policies):
+    #     folders.append(f"checkpoint_p{policy_id}")
+    # for f in folders:
+    #     if os.path.exists(os.path.join(dir_path, f)):
+    #         upload_folder(
+    #             repo_id=repo_name,
+    #             folder_path=os.path.join(dir_path, f),
+    #             path_in_repo=f,
+    #         )
+
+    # # Upload files
+    # files = ["config.json", "README.md", "replay.mp4"]
+    # for f in files:
+    #     if os.path.exists(os.path.join(dir_path, f)):
+    #         upload_file(
+    #             repo_id=repo_name,
+    #             path_or_fileobj=os.path.join(dir_path, f),
+    #             path_in_repo=f,
+    #         )
 
     log.info(f"The model has been pushed to {repo_url}")
 
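The captured working-tree diff above replaces the per-folder and per-file upload loops with a single `upload_folder` call filtered by `allow_patterns`. Below is a standalone sketch of that consolidated pattern using `huggingface_hub` directly; the helper name, repo id, and directory are illustrative and not taken verbatim from the diff:

```python
# Sketch of the consolidated upload pattern introduced in the diff above:
# one upload_folder call with allow_patterns instead of many per-file/per-folder uploads.
from huggingface_hub import upload_folder

def push_experiment_dir(dir_path: str, repo_name: str, num_policies: int = 1) -> None:
    """Upload selected experiment artifacts from dir_path to the Hub repo repo_name."""
    patterns = [".summary/*", "config.json", "README.md", "replay.mp4"]
    patterns += [f"checkpoint_p{policy_id}/*" for policy_id in range(num_policies)]
    # Only files matching a pattern are uploaded; everything else in dir_path is skipped.
    upload_folder(repo_id=repo_name, folder_path=dir_path, allow_patterns=patterns)

# Example usage (hypothetical paths/repo):
# push_experiment_dir("./train_dir/ant_test", "andrewzhang505/ant_test2")
```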
sf_log.txt
ADDED
@@ -0,0 +1,263 @@
[2022-12-04 20:47:56,451][04266] Saving configuration to /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/config.json...
[2022-12-04 20:47:56,464][04266] Rollout worker 0 uses device cpu
[2022-12-04 20:47:56,464][04266] Rollout worker 1 uses device cpu
[2022-12-04 20:47:56,464][04266] Rollout worker 2 uses device cpu
[2022-12-04 20:47:56,465][04266] Rollout worker 3 uses device cpu
[2022-12-04 20:47:56,465][04266] Rollout worker 4 uses device cpu
[2022-12-04 20:47:56,465][04266] Rollout worker 5 uses device cpu
[2022-12-04 20:47:56,465][04266] Rollout worker 6 uses device cpu
[2022-12-04 20:47:56,465][04266] Rollout worker 7 uses device cpu
[2022-12-04 20:47:56,465][04266] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
[2022-12-04 20:47:56,487][04266] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2022-12-04 20:47:56,487][04266] InferenceWorker_p0-w0: min num requests: 2
[2022-12-04 20:47:56,519][04266] Starting all processes...
[2022-12-04 20:47:56,520][04266] Starting process learner_proc0
[2022-12-04 20:47:56,570][04266] Starting all processes...
[2022-12-04 20:47:56,577][04266] Starting process inference_proc0-0
[2022-12-04 20:47:56,577][04266] Starting process rollout_proc0
[2022-12-04 20:47:56,578][04266] Starting process rollout_proc1
[2022-12-04 20:47:56,578][04266] Starting process rollout_proc2
[2022-12-04 20:47:56,578][04266] Starting process rollout_proc3
[2022-12-04 20:47:56,579][04266] Starting process rollout_proc4
[2022-12-04 20:47:56,579][04266] Starting process rollout_proc5
[2022-12-04 20:47:56,584][04266] Starting process rollout_proc6
[2022-12-04 20:47:56,591][04266] Starting process rollout_proc7
[2022-12-04 20:47:58,489][04366] Worker 5 uses CPU cores [5]
[2022-12-04 20:47:58,561][04361] Worker 0 uses CPU cores [0]
[2022-12-04 20:47:58,611][04360] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2022-12-04 20:47:58,612][04360] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
[2022-12-04 20:47:58,705][04367] Worker 4 uses CPU cores [4]
[2022-12-04 20:47:58,733][04363] Worker 6 uses CPU cores [6]
[2022-12-04 20:47:58,765][04368] Worker 2 uses CPU cores [2]
[2022-12-04 20:47:58,779][04365] Worker 3 uses CPU cores [3]
[2022-12-04 20:47:58,824][04340] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2022-12-04 20:47:58,825][04340] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
[2022-12-04 20:47:58,834][04364] Worker 7 uses CPU cores [7]
[2022-12-04 20:47:58,885][04362] Worker 1 uses CPU cores [1]
[2022-12-04 20:47:59,427][04360] Num visible devices: 1
[2022-12-04 20:47:59,428][04340] Num visible devices: 1
[2022-12-04 20:47:59,446][04340] Starting seed is not provided
[2022-12-04 20:47:59,446][04340] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2022-12-04 20:47:59,446][04340] Initializing actor-critic model on device cuda:0
[2022-12-04 20:47:59,446][04340] RunningMeanStd input shape: (27,)
[2022-12-04 20:47:59,447][04340] RunningMeanStd input shape: (1,)
[2022-12-04 20:47:59,522][04340] Created Actor Critic model with architecture:
[2022-12-04 20:47:59,522][04340] ActorCriticSharedWeights(
  (obs_normalizer): ObservationNormalizer(
    (running_mean_std): RunningMeanStdDictInPlace(
      (running_mean_std): ModuleDict(
        (obs): RunningMeanStdInPlace()
      )
    )
  )
  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
  (encoder): MultiInputEncoder(
    (encoders): ModuleDict(
      (obs): MlpEncoder(
        (mlp_head): RecursiveScriptModule(
          original_name=Sequential
          (0): RecursiveScriptModule(original_name=Linear)
          (1): RecursiveScriptModule(original_name=Tanh)
          (2): RecursiveScriptModule(original_name=Linear)
          (3): RecursiveScriptModule(original_name=Tanh)
        )
      )
    )
  )
  (core): ModelCoreIdentity()
  (decoder): MlpDecoder(
    (mlp): Identity()
  )
  (critic_linear): Linear(in_features=64, out_features=1, bias=True)
  (action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
    (distribution_linear): Linear(in_features=64, out_features=8, bias=True)
  )
)
[2022-12-04 20:48:03,416][04340] Using optimizer <class 'torch.optim.adam.Adam'>
[2022-12-04 20:48:03,417][04340] No checkpoints found
[2022-12-04 20:48:03,417][04340] Did not load from checkpoint, starting from scratch!
[2022-12-04 20:48:03,417][04340] Initialized policy 0 weights for model version 0
[2022-12-04 20:48:03,422][04340] LearnerWorker_p0 finished initialization!
[2022-12-04 20:48:03,424][04340] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2022-12-04 20:48:03,551][04360] RunningMeanStd input shape: (27,)
[2022-12-04 20:48:03,552][04360] RunningMeanStd input shape: (1,)
[2022-12-04 20:48:03,650][04266] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
[2022-12-04 20:48:07,105][04266] Inference worker 0-0 is ready!
[2022-12-04 20:48:07,105][04266] All inference workers are ready! Signal rollout workers to start!
[2022-12-04 20:48:07,303][04364] Decorrelating experience for 0 frames...
[2022-12-04 20:48:07,303][04362] Decorrelating experience for 0 frames...
[2022-12-04 20:48:07,305][04363] Decorrelating experience for 0 frames...
[2022-12-04 20:48:07,306][04362] Decorrelating experience for 64 frames...
[2022-12-04 20:48:07,305][04367] Decorrelating experience for 0 frames...
[2022-12-04 20:48:07,305][04364] Decorrelating experience for 64 frames...
[2022-12-04 20:48:07,305][04361] Decorrelating experience for 0 frames...
[2022-12-04 20:48:07,305][04368] Decorrelating experience for 0 frames...
[2022-12-04 20:48:07,306][04366] Decorrelating experience for 0 frames...
[2022-12-04 20:48:07,307][04367] Decorrelating experience for 64 frames...
[2022-12-04 20:48:07,307][04363] Decorrelating experience for 64 frames...
[2022-12-04 20:48:07,307][04365] Decorrelating experience for 0 frames...
[2022-12-04 20:48:07,308][04368] Decorrelating experience for 64 frames...
[2022-12-04 20:48:07,308][04366] Decorrelating experience for 64 frames...
[2022-12-04 20:48:07,308][04361] Decorrelating experience for 64 frames...
[2022-12-04 20:48:07,309][04365] Decorrelating experience for 64 frames...
[2022-12-04 20:48:07,359][04364] Decorrelating experience for 128 frames...
[2022-12-04 20:48:07,360][04363] Decorrelating experience for 128 frames...
[2022-12-04 20:48:07,362][04366] Decorrelating experience for 128 frames...
[2022-12-04 20:48:07,361][04362] Decorrelating experience for 128 frames...
[2022-12-04 20:48:07,362][04361] Decorrelating experience for 128 frames...
[2022-12-04 20:48:07,362][04365] Decorrelating experience for 128 frames...
[2022-12-04 20:48:07,362][04367] Decorrelating experience for 128 frames...
[2022-12-04 20:48:07,362][04368] Decorrelating experience for 128 frames...
[2022-12-04 20:48:07,467][04363] Decorrelating experience for 192 frames...
[2022-12-04 20:48:07,467][04364] Decorrelating experience for 192 frames...
[2022-12-04 20:48:07,469][04367] Decorrelating experience for 192 frames...
[2022-12-04 20:48:07,469][04365] Decorrelating experience for 192 frames...
[2022-12-04 20:48:07,470][04366] Decorrelating experience for 192 frames...
[2022-12-04 20:48:07,471][04361] Decorrelating experience for 192 frames...
[2022-12-04 20:48:07,472][04362] Decorrelating experience for 192 frames...
[2022-12-04 20:48:07,474][04368] Decorrelating experience for 192 frames...
[2022-12-04 20:48:07,650][04364] Decorrelating experience for 256 frames...
[2022-12-04 20:48:07,658][04363] Decorrelating experience for 256 frames...
[2022-12-04 20:48:07,658][04365] Decorrelating experience for 256 frames...
[2022-12-04 20:48:07,659][04367] Decorrelating experience for 256 frames...
[2022-12-04 20:48:07,659][04362] Decorrelating experience for 256 frames...
[2022-12-04 20:48:07,661][04361] Decorrelating experience for 256 frames...
[2022-12-04 20:48:07,662][04366] Decorrelating experience for 256 frames...
[2022-12-04 20:48:07,664][04368] Decorrelating experience for 256 frames...
[2022-12-04 20:48:07,856][04364] Decorrelating experience for 320 frames...
[2022-12-04 20:48:07,863][04363] Decorrelating experience for 320 frames...
[2022-12-04 20:48:07,864][04365] Decorrelating experience for 320 frames...
[2022-12-04 20:48:07,866][04362] Decorrelating experience for 320 frames...
[2022-12-04 20:48:07,866][04361] Decorrelating experience for 320 frames...
[2022-12-04 20:48:07,871][04366] Decorrelating experience for 320 frames...
[2022-12-04 20:48:07,872][04367] Decorrelating experience for 320 frames...
[2022-12-04 20:48:07,877][04368] Decorrelating experience for 320 frames...
[2022-12-04 20:48:08,114][04364] Decorrelating experience for 384 frames...
[2022-12-04 20:48:08,119][04363] Decorrelating experience for 384 frames...
[2022-12-04 20:48:08,121][04365] Decorrelating experience for 384 frames...
[2022-12-04 20:48:08,123][04361] Decorrelating experience for 384 frames...
[2022-12-04 20:48:08,128][04362] Decorrelating experience for 384 frames...
[2022-12-04 20:48:08,129][04366] Decorrelating experience for 384 frames...
[2022-12-04 20:48:08,131][04367] Decorrelating experience for 384 frames...
[2022-12-04 20:48:08,144][04368] Decorrelating experience for 384 frames...
[2022-12-04 20:48:08,431][04364] Decorrelating experience for 448 frames...
[2022-12-04 20:48:08,433][04363] Decorrelating experience for 448 frames...
[2022-12-04 20:48:08,437][04365] Decorrelating experience for 448 frames...
[2022-12-04 20:48:08,437][04361] Decorrelating experience for 448 frames...
[2022-12-04 20:48:08,440][04362] Decorrelating experience for 448 frames...
[2022-12-04 20:48:08,444][04367] Decorrelating experience for 448 frames...
[2022-12-04 20:48:08,452][04366] Decorrelating experience for 448 frames...
[2022-12-04 20:48:08,466][04368] Decorrelating experience for 448 frames...
[2022-12-04 20:48:08,650][04266] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
[2022-12-04 20:48:08,652][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth...
[2022-12-04 20:48:13,650][04266] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 8192. Throughput: 0: 846.4. Samples: 8464. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:48:13,650][04266] Avg episode reward: [(0, '-160.026')]
[2022-12-04 20:48:16,478][04266] Heartbeat connected on Batcher_0
[2022-12-04 20:48:16,482][04266] Heartbeat connected on LearnerWorker_p0
[2022-12-04 20:48:16,492][04266] Heartbeat connected on InferenceWorker_p0-w0
[2022-12-04 20:48:16,493][04266] Heartbeat connected on RolloutWorker_w0
[2022-12-04 20:48:16,503][04266] Heartbeat connected on RolloutWorker_w2
[2022-12-04 20:48:16,503][04266] Heartbeat connected on RolloutWorker_w1
[2022-12-04 20:48:16,510][04266] Heartbeat connected on RolloutWorker_w4
[2022-12-04 20:48:16,511][04266] Heartbeat connected on RolloutWorker_w3
[2022-12-04 20:48:16,516][04266] Heartbeat connected on RolloutWorker_w5
[2022-12-04 20:48:16,521][04266] Heartbeat connected on RolloutWorker_w6
[2022-12-04 20:48:16,529][04266] Heartbeat connected on RolloutWorker_w7
[2022-12-04 20:48:18,650][04266] Fps is (10 sec: 3686.4, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 36864. Throughput: 0: 1698.1. Samples: 25472. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:48:18,651][04266] Avg episode reward: [(0, '-169.308')]
[2022-12-04 20:48:18,924][04360] Updated weights for policy 0, policy_version 80 (0.0006)
[2022-12-04 20:48:23,650][04266] Fps is (10 sec: 5734.3, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 65536. Throughput: 0: 2930.0. Samples: 58600. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:48:23,651][04266] Avg episode reward: [(0, '-249.723')]
[2022-12-04 20:48:23,656][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000128_65536.pth...
[2022-12-04 20:48:26,260][04360] Updated weights for policy 0, policy_version 160 (0.0007)
[2022-12-04 20:48:28,650][04266] Fps is (10 sec: 5734.4, 60 sec: 3768.3, 300 sec: 3768.3). Total num frames: 94208. Throughput: 0: 3705.3. Samples: 92632. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:48:28,651][04266] Avg episode reward: [(0, '-89.994')]
[2022-12-04 20:48:33,559][04360] Updated weights for policy 0, policy_version 240 (0.0006)
[2022-12-04 20:48:33,650][04266] Fps is (10 sec: 5734.4, 60 sec: 4096.0, 300 sec: 4096.0). Total num frames: 122880. Throughput: 0: 3641.5. Samples: 109244. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:48:33,651][04266] Avg episode reward: [(0, '-153.751')]
[2022-12-04 20:48:33,651][04340] Saving new best policy, reward=-153.751!
[2022-12-04 20:48:38,650][04266] Fps is (10 sec: 5324.8, 60 sec: 4213.0, 300 sec: 4213.0). Total num frames: 147456. Throughput: 0: 4093.4. Samples: 143268. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:48:38,650][04266] Avg episode reward: [(0, '-137.350')]
[2022-12-04 20:48:38,669][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000296_151552.pth...
[2022-12-04 20:48:38,675][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth
[2022-12-04 20:48:38,675][04340] Saving new best policy, reward=-137.350!
[2022-12-04 20:48:40,889][04360] Updated weights for policy 0, policy_version 320 (0.0006)
[2022-12-04 20:48:43,650][04266] Fps is (10 sec: 5324.8, 60 sec: 4403.2, 300 sec: 4403.2). Total num frames: 176128. Throughput: 0: 4415.1. Samples: 176604. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:48:43,651][04266] Avg episode reward: [(0, '-69.206')]
[2022-12-04 20:48:43,651][04340] Saving new best policy, reward=-69.206!
[2022-12-04 20:48:48,177][04360] Updated weights for policy 0, policy_version 400 (0.0006)
[2022-12-04 20:48:48,650][04266] Fps is (10 sec: 5734.4, 60 sec: 4551.1, 300 sec: 4551.1). Total num frames: 204800. Throughput: 0: 4290.7. Samples: 193080. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:48:48,651][04266] Avg episode reward: [(0, '-52.726')]
[2022-12-04 20:48:48,651][04340] Saving new best policy, reward=-52.726!
[2022-12-04 20:48:53,650][04266] Fps is (10 sec: 5734.4, 60 sec: 4669.5, 300 sec: 4669.5). Total num frames: 233472. Throughput: 0: 5054.2. Samples: 227440. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
[2022-12-04 20:48:53,650][04266] Avg episode reward: [(0, '-33.694')]
[2022-12-04 20:48:53,657][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000456_233472.pth...
[2022-12-04 20:48:53,664][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000128_65536.pth
[2022-12-04 20:48:53,664][04340] Saving new best policy, reward=-33.694!
[2022-12-04 20:48:55,518][04360] Updated weights for policy 0, policy_version 480 (0.0006)
[2022-12-04 20:48:58,650][04266] Fps is (10 sec: 5734.4, 60 sec: 4766.3, 300 sec: 4766.3). Total num frames: 262144. Throughput: 0: 5586.5. Samples: 259856. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:48:58,651][04266] Avg episode reward: [(0, '-45.611')]
[2022-12-04 20:49:03,653][04266] Fps is (10 sec: 4913.5, 60 sec: 4710.1, 300 sec: 4710.1). Total num frames: 282624. Throughput: 0: 5596.9. Samples: 277352. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:49:03,654][04266] Avg episode reward: [(0, '-29.953')]
[2022-12-04 20:49:03,655][04340] Saving new best policy, reward=-29.953!
[2022-12-04 20:49:04,937][04360] Updated weights for policy 0, policy_version 560 (0.0008)
[2022-12-04 20:49:08,650][04266] Fps is (10 sec: 4096.0, 60 sec: 5051.7, 300 sec: 4663.1). Total num frames: 303104. Throughput: 0: 5336.0. Samples: 298720. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:49:08,650][04266] Avg episode reward: [(0, '-29.014')]
[2022-12-04 20:49:08,678][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000600_307200.pth...
[2022-12-04 20:49:08,686][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000296_151552.pth
[2022-12-04 20:49:08,686][04340] Saving new best policy, reward=-29.014!
[2022-12-04 20:49:12,321][04360] Updated weights for policy 0, policy_version 640 (0.0007)
[2022-12-04 20:49:13,650][04266] Fps is (10 sec: 4916.9, 60 sec: 5393.1, 300 sec: 4739.7). Total num frames: 331776. Throughput: 0: 5326.1. Samples: 332308. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
[2022-12-04 20:49:13,650][04266] Avg episode reward: [(0, '-0.035')]
[2022-12-04 20:49:13,651][04340] Saving new best policy, reward=-0.035!
[2022-12-04 20:49:18,650][04266] Fps is (10 sec: 5734.4, 60 sec: 5393.1, 300 sec: 4806.0). Total num frames: 360448. Throughput: 0: 5338.0. Samples: 349452. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:49:18,650][04266] Avg episode reward: [(0, '26.827')]
[2022-12-04 20:49:18,651][04340] Saving new best policy, reward=26.827!
[2022-12-04 20:49:19,490][04360] Updated weights for policy 0, policy_version 720 (0.0006)
[2022-12-04 20:49:23,650][04266] Fps is (10 sec: 5734.3, 60 sec: 5393.1, 300 sec: 4864.0). Total num frames: 389120. Throughput: 0: 5356.0. Samples: 384288. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:49:23,651][04266] Avg episode reward: [(0, '75.358')]
[2022-12-04 20:49:23,656][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000760_389120.pth...
[2022-12-04 20:49:23,665][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000456_233472.pth
[2022-12-04 20:49:23,665][04340] Saving new best policy, reward=75.358!
[2022-12-04 20:49:26,586][04360] Updated weights for policy 0, policy_version 800 (0.0006)
[2022-12-04 20:49:28,650][04266] Fps is (10 sec: 5734.4, 60 sec: 5393.1, 300 sec: 4915.2). Total num frames: 417792. Throughput: 0: 5375.7. Samples: 418512. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:49:28,650][04266] Avg episode reward: [(0, '153.991')]
[2022-12-04 20:49:28,651][04340] Saving new best policy, reward=153.991!
[2022-12-04 20:49:33,650][04266] Fps is (10 sec: 5734.5, 60 sec: 5393.1, 300 sec: 4960.7). Total num frames: 446464. Throughput: 0: 5396.6. Samples: 435928. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:49:33,650][04266] Avg episode reward: [(0, '231.230')]
[2022-12-04 20:49:33,671][04340] Saving new best policy, reward=231.230!
[2022-12-04 20:49:33,672][04360] Updated weights for policy 0, policy_version 880 (0.0006)
[2022-12-04 20:49:38,650][04266] Fps is (10 sec: 5734.3, 60 sec: 5461.3, 300 sec: 5001.4). Total num frames: 475136. Throughput: 0: 5398.1. Samples: 470356. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2022-12-04 20:49:38,651][04266] Avg episode reward: [(0, '321.313')]
[2022-12-04 20:49:38,656][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000928_475136.pth...
[2022-12-04 20:49:38,664][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000600_307200.pth
[2022-12-04 20:49:38,665][04340] Saving new best policy, reward=321.313!
[2022-12-04 20:49:40,419][04266] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 4266], exiting...
[2022-12-04 20:49:40,420][04266] Runner profile tree view:
main_loop: 103.9009
[2022-12-04 20:49:40,421][04266] Collected {0: 487424}, FPS: 4691.2
[2022-12-04 20:49:40,421][04340] Stopping Batcher_0...
[2022-12-04 20:49:40,421][04340] Loop batcher_evt_loop terminating...
[2022-12-04 20:49:40,421][04365] Stopping RolloutWorker_w3...
[2022-12-04 20:49:40,422][04365] Loop rollout_proc3_evt_loop terminating...
[2022-12-04 20:49:40,422][04340] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000952_487424.pth...
[2022-12-04 20:49:40,424][04366] Stopping RolloutWorker_w5...
[2022-12-04 20:49:40,424][04366] Loop rollout_proc5_evt_loop terminating...
[2022-12-04 20:49:40,425][04361] Stopping RolloutWorker_w0...
[2022-12-04 20:49:40,425][04362] Stopping RolloutWorker_w1...
[2022-12-04 20:49:40,426][04363] Stopping RolloutWorker_w6...
[2022-12-04 20:49:40,426][04361] Loop rollout_proc0_evt_loop terminating...
[2022-12-04 20:49:40,426][04362] Loop rollout_proc1_evt_loop terminating...
[2022-12-04 20:49:40,426][04368] Stopping RolloutWorker_w2...
[2022-12-04 20:49:40,426][04363] Loop rollout_proc6_evt_loop terminating...
[2022-12-04 20:49:40,426][04368] Loop rollout_proc2_evt_loop terminating...
[2022-12-04 20:49:40,429][04340] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000760_389120.pth
[2022-12-04 20:49:40,429][04340] Stopping LearnerWorker_p0...
[2022-12-04 20:49:40,430][04340] Loop learner_proc0_evt_loop terminating...
[2022-12-04 20:49:40,436][04360] Weights refcount: 2 0
[2022-12-04 20:49:40,437][04360] Stopping InferenceWorker_p0-w0...
[2022-12-04 20:49:40,438][04360] Loop inference_proc0-0_evt_loop terminating...
[2022-12-04 20:49:40,474][04364] Stopping RolloutWorker_w7...
[2022-12-04 20:49:40,475][04364] Loop rollout_proc7_evt_loop terminating...
[2022-12-04 20:49:40,498][04367] Stopping RolloutWorker_w4...
[2022-12-04 20:49:40,521][04367] Loop rollout_proc4_evt_loop terminating...
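The log shows the average episode reward improving from -160.026 shortly after startup to 321.313 just before the keyboard interrupt, with a new best policy saved at each improvement. A minimal sketch for extracting that reward progression from a downloaded copy of sf_log.txt, using only the Python standard library (the local path is hypothetical):

```python
# Sketch: parse the "Avg episode reward" reports out of sf_log.txt.
import re

REWARD_RE = re.compile(r"Avg episode reward: \[\(0, '(-?\d+\.\d+)'\)\]")

rewards = []
with open("ant_test2/sf_log.txt") as f:  # hypothetical local path
    for line in f:
        match = REWARD_RE.search(line)
        if match:
            rewards.append(float(match.group(1)))

if rewards:
    print(f"{len(rewards)} reports; first={rewards[0]}, best={max(rewards)}, last={rewards[-1]}")
```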