diff --git "a/sf_log.txt" "b/sf_log.txt"
new file mode 100644
--- /dev/null
+++ "b/sf_log.txt"
@@ -0,0 +1,1719 @@
+[2023-03-11 10:41:46,573][00127] Saving configuration to /content/train_dir/default_experiment/config.json...
+[2023-03-11 10:41:46,576][00127] Rollout worker 0 uses device cpu
+[2023-03-11 10:41:46,578][00127] Rollout worker 1 uses device cpu
+[2023-03-11 10:41:46,580][00127] Rollout worker 2 uses device cpu
+[2023-03-11 10:41:46,585][00127] Rollout worker 3 uses device cpu
+[2023-03-11 10:41:46,587][00127] Rollout worker 4 uses device cpu
+[2023-03-11 10:41:46,588][00127] Rollout worker 5 uses device cpu
+[2023-03-11 10:41:46,591][00127] Rollout worker 6 uses device cpu
+[2023-03-11 10:41:46,593][00127] Rollout worker 7 uses device cpu
+[2023-03-11 10:41:46,795][00127] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-03-11 10:41:46,797][00127] InferenceWorker_p0-w0: min num requests: 2
+[2023-03-11 10:41:46,828][00127] Starting all processes...
+[2023-03-11 10:41:46,829][00127] Starting process learner_proc0
+[2023-03-11 10:41:46,884][00127] Starting all processes...
+[2023-03-11 10:41:46,891][00127] Starting process inference_proc0-0
+[2023-03-11 10:41:46,892][00127] Starting process rollout_proc0
+[2023-03-11 10:41:46,894][00127] Starting process rollout_proc1
+[2023-03-11 10:41:46,894][00127] Starting process rollout_proc2
+[2023-03-11 10:41:46,894][00127] Starting process rollout_proc3
+[2023-03-11 10:41:46,894][00127] Starting process rollout_proc4
+[2023-03-11 10:41:46,894][00127] Starting process rollout_proc5
+[2023-03-11 10:41:46,894][00127] Starting process rollout_proc6
+[2023-03-11 10:41:46,894][00127] Starting process rollout_proc7
+[2023-03-11 10:41:59,291][10419] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-03-11 10:41:59,295][10419] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2023-03-11 10:41:59,329][10436] Worker 5 uses CPU cores [1]
+[2023-03-11 10:41:59,509][10433] Worker 1 uses CPU cores [1]
+[2023-03-11 10:41:59,599][10432] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-03-11 10:41:59,600][10432] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2023-03-11 10:41:59,731][10437] Worker 4 uses CPU cores [0]
+[2023-03-11 10:41:59,737][10438] Worker 6 uses CPU cores [0]
+[2023-03-11 10:41:59,839][10434] Worker 0 uses CPU cores [0]
+[2023-03-11 10:41:59,880][10435] Worker 2 uses CPU cores [0]
+[2023-03-11 10:41:59,961][10439] Worker 3 uses CPU cores [1]
+[2023-03-11 10:42:00,000][10440] Worker 7 uses CPU cores [1]
+[2023-03-11 10:42:00,278][10432] Num visible devices: 1
+[2023-03-11 10:42:00,284][10419] Num visible devices: 1
+[2023-03-11 10:42:00,299][10419] Starting seed is not provided
+[2023-03-11 10:42:00,300][10419] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-03-11 10:42:00,301][10419] Initializing actor-critic model on device cuda:0
+[2023-03-11 10:42:00,306][10419] RunningMeanStd input shape: (3, 72, 128)
+[2023-03-11 10:42:00,308][10419] RunningMeanStd input shape: (1,)
+[2023-03-11 10:42:00,332][10419] ConvEncoder: input_channels=3
+[2023-03-11 10:42:00,823][10419] Conv encoder output size: 512
+[2023-03-11 10:42:00,823][10419] Policy head output size: 512
+[2023-03-11 10:42:00,891][10419] Created Actor Critic model with architecture:
+[2023-03-11 10:42:00,893][10419] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2023-03-11 10:42:06,788][00127] Heartbeat connected on Batcher_0
+[2023-03-11 10:42:06,797][00127] Heartbeat connected on InferenceWorker_p0-w0
+[2023-03-11 10:42:06,808][00127] Heartbeat connected on RolloutWorker_w1
+[2023-03-11 10:42:06,809][00127] Heartbeat connected on RolloutWorker_w0
+[2023-03-11 10:42:06,812][00127] Heartbeat connected on RolloutWorker_w2
+[2023-03-11 10:42:06,817][00127] Heartbeat connected on RolloutWorker_w4
+[2023-03-11 10:42:06,818][00127] Heartbeat connected on RolloutWorker_w3
+[2023-03-11 10:42:06,820][00127] Heartbeat connected on RolloutWorker_w5
+[2023-03-11 10:42:06,823][00127] Heartbeat connected on RolloutWorker_w6
+[2023-03-11 10:42:06,827][00127] Heartbeat connected on RolloutWorker_w7
+[2023-03-11 10:42:09,311][10419] Using optimizer
+[2023-03-11 10:42:09,312][10419] No checkpoints found
+[2023-03-11 10:42:09,312][10419] Did not load from checkpoint, starting from scratch!
+[2023-03-11 10:42:09,313][10419] Initialized policy 0 weights for model version 0
+[2023-03-11 10:42:09,323][10419] LearnerWorker_p0 finished initialization!
+[2023-03-11 10:42:09,323][10419] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-03-11 10:42:09,324][00127] Heartbeat connected on LearnerWorker_p0
+[2023-03-11 10:42:09,562][10432] RunningMeanStd input shape: (3, 72, 128)
+[2023-03-11 10:42:09,564][10432] RunningMeanStd input shape: (1,)
+[2023-03-11 10:42:09,583][10432] ConvEncoder: input_channels=3
+[2023-03-11 10:42:09,738][10432] Conv encoder output size: 512
+[2023-03-11 10:42:09,739][10432] Policy head output size: 512
+[2023-03-11 10:42:11,995][00127] Inference worker 0-0 is ready!
+[2023-03-11 10:42:11,997][00127] All inference workers are ready! Signal rollout workers to start!
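
The architecture printed above pins down the tensor sizes even though the scripted modules hide their parameters: three Conv2d+ELU blocks feed a Linear+ELU projecting to 512 ("Conv encoder output size: 512"), then a GRU(512, 512) core, a 1-unit critic head, and a 5-unit action-logits head. A minimal PyTorch sketch that reproduces those shapes; the exact conv kernel/stride values are assumptions, since the log prints Conv2d/ELU without parameters:

```python
import torch
import torch.nn as nn

class ActorCriticSketch(nn.Module):
    def __init__(self, num_actions: int = 5):
        super().__init__()
        # Three Conv2d+ELU blocks, matching the printed conv_head (kernel/stride assumed).
        self.conv_head = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ELU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ELU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2), nn.ELU(),
        )
        # Linear+ELU projecting flattened conv features to 512.
        # For (3, 72, 128) inputs the conv stack above yields 128 x 3 x 6 = 2304 features.
        self.mlp_layers = nn.Sequential(nn.Flatten(), nn.Linear(2304, 512), nn.ELU())
        self.core = nn.GRU(512, 512)            # (core): ModelCoreRNN -> GRU(512, 512)
        self.critic_linear = nn.Linear(512, 1)  # value head
        self.distribution_linear = nn.Linear(512, num_actions)  # action logits

    def forward(self, obs, rnn_state=None):
        x = self.mlp_layers(self.conv_head(obs))             # (batch, 512)
        x, rnn_state = self.core(x.unsqueeze(0), rnn_state)  # seq_len=1
        x = x.squeeze(0)
        return self.distribution_linear(x), self.critic_linear(x), rnn_state

logits, value, state = ActorCriticSketch()(torch.zeros(4, 3, 72, 128))
print(logits.shape, value.shape)  # torch.Size([4, 5]) torch.Size([4, 1])
```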
+[2023-03-11 10:42:12,111][10440] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-03-11 10:42:12,137][10436] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-03-11 10:42:12,148][10433] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-03-11 10:42:12,157][10439] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-03-11 10:42:12,155][10438] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-03-11 10:42:12,159][10435] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-03-11 10:42:12,171][10437] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-03-11 10:42:12,173][10434] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-03-11 10:42:12,422][00127] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-03-11 10:42:13,022][10437] Decorrelating experience for 0 frames...
+[2023-03-11 10:42:13,024][10435] Decorrelating experience for 0 frames...
+[2023-03-11 10:42:13,362][10437] Decorrelating experience for 32 frames...
+[2023-03-11 10:42:13,557][10440] Decorrelating experience for 0 frames...
+[2023-03-11 10:42:13,565][10433] Decorrelating experience for 0 frames...
+[2023-03-11 10:42:13,567][10436] Decorrelating experience for 0 frames...
+[2023-03-11 10:42:13,572][10439] Decorrelating experience for 0 frames...
+[2023-03-11 10:42:14,085][10435] Decorrelating experience for 32 frames...
+[2023-03-11 10:42:14,188][10437] Decorrelating experience for 64 frames...
+[2023-03-11 10:42:14,590][10435] Decorrelating experience for 64 frames...
+[2023-03-11 10:42:14,806][10433] Decorrelating experience for 32 frames...
+[2023-03-11 10:42:14,808][10436] Decorrelating experience for 32 frames...
+[2023-03-11 10:42:14,810][10439] Decorrelating experience for 32 frames...
+[2023-03-11 10:42:14,894][10440] Decorrelating experience for 32 frames...
+[2023-03-11 10:42:15,504][10434] Decorrelating experience for 0 frames...
+[2023-03-11 10:42:15,556][10437] Decorrelating experience for 96 frames...
+[2023-03-11 10:42:16,216][10436] Decorrelating experience for 64 frames...
+[2023-03-11 10:42:16,218][10433] Decorrelating experience for 64 frames...
+[2023-03-11 10:42:16,256][10439] Decorrelating experience for 64 frames...
+[2023-03-11 10:42:16,757][10434] Decorrelating experience for 32 frames...
+[2023-03-11 10:42:16,782][10438] Decorrelating experience for 0 frames...
+[2023-03-11 10:42:16,828][10435] Decorrelating experience for 96 frames...
+[2023-03-11 10:42:17,321][10434] Decorrelating experience for 64 frames...
+[2023-03-11 10:42:17,422][00127] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-03-11 10:42:18,092][10433] Decorrelating experience for 96 frames...
+[2023-03-11 10:42:18,194][10439] Decorrelating experience for 96 frames...
+[2023-03-11 10:42:18,704][10438] Decorrelating experience for 32 frames...
+[2023-03-11 10:42:19,285][10436] Decorrelating experience for 96 frames...
+[2023-03-11 10:42:19,316][10440] Decorrelating experience for 64 frames...
+[2023-03-11 10:42:19,518][10434] Decorrelating experience for 96 frames...
+[2023-03-11 10:42:19,658][10438] Decorrelating experience for 64 frames...
+[2023-03-11 10:42:20,175][10440] Decorrelating experience for 96 frames...
+[2023-03-11 10:42:20,768][10438] Decorrelating experience for 96 frames...
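
The "Decorrelating experience" lines above show each of the eight rollout workers stepping its environments in 32-frame chunks before real collection begins, so the workers do not all sit at the same episode phase when training starts. A toy sketch of that idea; the ToyEnv class and chunking policy are assumptions for illustration, not Sample Factory's actual code:

```python
import random

class ToyEnv:
    """Stand-in for one VizDoom env; exists only to make the sketch runnable."""
    def reset(self):
        self.t = 0

    def step(self, action):
        self.t += 1

def decorrelate(env, rng, chunk=32, max_chunks=4):
    """Advance the env by a worker-specific number of random-action frames."""
    env.reset()
    for i in range(rng.randint(1, max_chunks)):
        print(f"Decorrelating experience for {i * chunk} frames...")
        for _ in range(chunk):
            env.step(action=rng.randrange(5))  # 5 discrete actions, per the policy head

for worker_idx in range(8):  # eight rollout workers, as in the log above
    decorrelate(ToyEnv(), random.Random(worker_idx))
```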
+[2023-03-11 10:42:22,422][00127] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 10.6. Samples: 106. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-03-11 10:42:26,213][10419] Signal inference workers to stop experience collection...
+[2023-03-11 10:42:26,224][10432] InferenceWorker_p0-w0: stopping experience collection
+[2023-03-11 10:42:27,422][00127] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 163.5. Samples: 2452. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-03-11 10:42:27,425][00127] Avg episode reward: [(0, '1.958')]
+[2023-03-11 10:42:29,154][10419] Signal inference workers to resume experience collection...
+[2023-03-11 10:42:29,154][10432] InferenceWorker_p0-w0: resuming experience collection
+[2023-03-11 10:42:32,422][00127] Fps is (10 sec: 1228.8, 60 sec: 614.4, 300 sec: 614.4). Total num frames: 12288. Throughput: 0: 128.8. Samples: 2576. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
+[2023-03-11 10:42:32,424][00127] Avg episode reward: [(0, '3.241')]
+[2023-03-11 10:42:37,422][00127] Fps is (10 sec: 3686.6, 60 sec: 1474.5, 300 sec: 1474.5). Total num frames: 36864. Throughput: 0: 344.1. Samples: 8602. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:42:37,424][00127] Avg episode reward: [(0, '3.913')]
+[2023-03-11 10:42:37,885][10432] Updated weights for policy 0, policy_version 10 (0.0572)
+[2023-03-11 10:42:42,421][00127] Fps is (10 sec: 4505.8, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 57344. Throughput: 0: 513.8. Samples: 15414. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:42:42,424][00127] Avg episode reward: [(0, '4.538')]
+[2023-03-11 10:42:47,429][00127] Fps is (10 sec: 3683.8, 60 sec: 2106.1, 300 sec: 2106.1). Total num frames: 73728. Throughput: 0: 503.4. Samples: 17624. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:42:47,436][00127] Avg episode reward: [(0, '4.550')]
+[2023-03-11 10:42:49,578][10432] Updated weights for policy 0, policy_version 20 (0.0019)
+[2023-03-11 10:42:52,421][00127] Fps is (10 sec: 3276.8, 60 sec: 2252.8, 300 sec: 2252.8). Total num frames: 90112. Throughput: 0: 550.8. Samples: 22032. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:42:52,424][00127] Avg episode reward: [(0, '4.341')]
+[2023-03-11 10:42:57,422][00127] Fps is (10 sec: 4098.9, 60 sec: 2548.6, 300 sec: 2548.6). Total num frames: 114688. Throughput: 0: 642.7. Samples: 28920. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:42:57,424][00127] Avg episode reward: [(0, '4.340')]
+[2023-03-11 10:42:57,427][10419] Saving new best policy, reward=4.340!
+[2023-03-11 10:42:59,040][10432] Updated weights for policy 0, policy_version 30 (0.0017)
+[2023-03-11 10:43:02,422][00127] Fps is (10 sec: 4505.6, 60 sec: 2703.4, 300 sec: 2703.4). Total num frames: 135168. Throughput: 0: 718.4. Samples: 32330. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:43:02,425][00127] Avg episode reward: [(0, '4.366')]
+[2023-03-11 10:43:02,436][10419] Saving new best policy, reward=4.366!
+[2023-03-11 10:43:07,421][00127] Fps is (10 sec: 3276.9, 60 sec: 2681.0, 300 sec: 2681.0). Total num frames: 147456. Throughput: 0: 811.6. Samples: 36630. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:43:07,433][00127] Avg episode reward: [(0, '4.362')]
+[2023-03-11 10:43:12,422][00127] Fps is (10 sec: 2048.0, 60 sec: 2594.1, 300 sec: 2594.1). Total num frames: 155648. Throughput: 0: 839.2. Samples: 40214. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:43:12,427][00127] Avg episode reward: [(0, '4.439')]
+[2023-03-11 10:43:12,443][10419] Saving new best policy, reward=4.439!
+[2023-03-11 10:43:14,212][10432] Updated weights for policy 0, policy_version 40 (0.0031)
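
Each "Fps is (10 sec: ..., 60 sec: ..., 300 sec: ...)" line above reports frame throughput over three trailing windows of the same (wall-time, total-frames) counter. A small sketch of how such windowed rates can be computed; this is an assumption about the mechanism, not Sample Factory's implementation:

```python
import time
from collections import deque

class FpsMeter:
    """Windowed frames-per-second, in the spirit of the log lines above."""

    def __init__(self, windows=(10, 60, 300)):
        self.windows = windows
        self.history = deque()  # (wall_time, total_frames), oldest first

    def record(self, total_frames, now=None):
        now = time.time() if now is None else now
        self.history.append((now, total_frames))
        while now - self.history[0][0] > max(self.windows):  # trim stale samples
            self.history.popleft()

    def fps(self):
        now, frames = self.history[-1]
        out = {}
        for w in self.windows:
            # Oldest sample still inside this window.
            t0, f0 = next((t, f) for t, f in self.history if now - t <= w)
            out[w] = (frames - f0) / (now - t0) if now > t0 else float("nan")
        return out

m = FpsMeter()
for i in range(7):
    m.record(total_frames=i * 20480, now=1000.0 + 5.0 * i)
print(m.fps())  # {10: 4096.0, 60: 4096.0, 300: 4096.0}
```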
+[2023-03-11 10:43:17,422][00127] Fps is (10 sec: 2457.6, 60 sec: 2867.2, 300 sec: 2646.6). Total num frames: 172032. Throughput: 0: 878.5. Samples: 42108. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:43:17,429][00127] Avg episode reward: [(0, '4.538')]
+[2023-03-11 10:43:17,432][10419] Saving new best policy, reward=4.538!
+[2023-03-11 10:43:22,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3276.8, 300 sec: 2808.7). Total num frames: 196608. Throughput: 0: 874.5. Samples: 47954. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:43:22,428][00127] Avg episode reward: [(0, '4.510')]
+[2023-03-11 10:43:24,336][10432] Updated weights for policy 0, policy_version 50 (0.0023)
+[2023-03-11 10:43:27,427][00127] Fps is (10 sec: 4093.9, 60 sec: 3549.6, 300 sec: 2839.7). Total num frames: 212992. Throughput: 0: 868.5. Samples: 54502. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:43:27,429][00127] Avg episode reward: [(0, '4.454')]
+[2023-03-11 10:43:32,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 2867.2). Total num frames: 229376. Throughput: 0: 867.9. Samples: 56674. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:43:32,428][00127] Avg episode reward: [(0, '4.449')]
+[2023-03-11 10:43:36,623][10432] Updated weights for policy 0, policy_version 60 (0.0044)
+[2023-03-11 10:43:37,422][00127] Fps is (10 sec: 3278.4, 60 sec: 3481.6, 300 sec: 2891.3). Total num frames: 245760. Throughput: 0: 868.4. Samples: 61112. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:43:37,430][00127] Avg episode reward: [(0, '4.390')]
+[2023-03-11 10:43:42,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3003.7). Total num frames: 270336. Throughput: 0: 867.1. Samples: 67938. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:43:42,429][00127] Avg episode reward: [(0, '4.547')]
+[2023-03-11 10:43:42,440][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000066_270336.pth...
+[2023-03-11 10:43:42,589][10419] Saving new best policy, reward=4.547!
+[2023-03-11 10:43:46,022][10432] Updated weights for policy 0, policy_version 70 (0.0011)
+[2023-03-11 10:43:47,421][00127] Fps is (10 sec: 4505.8, 60 sec: 3618.6, 300 sec: 3061.2). Total num frames: 290816. Throughput: 0: 861.5. Samples: 71098. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-03-11 10:43:47,425][00127] Avg episode reward: [(0, '4.651')]
+[2023-03-11 10:43:47,430][10419] Saving new best policy, reward=4.651!
+[2023-03-11 10:43:52,423][00127] Fps is (10 sec: 2866.9, 60 sec: 3481.5, 300 sec: 2990.0). Total num frames: 299008. Throughput: 0: 849.1. Samples: 74840. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:43:52,425][00127] Avg episode reward: [(0, '4.616')]
+[2023-03-11 10:43:57,422][00127] Fps is (10 sec: 2048.0, 60 sec: 3276.8, 300 sec: 2964.7). Total num frames: 311296. Throughput: 0: 842.4. Samples: 78120. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-03-11 10:43:57,425][00127] Avg episode reward: [(0, '4.460')]
+[2023-03-11 10:44:02,422][00127] Fps is (10 sec: 2457.9, 60 sec: 3140.3, 300 sec: 2941.7). Total num frames: 323584. Throughput: 0: 840.8. Samples: 79942. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:44:02,424][00127] Avg episode reward: [(0, '4.449')]
+[2023-03-11 10:44:02,502][10432] Updated weights for policy 0, policy_version 80 (0.0025)
+[2023-03-11 10:44:07,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3027.5). Total num frames: 348160. Throughput: 0: 856.0. Samples: 86476. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:44:07,424][00127] Avg episode reward: [(0, '4.458')]
+[2023-03-11 10:44:11,459][10432] Updated weights for policy 0, policy_version 90 (0.0017)
+[2023-03-11 10:44:12,422][00127] Fps is (10 sec: 4505.4, 60 sec: 3549.8, 300 sec: 3072.0). Total num frames: 368640. Throughput: 0: 858.8. Samples: 93142. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:44:12,429][00127] Avg episode reward: [(0, '4.363')]
+[2023-03-11 10:44:17,421][00127] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3080.2). Total num frames: 385024. Throughput: 0: 861.7. Samples: 95452. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:44:17,428][00127] Avg episode reward: [(0, '4.338')]
+[2023-03-11 10:44:22,422][00127] Fps is (10 sec: 3276.9, 60 sec: 3413.3, 300 sec: 3087.8). Total num frames: 401408. Throughput: 0: 867.3. Samples: 100142. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:44:22,424][00127] Avg episode reward: [(0, '4.373')]
+[2023-03-11 10:44:23,368][10432] Updated weights for policy 0, policy_version 100 (0.0029)
+[2023-03-11 10:44:27,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3550.2, 300 sec: 3155.4). Total num frames: 425984. Throughput: 0: 874.7. Samples: 107298. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:44:27,424][00127] Avg episode reward: [(0, '4.389')]
+[2023-03-11 10:44:32,422][00127] Fps is (10 sec: 4505.6, 60 sec: 3618.1, 300 sec: 3189.0). Total num frames: 446464. Throughput: 0: 882.7. Samples: 110818. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:44:32,428][00127] Avg episode reward: [(0, '4.452')]
+[2023-03-11 10:44:33,524][10432] Updated weights for policy 0, policy_version 110 (0.0013)
+[2023-03-11 10:44:37,424][00127] Fps is (10 sec: 3276.0, 60 sec: 3549.7, 300 sec: 3163.8). Total num frames: 458752. Throughput: 0: 884.8. Samples: 114656. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:44:37,426][00127] Avg episode reward: [(0, '4.441')]
+[2023-03-11 10:44:42,422][00127] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3140.3). Total num frames: 471040. Throughput: 0: 892.5. Samples: 118284. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-03-11 10:44:42,425][00127] Avg episode reward: [(0, '4.380')]
+[2023-03-11 10:44:47,422][00127] Fps is (10 sec: 2867.9, 60 sec: 3276.8, 300 sec: 3144.7). Total num frames: 487424. Throughput: 0: 896.8. Samples: 120298. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:44:47,424][00127] Avg episode reward: [(0, '4.320')]
+[2023-03-11 10:44:47,892][10432] Updated weights for policy 0, policy_version 120 (0.0019)
+[2023-03-11 10:44:52,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3200.0). Total num frames: 512000. Throughput: 0: 906.4. Samples: 127264. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:44:52,424][00127] Avg episode reward: [(0, '4.525')]
+[2023-03-11 10:44:56,834][10432] Updated weights for policy 0, policy_version 130 (0.0025)
+[2023-03-11 10:44:57,422][00127] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3227.1). Total num frames: 532480. Throughput: 0: 901.6. Samples: 133714. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:44:57,426][00127] Avg episode reward: [(0, '4.692')]
+[2023-03-11 10:44:57,434][10419] Saving new best policy, reward=4.692!
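
The "Policy #0 lag" triple in the Fps lines above is the spread between the learner's current policy_version and the version that generated each sample in the batch; in an asynchronous setup a lag of one or two versions is normal. A sketch of the statistic, using a hypothetical helper rather than the library's own API:

```python
def policy_lag(learner_version, rollout_versions):
    """min/avg/max of (current version - version that collected each sample)."""
    lags = [learner_version - v for v in rollout_versions]
    return min(lags), sum(lags) / len(lags), max(lags)

# E.g. learner at version 130 while some trajectories came from versions 129-130:
print(policy_lag(130, [130, 129, 130, 130]))  # (0, 0.25, 1)
```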
+[2023-03-11 10:45:02,427][00127] Fps is (10 sec: 3684.5, 60 sec: 3754.3, 300 sec: 3228.5). Total num frames: 548864. Throughput: 0: 900.1. Samples: 135960. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:45:02,429][00127] Avg episode reward: [(0, '4.620')]
+[2023-03-11 10:45:07,422][00127] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3230.0). Total num frames: 565248. Throughput: 0: 901.4. Samples: 140706. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-03-11 10:45:07,427][00127] Avg episode reward: [(0, '4.533')]
+[2023-03-11 10:45:08,690][10432] Updated weights for policy 0, policy_version 140 (0.0028)
+[2023-03-11 10:45:12,422][00127] Fps is (10 sec: 4098.1, 60 sec: 3686.4, 300 sec: 3276.8). Total num frames: 589824. Throughput: 0: 900.4. Samples: 147816. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:45:12,426][00127] Avg episode reward: [(0, '4.447')]
+[2023-03-11 10:45:17,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3276.8). Total num frames: 606208. Throughput: 0: 892.5. Samples: 150980. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:45:17,428][00127] Avg episode reward: [(0, '4.556')]
+[2023-03-11 10:45:20,284][10432] Updated weights for policy 0, policy_version 150 (0.0017)
+[2023-03-11 10:45:22,423][00127] Fps is (10 sec: 2866.8, 60 sec: 3618.0, 300 sec: 3255.2). Total num frames: 618496. Throughput: 0: 888.3. Samples: 154630. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:45:22,428][00127] Avg episode reward: [(0, '4.534')]
+[2023-03-11 10:45:27,422][00127] Fps is (10 sec: 2047.9, 60 sec: 3345.0, 300 sec: 3213.8). Total num frames: 626688. Throughput: 0: 870.7. Samples: 157464. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:45:27,426][00127] Avg episode reward: [(0, '4.583')]
+[2023-03-11 10:45:32,422][00127] Fps is (10 sec: 2867.6, 60 sec: 3345.1, 300 sec: 3235.8). Total num frames: 647168. Throughput: 0: 877.2. Samples: 159772. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:45:32,428][00127] Avg episode reward: [(0, '4.670')]
+[2023-03-11 10:45:34,030][10432] Updated weights for policy 0, policy_version 160 (0.0028)
+[2023-03-11 10:45:37,422][00127] Fps is (10 sec: 4096.2, 60 sec: 3481.7, 300 sec: 3256.8). Total num frames: 667648. Throughput: 0: 875.2. Samples: 166650. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:45:37,424][00127] Avg episode reward: [(0, '4.636')]
+[2023-03-11 10:45:42,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 688128. Throughput: 0: 865.7. Samples: 172670. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:45:42,428][00127] Avg episode reward: [(0, '4.517')]
+[2023-03-11 10:45:42,449][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000168_688128.pth...
+[2023-03-11 10:45:44,564][10432] Updated weights for policy 0, policy_version 170 (0.0016)
+[2023-03-11 10:45:47,422][00127] Fps is (10 sec: 3686.2, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 704512. Throughput: 0: 864.8. Samples: 174874. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:45:47,430][00127] Avg episode reward: [(0, '4.464')]
+[2023-03-11 10:45:52,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3276.8). Total num frames: 720896. Throughput: 0: 871.6. Samples: 179928. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:45:52,427][00127] Avg episode reward: [(0, '4.466')]
+[2023-03-11 10:45:55,214][10432] Updated weights for policy 0, policy_version 180 (0.0031)
+[2023-03-11 10:45:57,426][00127] Fps is (10 sec: 4094.3, 60 sec: 3549.6, 300 sec: 3313.1). Total num frames: 745472. Throughput: 0: 868.2. Samples: 186890. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:45:57,428][00127] Avg episode reward: [(0, '4.584')]
+[2023-03-11 10:46:02,422][00127] Fps is (10 sec: 3686.2, 60 sec: 3481.9, 300 sec: 3294.6). Total num frames: 757760. Throughput: 0: 848.5. Samples: 189164. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-03-11 10:46:02,425][00127] Avg episode reward: [(0, '4.714')]
+[2023-03-11 10:46:02,435][10419] Saving new best policy, reward=4.714!
+[2023-03-11 10:46:07,421][00127] Fps is (10 sec: 2458.8, 60 sec: 3413.3, 300 sec: 3276.8). Total num frames: 770048. Throughput: 0: 843.8. Samples: 192602. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-03-11 10:46:07,425][00127] Avg episode reward: [(0, '4.879')]
+[2023-03-11 10:46:07,427][10419] Saving new best policy, reward=4.879!
+[2023-03-11 10:46:10,225][10432] Updated weights for policy 0, policy_version 190 (0.0020)
+[2023-03-11 10:46:12,422][00127] Fps is (10 sec: 2457.8, 60 sec: 3208.5, 300 sec: 3259.7). Total num frames: 782336. Throughput: 0: 864.3. Samples: 196356. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-03-11 10:46:12,423][00127] Avg episode reward: [(0, '5.068')]
+[2023-03-11 10:46:12,439][10419] Saving new best policy, reward=5.068!
+[2023-03-11 10:46:17,421][00127] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 802816. Throughput: 0: 881.1. Samples: 199422. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:46:17,428][00127] Avg episode reward: [(0, '4.802')]
+[2023-03-11 10:46:20,012][10432] Updated weights for policy 0, policy_version 200 (0.0024)
+[2023-03-11 10:46:22,422][00127] Fps is (10 sec: 4505.6, 60 sec: 3481.7, 300 sec: 3309.6). Total num frames: 827392. Throughput: 0: 886.2. Samples: 206530. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:46:22,423][00127] Avg episode reward: [(0, '4.654')]
+[2023-03-11 10:46:27,423][00127] Fps is (10 sec: 4095.5, 60 sec: 3618.1, 300 sec: 3308.9). Total num frames: 843776. Throughput: 0: 874.0. Samples: 212000. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:46:27,426][00127] Avg episode reward: [(0, '4.655')]
+[2023-03-11 10:46:32,099][10432] Updated weights for policy 0, policy_version 210 (0.0017)
+[2023-03-11 10:46:32,424][00127] Fps is (10 sec: 3276.1, 60 sec: 3549.7, 300 sec: 3308.3). Total num frames: 860160. Throughput: 0: 875.2. Samples: 214260. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:46:32,427][00127] Avg episode reward: [(0, '4.576')]
+[2023-03-11 10:46:37,422][00127] Fps is (10 sec: 3686.8, 60 sec: 3549.9, 300 sec: 3323.2). Total num frames: 880640. Throughput: 0: 886.3. Samples: 219812. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:46:37,428][00127] Avg episode reward: [(0, '4.591')]
+[2023-03-11 10:46:41,135][10432] Updated weights for policy 0, policy_version 220 (0.0017)
+[2023-03-11 10:46:42,422][00127] Fps is (10 sec: 4506.6, 60 sec: 3618.1, 300 sec: 3352.7). Total num frames: 905216. Throughput: 0: 882.4. Samples: 226592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:46:42,429][00127] Avg episode reward: [(0, '4.695')]
+[2023-03-11 10:46:47,422][00127] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3336.4). Total num frames: 917504. Throughput: 0: 873.3. Samples: 228462. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:46:47,424][00127] Avg episode reward: [(0, '4.873')]
+[2023-03-11 10:46:52,422][00127] Fps is (10 sec: 2048.0, 60 sec: 3413.3, 300 sec: 3306.1). Total num frames: 925696. Throughput: 0: 874.0. Samples: 231934. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:46:52,427][00127] Avg episode reward: [(0, '4.733')]
+[2023-03-11 10:46:57,091][10432] Updated weights for policy 0, policy_version 230 (0.0020)
+[2023-03-11 10:46:57,422][00127] Fps is (10 sec: 2457.6, 60 sec: 3277.0, 300 sec: 3305.5). Total num frames: 942080. Throughput: 0: 883.2. Samples: 236100. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:46:57,424][00127] Avg episode reward: [(0, '4.728')]
+[2023-03-11 10:47:02,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3333.3). Total num frames: 966656. Throughput: 0: 891.6. Samples: 239542. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:47:02,424][00127] Avg episode reward: [(0, '4.498')]
+[2023-03-11 10:47:05,779][10432] Updated weights for policy 0, policy_version 240 (0.0020)
+[2023-03-11 10:47:07,422][00127] Fps is (10 sec: 4505.7, 60 sec: 3618.1, 300 sec: 3346.2). Total num frames: 987136. Throughput: 0: 889.3. Samples: 246548. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:47:07,428][00127] Avg episode reward: [(0, '4.698')]
+[2023-03-11 10:47:12,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3401.8). Total num frames: 1003520. Throughput: 0: 873.9. Samples: 251326. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:47:12,429][00127] Avg episode reward: [(0, '4.833')]
+[2023-03-11 10:47:17,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3457.3). Total num frames: 1019904. Throughput: 0: 873.2. Samples: 253554. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-03-11 10:47:17,429][00127] Avg episode reward: [(0, '5.012')]
+[2023-03-11 10:47:18,076][10432] Updated weights for policy 0, policy_version 250 (0.0011)
+[2023-03-11 10:47:22,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 1040384. Throughput: 0: 889.0. Samples: 259816. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:47:22,423][00127] Avg episode reward: [(0, '4.975')]
+[2023-03-11 10:47:27,426][00127] Fps is (10 sec: 4094.1, 60 sec: 3617.9, 300 sec: 3554.4). Total num frames: 1060864. Throughput: 0: 883.6. Samples: 266360. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:47:27,428][00127] Avg episode reward: [(0, '4.751')]
+[2023-03-11 10:47:27,786][10432] Updated weights for policy 0, policy_version 260 (0.0011)
+[2023-03-11 10:47:32,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3512.8). Total num frames: 1073152. Throughput: 0: 879.2. Samples: 268024. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:47:32,424][00127] Avg episode reward: [(0, '4.868')]
+[2023-03-11 10:47:37,422][00127] Fps is (10 sec: 2458.7, 60 sec: 3413.3, 300 sec: 3485.1). Total num frames: 1085440. Throughput: 0: 879.5. Samples: 271510. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:47:37,426][00127] Avg episode reward: [(0, '4.969')]
+[2023-03-11 10:47:42,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 3485.2). Total num frames: 1101824. Throughput: 0: 880.8. Samples: 275736. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:47:42,424][00127] Avg episode reward: [(0, '4.906')]
+[2023-03-11 10:47:42,433][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000269_1101824.pth...
+[2023-03-11 10:47:42,549][10419] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000066_270336.pth
+[2023-03-11 10:47:43,073][10432] Updated weights for policy 0, policy_version 270 (0.0020)
+[2023-03-11 10:47:47,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 1126400. Throughput: 0: 881.6. Samples: 279216. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:47:47,424][00127] Avg episode reward: [(0, '5.136')]
+[2023-03-11 10:47:47,429][10419] Saving new best policy, reward=5.136!
+[2023-03-11 10:47:52,220][10432] Updated weights for policy 0, policy_version 280 (0.0017)
+[2023-03-11 10:47:52,422][00127] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3499.0). Total num frames: 1146880. Throughput: 0: 881.2. Samples: 286200. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:47:52,425][00127] Avg episode reward: [(0, '5.215')]
+[2023-03-11 10:47:52,441][10419] Saving new best policy, reward=5.215!
+[2023-03-11 10:47:57,421][00127] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3471.2). Total num frames: 1159168. Throughput: 0: 878.4. Samples: 290852. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:47:57,427][00127] Avg episode reward: [(0, '5.276')]
+[2023-03-11 10:47:57,505][10419] Saving new best policy, reward=5.276!
+[2023-03-11 10:48:02,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3485.1). Total num frames: 1175552. Throughput: 0: 877.6. Samples: 293046. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:48:02,428][00127] Avg episode reward: [(0, '5.299')]
+[2023-03-11 10:48:02,438][10419] Saving new best policy, reward=5.299!
+[2023-03-11 10:48:04,547][10432] Updated weights for policy 0, policy_version 290 (0.0012)
+[2023-03-11 10:48:07,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 1200128. Throughput: 0: 874.8. Samples: 299180. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:48:07,424][00127] Avg episode reward: [(0, '5.532')]
+[2023-03-11 10:48:07,427][10419] Saving new best policy, reward=5.532!
+[2023-03-11 10:48:12,427][00127] Fps is (10 sec: 4093.8, 60 sec: 3549.6, 300 sec: 3540.5). Total num frames: 1216512. Throughput: 0: 869.5. Samples: 305486. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:48:12,429][00127] Avg episode reward: [(0, '5.382')]
+[2023-03-11 10:48:15,799][10432] Updated weights for policy 0, policy_version 300 (0.0023)
+[2023-03-11 10:48:17,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3499.0). Total num frames: 1228800. Throughput: 0: 872.6. Samples: 307292. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:48:17,429][00127] Avg episode reward: [(0, '5.318')]
+[2023-03-11 10:48:22,422][00127] Fps is (10 sec: 2458.9, 60 sec: 3345.1, 300 sec: 3485.1). Total num frames: 1241088. Throughput: 0: 871.7. Samples: 310738. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
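
The checkpoint lines above (Saving checkpoint_000000269_1101824.pth, then Removing checkpoint_000000066_270336.pth) show a keep-last-N rotation keyed on a zero-padded policy version plus the frame count. A sketch of that behavior; the filenames mirror the log, while the keep_last=2 rotation policy is inferred from which files get removed:

```python
from pathlib import Path
import torch

def save_checkpoint(ckpt_dir, state, version, frames, keep_last=2):
    """Save checkpoint_<version>_<frames>.pth, then drop all but the newest keep_last."""
    ckpt_dir = Path(ckpt_dir)
    ckpt_dir.mkdir(parents=True, exist_ok=True)
    path = ckpt_dir / f"checkpoint_{version:09d}_{frames}.pth"
    print(f"Saving {path}...")
    torch.save(state, path)
    # Zero-padded versions sort lexicographically, so the oldest files come first.
    for old in sorted(ckpt_dir.glob("checkpoint_*.pth"))[:-keep_last]:
        print(f"Removing {old}")
        old.unlink()

# e.g. save_checkpoint("/content/train_dir/default_experiment/checkpoint_p0",
#                      model.state_dict(), version=269, frames=1101824)
```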
+[2023-03-11 10:48:22,429][00127] Avg episode reward: [(0, '5.204')]
+[2023-03-11 10:48:27,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3277.0, 300 sec: 3485.1). Total num frames: 1257472. Throughput: 0: 870.4. Samples: 314906. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:48:27,428][00127] Avg episode reward: [(0, '5.486')]
+[2023-03-11 10:48:29,615][10432] Updated weights for policy 0, policy_version 310 (0.0022)
+[2023-03-11 10:48:32,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 1282048. Throughput: 0: 870.2. Samples: 318374. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:48:32,424][00127] Avg episode reward: [(0, '5.627')]
+[2023-03-11 10:48:32,437][10419] Saving new best policy, reward=5.627!
+[2023-03-11 10:48:37,422][00127] Fps is (10 sec: 4505.6, 60 sec: 3618.1, 300 sec: 3499.0). Total num frames: 1302528. Throughput: 0: 865.2. Samples: 325134. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:48:37,428][00127] Avg episode reward: [(0, '5.624')]
+[2023-03-11 10:48:40,000][10432] Updated weights for policy 0, policy_version 320 (0.0012)
+[2023-03-11 10:48:42,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 1314816. Throughput: 0: 857.7. Samples: 329450. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:48:42,427][00127] Avg episode reward: [(0, '5.918')]
+[2023-03-11 10:48:42,442][10419] Saving new best policy, reward=5.918!
+[2023-03-11 10:48:47,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3499.0). Total num frames: 1331200. Throughput: 0: 855.5. Samples: 331544. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:48:47,424][00127] Avg episode reward: [(0, '5.777')]
+[2023-03-11 10:48:51,220][10432] Updated weights for policy 0, policy_version 330 (0.0016)
+[2023-03-11 10:48:52,425][00127] Fps is (10 sec: 4094.7, 60 sec: 3481.4, 300 sec: 3540.6). Total num frames: 1355776. Throughput: 0: 865.5. Samples: 338130. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:48:52,427][00127] Avg episode reward: [(0, '5.735')]
+[2023-03-11 10:48:57,421][00127] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 1372160. Throughput: 0: 861.6. Samples: 344252. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-03-11 10:48:57,430][00127] Avg episode reward: [(0, '5.796')]
+[2023-03-11 10:49:02,421][00127] Fps is (10 sec: 2868.1, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 1384448. Throughput: 0: 860.6. Samples: 346018. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:49:02,424][00127] Avg episode reward: [(0, '5.726')]
+[2023-03-11 10:49:04,652][10432] Updated weights for policy 0, policy_version 340 (0.0017)
+[2023-03-11 10:49:07,422][00127] Fps is (10 sec: 2457.6, 60 sec: 3276.8, 300 sec: 3485.1). Total num frames: 1396736. Throughput: 0: 859.4. Samples: 349412. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:49:07,424][00127] Avg episode reward: [(0, '5.679')]
+[2023-03-11 10:49:12,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3277.1, 300 sec: 3485.1). Total num frames: 1413120. Throughput: 0: 859.4. Samples: 353580. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:49:12,423][00127] Avg episode reward: [(0, '5.548')]
+[2023-03-11 10:49:16,882][10432] Updated weights for policy 0, policy_version 350 (0.0013)
+[2023-03-11 10:49:17,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3499.0). Total num frames: 1433600. Throughput: 0: 857.6. Samples: 356966. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:49:17,431][00127] Avg episode reward: [(0, '5.854')]
+[2023-03-11 10:49:22,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3485.1). Total num frames: 1454080. Throughput: 0: 857.9. Samples: 363738. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-03-11 10:49:22,425][00127] Avg episode reward: [(0, '5.889')]
+[2023-03-11 10:49:27,421][00127] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 1470464. Throughput: 0: 857.0. Samples: 368014. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:49:27,429][00127] Avg episode reward: [(0, '5.880')]
+[2023-03-11 10:49:28,488][10432] Updated weights for policy 0, policy_version 360 (0.0019)
+[2023-03-11 10:49:32,422][00127] Fps is (10 sec: 3276.7, 60 sec: 3413.3, 300 sec: 3485.1). Total num frames: 1486848. Throughput: 0: 857.6. Samples: 370138. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:49:32,428][00127] Avg episode reward: [(0, '5.853')]
+[2023-03-11 10:49:37,422][00127] Fps is (10 sec: 3686.3, 60 sec: 3413.3, 300 sec: 3512.8). Total num frames: 1507328. Throughput: 0: 853.4. Samples: 376530. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-03-11 10:49:37,430][00127] Avg episode reward: [(0, '6.024')]
+[2023-03-11 10:49:37,433][10419] Saving new best policy, reward=6.024!
+[2023-03-11 10:49:38,873][10432] Updated weights for policy 0, policy_version 370 (0.0018)
+[2023-03-11 10:49:42,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 1523712. Throughput: 0: 848.0. Samples: 382412. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:49:42,424][00127] Avg episode reward: [(0, '6.072')]
+[2023-03-11 10:49:42,487][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000373_1527808.pth...
+[2023-03-11 10:49:42,595][10419] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000168_688128.pth
+[2023-03-11 10:49:42,620][10419] Saving new best policy, reward=6.072!
+[2023-03-11 10:49:47,424][00127] Fps is (10 sec: 2866.5, 60 sec: 3413.2, 300 sec: 3471.2). Total num frames: 1536000. Throughput: 0: 845.0. Samples: 384046. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:49:47,430][00127] Avg episode reward: [(0, '5.837')]
+[2023-03-11 10:49:52,425][00127] Fps is (10 sec: 2456.9, 60 sec: 3208.5, 300 sec: 3443.4). Total num frames: 1548288. Throughput: 0: 847.8. Samples: 387566. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:49:52,432][00127] Avg episode reward: [(0, '6.200')]
+[2023-03-11 10:49:52,441][10419] Saving new best policy, reward=6.200!
+[2023-03-11 10:49:54,842][10432] Updated weights for policy 0, policy_version 380 (0.0011)
+[2023-03-11 10:49:57,421][00127] Fps is (10 sec: 2868.0, 60 sec: 3208.5, 300 sec: 3443.5). Total num frames: 1564672. Throughput: 0: 847.0. Samples: 391696. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:49:57,424][00127] Avg episode reward: [(0, '6.145')]
+[2023-03-11 10:50:02,422][00127] Fps is (10 sec: 3687.6, 60 sec: 3345.1, 300 sec: 3457.3). Total num frames: 1585152. Throughput: 0: 848.1. Samples: 395132. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:50:02,424][00127] Avg episode reward: [(0, '5.874')]
+[2023-03-11 10:50:04,276][10432] Updated weights for policy 0, policy_version 390 (0.0023)
+[2023-03-11 10:50:07,421][00127] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 1609728. Throughput: 0: 852.1. Samples: 402084. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:50:07,424][00127] Avg episode reward: [(0, '5.623')]
+[2023-03-11 10:50:12,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1622016. Throughput: 0: 854.7. Samples: 406476. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:50:12,426][00127] Avg episode reward: [(0, '5.986')]
+[2023-03-11 10:50:16,571][10432] Updated weights for policy 0, policy_version 400 (0.0030)
+[2023-03-11 10:50:17,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 1638400. Throughput: 0: 857.2. Samples: 408712. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:50:17,424][00127] Avg episode reward: [(0, '6.329')]
+[2023-03-11 10:50:17,490][10419] Saving new best policy, reward=6.329!
+[2023-03-11 10:50:22,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 1662976. Throughput: 0: 855.0. Samples: 415004. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:50:22,424][00127] Avg episode reward: [(0, '6.465')]
+[2023-03-11 10:50:22,435][10419] Saving new best policy, reward=6.465!
+[2023-03-11 10:50:26,360][10432] Updated weights for policy 0, policy_version 410 (0.0021)
+[2023-03-11 10:50:27,423][00127] Fps is (10 sec: 4095.2, 60 sec: 3481.5, 300 sec: 3498.9). Total num frames: 1679360. Throughput: 0: 851.5. Samples: 420730. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:50:27,428][00127] Avg episode reward: [(0, '6.425')]
+[2023-03-11 10:50:32,422][00127] Fps is (10 sec: 2867.1, 60 sec: 3413.3, 300 sec: 3471.2). Total num frames: 1691648. Throughput: 0: 852.1. Samples: 422390. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:50:32,430][00127] Avg episode reward: [(0, '6.741')]
+[2023-03-11 10:50:32,440][10419] Saving new best policy, reward=6.741!
+[2023-03-11 10:50:37,425][00127] Fps is (10 sec: 2047.7, 60 sec: 3208.4, 300 sec: 3429.5). Total num frames: 1699840. Throughput: 0: 844.3. Samples: 425558. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-03-11 10:50:37,428][00127] Avg episode reward: [(0, '6.974')]
+[2023-03-11 10:50:37,431][10419] Saving new best policy, reward=6.974!
+[2023-03-11 10:50:42,422][00127] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3429.5). Total num frames: 1716224. Throughput: 0: 851.9. Samples: 430034. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:50:42,424][00127] Avg episode reward: [(0, '6.757')]
+[2023-03-11 10:50:42,672][10432] Updated weights for policy 0, policy_version 420 (0.0021)
+[2023-03-11 10:50:47,422][00127] Fps is (10 sec: 4097.3, 60 sec: 3413.5, 300 sec: 3457.3). Total num frames: 1740800. Throughput: 0: 853.0. Samples: 433518. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:50:47,424][00127] Avg episode reward: [(0, '6.994')]
+[2023-03-11 10:50:47,427][10419] Saving new best policy, reward=6.994!
+[2023-03-11 10:50:51,602][10432] Updated weights for policy 0, policy_version 430 (0.0014)
+[2023-03-11 10:50:52,422][00127] Fps is (10 sec: 4505.3, 60 sec: 3550.0, 300 sec: 3443.5). Total num frames: 1761280. Throughput: 0: 850.4. Samples: 440354. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:50:52,426][00127] Avg episode reward: [(0, '7.058')]
+[2023-03-11 10:50:52,437][10419] Saving new best policy, reward=7.058!
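
The alternating "Avg episode reward" / "Saving new best policy, reward=X!" lines above amount to simple best-so-far bookkeeping: persist the model whenever the smoothed reward exceeds the previous maximum. A minimal sketch, with reward values taken from the log:

```python
class BestPolicyTracker:
    """Best-so-far reward bookkeeping behind 'Saving new best policy, reward=X!'."""

    def __init__(self):
        self.best_reward = float("-inf")

    def update(self, avg_episode_reward, save_fn):
        if avg_episode_reward > self.best_reward:
            self.best_reward = avg_episode_reward
            print(f"Saving new best policy, reward={avg_episode_reward}!")
            save_fn()
            return True
        return False

tracker = BestPolicyTracker()
tracker.update(7.058, save_fn=lambda: None)  # prints; 7.058 is a new best
tracker.update(6.767, save_fn=lambda: None)  # returns False; not an improvement
```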
+[2023-03-11 10:50:57,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1773568. Throughput: 0: 849.2. Samples: 444690. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:50:57,428][00127] Avg episode reward: [(0, '6.767')]
+[2023-03-11 10:51:02,422][00127] Fps is (10 sec: 3277.0, 60 sec: 3481.6, 300 sec: 3471.2). Total num frames: 1794048. Throughput: 0: 848.5. Samples: 446896. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:51:02,424][00127] Avg episode reward: [(0, '6.804')]
+[2023-03-11 10:51:04,095][10432] Updated weights for policy 0, policy_version 440 (0.0022)
+[2023-03-11 10:51:07,421][00127] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3499.0). Total num frames: 1814528. Throughput: 0: 855.7. Samples: 453512. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:51:07,424][00127] Avg episode reward: [(0, '7.161')]
+[2023-03-11 10:51:07,426][10419] Saving new best policy, reward=7.161!
+[2023-03-11 10:51:12,422][00127] Fps is (10 sec: 4096.1, 60 sec: 3549.9, 300 sec: 3499.0). Total num frames: 1835008. Throughput: 0: 853.8. Samples: 459150. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:51:12,424][00127] Avg episode reward: [(0, '7.404')]
+[2023-03-11 10:51:12,439][10419] Saving new best policy, reward=7.404!
+[2023-03-11 10:51:15,813][10432] Updated weights for policy 0, policy_version 450 (0.0011)
+[2023-03-11 10:51:17,421][00127] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 1843200. Throughput: 0: 854.2. Samples: 460828. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:51:17,426][00127] Avg episode reward: [(0, '7.995')]
+[2023-03-11 10:51:17,429][10419] Saving new best policy, reward=7.995!
+[2023-03-11 10:51:22,422][00127] Fps is (10 sec: 2048.0, 60 sec: 3208.5, 300 sec: 3429.5). Total num frames: 1855488. Throughput: 0: 860.1. Samples: 464258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-03-11 10:51:22,427][00127] Avg episode reward: [(0, '7.906')]
+[2023-03-11 10:51:27,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3208.6, 300 sec: 3429.6). Total num frames: 1871872. Throughput: 0: 864.9. Samples: 468952. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-03-11 10:51:27,423][00127] Avg episode reward: [(0, '8.223')]
+[2023-03-11 10:51:27,433][10419] Saving new best policy, reward=8.223!
+[2023-03-11 10:51:29,352][10432] Updated weights for policy 0, policy_version 460 (0.0023)
+[2023-03-11 10:51:32,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 1896448. Throughput: 0: 862.3. Samples: 472320. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-03-11 10:51:32,424][00127] Avg episode reward: [(0, '7.576')]
+[2023-03-11 10:51:37,422][00127] Fps is (10 sec: 4505.4, 60 sec: 3618.3, 300 sec: 3429.5). Total num frames: 1916928. Throughput: 0: 860.7. Samples: 479086. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:51:37,428][00127] Avg episode reward: [(0, '7.557')]
+[2023-03-11 10:51:39,579][10432] Updated weights for policy 0, policy_version 470 (0.0018)
+[2023-03-11 10:51:42,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3429.5). Total num frames: 1929216. Throughput: 0: 859.7. Samples: 483376. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:51:42,423][00127] Avg episode reward: [(0, '7.060')]
+[2023-03-11 10:51:42,445][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000471_1929216.pth...
+[2023-03-11 10:51:42,630][10419] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000269_1101824.pth
+[2023-03-11 10:51:47,421][00127] Fps is (10 sec: 2867.3, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 1945600. Throughput: 0: 856.6. Samples: 485442. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:51:47,428][00127] Avg episode reward: [(0, '7.497')]
+[2023-03-11 10:51:51,115][10432] Updated weights for policy 0, policy_version 480 (0.0028)
+[2023-03-11 10:51:52,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3485.1). Total num frames: 1970176. Throughput: 0: 854.8. Samples: 491980. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:51:52,430][00127] Avg episode reward: [(0, '8.270')]
+[2023-03-11 10:51:52,439][10419] Saving new best policy, reward=8.270!
+[2023-03-11 10:51:57,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 1986560. Throughput: 0: 849.4. Samples: 497374. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:51:57,425][00127] Avg episode reward: [(0, '8.549')]
+[2023-03-11 10:51:57,433][10419] Saving new best policy, reward=8.549!
+[2023-03-11 10:52:02,424][00127] Fps is (10 sec: 2866.6, 60 sec: 3413.2, 300 sec: 3429.5). Total num frames: 1998848. Throughput: 0: 848.7. Samples: 499020. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:52:02,433][00127] Avg episode reward: [(0, '8.620')]
+[2023-03-11 10:52:02,454][10419] Saving new best policy, reward=8.620!
+[2023-03-11 10:52:05,834][10432] Updated weights for policy 0, policy_version 490 (0.0037)
+[2023-03-11 10:52:07,424][00127] Fps is (10 sec: 2047.5, 60 sec: 3208.4, 300 sec: 3401.7). Total num frames: 2007040. Throughput: 0: 846.7. Samples: 502362. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:52:07,426][00127] Avg episode reward: [(0, '8.877')]
+[2023-03-11 10:52:07,428][10419] Saving new best policy, reward=8.877!
+[2023-03-11 10:52:12,422][00127] Fps is (10 sec: 2867.8, 60 sec: 3208.5, 300 sec: 3415.6). Total num frames: 2027520. Throughput: 0: 849.6. Samples: 507186. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:52:12,424][00127] Avg episode reward: [(0, '9.548')]
+[2023-03-11 10:52:12,434][10419] Saving new best policy, reward=9.548!
+[2023-03-11 10:52:16,996][10432] Updated weights for policy 0, policy_version 500 (0.0021)
+[2023-03-11 10:52:17,422][00127] Fps is (10 sec: 4096.9, 60 sec: 3413.3, 300 sec: 3415.6). Total num frames: 2048000. Throughput: 0: 847.8. Samples: 510472. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:52:17,429][00127] Avg episode reward: [(0, '9.986')]
+[2023-03-11 10:52:17,435][10419] Saving new best policy, reward=9.986!
+[2023-03-11 10:52:22,422][00127] Fps is (10 sec: 4095.7, 60 sec: 3549.8, 300 sec: 3415.7). Total num frames: 2068480. Throughput: 0: 836.2. Samples: 516716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:52:22,430][00127] Avg episode reward: [(0, '10.646')]
+[2023-03-11 10:52:22,442][10419] Saving new best policy, reward=10.646!
+[2023-03-11 10:52:27,421][00127] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 2080768. Throughput: 0: 834.0. Samples: 520904. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:52:27,424][00127] Avg episode reward: [(0, '10.874')]
+[2023-03-11 10:52:27,425][10419] Saving new best policy, reward=10.874!
+[2023-03-11 10:52:29,507][10432] Updated weights for policy 0, policy_version 510 (0.0051)
+[2023-03-11 10:52:32,422][00127] Fps is (10 sec: 2867.5, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 2097152. Throughput: 0: 837.6. Samples: 523134. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-03-11 10:52:32,428][00127] Avg episode reward: [(0, '10.626')]
+[2023-03-11 10:52:37,422][00127] Fps is (10 sec: 4095.9, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 2121728. Throughput: 0: 838.2. Samples: 529700. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:52:37,424][00127] Avg episode reward: [(0, '10.222')]
+[2023-03-11 10:52:39,017][10432] Updated weights for policy 0, policy_version 520 (0.0025)
+[2023-03-11 10:52:42,428][00127] Fps is (10 sec: 3684.1, 60 sec: 3413.0, 300 sec: 3415.6). Total num frames: 2134016. Throughput: 0: 830.9. Samples: 534768. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:52:42,432][00127] Avg episode reward: [(0, '10.796')]
+[2023-03-11 10:52:47,425][00127] Fps is (10 sec: 2456.9, 60 sec: 3344.9, 300 sec: 3387.8). Total num frames: 2146304. Throughput: 0: 831.8. Samples: 536452. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:52:47,427][00127] Avg episode reward: [(0, '10.387')]
+[2023-03-11 10:52:52,422][00127] Fps is (10 sec: 2459.1, 60 sec: 3140.3, 300 sec: 3387.9). Total num frames: 2158592. Throughput: 0: 833.2. Samples: 539854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-03-11 10:52:52,427][00127] Avg episode reward: [(0, '10.765')]
+[2023-03-11 10:52:55,463][10432] Updated weights for policy 0, policy_version 530 (0.0011)
+[2023-03-11 10:52:57,422][00127] Fps is (10 sec: 3277.9, 60 sec: 3208.5, 300 sec: 3401.8). Total num frames: 2179072. Throughput: 0: 847.6. Samples: 545330. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:52:57,424][00127] Avg episode reward: [(0, '12.312')]
+[2023-03-11 10:52:57,427][10419] Saving new best policy, reward=12.312!
+[2023-03-11 10:53:02,422][00127] Fps is (10 sec: 4505.5, 60 sec: 3413.4, 300 sec: 3401.8). Total num frames: 2203648. Throughput: 0: 853.2. Samples: 548864. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:53:02,424][00127] Avg episode reward: [(0, '12.806')]
+[2023-03-11 10:53:02,449][10419] Saving new best policy, reward=12.806!
+[2023-03-11 10:53:04,071][10432] Updated weights for policy 0, policy_version 540 (0.0011)
+[2023-03-11 10:53:07,423][00127] Fps is (10 sec: 4095.5, 60 sec: 3549.9, 300 sec: 3401.8). Total num frames: 2220032. Throughput: 0: 855.9. Samples: 555232. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:53:07,427][00127] Avg episode reward: [(0, '12.842')]
+[2023-03-11 10:53:07,430][10419] Saving new best policy, reward=12.842!
+[2023-03-11 10:53:12,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 2236416. Throughput: 0: 859.2. Samples: 559570. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:53:12,427][00127] Avg episode reward: [(0, '12.815')]
+[2023-03-11 10:53:16,204][10432] Updated weights for policy 0, policy_version 550 (0.0015)
+[2023-03-11 10:53:17,422][00127] Fps is (10 sec: 3686.9, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 2256896. Throughput: 0: 870.6. Samples: 562310. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:53:17,424][00127] Avg episode reward: [(0, '12.628')]
+[2023-03-11 10:53:22,421][00127] Fps is (10 sec: 4505.7, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 2281472. Throughput: 0: 882.0. Samples: 569392. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:53:22,424][00127] Avg episode reward: [(0, '12.722')]
+[2023-03-11 10:53:26,898][10432] Updated weights for policy 0, policy_version 560 (0.0019)
+[2023-03-11 10:53:27,425][00127] Fps is (10 sec: 3685.2, 60 sec: 3549.7, 300 sec: 3429.5). Total num frames: 2293760. Throughput: 0: 873.3. Samples: 574062. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-03-11 10:53:27,427][00127] Avg episode reward: [(0, '13.184')]
+[2023-03-11 10:53:27,432][10419] Saving new best policy, reward=13.184!
+[2023-03-11 10:53:32,422][00127] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3401.8). Total num frames: 2306048. Throughput: 0: 873.9. Samples: 575774. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-03-11 10:53:32,431][00127] Avg episode reward: [(0, '14.129')]
+[2023-03-11 10:53:32,450][10419] Saving new best policy, reward=14.129!
+[2023-03-11 10:53:37,422][00127] Fps is (10 sec: 2458.4, 60 sec: 3276.8, 300 sec: 3401.8). Total num frames: 2318336. Throughput: 0: 876.6. Samples: 579300. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:53:37,425][00127] Avg episode reward: [(0, '14.376')]
+[2023-03-11 10:53:37,427][10419] Saving new best policy, reward=14.376!
+[2023-03-11 10:53:40,776][10432] Updated weights for policy 0, policy_version 570 (0.0026)
+[2023-03-11 10:53:42,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3413.7, 300 sec: 3415.6). Total num frames: 2338816. Throughput: 0: 893.8. Samples: 585552. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-03-11 10:53:42,424][00127] Avg episode reward: [(0, '14.762')]
+[2023-03-11 10:53:42,440][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000571_2338816.pth...
+[2023-03-11 10:53:42,578][10419] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000373_1527808.pth
+[2023-03-11 10:53:42,607][10419] Saving new best policy, reward=14.762!
+[2023-03-11 10:53:47,422][00127] Fps is (10 sec: 4505.6, 60 sec: 3618.3, 300 sec: 3415.7). Total num frames: 2363392. Throughput: 0: 891.5. Samples: 588980. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-03-11 10:53:47,424][00127] Avg episode reward: [(0, '15.993')]
+[2023-03-11 10:53:47,427][10419] Saving new best policy, reward=15.993!
+[2023-03-11 10:53:50,883][10432] Updated weights for policy 0, policy_version 580 (0.0024)
+[2023-03-11 10:53:52,422][00127] Fps is (10 sec: 4095.7, 60 sec: 3686.3, 300 sec: 3415.6). Total num frames: 2379776. Throughput: 0: 877.4. Samples: 594716. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:53:52,427][00127] Avg episode reward: [(0, '16.361')]
+[2023-03-11 10:53:52,441][10419] Saving new best policy, reward=16.361!
+[2023-03-11 10:53:57,423][00127] Fps is (10 sec: 2866.9, 60 sec: 3549.8, 300 sec: 3415.6). Total num frames: 2392064. Throughput: 0: 881.1. Samples: 599218. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-03-11 10:53:57,425][00127] Avg episode reward: [(0, '17.334')]
+[2023-03-11 10:53:57,427][10419] Saving new best policy, reward=17.334!
+[2023-03-11 10:54:02,366][10432] Updated weights for policy 0, policy_version 590 (0.0029)
+[2023-03-11 10:54:02,422][00127] Fps is (10 sec: 3686.7, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2416640. Throughput: 0: 887.9. Samples: 602266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
Total num frames: 2416640. Throughput: 0: 887.9. Samples: 602266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:54:02,424][00127] Avg episode reward: [(0, '17.605')] +[2023-03-11 10:54:02,433][10419] Saving new best policy, reward=17.605! +[2023-03-11 10:54:07,421][00127] Fps is (10 sec: 4506.1, 60 sec: 3618.2, 300 sec: 3471.2). Total num frames: 2437120. Throughput: 0: 878.3. Samples: 608914. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:54:07,424][00127] Avg episode reward: [(0, '18.367')] +[2023-03-11 10:54:07,428][10419] Saving new best policy, reward=18.367! +[2023-03-11 10:54:12,425][00127] Fps is (10 sec: 3275.7, 60 sec: 3549.7, 300 sec: 3443.4). Total num frames: 2449408. Throughput: 0: 871.5. Samples: 613280. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:54:12,427][00127] Avg episode reward: [(0, '18.678')] +[2023-03-11 10:54:12,446][10419] Saving new best policy, reward=18.678! +[2023-03-11 10:54:14,805][10432] Updated weights for policy 0, policy_version 600 (0.0041) +[2023-03-11 10:54:17,424][00127] Fps is (10 sec: 2457.1, 60 sec: 3413.2, 300 sec: 3415.6). Total num frames: 2461696. Throughput: 0: 873.2. Samples: 615072. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:54:17,426][00127] Avg episode reward: [(0, '17.589')] +[2023-03-11 10:54:22,422][00127] Fps is (10 sec: 2458.3, 60 sec: 3208.5, 300 sec: 3401.8). Total num frames: 2473984. Throughput: 0: 878.4. Samples: 618828. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:54:22,426][00127] Avg episode reward: [(0, '17.310')] +[2023-03-11 10:54:27,333][10432] Updated weights for policy 0, policy_version 610 (0.0017) +[2023-03-11 10:54:27,422][00127] Fps is (10 sec: 3687.2, 60 sec: 3413.5, 300 sec: 3429.5). Total num frames: 2498560. Throughput: 0: 871.9. Samples: 624786. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:54:27,429][00127] Avg episode reward: [(0, '18.097')] +[2023-03-11 10:54:32,422][00127] Fps is (10 sec: 4505.7, 60 sec: 3549.9, 300 sec: 3429.5). Total num frames: 2519040. Throughput: 0: 871.0. Samples: 628176. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:54:32,431][00127] Avg episode reward: [(0, '19.194')] +[2023-03-11 10:54:32,441][10419] Saving new best policy, reward=19.194! +[2023-03-11 10:54:37,423][00127] Fps is (10 sec: 3685.8, 60 sec: 3618.0, 300 sec: 3429.5). Total num frames: 2535424. Throughput: 0: 863.8. Samples: 633588. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:54:37,425][00127] Avg episode reward: [(0, '20.878')] +[2023-03-11 10:54:37,429][10419] Saving new best policy, reward=20.878! +[2023-03-11 10:54:38,715][10432] Updated weights for policy 0, policy_version 620 (0.0041) +[2023-03-11 10:54:42,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3429.6). Total num frames: 2547712. Throughput: 0: 858.0. Samples: 637828. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:54:42,430][00127] Avg episode reward: [(0, '20.556')] +[2023-03-11 10:54:47,422][00127] Fps is (10 sec: 3686.9, 60 sec: 3481.6, 300 sec: 3471.2). Total num frames: 2572288. Throughput: 0: 860.7. Samples: 640998. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:54:47,424][00127] Avg episode reward: [(0, '21.656')] +[2023-03-11 10:54:47,431][10419] Saving new best policy, reward=21.656! +[2023-03-11 10:54:49,080][10432] Updated weights for policy 0, policy_version 630 (0.0031) +[2023-03-11 10:54:52,425][00127] Fps is (10 sec: 4504.1, 60 sec: 3549.7, 300 sec: 3485.0). 
Total num frames: 2592768. Throughput: 0: 866.3. Samples: 647900. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:54:52,431][00127] Avg episode reward: [(0, '21.258')] +[2023-03-11 10:54:57,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2605056. Throughput: 0: 860.2. Samples: 651986. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:54:57,424][00127] Avg episode reward: [(0, '20.747')] +[2023-03-11 10:55:02,421][00127] Fps is (10 sec: 2458.4, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 2617344. Throughput: 0: 858.8. Samples: 653718. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:55:02,424][00127] Avg episode reward: [(0, '21.291')] +[2023-03-11 10:55:03,945][10432] Updated weights for policy 0, policy_version 640 (0.0021) +[2023-03-11 10:55:07,422][00127] Fps is (10 sec: 2457.7, 60 sec: 3208.5, 300 sec: 3415.6). Total num frames: 2629632. Throughput: 0: 856.0. Samples: 657346. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:55:07,430][00127] Avg episode reward: [(0, '19.891')] +[2023-03-11 10:55:12,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3413.5, 300 sec: 3443.4). Total num frames: 2654208. Throughput: 0: 877.7. Samples: 664282. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:55:12,424][00127] Avg episode reward: [(0, '20.538')] +[2023-03-11 10:55:13,659][10432] Updated weights for policy 0, policy_version 650 (0.0023) +[2023-03-11 10:55:17,421][00127] Fps is (10 sec: 4915.2, 60 sec: 3618.3, 300 sec: 3443.4). Total num frames: 2678784. Throughput: 0: 882.8. Samples: 667904. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:55:17,425][00127] Avg episode reward: [(0, '18.164')] +[2023-03-11 10:55:22,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3443.4). Total num frames: 2695168. Throughput: 0: 882.7. Samples: 673308. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:55:22,427][00127] Avg episode reward: [(0, '16.979')] +[2023-03-11 10:55:25,120][10432] Updated weights for policy 0, policy_version 660 (0.0031) +[2023-03-11 10:55:27,422][00127] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2711552. Throughput: 0: 894.2. Samples: 678068. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:55:27,424][00127] Avg episode reward: [(0, '16.172')] +[2023-03-11 10:55:32,422][00127] Fps is (10 sec: 3686.3, 60 sec: 3549.9, 300 sec: 3499.0). Total num frames: 2732032. Throughput: 0: 902.7. Samples: 681618. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:55:32,424][00127] Avg episode reward: [(0, '16.115')] +[2023-03-11 10:55:34,334][10432] Updated weights for policy 0, policy_version 670 (0.0027) +[2023-03-11 10:55:37,422][00127] Fps is (10 sec: 4096.1, 60 sec: 3618.2, 300 sec: 3512.8). Total num frames: 2752512. Throughput: 0: 904.6. Samples: 688604. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:55:37,430][00127] Avg episode reward: [(0, '17.313')] +[2023-03-11 10:55:42,422][00127] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3471.2). Total num frames: 2764800. Throughput: 0: 896.5. Samples: 692328. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:55:42,427][00127] Avg episode reward: [(0, '17.475')] +[2023-03-11 10:55:42,435][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000675_2764800.pth... 
+[2023-03-11 10:55:42,619][10419] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000471_1929216.pth +[2023-03-11 10:55:47,422][00127] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 2777088. Throughput: 0: 896.3. Samples: 694050. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:55:47,426][00127] Avg episode reward: [(0, '18.703')] +[2023-03-11 10:55:49,527][10432] Updated weights for policy 0, policy_version 680 (0.0023) +[2023-03-11 10:55:52,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3345.2, 300 sec: 3457.3). Total num frames: 2793472. Throughput: 0: 903.3. Samples: 697996. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:55:52,426][00127] Avg episode reward: [(0, '19.879')] +[2023-03-11 10:55:57,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 2818048. Throughput: 0: 908.0. Samples: 705144. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:55:57,424][00127] Avg episode reward: [(0, '19.821')] +[2023-03-11 10:55:58,686][10432] Updated weights for policy 0, policy_version 690 (0.0018) +[2023-03-11 10:56:02,422][00127] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3471.2). Total num frames: 2838528. Throughput: 0: 906.8. Samples: 708712. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:56:02,425][00127] Avg episode reward: [(0, '19.093')] +[2023-03-11 10:56:07,421][00127] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3457.3). Total num frames: 2854912. Throughput: 0: 901.6. Samples: 713880. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:56:07,433][00127] Avg episode reward: [(0, '19.490')] +[2023-03-11 10:56:10,407][10432] Updated weights for policy 0, policy_version 700 (0.0020) +[2023-03-11 10:56:12,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3485.1). Total num frames: 2871296. Throughput: 0: 907.0. Samples: 718884. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:56:12,430][00127] Avg episode reward: [(0, '18.658')] +[2023-03-11 10:56:17,421][00127] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 2895872. Throughput: 0: 908.1. Samples: 722482. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:56:17,427][00127] Avg episode reward: [(0, '18.078')] +[2023-03-11 10:56:19,272][10432] Updated weights for policy 0, policy_version 710 (0.0016) +[2023-03-11 10:56:22,424][00127] Fps is (10 sec: 4914.1, 60 sec: 3754.5, 300 sec: 3554.5). Total num frames: 2920448. Throughput: 0: 910.8. Samples: 729590. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:56:22,426][00127] Avg episode reward: [(0, '19.309')] +[2023-03-11 10:56:27,426][00127] Fps is (10 sec: 3275.4, 60 sec: 3617.9, 300 sec: 3498.9). Total num frames: 2928640. Throughput: 0: 912.4. Samples: 733390. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:56:27,435][00127] Avg episode reward: [(0, '19.647')] +[2023-03-11 10:56:32,422][00127] Fps is (10 sec: 2048.5, 60 sec: 3481.6, 300 sec: 3471.2). Total num frames: 2940928. Throughput: 0: 913.7. Samples: 735166. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-03-11 10:56:32,425][00127] Avg episode reward: [(0, '19.190')] +[2023-03-11 10:56:34,218][10432] Updated weights for policy 0, policy_version 720 (0.0011) +[2023-03-11 10:56:37,421][00127] Fps is (10 sec: 2868.4, 60 sec: 3413.3, 300 sec: 3485.1). Total num frames: 2957312. Throughput: 0: 911.9. Samples: 739030. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-03-11 10:56:37,424][00127] Avg episode reward: [(0, '19.202')] +[2023-03-11 10:56:42,422][00127] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3512.8). Total num frames: 2981888. Throughput: 0: 910.2. Samples: 746104. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:56:42,424][00127] Avg episode reward: [(0, '19.311')] +[2023-03-11 10:56:43,585][10432] Updated weights for policy 0, policy_version 730 (0.0016) +[2023-03-11 10:56:47,422][00127] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3499.0). Total num frames: 3002368. Throughput: 0: 912.3. Samples: 749766. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:56:47,424][00127] Avg episode reward: [(0, '20.453')] +[2023-03-11 10:56:52,422][00127] Fps is (10 sec: 3686.5, 60 sec: 3754.7, 300 sec: 3499.0). Total num frames: 3018752. Throughput: 0: 910.9. Samples: 754870. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-03-11 10:56:52,427][00127] Avg episode reward: [(0, '19.656')] +[2023-03-11 10:56:55,573][10432] Updated weights for policy 0, policy_version 740 (0.0027) +[2023-03-11 10:56:57,421][00127] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3512.9). Total num frames: 3035136. Throughput: 0: 913.1. Samples: 759974. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:56:57,424][00127] Avg episode reward: [(0, '20.575')] +[2023-03-11 10:57:02,422][00127] Fps is (10 sec: 4505.5, 60 sec: 3754.7, 300 sec: 3582.3). Total num frames: 3063808. Throughput: 0: 912.9. Samples: 763562. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:57:02,424][00127] Avg episode reward: [(0, '21.048')] +[2023-03-11 10:57:04,207][10432] Updated weights for policy 0, policy_version 750 (0.0019) +[2023-03-11 10:57:07,426][00127] Fps is (10 sec: 4913.1, 60 sec: 3822.7, 300 sec: 3582.2). Total num frames: 3084288. Throughput: 0: 913.2. Samples: 770686. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:57:07,429][00127] Avg episode reward: [(0, '20.743')] +[2023-03-11 10:57:12,421][00127] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3554.5). Total num frames: 3096576. Throughput: 0: 912.7. Samples: 774456. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-03-11 10:57:12,424][00127] Avg episode reward: [(0, '19.484')] +[2023-03-11 10:57:17,421][00127] Fps is (10 sec: 2458.6, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 3108864. Throughput: 0: 912.8. Samples: 776242. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-03-11 10:57:17,431][00127] Avg episode reward: [(0, '19.839')] +[2023-03-11 10:57:18,883][10432] Updated weights for policy 0, policy_version 760 (0.0027) +[2023-03-11 10:57:22,422][00127] Fps is (10 sec: 2457.6, 60 sec: 3345.2, 300 sec: 3526.7). Total num frames: 3121152. Throughput: 0: 914.9. Samples: 780200. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-03-11 10:57:22,426][00127] Avg episode reward: [(0, '21.053')] +[2023-03-11 10:57:27,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3618.4, 300 sec: 3554.5). Total num frames: 3145728. Throughput: 0: 915.3. Samples: 787292. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:57:27,424][00127] Avg episode reward: [(0, '20.955')] +[2023-03-11 10:57:28,374][10432] Updated weights for policy 0, policy_version 770 (0.0011) +[2023-03-11 10:57:32,422][00127] Fps is (10 sec: 4915.2, 60 sec: 3822.9, 300 sec: 3554.5). Total num frames: 3170304. Throughput: 0: 915.6. Samples: 790968. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-03-11 10:57:32,424][00127] Avg episode reward: [(0, '20.559')] +[2023-03-11 10:57:37,427][00127] Fps is (10 sec: 4093.9, 60 sec: 3822.6, 300 sec: 3568.4). Total num frames: 3186688. Throughput: 0: 915.9. Samples: 796092. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-03-11 10:57:37,429][00127] Avg episode reward: [(0, '22.166')] +[2023-03-11 10:57:37,432][10419] Saving new best policy, reward=22.166! +[2023-03-11 10:57:40,069][10432] Updated weights for policy 0, policy_version 780 (0.0016) +[2023-03-11 10:57:42,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 3203072. Throughput: 0: 913.5. Samples: 801080. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-03-11 10:57:42,429][00127] Avg episode reward: [(0, '23.619')] +[2023-03-11 10:57:42,439][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000782_3203072.pth... +[2023-03-11 10:57:42,631][10419] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000571_2338816.pth +[2023-03-11 10:57:42,647][10419] Saving new best policy, reward=23.619! +[2023-03-11 10:57:47,422][00127] Fps is (10 sec: 3688.2, 60 sec: 3686.4, 300 sec: 3610.0). Total num frames: 3223552. Throughput: 0: 907.6. Samples: 804404. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:57:47,430][00127] Avg episode reward: [(0, '23.606')] +[2023-03-11 10:57:49,580][10432] Updated weights for policy 0, policy_version 790 (0.0019) +[2023-03-11 10:57:52,425][00127] Fps is (10 sec: 4094.7, 60 sec: 3754.5, 300 sec: 3610.0). Total num frames: 3244032. Throughput: 0: 905.7. Samples: 811440. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:57:52,427][00127] Avg episode reward: [(0, '24.107')] +[2023-03-11 10:57:52,439][10419] Saving new best policy, reward=24.107! +[2023-03-11 10:57:57,422][00127] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 3256320. Throughput: 0: 903.5. Samples: 815114. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:57:57,427][00127] Avg episode reward: [(0, '23.820')] +[2023-03-11 10:58:02,423][00127] Fps is (10 sec: 2458.1, 60 sec: 3413.3, 300 sec: 3554.5). Total num frames: 3268608. Throughput: 0: 902.9. Samples: 816874. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:58:02,425][00127] Avg episode reward: [(0, '24.180')] +[2023-03-11 10:58:02,440][10419] Saving new best policy, reward=24.180! +[2023-03-11 10:58:04,575][10432] Updated weights for policy 0, policy_version 800 (0.0023) +[2023-03-11 10:58:07,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3345.3, 300 sec: 3554.5). Total num frames: 3284992. Throughput: 0: 902.8. Samples: 820824. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:58:07,431][00127] Avg episode reward: [(0, '24.704')] +[2023-03-11 10:58:07,433][10419] Saving new best policy, reward=24.704! +[2023-03-11 10:58:12,422][00127] Fps is (10 sec: 4096.5, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 3309568. Throughput: 0: 902.9. Samples: 827924. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-03-11 10:58:12,430][00127] Avg episode reward: [(0, '24.494')] +[2023-03-11 10:58:13,812][10432] Updated weights for policy 0, policy_version 810 (0.0011) +[2023-03-11 10:58:17,421][00127] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 3330048. Throughput: 0: 900.0. Samples: 831466. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:58:17,424][00127] Avg episode reward: [(0, '24.662')] +[2023-03-11 10:58:22,425][00127] Fps is (10 sec: 3685.2, 60 sec: 3754.5, 300 sec: 3568.4). Total num frames: 3346432. Throughput: 0: 897.7. Samples: 836486. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:58:22,427][00127] Avg episode reward: [(0, '23.870')] +[2023-03-11 10:58:25,943][10432] Updated weights for policy 0, policy_version 820 (0.0011) +[2023-03-11 10:58:27,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 3362816. Throughput: 0: 897.9. Samples: 841484. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:58:27,429][00127] Avg episode reward: [(0, '24.678')] +[2023-03-11 10:58:32,422][00127] Fps is (10 sec: 4097.3, 60 sec: 3618.1, 300 sec: 3623.9). Total num frames: 3387392. Throughput: 0: 903.5. Samples: 845060. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:58:32,428][00127] Avg episode reward: [(0, '23.862')] +[2023-03-11 10:58:34,531][10432] Updated weights for policy 0, policy_version 830 (0.0017) +[2023-03-11 10:58:37,421][00127] Fps is (10 sec: 4505.6, 60 sec: 3686.7, 300 sec: 3623.9). Total num frames: 3407872. Throughput: 0: 902.1. Samples: 852030. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:58:37,424][00127] Avg episode reward: [(0, '25.372')] +[2023-03-11 10:58:37,432][10419] Saving new best policy, reward=25.372! +[2023-03-11 10:58:42,426][00127] Fps is (10 sec: 3275.5, 60 sec: 3617.9, 300 sec: 3582.2). Total num frames: 3420160. Throughput: 0: 899.2. Samples: 855580. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:58:42,428][00127] Avg episode reward: [(0, '26.051')] +[2023-03-11 10:58:42,446][10419] Saving new best policy, reward=26.051! +[2023-03-11 10:58:47,421][00127] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 3432448. Throughput: 0: 899.8. Samples: 857362. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:58:47,424][00127] Avg episode reward: [(0, '26.076')] +[2023-03-11 10:58:47,428][10419] Saving new best policy, reward=26.076! +[2023-03-11 10:58:50,268][10432] Updated weights for policy 0, policy_version 840 (0.0039) +[2023-03-11 10:58:52,422][00127] Fps is (10 sec: 2868.3, 60 sec: 3413.5, 300 sec: 3582.3). Total num frames: 3448832. Throughput: 0: 904.2. Samples: 861514. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:58:52,424][00127] Avg episode reward: [(0, '23.956')] +[2023-03-11 10:58:57,421][00127] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 3473408. Throughput: 0: 909.2. Samples: 868836. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:58:57,428][00127] Avg episode reward: [(0, '24.893')] +[2023-03-11 10:58:58,772][10432] Updated weights for policy 0, policy_version 850 (0.0015) +[2023-03-11 10:59:02,421][00127] Fps is (10 sec: 4505.7, 60 sec: 3754.8, 300 sec: 3582.3). Total num frames: 3493888. Throughput: 0: 912.8. Samples: 872540. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:59:02,431][00127] Avg episode reward: [(0, '23.839')] +[2023-03-11 10:59:07,421][00127] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3596.2). Total num frames: 3510272. Throughput: 0: 907.8. Samples: 877332. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-03-11 10:59:07,424][00127] Avg episode reward: [(0, '20.959')] +[2023-03-11 10:59:11,008][10432] Updated weights for policy 0, policy_version 860 (0.0020) +[2023-03-11 10:59:12,422][00127] Fps is (10 sec: 3276.7, 60 sec: 3618.1, 300 sec: 3610.1). Total num frames: 3526656. Throughput: 0: 909.6. Samples: 882418. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-03-11 10:59:12,424][00127] Avg episode reward: [(0, '20.596')] +[2023-03-11 10:59:17,422][00127] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 3551232. Throughput: 0: 908.0. Samples: 885918. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:59:17,424][00127] Avg episode reward: [(0, '22.470')] +[2023-03-11 10:59:19,627][10432] Updated weights for policy 0, policy_version 870 (0.0011) +[2023-03-11 10:59:22,425][00127] Fps is (10 sec: 4094.8, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 3567616. Throughput: 0: 896.8. Samples: 892388. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:59:22,434][00127] Avg episode reward: [(0, '21.842')] +[2023-03-11 10:59:27,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 3579904. Throughput: 0: 897.4. Samples: 895958. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-03-11 10:59:27,428][00127] Avg episode reward: [(0, '21.919')] +[2023-03-11 10:59:32,422][00127] Fps is (10 sec: 2458.3, 60 sec: 3413.3, 300 sec: 3582.3). Total num frames: 3592192. Throughput: 0: 898.0. Samples: 897774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 10:59:32,424][00127] Avg episode reward: [(0, '21.872')] +[2023-03-11 10:59:35,358][10432] Updated weights for policy 0, policy_version 880 (0.0025) +[2023-03-11 10:59:37,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3610.0). Total num frames: 3612672. Throughput: 0: 911.8. Samples: 902544. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 10:59:37,424][00127] Avg episode reward: [(0, '22.747')] +[2023-03-11 10:59:42,422][00127] Fps is (10 sec: 4505.7, 60 sec: 3618.4, 300 sec: 3610.0). Total num frames: 3637248. Throughput: 0: 909.0. Samples: 909742. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 10:59:42,424][00127] Avg episode reward: [(0, '24.138')] +[2023-03-11 10:59:42,434][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000888_3637248.pth... +[2023-03-11 10:59:42,547][10419] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000675_2764800.pth +[2023-03-11 10:59:43,904][10432] Updated weights for policy 0, policy_version 890 (0.0021) +[2023-03-11 10:59:47,422][00127] Fps is (10 sec: 4505.4, 60 sec: 3754.6, 300 sec: 3610.1). Total num frames: 3657728. Throughput: 0: 905.0. Samples: 913266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-03-11 10:59:47,424][00127] Avg episode reward: [(0, '23.499')] +[2023-03-11 10:59:52,424][00127] Fps is (10 sec: 3275.9, 60 sec: 3686.2, 300 sec: 3610.0). Total num frames: 3670016. Throughput: 0: 899.1. Samples: 917792. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-03-11 10:59:52,437][00127] Avg episode reward: [(0, '23.490')] +[2023-03-11 10:59:55,988][10432] Updated weights for policy 0, policy_version 900 (0.0051) +[2023-03-11 10:59:57,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3637.8). Total num frames: 3690496. Throughput: 0: 910.9. Samples: 923408. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-03-11 10:59:57,428][00127] Avg episode reward: [(0, '22.427')] +[2023-03-11 11:00:02,422][00127] Fps is (10 sec: 4506.9, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 3715072. Throughput: 0: 913.4. Samples: 927022. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-03-11 11:00:02,424][00127] Avg episode reward: [(0, '23.183')] +[2023-03-11 11:00:04,633][10432] Updated weights for policy 0, policy_version 910 (0.0011) +[2023-03-11 11:00:07,421][00127] Fps is (10 sec: 4096.2, 60 sec: 3686.4, 300 sec: 3651.7). Total num frames: 3731456. Throughput: 0: 908.0. Samples: 933244. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 11:00:07,429][00127] Avg episode reward: [(0, '23.897')] +[2023-03-11 11:00:12,422][00127] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 3743744. Throughput: 0: 907.9. Samples: 936814. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 11:00:12,424][00127] Avg episode reward: [(0, '23.954')] +[2023-03-11 11:00:17,421][00127] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3596.1). Total num frames: 3756032. Throughput: 0: 905.7. Samples: 938530. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 11:00:17,427][00127] Avg episode reward: [(0, '24.607')] +[2023-03-11 11:00:20,289][10432] Updated weights for policy 0, policy_version 920 (0.0015) +[2023-03-11 11:00:22,424][00127] Fps is (10 sec: 3275.9, 60 sec: 3481.6, 300 sec: 3610.0). Total num frames: 3776512. Throughput: 0: 910.7. Samples: 943530. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 11:00:22,431][00127] Avg episode reward: [(0, '24.217')] +[2023-03-11 11:00:27,422][00127] Fps is (10 sec: 4505.5, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 3801088. Throughput: 0: 911.3. Samples: 950752. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 11:00:27,424][00127] Avg episode reward: [(0, '25.470')] +[2023-03-11 11:00:28,984][10432] Updated weights for policy 0, policy_version 930 (0.0013) +[2023-03-11 11:00:32,422][00127] Fps is (10 sec: 4097.1, 60 sec: 3754.7, 300 sec: 3610.0). Total num frames: 3817472. Throughput: 0: 904.5. Samples: 953970. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-03-11 11:00:32,428][00127] Avg episode reward: [(0, '25.159')] +[2023-03-11 11:00:37,422][00127] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 3833856. Throughput: 0: 904.9. Samples: 958510. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-03-11 11:00:37,425][00127] Avg episode reward: [(0, '25.359')] +[2023-03-11 11:00:41,042][10432] Updated weights for policy 0, policy_version 940 (0.0021) +[2023-03-11 11:00:42,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3651.7). Total num frames: 3854336. Throughput: 0: 909.7. Samples: 964344. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 11:00:42,424][00127] Avg episode reward: [(0, '25.458')] +[2023-03-11 11:00:47,422][00127] Fps is (10 sec: 4505.9, 60 sec: 3686.4, 300 sec: 3679.5). Total num frames: 3878912. Throughput: 0: 907.5. Samples: 967858. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 11:00:47,424][00127] Avg episode reward: [(0, '25.519')] +[2023-03-11 11:00:50,461][10432] Updated weights for policy 0, policy_version 950 (0.0024) +[2023-03-11 11:00:52,425][00127] Fps is (10 sec: 4094.7, 60 sec: 3754.7, 300 sec: 3651.7). Total num frames: 3895296. Throughput: 0: 896.0. Samples: 973568. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-03-11 11:00:52,432][00127] Avg episode reward: [(0, '27.270')] +[2023-03-11 11:00:52,444][10419] Saving new best policy, reward=27.270! +[2023-03-11 11:00:57,422][00127] Fps is (10 sec: 2457.6, 60 sec: 3549.9, 300 sec: 3610.0). Total num frames: 3903488. Throughput: 0: 895.9. Samples: 977128. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-03-11 11:00:57,428][00127] Avg episode reward: [(0, '26.059')] +[2023-03-11 11:01:02,423][00127] Fps is (10 sec: 2458.1, 60 sec: 3413.3, 300 sec: 3610.0). Total num frames: 3919872. Throughput: 0: 897.7. Samples: 978926. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 11:01:02,426][00127] Avg episode reward: [(0, '25.930')] +[2023-03-11 11:01:05,740][10432] Updated weights for policy 0, policy_version 960 (0.0037) +[2023-03-11 11:01:07,422][00127] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3623.9). Total num frames: 3940352. Throughput: 0: 898.5. Samples: 983958. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-03-11 11:01:07,424][00127] Avg episode reward: [(0, '24.102')] +[2023-03-11 11:01:12,422][00127] Fps is (10 sec: 4506.1, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 3964928. Throughput: 0: 900.6. Samples: 991278. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 11:01:12,425][00127] Avg episode reward: [(0, '21.869')] +[2023-03-11 11:01:14,136][10432] Updated weights for policy 0, policy_version 970 (0.0023) +[2023-03-11 11:01:17,421][00127] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3596.2). Total num frames: 3981312. Throughput: 0: 902.7. Samples: 994592. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 11:01:17,424][00127] Avg episode reward: [(0, '20.910')] +[2023-03-11 11:01:22,422][00127] Fps is (10 sec: 3276.8, 60 sec: 3686.6, 300 sec: 3624.0). Total num frames: 3997696. Throughput: 0: 904.2. Samples: 999200. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-03-11 11:01:22,424][00127] Avg episode reward: [(0, '20.119')] +[2023-03-11 11:01:24,633][10419] Stopping Batcher_0... +[2023-03-11 11:01:24,634][00127] Component Batcher_0 stopped! +[2023-03-11 11:01:24,635][10419] Loop batcher_evt_loop terminating... +[2023-03-11 11:01:24,640][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-03-11 11:01:24,695][10432] Weights refcount: 2 0 +[2023-03-11 11:01:24,698][00127] Component RolloutWorker_w5 stopped! +[2023-03-11 11:01:24,701][10436] Stopping RolloutWorker_w5... +[2023-03-11 11:01:24,707][10436] Loop rollout_proc5_evt_loop terminating... +[2023-03-11 11:01:24,713][00127] Component InferenceWorker_p0-w0 stopped! +[2023-03-11 11:01:24,720][10435] Stopping RolloutWorker_w2... +[2023-03-11 11:01:24,721][10435] Loop rollout_proc2_evt_loop terminating... +[2023-03-11 11:01:24,720][00127] Component RolloutWorker_w2 stopped! +[2023-03-11 11:01:24,713][10432] Stopping InferenceWorker_p0-w0... +[2023-03-11 11:01:24,729][10432] Loop inference_proc0-0_evt_loop terminating... +[2023-03-11 11:01:24,730][10438] Stopping RolloutWorker_w6... +[2023-03-11 11:01:24,730][00127] Component RolloutWorker_w6 stopped! +[2023-03-11 11:01:24,733][00127] Component RolloutWorker_w3 stopped! +[2023-03-11 11:01:24,735][10439] Stopping RolloutWorker_w3... +[2023-03-11 11:01:24,736][10439] Loop rollout_proc3_evt_loop terminating... +[2023-03-11 11:01:24,741][00127] Component RolloutWorker_w1 stopped! +[2023-03-11 11:01:24,743][10433] Stopping RolloutWorker_w1... 
+[2023-03-11 11:01:24,744][10433] Loop rollout_proc1_evt_loop terminating... +[2023-03-11 11:01:24,746][00127] Component RolloutWorker_w7 stopped! +[2023-03-11 11:01:24,748][10440] Stopping RolloutWorker_w7... +[2023-03-11 11:01:24,750][10440] Loop rollout_proc7_evt_loop terminating... +[2023-03-11 11:01:24,731][10438] Loop rollout_proc6_evt_loop terminating... +[2023-03-11 11:01:24,769][00127] Component RolloutWorker_w0 stopped! +[2023-03-11 11:01:24,769][10434] Stopping RolloutWorker_w0... +[2023-03-11 11:01:24,777][10434] Loop rollout_proc0_evt_loop terminating... +[2023-03-11 11:01:24,778][10437] Stopping RolloutWorker_w4... +[2023-03-11 11:01:24,778][00127] Component RolloutWorker_w4 stopped! +[2023-03-11 11:01:24,779][10437] Loop rollout_proc4_evt_loop terminating... +[2023-03-11 11:01:24,800][10419] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000782_3203072.pth +[2023-03-11 11:01:24,810][10419] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-03-11 11:01:25,012][10419] Stopping LearnerWorker_p0... +[2023-03-11 11:01:25,012][00127] Component LearnerWorker_p0 stopped! +[2023-03-11 11:01:25,014][00127] Waiting for process learner_proc0 to stop... +[2023-03-11 11:01:25,014][10419] Loop learner_proc0_evt_loop terminating... +[2023-03-11 11:01:26,872][00127] Waiting for process inference_proc0-0 to join... +[2023-03-11 11:01:27,238][00127] Waiting for process rollout_proc0 to join... +[2023-03-11 11:01:27,637][00127] Waiting for process rollout_proc1 to join... +[2023-03-11 11:01:27,638][00127] Waiting for process rollout_proc2 to join... +[2023-03-11 11:01:27,640][00127] Waiting for process rollout_proc3 to join... +[2023-03-11 11:01:27,640][00127] Waiting for process rollout_proc4 to join... +[2023-03-11 11:01:27,641][00127] Waiting for process rollout_proc5 to join... +[2023-03-11 11:01:27,642][00127] Waiting for process rollout_proc6 to join... +[2023-03-11 11:01:27,653][00127] Waiting for process rollout_proc7 to join... 
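+
+Throughout the run, each "Saving .../checkpoint_XXX.pth" line is paired with a "Removing" of the oldest file (just above, checkpoint_000000978_4005888.pth is saved while checkpoint_000000782_3203072.pth is removed), so only the newest checkpoints stay on disk. A minimal sketch of that keep-last-N rotation, assuming PyTorch and the naming scheme visible in the log; this is illustrative, not Sample Factory's actual checkpoint code:
+
+import re
+from pathlib import Path
+
+import torch
+
+def save_with_rotation(ckpt_dir, policy_version, env_frames, state, keep_last=2):
+    """Save checkpoint_<version>_<frames>.pth, then prune older files,
+    mirroring the Saving/Removing pairs in the log (sketch only)."""
+    directory = Path(ckpt_dir)
+    directory.mkdir(parents=True, exist_ok=True)
+    path = directory / f"checkpoint_{policy_version:09d}_{env_frames}.pth"
+    torch.save(state, path)
+    pattern = re.compile(r"checkpoint_(\d{9})_(\d+)\.pth")
+    checkpoints = sorted(
+        (p for p in directory.iterdir() if pattern.fullmatch(p.name)),
+        key=lambda p: int(pattern.fullmatch(p.name).group(1)),
+    )
+    for old in checkpoints[:-keep_last]:
+        old.unlink()  # e.g. "Removing .../checkpoint_000000782_3203072.pth"
+    return path
+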
+[2023-03-11 11:01:27,654][00127] Batcher 0 profile tree view: +batching: 27.2237, releasing_batches: 0.0245 +[2023-03-11 11:01:27,655][00127] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 572.2740 +update_model: 8.4871 + weight_update: 0.0017 +one_step: 0.0063 + handle_policy_step: 527.0538 + deserialize: 15.0771, stack: 2.8985, obs_to_device_normalize: 114.7894, forward: 251.1924, send_messages: 28.3041 + prepare_outputs: 86.8889 + to_cpu: 53.9259 +[2023-03-11 11:01:27,656][00127] Learner 0 profile tree view: +misc: 0.0058, prepare_batch: 18.0810 +train: 75.3699 + epoch_init: 0.0234, minibatch_init: 0.0064, losses_postprocess: 0.5622, kl_divergence: 0.4779, after_optimizer: 32.1472 + calculate_losses: 27.0488 + losses_init: 0.0034, forward_head: 1.7383, bptt_initial: 17.6407, tail: 1.0932, advantages_returns: 0.2859, losses: 3.6945 + bptt: 2.2697 + bptt_forward_core: 2.1739 + update: 14.4576 + clip: 1.3730 +[2023-03-11 11:01:27,657][00127] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.4518, enqueue_policy_requests: 151.8972, env_step: 879.0450, overhead: 20.3899, complete_rollouts: 6.3289 +save_policy_outputs: 20.2930 + split_output_tensors: 9.9240 +[2023-03-11 11:01:27,658][00127] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3305, enqueue_policy_requests: 155.0347, env_step: 873.7203, overhead: 20.0240, complete_rollouts: 7.5855 +save_policy_outputs: 20.0161 + split_output_tensors: 9.7461 +[2023-03-11 11:01:27,660][00127] Loop Runner_EvtLoop terminating... +[2023-03-11 11:01:27,661][00127] Runner profile tree view: +main_loop: 1180.8338 +[2023-03-11 11:01:27,662][00127] Collected {0: 4005888}, FPS: 3392.4 +[2023-03-11 11:01:27,871][00127] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-03-11 11:01:27,872][00127] Overriding arg 'num_workers' with value 1 passed from command line +[2023-03-11 11:01:27,875][00127] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-03-11 11:01:27,876][00127] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-03-11 11:01:27,877][00127] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-03-11 11:01:27,878][00127] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-03-11 11:01:27,880][00127] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-03-11 11:01:27,881][00127] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-03-11 11:01:27,882][00127] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-03-11 11:01:27,883][00127] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-03-11 11:01:27,884][00127] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-03-11 11:01:27,885][00127] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-03-11 11:01:27,886][00127] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-03-11 11:01:27,887][00127] Adding new argument 'enjoy_script'=None that is not in the saved config file! 
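+
+The runner's summary line above checks out: 4,005,888 collected frames over the 1180.8338 s main loop is 4005888 / 1180.8338 ≈ 3392.4 FPS, exactly the figure reported. The evaluation startup that follows then loads the saved config.json and layers command-line arguments on top, logging "Overriding" for keys already present in the file and "Adding new argument" for keys that are not. A minimal sketch of that merge, assuming a flat JSON config (only the two log messages are taken from the run; the real loader is Sample Factory's):
+
+import json
+
+def load_config_with_overrides(config_path, cli_args):
+    """Merge CLI args into a saved experiment config, reproducing the
+    'Overriding' / 'Adding new argument' messages seen in the log."""
+    with open(config_path) as f:
+        cfg = json.load(f)
+    for key, value in cli_args.items():
+        if key in cfg:
+            print(f"Overriding arg '{key}' with value {value} passed from command line")
+        else:
+            print(f"Adding new argument '{key}'={value} that is not in the saved config file!")
+        cfg[key] = value
+    return cfg
+
+# e.g. load_config_with_overrides(
+#     "/content/train_dir/default_experiment/config.json",
+#     {"num_workers": 1, "no_render": True, "save_video": True},
+# )
+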
+[2023-03-11 11:01:27,888][00127] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-03-11 11:01:27,920][00127] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-03-11 11:01:27,922][00127] RunningMeanStd input shape: (3, 72, 128) +[2023-03-11 11:01:27,925][00127] RunningMeanStd input shape: (1,) +[2023-03-11 11:01:27,939][00127] ConvEncoder: input_channels=3 +[2023-03-11 11:01:28,598][00127] Conv encoder output size: 512 +[2023-03-11 11:01:28,599][00127] Policy head output size: 512 +[2023-03-11 11:01:31,012][00127] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-03-11 11:01:32,263][00127] Num frames 100... +[2023-03-11 11:01:32,374][00127] Num frames 200... +[2023-03-11 11:01:32,482][00127] Num frames 300... +[2023-03-11 11:01:32,603][00127] Num frames 400... +[2023-03-11 11:01:32,717][00127] Num frames 500... +[2023-03-11 11:01:32,859][00127] Num frames 600... +[2023-03-11 11:01:33,015][00127] Num frames 700... +[2023-03-11 11:01:33,173][00127] Num frames 800... +[2023-03-11 11:01:33,325][00127] Num frames 900... +[2023-03-11 11:01:33,483][00127] Num frames 1000... +[2023-03-11 11:01:33,676][00127] Avg episode rewards: #0: 27.790, true rewards: #0: 10.790 +[2023-03-11 11:01:33,678][00127] Avg episode reward: 27.790, avg true_objective: 10.790 +[2023-03-11 11:01:33,711][00127] Num frames 1100... +[2023-03-11 11:01:33,862][00127] Num frames 1200... +[2023-03-11 11:01:34,017][00127] Num frames 1300... +[2023-03-11 11:01:34,176][00127] Num frames 1400... +[2023-03-11 11:01:34,335][00127] Num frames 1500... +[2023-03-11 11:01:34,496][00127] Num frames 1600... +[2023-03-11 11:01:34,680][00127] Num frames 1700... +[2023-03-11 11:01:34,854][00127] Num frames 1800... +[2023-03-11 11:01:35,037][00127] Num frames 1900... +[2023-03-11 11:01:35,198][00127] Num frames 2000... +[2023-03-11 11:01:35,402][00127] Num frames 2100... +[2023-03-11 11:01:35,524][00127] Avg episode rewards: #0: 26.675, true rewards: #0: 10.675 +[2023-03-11 11:01:35,526][00127] Avg episode reward: 26.675, avg true_objective: 10.675 +[2023-03-11 11:01:35,642][00127] Num frames 2200... +[2023-03-11 11:01:35,813][00127] Num frames 2300... +[2023-03-11 11:01:35,996][00127] Num frames 2400... +[2023-03-11 11:01:36,195][00127] Num frames 2500... +[2023-03-11 11:01:36,385][00127] Num frames 2600... +[2023-03-11 11:01:36,588][00127] Num frames 2700... +[2023-03-11 11:01:36,777][00127] Num frames 2800... +[2023-03-11 11:01:36,977][00127] Num frames 2900... +[2023-03-11 11:01:37,187][00127] Num frames 3000... +[2023-03-11 11:01:37,391][00127] Num frames 3100... +[2023-03-11 11:01:37,547][00127] Num frames 3200... +[2023-03-11 11:01:37,707][00127] Num frames 3300... +[2023-03-11 11:01:37,870][00127] Num frames 3400... +[2023-03-11 11:01:38,035][00127] Num frames 3500... +[2023-03-11 11:01:38,161][00127] Avg episode rewards: #0: 28.477, true rewards: #0: 11.810 +[2023-03-11 11:01:38,164][00127] Avg episode reward: 28.477, avg true_objective: 11.810 +[2023-03-11 11:01:38,256][00127] Num frames 3600... +[2023-03-11 11:01:38,422][00127] Num frames 3700... +[2023-03-11 11:01:38,583][00127] Num frames 3800... +[2023-03-11 11:01:38,742][00127] Num frames 3900... +[2023-03-11 11:01:38,906][00127] Num frames 4000... +[2023-03-11 11:01:39,062][00127] Num frames 4100... +[2023-03-11 11:01:39,214][00127] Num frames 4200... +[2023-03-11 11:01:39,331][00127] Num frames 4300... +[2023-03-11 11:01:39,448][00127] Num frames 4400... 
+[2023-03-11 11:01:39,563][00127] Num frames 4500... +[2023-03-11 11:01:39,677][00127] Num frames 4600... +[2023-03-11 11:01:39,794][00127] Num frames 4700... +[2023-03-11 11:01:39,915][00127] Num frames 4800... +[2023-03-11 11:01:40,028][00127] Num frames 4900... +[2023-03-11 11:01:40,146][00127] Num frames 5000... +[2023-03-11 11:01:40,257][00127] Num frames 5100... +[2023-03-11 11:01:40,369][00127] Num frames 5200... +[2023-03-11 11:01:40,485][00127] Num frames 5300... +[2023-03-11 11:01:40,599][00127] Num frames 5400... +[2023-03-11 11:01:40,709][00127] Num frames 5500... +[2023-03-11 11:01:40,831][00127] Num frames 5600... +[2023-03-11 11:01:40,938][00127] Avg episode rewards: #0: 36.107, true rewards: #0: 14.107 +[2023-03-11 11:01:40,939][00127] Avg episode reward: 36.107, avg true_objective: 14.107 +[2023-03-11 11:01:41,011][00127] Num frames 5700... +[2023-03-11 11:01:41,117][00127] Num frames 5800... +[2023-03-11 11:01:41,235][00127] Num frames 5900... +[2023-03-11 11:01:41,345][00127] Num frames 6000... +[2023-03-11 11:01:41,457][00127] Num frames 6100... +[2023-03-11 11:01:41,570][00127] Num frames 6200... +[2023-03-11 11:01:41,681][00127] Num frames 6300... +[2023-03-11 11:01:41,788][00127] Num frames 6400... +[2023-03-11 11:01:41,902][00127] Num frames 6500... +[2023-03-11 11:01:41,969][00127] Avg episode rewards: #0: 32.620, true rewards: #0: 13.020 +[2023-03-11 11:01:41,971][00127] Avg episode reward: 32.620, avg true_objective: 13.020 +[2023-03-11 11:01:42,073][00127] Num frames 6600... +[2023-03-11 11:01:42,181][00127] Num frames 6700... +[2023-03-11 11:01:42,292][00127] Num frames 6800... +[2023-03-11 11:01:42,403][00127] Num frames 6900... +[2023-03-11 11:01:42,521][00127] Num frames 7000... +[2023-03-11 11:01:42,631][00127] Num frames 7100... +[2023-03-11 11:01:42,741][00127] Num frames 7200... +[2023-03-11 11:01:42,849][00127] Num frames 7300... +[2023-03-11 11:01:42,966][00127] Num frames 7400... +[2023-03-11 11:01:43,063][00127] Avg episode rewards: #0: 30.063, true rewards: #0: 12.397 +[2023-03-11 11:01:43,064][00127] Avg episode reward: 30.063, avg true_objective: 12.397 +[2023-03-11 11:01:43,134][00127] Num frames 7500... +[2023-03-11 11:01:43,242][00127] Num frames 7600... +[2023-03-11 11:01:43,353][00127] Num frames 7700... +[2023-03-11 11:01:43,471][00127] Num frames 7800... +[2023-03-11 11:01:43,625][00127] Avg episode rewards: #0: 26.703, true rewards: #0: 11.274 +[2023-03-11 11:01:43,627][00127] Avg episode reward: 26.703, avg true_objective: 11.274 +[2023-03-11 11:01:43,639][00127] Num frames 7900... +[2023-03-11 11:01:43,749][00127] Num frames 8000... +[2023-03-11 11:01:43,861][00127] Num frames 8100... +[2023-03-11 11:01:43,981][00127] Num frames 8200... +[2023-03-11 11:01:44,108][00127] Num frames 8300... +[2023-03-11 11:01:44,218][00127] Num frames 8400... +[2023-03-11 11:01:44,328][00127] Num frames 8500... +[2023-03-11 11:01:44,430][00127] Avg episode rewards: #0: 24.679, true rewards: #0: 10.679 +[2023-03-11 11:01:44,432][00127] Avg episode reward: 24.679, avg true_objective: 10.679 +[2023-03-11 11:01:44,496][00127] Num frames 8600... +[2023-03-11 11:01:44,606][00127] Num frames 8700... +[2023-03-11 11:01:44,716][00127] Num frames 8800... +[2023-03-11 11:01:44,826][00127] Num frames 8900... +[2023-03-11 11:01:44,946][00127] Num frames 9000... +[2023-03-11 11:01:45,055][00127] Num frames 9100... +[2023-03-11 11:01:45,169][00127] Num frames 9200... +[2023-03-11 11:01:45,281][00127] Num frames 9300... +[2023-03-11 11:01:45,391][00127] Num frames 9400... 
+[2023-03-11 11:01:45,505][00127] Num frames 9500... +[2023-03-11 11:01:45,629][00127] Num frames 9600... +[2023-03-11 11:01:45,726][00127] Avg episode rewards: #0: 24.923, true rewards: #0: 10.701 +[2023-03-11 11:01:45,728][00127] Avg episode reward: 24.923, avg true_objective: 10.701 +[2023-03-11 11:01:45,812][00127] Num frames 9700... +[2023-03-11 11:01:45,925][00127] Num frames 9800... +[2023-03-11 11:01:46,044][00127] Num frames 9900... +[2023-03-11 11:01:46,168][00127] Num frames 10000... +[2023-03-11 11:01:46,283][00127] Num frames 10100... +[2023-03-11 11:01:46,413][00127] Num frames 10200... +[2023-03-11 11:01:46,528][00127] Num frames 10300... +[2023-03-11 11:01:46,639][00127] Num frames 10400... +[2023-03-11 11:01:46,751][00127] Num frames 10500... +[2023-03-11 11:01:46,870][00127] Num frames 10600... +[2023-03-11 11:01:46,989][00127] Num frames 10700... +[2023-03-11 11:01:47,142][00127] Avg episode rewards: #0: 25.583, true rewards: #0: 10.783 +[2023-03-11 11:01:47,144][00127] Avg episode reward: 25.583, avg true_objective: 10.783 +[2023-03-11 11:02:52,585][00127] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-03-11 11:42:37,813][00127] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-03-11 11:42:37,814][00127] Overriding arg 'num_workers' with value 1 passed from command line +[2023-03-11 11:42:37,817][00127] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-03-11 11:42:37,819][00127] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-03-11 11:42:37,822][00127] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-03-11 11:42:37,825][00127] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-03-11 11:42:37,826][00127] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-03-11 11:42:37,828][00127] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-03-11 11:42:37,829][00127] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-03-11 11:42:37,830][00127] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-03-11 11:42:37,831][00127] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-03-11 11:42:37,832][00127] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-03-11 11:42:37,834][00127] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-03-11 11:42:37,836][00127] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-03-11 11:42:37,837][00127] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-03-11 11:42:37,859][00127] RunningMeanStd input shape: (3, 72, 128) +[2023-03-11 11:42:37,861][00127] RunningMeanStd input shape: (1,) +[2023-03-11 11:42:37,874][00127] ConvEncoder: input_channels=3 +[2023-03-11 11:42:37,911][00127] Conv encoder output size: 512 +[2023-03-11 11:42:37,912][00127] Policy head output size: 512 +[2023-03-11 11:42:37,947][00127] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-03-11 11:42:38,564][00127] Num frames 100... +[2023-03-11 11:42:38,719][00127] Num frames 200... +[2023-03-11 11:42:38,873][00127] Num frames 300... +[2023-03-11 11:42:39,028][00127] Num frames 400... 
+[2023-03-11 11:42:39,181][00127] Num frames 500... +[2023-03-11 11:42:39,337][00127] Num frames 600... +[2023-03-11 11:42:39,496][00127] Num frames 700... +[2023-03-11 11:42:39,653][00127] Num frames 800... +[2023-03-11 11:42:39,803][00127] Num frames 900... +[2023-03-11 11:42:39,956][00127] Num frames 1000... +[2023-03-11 11:42:40,105][00127] Num frames 1100... +[2023-03-11 11:42:40,237][00127] Avg episode rewards: #0: 23.520, true rewards: #0: 11.520 +[2023-03-11 11:42:40,239][00127] Avg episode reward: 23.520, avg true_objective: 11.520 +[2023-03-11 11:42:40,311][00127] Num frames 1200... +[2023-03-11 11:42:40,475][00127] Num frames 1300... +[2023-03-11 11:42:40,635][00127] Num frames 1400... +[2023-03-11 11:42:40,789][00127] Num frames 1500... +[2023-03-11 11:42:40,944][00127] Num frames 1600... +[2023-03-11 11:42:41,104][00127] Num frames 1700... +[2023-03-11 11:42:41,259][00127] Num frames 1800... +[2023-03-11 11:42:41,414][00127] Num frames 1900... +[2023-03-11 11:42:41,571][00127] Num frames 2000... +[2023-03-11 11:42:41,711][00127] Num frames 2100... +[2023-03-11 11:42:41,823][00127] Num frames 2200... +[2023-03-11 11:42:41,944][00127] Num frames 2300... +[2023-03-11 11:42:42,105][00127] Num frames 2400... +[2023-03-11 11:42:42,247][00127] Num frames 2500... +[2023-03-11 11:42:42,357][00127] Num frames 2600... +[2023-03-11 11:42:42,480][00127] Num frames 2700... +[2023-03-11 11:42:42,591][00127] Num frames 2800... +[2023-03-11 11:42:42,702][00127] Num frames 2900... +[2023-03-11 11:42:42,812][00127] Num frames 3000... +[2023-03-11 11:42:42,931][00127] Num frames 3100... +[2023-03-11 11:42:43,049][00127] Num frames 3200... +[2023-03-11 11:42:43,161][00127] Avg episode rewards: #0: 38.260, true rewards: #0: 16.260 +[2023-03-11 11:42:43,164][00127] Avg episode reward: 38.260, avg true_objective: 16.260 +[2023-03-11 11:42:43,221][00127] Num frames 3300... +[2023-03-11 11:42:43,335][00127] Num frames 3400... +[2023-03-11 11:42:43,447][00127] Num frames 3500... +[2023-03-11 11:42:43,563][00127] Num frames 3600... +[2023-03-11 11:42:43,675][00127] Num frames 3700... +[2023-03-11 11:42:43,786][00127] Num frames 3800... +[2023-03-11 11:42:43,896][00127] Num frames 3900... +[2023-03-11 11:42:44,016][00127] Num frames 4000... +[2023-03-11 11:42:44,134][00127] Num frames 4100... +[2023-03-11 11:42:44,242][00127] Num frames 4200... +[2023-03-11 11:42:44,387][00127] Avg episode rewards: #0: 32.253, true rewards: #0: 14.253 +[2023-03-11 11:42:44,388][00127] Avg episode reward: 32.253, avg true_objective: 14.253 +[2023-03-11 11:42:44,419][00127] Num frames 4300... +[2023-03-11 11:42:44,533][00127] Num frames 4400... +[2023-03-11 11:42:44,655][00127] Num frames 4500... +[2023-03-11 11:42:44,768][00127] Num frames 4600... +[2023-03-11 11:42:44,875][00127] Num frames 4700... +[2023-03-11 11:42:44,988][00127] Num frames 4800... +[2023-03-11 11:42:45,101][00127] Num frames 4900... +[2023-03-11 11:42:45,214][00127] Num frames 5000... +[2023-03-11 11:42:45,327][00127] Num frames 5100... +[2023-03-11 11:42:45,439][00127] Num frames 5200... +[2023-03-11 11:42:45,553][00127] Num frames 5300... +[2023-03-11 11:42:45,821][00127] Avg episode rewards: #0: 30.240, true rewards: #0: 13.490 +[2023-03-11 11:42:45,823][00127] Avg episode reward: 30.240, avg true_objective: 13.490 +[2023-03-11 11:42:45,835][00127] Num frames 5400... +[2023-03-11 11:42:46,072][00127] Num frames 5500... +[2023-03-11 11:42:46,314][00127] Num frames 5600... +[2023-03-11 11:42:46,468][00127] Num frames 5700... 
+[2023-03-11 11:42:46,670][00127] Num frames 5800... +[2023-03-11 11:42:46,849][00127] Num frames 5900... +[2023-03-11 11:42:47,093][00127] Num frames 6000... +[2023-03-11 11:42:47,263][00127] Num frames 6100... +[2023-03-11 11:42:47,474][00127] Num frames 6200... +[2023-03-11 11:42:47,589][00127] Avg episode rewards: #0: 27.656, true rewards: #0: 12.456 +[2023-03-11 11:42:47,591][00127] Avg episode reward: 27.656, avg true_objective: 12.456 +[2023-03-11 11:42:47,748][00127] Num frames 6300... +[2023-03-11 11:42:47,911][00127] Num frames 6400... +[2023-03-11 11:42:48,097][00127] Num frames 6500... +[2023-03-11 11:42:48,279][00127] Num frames 6600... +[2023-03-11 11:42:48,507][00127] Num frames 6700... +[2023-03-11 11:42:48,705][00127] Avg episode rewards: #0: 24.287, true rewards: #0: 11.287 +[2023-03-11 11:42:48,711][00127] Avg episode reward: 24.287, avg true_objective: 11.287 +[2023-03-11 11:42:48,766][00127] Num frames 6800... +[2023-03-11 11:42:49,046][00127] Num frames 6900... +[2023-03-11 11:42:49,250][00127] Num frames 7000... +[2023-03-11 11:42:49,421][00127] Num frames 7100... +[2023-03-11 11:42:49,611][00127] Num frames 7200... +[2023-03-11 11:42:49,877][00127] Avg episode rewards: #0: 22.217, true rewards: #0: 10.360 +[2023-03-11 11:42:49,883][00127] Avg episode reward: 22.217, avg true_objective: 10.360 +[2023-03-11 11:42:49,974][00127] Num frames 7300... +[2023-03-11 11:42:50,179][00127] Num frames 7400... +[2023-03-11 11:42:50,424][00127] Num frames 7500... +[2023-03-11 11:42:50,632][00127] Num frames 7600... +[2023-03-11 11:42:50,853][00127] Num frames 7700... +[2023-03-11 11:42:51,033][00127] Avg episode rewards: #0: 20.705, true rewards: #0: 9.705 +[2023-03-11 11:42:51,035][00127] Avg episode reward: 20.705, avg true_objective: 9.705 +[2023-03-11 11:42:51,132][00127] Num frames 7800... +[2023-03-11 11:42:51,422][00127] Num frames 7900... +[2023-03-11 11:42:51,666][00127] Num frames 8000... +[2023-03-11 11:42:52,035][00127] Num frames 8100... +[2023-03-11 11:42:52,192][00127] Num frames 8200... +[2023-03-11 11:42:52,287][00127] Avg episode rewards: #0: 19.581, true rewards: #0: 9.137 +[2023-03-11 11:42:52,289][00127] Avg episode reward: 19.581, avg true_objective: 9.137 +[2023-03-11 11:42:52,411][00127] Num frames 8300... +[2023-03-11 11:42:52,563][00127] Num frames 8400... +[2023-03-11 11:42:52,720][00127] Num frames 8500... +[2023-03-11 11:42:52,879][00127] Num frames 8600... +[2023-03-11 11:42:53,034][00127] Num frames 8700... +[2023-03-11 11:42:53,187][00127] Num frames 8800... +[2023-03-11 11:42:53,342][00127] Num frames 8900... +[2023-03-11 11:42:53,490][00127] Num frames 9000... +[2023-03-11 11:42:53,646][00127] Num frames 9100... +[2023-03-11 11:42:53,808][00127] Num frames 9200... +[2023-03-11 11:42:53,971][00127] Num frames 9300... +[2023-03-11 11:42:54,133][00127] Num frames 9400... +[2023-03-11 11:42:54,297][00127] Num frames 9500... +[2023-03-11 11:42:54,465][00127] Num frames 9600... +[2023-03-11 11:42:54,626][00127] Num frames 9700... +[2023-03-11 11:42:54,794][00127] Num frames 9800... +[2023-03-11 11:42:54,955][00127] Num frames 9900... +[2023-03-11 11:42:55,092][00127] Avg episode rewards: #0: 21.851, true rewards: #0: 9.951 +[2023-03-11 11:42:55,095][00127] Avg episode reward: 21.851, avg true_objective: 9.951 +[2023-03-11 11:43:56,598][00127] Replay video saved to /content/train_dir/default_experiment/replay.mp4! 
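+
+Each "Avg episode rewards" line in these replay sessions is a running mean over the episodes completed so far: in the 11:01 session, the first episode scores 27.790, and the printed average of 26.675 after the second episode implies that episode scored 2 * 26.675 - 27.790 = 25.560 (likewise 10.560 for the true objective). A small sketch of that bookkeeping; the names are hypothetical, and "true rewards" tracks the environment's unshaped objective where the env reports one:
+
+class EpisodeStats:
+    """Running per-policy averages, printed after every finished episode
+    in the same format as the log (illustrative sketch)."""
+
+    def __init__(self):
+        self.rewards = []
+        self.true_rewards = []
+
+    def episode_finished(self, reward, true_reward):
+        self.rewards.append(reward)
+        self.true_rewards.append(true_reward)
+        n = len(self.rewards)
+        avg = sum(self.rewards) / n
+        avg_true = sum(self.true_rewards) / n
+        print(f"Avg episode rewards: #0: {avg:.3f}, true rewards: #0: {avg_true:.3f}")
+
+stats = EpisodeStats()
+stats.episode_finished(27.790, 10.790)  # first episode of the 11:01 session
+stats.episode_finished(25.560, 10.560)  # second episode -> avg 26.675 / 10.675
+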
+[2023-03-11 11:45:42,249][00127] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-03-11 11:45:42,251][00127] Overriding arg 'num_workers' with value 1 passed from command line +[2023-03-11 11:45:42,254][00127] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-03-11 11:45:42,256][00127] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-03-11 11:45:42,259][00127] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-03-11 11:45:42,261][00127] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-03-11 11:45:42,262][00127] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-03-11 11:45:42,264][00127] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-03-11 11:45:42,265][00127] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-03-11 11:45:42,267][00127] Adding new argument 'hf_repository'='Taratata/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-03-11 11:45:42,268][00127] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-03-11 11:45:42,270][00127] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-03-11 11:45:42,271][00127] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-03-11 11:45:42,272][00127] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-03-11 11:45:42,274][00127] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-03-11 11:45:42,297][00127] RunningMeanStd input shape: (3, 72, 128) +[2023-03-11 11:45:42,299][00127] RunningMeanStd input shape: (1,) +[2023-03-11 11:45:42,313][00127] ConvEncoder: input_channels=3 +[2023-03-11 11:45:42,350][00127] Conv encoder output size: 512 +[2023-03-11 11:45:42,351][00127] Policy head output size: 512 +[2023-03-11 11:45:42,370][00127] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-03-11 11:45:42,809][00127] Num frames 100... +[2023-03-11 11:45:42,920][00127] Num frames 200... +[2023-03-11 11:45:43,036][00127] Num frames 300... +[2023-03-11 11:45:43,149][00127] Num frames 400... +[2023-03-11 11:45:43,267][00127] Num frames 500... +[2023-03-11 11:45:43,382][00127] Num frames 600... +[2023-03-11 11:45:43,497][00127] Num frames 700... +[2023-03-11 11:45:43,606][00127] Num frames 800... +[2023-03-11 11:45:43,715][00127] Num frames 900... +[2023-03-11 11:45:43,827][00127] Num frames 1000... +[2023-03-11 11:45:43,936][00127] Num frames 1100... +[2023-03-11 11:45:44,044][00127] Num frames 1200... +[2023-03-11 11:45:44,118][00127] Avg episode rewards: #0: 25.160, true rewards: #0: 12.160 +[2023-03-11 11:45:44,119][00127] Avg episode reward: 25.160, avg true_objective: 12.160 +[2023-03-11 11:45:44,228][00127] Num frames 1300... +[2023-03-11 11:45:44,341][00127] Num frames 1400... +[2023-03-11 11:45:44,450][00127] Num frames 1500... +[2023-03-11 11:45:44,573][00127] Num frames 1600... +[2023-03-11 11:45:44,690][00127] Num frames 1700... +[2023-03-11 11:45:44,800][00127] Num frames 1800... +[2023-03-11 11:45:44,916][00127] Num frames 1900... +[2023-03-11 11:45:45,033][00127] Num frames 2000... +[2023-03-11 11:45:45,157][00127] Num frames 2100... +[2023-03-11 11:45:45,276][00127] Num frames 2200... 
+[2023-03-11 11:45:45,412][00127] Avg episode rewards: #0: 24.860, true rewards: #0: 11.360
+[2023-03-11 11:45:45,415][00127] Avg episode reward: 24.860, avg true_objective: 11.360
+[2023-03-11 11:45:45,450][00127] Num frames 2300...
+[2023-03-11 11:45:45,571][00127] Num frames 2400...
+[2023-03-11 11:45:45,732][00127] Num frames 2500...
+[2023-03-11 11:45:45,890][00127] Num frames 2600...
+[2023-03-11 11:45:46,045][00127] Num frames 2700...
+[2023-03-11 11:45:46,198][00127] Num frames 2800...
+[2023-03-11 11:45:46,360][00127] Num frames 2900...
+[2023-03-11 11:45:46,467][00127] Avg episode rewards: #0: 21.437, true rewards: #0: 9.770
+[2023-03-11 11:45:46,469][00127] Avg episode reward: 21.437, avg true_objective: 9.770
+[2023-03-11 11:45:46,578][00127] Num frames 3000...
+[2023-03-11 11:45:46,740][00127] Num frames 3100...
+[2023-03-11 11:45:46,894][00127] Num frames 3200...
+[2023-03-11 11:45:47,067][00127] Num frames 3300...
+[2023-03-11 11:45:47,223][00127] Num frames 3400...
+[2023-03-11 11:45:47,350][00127] Avg episode rewards: #0: 18.358, true rewards: #0: 8.607
+[2023-03-11 11:45:47,352][00127] Avg episode reward: 18.358, avg true_objective: 8.607
+[2023-03-11 11:45:47,445][00127] Num frames 3500...
+[2023-03-11 11:45:47,613][00127] Num frames 3600...
+[2023-03-11 11:45:47,725][00127] Avg episode rewards: #0: 15.270, true rewards: #0: 7.270
+[2023-03-11 11:45:47,727][00127] Avg episode reward: 15.270, avg true_objective: 7.270
+[2023-03-11 11:45:47,836][00127] Num frames 3700...
+[2023-03-11 11:45:47,999][00127] Num frames 3800...
+[2023-03-11 11:45:48,158][00127] Num frames 3900...
+[2023-03-11 11:45:48,322][00127] Num frames 4000...
+[2023-03-11 11:45:48,493][00127] Num frames 4100...
+[2023-03-11 11:45:48,660][00127] Num frames 4200...
+[2023-03-11 11:45:48,828][00127] Num frames 4300...
+[2023-03-11 11:45:48,990][00127] Num frames 4400...
+[2023-03-11 11:45:49,132][00127] Num frames 4500...
+[2023-03-11 11:45:49,250][00127] Num frames 4600...
+[2023-03-11 11:45:49,368][00127] Num frames 4700...
+[2023-03-11 11:45:49,486][00127] Num frames 4800...
+[2023-03-11 11:45:49,599][00127] Num frames 4900...
+[2023-03-11 11:45:49,719][00127] Num frames 5000...
+[2023-03-11 11:45:49,837][00127] Num frames 5100...
+[2023-03-11 11:45:49,939][00127] Avg episode rewards: #0: 18.732, true rewards: #0: 8.565
+[2023-03-11 11:45:49,940][00127] Avg episode reward: 18.732, avg true_objective: 8.565
+[2023-03-11 11:45:50,011][00127] Num frames 5200...
+[2023-03-11 11:45:50,119][00127] Num frames 5300...
+[2023-03-11 11:45:50,231][00127] Num frames 5400...
+[2023-03-11 11:45:50,339][00127] Num frames 5500...
+[2023-03-11 11:45:50,459][00127] Num frames 5600...
+[2023-03-11 11:45:50,571][00127] Num frames 5700...
+[2023-03-11 11:45:50,694][00127] Num frames 5800...
+[2023-03-11 11:45:50,802][00127] Num frames 5900...
+[2023-03-11 11:45:50,912][00127] Num frames 6000...
+[2023-03-11 11:45:51,024][00127] Avg episode rewards: #0: 19.073, true rewards: #0: 8.644
+[2023-03-11 11:45:51,026][00127] Avg episode reward: 19.073, avg true_objective: 8.644
+[2023-03-11 11:45:51,084][00127] Num frames 6100...
+[2023-03-11 11:45:51,192][00127] Num frames 6200...
+[2023-03-11 11:45:51,308][00127] Num frames 6300...
+[2023-03-11 11:45:51,423][00127] Num frames 6400...
+[2023-03-11 11:45:51,537][00127] Num frames 6500...
+[2023-03-11 11:45:51,649][00127] Num frames 6600...
+[2023-03-11 11:45:51,767][00127] Num frames 6700...
+[2023-03-11 11:45:51,880][00127] Num frames 6800...
+[2023-03-11 11:45:52,001][00127] Num frames 6900...
+[2023-03-11 11:45:52,113][00127] Num frames 7000...
+[2023-03-11 11:45:52,184][00127] Avg episode rewards: #0: 19.015, true rewards: #0: 8.765
+[2023-03-11 11:45:52,185][00127] Avg episode reward: 19.015, avg true_objective: 8.765
+[2023-03-11 11:45:52,301][00127] Num frames 7100...
+[2023-03-11 11:45:52,416][00127] Num frames 7200...
+[2023-03-11 11:45:52,529][00127] Num frames 7300...
+[2023-03-11 11:45:52,645][00127] Num frames 7400...
+[2023-03-11 11:45:52,769][00127] Num frames 7500...
+[2023-03-11 11:45:52,880][00127] Num frames 7600...
+[2023-03-11 11:45:52,996][00127] Avg episode rewards: #0: 18.169, true rewards: #0: 8.502
+[2023-03-11 11:45:52,998][00127] Avg episode reward: 18.169, avg true_objective: 8.502
+[2023-03-11 11:45:53,055][00127] Num frames 7700...
+[2023-03-11 11:45:53,175][00127] Num frames 7800...
+[2023-03-11 11:45:53,295][00127] Num frames 7900...
+[2023-03-11 11:45:53,406][00127] Num frames 8000...
+[2023-03-11 11:45:53,522][00127] Num frames 8100...
+[2023-03-11 11:45:53,574][00127] Avg episode rewards: #0: 17.000, true rewards: #0: 8.100
+[2023-03-11 11:45:53,575][00127] Avg episode reward: 17.000, avg true_objective: 8.100
+[2023-03-11 11:46:41,540][00127] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2023-03-11 11:46:53,587][00127] The model has been pushed to https://huggingface.co/Taratata/rl_course_vizdoom_health_gathering_supreme
+[2023-03-11 11:48:24,968][00127] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2023-03-11 11:48:24,970][00127] Overriding arg 'num_workers' with value 1 passed from command line
+[2023-03-11 11:48:24,972][00127] Adding new argument 'no_render'=True that is not in the saved config file!
+[2023-03-11 11:48:24,974][00127] Adding new argument 'save_video'=True that is not in the saved config file!
+[2023-03-11 11:48:24,976][00127] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2023-03-11 11:48:24,977][00127] Adding new argument 'video_name'=None that is not in the saved config file!
+[2023-03-11 11:48:24,979][00127] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2023-03-11 11:48:24,981][00127] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2023-03-11 11:48:24,982][00127] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2023-03-11 11:48:24,983][00127] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2023-03-11 11:48:24,984][00127] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2023-03-11 11:48:24,985][00127] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2023-03-11 11:48:24,986][00127] Adding new argument 'train_script'=None that is not in the saved config file!
+[2023-03-11 11:48:24,987][00127] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2023-03-11 11:48:24,988][00127] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2023-03-11 11:48:25,005][00127] RunningMeanStd input shape: (3, 72, 128)
+[2023-03-11 11:48:25,008][00127] RunningMeanStd input shape: (1,)
+[2023-03-11 11:48:25,022][00127] ConvEncoder: input_channels=3
+[2023-03-11 11:48:25,062][00127] Conv encoder output size: 512
+[2023-03-11 11:48:25,063][00127] Policy head output size: 512
+[2023-03-11 11:48:25,082][00127] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-03-11 11:48:25,531][00127] Num frames 100...
+[2023-03-11 11:48:25,643][00127] Num frames 200...
+[2023-03-11 11:48:25,755][00127] Num frames 300...
+[2023-03-11 11:48:25,864][00127] Num frames 400...
+[2023-03-11 11:48:25,984][00127] Num frames 500...
+[2023-03-11 11:48:26,098][00127] Num frames 600...
+[2023-03-11 11:48:26,204][00127] Num frames 700...
+[2023-03-11 11:48:26,316][00127] Num frames 800...
+[2023-03-11 11:48:26,435][00127] Num frames 900...
+[2023-03-11 11:48:26,552][00127] Num frames 1000...
+[2023-03-11 11:48:26,668][00127] Avg episode rewards: #0: 22.550, true rewards: #0: 10.550
+[2023-03-11 11:48:26,671][00127] Avg episode reward: 22.550, avg true_objective: 10.550
+[2023-03-11 11:48:26,724][00127] Num frames 1100...
+[2023-03-11 11:48:26,841][00127] Num frames 1200...
+[2023-03-11 11:48:26,952][00127] Num frames 1300...
+[2023-03-11 11:48:27,062][00127] Num frames 1400...
+[2023-03-11 11:48:27,171][00127] Num frames 1500...
+[2023-03-11 11:48:27,288][00127] Num frames 1600...
+[2023-03-11 11:48:27,410][00127] Num frames 1700...
+[2023-03-11 11:48:27,525][00127] Num frames 1800...
+[2023-03-11 11:48:27,645][00127] Num frames 1900...
+[2023-03-11 11:48:27,754][00127] Num frames 2000...
+[2023-03-11 11:48:27,864][00127] Num frames 2100...
+[2023-03-11 11:48:27,986][00127] Num frames 2200...
+[2023-03-11 11:48:28,109][00127] Num frames 2300...
+[2023-03-11 11:48:28,220][00127] Avg episode rewards: #0: 27.245, true rewards: #0: 11.745
+[2023-03-11 11:48:28,222][00127] Avg episode reward: 27.245, avg true_objective: 11.745
+[2023-03-11 11:48:28,284][00127] Num frames 2400...
+[2023-03-11 11:48:28,395][00127] Num frames 2500...
+[2023-03-11 11:48:28,507][00127] Num frames 2600...
+[2023-03-11 11:48:28,624][00127] Num frames 2700...
+[2023-03-11 11:48:28,747][00127] Num frames 2800...
+[2023-03-11 11:48:28,858][00127] Num frames 2900...
+[2023-03-11 11:48:28,968][00127] Num frames 3000...
+[2023-03-11 11:48:29,074][00127] Num frames 3100...
+[2023-03-11 11:48:29,186][00127] Num frames 3200...
+[2023-03-11 11:48:29,301][00127] Num frames 3300...
+[2023-03-11 11:48:29,425][00127] Num frames 3400...
+[2023-03-11 11:48:29,535][00127] Num frames 3500...
+[2023-03-11 11:48:29,652][00127] Num frames 3600...
+[2023-03-11 11:48:29,764][00127] Num frames 3700...
+[2023-03-11 11:48:29,879][00127] Num frames 3800...
+[2023-03-11 11:48:29,989][00127] Num frames 3900...
+[2023-03-11 11:48:30,118][00127] Num frames 4000...
+[2023-03-11 11:48:30,231][00127] Num frames 4100...
+[2023-03-11 11:48:30,340][00127] Num frames 4200...
+[2023-03-11 11:48:30,472][00127] Avg episode rewards: #0: 35.563, true rewards: #0: 14.230
+[2023-03-11 11:48:30,474][00127] Avg episode reward: 35.563, avg true_objective: 14.230
+[2023-03-11 11:48:30,517][00127] Num frames 4300...
+[2023-03-11 11:48:30,634][00127] Num frames 4400...
+[2023-03-11 11:48:30,753][00127] Num frames 4500...
+[2023-03-11 11:48:30,864][00127] Num frames 4600...
+[2023-03-11 11:48:30,978][00127] Num frames 4700...
+[2023-03-11 11:48:31,088][00127] Num frames 4800...
+[2023-03-11 11:48:31,199][00127] Num frames 4900...
+[2023-03-11 11:48:31,310][00127] Num frames 5000...
+[2023-03-11 11:48:31,408][00127] Avg episode rewards: #0: 30.592, true rewards: #0: 12.592
+[2023-03-11 11:48:31,410][00127] Avg episode reward: 30.592, avg true_objective: 12.592
+[2023-03-11 11:48:31,481][00127] Num frames 5100...
+[2023-03-11 11:48:31,590][00127] Num frames 5200...
+[2023-03-11 11:48:31,710][00127] Num frames 5300...
+[2023-03-11 11:48:31,822][00127] Num frames 5400...
+[2023-03-11 11:48:31,931][00127] Num frames 5500...
+[2023-03-11 11:48:32,062][00127] Num frames 5600...
+[2023-03-11 11:48:32,169][00127] Num frames 5700...
+[2023-03-11 11:48:32,284][00127] Num frames 5800...
+[2023-03-11 11:48:32,398][00127] Num frames 5900...
+[2023-03-11 11:48:32,513][00127] Num frames 6000...
+[2023-03-11 11:48:32,621][00127] Num frames 6100...
+[2023-03-11 11:48:32,743][00127] Num frames 6200...
+[2023-03-11 11:48:32,862][00127] Num frames 6300...
+[2023-03-11 11:48:32,959][00127] Avg episode rewards: #0: 30.476, true rewards: #0: 12.676
+[2023-03-11 11:48:32,960][00127] Avg episode reward: 30.476, avg true_objective: 12.676
+[2023-03-11 11:48:33,029][00127] Num frames 6400...
+[2023-03-11 11:48:33,138][00127] Num frames 6500...
+[2023-03-11 11:48:33,295][00127] Num frames 6600...
+[2023-03-11 11:48:33,485][00127] Avg episode rewards: #0: 26.150, true rewards: #0: 11.150
+[2023-03-11 11:48:33,487][00127] Avg episode reward: 26.150, avg true_objective: 11.150
+[2023-03-11 11:48:33,516][00127] Num frames 6700...
+[2023-03-11 11:48:33,686][00127] Num frames 6800...
+[2023-03-11 11:48:33,842][00127] Num frames 6900...
+[2023-03-11 11:48:33,997][00127] Num frames 7000...
+[2023-03-11 11:48:34,151][00127] Num frames 7100...
+[2023-03-11 11:48:34,303][00127] Num frames 7200...
+[2023-03-11 11:48:34,460][00127] Num frames 7300...
+[2023-03-11 11:48:34,612][00127] Num frames 7400...
+[2023-03-11 11:48:34,786][00127] Num frames 7500...
+[2023-03-11 11:48:34,936][00127] Num frames 7600...
+[2023-03-11 11:48:35,092][00127] Num frames 7700...
+[2023-03-11 11:48:35,266][00127] Avg episode rewards: #0: 26.254, true rewards: #0: 11.111
+[2023-03-11 11:48:35,268][00127] Avg episode reward: 26.254, avg true_objective: 11.111
+[2023-03-11 11:48:35,311][00127] Num frames 7800...
+[2023-03-11 11:48:35,470][00127] Num frames 7900...
+[2023-03-11 11:48:35,629][00127] Num frames 8000...
+[2023-03-11 11:48:35,788][00127] Num frames 8100...
+[2023-03-11 11:48:35,899][00127] Avg episode rewards: #0: 23.542, true rewards: #0: 10.167
+[2023-03-11 11:48:35,901][00127] Avg episode reward: 23.542, avg true_objective: 10.167
+[2023-03-11 11:48:36,004][00127] Num frames 8200...
+[2023-03-11 11:48:36,157][00127] Num frames 8300...
+[2023-03-11 11:48:36,312][00127] Num frames 8400...
+[2023-03-11 11:48:36,466][00127] Num frames 8500...
+[2023-03-11 11:48:36,632][00127] Num frames 8600...
+[2023-03-11 11:48:36,761][00127] Num frames 8700...
+[2023-03-11 11:48:36,879][00127] Num frames 8800...
+[2023-03-11 11:48:36,997][00127] Num frames 8900...
+[2023-03-11 11:48:37,115][00127] Num frames 9000...
+[2023-03-11 11:48:37,223][00127] Num frames 9100...
+[2023-03-11 11:48:37,335][00127] Num frames 9200...
+[2023-03-11 11:48:37,492][00127] Avg episode rewards: #0: 23.762, true rewards: #0: 10.318
+[2023-03-11 11:48:37,494][00127] Avg episode reward: 23.762, avg true_objective: 10.318
+[2023-03-11 11:48:37,514][00127] Num frames 9300...
+[2023-03-11 11:48:37,624][00127] Num frames 9400...
+[2023-03-11 11:48:37,732][00127] Num frames 9500...
+[2023-03-11 11:48:37,849][00127] Num frames 9600...
+[2023-03-11 11:48:37,962][00127] Num frames 9700...
+[2023-03-11 11:48:38,073][00127] Num frames 9800...
+[2023-03-11 11:48:38,183][00127] Num frames 9900...
+[2023-03-11 11:48:38,301][00127] Num frames 10000...
+[2023-03-11 11:48:38,411][00127] Num frames 10100...
+[2023-03-11 11:48:38,520][00127] Num frames 10200...
+[2023-03-11 11:48:38,627][00127] Num frames 10300...
+[2023-03-11 11:48:38,732][00127] Avg episode rewards: #0: 23.841, true rewards: #0: 10.341
+[2023-03-11 11:48:38,733][00127] Avg episode reward: 23.841, avg true_objective: 10.341
+[2023-03-11 11:49:39,627][00127] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2023-03-11 11:50:02,077][00127] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2023-03-11 11:50:02,079][00127] Overriding arg 'num_workers' with value 1 passed from command line
+[2023-03-11 11:50:02,081][00127] Adding new argument 'no_render'=True that is not in the saved config file!
+[2023-03-11 11:50:02,083][00127] Adding new argument 'save_video'=True that is not in the saved config file!
+[2023-03-11 11:50:02,085][00127] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2023-03-11 11:50:02,087][00127] Adding new argument 'video_name'=None that is not in the saved config file!
+[2023-03-11 11:50:02,088][00127] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2023-03-11 11:50:02,090][00127] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2023-03-11 11:50:02,091][00127] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2023-03-11 11:50:02,092][00127] Adding new argument 'hf_repository'='Taratata/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2023-03-11 11:50:02,093][00127] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2023-03-11 11:50:02,094][00127] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2023-03-11 11:50:02,095][00127] Adding new argument 'train_script'=None that is not in the saved config file!
+[2023-03-11 11:50:02,096][00127] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2023-03-11 11:50:02,097][00127] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2023-03-11 11:50:02,117][00127] RunningMeanStd input shape: (3, 72, 128)
+[2023-03-11 11:50:02,120][00127] RunningMeanStd input shape: (1,)
+[2023-03-11 11:50:02,133][00127] ConvEncoder: input_channels=3
+[2023-03-11 11:50:02,168][00127] Conv encoder output size: 512
+[2023-03-11 11:50:02,172][00127] Policy head output size: 512
+[2023-03-11 11:50:02,192][00127] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-03-11 11:50:02,628][00127] Num frames 100...
+[2023-03-11 11:50:02,748][00127] Num frames 200...
+[2023-03-11 11:50:02,868][00127] Num frames 300...
+[2023-03-11 11:50:02,990][00127] Num frames 400...
+[2023-03-11 11:50:03,108][00127] Num frames 500...
+[2023-03-11 11:50:03,227][00127] Num frames 600...
+[2023-03-11 11:50:03,340][00127] Num frames 700...
+[2023-03-11 11:50:03,459][00127] Num frames 800...
+[2023-03-11 11:50:03,580][00127] Num frames 900...
+[2023-03-11 11:50:03,696][00127] Num frames 1000...
+[2023-03-11 11:50:03,768][00127] Avg episode rewards: #0: 22.080, true rewards: #0: 10.080
+[2023-03-11 11:50:03,770][00127] Avg episode reward: 22.080, avg true_objective: 10.080
+[2023-03-11 11:50:03,875][00127] Num frames 1100...
+[2023-03-11 11:50:03,999][00127] Num frames 1200...
+[2023-03-11 11:50:04,108][00127] Num frames 1300...
+[2023-03-11 11:50:04,224][00127] Num frames 1400...
+[2023-03-11 11:50:04,339][00127] Num frames 1500...
+[2023-03-11 11:50:04,450][00127] Num frames 1600...
+[2023-03-11 11:50:04,566][00127] Num frames 1700...
+[2023-03-11 11:50:04,685][00127] Num frames 1800...
+[2023-03-11 11:50:04,801][00127] Num frames 1900...
+[2023-03-11 11:50:04,909][00127] Num frames 2000...
+[2023-03-11 11:50:05,022][00127] Num frames 2100...
+[2023-03-11 11:50:05,132][00127] Num frames 2200...
+[2023-03-11 11:50:05,247][00127] Num frames 2300...
+[2023-03-11 11:50:05,357][00127] Num frames 2400...
+[2023-03-11 11:50:05,469][00127] Num frames 2500...
+[2023-03-11 11:50:05,580][00127] Num frames 2600...
+[2023-03-11 11:50:05,695][00127] Num frames 2700...
+[2023-03-11 11:50:05,807][00127] Num frames 2800...
+[2023-03-11 11:50:05,859][00127] Avg episode rewards: #0: 32.500, true rewards: #0: 14.000
+[2023-03-11 11:50:05,861][00127] Avg episode reward: 32.500, avg true_objective: 14.000
+[2023-03-11 11:50:05,977][00127] Num frames 2900...
+[2023-03-11 11:50:06,085][00127] Num frames 3000...
+[2023-03-11 11:50:06,194][00127] Num frames 3100...
+[2023-03-11 11:50:06,311][00127] Num frames 3200...
+[2023-03-11 11:50:06,423][00127] Num frames 3300...
+[2023-03-11 11:50:06,531][00127] Num frames 3400...
+[2023-03-11 11:50:06,661][00127] Num frames 3500...
+[2023-03-11 11:50:06,776][00127] Num frames 3600...
+[2023-03-11 11:50:06,828][00127] Avg episode rewards: #0: 27.333, true rewards: #0: 12.000
+[2023-03-11 11:50:06,830][00127] Avg episode reward: 27.333, avg true_objective: 12.000
+[2023-03-11 11:50:06,941][00127] Num frames 3700...
+[2023-03-11 11:50:07,058][00127] Num frames 3800...
+[2023-03-11 11:50:07,163][00127] Num frames 3900...
+[2023-03-11 11:50:07,270][00127] Num frames 4000...
+[2023-03-11 11:50:07,381][00127] Avg episode rewards: #0: 21.870, true rewards: #0: 10.120
+[2023-03-11 11:50:07,383][00127] Avg episode reward: 21.870, avg true_objective: 10.120
+[2023-03-11 11:50:07,452][00127] Num frames 4100...
+[2023-03-11 11:50:07,565][00127] Num frames 4200...
+[2023-03-11 11:50:07,679][00127] Num frames 4300...
+[2023-03-11 11:50:07,790][00127] Num frames 4400...
+[2023-03-11 11:50:07,908][00127] Num frames 4500...
+[2023-03-11 11:50:08,029][00127] Num frames 4600...
+[2023-03-11 11:50:08,141][00127] Num frames 4700...
+[2023-03-11 11:50:08,257][00127] Num frames 4800...
+[2023-03-11 11:50:08,378][00127] Num frames 4900...
+[2023-03-11 11:50:08,483][00127] Avg episode rewards: #0: 21.488, true rewards: #0: 9.888
+[2023-03-11 11:50:08,484][00127] Avg episode reward: 21.488, avg true_objective: 9.888
+[2023-03-11 11:50:08,553][00127] Num frames 5000...
+[2023-03-11 11:50:08,665][00127] Num frames 5100...
+[2023-03-11 11:50:08,776][00127] Num frames 5200...
+[2023-03-11 11:50:08,905][00127] Num frames 5300...
+[2023-03-11 11:50:09,033][00127] Num frames 5400...
+[2023-03-11 11:50:09,144][00127] Num frames 5500...
+[2023-03-11 11:50:09,255][00127] Num frames 5600...
+[2023-03-11 11:50:09,398][00127] Avg episode rewards: #0: 20.462, true rewards: #0: 9.462
+[2023-03-11 11:50:09,400][00127] Avg episode reward: 20.462, avg true_objective: 9.462
+[2023-03-11 11:50:09,440][00127] Num frames 5700...
+[2023-03-11 11:50:09,550][00127] Num frames 5800...
+[2023-03-11 11:50:09,662][00127] Num frames 5900...
+[2023-03-11 11:50:09,791][00127] Num frames 6000...
+[2023-03-11 11:50:09,950][00127] Num frames 6100...
+[2023-03-11 11:50:10,097][00127] Avg episode rewards: #0: 18.653, true rewards: #0: 8.796
+[2023-03-11 11:50:10,099][00127] Avg episode reward: 18.653, avg true_objective: 8.796
+[2023-03-11 11:50:10,170][00127] Num frames 6200...
+[2023-03-11 11:50:10,324][00127] Num frames 6300...
+[2023-03-11 11:50:10,479][00127] Num frames 6400...
+[2023-03-11 11:50:10,633][00127] Num frames 6500...
+[2023-03-11 11:50:10,710][00127] Avg episode rewards: #0: 16.886, true rewards: #0: 8.136
+[2023-03-11 11:50:10,714][00127] Avg episode reward: 16.886, avg true_objective: 8.136
+[2023-03-11 11:50:10,861][00127] Num frames 6600...
+[2023-03-11 11:50:11,011][00127] Num frames 6700...
+[2023-03-11 11:50:11,161][00127] Num frames 6800...
+[2023-03-11 11:50:11,320][00127] Num frames 6900...
+[2023-03-11 11:50:11,473][00127] Num frames 7000...
+[2023-03-11 11:50:11,634][00127] Num frames 7100...
+[2023-03-11 11:50:11,794][00127] Num frames 7200...
+[2023-03-11 11:50:11,969][00127] Num frames 7300...
+[2023-03-11 11:50:12,154][00127] Num frames 7400...
+[2023-03-11 11:50:12,328][00127] Num frames 7500...
+[2023-03-11 11:50:12,499][00127] Num frames 7600...
+[2023-03-11 11:50:12,663][00127] Num frames 7700...
+[2023-03-11 11:50:12,823][00127] Num frames 7800...
+[2023-03-11 11:50:12,980][00127] Num frames 7900...
+[2023-03-11 11:50:13,146][00127] Num frames 8000...
+[2023-03-11 11:50:13,323][00127] Avg episode rewards: #0: 19.860, true rewards: #0: 8.971
+[2023-03-11 11:50:13,325][00127] Avg episode reward: 19.860, avg true_objective: 8.971
+[2023-03-11 11:50:13,358][00127] Num frames 8100...
+[2023-03-11 11:50:13,470][00127] Num frames 8200...
+[2023-03-11 11:50:13,581][00127] Num frames 8300...
+[2023-03-11 11:50:13,695][00127] Num frames 8400...
+[2023-03-11 11:50:13,823][00127] Num frames 8500...
+[2023-03-11 11:50:13,944][00127] Num frames 8600...
+[2023-03-11 11:50:14,054][00127] Num frames 8700...
+[2023-03-11 11:50:14,173][00127] Num frames 8800...
+[2023-03-11 11:50:14,292][00127] Num frames 8900...
+[2023-03-11 11:50:14,404][00127] Num frames 9000...
+[2023-03-11 11:50:14,517][00127] Num frames 9100...
+[2023-03-11 11:50:14,630][00127] Num frames 9200...
+[2023-03-11 11:50:14,756][00127] Num frames 9300...
+[2023-03-11 11:50:14,865][00127] Num frames 9400...
+[2023-03-11 11:50:15,017][00127] Avg episode rewards: #0: 21.182, true rewards: #0: 9.482
+[2023-03-11 11:50:15,018][00127] Avg episode reward: 21.182, avg true_objective: 9.482
+[2023-03-11 11:51:10,872][00127] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2023-03-11 11:51:20,603][00127] The model has been pushed to https://huggingface.co/Taratata/rl_course_vizdoom_health_gathering_supreme
+[2023-03-11 11:52:14,489][00127] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2023-03-11 11:52:14,491][00127] Overriding arg 'num_workers' with value 1 passed from command line
+[2023-03-11 11:52:14,493][00127] Adding new argument 'no_render'=True that is not in the saved config file!
+[2023-03-11 11:52:14,494][00127] Adding new argument 'save_video'=True that is not in the saved config file!
+[2023-03-11 11:52:14,496][00127] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2023-03-11 11:52:14,498][00127] Adding new argument 'video_name'=None that is not in the saved config file!
+[2023-03-11 11:52:14,500][00127] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2023-03-11 11:52:14,502][00127] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2023-03-11 11:52:14,503][00127] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2023-03-11 11:52:14,504][00127] Adding new argument 'hf_repository'='Taratata/rl_course_vizdoom_health_gathering_supreme-v2' that is not in the saved config file!
+[2023-03-11 11:52:14,504][00127] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2023-03-11 11:52:14,506][00127] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2023-03-11 11:52:14,507][00127] Adding new argument 'train_script'=None that is not in the saved config file!
+[2023-03-11 11:52:14,508][00127] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2023-03-11 11:52:14,509][00127] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2023-03-11 11:52:14,529][00127] RunningMeanStd input shape: (3, 72, 128)
+[2023-03-11 11:52:14,531][00127] RunningMeanStd input shape: (1,)
+[2023-03-11 11:52:14,545][00127] ConvEncoder: input_channels=3
+[2023-03-11 11:52:14,580][00127] Conv encoder output size: 512
+[2023-03-11 11:52:14,581][00127] Policy head output size: 512
+[2023-03-11 11:52:14,600][00127] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2023-03-11 11:52:15,038][00127] Num frames 100...
+[2023-03-11 11:52:15,156][00127] Num frames 200...
+[2023-03-11 11:52:15,265][00127] Num frames 300...
+[2023-03-11 11:52:15,374][00127] Num frames 400...
+[2023-03-11 11:52:15,490][00127] Num frames 500...
+[2023-03-11 11:52:15,612][00127] Num frames 600...
+[2023-03-11 11:52:15,731][00127] Num frames 700...
+[2023-03-11 11:52:15,848][00127] Num frames 800...
+[2023-03-11 11:52:15,959][00127] Num frames 900...
+[2023-03-11 11:52:16,070][00127] Num frames 1000...
+[2023-03-11 11:52:16,179][00127] Num frames 1100...
+[2023-03-11 11:52:16,302][00127] Num frames 1200...
+[2023-03-11 11:52:16,424][00127] Num frames 1300...
+[2023-03-11 11:52:16,569][00127] Avg episode rewards: #0: 32.760, true rewards: #0: 13.760
+[2023-03-11 11:52:16,571][00127] Avg episode reward: 32.760, avg true_objective: 13.760
+[2023-03-11 11:52:16,601][00127] Num frames 1400...
+[2023-03-11 11:52:16,719][00127] Num frames 1500...
+[2023-03-11 11:52:16,832][00127] Num frames 1600...
+[2023-03-11 11:52:16,949][00127] Num frames 1700...
+[2023-03-11 11:52:17,060][00127] Num frames 1800...
+[2023-03-11 11:52:17,175][00127] Num frames 1900...
+[2023-03-11 11:52:17,296][00127] Num frames 2000...
+[2023-03-11 11:52:17,422][00127] Avg episode rewards: #0: 23.320, true rewards: #0: 10.320
+[2023-03-11 11:52:17,424][00127] Avg episode reward: 23.320, avg true_objective: 10.320
+[2023-03-11 11:52:17,469][00127] Num frames 2100...
+[2023-03-11 11:52:17,588][00127] Num frames 2200...
+[2023-03-11 11:52:17,698][00127] Num frames 2300...
+[2023-03-11 11:52:17,817][00127] Num frames 2400...
+[2023-03-11 11:52:17,931][00127] Num frames 2500...
+[2023-03-11 11:52:18,040][00127] Num frames 2600...
+[2023-03-11 11:52:18,146][00127] Num frames 2700...
+[2023-03-11 11:52:18,252][00127] Num frames 2800...
+[2023-03-11 11:52:18,363][00127] Num frames 2900...
+[2023-03-11 11:52:18,470][00127] Num frames 3000...
+[2023-03-11 11:52:18,578][00127] Num frames 3100...
+[2023-03-11 11:52:18,692][00127] Num frames 3200...
+[2023-03-11 11:52:18,814][00127] Num frames 3300...
+[2023-03-11 11:52:18,930][00127] Num frames 3400...
+[2023-03-11 11:52:19,042][00127] Num frames 3500...
+[2023-03-11 11:52:19,152][00127] Num frames 3600...
+[2023-03-11 11:52:19,265][00127] Num frames 3700...
+[2023-03-11 11:52:19,376][00127] Num frames 3800...
+[2023-03-11 11:52:19,493][00127] Num frames 3900...
+[2023-03-11 11:52:19,607][00127] Num frames 4000...
+[2023-03-11 11:52:19,720][00127] Avg episode rewards: #0: 30.173, true rewards: #0: 13.507
+[2023-03-11 11:52:19,723][00127] Avg episode reward: 30.173, avg true_objective: 13.507
+[2023-03-11 11:52:19,786][00127] Num frames 4100...
+[2023-03-11 11:52:19,894][00127] Num frames 4200...
+[2023-03-11 11:52:20,004][00127] Num frames 4300...
+[2023-03-11 11:52:20,114][00127] Num frames 4400...
+[2023-03-11 11:52:20,224][00127] Num frames 4500...
+[2023-03-11 11:52:20,335][00127] Num frames 4600...
+[2023-03-11 11:52:20,448][00127] Num frames 4700...
+[2023-03-11 11:52:20,559][00127] Num frames 4800...
+[2023-03-11 11:52:20,671][00127] Num frames 4900...
+[2023-03-11 11:52:20,755][00127] Avg episode rewards: #0: 26.790, true rewards: #0: 12.290
+[2023-03-11 11:52:20,756][00127] Avg episode reward: 26.790, avg true_objective: 12.290
+[2023-03-11 11:52:20,861][00127] Num frames 5000...
+[2023-03-11 11:52:20,971][00127] Num frames 5100...
+[2023-03-11 11:52:21,082][00127] Num frames 5200...
+[2023-03-11 11:52:21,199][00127] Num frames 5300...
+[2023-03-11 11:52:21,321][00127] Num frames 5400...
+[2023-03-11 11:52:21,433][00127] Num frames 5500...
+[2023-03-11 11:52:21,541][00127] Num frames 5600...
+[2023-03-11 11:52:21,639][00127] Avg episode rewards: #0: 25.060, true rewards: #0: 11.260
+[2023-03-11 11:52:21,641][00127] Avg episode reward: 25.060, avg true_objective: 11.260
+[2023-03-11 11:52:21,720][00127] Num frames 5700...
+[2023-03-11 11:52:21,834][00127] Num frames 5800...
+[2023-03-11 11:52:21,944][00127] Num frames 5900...
+[2023-03-11 11:52:22,055][00127] Num frames 6000...
+[2023-03-11 11:52:22,164][00127] Num frames 6100...
+[2023-03-11 11:52:22,274][00127] Num frames 6200...
+[2023-03-11 11:52:22,350][00127] Avg episode rewards: #0: 22.863, true rewards: #0: 10.363
+[2023-03-11 11:52:22,351][00127] Avg episode reward: 22.863, avg true_objective: 10.363
+[2023-03-11 11:52:22,442][00127] Num frames 6300...
+[2023-03-11 11:52:22,559][00127] Num frames 6400...
+[2023-03-11 11:52:22,727][00127] Num frames 6500...
+[2023-03-11 11:52:22,884][00127] Num frames 6600...
+[2023-03-11 11:52:23,044][00127] Num frames 6700...
+[2023-03-11 11:52:23,195][00127] Num frames 6800...
+[2023-03-11 11:52:23,345][00127] Num frames 6900...
+[2023-03-11 11:52:23,503][00127] Num frames 7000...
+[2023-03-11 11:52:23,671][00127] Num frames 7100...
+[2023-03-11 11:52:23,825][00127] Num frames 7200...
+[2023-03-11 11:52:23,920][00127] Avg episode rewards: #0: 22.459, true rewards: #0: 10.316
+[2023-03-11 11:52:23,922][00127] Avg episode reward: 22.459, avg true_objective: 10.316
+[2023-03-11 11:52:24,039][00127] Num frames 7300...
+[2023-03-11 11:52:24,190][00127] Num frames 7400...
+[2023-03-11 11:52:24,343][00127] Num frames 7500...
+[2023-03-11 11:52:24,505][00127] Num frames 7600...
+[2023-03-11 11:52:24,664][00127] Num frames 7700...
+[2023-03-11 11:52:24,823][00127] Num frames 7800...
+[2023-03-11 11:52:24,930][00127] Avg episode rewards: #0: 20.786, true rewards: #0: 9.786
+[2023-03-11 11:52:24,932][00127] Avg episode reward: 20.786, avg true_objective: 9.786
+[2023-03-11 11:52:25,043][00127] Num frames 7900...
+[2023-03-11 11:52:25,198][00127] Num frames 8000...
+[2023-03-11 11:52:25,375][00127] Num frames 8100...
+[2023-03-11 11:52:25,530][00127] Num frames 8200...
+[2023-03-11 11:52:25,692][00127] Num frames 8300...
+[2023-03-11 11:52:25,858][00127] Num frames 8400...
+[2023-03-11 11:52:26,013][00127] Num frames 8500...
+[2023-03-11 11:52:26,145][00127] Num frames 8600...
+[2023-03-11 11:52:26,259][00127] Num frames 8700...
+[2023-03-11 11:52:26,395][00127] Num frames 8800...
+[2023-03-11 11:52:26,505][00127] Num frames 8900...
+[2023-03-11 11:52:26,626][00127] Num frames 9000...
+[2023-03-11 11:52:26,738][00127] Num frames 9100...
+[2023-03-11 11:52:26,849][00127] Num frames 9200...
+[2023-03-11 11:52:26,969][00127] Num frames 9300...
+[2023-03-11 11:52:27,090][00127] Num frames 9400...
+[2023-03-11 11:52:27,200][00127] Num frames 9500...
+[2023-03-11 11:52:27,311][00127] Num frames 9600...
+[2023-03-11 11:52:27,427][00127] Num frames 9700...
+[2023-03-11 11:52:27,538][00127] Num frames 9800...
+[2023-03-11 11:52:27,656][00127] Avg episode rewards: #0: 24.050, true rewards: #0: 10.939
+[2023-03-11 11:52:27,658][00127] Avg episode reward: 24.050, avg true_objective: 10.939
+[2023-03-11 11:52:27,721][00127] Num frames 9900...
+[2023-03-11 11:52:27,831][00127] Num frames 10000...
+[2023-03-11 11:52:27,955][00127] Num frames 10100...
+[2023-03-11 11:52:28,062][00127] Num frames 10200...
+[2023-03-11 11:52:28,171][00127] Num frames 10300...
+[2023-03-11 11:52:28,278][00127] Num frames 10400...
+[2023-03-11 11:52:28,388][00127] Num frames 10500...
+[2023-03-11 11:52:28,502][00127] Num frames 10600...
+[2023-03-11 11:52:28,622][00127] Num frames 10700...
+[2023-03-11 11:52:28,738][00127] Num frames 10800...
+[2023-03-11 11:52:28,856][00127] Num frames 10900...
+[2023-03-11 11:52:28,915][00127] Avg episode rewards: #0: 24.001, true rewards: #0: 10.901
+[2023-03-11 11:52:28,917][00127] Avg episode reward: 24.001, avg true_objective: 10.901
+[2023-03-11 11:53:32,400][00127] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
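
Each evaluation block above loads the same checkpoint, so the spread in final averages across runs (between roughly 17 and 24 after ten episodes, with mid-run peaks up to 35.563) reflects environment stochasticity rather than any change to the policy. The checkpoint file named in the "Loading state from checkpoint" lines is a PyTorch save file that can be inspected offline; a minimal sketch (the top-level dict keys are not shown in the log, so they are printed rather than assumed):

    import torch

    # Path exactly as it appears in the log.
    ckpt_path = "/content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth"

    # Newer PyTorch releases may additionally need weights_only=False here,
    # since the file stores more than bare tensors.
    ckpt = torch.load(ckpt_path, map_location="cpu")
    print(sorted(ckpt.keys()))  # e.g. model/optimizer state; verify before use

The filename itself appears to encode training progress: policy version 978 and 4,005,888 environment steps collected by the time the checkpoint was written.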