diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,1126 @@ +[2024-09-22 11:56:50,676][00564] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-09-22 11:56:50,679][00564] Rollout worker 0 uses device cpu +[2024-09-22 11:56:50,681][00564] Rollout worker 1 uses device cpu +[2024-09-22 11:56:50,682][00564] Rollout worker 2 uses device cpu +[2024-09-22 11:56:50,684][00564] Rollout worker 3 uses device cpu +[2024-09-22 11:56:50,686][00564] Rollout worker 4 uses device cpu +[2024-09-22 11:56:50,691][00564] Rollout worker 5 uses device cpu +[2024-09-22 11:56:50,692][00564] Rollout worker 6 uses device cpu +[2024-09-22 11:56:50,693][00564] Rollout worker 7 uses device cpu +[2024-09-22 11:56:50,858][00564] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 11:56:50,860][00564] InferenceWorker_p0-w0: min num requests: 2 +[2024-09-22 11:56:50,894][00564] Starting all processes... +[2024-09-22 11:56:50,895][00564] Starting process learner_proc0 +[2024-09-22 11:56:51,597][00564] Starting all processes... +[2024-09-22 11:56:51,606][00564] Starting process inference_proc0-0 +[2024-09-22 11:56:51,606][00564] Starting process rollout_proc0 +[2024-09-22 11:56:51,606][00564] Starting process rollout_proc1 +[2024-09-22 11:56:51,606][00564] Starting process rollout_proc2 +[2024-09-22 11:56:51,607][00564] Starting process rollout_proc3 +[2024-09-22 11:56:51,607][00564] Starting process rollout_proc4 +[2024-09-22 11:56:51,607][00564] Starting process rollout_proc5 +[2024-09-22 11:56:51,607][00564] Starting process rollout_proc6 +[2024-09-22 11:56:51,607][00564] Starting process rollout_proc7 +[2024-09-22 11:57:07,108][02942] Worker 3 uses CPU cores [1] +[2024-09-22 11:57:07,172][02938] Worker 0 uses CPU cores [0] +[2024-09-22 11:57:07,313][02925] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 11:57:07,313][02941] Worker 2 uses CPU cores [0] +[2024-09-22 11:57:07,313][02925] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-09-22 11:57:07,323][02944] Worker 4 uses CPU cores [0] +[2024-09-22 11:57:07,356][02925] Num visible devices: 1 +[2024-09-22 11:57:07,363][02945] Worker 7 uses CPU cores [1] +[2024-09-22 11:57:07,384][02925] Starting seed is not provided +[2024-09-22 11:57:07,384][02925] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 11:57:07,385][02925] Initializing actor-critic model on device cuda:0 +[2024-09-22 11:57:07,386][02925] RunningMeanStd input shape: (3, 72, 128) +[2024-09-22 11:57:07,389][02925] RunningMeanStd input shape: (1,) +[2024-09-22 11:57:07,427][02925] ConvEncoder: input_channels=3 +[2024-09-22 11:57:07,436][02943] Worker 5 uses CPU cores [1] +[2024-09-22 11:57:07,454][02940] Worker 1 uses CPU cores [1] +[2024-09-22 11:57:07,472][02939] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 11:57:07,472][02939] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-09-22 11:57:07,492][02939] Num visible devices: 1 +[2024-09-22 11:57:07,515][02946] Worker 6 uses CPU cores [0] +[2024-09-22 11:57:07,703][02925] Conv encoder output size: 512 +[2024-09-22 11:57:07,703][02925] Policy head output size: 512 +[2024-09-22 11:57:07,762][02925] Created Actor Critic model with architecture: +[2024-09-22 11:57:07,764][02925] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2024-09-22 11:57:08,243][02925] Using optimizer +[2024-09-22 11:57:09,080][02925] No checkpoints found +[2024-09-22 11:57:09,080][02925] Did not load from checkpoint, starting from scratch! +[2024-09-22 11:57:09,080][02925] Initialized policy 0 weights for model version 0 +[2024-09-22 11:57:09,084][02925] LearnerWorker_p0 finished initialization! +[2024-09-22 11:57:09,086][02925] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-09-22 11:57:09,177][02939] RunningMeanStd input shape: (3, 72, 128) +[2024-09-22 11:57:09,178][02939] RunningMeanStd input shape: (1,) +[2024-09-22 11:57:09,190][02939] ConvEncoder: input_channels=3 +[2024-09-22 11:57:09,290][02939] Conv encoder output size: 512 +[2024-09-22 11:57:09,291][02939] Policy head output size: 512 +[2024-09-22 11:57:09,342][00564] Inference worker 0-0 is ready! +[2024-09-22 11:57:09,344][00564] All inference workers are ready! Signal rollout workers to start! +[2024-09-22 11:57:09,691][02944] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 11:57:09,728][02941] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 11:57:09,739][02945] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 11:57:09,740][02946] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 11:57:09,749][02940] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 11:57:09,736][02938] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 11:57:09,762][02943] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 11:57:09,767][02942] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 11:57:10,257][00564] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-22 11:57:10,851][00564] Heartbeat connected on Batcher_0 +[2024-09-22 11:57:10,855][00564] Heartbeat connected on LearnerWorker_p0 +[2024-09-22 11:57:10,882][02946] Decorrelating experience for 0 frames... +[2024-09-22 11:57:10,881][02944] Decorrelating experience for 0 frames... +[2024-09-22 11:57:10,912][00564] Heartbeat connected on InferenceWorker_p0-w0 +[2024-09-22 11:57:11,110][02945] Decorrelating experience for 0 frames... +[2024-09-22 11:57:11,109][02942] Decorrelating experience for 0 frames... +[2024-09-22 11:57:11,117][02940] Decorrelating experience for 0 frames... +[2024-09-22 11:57:11,428][02944] Decorrelating experience for 32 frames... +[2024-09-22 11:57:11,939][02941] Decorrelating experience for 0 frames... +[2024-09-22 11:57:12,102][02945] Decorrelating experience for 32 frames... +[2024-09-22 11:57:12,104][02942] Decorrelating experience for 32 frames... +[2024-09-22 11:57:12,206][02944] Decorrelating experience for 64 frames... +[2024-09-22 11:57:12,718][02940] Decorrelating experience for 32 frames... +[2024-09-22 11:57:13,185][02941] Decorrelating experience for 32 frames... +[2024-09-22 11:57:13,189][02944] Decorrelating experience for 96 frames... +[2024-09-22 11:57:13,263][02943] Decorrelating experience for 0 frames... +[2024-09-22 11:57:13,382][00564] Heartbeat connected on RolloutWorker_w4 +[2024-09-22 11:57:13,831][02945] Decorrelating experience for 64 frames... +[2024-09-22 11:57:14,056][02946] Decorrelating experience for 32 frames... +[2024-09-22 11:57:14,767][02940] Decorrelating experience for 64 frames... +[2024-09-22 11:57:15,131][02941] Decorrelating experience for 64 frames... +[2024-09-22 11:57:15,260][00564] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-22 11:57:15,313][02943] Decorrelating experience for 32 frames... +[2024-09-22 11:57:15,378][02942] Decorrelating experience for 64 frames... +[2024-09-22 11:57:16,319][02946] Decorrelating experience for 64 frames... +[2024-09-22 11:57:17,528][02940] Decorrelating experience for 96 frames... +[2024-09-22 11:57:17,670][02945] Decorrelating experience for 96 frames... +[2024-09-22 11:57:17,847][00564] Heartbeat connected on RolloutWorker_w1 +[2024-09-22 11:57:17,973][00564] Heartbeat connected on RolloutWorker_w7 +[2024-09-22 11:57:18,021][02942] Decorrelating experience for 96 frames... +[2024-09-22 11:57:18,380][00564] Heartbeat connected on RolloutWorker_w3 +[2024-09-22 11:57:18,524][02943] Decorrelating experience for 64 frames... +[2024-09-22 11:57:19,050][02941] Decorrelating experience for 96 frames... +[2024-09-22 11:57:19,244][02946] Decorrelating experience for 96 frames... +[2024-09-22 11:57:19,409][00564] Heartbeat connected on RolloutWorker_w2 +[2024-09-22 11:57:19,803][00564] Heartbeat connected on RolloutWorker_w6 +[2024-09-22 11:57:20,259][00564] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 51.2. Samples: 512. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-22 11:57:20,266][00564] Avg episode reward: [(0, '2.485')] +[2024-09-22 11:57:21,933][02925] Signal inference workers to stop experience collection... +[2024-09-22 11:57:21,926][02943] Decorrelating experience for 96 frames... +[2024-09-22 11:57:21,939][02939] InferenceWorker_p0-w0: stopping experience collection +[2024-09-22 11:57:22,012][00564] Heartbeat connected on RolloutWorker_w5 +[2024-09-22 11:57:25,247][02925] Signal inference workers to resume experience collection... +[2024-09-22 11:57:25,248][02939] InferenceWorker_p0-w0: resuming experience collection +[2024-09-22 11:57:25,257][00564] Fps is (10 sec: 409.7, 60 sec: 273.1, 300 sec: 273.1). Total num frames: 4096. Throughput: 0: 160.5. Samples: 2408. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-09-22 11:57:25,263][00564] Avg episode reward: [(0, '3.104')] +[2024-09-22 11:57:30,257][00564] Fps is (10 sec: 2458.2, 60 sec: 1228.8, 300 sec: 1228.8). Total num frames: 24576. Throughput: 0: 212.4. Samples: 4248. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 11:57:30,259][00564] Avg episode reward: [(0, '3.946')] +[2024-09-22 11:57:34,935][02939] Updated weights for policy 0, policy_version 10 (0.0266) +[2024-09-22 11:57:35,258][00564] Fps is (10 sec: 3686.0, 60 sec: 1638.3, 300 sec: 1638.3). Total num frames: 40960. Throughput: 0: 388.8. Samples: 9720. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 11:57:35,260][00564] Avg episode reward: [(0, '4.045')] +[2024-09-22 11:57:40,257][00564] Fps is (10 sec: 3276.8, 60 sec: 1911.5, 300 sec: 1911.5). Total num frames: 57344. Throughput: 0: 487.5. Samples: 14624. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 11:57:40,259][00564] Avg episode reward: [(0, '4.299')] +[2024-09-22 11:57:44,745][02939] Updated weights for policy 0, policy_version 20 (0.0034) +[2024-09-22 11:57:45,257][00564] Fps is (10 sec: 4096.3, 60 sec: 2340.5, 300 sec: 2340.5). Total num frames: 81920. Throughput: 0: 517.2. Samples: 18102. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 11:57:45,263][00564] Avg episode reward: [(0, '4.165')] +[2024-09-22 11:57:50,258][00564] Fps is (10 sec: 4505.2, 60 sec: 2559.9, 300 sec: 2559.9). Total num frames: 102400. Throughput: 0: 626.6. Samples: 25064. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 11:57:50,260][00564] Avg episode reward: [(0, '4.473')] +[2024-09-22 11:57:50,265][02925] Saving new best policy, reward=4.473! +[2024-09-22 11:57:55,257][00564] Fps is (10 sec: 3686.5, 60 sec: 2639.6, 300 sec: 2639.6). Total num frames: 118784. Throughput: 0: 650.4. Samples: 29266. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 11:57:55,259][00564] Avg episode reward: [(0, '4.616')] +[2024-09-22 11:57:55,269][02925] Saving new best policy, reward=4.616! +[2024-09-22 11:57:56,160][02939] Updated weights for policy 0, policy_version 30 (0.0039) +[2024-09-22 11:58:00,257][00564] Fps is (10 sec: 3686.7, 60 sec: 2785.3, 300 sec: 2785.3). Total num frames: 139264. Throughput: 0: 725.4. Samples: 32640. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 11:58:00,265][00564] Avg episode reward: [(0, '4.502')] +[2024-09-22 11:58:05,257][00564] Fps is (10 sec: 4096.0, 60 sec: 2904.4, 300 sec: 2904.4). Total num frames: 159744. Throughput: 0: 866.6. Samples: 39508. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 11:58:05,261][00564] Avg episode reward: [(0, '4.384')] +[2024-09-22 11:58:07,055][02939] Updated weights for policy 0, policy_version 40 (0.0037) +[2024-09-22 11:58:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 2867.2, 300 sec: 2867.2). Total num frames: 172032. Throughput: 0: 900.8. Samples: 42942. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 11:58:10,262][00564] Avg episode reward: [(0, '4.367')] +[2024-09-22 11:58:15,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3208.7, 300 sec: 2961.7). Total num frames: 192512. Throughput: 0: 921.3. Samples: 45708. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 11:58:15,260][00564] Avg episode reward: [(0, '4.323')] +[2024-09-22 11:58:17,666][02939] Updated weights for policy 0, policy_version 50 (0.0025) +[2024-09-22 11:58:20,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3618.3, 300 sec: 3101.3). Total num frames: 217088. Throughput: 0: 953.3. Samples: 52616. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 11:58:20,260][00564] Avg episode reward: [(0, '4.536')] +[2024-09-22 11:58:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3058.3). Total num frames: 229376. Throughput: 0: 960.7. Samples: 57854. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 11:58:25,264][00564] Avg episode reward: [(0, '4.479')] +[2024-09-22 11:58:28,912][02939] Updated weights for policy 0, policy_version 60 (0.0035) +[2024-09-22 11:58:30,262][00564] Fps is (10 sec: 3275.0, 60 sec: 3754.3, 300 sec: 3123.0). Total num frames: 249856. Throughput: 0: 931.0. Samples: 60002. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 11:58:30,265][00564] Avg episode reward: [(0, '4.339')] +[2024-09-22 11:58:35,258][00564] Fps is (10 sec: 4095.5, 60 sec: 3822.9, 300 sec: 3180.4). Total num frames: 270336. Throughput: 0: 929.7. Samples: 66900. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 11:58:35,263][00564] Avg episode reward: [(0, '4.536')] +[2024-09-22 11:58:38,537][02939] Updated weights for policy 0, policy_version 70 (0.0015) +[2024-09-22 11:58:40,257][00564] Fps is (10 sec: 4098.3, 60 sec: 3891.2, 300 sec: 3231.3). Total num frames: 290816. Throughput: 0: 967.9. Samples: 72820. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 11:58:40,261][00564] Avg episode reward: [(0, '4.467')] +[2024-09-22 11:58:45,257][00564] Fps is (10 sec: 3686.8, 60 sec: 3754.7, 300 sec: 3233.7). Total num frames: 307200. Throughput: 0: 939.1. Samples: 74898. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 11:58:45,259][00564] Avg episode reward: [(0, '4.446')] +[2024-09-22 11:58:45,268][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000075_307200.pth... +[2024-09-22 11:58:49,457][02939] Updated weights for policy 0, policy_version 80 (0.0027) +[2024-09-22 11:58:50,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3276.8). Total num frames: 327680. Throughput: 0: 923.3. Samples: 81058. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 11:58:50,261][00564] Avg episode reward: [(0, '4.487')] +[2024-09-22 11:58:55,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3315.8). Total num frames: 348160. Throughput: 0: 996.8. Samples: 87800. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 11:58:55,266][00564] Avg episode reward: [(0, '4.459')] +[2024-09-22 11:59:00,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3314.0). Total num frames: 364544. Throughput: 0: 980.8. Samples: 89842. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 11:59:00,264][00564] Avg episode reward: [(0, '4.364')] +[2024-09-22 11:59:00,862][02939] Updated weights for policy 0, policy_version 90 (0.0023) +[2024-09-22 11:59:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3348.0). Total num frames: 385024. Throughput: 0: 953.1. Samples: 95506. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 11:59:05,259][00564] Avg episode reward: [(0, '4.551')] +[2024-09-22 11:59:09,714][02939] Updated weights for policy 0, policy_version 100 (0.0022) +[2024-09-22 11:59:10,260][00564] Fps is (10 sec: 4504.3, 60 sec: 3959.3, 300 sec: 3413.2). Total num frames: 409600. Throughput: 0: 991.8. Samples: 102490. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 11:59:10,262][00564] Avg episode reward: [(0, '4.512')] +[2024-09-22 11:59:15,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3407.9). Total num frames: 425984. Throughput: 0: 1000.1. Samples: 105002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 11:59:15,259][00564] Avg episode reward: [(0, '4.371')] +[2024-09-22 11:59:20,257][00564] Fps is (10 sec: 3687.5, 60 sec: 3822.9, 300 sec: 3434.3). Total num frames: 446464. Throughput: 0: 961.6. Samples: 110172. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 11:59:20,259][00564] Avg episode reward: [(0, '4.254')] +[2024-09-22 11:59:20,846][02939] Updated weights for policy 0, policy_version 110 (0.0034) +[2024-09-22 11:59:25,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3458.8). Total num frames: 466944. Throughput: 0: 986.2. Samples: 117200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 11:59:25,261][00564] Avg episode reward: [(0, '4.360')] +[2024-09-22 11:59:30,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.8, 300 sec: 3481.6). Total num frames: 487424. Throughput: 0: 1011.2. Samples: 120400. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 11:59:30,260][00564] Avg episode reward: [(0, '4.595')] +[2024-09-22 11:59:31,758][02939] Updated weights for policy 0, policy_version 120 (0.0029) +[2024-09-22 11:59:35,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3474.5). Total num frames: 503808. Throughput: 0: 971.3. Samples: 124766. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 11:59:35,262][00564] Avg episode reward: [(0, '4.492')] +[2024-09-22 11:59:40,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3522.6). Total num frames: 528384. Throughput: 0: 977.2. Samples: 131772. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 11:59:40,262][00564] Avg episode reward: [(0, '4.409')] +[2024-09-22 11:59:40,933][02939] Updated weights for policy 0, policy_version 130 (0.0021) +[2024-09-22 11:59:45,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3541.1). Total num frames: 548864. Throughput: 0: 1011.6. Samples: 135366. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-09-22 11:59:45,260][00564] Avg episode reward: [(0, '4.491')] +[2024-09-22 11:59:50,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3532.8). Total num frames: 565248. Throughput: 0: 989.2. Samples: 140018. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 11:59:50,264][00564] Avg episode reward: [(0, '4.363')] +[2024-09-22 11:59:52,153][02939] Updated weights for policy 0, policy_version 140 (0.0022) +[2024-09-22 11:59:55,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3549.9). Total num frames: 585728. Throughput: 0: 980.8. Samples: 146622. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 11:59:55,264][00564] Avg episode reward: [(0, '4.610')] +[2024-09-22 12:00:00,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3590.0). Total num frames: 610304. Throughput: 0: 1001.8. Samples: 150082. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:00:00,265][00564] Avg episode reward: [(0, '4.640')] +[2024-09-22 12:00:00,269][02925] Saving new best policy, reward=4.640! +[2024-09-22 12:00:01,784][02939] Updated weights for policy 0, policy_version 150 (0.0019) +[2024-09-22 12:00:05,257][00564] Fps is (10 sec: 3686.3, 60 sec: 3959.4, 300 sec: 3557.7). Total num frames: 622592. Throughput: 0: 1002.9. Samples: 155302. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:00:05,260][00564] Avg episode reward: [(0, '4.541')] +[2024-09-22 12:00:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3891.4, 300 sec: 3572.6). Total num frames: 643072. Throughput: 0: 974.4. Samples: 161050. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:00:10,261][00564] Avg episode reward: [(0, '4.869')] +[2024-09-22 12:00:10,264][02925] Saving new best policy, reward=4.869! +[2024-09-22 12:00:12,204][02939] Updated weights for policy 0, policy_version 160 (0.0024) +[2024-09-22 12:00:15,257][00564] Fps is (10 sec: 4505.8, 60 sec: 4027.7, 300 sec: 3608.9). Total num frames: 667648. Throughput: 0: 981.0. Samples: 164544. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:00:15,263][00564] Avg episode reward: [(0, '4.950')] +[2024-09-22 12:00:15,272][02925] Saving new best policy, reward=4.950! +[2024-09-22 12:00:20,257][00564] Fps is (10 sec: 4095.9, 60 sec: 3959.5, 300 sec: 3600.2). Total num frames: 684032. Throughput: 0: 1018.4. Samples: 170594. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:00:20,260][00564] Avg episode reward: [(0, '4.941')] +[2024-09-22 12:00:23,413][02939] Updated weights for policy 0, policy_version 170 (0.0040) +[2024-09-22 12:00:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3612.9). Total num frames: 704512. Throughput: 0: 978.4. Samples: 175802. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:00:25,260][00564] Avg episode reward: [(0, '4.926')] +[2024-09-22 12:00:30,257][00564] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3625.0). Total num frames: 724992. Throughput: 0: 974.7. Samples: 179228. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 12:00:30,260][00564] Avg episode reward: [(0, '5.280')] +[2024-09-22 12:00:30,265][02925] Saving new best policy, reward=5.280! +[2024-09-22 12:00:32,245][02939] Updated weights for policy 0, policy_version 180 (0.0025) +[2024-09-22 12:00:35,257][00564] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3636.4). Total num frames: 745472. Throughput: 0: 1016.4. Samples: 185756. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:00:35,262][00564] Avg episode reward: [(0, '5.315')] +[2024-09-22 12:00:35,272][02925] Saving new best policy, reward=5.315! +[2024-09-22 12:00:40,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3608.4). Total num frames: 757760. Throughput: 0: 959.8. Samples: 189812. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:00:40,259][00564] Avg episode reward: [(0, '5.252')] +[2024-09-22 12:00:43,855][02939] Updated weights for policy 0, policy_version 190 (0.0035) +[2024-09-22 12:00:45,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3638.8). Total num frames: 782336. Throughput: 0: 959.4. Samples: 193256. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:00:45,262][00564] Avg episode reward: [(0, '4.877')] +[2024-09-22 12:00:45,271][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000191_782336.pth... +[2024-09-22 12:00:50,257][00564] Fps is (10 sec: 4505.5, 60 sec: 3959.4, 300 sec: 3649.2). Total num frames: 802816. Throughput: 0: 993.7. Samples: 200018. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:00:50,263][00564] Avg episode reward: [(0, '4.896')] +[2024-09-22 12:00:55,190][02939] Updated weights for policy 0, policy_version 200 (0.0021) +[2024-09-22 12:00:55,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3640.9). Total num frames: 819200. Throughput: 0: 966.6. Samples: 204548. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:00:55,260][00564] Avg episode reward: [(0, '4.806')] +[2024-09-22 12:01:00,259][00564] Fps is (10 sec: 3685.9, 60 sec: 3822.8, 300 sec: 3650.8). Total num frames: 839680. Throughput: 0: 952.7. Samples: 207418. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:01:00,261][00564] Avg episode reward: [(0, '5.176')] +[2024-09-22 12:01:04,248][02939] Updated weights for policy 0, policy_version 210 (0.0014) +[2024-09-22 12:01:05,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3660.3). Total num frames: 860160. Throughput: 0: 971.9. Samples: 214330. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:01:05,259][00564] Avg episode reward: [(0, '5.344')] +[2024-09-22 12:01:05,286][02925] Saving new best policy, reward=5.344! +[2024-09-22 12:01:10,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3891.2, 300 sec: 3652.3). Total num frames: 876544. Throughput: 0: 972.8. Samples: 219578. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:01:10,262][00564] Avg episode reward: [(0, '5.510')] +[2024-09-22 12:01:10,266][02925] Saving new best policy, reward=5.510! +[2024-09-22 12:01:15,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3661.3). Total num frames: 897024. Throughput: 0: 943.2. Samples: 221670. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:01:15,262][00564] Avg episode reward: [(0, '5.518')] +[2024-09-22 12:01:15,273][02925] Saving new best policy, reward=5.518! +[2024-09-22 12:01:15,872][02939] Updated weights for policy 0, policy_version 220 (0.0028) +[2024-09-22 12:01:20,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3686.4). Total num frames: 921600. Throughput: 0: 951.8. Samples: 228586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:01:20,263][00564] Avg episode reward: [(0, '5.477')] +[2024-09-22 12:01:25,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3678.4). Total num frames: 937984. Throughput: 0: 996.8. Samples: 234666. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:01:25,260][00564] Avg episode reward: [(0, '5.308')] +[2024-09-22 12:01:26,171][02939] Updated weights for policy 0, policy_version 230 (0.0030) +[2024-09-22 12:01:30,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3670.6). Total num frames: 954368. Throughput: 0: 967.6. Samples: 236800. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:01:30,259][00564] Avg episode reward: [(0, '5.491')] +[2024-09-22 12:01:35,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3694.1). Total num frames: 978944. Throughput: 0: 956.0. Samples: 243036. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:01:35,264][00564] Avg episode reward: [(0, '5.647')] +[2024-09-22 12:01:35,274][02925] Saving new best policy, reward=5.647! +[2024-09-22 12:01:36,139][02939] Updated weights for policy 0, policy_version 240 (0.0034) +[2024-09-22 12:01:40,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3701.6). Total num frames: 999424. Throughput: 0: 1004.0. Samples: 249726. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:01:40,265][00564] Avg episode reward: [(0, '5.745')] +[2024-09-22 12:01:40,275][02925] Saving new best policy, reward=5.745! +[2024-09-22 12:01:45,260][00564] Fps is (10 sec: 3275.8, 60 sec: 3822.7, 300 sec: 3678.9). Total num frames: 1011712. Throughput: 0: 986.0. Samples: 251790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:01:45,262][00564] Avg episode reward: [(0, '5.931')] +[2024-09-22 12:01:45,277][02925] Saving new best policy, reward=5.931! +[2024-09-22 12:01:47,541][02939] Updated weights for policy 0, policy_version 250 (0.0030) +[2024-09-22 12:01:50,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3686.4). Total num frames: 1032192. Throughput: 0: 955.9. Samples: 257346. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:01:50,264][00564] Avg episode reward: [(0, '5.986')] +[2024-09-22 12:01:50,286][02925] Saving new best policy, reward=5.986! +[2024-09-22 12:01:55,257][00564] Fps is (10 sec: 4506.9, 60 sec: 3959.5, 300 sec: 3708.0). Total num frames: 1056768. Throughput: 0: 992.6. Samples: 264246. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:01:55,263][00564] Avg episode reward: [(0, '6.101')] +[2024-09-22 12:01:55,275][02925] Saving new best policy, reward=6.101! +[2024-09-22 12:01:56,848][02939] Updated weights for policy 0, policy_version 260 (0.0025) +[2024-09-22 12:02:00,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3700.5). Total num frames: 1073152. Throughput: 0: 1005.0. Samples: 266894. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:02:00,260][00564] Avg episode reward: [(0, '6.421')] +[2024-09-22 12:02:00,264][02925] Saving new best policy, reward=6.421! +[2024-09-22 12:02:05,257][00564] Fps is (10 sec: 3686.3, 60 sec: 3891.2, 300 sec: 3707.2). Total num frames: 1093632. Throughput: 0: 960.5. Samples: 271810. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:02:05,262][00564] Avg episode reward: [(0, '6.368')] +[2024-09-22 12:02:07,724][02939] Updated weights for policy 0, policy_version 270 (0.0032) +[2024-09-22 12:02:10,258][00564] Fps is (10 sec: 4095.3, 60 sec: 3959.4, 300 sec: 3776.7). Total num frames: 1114112. Throughput: 0: 980.3. Samples: 278780. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:02:10,265][00564] Avg episode reward: [(0, '6.956')] +[2024-09-22 12:02:10,267][02925] Saving new best policy, reward=6.956! +[2024-09-22 12:02:15,260][00564] Fps is (10 sec: 4095.0, 60 sec: 3959.3, 300 sec: 3846.1). Total num frames: 1134592. Throughput: 0: 1004.2. Samples: 281992. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:02:15,266][00564] Avg episode reward: [(0, '6.658')] +[2024-09-22 12:02:19,027][02939] Updated weights for policy 0, policy_version 280 (0.0034) +[2024-09-22 12:02:20,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3822.9, 300 sec: 3887.7). Total num frames: 1150976. Throughput: 0: 963.2. Samples: 286380. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:02:20,259][00564] Avg episode reward: [(0, '6.940')] +[2024-09-22 12:02:25,257][00564] Fps is (10 sec: 3687.3, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 1171456. Throughput: 0: 969.9. Samples: 293372. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:02:25,262][00564] Avg episode reward: [(0, '7.214')] +[2024-09-22 12:02:25,279][02925] Saving new best policy, reward=7.214! +[2024-09-22 12:02:27,887][02939] Updated weights for policy 0, policy_version 290 (0.0022) +[2024-09-22 12:02:30,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 1196032. Throughput: 0: 999.4. Samples: 296758. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:02:30,259][00564] Avg episode reward: [(0, '7.583')] +[2024-09-22 12:02:30,262][02925] Saving new best policy, reward=7.583! +[2024-09-22 12:02:35,259][00564] Fps is (10 sec: 3685.8, 60 sec: 3822.8, 300 sec: 3901.6). Total num frames: 1208320. Throughput: 0: 982.9. Samples: 301578. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:02:35,262][00564] Avg episode reward: [(0, '7.305')] +[2024-09-22 12:02:39,204][02939] Updated weights for policy 0, policy_version 300 (0.0031) +[2024-09-22 12:02:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 1232896. Throughput: 0: 967.5. Samples: 307782. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:02:40,260][00564] Avg episode reward: [(0, '7.090')] +[2024-09-22 12:02:45,257][00564] Fps is (10 sec: 4506.5, 60 sec: 4027.9, 300 sec: 3901.6). Total num frames: 1253376. Throughput: 0: 985.5. Samples: 311242. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 12:02:45,270][00564] Avg episode reward: [(0, '7.411')] +[2024-09-22 12:02:45,279][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000306_1253376.pth... +[2024-09-22 12:02:45,449][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000075_307200.pth +[2024-09-22 12:02:50,258][00564] Fps is (10 sec: 3276.5, 60 sec: 3891.1, 300 sec: 3887.7). Total num frames: 1265664. Throughput: 0: 991.4. Samples: 316426. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:02:50,261][00564] Avg episode reward: [(0, '7.698')] +[2024-09-22 12:02:50,268][02925] Saving new best policy, reward=7.698! +[2024-09-22 12:02:50,559][02939] Updated weights for policy 0, policy_version 310 (0.0013) +[2024-09-22 12:02:55,262][00564] Fps is (10 sec: 3684.6, 60 sec: 3890.9, 300 sec: 3901.5). Total num frames: 1290240. Throughput: 0: 964.1. Samples: 322168. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:02:55,268][00564] Avg episode reward: [(0, '8.189')] +[2024-09-22 12:02:55,275][02925] Saving new best policy, reward=8.189! +[2024-09-22 12:02:59,503][02939] Updated weights for policy 0, policy_version 320 (0.0044) +[2024-09-22 12:03:00,257][00564] Fps is (10 sec: 4506.1, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 1310720. Throughput: 0: 970.9. Samples: 325680. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:03:00,259][00564] Avg episode reward: [(0, '8.694')] +[2024-09-22 12:03:00,266][02925] Saving new best policy, reward=8.694! +[2024-09-22 12:03:05,258][00564] Fps is (10 sec: 3687.9, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 1327104. Throughput: 0: 1002.2. Samples: 331482. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:03:05,263][00564] Avg episode reward: [(0, '8.634')] +[2024-09-22 12:03:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3901.6). Total num frames: 1343488. Throughput: 0: 942.1. Samples: 335766. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:03:10,264][00564] Avg episode reward: [(0, '8.335')] +[2024-09-22 12:03:11,610][02939] Updated weights for policy 0, policy_version 330 (0.0030) +[2024-09-22 12:03:15,257][00564] Fps is (10 sec: 3686.7, 60 sec: 3823.1, 300 sec: 3887.7). Total num frames: 1363968. Throughput: 0: 943.5. Samples: 339214. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:03:15,263][00564] Avg episode reward: [(0, '8.924')] +[2024-09-22 12:03:15,346][02925] Saving new best policy, reward=8.924! +[2024-09-22 12:03:20,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 1384448. Throughput: 0: 978.8. Samples: 345624. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:03:20,263][00564] Avg episode reward: [(0, '8.853')] +[2024-09-22 12:03:22,797][02939] Updated weights for policy 0, policy_version 340 (0.0023) +[2024-09-22 12:03:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3823.0, 300 sec: 3901.7). Total num frames: 1400832. Throughput: 0: 933.6. Samples: 349794. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:03:25,265][00564] Avg episode reward: [(0, '9.448')] +[2024-09-22 12:03:25,272][02925] Saving new best policy, reward=9.448! +[2024-09-22 12:03:30,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 1421312. Throughput: 0: 927.8. Samples: 352994. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:03:30,265][00564] Avg episode reward: [(0, '8.878')] +[2024-09-22 12:03:32,301][02939] Updated weights for policy 0, policy_version 350 (0.0023) +[2024-09-22 12:03:35,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3901.6). Total num frames: 1441792. Throughput: 0: 967.0. Samples: 359942. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-22 12:03:35,259][00564] Avg episode reward: [(0, '8.925')] +[2024-09-22 12:03:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 1458176. Throughput: 0: 945.4. Samples: 364706. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:03:40,259][00564] Avg episode reward: [(0, '8.640')] +[2024-09-22 12:03:44,179][02939] Updated weights for policy 0, policy_version 360 (0.0014) +[2024-09-22 12:03:45,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 1478656. Throughput: 0: 919.9. Samples: 367076. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:03:45,259][00564] Avg episode reward: [(0, '9.323')] +[2024-09-22 12:03:50,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3901.6). Total num frames: 1499136. Throughput: 0: 937.8. Samples: 373684. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:03:50,260][00564] Avg episode reward: [(0, '9.939')] +[2024-09-22 12:03:50,262][02925] Saving new best policy, reward=9.939! +[2024-09-22 12:03:54,777][02939] Updated weights for policy 0, policy_version 370 (0.0030) +[2024-09-22 12:03:55,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3755.0, 300 sec: 3901.6). Total num frames: 1515520. Throughput: 0: 961.8. Samples: 379048. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:03:55,259][00564] Avg episode reward: [(0, '10.556')] +[2024-09-22 12:03:55,273][02925] Saving new best policy, reward=10.556! +[2024-09-22 12:04:00,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3887.7). Total num frames: 1531904. Throughput: 0: 927.9. Samples: 380970. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:04:00,261][00564] Avg episode reward: [(0, '10.915')] +[2024-09-22 12:04:00,264][02925] Saving new best policy, reward=10.915! +[2024-09-22 12:04:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3873.9). Total num frames: 1552384. Throughput: 0: 923.1. Samples: 387162. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-22 12:04:05,263][00564] Avg episode reward: [(0, '10.942')] +[2024-09-22 12:04:05,272][02925] Saving new best policy, reward=10.942! +[2024-09-22 12:04:05,549][02939] Updated weights for policy 0, policy_version 380 (0.0041) +[2024-09-22 12:04:10,258][00564] Fps is (10 sec: 4095.4, 60 sec: 3822.8, 300 sec: 3887.7). Total num frames: 1572864. Throughput: 0: 972.0. Samples: 393534. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:04:10,263][00564] Avg episode reward: [(0, '9.896')] +[2024-09-22 12:04:15,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3873.8). Total num frames: 1589248. Throughput: 0: 948.4. Samples: 395670. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:04:15,262][00564] Avg episode reward: [(0, '9.865')] +[2024-09-22 12:04:16,911][02939] Updated weights for policy 0, policy_version 390 (0.0034) +[2024-09-22 12:04:20,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3873.8). Total num frames: 1609728. Throughput: 0: 923.4. Samples: 401494. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:04:20,262][00564] Avg episode reward: [(0, '9.747')] +[2024-09-22 12:04:25,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 1634304. Throughput: 0: 974.5. Samples: 408560. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:04:25,260][00564] Avg episode reward: [(0, '10.459')] +[2024-09-22 12:04:25,942][02939] Updated weights for policy 0, policy_version 400 (0.0019) +[2024-09-22 12:04:30,261][00564] Fps is (10 sec: 3684.9, 60 sec: 3754.4, 300 sec: 3873.8). Total num frames: 1646592. Throughput: 0: 977.0. Samples: 411044. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:04:30,263][00564] Avg episode reward: [(0, '10.897')] +[2024-09-22 12:04:35,257][00564] Fps is (10 sec: 3686.3, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 1671168. Throughput: 0: 944.3. Samples: 416176. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:04:35,260][00564] Avg episode reward: [(0, '11.721')] +[2024-09-22 12:04:35,269][02925] Saving new best policy, reward=11.721! +[2024-09-22 12:04:36,955][02939] Updated weights for policy 0, policy_version 410 (0.0026) +[2024-09-22 12:04:40,257][00564] Fps is (10 sec: 4507.3, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 1691648. Throughput: 0: 978.3. Samples: 423070. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:04:40,260][00564] Avg episode reward: [(0, '12.566')] +[2024-09-22 12:04:40,264][02925] Saving new best policy, reward=12.566! +[2024-09-22 12:04:45,260][00564] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3873.8). Total num frames: 1708032. Throughput: 0: 1003.9. Samples: 426148. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:04:45,265][00564] Avg episode reward: [(0, '12.629')] +[2024-09-22 12:04:45,277][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000417_1708032.pth... +[2024-09-22 12:04:45,414][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000191_782336.pth +[2024-09-22 12:04:45,430][02925] Saving new best policy, reward=12.629! +[2024-09-22 12:04:48,474][02939] Updated weights for policy 0, policy_version 420 (0.0029) +[2024-09-22 12:04:50,257][00564] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 1728512. Throughput: 0: 963.4. Samples: 430516. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:04:50,262][00564] Avg episode reward: [(0, '13.551')] +[2024-09-22 12:04:50,265][02925] Saving new best policy, reward=13.551! +[2024-09-22 12:04:55,257][00564] Fps is (10 sec: 4097.1, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 1748992. Throughput: 0: 977.2. Samples: 437508. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:04:55,259][00564] Avg episode reward: [(0, '13.036')] +[2024-09-22 12:04:57,234][02939] Updated weights for policy 0, policy_version 430 (0.0032) +[2024-09-22 12:05:00,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 1769472. Throughput: 0: 1007.2. Samples: 440996. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-09-22 12:05:00,259][00564] Avg episode reward: [(0, '12.661')] +[2024-09-22 12:05:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 1785856. Throughput: 0: 978.4. Samples: 445520. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:05:05,259][00564] Avg episode reward: [(0, '13.377')] +[2024-09-22 12:05:08,513][02939] Updated weights for policy 0, policy_version 440 (0.0021) +[2024-09-22 12:05:10,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3860.0). Total num frames: 1806336. Throughput: 0: 962.4. Samples: 451870. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:05:10,260][00564] Avg episode reward: [(0, '13.118')] +[2024-09-22 12:05:15,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3887.7). Total num frames: 1830912. Throughput: 0: 985.4. Samples: 455384. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:05:15,260][00564] Avg episode reward: [(0, '13.357')] +[2024-09-22 12:05:19,248][02939] Updated weights for policy 0, policy_version 450 (0.0029) +[2024-09-22 12:05:20,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 1843200. Throughput: 0: 988.4. Samples: 460652. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:05:20,259][00564] Avg episode reward: [(0, '13.531')] +[2024-09-22 12:05:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 1867776. Throughput: 0: 964.1. Samples: 466456. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:05:25,260][00564] Avg episode reward: [(0, '13.682')] +[2024-09-22 12:05:25,268][02925] Saving new best policy, reward=13.682! +[2024-09-22 12:05:28,604][02939] Updated weights for policy 0, policy_version 460 (0.0023) +[2024-09-22 12:05:30,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4028.0, 300 sec: 3873.8). Total num frames: 1888256. Throughput: 0: 973.3. Samples: 469946. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:05:30,260][00564] Avg episode reward: [(0, '14.512')] +[2024-09-22 12:05:30,263][02925] Saving new best policy, reward=14.512! +[2024-09-22 12:05:35,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 1904640. Throughput: 0: 1006.6. Samples: 475814. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:05:35,259][00564] Avg episode reward: [(0, '14.679')] +[2024-09-22 12:05:35,268][02925] Saving new best policy, reward=14.679! +[2024-09-22 12:05:40,197][02939] Updated weights for policy 0, policy_version 470 (0.0026) +[2024-09-22 12:05:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 1925120. Throughput: 0: 962.4. Samples: 480814. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:05:40,268][00564] Avg episode reward: [(0, '15.380')] +[2024-09-22 12:05:40,271][02925] Saving new best policy, reward=15.380! +[2024-09-22 12:05:45,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3873.8). Total num frames: 1945600. Throughput: 0: 960.6. Samples: 484224. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:05:45,259][00564] Avg episode reward: [(0, '15.162')] +[2024-09-22 12:05:50,258][00564] Fps is (10 sec: 3686.0, 60 sec: 3891.1, 300 sec: 3873.8). Total num frames: 1961984. Throughput: 0: 1000.3. Samples: 490536. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:05:50,263][00564] Avg episode reward: [(0, '15.142')] +[2024-09-22 12:05:50,868][02939] Updated weights for policy 0, policy_version 480 (0.0049) +[2024-09-22 12:05:55,257][00564] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 1978368. Throughput: 0: 950.3. Samples: 494636. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:05:55,264][00564] Avg episode reward: [(0, '15.213')] +[2024-09-22 12:06:00,257][00564] Fps is (10 sec: 4096.5, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 2002944. Throughput: 0: 946.0. Samples: 497954. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:06:00,263][00564] Avg episode reward: [(0, '15.717')] +[2024-09-22 12:06:00,266][02925] Saving new best policy, reward=15.717! +[2024-09-22 12:06:01,423][02939] Updated weights for policy 0, policy_version 490 (0.0022) +[2024-09-22 12:06:05,257][00564] Fps is (10 sec: 4096.2, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 2019328. Throughput: 0: 977.3. Samples: 504632. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:06:05,262][00564] Avg episode reward: [(0, '14.790')] +[2024-09-22 12:06:10,257][00564] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 2035712. Throughput: 0: 945.5. Samples: 509002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:06:10,263][00564] Avg episode reward: [(0, '14.975')] +[2024-09-22 12:06:12,794][02939] Updated weights for policy 0, policy_version 500 (0.0029) +[2024-09-22 12:06:15,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3846.1). Total num frames: 2056192. Throughput: 0: 933.4. Samples: 511950. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:06:15,263][00564] Avg episode reward: [(0, '15.269')] +[2024-09-22 12:06:20,257][00564] Fps is (10 sec: 4096.2, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 2076672. Throughput: 0: 946.6. Samples: 518410. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:06:20,261][00564] Avg episode reward: [(0, '15.993')] +[2024-09-22 12:06:20,287][02925] Saving new best policy, reward=15.993! +[2024-09-22 12:06:23,520][02939] Updated weights for policy 0, policy_version 510 (0.0049) +[2024-09-22 12:06:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 2093056. Throughput: 0: 942.8. Samples: 523240. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:06:25,264][00564] Avg episode reward: [(0, '16.240')] +[2024-09-22 12:06:25,276][02925] Saving new best policy, reward=16.240! +[2024-09-22 12:06:30,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 2109440. Throughput: 0: 909.2. Samples: 525138. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:06:30,260][00564] Avg episode reward: [(0, '17.546')] +[2024-09-22 12:06:30,265][02925] Saving new best policy, reward=17.546! +[2024-09-22 12:06:34,707][02939] Updated weights for policy 0, policy_version 520 (0.0019) +[2024-09-22 12:06:35,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 2129920. Throughput: 0: 909.6. Samples: 531466. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:06:35,259][00564] Avg episode reward: [(0, '16.963')] +[2024-09-22 12:06:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3846.1). Total num frames: 2146304. Throughput: 0: 939.7. Samples: 536922. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:06:40,260][00564] Avg episode reward: [(0, '17.208')] +[2024-09-22 12:06:45,257][00564] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3818.3). Total num frames: 2158592. Throughput: 0: 908.5. Samples: 538838. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:06:45,264][00564] Avg episode reward: [(0, '16.878')] +[2024-09-22 12:06:45,272][00564] Components not started: RolloutWorker_w0, wait_time=600.0 seconds +[2024-09-22 12:06:45,288][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000528_2162688.pth... +[2024-09-22 12:06:45,417][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000306_1253376.pth +[2024-09-22 12:06:47,385][02939] Updated weights for policy 0, policy_version 530 (0.0020) +[2024-09-22 12:06:50,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3818.3). Total num frames: 2183168. Throughput: 0: 881.3. Samples: 544290. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:06:50,260][00564] Avg episode reward: [(0, '17.517')] +[2024-09-22 12:06:55,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3818.3). Total num frames: 2199552. Throughput: 0: 925.5. Samples: 550650. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:06:55,264][00564] Avg episode reward: [(0, '18.107')] +[2024-09-22 12:06:55,275][02925] Saving new best policy, reward=18.107! +[2024-09-22 12:06:58,757][02939] Updated weights for policy 0, policy_version 540 (0.0029) +[2024-09-22 12:07:00,257][00564] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3790.5). Total num frames: 2211840. Throughput: 0: 899.8. Samples: 552442. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:07:00,259][00564] Avg episode reward: [(0, '18.823')] +[2024-09-22 12:07:00,266][02925] Saving new best policy, reward=18.823! +[2024-09-22 12:07:05,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3790.6). Total num frames: 2232320. Throughput: 0: 863.1. Samples: 557248. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:07:05,261][00564] Avg episode reward: [(0, '19.792')] +[2024-09-22 12:07:05,272][02925] Saving new best policy, reward=19.792! +[2024-09-22 12:07:09,444][02939] Updated weights for policy 0, policy_version 550 (0.0024) +[2024-09-22 12:07:10,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 3790.6). Total num frames: 2252800. Throughput: 0: 894.9. Samples: 563512. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:07:10,264][00564] Avg episode reward: [(0, '20.138')] +[2024-09-22 12:07:10,267][02925] Saving new best policy, reward=20.138! +[2024-09-22 12:07:15,259][00564] Fps is (10 sec: 3685.8, 60 sec: 3549.8, 300 sec: 3790.5). Total num frames: 2269184. Throughput: 0: 909.3. Samples: 566060. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:07:15,264][00564] Avg episode reward: [(0, '19.187')] +[2024-09-22 12:07:20,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3776.7). Total num frames: 2285568. Throughput: 0: 860.0. Samples: 570166. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:07:20,260][00564] Avg episode reward: [(0, '21.074')] +[2024-09-22 12:07:20,261][02925] Saving new best policy, reward=21.074! +[2024-09-22 12:07:21,863][02939] Updated weights for policy 0, policy_version 560 (0.0026) +[2024-09-22 12:07:25,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3549.9, 300 sec: 3762.8). Total num frames: 2306048. Throughput: 0: 879.8. Samples: 576512. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:07:25,259][00564] Avg episode reward: [(0, '19.628')] +[2024-09-22 12:07:30,259][00564] Fps is (10 sec: 3685.8, 60 sec: 3549.8, 300 sec: 3776.7). Total num frames: 2322432. Throughput: 0: 911.4. Samples: 579852. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:07:30,261][00564] Avg episode reward: [(0, '20.178')] +[2024-09-22 12:07:33,257][02939] Updated weights for policy 0, policy_version 570 (0.0023) +[2024-09-22 12:07:35,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3748.9). Total num frames: 2338816. Throughput: 0: 883.2. Samples: 584032. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:07:35,260][00564] Avg episode reward: [(0, '19.354')] +[2024-09-22 12:07:40,257][00564] Fps is (10 sec: 3687.0, 60 sec: 3549.9, 300 sec: 3748.9). Total num frames: 2359296. Throughput: 0: 882.0. Samples: 590338. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:07:40,263][00564] Avg episode reward: [(0, '20.230')] +[2024-09-22 12:07:43,000][02939] Updated weights for policy 0, policy_version 580 (0.0032) +[2024-09-22 12:07:45,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 2383872. Throughput: 0: 916.1. Samples: 593668. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:07:45,259][00564] Avg episode reward: [(0, '19.011')] +[2024-09-22 12:07:50,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3748.9). Total num frames: 2396160. Throughput: 0: 923.5. Samples: 598806. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:07:50,259][00564] Avg episode reward: [(0, '19.385')] +[2024-09-22 12:07:54,207][02939] Updated weights for policy 0, policy_version 590 (0.0043) +[2024-09-22 12:07:55,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 2420736. Throughput: 0: 914.7. Samples: 604674. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:07:55,262][00564] Avg episode reward: [(0, '18.563')] +[2024-09-22 12:08:00,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 2441216. Throughput: 0: 936.2. Samples: 608186. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 12:08:00,260][00564] Avg episode reward: [(0, '17.560')] +[2024-09-22 12:08:04,563][02939] Updated weights for policy 0, policy_version 600 (0.0020) +[2024-09-22 12:08:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 2457600. Throughput: 0: 972.7. Samples: 613936. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:08:05,259][00564] Avg episode reward: [(0, '17.569')] +[2024-09-22 12:08:10,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 2478080. Throughput: 0: 941.0. Samples: 618858. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:08:10,260][00564] Avg episode reward: [(0, '18.019')] +[2024-09-22 12:08:15,125][02939] Updated weights for policy 0, policy_version 610 (0.0021) +[2024-09-22 12:08:15,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3776.7). Total num frames: 2498560. Throughput: 0: 940.0. Samples: 622150. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:08:15,261][00564] Avg episode reward: [(0, '17.417')] +[2024-09-22 12:08:20,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 2514944. Throughput: 0: 989.6. Samples: 628566. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:08:20,261][00564] Avg episode reward: [(0, '18.545')] +[2024-09-22 12:08:25,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 2531328. Throughput: 0: 946.9. Samples: 632948. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:08:25,259][00564] Avg episode reward: [(0, '18.794')] +[2024-09-22 12:08:26,419][02939] Updated weights for policy 0, policy_version 620 (0.0021) +[2024-09-22 12:08:30,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3776.7). Total num frames: 2555904. Throughput: 0: 951.4. Samples: 636480. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-09-22 12:08:30,259][00564] Avg episode reward: [(0, '18.973')] +[2024-09-22 12:08:35,278][00564] Fps is (10 sec: 4496.2, 60 sec: 3958.1, 300 sec: 3790.3). Total num frames: 2576384. Throughput: 0: 985.3. Samples: 643164. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:08:35,281][00564] Avg episode reward: [(0, '20.161')] +[2024-09-22 12:08:36,532][02939] Updated weights for policy 0, policy_version 630 (0.0040) +[2024-09-22 12:08:40,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2588672. Throughput: 0: 953.2. Samples: 647566. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:08:40,261][00564] Avg episode reward: [(0, '21.471')] +[2024-09-22 12:08:40,265][02925] Saving new best policy, reward=21.471! +[2024-09-22 12:08:45,257][00564] Fps is (10 sec: 3283.7, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 2609152. Throughput: 0: 938.7. Samples: 650428. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:08:45,259][00564] Avg episode reward: [(0, '22.327')] +[2024-09-22 12:08:45,277][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000637_2609152.pth... +[2024-09-22 12:08:45,406][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000417_1708032.pth +[2024-09-22 12:08:45,424][02925] Saving new best policy, reward=22.327! +[2024-09-22 12:08:47,217][02939] Updated weights for policy 0, policy_version 640 (0.0017) +[2024-09-22 12:08:50,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3790.5). Total num frames: 2633728. Throughput: 0: 963.8. Samples: 657308. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:08:50,259][00564] Avg episode reward: [(0, '22.854')] +[2024-09-22 12:08:50,266][02925] Saving new best policy, reward=22.854! +[2024-09-22 12:08:55,259][00564] Fps is (10 sec: 4095.2, 60 sec: 3822.8, 300 sec: 3790.5). Total num frames: 2650112. Throughput: 0: 971.6. Samples: 662580. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:08:55,261][00564] Avg episode reward: [(0, '21.214')] +[2024-09-22 12:08:58,242][02939] Updated weights for policy 0, policy_version 650 (0.0013) +[2024-09-22 12:09:00,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 2670592. Throughput: 0: 950.8. Samples: 664936. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:09:00,261][00564] Avg episode reward: [(0, '20.250')] +[2024-09-22 12:09:05,257][00564] Fps is (10 sec: 4506.5, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 2695168. Throughput: 0: 966.6. Samples: 672064. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:09:05,264][00564] Avg episode reward: [(0, '18.547')] +[2024-09-22 12:09:06,798][02939] Updated weights for policy 0, policy_version 660 (0.0014) +[2024-09-22 12:09:10,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 2711552. Throughput: 0: 1001.2. Samples: 678002. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-09-22 12:09:10,263][00564] Avg episode reward: [(0, '19.469')] +[2024-09-22 12:09:15,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 2727936. Throughput: 0: 967.5. Samples: 680018. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:09:15,261][00564] Avg episode reward: [(0, '19.685')] +[2024-09-22 12:09:18,478][02939] Updated weights for policy 0, policy_version 670 (0.0021) +[2024-09-22 12:09:20,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 2748416. Throughput: 0: 961.4. Samples: 686406. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:09:20,259][00564] Avg episode reward: [(0, '18.393')] +[2024-09-22 12:09:25,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3804.5). Total num frames: 2768896. Throughput: 0: 1001.7. Samples: 692644. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:09:25,265][00564] Avg episode reward: [(0, '18.293')] +[2024-09-22 12:09:30,157][02939] Updated weights for policy 0, policy_version 680 (0.0032) +[2024-09-22 12:09:30,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 2785280. Throughput: 0: 982.4. Samples: 694636. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:09:30,261][00564] Avg episode reward: [(0, '19.445')] +[2024-09-22 12:09:35,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3824.3, 300 sec: 3776.7). Total num frames: 2805760. Throughput: 0: 961.1. Samples: 700556. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:09:35,259][00564] Avg episode reward: [(0, '20.040')] +[2024-09-22 12:09:38,944][02939] Updated weights for policy 0, policy_version 690 (0.0018) +[2024-09-22 12:09:40,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3804.5). Total num frames: 2830336. Throughput: 0: 999.8. Samples: 707570. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:09:40,263][00564] Avg episode reward: [(0, '21.272')] +[2024-09-22 12:09:45,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3776.6). Total num frames: 2842624. Throughput: 0: 998.4. Samples: 709864. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:09:45,260][00564] Avg episode reward: [(0, '22.525')] +[2024-09-22 12:09:50,257][00564] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 2863104. Throughput: 0: 953.6. Samples: 714974. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:09:50,259][00564] Avg episode reward: [(0, '24.205')] +[2024-09-22 12:09:50,328][02925] Saving new best policy, reward=24.205! +[2024-09-22 12:09:50,335][02939] Updated weights for policy 0, policy_version 700 (0.0016) +[2024-09-22 12:09:55,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.6, 300 sec: 3790.5). Total num frames: 2887680. Throughput: 0: 976.8. Samples: 721956. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:09:55,260][00564] Avg episode reward: [(0, '24.621')] +[2024-09-22 12:09:55,268][02925] Saving new best policy, reward=24.621! +[2024-09-22 12:10:00,257][00564] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 2904064. Throughput: 0: 1001.9. Samples: 725104. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:10:00,263][00564] Avg episode reward: [(0, '24.019')] +[2024-09-22 12:10:00,636][02939] Updated weights for policy 0, policy_version 710 (0.0032) +[2024-09-22 12:10:05,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 2924544. Throughput: 0: 958.2. Samples: 729526. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:10:05,262][00564] Avg episode reward: [(0, '23.558')] +[2024-09-22 12:10:10,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 2945024. Throughput: 0: 972.2. Samples: 736392. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:10:10,260][00564] Avg episode reward: [(0, '23.822')] +[2024-09-22 12:10:10,510][02939] Updated weights for policy 0, policy_version 720 (0.0018) +[2024-09-22 12:10:15,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 2965504. Throughput: 0: 1002.2. Samples: 739736. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:10:15,263][00564] Avg episode reward: [(0, '23.137')] +[2024-09-22 12:10:20,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 2977792. Throughput: 0: 968.9. Samples: 744158. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:10:20,262][00564] Avg episode reward: [(0, '23.408')] +[2024-09-22 12:10:22,225][02939] Updated weights for policy 0, policy_version 730 (0.0022) +[2024-09-22 12:10:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3776.6). Total num frames: 3002368. Throughput: 0: 955.5. Samples: 750566. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:10:25,262][00564] Avg episode reward: [(0, '21.859')] +[2024-09-22 12:10:30,261][00564] Fps is (10 sec: 4503.8, 60 sec: 3959.2, 300 sec: 3790.5). Total num frames: 3022848. Throughput: 0: 981.4. Samples: 754030. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:10:30,263][00564] Avg episode reward: [(0, '21.459')] +[2024-09-22 12:10:31,866][02939] Updated weights for policy 0, policy_version 740 (0.0018) +[2024-09-22 12:10:35,262][00564] Fps is (10 sec: 3684.6, 60 sec: 3890.9, 300 sec: 3776.6). Total num frames: 3039232. Throughput: 0: 984.5. Samples: 759280. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:10:35,267][00564] Avg episode reward: [(0, '20.418')] +[2024-09-22 12:10:40,257][00564] Fps is (10 sec: 3687.8, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 3059712. Throughput: 0: 959.7. Samples: 765142. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:10:40,265][00564] Avg episode reward: [(0, '19.682')] +[2024-09-22 12:10:42,389][02939] Updated weights for policy 0, policy_version 750 (0.0023) +[2024-09-22 12:10:45,257][00564] Fps is (10 sec: 4507.7, 60 sec: 4027.7, 300 sec: 3804.4). Total num frames: 3084288. Throughput: 0: 966.0. Samples: 768574. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-09-22 12:10:45,260][00564] Avg episode reward: [(0, '20.476')] +[2024-09-22 12:10:45,272][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000753_3084288.pth... +[2024-09-22 12:10:45,393][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000528_2162688.pth +[2024-09-22 12:10:50,257][00564] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 3100672. Throughput: 0: 997.7. Samples: 774422. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:10:50,259][00564] Avg episode reward: [(0, '21.444')] +[2024-09-22 12:10:53,759][02939] Updated weights for policy 0, policy_version 760 (0.0023) +[2024-09-22 12:10:55,262][00564] Fps is (10 sec: 3275.3, 60 sec: 3822.6, 300 sec: 3776.6). Total num frames: 3117056. Throughput: 0: 958.2. Samples: 779514. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:10:55,270][00564] Avg episode reward: [(0, '21.795')] +[2024-09-22 12:11:00,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 3141632. Throughput: 0: 958.8. Samples: 782882. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-09-22 12:11:00,263][00564] Avg episode reward: [(0, '21.647')] +[2024-09-22 12:11:02,626][02939] Updated weights for policy 0, policy_version 770 (0.0027) +[2024-09-22 12:11:05,260][00564] Fps is (10 sec: 4096.9, 60 sec: 3891.0, 300 sec: 3804.4). Total num frames: 3158016. Throughput: 0: 1001.8. Samples: 789240. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:11:05,265][00564] Avg episode reward: [(0, '21.520')] +[2024-09-22 12:11:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 3174400. Throughput: 0: 958.2. Samples: 793684. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:11:10,262][00564] Avg episode reward: [(0, '21.675')] +[2024-09-22 12:11:14,201][02939] Updated weights for policy 0, policy_version 780 (0.0027) +[2024-09-22 12:11:15,257][00564] Fps is (10 sec: 3687.4, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 3194880. Throughput: 0: 956.6. Samples: 797072. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:11:15,259][00564] Avg episode reward: [(0, '21.778')] +[2024-09-22 12:11:20,263][00564] Fps is (10 sec: 4503.0, 60 sec: 4027.3, 300 sec: 3818.2). Total num frames: 3219456. Throughput: 0: 994.2. Samples: 804022. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:11:20,265][00564] Avg episode reward: [(0, '22.753')] +[2024-09-22 12:11:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3804.4). Total num frames: 3231744. Throughput: 0: 965.1. Samples: 808572. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:11:25,262][00564] Avg episode reward: [(0, '23.360')] +[2024-09-22 12:11:25,290][02939] Updated weights for policy 0, policy_version 790 (0.0026) +[2024-09-22 12:11:30,259][00564] Fps is (10 sec: 3687.8, 60 sec: 3891.3, 300 sec: 3818.3). Total num frames: 3256320. Throughput: 0: 960.4. Samples: 811794. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:11:30,268][00564] Avg episode reward: [(0, '23.530')] +[2024-09-22 12:11:34,070][02939] Updated weights for policy 0, policy_version 800 (0.0018) +[2024-09-22 12:11:35,258][00564] Fps is (10 sec: 4914.7, 60 sec: 4028.0, 300 sec: 3846.1). Total num frames: 3280896. Throughput: 0: 987.8. Samples: 818876. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:11:35,263][00564] Avg episode reward: [(0, '23.236')] +[2024-09-22 12:11:40,257][00564] Fps is (10 sec: 3687.1, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 3293184. Throughput: 0: 990.5. Samples: 824080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:11:40,263][00564] Avg episode reward: [(0, '23.545')] +[2024-09-22 12:11:45,257][00564] Fps is (10 sec: 3277.2, 60 sec: 3823.0, 300 sec: 3832.2). Total num frames: 3313664. Throughput: 0: 970.8. Samples: 826566. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:11:45,265][00564] Avg episode reward: [(0, '23.825')] +[2024-09-22 12:11:45,268][02939] Updated weights for policy 0, policy_version 810 (0.0025) +[2024-09-22 12:11:50,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 3338240. Throughput: 0: 985.6. Samples: 833588. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:11:50,261][00564] Avg episode reward: [(0, '23.526')] +[2024-09-22 12:11:55,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.8, 300 sec: 3873.8). Total num frames: 3354624. Throughput: 0: 1017.1. Samples: 839452. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:11:55,260][00564] Avg episode reward: [(0, '24.016')] +[2024-09-22 12:11:55,482][02939] Updated weights for policy 0, policy_version 820 (0.0028) +[2024-09-22 12:12:00,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 3375104. Throughput: 0: 990.1. Samples: 841628. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:12:00,260][00564] Avg episode reward: [(0, '23.877')] +[2024-09-22 12:12:05,180][02939] Updated weights for policy 0, policy_version 830 (0.0038) +[2024-09-22 12:12:05,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.9, 300 sec: 3887.7). Total num frames: 3399680. Throughput: 0: 986.0. Samples: 848388. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:12:05,261][00564] Avg episode reward: [(0, '23.069')] +[2024-09-22 12:12:10,258][00564] Fps is (10 sec: 4095.4, 60 sec: 4027.6, 300 sec: 3887.7). Total num frames: 3416064. Throughput: 0: 1027.7. Samples: 854822. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:12:10,261][00564] Avg episode reward: [(0, '21.703')] +[2024-09-22 12:12:15,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 3432448. Throughput: 0: 1004.8. Samples: 857010. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:12:15,259][00564] Avg episode reward: [(0, '21.238')] +[2024-09-22 12:12:16,430][02939] Updated weights for policy 0, policy_version 840 (0.0023) +[2024-09-22 12:12:20,257][00564] Fps is (10 sec: 4096.6, 60 sec: 3959.9, 300 sec: 3901.6). Total num frames: 3457024. Throughput: 0: 980.3. Samples: 862988. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:12:20,259][00564] Avg episode reward: [(0, '21.717')] +[2024-09-22 12:12:25,052][02939] Updated weights for policy 0, policy_version 850 (0.0013) +[2024-09-22 12:12:25,257][00564] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 3929.4). Total num frames: 3481600. Throughput: 0: 1023.5. Samples: 870136. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:12:25,260][00564] Avg episode reward: [(0, '21.569')] +[2024-09-22 12:12:30,260][00564] Fps is (10 sec: 3685.3, 60 sec: 3959.4, 300 sec: 3915.5). Total num frames: 3493888. Throughput: 0: 1016.8. Samples: 872326. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:12:30,262][00564] Avg episode reward: [(0, '22.038')] +[2024-09-22 12:12:35,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3891.3, 300 sec: 3915.5). Total num frames: 3514368. Throughput: 0: 981.9. Samples: 877774. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-09-22 12:12:35,259][00564] Avg episode reward: [(0, '23.234')] +[2024-09-22 12:12:36,153][02939] Updated weights for policy 0, policy_version 860 (0.0021) +[2024-09-22 12:12:40,257][00564] Fps is (10 sec: 4506.9, 60 sec: 4096.0, 300 sec: 3915.5). Total num frames: 3538944. Throughput: 0: 1004.9. Samples: 884674. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-09-22 12:12:40,259][00564] Avg episode reward: [(0, '23.811')] +[2024-09-22 12:12:45,264][00564] Fps is (10 sec: 4093.2, 60 sec: 4027.3, 300 sec: 3929.3). Total num frames: 3555328. Throughput: 0: 1021.3. Samples: 887592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:12:45,266][00564] Avg episode reward: [(0, '24.913')] +[2024-09-22 12:12:45,279][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000868_3555328.pth... +[2024-09-22 12:12:45,461][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000637_2609152.pth +[2024-09-22 12:12:45,481][02925] Saving new best policy, reward=24.913! +[2024-09-22 12:12:47,922][02939] Updated weights for policy 0, policy_version 870 (0.0034) +[2024-09-22 12:12:50,257][00564] Fps is (10 sec: 3276.7, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 3571712. Throughput: 0: 967.4. Samples: 891920. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:12:50,265][00564] Avg episode reward: [(0, '24.021')] +[2024-09-22 12:12:55,257][00564] Fps is (10 sec: 4098.8, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 3596288. Throughput: 0: 978.0. Samples: 898832. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:12:55,261][00564] Avg episode reward: [(0, '22.475')] +[2024-09-22 12:12:56,674][02939] Updated weights for policy 0, policy_version 880 (0.0025) +[2024-09-22 12:13:00,257][00564] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 3612672. Throughput: 0: 1006.1. Samples: 902286. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:13:00,262][00564] Avg episode reward: [(0, '23.169')] +[2024-09-22 12:13:05,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 3629056. Throughput: 0: 970.0. Samples: 906640. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:13:05,264][00564] Avg episode reward: [(0, '21.721')] +[2024-09-22 12:13:08,166][02939] Updated weights for policy 0, policy_version 890 (0.0028) +[2024-09-22 12:13:10,257][00564] Fps is (10 sec: 4096.0, 60 sec: 3959.6, 300 sec: 3915.5). Total num frames: 3653632. Throughput: 0: 957.0. Samples: 913200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:13:10,264][00564] Avg episode reward: [(0, '20.285')] +[2024-09-22 12:13:15,257][00564] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 3674112. Throughput: 0: 985.2. Samples: 916658. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:13:15,260][00564] Avg episode reward: [(0, '21.309')] +[2024-09-22 12:13:18,927][02939] Updated weights for policy 0, policy_version 900 (0.0023) +[2024-09-22 12:13:20,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 3686400. Throughput: 0: 974.7. Samples: 921636. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:13:20,259][00564] Avg episode reward: [(0, '22.630')] +[2024-09-22 12:13:25,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 3710976. Throughput: 0: 951.7. Samples: 927502. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:13:25,263][00564] Avg episode reward: [(0, '22.529')] +[2024-09-22 12:13:28,666][02939] Updated weights for policy 0, policy_version 910 (0.0016) +[2024-09-22 12:13:30,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.7, 300 sec: 3915.8). Total num frames: 3731456. Throughput: 0: 963.5. Samples: 930942. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-09-22 12:13:30,260][00564] Avg episode reward: [(0, '23.620')] +[2024-09-22 12:13:35,265][00564] Fps is (10 sec: 3683.5, 60 sec: 3890.7, 300 sec: 3929.3). Total num frames: 3747840. Throughput: 0: 995.5. Samples: 936726. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:13:35,269][00564] Avg episode reward: [(0, '24.449')] +[2024-09-22 12:13:40,047][02939] Updated weights for policy 0, policy_version 920 (0.0022) +[2024-09-22 12:13:40,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3929.4). Total num frames: 3768320. Throughput: 0: 954.0. Samples: 941760. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:13:40,259][00564] Avg episode reward: [(0, '23.882')] +[2024-09-22 12:13:45,257][00564] Fps is (10 sec: 4099.2, 60 sec: 3891.6, 300 sec: 3915.5). Total num frames: 3788800. Throughput: 0: 951.8. Samples: 945118. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:13:45,259][00564] Avg episode reward: [(0, '23.351')] +[2024-09-22 12:13:50,258][00564] Fps is (10 sec: 3686.1, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 3805184. Throughput: 0: 990.2. Samples: 951200. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:13:50,266][00564] Avg episode reward: [(0, '22.978')] +[2024-09-22 12:13:50,682][02939] Updated weights for policy 0, policy_version 930 (0.0016) +[2024-09-22 12:13:55,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 3821568. Throughput: 0: 943.9. Samples: 955676. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:13:55,267][00564] Avg episode reward: [(0, '21.939')] +[2024-09-22 12:14:00,257][00564] Fps is (10 sec: 4096.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 3846144. Throughput: 0: 944.5. Samples: 959160. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:14:00,261][00564] Avg episode reward: [(0, '22.123')] +[2024-09-22 12:14:00,694][02939] Updated weights for policy 0, policy_version 940 (0.0034) +[2024-09-22 12:14:05,257][00564] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 3866624. Throughput: 0: 985.5. Samples: 965984. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:14:05,262][00564] Avg episode reward: [(0, '23.629')] +[2024-09-22 12:14:10,257][00564] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 3878912. Throughput: 0: 952.4. Samples: 970362. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:14:10,263][00564] Avg episode reward: [(0, '23.958')] +[2024-09-22 12:14:12,330][02939] Updated weights for policy 0, policy_version 950 (0.0015) +[2024-09-22 12:14:15,257][00564] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 3903488. Throughput: 0: 940.8. Samples: 973278. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-09-22 12:14:15,264][00564] Avg episode reward: [(0, '24.731')] +[2024-09-22 12:14:20,258][00564] Fps is (10 sec: 4505.2, 60 sec: 3959.4, 300 sec: 3915.5). Total num frames: 3923968. Throughput: 0: 959.8. Samples: 979910. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:14:20,264][00564] Avg episode reward: [(0, '26.122')] +[2024-09-22 12:14:20,266][02925] Saving new best policy, reward=26.122! +[2024-09-22 12:14:22,355][02939] Updated weights for policy 0, policy_version 960 (0.0029) +[2024-09-22 12:14:25,259][00564] Fps is (10 sec: 3276.1, 60 sec: 3754.5, 300 sec: 3901.6). Total num frames: 3936256. Throughput: 0: 956.2. Samples: 984792. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-09-22 12:14:25,268][00564] Avg episode reward: [(0, '26.457')] +[2024-09-22 12:14:25,292][02925] Saving new best policy, reward=26.457! +[2024-09-22 12:14:30,257][00564] Fps is (10 sec: 3277.1, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 3956736. Throughput: 0: 934.2. Samples: 987156. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-09-22 12:14:30,262][00564] Avg episode reward: [(0, '26.405')] +[2024-09-22 12:14:33,018][02939] Updated weights for policy 0, policy_version 970 (0.0030) +[2024-09-22 12:14:35,257][00564] Fps is (10 sec: 4506.6, 60 sec: 3891.7, 300 sec: 3901.6). Total num frames: 3981312. Throughput: 0: 953.2. Samples: 994094. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-09-22 12:14:35,259][00564] Avg episode reward: [(0, '25.965')] +[2024-09-22 12:14:40,259][00564] Fps is (10 sec: 4095.2, 60 sec: 3822.8, 300 sec: 3915.5). Total num frames: 3997696. Throughput: 0: 979.1. Samples: 999736. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-09-22 12:14:40,261][00564] Avg episode reward: [(0, '25.226')] +[2024-09-22 12:14:42,516][02925] Stopping Batcher_0... +[2024-09-22 12:14:42,516][02925] Loop batcher_evt_loop terminating... +[2024-09-22 12:14:42,516][00564] Component Batcher_0 stopped! +[2024-09-22 12:14:42,521][00564] Component RolloutWorker_w0 process died already! Don't wait for it. +[2024-09-22 12:14:42,524][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-09-22 12:14:42,614][02939] Weights refcount: 2 0 +[2024-09-22 12:14:42,621][02939] Stopping InferenceWorker_p0-w0... +[2024-09-22 12:14:42,621][02939] Loop inference_proc0-0_evt_loop terminating... +[2024-09-22 12:14:42,621][00564] Component InferenceWorker_p0-w0 stopped! +[2024-09-22 12:14:42,680][02925] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000753_3084288.pth +[2024-09-22 12:14:42,691][02925] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-09-22 12:14:43,000][02925] Stopping LearnerWorker_p0... +[2024-09-22 12:14:43,001][02925] Loop learner_proc0_evt_loop terminating... +[2024-09-22 12:14:43,001][00564] Component LearnerWorker_p0 stopped! +[2024-09-22 12:14:43,030][00564] Component RolloutWorker_w3 stopped! +[2024-09-22 12:14:43,035][02942] Stopping RolloutWorker_w3... +[2024-09-22 12:14:43,036][02942] Loop rollout_proc3_evt_loop terminating... +[2024-09-22 12:14:43,051][00564] Component RolloutWorker_w7 stopped! +[2024-09-22 12:14:43,057][02945] Stopping RolloutWorker_w7... +[2024-09-22 12:14:43,058][02945] Loop rollout_proc7_evt_loop terminating... +[2024-09-22 12:14:43,084][00564] Component RolloutWorker_w5 stopped! +[2024-09-22 12:14:43,090][02943] Stopping RolloutWorker_w5... +[2024-09-22 12:14:43,091][02943] Loop rollout_proc5_evt_loop terminating... +[2024-09-22 12:14:43,101][00564] Component RolloutWorker_w1 stopped! +[2024-09-22 12:14:43,109][02940] Stopping RolloutWorker_w1... +[2024-09-22 12:14:43,110][02940] Loop rollout_proc1_evt_loop terminating... +[2024-09-22 12:14:43,128][02944] Stopping RolloutWorker_w4... +[2024-09-22 12:14:43,130][02944] Loop rollout_proc4_evt_loop terminating... +[2024-09-22 12:14:43,128][00564] Component RolloutWorker_w4 stopped! +[2024-09-22 12:14:43,149][02946] Stopping RolloutWorker_w6... +[2024-09-22 12:14:43,148][00564] Component RolloutWorker_w6 stopped! +[2024-09-22 12:14:43,152][02946] Loop rollout_proc6_evt_loop terminating... +[2024-09-22 12:14:43,185][02941] Stopping RolloutWorker_w2... +[2024-09-22 12:14:43,184][00564] Component RolloutWorker_w2 stopped! +[2024-09-22 12:14:43,186][00564] Waiting for process learner_proc0 to stop... +[2024-09-22 12:14:43,189][02941] Loop rollout_proc2_evt_loop terminating... +[2024-09-22 12:14:44,440][00564] Waiting for process inference_proc0-0 to join... +[2024-09-22 12:14:44,443][00564] Waiting for process rollout_proc0 to join... +[2024-09-22 12:14:44,450][00564] Waiting for process rollout_proc1 to join... +[2024-09-22 12:14:46,279][00564] Waiting for process rollout_proc2 to join... +[2024-09-22 12:14:46,282][00564] Waiting for process rollout_proc3 to join... +[2024-09-22 12:14:46,288][00564] Waiting for process rollout_proc4 to join... +[2024-09-22 12:14:46,290][00564] Waiting for process rollout_proc5 to join... +[2024-09-22 12:14:46,293][00564] Waiting for process rollout_proc6 to join... +[2024-09-22 12:14:46,297][00564] Waiting for process rollout_proc7 to join... +[2024-09-22 12:14:46,299][00564] Batcher 0 profile tree view: +batching: 24.6516, releasing_batches: 0.0268 +[2024-09-22 12:14:46,301][00564] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0001 + wait_policy_total: 408.5732 +update_model: 9.2120 + weight_update: 0.0017 +one_step: 0.0226 + handle_policy_step: 591.5292 + deserialize: 14.3376, stack: 3.2877, obs_to_device_normalize: 121.9759, forward: 319.5675, send_messages: 25.8998 + prepare_outputs: 77.5571 + to_cpu: 45.2189 +[2024-09-22 12:14:46,303][00564] Learner 0 profile tree view: +misc: 0.0052, prepare_batch: 13.5165 +train: 71.9820 + epoch_init: 0.0058, minibatch_init: 0.0111, losses_postprocess: 0.5760, kl_divergence: 0.5258, after_optimizer: 33.3866 + calculate_losses: 25.1242 + losses_init: 0.0294, forward_head: 1.2535, bptt_initial: 16.9256, tail: 1.0223, advantages_returns: 0.2459, losses: 3.6640 + bptt: 1.7097 + bptt_forward_core: 1.6222 + update: 11.7491 + clip: 0.8856 +[2024-09-22 12:14:46,304][00564] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3666, enqueue_policy_requests: 95.0201, env_step: 824.7435, overhead: 14.1138, complete_rollouts: 8.0628 +save_policy_outputs: 22.4268 + split_output_tensors: 8.8564 +[2024-09-22 12:14:46,306][00564] Loop Runner_EvtLoop terminating... +[2024-09-22 12:14:46,309][00564] Runner profile tree view: +main_loop: 1075.4156 +[2024-09-22 12:14:46,310][00564] Collected {0: 4005888}, FPS: 3725.0 +[2024-09-22 12:19:19,267][00564] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-22 12:19:19,268][00564] Overriding arg 'num_workers' with value 1 passed from command line +[2024-09-22 12:19:19,271][00564] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-09-22 12:19:19,273][00564] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-09-22 12:19:19,275][00564] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-09-22 12:19:19,278][00564] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-09-22 12:19:19,279][00564] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-09-22 12:19:19,281][00564] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-09-22 12:19:19,283][00564] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-09-22 12:19:19,284][00564] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-09-22 12:19:19,285][00564] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-09-22 12:19:19,287][00564] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-09-22 12:19:19,288][00564] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-09-22 12:19:19,290][00564] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-09-22 12:19:19,291][00564] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-09-22 12:19:19,323][00564] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-22 12:19:19,326][00564] RunningMeanStd input shape: (3, 72, 128) +[2024-09-22 12:19:19,329][00564] RunningMeanStd input shape: (1,) +[2024-09-22 12:19:19,346][00564] ConvEncoder: input_channels=3 +[2024-09-22 12:19:19,446][00564] Conv encoder output size: 512 +[2024-09-22 12:19:19,447][00564] Policy head output size: 512 +[2024-09-22 12:19:19,632][00564] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-09-22 12:19:20,467][00564] Num frames 100... +[2024-09-22 12:19:20,590][00564] Num frames 200... +[2024-09-22 12:19:20,712][00564] Num frames 300... +[2024-09-22 12:19:20,876][00564] Avg episode rewards: #0: 6.890, true rewards: #0: 3.890 +[2024-09-22 12:19:20,878][00564] Avg episode reward: 6.890, avg true_objective: 3.890 +[2024-09-22 12:19:20,895][00564] Num frames 400... +[2024-09-22 12:19:21,018][00564] Num frames 500... +[2024-09-22 12:19:21,146][00564] Num frames 600... +[2024-09-22 12:19:21,273][00564] Num frames 700... +[2024-09-22 12:19:21,397][00564] Num frames 800... +[2024-09-22 12:19:21,521][00564] Num frames 900... +[2024-09-22 12:19:21,655][00564] Num frames 1000... +[2024-09-22 12:19:21,794][00564] Num frames 1100... +[2024-09-22 12:19:21,919][00564] Num frames 1200... +[2024-09-22 12:19:22,072][00564] Avg episode rewards: #0: 12.910, true rewards: #0: 6.410 +[2024-09-22 12:19:22,073][00564] Avg episode reward: 12.910, avg true_objective: 6.410 +[2024-09-22 12:19:22,104][00564] Num frames 1300... +[2024-09-22 12:19:22,223][00564] Num frames 1400... +[2024-09-22 12:19:22,349][00564] Num frames 1500... +[2024-09-22 12:19:22,470][00564] Num frames 1600... +[2024-09-22 12:19:22,591][00564] Num frames 1700... +[2024-09-22 12:19:22,716][00564] Num frames 1800... +[2024-09-22 12:19:22,809][00564] Avg episode rewards: #0: 11.753, true rewards: #0: 6.087 +[2024-09-22 12:19:22,810][00564] Avg episode reward: 11.753, avg true_objective: 6.087 +[2024-09-22 12:19:22,903][00564] Num frames 1900... +[2024-09-22 12:19:23,021][00564] Num frames 2000... +[2024-09-22 12:19:23,141][00564] Num frames 2100... +[2024-09-22 12:19:23,267][00564] Num frames 2200... +[2024-09-22 12:19:23,392][00564] Num frames 2300... +[2024-09-22 12:19:23,515][00564] Num frames 2400... +[2024-09-22 12:19:23,635][00564] Num frames 2500... +[2024-09-22 12:19:23,766][00564] Num frames 2600... +[2024-09-22 12:19:23,886][00564] Num frames 2700... +[2024-09-22 12:19:24,003][00564] Num frames 2800... +[2024-09-22 12:19:24,127][00564] Num frames 2900... +[2024-09-22 12:19:24,247][00564] Num frames 3000... +[2024-09-22 12:19:24,379][00564] Num frames 3100... +[2024-09-22 12:19:24,499][00564] Num frames 3200... +[2024-09-22 12:19:24,621][00564] Num frames 3300... +[2024-09-22 12:19:24,749][00564] Num frames 3400... +[2024-09-22 12:19:24,869][00564] Num frames 3500... +[2024-09-22 12:19:24,985][00564] Num frames 3600... +[2024-09-22 12:19:25,104][00564] Num frames 3700... +[2024-09-22 12:19:25,221][00564] Num frames 3800... +[2024-09-22 12:19:25,327][00564] Avg episode rewards: #0: 19.855, true rewards: #0: 9.605 +[2024-09-22 12:19:25,329][00564] Avg episode reward: 19.855, avg true_objective: 9.605 +[2024-09-22 12:19:25,400][00564] Num frames 3900... +[2024-09-22 12:19:25,522][00564] Num frames 4000... +[2024-09-22 12:19:25,643][00564] Num frames 4100... +[2024-09-22 12:19:25,769][00564] Num frames 4200... +[2024-09-22 12:19:25,887][00564] Num frames 4300... +[2024-09-22 12:19:26,008][00564] Num frames 4400... +[2024-09-22 12:19:26,126][00564] Num frames 4500... +[2024-09-22 12:19:26,247][00564] Num frames 4600... +[2024-09-22 12:19:26,376][00564] Num frames 4700... +[2024-09-22 12:19:26,523][00564] Avg episode rewards: #0: 20.140, true rewards: #0: 9.540 +[2024-09-22 12:19:26,524][00564] Avg episode reward: 20.140, avg true_objective: 9.540 +[2024-09-22 12:19:26,564][00564] Num frames 4800... +[2024-09-22 12:19:26,680][00564] Num frames 4900... +[2024-09-22 12:19:26,839][00564] Num frames 5000... +[2024-09-22 12:19:27,007][00564] Num frames 5100... +[2024-09-22 12:19:27,170][00564] Num frames 5200... +[2024-09-22 12:19:27,331][00564] Num frames 5300... +[2024-09-22 12:19:27,498][00564] Num frames 5400... +[2024-09-22 12:19:27,662][00564] Num frames 5500... +[2024-09-22 12:19:27,821][00564] Num frames 5600... +[2024-09-22 12:19:27,995][00564] Num frames 5700... +[2024-09-22 12:19:28,161][00564] Num frames 5800... +[2024-09-22 12:19:28,270][00564] Avg episode rewards: #0: 20.883, true rewards: #0: 9.717 +[2024-09-22 12:19:28,272][00564] Avg episode reward: 20.883, avg true_objective: 9.717 +[2024-09-22 12:19:28,390][00564] Num frames 5900... +[2024-09-22 12:19:28,574][00564] Num frames 6000... +[2024-09-22 12:19:28,754][00564] Num frames 6100... +[2024-09-22 12:19:28,932][00564] Num frames 6200... +[2024-09-22 12:19:29,107][00564] Num frames 6300... +[2024-09-22 12:19:29,249][00564] Num frames 6400... +[2024-09-22 12:19:29,373][00564] Num frames 6500... +[2024-09-22 12:19:29,510][00564] Num frames 6600... +[2024-09-22 12:19:29,633][00564] Num frames 6700... +[2024-09-22 12:19:29,765][00564] Num frames 6800... +[2024-09-22 12:19:29,890][00564] Num frames 6900... +[2024-09-22 12:19:30,014][00564] Num frames 7000... +[2024-09-22 12:19:30,135][00564] Num frames 7100... +[2024-09-22 12:19:30,256][00564] Num frames 7200... +[2024-09-22 12:19:30,382][00564] Num frames 7300... +[2024-09-22 12:19:30,511][00564] Num frames 7400... +[2024-09-22 12:19:30,636][00564] Num frames 7500... +[2024-09-22 12:19:30,765][00564] Num frames 7600... +[2024-09-22 12:19:30,886][00564] Num frames 7700... +[2024-09-22 12:19:31,004][00564] Avg episode rewards: #0: 25.357, true rewards: #0: 11.071 +[2024-09-22 12:19:31,005][00564] Avg episode reward: 25.357, avg true_objective: 11.071 +[2024-09-22 12:19:31,067][00564] Num frames 7800... +[2024-09-22 12:19:31,184][00564] Num frames 7900... +[2024-09-22 12:19:31,308][00564] Num frames 8000... +[2024-09-22 12:19:31,427][00564] Num frames 8100... +[2024-09-22 12:19:31,557][00564] Num frames 8200... +[2024-09-22 12:19:31,680][00564] Num frames 8300... +[2024-09-22 12:19:31,808][00564] Num frames 8400... +[2024-09-22 12:19:31,940][00564] Num frames 8500... +[2024-09-22 12:19:32,060][00564] Num frames 8600... +[2024-09-22 12:19:32,178][00564] Num frames 8700... +[2024-09-22 12:19:32,302][00564] Num frames 8800... +[2024-09-22 12:19:32,404][00564] Avg episode rewards: #0: 25.422, true rewards: #0: 11.047 +[2024-09-22 12:19:32,405][00564] Avg episode reward: 25.422, avg true_objective: 11.047 +[2024-09-22 12:19:32,485][00564] Num frames 8900... +[2024-09-22 12:19:32,613][00564] Num frames 9000... +[2024-09-22 12:19:32,742][00564] Num frames 9100... +[2024-09-22 12:19:32,867][00564] Num frames 9200... +[2024-09-22 12:19:32,986][00564] Num frames 9300... +[2024-09-22 12:19:33,106][00564] Num frames 9400... +[2024-09-22 12:19:33,229][00564] Num frames 9500... +[2024-09-22 12:19:33,349][00564] Num frames 9600... +[2024-09-22 12:19:33,468][00564] Num frames 9700... +[2024-09-22 12:19:33,598][00564] Num frames 9800... +[2024-09-22 12:19:33,728][00564] Num frames 9900... +[2024-09-22 12:19:33,851][00564] Num frames 10000... +[2024-09-22 12:19:33,978][00564] Num frames 10100... +[2024-09-22 12:19:34,099][00564] Num frames 10200... +[2024-09-22 12:19:34,220][00564] Num frames 10300... +[2024-09-22 12:19:34,343][00564] Num frames 10400... +[2024-09-22 12:19:34,463][00564] Num frames 10500... +[2024-09-22 12:19:34,588][00564] Avg episode rewards: #0: 27.280, true rewards: #0: 11.724 +[2024-09-22 12:19:34,590][00564] Avg episode reward: 27.280, avg true_objective: 11.724 +[2024-09-22 12:19:34,650][00564] Num frames 10600... +[2024-09-22 12:19:34,779][00564] Num frames 10700... +[2024-09-22 12:19:34,904][00564] Num frames 10800... +[2024-09-22 12:19:35,021][00564] Num frames 10900... +[2024-09-22 12:19:35,142][00564] Num frames 11000... +[2024-09-22 12:19:35,264][00564] Num frames 11100... +[2024-09-22 12:19:35,384][00564] Num frames 11200... +[2024-09-22 12:19:35,508][00564] Num frames 11300... +[2024-09-22 12:19:35,637][00564] Num frames 11400... +[2024-09-22 12:19:35,762][00564] Num frames 11500... +[2024-09-22 12:19:35,882][00564] Num frames 11600... +[2024-09-22 12:19:36,003][00564] Num frames 11700... +[2024-09-22 12:19:36,121][00564] Num frames 11800... +[2024-09-22 12:19:36,244][00564] Num frames 11900... +[2024-09-22 12:19:36,364][00564] Num frames 12000... +[2024-09-22 12:19:36,483][00564] Num frames 12100... +[2024-09-22 12:19:36,608][00564] Num frames 12200... +[2024-09-22 12:19:36,748][00564] Num frames 12300... +[2024-09-22 12:19:36,869][00564] Num frames 12400... +[2024-09-22 12:19:36,990][00564] Num frames 12500... +[2024-09-22 12:19:37,117][00564] Avg episode rewards: #0: 29.660, true rewards: #0: 12.560 +[2024-09-22 12:19:37,118][00564] Avg episode reward: 29.660, avg true_objective: 12.560 +[2024-09-22 12:20:51,928][00564] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-09-22 12:25:52,984][00564] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-09-22 12:25:52,986][00564] Overriding arg 'num_workers' with value 1 passed from command line +[2024-09-22 12:25:52,988][00564] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-09-22 12:25:52,990][00564] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-09-22 12:25:52,991][00564] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-09-22 12:25:52,993][00564] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-09-22 12:25:52,995][00564] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-09-22 12:25:52,997][00564] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-09-22 12:25:52,997][00564] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-09-22 12:25:52,998][00564] Adding new argument 'hf_repository'='kalmi901/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-09-22 12:25:53,000][00564] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-09-22 12:25:53,000][00564] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-09-22 12:25:53,001][00564] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-09-22 12:25:53,002][00564] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-09-22 12:25:53,003][00564] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-09-22 12:25:53,030][00564] RunningMeanStd input shape: (3, 72, 128) +[2024-09-22 12:25:53,032][00564] RunningMeanStd input shape: (1,) +[2024-09-22 12:25:53,044][00564] ConvEncoder: input_channels=3 +[2024-09-22 12:25:53,079][00564] Conv encoder output size: 512 +[2024-09-22 12:25:53,081][00564] Policy head output size: 512 +[2024-09-22 12:25:53,099][00564] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-09-22 12:25:53,518][00564] Num frames 100... +[2024-09-22 12:25:53,648][00564] Num frames 200... +[2024-09-22 12:25:53,784][00564] Num frames 300... +[2024-09-22 12:25:53,911][00564] Num frames 400... +[2024-09-22 12:25:54,031][00564] Num frames 500... +[2024-09-22 12:25:54,150][00564] Num frames 600... +[2024-09-22 12:25:54,295][00564] Avg episode rewards: #0: 11.720, true rewards: #0: 6.720 +[2024-09-22 12:25:54,296][00564] Avg episode reward: 11.720, avg true_objective: 6.720 +[2024-09-22 12:25:54,335][00564] Num frames 700... +[2024-09-22 12:25:54,457][00564] Num frames 800... +[2024-09-22 12:25:54,591][00564] Num frames 900... +[2024-09-22 12:25:54,711][00564] Num frames 1000... +[2024-09-22 12:25:54,838][00564] Num frames 1100... +[2024-09-22 12:25:54,973][00564] Avg episode rewards: #0: 11.330, true rewards: #0: 5.830 +[2024-09-22 12:25:54,975][00564] Avg episode reward: 11.330, avg true_objective: 5.830 +[2024-09-22 12:25:55,018][00564] Num frames 1200... +[2024-09-22 12:25:55,156][00564] Num frames 1300... +[2024-09-22 12:25:55,330][00564] Num frames 1400... +[2024-09-22 12:25:55,498][00564] Num frames 1500... +[2024-09-22 12:25:55,664][00564] Num frames 1600... +[2024-09-22 12:25:55,832][00564] Num frames 1700... +[2024-09-22 12:25:55,998][00564] Num frames 1800... +[2024-09-22 12:25:56,159][00564] Num frames 1900... +[2024-09-22 12:25:56,318][00564] Num frames 2000... +[2024-09-22 12:25:56,486][00564] Num frames 2100... +[2024-09-22 12:25:56,667][00564] Num frames 2200... +[2024-09-22 12:25:56,865][00564] Num frames 2300... +[2024-09-22 12:25:57,045][00564] Num frames 2400... +[2024-09-22 12:25:57,219][00564] Num frames 2500... +[2024-09-22 12:25:57,391][00564] Num frames 2600... +[2024-09-22 12:25:57,566][00564] Num frames 2700... +[2024-09-22 12:25:57,753][00564] Num frames 2800... +[2024-09-22 12:25:57,818][00564] Avg episode rewards: #0: 21.687, true rewards: #0: 9.353 +[2024-09-22 12:25:57,820][00564] Avg episode reward: 21.687, avg true_objective: 9.353 +[2024-09-22 12:25:57,930][00564] Num frames 2900... +[2024-09-22 12:25:58,046][00564] Num frames 3000... +[2024-09-22 12:25:58,168][00564] Num frames 3100... +[2024-09-22 12:25:58,286][00564] Num frames 3200... +[2024-09-22 12:25:58,408][00564] Num frames 3300... +[2024-09-22 12:25:58,527][00564] Num frames 3400... +[2024-09-22 12:25:58,654][00564] Num frames 3500... +[2024-09-22 12:25:58,808][00564] Avg episode rewards: #0: 21.185, true rewards: #0: 8.935 +[2024-09-22 12:25:58,809][00564] Avg episode reward: 21.185, avg true_objective: 8.935 +[2024-09-22 12:25:58,845][00564] Num frames 3600... +[2024-09-22 12:25:58,962][00564] Num frames 3700... +[2024-09-22 12:25:59,081][00564] Num frames 3800... +[2024-09-22 12:25:59,201][00564] Num frames 3900... +[2024-09-22 12:25:59,321][00564] Num frames 4000... +[2024-09-22 12:25:59,442][00564] Num frames 4100... +[2024-09-22 12:25:59,563][00564] Num frames 4200... +[2024-09-22 12:25:59,692][00564] Num frames 4300... +[2024-09-22 12:25:59,822][00564] Num frames 4400... +[2024-09-22 12:25:59,942][00564] Num frames 4500... +[2024-09-22 12:26:00,059][00564] Num frames 4600... +[2024-09-22 12:26:00,235][00564] Avg episode rewards: #0: 21.794, true rewards: #0: 9.394 +[2024-09-22 12:26:00,237][00564] Avg episode reward: 21.794, avg true_objective: 9.394 +[2024-09-22 12:26:00,243][00564] Num frames 4700... +[2024-09-22 12:26:00,371][00564] Num frames 4800... +[2024-09-22 12:26:00,491][00564] Num frames 4900... +[2024-09-22 12:26:00,608][00564] Num frames 5000... +[2024-09-22 12:26:00,743][00564] Num frames 5100... +[2024-09-22 12:26:00,861][00564] Num frames 5200... +[2024-09-22 12:26:00,979][00564] Num frames 5300... +[2024-09-22 12:26:01,098][00564] Num frames 5400... +[2024-09-22 12:26:01,217][00564] Num frames 5500... +[2024-09-22 12:26:01,338][00564] Num frames 5600... +[2024-09-22 12:26:01,461][00564] Num frames 5700... +[2024-09-22 12:26:01,580][00564] Num frames 5800... +[2024-09-22 12:26:01,714][00564] Num frames 5900... +[2024-09-22 12:26:01,838][00564] Num frames 6000... +[2024-09-22 12:26:01,961][00564] Num frames 6100... +[2024-09-22 12:26:02,083][00564] Num frames 6200... +[2024-09-22 12:26:02,203][00564] Num frames 6300... +[2024-09-22 12:26:02,328][00564] Num frames 6400... +[2024-09-22 12:26:02,448][00564] Num frames 6500... +[2024-09-22 12:26:02,568][00564] Num frames 6600... +[2024-09-22 12:26:02,692][00564] Num frames 6700... +[2024-09-22 12:26:02,880][00564] Avg episode rewards: #0: 27.328, true rewards: #0: 11.328 +[2024-09-22 12:26:02,881][00564] Avg episode reward: 27.328, avg true_objective: 11.328 +[2024-09-22 12:26:02,889][00564] Num frames 6800... +[2024-09-22 12:26:03,009][00564] Num frames 6900... +[2024-09-22 12:26:03,126][00564] Num frames 7000... +[2024-09-22 12:26:03,247][00564] Num frames 7100... +[2024-09-22 12:26:03,367][00564] Num frames 7200... +[2024-09-22 12:26:03,498][00564] Num frames 7300... +[2024-09-22 12:26:03,620][00564] Num frames 7400... +[2024-09-22 12:26:03,746][00564] Num frames 7500... +[2024-09-22 12:26:03,876][00564] Num frames 7600... +[2024-09-22 12:26:03,998][00564] Num frames 7700... +[2024-09-22 12:26:04,115][00564] Num frames 7800... +[2024-09-22 12:26:04,236][00564] Num frames 7900... +[2024-09-22 12:26:04,355][00564] Num frames 8000... +[2024-09-22 12:26:04,466][00564] Avg episode rewards: #0: 27.493, true rewards: #0: 11.493 +[2024-09-22 12:26:04,468][00564] Avg episode reward: 27.493, avg true_objective: 11.493 +[2024-09-22 12:26:04,533][00564] Num frames 8100... +[2024-09-22 12:26:04,651][00564] Num frames 8200... +[2024-09-22 12:26:04,780][00564] Num frames 8300... +[2024-09-22 12:26:04,905][00564] Num frames 8400... +[2024-09-22 12:26:05,024][00564] Num frames 8500... +[2024-09-22 12:26:05,145][00564] Num frames 8600... +[2024-09-22 12:26:05,265][00564] Num frames 8700... +[2024-09-22 12:26:05,383][00564] Num frames 8800... +[2024-09-22 12:26:05,506][00564] Num frames 8900... +[2024-09-22 12:26:05,627][00564] Num frames 9000... +[2024-09-22 12:26:05,759][00564] Num frames 9100... +[2024-09-22 12:26:05,889][00564] Num frames 9200... +[2024-09-22 12:26:06,054][00564] Avg episode rewards: #0: 27.366, true rewards: #0: 11.616 +[2024-09-22 12:26:06,056][00564] Avg episode reward: 27.366, avg true_objective: 11.616 +[2024-09-22 12:26:06,067][00564] Num frames 9300... +[2024-09-22 12:26:06,188][00564] Num frames 9400... +[2024-09-22 12:26:06,305][00564] Num frames 9500... +[2024-09-22 12:26:06,425][00564] Num frames 9600... +[2024-09-22 12:26:06,545][00564] Num frames 9700... +[2024-09-22 12:26:06,662][00564] Num frames 9800... +[2024-09-22 12:26:06,790][00564] Num frames 9900... +[2024-09-22 12:26:06,849][00564] Avg episode rewards: #0: 25.446, true rewards: #0: 11.001 +[2024-09-22 12:26:06,850][00564] Avg episode reward: 25.446, avg true_objective: 11.001 +[2024-09-22 12:26:06,965][00564] Num frames 10000... +[2024-09-22 12:26:07,096][00564] Num frames 10100... +[2024-09-22 12:26:07,212][00564] Num frames 10200... +[2024-09-22 12:26:07,330][00564] Num frames 10300... +[2024-09-22 12:26:07,446][00564] Num frames 10400... +[2024-09-22 12:26:07,563][00564] Num frames 10500... +[2024-09-22 12:26:07,684][00564] Num frames 10600... +[2024-09-22 12:26:07,747][00564] Avg episode rewards: #0: 24.005, true rewards: #0: 10.605 +[2024-09-22 12:26:07,749][00564] Avg episode reward: 24.005, avg true_objective: 10.605 +[2024-09-22 12:27:09,362][00564] Replay video saved to /content/train_dir/default_experiment/replay.mp4!