import os
from typing import Optional, Callable

import psutil
from pympler.asizeof import asizeof
from tensorboardX import SummaryWriter


def random_collect(
        policy_cfg: 'EasyDict',  # noqa
        policy: 'Policy',  # noqa
        RandomPolicy: 'Policy',  # noqa
        collector: 'ISerialCollector',  # noqa
        collector_env: 'BaseEnvManager',  # noqa
        replay_buffer: 'IBuffer',  # noqa
        postprocess_data_fn: Optional[Callable] = None
) -> None:  # noqa
    """
    Overview:
        Collect ``policy_cfg.random_collect_episode_num`` episodes with a random policy, push them into
        the replay buffer, and then restore the original collect-mode policy on the collector.
    Arguments:
        - policy_cfg (:obj:`EasyDict`): The policy config, which must set ``random_collect_episode_num > 0``.
        - policy (:obj:`Policy`): The policy to restore on the collector after random collection.
        - RandomPolicy (:obj:`Policy`): The random policy class used for the warm-up collection.
        - collector (:obj:`ISerialCollector`): The serial collector that gathers the episodes.
        - collector_env (:obj:`BaseEnvManager`): The environment manager, used to read the action space.
        - replay_buffer (:obj:`IBuffer`): The replay buffer that stores the collected game segments.
        - postprocess_data_fn (:obj:`Optional[Callable]`): Optional function applied to the collected data before it is pushed into the buffer.
    """
    assert policy_cfg.random_collect_episode_num > 0

    random_policy = RandomPolicy(cfg=policy_cfg, action_space=collector_env.env_ref.action_space)
    # Temporarily switch the collector to the random policy.
    collector.reset_policy(random_policy.collect_mode)

    # Set the temperature for the visit count distributions and the exploration epsilon;
    # see Appendix D of the MuZero paper for details on the temperature schedule.
    collect_kwargs = {'temperature': 1, 'epsilon': 0.0}

    # Collect ``random_collect_episode_num`` episodes with the random policy.
    new_data = collector.collect(
        n_episode=policy_cfg.random_collect_episode_num, train_iter=0, policy_kwargs=collect_kwargs
    )

    if postprocess_data_fn is not None:
        new_data = postprocess_data_fn(new_data)

    # Save the collected game segments to the replay buffer.
    replay_buffer.push_game_segments(new_data)
    # Remove the oldest data if the replay buffer is full.
    replay_buffer.remove_oldest_data_to_fit()

    # Restore the original collect-mode policy on the collector.
    collector.reset_policy(policy.collect_mode)


def log_buffer_memory_usage(train_iter: int, buffer: "GameBuffer", writer: SummaryWriter) -> None:
    """
    Overview:
        Log the memory usage of the buffer and the current process to TensorBoard.
    Arguments:
        - train_iter (:obj:`int`): The current training iteration.
        - buffer (:obj:`GameBuffer`): The game buffer.
        - writer (:obj:`SummaryWriter`): The TensorBoard writer.
    """
    writer.add_scalar('Buffer/num_of_all_collected_episodes', buffer.num_of_collected_episodes, train_iter)
    writer.add_scalar('Buffer/num_of_game_segments', len(buffer.game_segment_buffer), train_iter)
    writer.add_scalar('Buffer/num_of_transitions', len(buffer.game_segment_game_pos_look_up), train_iter)

    game_segment_buffer = buffer.game_segment_buffer

    # Calculate the amount of memory occupied by buffer.game_segment_buffer (in bytes).
    buffer_memory_usage = asizeof(game_segment_buffer)

    # Convert buffer_memory_usage to megabytes (MB).
    buffer_memory_usage_mb = buffer_memory_usage / (1024 * 1024)

    # Record the memory usage of buffer.game_segment_buffer to TensorBoard.
    writer.add_scalar('Buffer/memory_usage/game_segment_buffer', buffer_memory_usage_mb, train_iter)

    # Get the amount of memory currently used by the process (resident set size, in bytes).
    process = psutil.Process(os.getpid())
    process_memory_usage = process.memory_info().rss

    # Convert process_memory_usage to megabytes (MB).
    process_memory_usage_mb = process_memory_usage / (1024 * 1024)

    # Record the memory usage of the process to TensorBoard.
    writer.add_scalar('Buffer/memory_usage/process', process_memory_usage_mb, train_iter)
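

# -----------------------------------------------------------------------------
# Minimal smoke-test sketch for ``log_buffer_memory_usage`` (illustration only,
# not part of the library API). ``dummy_buffer`` is a hypothetical stand-in that
# exposes just the attributes the logger reads; in real training the argument
# would be an actual ``GameBuffer`` instance, and the log directory below is an
# arbitrary example path.
if __name__ == '__main__':
    from types import SimpleNamespace

    # A fake buffer with three short game segments and their position look-up table.
    dummy_buffer = SimpleNamespace(
        num_of_collected_episodes=3,
        game_segment_buffer=[list(range(100)) for _ in range(3)],
        game_segment_game_pos_look_up=[(seg, pos) for seg in range(3) for pos in range(100)],
    )
    tb_writer = SummaryWriter('./log/buffer_usage_demo')
    for it in (0, 100, 200):
        # Each call writes buffer sizes and memory-usage scalars for iteration ``it``.
        log_buffer_memory_usage(train_iter=it, buffer=dummy_buffer, writer=tb_writer)
    tb_writer.close()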