#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii Inc. All rights reserved.

import glob
import os
import subprocess  # noqa: F401  (no longer used below; kept so the module's imports are unchanged)

from loguru import logger

import cv2

from .dist import get_world_size, is_main_process

__all__ = ["configure_nccl", "configure_module", "configure_omp"]

# Directory that `configure_nccl` scans for `mlx5_*` devices.
_INFINIBAND_SYSFS = "/sys/class/infiniband"


def _list_roce_hcas(sysfs_root=_INFINIBAND_SYSFS):
    """
    List the `mlx5_*` devices whose port 1 reports a GID type containing "v".

    A device is kept when any readable file under
    `<device>/ports/1/gid_attrs/types/` contains the letter "v", i.e. the
    pure-Python equivalent of `cat <files> 2>/dev/null | grep v`.

    Args:
        sysfs_root (str): directory holding one sub-directory per device.

    Returns:
        list[str]: matching device names in sorted order. Empty when the
            directory does not exist or no device matches.
    """
    hcas = []
    for device_dir in sorted(glob.glob(os.path.join(sysfs_root, "mlx5_*"))):
        types_pattern = os.path.join(device_dir, "ports", "1", "gid_attrs", "types", "*")
        for type_file in sorted(glob.glob(types_pattern)):
            try:
                with open(type_file, errors="replace") as handle:
                    gid_type = handle.read()
            except OSError:
                # Some entries cannot be read; the former shell pipeline hid
                # those errors with `2>/dev/null`, so they are skipped here too.
                continue
            if "v" in gid_type:
                hcas.append(os.path.basename(device_dir))
                break
    return hcas


def configure_nccl():
    """
    Configure multi-machine environment variables of NCCL.

    Sets `NCCL_LAUNCH_MODE`, `NCCL_IB_HCA`, `NCCL_IB_GID_INDEX` and
    `NCCL_IB_TC` in `os.environ`.

    `NCCL_IB_HCA` is built by scanning sysfs directly instead of running a
    shell pipeline: the previous `pushd`/`popd` command depended on bash
    builtins and `subprocess.getoutput` merges stderr into its result, so
    shell error messages could end up inside the variable. Device names are
    joined with commas, the list separator NCCL documents for this variable;
    the value is an empty string when no device is found.
    """
    os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL"
    os.environ["NCCL_IB_HCA"] = ",".join(_list_roce_hcas())
    os.environ["NCCL_IB_GID_INDEX"] = "3"
    os.environ["NCCL_IB_TC"] = "106"


def configure_omp(num_threads=1):
    """
    Set `OMP_NUM_THREADS` when it is unset and world_size is greater than 1.

    An `OMP_NUM_THREADS` value that already exists in the environment is
    never overridden. The main process logs the value that was applied.

    Args:
        num_threads (int): value of `OMP_NUM_THREADS` to set.
    """
    # We set OMP_NUM_THREADS=1 by default, which achieves the best speed on our machines
    # feel free to change it for better performance.
    if "OMP_NUM_THREADS" in os.environ or get_world_size() <= 1:
        return

    os.environ["OMP_NUM_THREADS"] = str(num_threads)
    if is_main_process():
        banner = (
            "\n***************************************************************\n"
            "We set `OMP_NUM_THREADS` for each process to {} to speed up.\n"
            "please further tune the variable for optimal performance.\n"
            "***************************************************************"
        )
        logger.info(banner.format(os.environ["OMP_NUM_THREADS"]))


def configure_module(ulimit_value=8192):
    """
    Configure pytorch module environment. setting of ulimit and cv2 will be set.

    The soft open-file limit is only ever raised: it is left untouched when
    it is already at least `ulimit_value`, and the requested value is clamped
    to the hard limit so the request is not rejected outright. Both steps are
    best effort and never raise.

    Args:
        ulimit_value(int): minimum soft limit for open files on linux.
            Default value: 8192.
    """
    # system setting
    try:
        import resource

        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        unlimited = resource.RLIM_INFINITY
        # A soft limit above the hard limit is rejected, so cap the request.
        target = ulimit_value if hard == unlimited else min(ulimit_value, hard)
        # Never lower a limit that is already high enough.
        if soft != unlimited and soft < target:
            resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard))
    except Exception:
        # `resource` does not exist on Windows and the OS may still refuse the
        # new value. Raising the limit is optional, so failures are ignored.
        pass

    # cv2
    # multiprocess might be harmful on performance of torch dataloader
    os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled"
    try:
        cv2.setNumThreads(0)
        cv2.ocl.setUseOpenCL(False)
    except Exception:
        # cv2 version mismatch might raise exceptions.
        pass