chendl's picture
Add application file
0b7b08a
raw
history blame
2.68 kB
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii Inc. All rights reserved.
import os
import subprocess
from loguru import logger
import cv2
from .dist import get_world_size, is_main_process
# Names exported by a star-import of this module.
__all__ = ["configure_nccl", "configure_module", "configure_omp"]
def configure_nccl(ib_root="/sys/class/infiniband"):
    """Configure multi-machine environment variables of NCCL.

    Sets ``NCCL_LAUNCH_MODE``, ``NCCL_IB_HCA``, ``NCCL_IB_GID_INDEX`` and
    ``NCCL_IB_TC`` in ``os.environ``.

    ``NCCL_IB_HCA`` receives the newline-separated names of every ``mlx5_*``
    device under ``ib_root`` for which at least one readable file in
    ``ports/1/gid_attrs/types/`` contains the letter "v". It is set to an
    empty string when no such device is found or ``ib_root`` does not exist.

    The scan is done in Python rather than through a shell command: the
    previous command relied on ``pushd``/``popd``, which are not available in
    every ``/bin/sh``, and ``subprocess.getoutput`` also captures stderr, so
    shell error messages could end up in ``NCCL_IB_HCA``.

    Args:
        ib_root (str): directory holding the InfiniBand device entries.
            Default value: "/sys/class/infiniband".
    """
    os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL"

    try:
        # Sorted to keep a deterministic order, like a shell glob would give.
        candidates = sorted(
            entry for entry in os.listdir(ib_root) if entry.startswith("mlx5_")
        )
    except OSError:
        # No InfiniBand directory (or not readable): nothing to select.
        candidates = []

    selected = []
    for device in candidates:
        types_dir = os.path.join(ib_root, device, "ports", "1", "gid_attrs", "types")
        try:
            type_files = sorted(os.listdir(types_dir))
        except OSError:
            continue
        for type_file in type_files:
            if type_file.startswith("."):
                # A shell `*` glob would not have matched hidden entries.
                continue
            try:
                with open(os.path.join(types_dir, type_file), errors="replace") as handle:
                    content = handle.read()
            except OSError:
                # Unreadable entries are skipped, as `2>/dev/null` did before.
                continue
            if "v" in content:
                selected.append(device)
                break

    os.environ["NCCL_IB_HCA"] = "\n".join(selected)
    os.environ["NCCL_IB_GID_INDEX"] = "3"
    os.environ["NCCL_IB_TC"] = "106"
def configure_omp(num_threads=1):
    """
    Give `OMP_NUM_THREADS` a default value for multi-process runs.

    The variable is written only when it is absent from the environment and
    `get_world_size()` reports more than one process; otherwise nothing
    happens. The main process logs the value that was set.

    Args:
        num_threads (int): value of `OMP_NUM_THREADS` to set.
    """
    # Leave an already-configured value untouched.
    if "OMP_NUM_THREADS" in os.environ:
        return
    # Nothing to do for a single-process run.
    if get_world_size() <= 1:
        return

    # The default of 1 gave the best speed on the authors' machines;
    # feel free to change it for better performance.
    os.environ["OMP_NUM_THREADS"] = str(num_threads)

    # Only the main process reports the setting.
    if not is_main_process():
        return
    notice = (
        "\n***************************************************************\n"
        "We set `OMP_NUM_THREADS` for each process to {} to speed up.\n"
        "please further tune the variable for optimal performance.\n"
        "***************************************************************"
    )
    logger.info(notice.format(os.environ["OMP_NUM_THREADS"]))
def configure_module(ulimit_value=8192):
    """
    Apply process-level settings: the open-file soft limit and cv2 options.

    Both steps are best effort; any exception they raise is ignored.

    Args:
        ulimit_value(int): soft limit to request for the number of open
            files. Default value: 8192.
    """
    # Open-file limit.
    try:
        import resource

        _, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
        resource.setrlimit(resource.RLIMIT_NOFILE, (ulimit_value, hard_limit))
    except Exception:
        # This can fail on Windows or when the requested value exceeds the
        # maximum allowed limit. Adjusting the limit might not be necessary,
        # so the failure is ignored.
        pass

    # cv2: multiprocessing inside OpenCV might hurt the performance of the
    # torch dataloader, so OpenCL and cv2's own threading are switched off.
    os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled"
    try:
        cv2.setNumThreads(0)
        cv2.ocl.setUseOpenCL(False)
    except Exception:
        # A cv2 version mismatch might raise exceptions here.
        pass