Spaces:
Runtime error
Runtime error
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# Copyright (c) Megvii Inc. All rights reserved. | |
import functools | |
import os | |
import time | |
from collections import defaultdict, deque | |
import psutil | |
import numpy as np | |
import torch | |
__all__ = [ | |
"AverageMeter", | |
"MeterBuffer", | |
"get_total_and_free_memory_in_Mb", | |
"occupy_mem", | |
"gpu_mem_usage", | |
"mem_usage" | |
] | |
def get_total_and_free_memory_in_Mb(cuda_device): | |
devices_info_str = os.popen( | |
"nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader" | |
) | |
devices_info = devices_info_str.read().strip().split("\n") | |
if "CUDA_VISIBLE_DEVICES" in os.environ: | |
visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(',') | |
cuda_device = int(visible_devices[cuda_device]) | |
total, used = devices_info[int(cuda_device)].split(",") | |
return int(total), int(used) | |
def occupy_mem(cuda_device, mem_ratio=0.9): | |
""" | |
pre-allocate gpu memory for training to avoid memory Fragmentation. | |
""" | |
total, used = get_total_and_free_memory_in_Mb(cuda_device) | |
max_mem = int(total * mem_ratio) | |
block_mem = max_mem - used | |
x = torch.cuda.FloatTensor(256, 1024, block_mem) | |
del x | |
time.sleep(5) | |
def gpu_mem_usage(): | |
""" | |
Compute the GPU memory usage for the current device (MB). | |
""" | |
mem_usage_bytes = torch.cuda.max_memory_allocated() | |
return mem_usage_bytes / (1024 * 1024) | |
def mem_usage(): | |
""" | |
Compute the memory usage for the current machine (GB). | |
""" | |
gb = 1 << 30 | |
mem = psutil.virtual_memory() | |
return mem.used / gb | |
class AverageMeter: | |
"""Track a series of values and provide access to smoothed values over a | |
window or the global series average. | |
""" | |
def __init__(self, window_size=50): | |
self._deque = deque(maxlen=window_size) | |
self._total = 0.0 | |
self._count = 0 | |
def update(self, value): | |
self._deque.append(value) | |
self._count += 1 | |
self._total += value | |
def median(self): | |
d = np.array(list(self._deque)) | |
return np.median(d) | |
def avg(self): | |
# if deque is empty, nan will be returned. | |
d = np.array(list(self._deque)) | |
return d.mean() | |
def global_avg(self): | |
return self._total / max(self._count, 1e-5) | |
def latest(self): | |
return self._deque[-1] if len(self._deque) > 0 else None | |
def total(self): | |
return self._total | |
def reset(self): | |
self._deque.clear() | |
self._total = 0.0 | |
self._count = 0 | |
def clear(self): | |
self._deque.clear() | |
class MeterBuffer(defaultdict): | |
"""Computes and stores the average and current value""" | |
def __init__(self, window_size=20): | |
factory = functools.partial(AverageMeter, window_size=window_size) | |
super().__init__(factory) | |
def reset(self): | |
for v in self.values(): | |
v.reset() | |
def get_filtered_meter(self, filter_key="time"): | |
return {k: v for k, v in self.items() if filter_key in k} | |
def update(self, values=None, **kwargs): | |
if values is None: | |
values = {} | |
values.update(kwargs) | |
for k, v in values.items(): | |
if isinstance(v, torch.Tensor): | |
v = v.detach() | |
self[k].update(v) | |
def clear_meters(self): | |
for v in self.values(): | |
v.clear() | |