Spaces:
Runtime error
Runtime error
File size: 3,456 Bytes
0b7b08a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Megvii Inc. All rights reserved.
import functools
import os
import time
from collections import defaultdict, deque
import psutil
import numpy as np
import torch
# Public API of this module: the names exported by a star-import.
__all__ = [
"AverageMeter",
"MeterBuffer",
"get_total_and_free_memory_in_Mb",
"occupy_mem",
"gpu_mem_usage",
"mem_usage"
]
def get_total_and_free_memory_in_Mb(cuda_device):
    """Query ``nvidia-smi`` for the memory figures of a single GPU.

    Args:
        cuda_device: Index of the device to look up. When the
            ``CUDA_VISIBLE_DEVICES`` environment variable is set, this is
            treated as a position in that comma-separated list and is
            translated to the entry found there.

    Returns:
        A ``(total, used)`` tuple of ints parsed from the ``nvidia-smi``
        output (unit suffix removed by ``nounits``). Despite the function
        name, the second element is the *used* memory, because the query
        asks for ``memory.used``.

    Raises:
        ValueError: If the selected output line cannot be parsed, e.g. when
            ``nvidia-smi`` produced no output.
        IndexError: If the device index is outside the reported devices.
    """
    query = "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader"
    # Use the pipe as a context manager so it is closed as soon as the
    # output has been read instead of being left to garbage collection.
    with os.popen(query) as pipe:
        devices_info = pipe.read().strip().split("\n")

    if "CUDA_VISIBLE_DEVICES" in os.environ:
        visible_devices = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
        # int() means only numeric device entries are supported here.
        cuda_device = int(visible_devices[cuda_device])

    total, used = devices_info[int(cuda_device)].split(",")
    return int(total), int(used)
def occupy_mem(cuda_device, mem_ratio=0.9):
    """
    Pre-allocate GPU memory for training to avoid memory fragmentation.

    Args:
        cuda_device: Device index forwarded to
            ``get_total_and_free_memory_in_Mb`` to read the memory figures.
        mem_ratio: Fraction of the total memory to aim for; the amount
            already in use is subtracted from that target.
    """
    total_mem, used_mem = get_total_and_free_memory_in_Mb(cuda_device)
    # 256 * 1024 float32 values occupy 1 MiB, so the last dimension is the
    # size of the block expressed in the same unit as the queried figures.
    block_size = int(total_mem * mem_ratio) - used_mem
    placeholder = torch.cuda.FloatTensor(256, 1024, block_size)
    # NOTE(review): presumably the freed block stays in PyTorch's caching
    # allocator for later reuse -- confirm.
    del placeholder
    time.sleep(5)
def gpu_mem_usage():
    """
    Return the peak GPU memory allocated on the current device, in MB.

    The value comes from ``torch.cuda.max_memory_allocated()`` (bytes) and is
    divided by 1024 * 1024.
    """
    bytes_per_mb = 1 << 20
    return torch.cuda.max_memory_allocated() / bytes_per_mb
def mem_usage():
    """
    Return the used system memory of the current machine, in GB.

    Reads the ``used`` field of ``psutil.virtual_memory()`` (bytes) and
    divides it by 2 ** 30.
    """
    bytes_per_gb = 2 ** 30
    return psutil.virtual_memory().used / bytes_per_gb
class AverageMeter:
    """Collect a stream of values and report windowed or global statistics.

    The most recent ``window_size`` values are kept in a bounded deque, while
    a running sum and count cover every value seen since the last ``reset``.
    """

    def __init__(self, window_size=50):
        self._count = 0
        self._total = 0.0
        self._deque = deque(maxlen=window_size)

    def _window_values(self):
        """Snapshot the values currently in the window as a NumPy array."""
        return np.array(list(self._deque))

    def update(self, value):
        """Push ``value`` into the window and add it to the running totals."""
        self._deque.append(value)
        self._count += 1
        self._total += value

    @property
    def median(self):
        """Median of the values currently in the window."""
        return np.median(self._window_values())

    @property
    def avg(self):
        """Mean of the values currently in the window (nan when empty)."""
        return self._window_values().mean()

    @property
    def global_avg(self):
        """Mean over every value recorded since the last ``reset``."""
        # The 1e-5 floor keeps the division defined before any update.
        denominator = max(self._count, 1e-5)
        return self._total / denominator

    @property
    def latest(self):
        """Most recently recorded value, or ``None`` if the window is empty."""
        if not self._deque:
            return None
        return self._deque[-1]

    @property
    def total(self):
        """Running sum of every value recorded since the last ``reset``."""
        return self._total

    def reset(self):
        """Drop the window contents and zero the running sum and count."""
        self._total = 0.0
        self._count = 0
        self._deque.clear()

    def clear(self):
        """Drop the window contents only; the running totals are kept."""
        self._deque.clear()
class MeterBuffer(defaultdict):
    """Dictionary of named ``AverageMeter`` objects, created on first access.

    Looking up a missing key builds a new ``AverageMeter`` with the window
    size given at construction time.

    Args:
        window_size: Window size passed to every meter this buffer creates.
    """

    def __init__(self, window_size=20):
        factory = functools.partial(AverageMeter, window_size=window_size)
        super().__init__(factory)

    def reset(self):
        """Call ``reset()`` on every stored meter."""
        for meter in self.values():
            meter.reset()

    def get_filtered_meter(self, filter_key="time"):
        """Return a plain dict of the meters whose name contains ``filter_key``."""
        return {name: meter for name, meter in self.items() if filter_key in name}

    def update(self, values=None, **kwargs):
        """Feed new measurements into the meters they are named after.

        Note that this replaces ``dict.update``: each value is passed to the
        ``update`` method of the meter stored under its key instead of being
        stored in the mapping itself.

        Args:
            values: Optional mapping of meter name to new value. It is not
                modified by this call.
            **kwargs: Further name/value pairs; on a name clash these take
                precedence over ``values``.
        """
        # Merge into a private copy so the caller's mapping is left untouched.
        merged = dict(values) if values is not None else {}
        merged.update(kwargs)
        for name, value in merged.items():
            if isinstance(value, torch.Tensor):
                # Detach so the meter does not hold on to the autograd graph.
                value = value.detach()
            self[name].update(value)

    def clear_meters(self):
        """Call ``clear()`` on every stored meter."""
        for meter in self.values():
            meter.clear()
|