# 获取显卡使用率和内存使用率,并生成 metrics 信息
# -*- coding: utf-8 -*-
from log import logger
from io import StringIO
import pynvml
def make_metrics(index=0):
    """Build the metrics text for the GPU at ``index``.

    Queries NVML through ``pynvml`` for the device UUID, the utilization
    rates and the memory info of the device, and renders three lines::

        UUID=<uuid> INDEX=<index>
        UUID=<uuid> UTILIZATIONRATE=<utilization.gpu / 100>
        UUID=<uuid> MEMORYUTILIZATION=<mem.used / mem.total>

    When ``index`` is not below the device count reported by NVML, a
    ``max-device-count <count>`` line is written instead and the problem
    is logged.

    This function does not initialise NVML itself; ``pynvml.nvmlInit()`` is
    expected to have been called by the caller.

    Args:
        index: Device index handed to ``nvmlDeviceGetHandleByIndex``.

    Returns:
        The metrics text. Any exception raised while collecting is logged
        with ``logger.exception`` and swallowed, so the result holds
        whatever was written before the failure (possibly nothing).
    """
    output = StringIO()
    try:
        device_count = pynvml.nvmlDeviceGetCount()
        if index >= device_count:
            output.write("max-device-count {}\n".format(device_count))
            # Raised inside the try on purpose: the handler below logs it
            # with a traceback and the function still returns the text.
            raise Exception(
                "index[{}] is greater the max device index[{}]".format(
                    index, device_count))

        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
        gpu_uuid = pynvml.nvmlDeviceGetUUID(handle)
        # Some pynvml releases return bytes under Python 3; decode so the
        # output reads "UUID=GPU-..." rather than "UUID=b'GPU-...'".
        if isinstance(gpu_uuid, bytes):
            gpu_uuid = gpu_uuid.decode("utf-8")
        gpu_utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)

        output.write("UUID={UUID} INDEX={INDEX}\n".format(
            INDEX=index, UUID=gpu_uuid))
        output.write("UUID={UUID} UTILIZATIONRATE={UTILIZATIONRATE}\n".format(
            UUID=gpu_uuid, UTILIZATIONRATE=gpu_utilization.gpu / 100))
        output.write("UUID={UUID} MEMORYUTILIZATION={MEMORYUTILIZATION}\n".format(
            UUID=gpu_uuid, MEMORYUTILIZATION=mem.used / mem.total))
    except Exception as e:
        # Best-effort collection: log and fall through to return what we have.
        logger.exception(e)
    return output.getvalue()
def get_metrics(indexes=None):
    """Return the concatenated metrics text for one or more GPUs.

    Args:
        indexes: Which devices to report on. ``None`` (the default) means
            every device counted by ``pynvml.nvmlDeviceGetCount()``; a
            single ``int`` selects one device; any other value is iterated
            as a collection of device indexes.

    Returns:
        The ``make_metrics`` output for each index, joined in order. An
        empty collection yields an empty string.

    Note:
        The device-count query for the ``None`` case runs outside any
        ``try`` block, so an error from it propagates to the caller.
    """
    # Identity check: ``None == indexes`` would delegate to the argument's
    # ``__eq__``, which is wrong for array-likes and custom types.
    if indexes is None:
        indexes = range(pynvml.nvmlDeviceGetCount())
    elif isinstance(indexes, int):
        indexes = [indexes]
    return "".join(make_metrics(idx) for idx in indexes)
def main():
    """Script entry point: print the metrics of every GPU to stdout.

    NVML must be initialised before any other NVML call, so it is
    initialised here and shut down again once the metrics are printed,
    even when collection fails.
    """
    pynvml.nvmlInit()
    try:
        print(get_metrics())
    finally:
        pynvml.nvmlShutdown()
# Run the collector only when executed as a script, not when imported.
if __name__ == "__main__":
    main()