# Collect GPU utilization and memory usage, and generate the metrics output.
# -*- coding: utf-8 -*-
from log import logger
from io import StringIO
import pynvml
def make_metrics(index=0):
    """Build the metrics text for the GPU at position ``index``.

    Queries NVML through ``pynvml`` for the device UUID, utilization rates
    and memory info, then renders three lines::

        UUID=<uuid> INDEX=<index>
        UUID=<uuid> UTILIZATIONRATE=<utilization.gpu / 100>
        UUID=<uuid> MEMORYUTILIZATION=<mem.used / mem.total>

    When ``index`` is not below the device count, a single
    ``max-device-count <count>`` line is written instead and the problem is
    logged.  Any exception raised while querying is passed to
    ``logger.exception`` and swallowed, so the function returns whatever was
    written up to that point (possibly an empty string).

    This function does not initialize NVML itself.

    Args:
        index: Device index handed to ``nvmlDeviceGetHandleByIndex``.

    Returns:
        The metrics text as a ``str``.
    """
    output = StringIO()
    try:
        device_count = pynvml.nvmlDeviceGetCount()
        if index >= device_count:
            output.write("max-device-count {}\n".format(device_count))
            # Raised on purpose so the handler below logs the bad index.
            raise Exception("index[{}] is greater the max device index[{}]".format(index, device_count))

        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
        gpu_uuid = pynvml.nvmlDeviceGetUUID(handle)
        if isinstance(gpu_uuid, bytes):
            # Some pynvml releases hand back bytes; formatting those directly
            # would emit "UUID=b'...'" instead of the plain identifier.
            gpu_uuid = gpu_uuid.decode("utf-8")
        gpu_utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)

        # All queries are done before the first write, so a failing query
        # never leaves a partially written record for this device.
        output.write("UUID={UUID} INDEX={INDEX}\n".format(
            INDEX=index, UUID=gpu_uuid))
        output.write("UUID={UUID} UTILIZATIONRATE={UTILIZATIONRATE}\n".format(
            UUID=gpu_uuid, UTILIZATIONRATE=gpu_utilization.gpu / 100))
        output.write("UUID={UUID} MEMORYUTILIZATION={MEMORYUTILIZATION}\n".format(
            UUID=gpu_uuid, MEMORYUTILIZATION=mem.used / mem.total))
    except Exception as e:
        logger.exception(e)
    return output.getvalue()
def get_metrics(indexes=None):
    """Return the concatenated metrics text for the requested GPUs.

    Args:
        indexes: Which devices to report on.  ``None`` (the default) means
            every device reported by ``pynvml.nvmlDeviceGetCount()``; a
            single ``int`` selects one device; any other value is iterated
            and each item is passed to ``make_metrics``.

    Returns:
        The outputs of ``make_metrics`` for each index, joined in order.
        An empty iterable yields an empty string.
    """
    # Identity check: ``==`` would invoke the argument's own ``__eq__``,
    # which is wrong (or raises) for array-like inputs.
    if indexes is None:
        indexes = range(pynvml.nvmlDeviceGetCount())
    elif isinstance(indexes, int):
        indexes = [indexes]
    return "".join(make_metrics(idx) for idx in indexes)
def main():
    """Print the metrics of every GPU to standard output.

    NVML is initialized before querying and shut down afterwards, even when
    collecting the metrics raises; NVML calls fail on an uninitialized
    library.
    """
    pynvml.nvmlInit()
    try:
        print(get_metrics())
    finally:
        pynvml.nvmlShutdown()
# Run the exporter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()