????

Your IP : 52.15.66.233


Current Path : /lib/python3.6/site-packages/glances/plugins/
Upload File :
Current File : //lib/python3.6/site-packages/glances/plugins/glances_gpu.py

# -*- coding: utf-8 -*-
#
# This file is part of Glances.
#
# Copyright (C) 2020 Kirby Banman <kirby.banman@gmail.com>
#
# SPDX-License-Identifier: LGPL-3.0-only
#

"""GPU plugin (limited to NVIDIA chipsets)."""

from glances.compat import nativestr, to_fahrenheit
from glances.logger import logger
from glances.plugins.glances_plugin import GlancesPlugin

# Since Glances 3.1.4 the NVML bindings are provided by py3nvml (see issue #1523).
# import_error_tag is True when the bindings could not be imported.
import_error_tag = False
try:
    import py3nvml.py3nvml as pynvml
except Exception as import_exc:
    # Any failure while importing is reported with a warning and flagged
    import_error_tag = True
    logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(import_exc))

# Stats kept in the history when the --enable-history tag is set.
# One entry per historised item: its stat name, a description and the
# unit used for the y axis.
items_history_list = [
    dict(name='proc', description='GPU processor', y_unit='%'),
    dict(name='mem', description='Memory consumption', y_unit='%'),
]


class Plugin(GlancesPlugin):
    """Glances GPU plugin (limited to NVIDIA chipsets).

    stats is a list of dictionaries with one entry per GPU. Each entry holds
    the keys 'key', 'gpu_id', 'name', 'mem', 'proc' and 'temperature'
    (see get_device_stats).
    """

    def __init__(self, args=None, config=None):
        """Init the plugin."""
        super(Plugin, self).__init__(args=args, config=config, stats_init_value=[])

        # Init the Nvidia API
        self.init_nvidia()

        # We want to display the stat in the curse interface
        self.display_curse = True

    def init_nvidia(self):
        """Init the NVIDIA API.

        Set self.nvml_ready and self.device_handles.

        Return True if NVML was initialised and the device handles were
        collected, False otherwise.
        """
        # Always define the attribute, even when NVML is not usable
        self.device_handles = []

        if import_error_tag:
            # The NVML bindings could not be imported: nothing to initialise
            self.nvml_ready = False
            return self.nvml_ready

        try:
            pynvml.nvmlInit()
            self.device_handles = get_device_handles()
            self.nvml_ready = True
        except Exception:
            logger.debug("pynvml could not be initialized.")
            self.nvml_ready = False

        return self.nvml_ready

    def get_key(self):
        """Return the key of the list."""
        return 'gpu_id'

    @GlancesPlugin._check_decorator
    @GlancesPlugin._log_result_decorator
    def update(self):
        """Update the GPU stats.

        Return the stats list. When NVML is not ready the current stats are
        returned unchanged.
        """
        # Init new stats
        stats = self.get_init_value()

        if not self.nvml_ready:
            # To test on a computer without GPU, set self.stats here to a list
            # of fake entries shaped like the ones built by get_device_stats()
            return self.stats

        if self.input_method == 'local':
            stats = self.get_device_stats()
        elif self.input_method == 'snmp':
            # not available
            pass

        # Update the stats
        self.stats = stats

        return self.stats

    def update_views(self):
        """Update stats views."""
        # Call the father's method
        super(Plugin, self).update_views()

        # Add specifics information
        # Alert
        for gpu in self.stats:
            # Init the views for the current GPU
            gpu_views = {'proc': {}, 'mem': {}, 'temperature': {}}
            self.views[gpu[self.get_key()]] = gpu_views
            # Processor, memory and temperature alerts
            for field in ('proc', 'mem', 'temperature'):
                if field in gpu:
                    gpu_views[field]['decoration'] = self.get_alert(gpu[field], header=field)

        return True

    def _gpu_mean(self, key):
        """Return the mean of stat key over the GPUs reporting a value.

        Return None when no GPU reports a value or when the values cannot
        be summed.
        """
        values = [s[key] for s in self.stats if s.get(key) is not None]
        if not values:
            return None
        try:
            return sum(values) / len(values)
        except TypeError:
            return None

    def _gpu_mean_msg(self, key, args):
        """Return the string displayed for the mean of stat key.

        Temperature is suffixed with its unit (converted to Fahrenheit if
        args.fahrenheit is set), other stats with '%'. 'N/A' is returned
        when no mean can be computed.
        """
        mean = self._gpu_mean(key)
        if mean is None:
            return '{:>4}'.format('N/A')
        if key == 'temperature':
            unit = 'C'
            # args defaults to None in msg_curse, so do not assume the attribute
            if getattr(args, 'fahrenheit', False):
                mean = to_fahrenheit(mean)
                unit = 'F'
            return '{:>3.0f}{}'.format(mean, unit)
        return '{:>3.0f}%'.format(mean)

    def msg_curse(self, args=None, max_width=None):
        """Return the dict to display in the curse interface."""
        # Init the return message
        ret = []

        # Only process if stats exist, not empty (issue #871) and plugin not disabled
        if not self.stats or self.is_disabled():
            return ret

        gpu_count = len(self.stats)

        # gpu_stats contain the first GPU in the list
        gpu_stats = self.stats[0]

        # Check if all GPU have the same name
        same_name = all(s['name'] == gpu_stats['name'] for s in self.stats)

        # Header
        header = ''
        if gpu_count > 1:
            header += '{} '.format(gpu_count)
        if same_name:
            header += '{} {}'.format('GPU', gpu_stats['name'])
        else:
            header += '{}'.format('GPU')
        msg = header[:17]
        ret.append(self.curse_add_line(msg, "TITLE"))

        # Build the string message
        if gpu_count == 1 or getattr(args, 'meangpu', False):
            # GPU stat summary or mono GPU
            # The decoration is taken from the views of the first GPU
            first_gpu_id = gpu_stats[self.get_key()]
            for key, single_label, mean_label in (
                ('proc', 'proc:', 'proc mean:'),
                ('mem', 'mem:', 'mem mean:'),
                ('temperature', 'temperature:', 'temp mean:'),
            ):
                # New line
                ret.append(self.curse_new_line())
                value_msg = self._gpu_mean_msg(key, args)
                label = mean_label if gpu_count > 1 else single_label
                ret.append(self.curse_add_line('{:13}'.format(label)))
                ret.append(
                    self.curse_add_line(
                        value_msg, self.get_views(item=first_gpu_id, key=key, option='decoration')
                    )
                )
        else:
            # Multi GPU
            # Temperature is not displayed in this mode...
            for gpu_stats in self.stats:
                # New line
                ret.append(self.curse_new_line())
                # GPU ID + PROC + MEM
                id_msg = '{}'.format(gpu_stats['gpu_id'])
                try:
                    proc_msg = '{:>3.0f}%'.format(gpu_stats['proc'])
                except (ValueError, TypeError):
                    proc_msg = '{:>4}'.format('N/A')
                try:
                    mem_msg = '{:>3.0f}%'.format(gpu_stats['mem'])
                except (ValueError, TypeError):
                    mem_msg = '{:>4}'.format('N/A')
                msg = '{}: {} mem: {}'.format(id_msg, proc_msg, mem_msg)
                ret.append(self.curse_add_line(msg))

        return ret

    def get_device_stats(self):
        """Get GPU stats.

        Return a list with one dictionary per device handle, in handle order.
        """
        stats = []

        for index, device_handle in enumerate(self.device_handles):
            device_stats = dict()
            # Dictionary key is the GPU_ID
            device_stats['key'] = self.get_key()
            # GPU id (for multiple GPU, start at 0)
            device_stats['gpu_id'] = index
            # GPU name
            device_stats['name'] = get_device_name(device_handle)
            # Memory consumption in % (not available on all GPU)
            device_stats['mem'] = get_mem(device_handle)
            # Processor consumption in %
            device_stats['proc'] = get_proc(device_handle)
            # Processor temperature in °C
            device_stats['temperature'] = get_temperature(device_handle)
            stats.append(device_stats)

        return stats

    def exit(self):
        """Overwrite the exit method to close the GPU API."""
        if self.nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception as e:
                logger.debug("pynvml failed to shutdown correctly ({})".format(e))

        # Call the father exit method
        super(Plugin, self).exit()


def get_device_handles():
    """Return the NVML device handles as a list, ordered by device index.

    NVMLError raised by the NVML calls is not caught here.
    """
    handles = []
    for device_index in range(pynvml.nvmlDeviceGetCount()):
        handles.append(pynvml.nvmlDeviceGetHandleByIndex(device_index))
    return handles


def get_device_name(device_handle):
    """Get GPU device name.

    Return the name reported by NVML (passed through nativestr), or the
    generic "NVIDIA" label when NVML raises an error.
    """
    try:
        return nativestr(pynvml.nvmlDeviceGetName(device_handle))
    except pynvml.NVMLError:
        # The exception class is NVMLError; a misspelled name here would
        # raise AttributeError instead of returning the fallback
        return "NVIDIA"


def get_mem(device_handle):
    """Get GPU device memory consumption in percent.

    Return used memory as a percentage of total memory, or None when NVML
    raises an error or reports a total of zero.
    """
    try:
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
        return memory_info.used * 100.0 / memory_info.total
    except (pynvml.NVMLError, ZeroDivisionError):
        # A zero total is treated like any other unavailable value
        return None


def get_proc(device_handle):
    """Get GPU device processor utilization in percent.

    Return the gpu field of the NVML utilization rates, or None when NVML
    raises an error.
    """
    try:
        utilization = pynvml.nvmlDeviceGetUtilizationRates(device_handle)
    except pynvml.NVMLError:
        return None
    return utilization.gpu


def get_temperature(device_handle):
    """Get GPU device temperature.

    Return the value of the NVML_TEMPERATURE_GPU sensor, or None when NVML
    raises an error.
    """
    sensor = pynvml.NVML_TEMPERATURE_GPU
    try:
        temperature = pynvml.nvmlDeviceGetTemperature(device_handle, sensor)
    except pynvml.NVMLError:
        return None
    return temperature