????
Current Path : /lib/python3.6/site-packages/glances/plugins/ |
Current File : //lib/python3.6/site-packages/glances/plugins/glances_gpu.py |
# -*- coding: utf-8 -*-
#
# This file is part of Glances.
#
# Copyright (C) 2020 Kirby Banman <kirby.banman@gmail.com>
#
# SPDX-License-Identifier: LGPL-3.0-only
#

"""GPU plugin (limited to NVIDIA chipsets)."""

from glances.compat import nativestr, to_fahrenheit
from glances.logger import logger
from glances.plugins.glances_plugin import GlancesPlugin

# In Glances 3.1.4 or higher, we use the py3nvml lib (see issue #1523)
try:
    import py3nvml.py3nvml as pynvml
except Exception as e:
    import_error_tag = True
    # Display a warning message if the import fails for any reason
    logger.warning("Missing Python Lib ({}), Nvidia GPU plugin is disabled".format(e))
else:
    import_error_tag = False

# Define the history items list
# All items in this list will be historised if the --enable-history tag is set
items_history_list = [
    {'name': 'proc', 'description': 'GPU processor', 'y_unit': '%'},
    {'name': 'mem', 'description': 'Memory consumption', 'y_unit': '%'},
]


class Plugin(GlancesPlugin):
    """Glances GPU plugin (limited to NVIDIA chipsets).

    stats is a list of dictionaries with one entry per GPU
    """

    def __init__(self, args=None, config=None):
        """Init the plugin."""
        super(Plugin, self).__init__(args=args, config=config, stats_init_value=[])

        # Init the Nvidia API
        self.init_nvidia()

        # We want to display the stat in the curse interface
        self.display_curse = True

    def init_nvidia(self):
        """Init the NVIDIA API.

        Sets self.nvml_ready (and, on success, self.device_handles).

        :return: True if NVML was initialized and the device handles were
                 retrieved, False otherwise.
        """
        if import_error_tag:
            # The pynvml name is not bound when the import failed: do not
            # even try to call it.
            self.nvml_ready = False
            return self.nvml_ready

        try:
            pynvml.nvmlInit()
            self.device_handles = get_device_handles()
            self.nvml_ready = True
        except Exception:
            logger.debug("pynvml could not be initialized.")
            self.nvml_ready = False

        return self.nvml_ready

    def get_key(self):
        """Return the key of the list."""
        return 'gpu_id'

    @GlancesPlugin._check_decorator
    @GlancesPlugin._log_result_decorator
    def update(self):
        """Update the GPU stats.

        :return: the current stats. When NVML is not ready, self.stats is
                 returned unchanged.
        """
        # Init new stats
        stats = self.get_init_value()

        if not self.nvml_ready:
            # !!!
            # Uncomment to test on computer without GPU
            # One GPU sample:
            # self.stats = [
            #     {
            #         "key": "gpu_id",
            #         "gpu_id": 0,
            #         "name": "Fake GeForce GTX",
            #         "mem": 5.792331695556641,
            #         "proc": 4,
            #         "temperature": 26
            #     }
            # ]
            # Two GPU sample:
            # self.stats = [
            #     {
            #         "key": "gpu_id",
            #         "gpu_id": 0,
            #         "name": "Fake GeForce GTX1",
            #         "mem": 5.792331695556641,
            #         "proc": 4,
            #         "temperature": 26
            #     },
            #     {
            #         "key": "gpu_id",
            #         "gpu_id": 1,
            #         "name": "Fake GeForce GTX2",
            #         "mem": 15,
            #         "proc": 8,
            #         "temperature": 65
            #     }
            # ]
            return self.stats

        if self.input_method == 'local':
            stats = self.get_device_stats()
        elif self.input_method == 'snmp':
            # not available
            pass

        # Update the stats
        self.stats = stats

        return self.stats

    def update_views(self):
        """Update stats views.

        For each GPU, attach an alert decoration to the 'proc', 'mem' and
        'temperature' fields that are present in its stats.
        """
        # Call the father's method
        super(Plugin, self).update_views()

        # Add specifics information
        # Alert
        for i in self.stats:
            gpu_key = i[self.get_key()]

            # Init the views for the current GPU
            self.views[gpu_key] = {'proc': {}, 'mem': {}, 'temperature': {}}

            # Processor alert
            if 'proc' in i:
                alert = self.get_alert(i['proc'], header='proc')
                self.views[gpu_key]['proc']['decoration'] = alert

            # Memory alert
            if 'mem' in i:
                alert = self.get_alert(i['mem'], header='mem')
                self.views[gpu_key]['mem']['decoration'] = alert

            # Temperature alert
            if 'temperature' in i:
                alert = self.get_alert(i['temperature'], header='temperature')
                self.views[gpu_key]['temperature']['decoration'] = alert

        return True

    def msg_curse(self, args=None, max_width=None):
        """Return the dict to display in the curse interface.

        With a single GPU, or when args.meangpu is set, a summary (mean of
        proc, mem and temperature over all GPUs) is displayed. Otherwise one
        line per GPU is displayed (without the temperature).
        """
        # Init the return message
        ret = []

        # Only process if stats exist, not empty (issue #871) and plugin not disabled
        if not self.stats or self.is_disabled():
            return ret

        # args defaults to None: read the options defensively
        mean_gpu = getattr(args, 'meangpu', False)
        fahrenheit = getattr(args, 'fahrenheit', False)

        # Check if all GPU have the same name
        same_name = all(s['name'] == self.stats[0]['name'] for s in self.stats)

        # gpu_stats contain the first GPU in the list
        gpu_stats = self.stats[0]

        # Header
        header = ''
        if len(self.stats) > 1:
            header += '{} '.format(len(self.stats))
        if same_name:
            header += '{} {}'.format('GPU', gpu_stats['name'])
        else:
            header += '{}'.format('GPU')
        # Keep only the first 17 characters of the header
        msg = header[:17]
        ret.append(self.curse_add_line(msg, "TITLE"))

        # Build the string message
        if len(self.stats) == 1 or mean_gpu:
            # GPU stat summary or mono GPU
            # New line
            ret.append(self.curse_new_line())
            # GPU PROC
            # NOTE: the "if s is not None" filter tests the per-GPU dict, not
            # the value; a None value makes sum() raise TypeError, which is
            # what triggers the 'N/A' display below.
            try:
                mean_proc = sum(s['proc'] for s in self.stats if s is not None) / len(self.stats)
            except TypeError:
                mean_proc_msg = '{:>4}'.format('N/A')
            else:
                mean_proc_msg = '{:>3.0f}%'.format(mean_proc)
            if len(self.stats) > 1:
                msg = '{:13}'.format('proc mean:')
            else:
                msg = '{:13}'.format('proc:')
            ret.append(self.curse_add_line(msg))
            ret.append(
                self.curse_add_line(
                    mean_proc_msg, self.get_views(item=gpu_stats[self.get_key()], key='proc', option='decoration')
                )
            )
            # New line
            ret.append(self.curse_new_line())
            # GPU MEM
            try:
                mean_mem = sum(s['mem'] for s in self.stats if s is not None) / len(self.stats)
            except TypeError:
                mean_mem_msg = '{:>4}'.format('N/A')
            else:
                mean_mem_msg = '{:>3.0f}%'.format(mean_mem)
            if len(self.stats) > 1:
                msg = '{:13}'.format('mem mean:')
            else:
                msg = '{:13}'.format('mem:')
            ret.append(self.curse_add_line(msg))
            ret.append(
                self.curse_add_line(
                    mean_mem_msg, self.get_views(item=gpu_stats[self.get_key()], key='mem', option='decoration')
                )
            )
            # New line
            ret.append(self.curse_new_line())
            # GPU TEMPERATURE
            try:
                mean_temperature = sum(s['temperature'] for s in self.stats if s is not None) / len(self.stats)
            except TypeError:
                mean_temperature_msg = '{:>4}'.format('N/A')
            else:
                unit = 'C'
                if fahrenheit:
                    mean_temperature = to_fahrenheit(mean_temperature)
                    unit = 'F'
                mean_temperature_msg = '{:>3.0f}{}'.format(mean_temperature, unit)
            if len(self.stats) > 1:
                msg = '{:13}'.format('temp mean:')
            else:
                msg = '{:13}'.format('temperature:')
            ret.append(self.curse_add_line(msg))
            ret.append(
                self.curse_add_line(
                    mean_temperature_msg,
                    self.get_views(item=gpu_stats[self.get_key()], key='temperature', option='decoration'),
                )
            )
        else:
            # Multi GPU
            # Temperature is not displayed in this mode...
            for gpu_stats in self.stats:
                # New line
                ret.append(self.curse_new_line())
                # GPU ID + PROC + MEM + TEMPERATURE
                id_msg = '{}'.format(gpu_stats['gpu_id'])
                try:
                    proc_msg = '{:>3.0f}%'.format(gpu_stats['proc'])
                except (ValueError, TypeError):
                    proc_msg = '{:>4}'.format('N/A')
                try:
                    mem_msg = '{:>3.0f}%'.format(gpu_stats['mem'])
                except (ValueError, TypeError):
                    mem_msg = '{:>4}'.format('N/A')
                msg = '{}: {} mem: {}'.format(id_msg, proc_msg, mem_msg)
                ret.append(self.curse_add_line(msg))

        return ret

    def get_device_stats(self):
        """Get GPU stats.

        :return: a list with one dict per device handle, holding the keys
                 'key', 'gpu_id', 'name', 'mem', 'proc' and 'temperature'.
        """
        stats = []

        for index, device_handle in enumerate(self.device_handles):
            device_stats = dict()
            # Dictionary key is the GPU_ID
            device_stats['key'] = self.get_key()
            # GPU id (for multiple GPU, start at 0)
            device_stats['gpu_id'] = index
            # GPU name
            device_stats['name'] = get_device_name(device_handle)
            # Memory consumption in % (not available on all GPU)
            device_stats['mem'] = get_mem(device_handle)
            # Processor consumption in %
            device_stats['proc'] = get_proc(device_handle)
            # Processor temperature in degrees Celsius
            device_stats['temperature'] = get_temperature(device_handle)
            stats.append(device_stats)

        return stats

    def exit(self):
        """Overwrite the exit method to close the GPU API."""
        if self.nvml_ready:
            try:
                pynvml.nvmlShutdown()
            except Exception as e:
                logger.debug("pynvml failed to shutdown correctly ({})".format(e))

        # Call the father exit method
        super(Plugin, self).exit()


def get_device_handles():
    """Get a list of NVML device handles, one per device.

    Can throw NVMLError.
    """
    return [pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(pynvml.nvmlDeviceGetCount())]


def get_device_name(device_handle):
    """Get GPU device name.

    Return the generic name "NVIDIA" if NVML fails to provide it.
    """
    try:
        return nativestr(pynvml.nvmlDeviceGetName(device_handle))
    except pynvml.NVMLError:
        return "NVIDIA"


def get_mem(device_handle):
    """Get GPU device memory consumption in percent.

    Return None if NVML fails to provide the memory information.
    """
    try:
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(device_handle)
        return memory_info.used * 100.0 / memory_info.total
    except pynvml.NVMLError:
        return None


def get_proc(device_handle):
    """Get GPU device processor utilization (the 'gpu' utilization rate).

    Return None if NVML fails to provide the utilization rates.
    """
    try:
        return pynvml.nvmlDeviceGetUtilizationRates(device_handle).gpu
    except pynvml.NVMLError:
        return None


def get_temperature(device_handle):
    """Get GPU device temperature (NVML_TEMPERATURE_GPU sensor).

    Return None if NVML fails to provide the temperature.
    """
    try:
        return pynvml.nvmlDeviceGetTemperature(device_handle, pynvml.NVML_TEMPERATURE_GPU)
    except pynvml.NVMLError:
        return None