Skip to content

Commit

Permalink
Show nvidia-pynvml compatibility warnings for #161
Browse files Browse the repository at this point in the history
NVIDIA 535.43, 535.86 can display process information correctly only
with nvidia-ml-py==12.535.77. Display a warning message when an
incompatible combination is detected.

See #161 for more details.
  • Loading branch information
wookayin committed Oct 30, 2023
1 parent ed69c2d commit 038923d
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
3 changes: 3 additions & 0 deletions gpustat/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

import gpustat.util as util
from gpustat.nvml import pynvml as N
from gpustat.nvml import check_driver_nvml_version

NOT_SUPPORTED = 'Not Supported'
MB = 1024 * 1024
Expand Down Expand Up @@ -613,8 +614,10 @@ def _wrapped(*args, **kwargs):
gpu_list.append(gpu_stat)

# 2. additional info (driver version, etc).
# TODO: check this only once, no need to call multiple times
try:
driver_version = _decode(N.nvmlSystemGetDriverVersion())
check_driver_nvml_version(driver_version)
except N.NVMLError as e:
log.add_exception("driver_version", e)
driver_version = None # N/A
Expand Down
30 changes: 30 additions & 0 deletions gpustat/nvml.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,36 @@
""")) from e


class NvidiaCompatibilityWarning(UserWarning):
    """Warning category for incompatible NVIDIA driver / nvidia-ml-py setups.

    Having a dedicated category lets callers filter or escalate these
    warnings through the standard ``warnings`` machinery.
    """


def check_driver_nvml_version(driver_version_str: str):
    """Emit a warning when a known-bad driver / nvidia-ml-py pair is in use.

    The check is purely advisory: it only calls ``warnings.warn`` with
    ``NvidiaCompatibilityWarning`` and never raises because of the content
    of the version string.

    Args:
        driver_version_str: Dotted NVIDIA driver version as reported by NVML,
            e.g. ``"535.104.05"``. Surrounding whitespace is ignored.
            Components that are not plain integers are treated as ``0``.

    Returns:
        None. If no numeric component can be parsed at all (empty string,
        ``"N/A"``, ...), the function returns without performing any check.
    """

    def _to_int(component: str) -> int:
        # An odd-looking version string must not crash the caller: the
        # caller only guards against NVML errors, not ValueError.
        try:
            return int(component)
        except (TypeError, ValueError):
            return 0

    driver_version = tuple(
        _to_int(c) for c in driver_version_str.strip().split("."))

    # Nothing usable was parsed; we cannot reason about compatibility.
    if not any(driver_version):
        return

    # Heuristic marker (see #161): presence of this struct field is used to
    # recognize the problematic nvidia-ml-py release (possibly 12.535.77).
    is_pynvml_535_77 = hasattr(
        pynvml.c_nvmlProcessInfo_t, 'usedGpuCcProtectedMemory')

    if (535, 43) <= driver_version < (535, 86):
        # See #161: these are buggy, gives wrong process information
        # except for nvidia-ml-py == 12.535.77 (which is a buggy version too).
        # Note: NVIDIA 535.86+ and nvidia-ml-py 12.535.108+ fixes the bug
        if not is_pynvml_535_77:
            warnings.warn(
                f"This version of NVIDIA Driver {driver_version_str} is incompatible, "
                "process information will be inaccurate. "
                "Upgrade the NVIDIA driver to 535.104.05 or higher. "
                "See https://github.com/wookayin/gpustat/issues/161 for more details.",
                category=NvidiaCompatibilityWarning, stacklevel=2)
    elif is_pynvml_535_77:
        # pynvml 12.535.77 should not be used with any other driver version
        warnings.warn(
            "This version of nvidia-ml-py (possibly 12.535.77) is incompatible. "
            "Please upgrade nvidia-ml-py to the latest version. "
            "(pip install --upgrade --force-reinstall nvidia-ml-py)",
            category=NvidiaCompatibilityWarning, stacklevel=2)


# Monkey-patch nvml due to breaking changes in pynvml.
# See #107, #141, and test_gpustat.py for more details.

Expand Down

0 comments on commit 038923d

Please sign in to comment.