From 67f2df10556030b6798e2b0d97aa3b5c97331032 Mon Sep 17 00:00:00 2001 From: rjzamora Date: Tue, 4 Oct 2022 08:16:45 -0700 Subject: [PATCH 1/5] avoid numba context in import --- merlin/core/compat.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/merlin/core/compat.py b/merlin/core/compat.py index 4c70179e1..594abc7d8 100644 --- a/merlin/core/compat.py +++ b/merlin/core/compat.py @@ -13,13 +13,22 @@ # See the License for the specific language governing permissions and # limitations under the License. # -HAS_GPU = False try: from numba import cuda - try: - HAS_GPU = len(cuda.gpus.lst) > 0 - except cuda.cudadrv.error.CudaSupportError: - pass except ImportError: cuda = None + +HAS_GPU = False +try: + import dask_cuda + + HAS_GPU = dask_cuda.utils.get_gpu_count() +except ImportError: + # Don't let numba.cuda set the context + # unless dask_cuda is not installed + if cuda is not None: + try: + HAS_GPU = len(cuda.gpus.lst) > 0 + except cuda.cudadrv.error.CudaSupportError: + pass From d6fb5a928876832391d287460be1f0762ab70a4e Mon Sep 17 00:00:00 2001 From: rjzamora Date: Tue, 4 Oct 2022 10:44:51 -0700 Subject: [PATCH 2/5] use pynvml instead of dask-cuda --- merlin/core/compat.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/merlin/core/compat.py b/merlin/core/compat.py index 594abc7d8..900785408 100644 --- a/merlin/core/compat.py +++ b/merlin/core/compat.py @@ -19,16 +19,13 @@ except ImportError: cuda = None -HAS_GPU = False try: - import dask_cuda + import pynvml - HAS_GPU = dask_cuda.utils.get_gpu_count() + try: + pynvml.nvmlInit() + HAS_GPU = pynvml.nvmlDeviceGetCount() > 0 + except pynvml.nvml.NVMLError_LibraryNotFound: + HAS_GPU = False except ImportError: - # Don't let numba.cuda set the context - # unless dask_cuda is not installed - if cuda is not None: - try: - HAS_GPU = len(cuda.gpus.lst) > 0 - except cuda.cudadrv.error.CudaSupportError: - pass + HAS_GPU = False From 18c42f3fbf138b1429ff1d6944cddc31888c7b1c Mon Sep 17 00:00:00 2001 From: rjzamora Date: Tue, 4 Oct 2022 10:46:38 -0700 Subject: [PATCH 3/5] reduce diff --- merlin/core/compat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/merlin/core/compat.py b/merlin/core/compat.py index 900785408..182822b0c 100644 --- a/merlin/core/compat.py +++ b/merlin/core/compat.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +HAS_GPU = False try: from numba import cuda @@ -26,6 +27,6 @@ pynvml.nvmlInit() HAS_GPU = pynvml.nvmlDeviceGetCount() > 0 except pynvml.nvml.NVMLError_LibraryNotFound: - HAS_GPU = False + pass except ImportError: - HAS_GPU = False + pass From e9e750bbbe88eebd840375d415949cb98b4a7a87 Mon Sep 17 00:00:00 2001 From: rjzamora Date: Tue, 4 Oct 2022 10:59:43 -0700 Subject: [PATCH 4/5] pylint --- merlin/core/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/merlin/core/compat.py b/merlin/core/compat.py index 182822b0c..cb187752a 100644 --- a/merlin/core/compat.py +++ b/merlin/core/compat.py @@ -15,7 +15,7 @@ # HAS_GPU = False try: - from numba import cuda + from numba import cuda # pylint: disable=unused-import except ImportError: cuda = None From 9efdbf9e0d652fe1b1c68fb206733625a75c9af5 Mon Sep 17 00:00:00 2001 From: rjzamora Date: Tue, 4 Oct 2022 11:26:25 -0700 Subject: [PATCH 5/5] use distributed.nvml --- merlin/core/compat.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/merlin/core/compat.py b/merlin/core/compat.py index cb187752a..d372f8a73 100644 --- a/merlin/core/compat.py +++ b/merlin/core/compat.py @@ -13,20 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. # -HAS_GPU = False try: - from numba import cuda # pylint: disable=unused-import + from numba import cuda except ImportError: cuda = None +HAS_GPU = False try: - import pynvml + from dask.distributed.diagnostics import nvml - try: - pynvml.nvmlInit() - HAS_GPU = pynvml.nvmlDeviceGetCount() > 0 - except pynvml.nvml.NVMLError_LibraryNotFound: - pass + HAS_GPU = nvml.device_get_count() > 0 except ImportError: - pass + # We can use `cuda` to set `HAS_GPU` now that we + # know `distributed` is not installed (otherwise + # the `nvml` import would have succeeded) + HAS_GPU = cuda is not None