23 changes: 23 additions & 0 deletions cuda_core/cuda/core/_device.pyx
@@ -1034,6 +1034,29 @@ class Device:
        total = system.get_num_devices()
        return tuple(cls(device_id) for device_id in range(total))

    def to_system_device(self) -> 'cuda.core.system.Device':
        """
        Get the corresponding :class:`cuda.core.system.Device` (which is used
        for NVIDIA Management Library (NVML) access) for this
        :class:`cuda.core.Device` (which is used for CUDA access).

        The devices are mapped to one another by their UUID.

        Returns
        -------
        cuda.core.system.Device
            The corresponding system-level device instance used for NVML access.
        """
        from cuda.core.system._system import CUDA_BINDINGS_NVML_IS_COMPATIBLE

        if not CUDA_BINDINGS_NVML_IS_COMPATIBLE:
            raise RuntimeError(
                "cuda.core.system.Device requires cuda_bindings 13.1.2+ or 12.9.6+"
            )

        from cuda.core.system import Device as SystemDevice
        return SystemDevice(uuid=self.uuid)

    @property
    def device_id(self) -> int:
        """Return device ordinal."""
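As a usage illustration only (not part of the diff): a minimal sketch of round-tripping between the two device types, assuming an NVML-capable cuda.bindings is installed. The names used here (Device(), uuid, device_id, to_system_device, to_cuda_device) all appear elsewhere in this PR.

from cuda.core import Device

dev = Device()  # CUDA-side device for the current/default ordinal
try:
    sys_dev = dev.to_system_device()  # NVML-side device, matched by UUID
except RuntimeError:
    sys_dev = None  # cuda_bindings too old for NVML support (see the version check above)

if sys_dev is not None:
    assert sys_dev.uuid == dev.uuid
    assert sys_dev.to_cuda_device().device_id == dev.device_id
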
34 changes: 33 additions & 1 deletion cuda_core/cuda/core/system/_device.pyx
@@ -722,6 +722,30 @@ cdef class Device:
pci_bus_id = pci_bus_id.decode("ascii")
self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)

    def to_cuda_device(self) -> "cuda.core.Device":
        """
        Get the corresponding :class:`cuda.core.Device` (which is used for CUDA
        access) for this :class:`cuda.core.system.Device` (which is used for
        NVIDIA Management Library (NVML) access).

        The devices are mapped to one another by their UUID.

        Returns
        -------
        cuda.core.Device
            The corresponding CUDA device.
        """
        from cuda.core import Device as CudaDevice

        # CUDA does not have an API to get a device by its UUID, so we just
        # search all the devices for one with a matching UUID.

        for cuda_device in CudaDevice.get_all_devices():
Collaborator:

Consider: Can we memoize this call so it only does the linear search once and caches the result?

Contributor Author:

Probably in most cases that would be fine, but hot-plugging devices is possible, so the device with the same UUID can change device handles after a replug. Hot-plugging a device during a running process that cares about it probably has all kinds of other problems, but since we can't predict how the application works, it's probably better to put caching in the hands of the user of this API.

            if cuda_device.uuid == self.uuid:
                return cuda_device

raise RuntimeError("No corresponding CUDA device found for this NVML device.")
Contributor:

More of a rant, I guess, than anything: I know this is pre-existing, but I really dislike that we raise RuntimeError everywhere.

Contributor Author:

What would be more appropriate here? A custom exception along the lines of DeviceNotFoundError?
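On the exception question above, a hypothetical sketch of what such a custom exception could look like (the name DeviceNotFoundError comes from the comment; its definition and placement are not part of this PR). Deriving it from RuntimeError would keep any existing except RuntimeError handlers working.

class DeviceNotFoundError(RuntimeError):
    """Raised when no device matching the requested UUID or handle can be found."""
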
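And on the memoization question in the earlier thread: if caching is left to the user as the author suggests, a minimal user-side sketch (hypothetical helper, not part of the PR) could cache the UUID lookup and be explicitly invalidated after a hot-plug event.

from functools import lru_cache

from cuda.core import Device as CudaDevice


@lru_cache(maxsize=None)
def cuda_device_by_uuid(uuid):
    # One linear scan per UUID, cached; call cuda_device_by_uuid.cache_clear()
    # after a hot-plug event to force a fresh lookup.
    for dev in CudaDevice.get_all_devices():
        if dev.uuid == uuid:
            return dev
    raise RuntimeError(f"No CUDA device with UUID {uuid}")
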


    @classmethod
    def get_device_count(cls) -> int:
        """
@@ -1036,8 +1060,16 @@ cdef class Device:
        Retrieves the globally unique immutable UUID associated with this
        device, as a 5-part hexadecimal string, that augments the immutable
        board serial identifier.

        In the upstream NVML C API, the UUID includes a ``gpu-`` or ``mig-``
        prefix. That prefix is not included in ``cuda.core.system``.
        """
        return nvml.device_get_uuid(self._handle)
        # NVML UUIDs have a `GPU-` or `MIG-` prefix. We remove that here.

        # TODO: If the user cares about the prefix, we will expose that in the
        # future using the MIG-related APIs in NVML.

        return nvml.device_get_uuid(self._handle)[4:]
Comment on lines +1069 to +1072

Member:

I am thinking the system device should return the full UUID and we document the different expectations between cuda.core and cuda.core.system (or CUDA vs NVML). @mdboom thoughts?

Contributor Author:

I totally can see it both ways. My original implementation did what you suggested (following upstream NVML behavior). But @cpcloud convinced me this is weird -- UUID has a well-defined meaning in our field that NVML deviates from. I don't feel super strongly either way -- we just need to break the tie ;)

Member:

IIUC NVML is the only way for us to tell, from inside a running process, if we are using MIG instances or otherwise (bare-metal GPU, MPS, etc.). CUDA purposely hides MIG from end users. So my thinking is that if we don't follow NVML, there is no other way for Python users to query. Could you check if my impression is correct?

Contributor Author:

There is another API, nvmlDeviceIsMigDeviceHandle, that could be used to query whether it's MIG, and IMHO, that's better than the user needing to parse a string to get that info.

/**
 * Test if the given handle refers to a MIG device.
 *
 * A MIG device handle is an NVML abstraction which maps to a MIG compute instance.
 * These overloaded references can be used (with some restrictions) interchangeably
 * with a GPU device handle to execute queries at a per-compute instance granularity.
 *
 * For Ampere™ or newer fully supported devices.
 * Supported on Linux only.
 *
 * @param device                               NVML handle to test
 * @param isMigDevice                          True when handle refers to a MIG device
 */
nvmlReturn_t DECLDIR nvmlDeviceIsMigDeviceHandle(nvmlDevice_t device, unsigned int *isMigDevice);
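For reference, a hedged sketch of how that check might be surfaced on the system device; the snake_case wrapper name device_is_mig_device_handle is an assumption inferred from the naming of the other wrappers used in this diff (e.g. device_get_uuid), and the method itself is not part of this PR.

    def is_mig_device(self) -> bool:
        """Return True if this NVML handle refers to a MIG compute instance."""
        # Assumed wrapper name, mirroring nvmlDeviceIsMigDeviceHandle in the NVML C API.
        return bool(nvml.device_is_mig_device_handle(self._handle))
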


    def register_events(self, events: EventType | int | list[EventType | int]) -> DeviceEvents:
        """
17 changes: 17 additions & 0 deletions cuda_core/tests/system/test_system_device.py
@@ -33,6 +33,23 @@ def test_device_count():
    assert system.Device.get_device_count() == system.get_num_devices()


def test_to_cuda_device():
    from cuda.core import Device as CudaDevice

    for device in system.Device.get_all_devices():
        cuda_device = device.to_cuda_device()

        assert isinstance(cuda_device, CudaDevice)
        assert cuda_device.uuid == device.uuid

        # Technically, this test will only work with PCI devices, but are there
        # non-PCI devices we need to support?

        # CUDA only returns a 2-byte PCI bus ID domain, whereas NVML returns a
        # 4-byte domain
        assert cuda_device.pci_bus_id == device.pci_info.bus_id[4:]
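To make that domain-width difference concrete (the example bus IDs below are assumed values, for illustration only): NVML reports a 4-byte (8 hex digit) PCI domain while CUDA reports a 2-byte (4 hex digit) domain, so slicing off the first four characters of the NVML form yields the CUDA form.

nvml_bus_id = "00000000:3B:00.0"  # NVML-style: 8 hex digit domain (assumed example)
cuda_bus_id = "0000:3B:00.0"      # CUDA-style: 4 hex digit domain (assumed example)
assert nvml_bus_id[4:] == cuda_bus_id
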


def test_device_architecture():
    for device in system.Device.get_all_devices():
        device_arch = device.architecture
24 changes: 24 additions & 0 deletions cuda_core/tests/test_device.py
@@ -25,6 +25,30 @@ def cuda_version():
    return _py_major_ver, _driver_ver


def test_to_system_device(deinit_cuda):
    from cuda.core.system import _system

    device = Device()

    if not _system.CUDA_BINDINGS_NVML_IS_COMPATIBLE:
        with pytest.raises(RuntimeError):
            device.to_system_device()
        pytest.skip("NVML support requires cuda.bindings version 12.9.6+ or 13.1.2+")

    from cuda.core.system import Device as SystemDevice

    system_device = device.to_system_device()
    assert isinstance(system_device, SystemDevice)
    assert system_device.uuid == device.uuid

    # Technically, this test will only work with PCI devices, but are there
    # non-PCI devices we need to support?

    # CUDA only returns a 2-byte PCI bus ID domain, whereas NVML returns a
    # 4-byte domain
    assert device.pci_bus_id == system_device.pci_info.bus_id[4:]


def test_device_set_current(deinit_cuda):
    device = Device()
    device.set_current()