Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions cpp/include/cuvs/core/detail/interop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,4 +141,30 @@ inline bool is_c_contiguous(DLManagedTensor* managed_tensor)
return true;
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
/**
 * @brief Deleter installed on DLManagedTensor objects produced by to_dlpack
 *
 * Releases only the shape array that to_dlpack allocated; the tensor data is
 * a non-owning view and is not freed here. The pointer is reset afterwards so
 * that calling the deleter more than once is harmless.
 */
static void free_dlmanaged_tensor_shape(DLManagedTensor* tensor)
{
  delete[] tensor->dl_tensor.shape;
  tensor->dl_tensor.shape = nullptr;
}
#pragma GCC diagnostic pop

template <typename MdspanType, typename = raft::is_mdspan_t<MdspanType>>
static void to_dlpack(MdspanType src, DLManagedTensor* dst)
{
auto tensor = &dst->dl_tensor;

tensor->dtype = data_type_to_DLDataType<typename MdspanType::value_type>();
tensor->device = accessor_type_to_DLDevice<typename MdspanType::accessor_type>();
tensor->ndim = MdspanType::extents_type::rank();
tensor->data = src.data_handle();

tensor->shape = new int64_t[tensor->ndim];
dst->deleter = free_dlmanaged_tensor_shape;

for (int64_t i = 0; i < tensor->ndim; ++i) {
tensor->shape[i] = src.extent(i);
}
}

} // namespace cuvs::core::detail
15 changes: 14 additions & 1 deletion cpp/include/cuvs/core/interop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ inline bool is_c_contiguous(DLManagedTensor* tensor) { return detail::is_c_conti
inline bool is_f_contiguous(DLManagedTensor* tensor) { return detail::is_f_contiguous(tensor); }

/**
* @brief Convert a DLManagedTensor to an mdspan
* @brief Convert a DLManagedTensor to an mdspan
* NOTE: This function only supports compact row-major and col-major layouts.
*
* @code {.cpp}
Expand All @@ -93,6 +93,19 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor)
return detail::from_dlpack<MdspanType>(managed_tensor);
}

/**
 * @brief Convert an mdspan to a DLManagedTensor
 *
 * Wraps an mdspan in a DLManagedTensor so that non-owning views can cross the
 * C++ / C boundary without copying. The resulting tensor does not own the
 * underlying memory: the caller must keep the source allocation alive for as
 * long as the tensor is in use.
 */
template <typename MdspanType, typename = raft::is_mdspan_t<MdspanType>>
void to_dlpack(MdspanType src, DLManagedTensor* dst)
{
  detail::to_dlpack(src, dst);
}

/**
* @}
*/
Expand Down
22 changes: 20 additions & 2 deletions cpp/include/cuvs/neighbors/nn_descent.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,29 @@ cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
/**
* @brief Get the KNN graph from a built NN-Descent index
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] index cuvsNNDescentIndex_t Built NN-Descent index
* @param[inout] graph Optional preallocated graph on host memory to store output
* @param[out] graph Preallocated graph on host memory to store output
* @return cuvsError_t
*/
cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
cuvsNNDescentIndex_t index,
DLManagedTensor* graph);

/**
* @brief Get the distances from a built NN-Descent index
*
* This requires that the `return_distances` parameter was set when building the
* graph
*
* @param[in] res cuvsResources_t opaque C handle
* @param[in] index cuvsNNDescentIndex_t Built NN-Descent index
* @param[out] distances Preallocated memory to store the output distances tensor
* @return cuvsError_t
*/
cuvsError_t cuvsNNDescentIndexGetGraph(cuvsNNDescentIndex_t index, DLManagedTensor* graph);
cuvsError_t cuvsNNDescentIndexGetDistances(cuvsResources_t res,
cuvsNNDescentIndex_t index,
DLManagedTensor* distances);
#ifdef __cplusplus
}
#endif
81 changes: 71 additions & 10 deletions cpp/src/neighbors/nn_descent_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,57 @@ void* _build(cuvsResources_t res,
RAFT_FAIL("dataset must be accessible on host or device memory");
}
}

/**
 * @brief Copy the KNN graph of a built nn-descent index into `graph`
 *
 * @tparam output_mdspan_type host_matrix_view or device_matrix_view of
 *         uint32_t describing where the caller wants the output
 */
template <typename output_mdspan_type>
void _get_graph(cuvsResources_t res, cuvsNNDescentIndex_t index, DLManagedTensor* graph)
{
  auto dtype = index->dtype;
  if ((dtype.code == kDLUInt) && (dtype.bits == 32)) {
    auto index_ptr = reinterpret_cast<cuvs::neighbors::nn_descent::index<uint32_t>*>(index->addr);
    auto dst       = cuvs::core::from_dlpack<output_mdspan_type>(graph);
    auto src       = index_ptr->graph();
    auto res_ptr   = reinterpret_cast<raft::resources*>(res);

    RAFT_EXPECTS(src.extent(0) == dst.extent(0), "Output graph has incorrect number of rows");
    RAFT_EXPECTS(src.extent(1) == dst.extent(1), "Output graph has incorrect number of cols");

    // cudaMemcpyDefault infers the direction from the pointers; surface any
    // copy failure instead of ignoring the return value.
    RAFT_EXPECTS(cudaMemcpyAsync(dst.data_handle(),
                                 src.data_handle(),
                                 dst.extent(0) * dst.extent(1) * sizeof(uint32_t),
                                 cudaMemcpyDefault,
                                 raft::resource::get_cuda_stream(*res_ptr)) == cudaSuccess,
                 "cudaMemcpyAsync failed while copying the nn-descent graph");
  } else {
    RAFT_FAIL("Unsupported nn-descent index dtype: %d and bits: %d", dtype.code, dtype.bits);
  }
}

/**
 * @brief Copy the distances of a built nn-descent index into `distances`
 *
 * Fails if the index was built without `return_distances`.
 *
 * @tparam output_mdspan_type host_matrix_view or device_matrix_view of float
 *         describing where the caller wants the output
 */
template <typename output_mdspan_type>
void _get_distances(cuvsResources_t res, cuvsNNDescentIndex_t index, DLManagedTensor* distances)
{
  auto dtype = index->dtype;
  if ((dtype.code == kDLUInt) && (dtype.bits == 32)) {
    auto index_ptr = reinterpret_cast<cuvs::neighbors::nn_descent::index<uint32_t>*>(index->addr);
    auto src       = index_ptr->distances();
    if (!src.has_value()) {
      RAFT_FAIL("nn-descent index doesn't contain distances - set return_distances when building");
    }

    auto res_ptr = reinterpret_cast<raft::resources*>(res);
    auto dst     = cuvs::core::from_dlpack<output_mdspan_type>(distances);

    RAFT_EXPECTS(src->extent(0) == dst.extent(0), "Output distances has incorrect number of rows");
    RAFT_EXPECTS(src->extent(1) == dst.extent(1), "Output distances has incorrect number of cols");

    // cudaMemcpyDefault infers the direction from the pointers; surface any
    // copy failure instead of ignoring the return value.
    RAFT_EXPECTS(cudaMemcpyAsync(dst.data_handle(),
                                 src->data_handle(),
                                 dst.extent(0) * dst.extent(1) * sizeof(float),
                                 cudaMemcpyDefault,
                                 raft::resource::get_cuda_stream(*res_ptr)) == cudaSuccess,
                 "cudaMemcpyAsync failed while copying the nn-descent distances");
  } else {
    RAFT_FAIL("Unsupported nn-descent index dtype: %d and bits: %d", dtype.code, dtype.bits);
  }
}
} // namespace

extern "C" cuvsError_t cuvsNNDescentIndexCreate(cuvsNNDescentIndex_t* index)
Expand Down Expand Up @@ -146,22 +197,32 @@ extern "C" cuvsError_t cuvsNNDescentIndexParamsDestroy(cuvsNNDescentIndexParams_
return cuvs::core::translate_exceptions([=] { delete params; });
}

/**
 * @brief C entry point: copy the KNN graph of a built index into `graph`.
 *
 * Dispatches on whether the output tensor lives in device- or host-accessible
 * memory. (Diff artifacts — the pre-change signature and leftover body lines —
 * were interleaved here and have been removed.)
 */
extern "C" cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
                                                  cuvsNNDescentIndex_t index,
                                                  DLManagedTensor* graph)
{
  return cuvs::core::translate_exceptions([=] {
    if (cuvs::core::is_dlpack_device_compatible(graph->dl_tensor)) {
      using output_mdspan_type = raft::device_matrix_view<uint32_t, int64_t, raft::row_major>;
      _get_graph<output_mdspan_type>(res, index, graph);
    } else {
      using output_mdspan_type = raft::host_matrix_view<uint32_t, int64_t, raft::row_major>;
      _get_graph<output_mdspan_type>(res, index, graph);
    }
  });
}

/**
 * @brief C entry point: copy the distances of a built index into `distances`.
 *
 * Dispatches on whether the output tensor lives in device- or host-accessible
 * memory. (Diff artifacts — removed lines from the old GetGraph body — were
 * interleaved here and have been removed.)
 */
extern "C" cuvsError_t cuvsNNDescentIndexGetDistances(cuvsResources_t res,
                                                      cuvsNNDescentIndex_t index,
                                                      DLManagedTensor* distances)
{
  return cuvs::core::translate_exceptions([=] {
    if (cuvs::core::is_dlpack_device_compatible(distances->dl_tensor)) {
      using output_mdspan_type = raft::device_matrix_view<float, int64_t, raft::row_major>;
      _get_distances<output_mdspan_type>(res, index, distances);
    } else {
      using output_mdspan_type = raft::host_matrix_view<float, int64_t, raft::row_major>;
      _get_distances<output_mdspan_type>(res, index, distances);
    }
  });
}
7 changes: 6 additions & 1 deletion python/cuvs/cuvs/neighbors/nn_descent/nn_descent.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,14 @@ cdef extern from "cuvs/neighbors/nn_descent.h" nogil:

cuvsError_t cuvsNNDescentIndexDestroy(cuvsNNDescentIndex_t index)

cuvsError_t cuvsNNDescentIndexGetGraph(cuvsNNDescentIndex_t index,
cuvsError_t cuvsNNDescentIndexGetGraph(cuvsResources_t res,
cuvsNNDescentIndex_t index,
DLManagedTensor * output)

cuvsError_t cuvsNNDescentIndexGetDistances(cuvsResources_t res,
cuvsNNDescentIndex_t index,
DLManagedTensor * output)

cuvsError_t cuvsNNDescentBuild(cuvsResources_t res,
cuvsNNDescentIndexParams* params,
DLManagedTensor* dataset,
Expand Down
48 changes: 41 additions & 7 deletions python/cuvs/cuvs/neighbors/nn_descent/nn_descent.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ cdef class IndexParams:
intermediate_graph_degree=None,
max_iterations=None,
termination_threshold=None,
n_clusters=None
n_clusters=None,
return_distances=None
):
if metric is not None:
self.params.metric = <cuvsDistanceType>DISTANCE_TYPES[metric]
Expand All @@ -102,11 +103,8 @@ cdef class IndexParams:
self.params.termination_threshold = termination_threshold
if n_clusters is not None:
self.params.n_clusters = n_clusters

# setting this parameter to true will cause an exception in the c++
# api (`Using return_distances set to true requires distance view to
# be allocated.`) - so instead force to be false here
self.params.return_distances = False
if return_distances is not None:
self.params.return_distances = return_distances

@property
def metric(self):
Expand Down Expand Up @@ -163,13 +161,39 @@ cdef class Index:

# Read-only accessors; both delegate to resource-synchronized helpers.
@property
def graph(self):
# KNN graph as a host uint32 ndarray of shape (num_rows, graph_degree).
return self._get_graph()

@property
def distances(self):
# Distances as a host float32 ndarray; requires the index to have been
# built with return_distances=True.
return self._get_distances()

@auto_sync_resources
def _get_graph(self, resources=None):
if not self.trained:
raise ValueError("Index needs to be built before getting graph")

cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()

output = np.empty((self.num_rows, self.graph_degree), dtype='uint32')
ai = wrap_array(output)
cdef cydlpack.DLManagedTensor* output_dlpack = cydlpack.dlpack_c(ai)
check_cuvs(cuvsNNDescentIndexGetGraph(self.index, output_dlpack))
check_cuvs(cuvsNNDescentIndexGetGraph(res, self.index, output_dlpack))
return output

@auto_sync_resources
def _get_distances(self, resources=None):
# Copy the index's distances into a host float32 ndarray of shape
# (num_rows, graph_degree). The C call fails if the index was built
# without return_distances.
if not self.trained:
raise ValueError("Index needs to be built before getting"
" distances")

cdef cuvsResources_t res = <cuvsResources_t>resources.get_c_obj()

output = np.empty((self.num_rows, self.graph_degree), dtype='float32')
ai = wrap_array(output)
cdef cydlpack.DLManagedTensor* output_dlpack = cydlpack.dlpack_c(ai)
check_cuvs(cuvsNNDescentIndexGetDistances(res,
self.index,
output_dlpack))
return output

def __repr__(self):
Expand Down Expand Up @@ -221,6 +245,16 @@ def build(IndexParams index_params, dataset, graph=None, resources=None):

cdef cydlpack.DLManagedTensor* graph_dlpack = NULL
if graph is not None:
if params.return_distances:
# When using a pre-existing graph - having return_distances set to
# true will cause an exception in the C++ api
# (`Using return_distances set to true requires distance view to
# be allocated.`). Raise a more informative error here instead of
# the C++ exception
raise ValueError("Can't use return_distances with an existing"
" graph. Either set params.return_distances to"
" False, or set graph to None")

graph_ai = wrap_array(graph)
graph_dlpack = cydlpack.dlpack_c(graph_ai)

Expand Down
20 changes: 18 additions & 2 deletions python/cuvs/cuvs/tests/test_nn_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,27 @@
@pytest.mark.parametrize("device_memory", [True, False])
@pytest.mark.parametrize("dtype", [np.float32])
@pytest.mark.parametrize("inplace", [True, False])
def test_nn_descent(n_rows, n_cols, device_memory, dtype, inplace):
@pytest.mark.parametrize("return_distances", [True, False])
def test_nn_descent(
n_rows, n_cols, device_memory, dtype, inplace, return_distances
):
# because of a limitation in the c++ api, we can't both return the
# distances and have an inplace graph
if inplace and return_distances:
pytest.skip("Can't return distances with an inplace graph")

metric = "sqeuclidean"
graph_degree = 64

input1 = np.random.random_sample((n_rows, n_cols)).astype(dtype)
input1_device = device_ndarray(input1)
graph = np.zeros((n_rows, graph_degree), dtype="uint32")

params = nn_descent.IndexParams(metric=metric, graph_degree=graph_degree)
params = nn_descent.IndexParams(
metric=metric,
graph_degree=graph_degree,
return_distances=return_distances,
)
index = nn_descent.build(
params,
input1_device if device_memory else input1,
Expand All @@ -50,4 +62,8 @@ def test_nn_descent(n_rows, n_cols, device_memory, dtype, inplace):
)
bfknn_graph = bfknn_graph.copy_to_host()

if return_distances:
distances = index.distances
assert distances.shape == graph.shape

assert calc_recall(graph, bfknn_graph) > 0.9