Merge pull request #1518 from rapidsai/branch-0.19

GPUtester · web-flow · commit fb14030b632f · 2021-04-07T11:11:31.000-04:00
[gpuCI] Forward-merge branch-0.19 to branch-0.20 [skip ci]
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -402,6 +402,7 @@ endif(BUILD_STATIC_FAISS)
 add_library(cugraph SHARED
     src/utilities/spmv_1D.cu
     src/utilities/cython.cu
+    src/utilities/path_retrieval.cu
     src/structure/graph.cu
     src/linear_assignment/hungarian.cu
     src/link_analysis/gunrock_hits.cpp
diff --git a/cpp/include/utilities/path_retrieval.hpp b/cpp/include/utilities/path_retrieval.hpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/handle.hpp>
+
+namespace cugraph {
+
+/**
+ * @brief Takes the results of BFS or SSSP function call and sums the given
+ * weights along the path to the starting vertex.
+ *
+ * For each index i the walk starts at preds[i] and follows predecessors until
+ * stop_vertex is encountered; info_weights of every visited entry are added up.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms. Must have at least one worker stream.
+ * @param vertices Pointer to vertex ids (num_vertices elements).
+ * @param preds Pointer to predecessors (num_vertices elements).
+ * @param info_weights Secondary weights along the edge from predecessor to vertex
+ * (num_vertices elements).
+ * @param out Contains for each index the sum of weights along the path unfolding
+ * (num_vertices elements, must not be null).
+ * @param stop_vertex Predecessor value that marks the end of a path; accumulation for an
+ * entry stops as soon as this value is reached.
+ * @param num_vertices Number of vertices. Must be strictly positive.
+ **/
+template <typename vertex_t, typename weight_t>
+void get_traversed_cost(raft::handle_t const &handle,
+                        vertex_t const *vertices,
+                        vertex_t const *preds,
+                        weight_t const *info_weights,
+                        weight_t *out,
+                        vertex_t stop_vertex,
+                        vertex_t num_vertices);
+}  // namespace cugraph
diff --git a/cpp/src/utilities/path_retrieval.cu b/cpp/src/utilities/path_retrieval.cu
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <rmm/thrust_rmm_allocator.h>
+#include <rmm/device_uvector.hpp>
+
+#include <raft/handle.hpp>
+
+#include <utilities/error.hpp>
+#include <utilities/path_retrieval.hpp>
+
+namespace cugraph {
+namespace detail {
+
+/**
+ * @brief For every entry i, follows the predecessor chain starting at preds[i] and accumulates
+ * info_weights along it until stop_vertex is reached; the total is written to out[i].
+ *
+ * Launch layout: 1D grid of 1D blocks. The loop advances by the total number of launched
+ * threads, so any grid size covers all entries. No shared memory is used.
+ *
+ * @param vertices Not read by the kernel; kept so the signature mirrors the host-side caller.
+ * @param preds preds[i] is the predecessor of entry i.
+ * @param vtx_map Translates a predecessor id into the position of that vertex in
+ * preds / info_weights.
+ * @param info_weights Weight contributed by each entry.
+ * @param out Receives the accumulated weight of each entry.
+ * @param stop_vertex Predecessor value that terminates the walk.
+ * @param num_vertices Number of entries in each array.
+ *
+ * NOTE(review): a chain that never reaches stop_vertex does not terminate, and every
+ * predecessor different from stop_vertex is used as an index into vtx_map without a bounds
+ * check. Callers are responsible for providing consistent input.
+ */
+template <typename vertex_t, typename weight_t>
+__global__ void get_traversed_cost_kernel(vertex_t const *vertices,
+                                          vertex_t const *preds,
+                                          vertex_t const *vtx_map,
+                                          weight_t const *info_weights,
+                                          weight_t *out,
+                                          vertex_t stop_vertex,
+                                          vertex_t num_vertices)
+{
+  // Index and stride are kept in 64 bits: adding the unsigned stride to a 32-bit vertex_t index
+  // can wrap to a negative value that still satisfies the bound test for large num_vertices.
+  int64_t const n_entries = static_cast<int64_t>(num_vertices);
+  int64_t const stride    = static_cast<int64_t>(gridDim.x) * blockDim.x;
+
+  for (int64_t i = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x; i < n_entries;
+       i += stride) {
+    // The entry's own weight is always counted, even when its predecessor is stop_vertex.
+    weight_t sum  = info_weights[i];
+    vertex_t pred = preds[i];
+    while (pred != stop_vertex) {
+      vertex_t pos = vtx_map[pred];
+      sum += info_weights[pos];
+      pred = preds[pos];
+    }
+    out[i] = sum;
+  }
+}
+
+/**
+ * @brief Host-side driver: builds the lookup table consumed by the kernel and launches it.
+ *
+ * A copy of @p vertices is sorted together with a 0..num_vertices-1 sequence, so that afterwards
+ * vtx_map[k] holds the original position of the k-th smallest vertex id. The copy, the thrust
+ * calls and the kernel are all issued on the stream of @p handle.
+ *
+ * NOTE(review): the kernel indexes vtx_map directly with predecessor ids, which only lines up
+ * with the sorted order if the ids are exactly 0..num_vertices-1 - confirm callers renumber.
+ *
+ * @param handle RAFT handle providing the stream and the device properties.
+ * @param vertices Vertex ids (num_vertices elements).
+ * @param preds Predecessor of each entry (num_vertices elements).
+ * @param info_weights Weight of each entry (num_vertices elements).
+ * @param out Output buffer receiving one accumulated weight per entry.
+ * @param stop_vertex Predecessor value that terminates a path.
+ * @param num_vertices Number of entries; used to size the launch and the temporary buffers.
+ */
+template <typename vertex_t, typename weight_t>
+void get_traversed_cost_impl(raft::handle_t const &handle,
+                             vertex_t const *vertices,
+                             vertex_t const *preds,
+                             weight_t const *info_weights,
+                             weight_t *out,
+                             vertex_t stop_vertex,
+                             vertex_t num_vertices)
+{
+  auto stream          = handle.get_stream();
+  vertex_t max_blocks  = handle.get_device_properties().maxGridSize[0];
+  vertex_t max_threads = handle.get_device_properties().maxThreadsPerBlock;
+
+  // 1D launch: at most one thread per entry, capped by the device limits. The kernel loops
+  // over the remaining entries when the grid is capped.
+  dim3 nthreads, nblocks;
+  nthreads.x = std::min<vertex_t>(num_vertices, max_threads);
+  nthreads.y = 1;
+  nthreads.z = 1;
+  nblocks.x  = std::min<vertex_t>((num_vertices + nthreads.x - 1) / nthreads.x, max_blocks);
+  nblocks.y  = 1;
+  nblocks.z  = 1;
+
+  // Temporary buffers: a sortable copy of the vertex ids and the position sequence that is
+  // permuted alongside it.
+  rmm::device_uvector<vertex_t> vtx_map_v(num_vertices, stream);
+  rmm::device_uvector<vertex_t> vtx_keys_v(num_vertices, stream);
+  vertex_t *vtx_map  = vtx_map_v.data();
+  vertex_t *vtx_keys = vtx_keys_v.data();
+  raft::copy(vtx_keys, vertices, num_vertices, stream);
+
+  thrust::sequence(rmm::exec_policy(stream)->on(stream), vtx_map, vtx_map + num_vertices);
+
+  thrust::stable_sort_by_key(
+    rmm::exec_policy(stream)->on(stream), vtx_keys, vtx_keys + num_vertices, vtx_map);
+
+  // Launch on the same stream that produced vtx_map and owns the temporary buffers. Launching
+  // on the default stream gives no ordering guarantee with respect to a non-blocking stream,
+  // so the kernel could read vtx_map before it is sorted or after it has been released.
+  get_traversed_cost_kernel<<<nblocks, nthreads, 0, stream>>>(
+    vertices, preds, vtx_map, info_weights, out, stop_vertex, num_vertices);
+}
+}  // namespace detail
+
+/**
+ * @brief Public entry point: validates the arguments and forwards to the implementation.
+ *
+ * @param handle RAFT handle providing the stream and device properties.
+ * @param vertices Vertex ids (num_vertices elements, must not be null).
+ * @param preds Predecessor of each entry (num_vertices elements, must not be null).
+ * @param info_weights Weight of each entry (num_vertices elements, must not be null).
+ * @param out Output buffer (num_vertices elements, must not be null).
+ * @param stop_vertex Predecessor value that terminates a path.
+ * @param num_vertices Number of entries. Must be strictly positive.
+ *
+ * A failed CUGRAPH_EXPECTS check reports the corresponding message to the caller.
+ */
+template <typename vertex_t, typename weight_t>
+void get_traversed_cost(raft::handle_t const &handle,
+                        vertex_t const *vertices,
+                        vertex_t const *preds,
+                        weight_t const *info_weights,
+                        weight_t *out,
+                        vertex_t stop_vertex,
+                        vertex_t num_vertices)
+{
+  CUGRAPH_EXPECTS(num_vertices > 0, "num_vertices should be strictly positive");
+  CUGRAPH_EXPECTS(out != nullptr, "out should be of size num_vertices");
+  // Every input array is dereferenced on the device; reject null pointers here instead of
+  // letting them surface later as an asynchronous illegal-address error.
+  CUGRAPH_EXPECTS(vertices != nullptr, "vertices should be of size num_vertices");
+  CUGRAPH_EXPECTS(preds != nullptr, "preds should be of size num_vertices");
+  CUGRAPH_EXPECTS(info_weights != nullptr, "info_weights should be of size num_vertices");
+  cugraph::detail::get_traversed_cost_impl(
+    handle, vertices, preds, info_weights, out, stop_vertex, num_vertices);
+}
+
+template void get_traversed_cost<int32_t, float>(raft::handle_t const &handle,
+                                                 int32_t const *vertices,
+                                                 int32_t const *preds,
+                                                 float const *info_weights,
+                                                 float *out,
+                                                 int32_t stop_vertex,
+                                                 int32_t num_vertices);  // int32 ids, float
+
+template void get_traversed_cost<int32_t, double>(raft::handle_t const &handle,
+                                                  int32_t const *vertices,
+                                                  int32_t const *preds,
+                                                  double const *info_weights,
+                                                  double *out,
+                                                  int32_t stop_vertex,
+                                                  int32_t num_vertices);  // int32 ids, double
+
+template void get_traversed_cost<int64_t, float>(raft::handle_t const &handle,
+                                                 int64_t const *vertices,
+                                                 int64_t const *preds,
+                                                 float const *info_weights,
+                                                 float *out,
+                                                 int64_t stop_vertex,
+                                                 int64_t num_vertices);  // int64 ids, float
+
+template void get_traversed_cost<int64_t, double>(raft::handle_t const &handle,
+                                                  int64_t const *vertices,
+                                                  int64_t const *preds,
+                                                  double const *info_weights,
+                                                  double *out,
+                                                  int64_t stop_vertex,
+                                                  int64_t num_vertices);  // int64 ids, double
+}  // namespace cugraph
diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py
@@ -17,7 +17,9 @@
 import pytest
 
 import cugraph
+import cudf
 from cugraph.tests import utils
+import numpy as np
 
 
 def test_bfs_paths():
@@ -68,3 +70,29 @@ def test_bfs_paths_array():
         answer = cugraph.utils.get_traversed_path_list(df, 100)
 
         assert "not in the result set" in str(ErrorMsg)
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS)
+def test_get_traversed_cost(graph_file):
+    cu_M = utils.read_csv_file(graph_file)
+
+    noise = cudf.Series(np.random.randint(10, size=(cu_M.shape[0])))
+    cu_M['info'] = cu_M['2'] + noise
+
+    G = cugraph.Graph()
+    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='info')
+
+    # run SSSP starting at vertex 17
+    df = cugraph.sssp(G,  16)
+
+    answer = cugraph.utilities.path_retrieval.get_traversed_cost(df, 16,
+                                                                 cu_M['0'],
+                                                                 cu_M['1'],
+                                                                 cu_M['info']
+                                                                 )
+
+    df = df.sort_values(by='vertex').reset_index()
+    answer = answer.sort_values(by='vertex').reset_index()
+
+    assert df.shape[0] == answer.shape[0]
+    assert np.allclose(df['distance'], answer['info'])
diff --git a/python/cugraph/utilities/__init__.py b/python/cugraph/utilities/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -25,3 +25,4 @@
                                      is_cp_matrix_type,
                                      is_sp_matrix_type,
                                      )
+from cugraph.utilities.path_retrieval import get_traversed_cost
diff --git a/python/cugraph/utilities/path_retrieval.pxd b/python/cugraph/utilities/path_retrieval.pxd
@@ -0,0 +1,30 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+from cugraph.structure.graph_primtypes cimport *
+
+cdef extern from "utilities/path_retrieval.hpp" namespace "cugraph":  # C++ declaration for Cython
+
+    cdef void get_traversed_cost[vertex_t, weight_t](const handle_t &handle,
+            const vertex_t *vertices,
+            const vertex_t *preds,
+            const weight_t *info_weights,
+            weight_t *out,
+            vertex_t stop_vertex,
+            vertex_t num_vertices) except +  # C++ exceptions are raised as Python exceptions
+
diff --git a/python/cugraph/utilities/path_retrieval.py b/python/cugraph/utilities/path_retrieval.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import cudf
+
+from cugraph.structure.symmetrize import symmetrize
+from cugraph.structure.number_map import NumberMap
+from cugraph.utilities import path_retrieval_wrapper
+
+
+def get_traversed_cost(df, source, source_col, dest_col, value_col):
+    """
+    Take the DataFrame result from a BFS or SSSP function call and sums
+    the given weights along the path to the starting vertex.
+    The source_col, dest_col identifiers need to match with the vertex and
+    predecessor columns of df.
+
+    Input Parameters
+    ----------
+    df : cudf.DataFrame
+        The dataframe containing the results of a BFS or SSSP call
+    source: int
+        Index of the source vertex.
+    source_col : cudf.DataFrame
+        This cudf.Series wraps a gdf_column of size E (E: number of edges).
+        The gdf column contains the source index for each edge.
+        Source indices must be an integer type.
+    dest_col : cudf.Series
+        This cudf.Series wraps a gdf_column of size E (E: number of edges).
+        The gdf column contains the destination index for each edge.
+        Destination indices must be an integer type.
+    value_col : cudf.Series
+        This cudf.Series wraps a gdf_column of size E (E: number of edges).
+        The gdf column contains values associated with this edge.
+        Weight should be a floating type.
+
+    Returns
+    ---------
+    df : cudf.DataFrame
+        DataFrame containing two columns 'vertex' and 'info'.
+        Unreachable vertices will have value the max value of the weight type.
+    """
+
+    if 'vertex' not in df.columns:
+        raise ValueError("DataFrame does not appear to be a BFS or "
+                         "SSP result - 'vertex' column missing")
+    if 'distance' not in df.columns:
+        raise ValueError("DataFrame does not appear to be a BFS or "
+                         "SSP result - 'distance' column missing")
+    if 'predecessor' not in df.columns:
+        raise ValueError("DataFrame does not appear to be a BFS or "
+                         "SSP result - 'predecessor' column missing")
+
+    src, dst, val = symmetrize(source_col,
+                               dest_col,
+                               value_col)
+
+    symmetrized_df = cudf.DataFrame()
+    symmetrized_df['source'] = src
+    symmetrized_df['destination'] = dst
+    symmetrized_df['weights'] = val
+
+    input_df = df.merge(symmetrized_df,
+                        left_on=['vertex', 'predecessor'],
+                        right_on=['source', 'destination'],
+                        how="left"
+                        )
+
+    # Set unreachable vertex weights to max float and source vertex weight to 0
+    max_val = np.finfo(val.dtype).max
+    input_df[['weights']] = input_df[['weights']].fillna(max_val)
+    input_df.loc[input_df['vertex'] == source, 'weights'] = 0
+
+    # Renumber
+    renumbered_gdf, renumber_map = NumberMap.renumber(input_df,
+                                                      ["vertex"],
+                                                      ["predecessor"],
+                                                      preserve_order=True)
+    renumbered_gdf = renumbered_gdf.rename(columns={'src': 'vertex',
+                                                    'dst': 'predecessor'})
+    stop_vertex = renumber_map.to_internal_vertex_id(cudf.Series(-1)).values[0]
+
+    out_df = path_retrieval_wrapper.get_traversed_cost(renumbered_gdf,
+                                                       stop_vertex)
+
+    # Unrenumber
+    out_df['vertex'] = renumber_map.unrenumber(renumbered_gdf, 'vertex',
+                                               preserve_order=True)["vertex"]
+    return out_df
diff --git a/python/cugraph/utilities/path_retrieval_wrapper.pyx b/python/cugraph/utilities/path_retrieval_wrapper.pyx