Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
98fd1b2
move the current implementation of mg neighborhood sampling to proto
May 18, 2022
063e443
remove experimental prefix
May 18, 2022
e6ed994
refactor mg neighborhood sampling bindings
May 18, 2022
4581645
add and test mechanism for creating graph with edge index as weight
ChuckHastings May 19, 2022
57680f6
Merge mechanism for creating graph with edge index as weight
May 19, 2022
16cea30
rename create*_with_ids to create*_with_edge_ids
ChuckHastings May 19, 2022
2fd99b5
rename create*_with_ids to create*_with_edge_ids from Chuck
May 19, 2022
3f90963
update python bindings to create graph with edge index as weight
May 20, 2022
d460193
fix bug in MG case... cugraph_ops function doesn't handle an empty re…
ChuckHastings May 20, 2022
c7d0a11
merge bug fix in MG case by Chuck
May 20, 2022
e6210a2
Merge remote-tracking branch 'upstream/branch-22.06' into branch-22.0…
May 23, 2022
3b76a49
add bindings for SG uniform_neighbor_sample
May 24, 2022
e3b5fe4
Merge remote-tracking branch 'upstream/branch-22.06' into branch-22.0…
May 24, 2022
9933120
remove debug print
May 24, 2022
604ab0f
update pylibcugraph uniform_neighbor_sample tests because of the API …
May 24, 2022
c579261
drop the directory proto
May 24, 2022
626a833
enable support for weigths
May 25, 2022
6d91c39
remove debug prints, address PR comments
May 26, 2022
f038688
move uniform_neighbor_sample to stable API, convert edge_ids to weigh…
May 27, 2022
d97ce67
update uniform neighborhood sampling tests
May 30, 2022
c9482c2
merge latest change and update branch
jnke2016 May 30, 2022
720b05d
remove uniform neighbor sample older mechanism
May 30, 2022
78f7dd6
add end of line
May 30, 2022
ce97653
resolve merge conflict
jnke2016 Jun 1, 2022
7fdc09d
remove merge labels
jnke2016 Jun 1, 2022
b629ca9
remove outdated fixme
jnke2016 Jun 1, 2022
8a8f063
remove unused import
jnke2016 Jun 1, 2022
1af8e7b
add end of line
Jun 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion python/cugraph/cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,13 @@
from raft import raft_include_test
from cugraph.dask.comms import comms

from cugraph.sampling import random_walks, rw_path, node2vec
from cugraph.sampling import (
random_walks,
rw_path,
node2vec,
uniform_neighbor_sample,
)


from cugraph import experimental

Expand Down
1 change: 1 addition & 0 deletions python/cugraph/cugraph/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@
from .community.triangle_count import triangle_count
from .centrality.katz_centrality import katz_centrality
from .components.connectivity import weakly_connected_components
from .sampling.uniform_neighbor_sample import uniform_neighbor_sample
from .centrality.eigenvector_centrality import eigenvector_centrality
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@
import dask_cudf
import cudf

from pylibcugraph.experimental import (MGGraph,
ResourceHandle,
GraphProperties,
uniform_neighborhood_sampling,
)
from pylibcugraph import (ResourceHandle,
GraphProperties,
MGGraph
)

from pylibcugraph import \
uniform_neighbor_sample as pylibcugraph_uniform_neighbor_sample

from cugraph.dask.common.input_utils import get_distributed_data
from cugraph.dask.comms import comms as Comms

Expand All @@ -34,7 +37,6 @@ def call_nbr_sampling(sID,
num_edges,
do_expensive_check,
start_list,
info_list,
h_fan_out,
with_replacement):

Expand All @@ -59,36 +61,38 @@ def call_nbr_sampling(sID,
num_edges,
do_expensive_check)

ret_val = uniform_neighborhood_sampling(handle,
mg,
start_list,
info_list,
h_fan_out,
with_replacement,
do_expensive_check)
ret_val = pylibcugraph_uniform_neighbor_sample(handle,
mg,
start_list,
h_fan_out,
with_replacement,
do_expensive_check)
return ret_val


def convert_to_cudf(cp_arrays):
def convert_to_cudf(cp_arrays, weight_t):
"""
Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper
"""
cupy_sources, cupy_destinations, cupy_labels, cupy_indices = cp_arrays
# cupy_sources, cupy_destinations, cupy_labels, cupy_indices,
# cupy_counts = cp_arrays
cupy_sources, cupy_destinations, cupy_indices = cp_arrays

df = cudf.DataFrame()
df["sources"] = cupy_sources
df["destinations"] = cupy_destinations
df["labels"] = cupy_labels
df["indices"] = cupy_indices
# df["counts"] = cupy_counts

if weight_t == "int32":
df.indices = df.indices.astype("int32")
elif weight_t == "int64":
df.indices = df.indices.astype("int64")

return df


def EXPERIMENTAL__uniform_neighborhood(input_graph,
start_info_list,
fanout_vals,
with_replacement=True):
def uniform_neighbor_sample(input_graph,
start_list,
fanout_vals,
with_replacement=True):
"""
Does neighborhood sampling, which samples nodes from a graph based on the
current node's neighbors, with a corresponding fanout value at each hop.
Expand All @@ -99,10 +103,8 @@ def EXPERIMENTAL__uniform_neighborhood(input_graph,
cuGraph graph, which contains connectivity information as dask cudf
edge list dataframe

start_info_list : tuple of list or cudf.Series (int32)
Tuple of a list of starting vertices for sampling, along with a
corresponding list of label for reorganizing results after sending
the input to different callers.
start_list : list or cudf.Series (int32)
a list of starting vertices for sampling

fanout_vals : list (int32)
List of branching out (fan-out) degrees per starting vertex for each
Expand All @@ -111,6 +113,7 @@ def EXPERIMENTAL__uniform_neighborhood(input_graph,
with_replacement: bool, optional (default=True)
Flag to specify if the random sampling is done with replacement


Returns
-------
result : dask_cudf.DataFrame
Expand All @@ -120,8 +123,6 @@ def EXPERIMENTAL__uniform_neighborhood(input_graph,
Contains the source vertices from the sampling result
ddf['destinations']: dask_cudf.Series
Contains the destination vertices from the sampling result
ddf['labels']: dask_cudf.Series
Contains the start labels from the sampling result
ddf['indices']: dask_cudf.Series
Contains the indices from the sampling result for path
reconstruction
Expand All @@ -135,18 +136,15 @@ def EXPERIMENTAL__uniform_neighborhood(input_graph,
input_graph.compute_renumber_edge_list(
transposed=False, legacy_renum_only=True)

start_list, info_list = start_info_list
if isinstance(start_list, int):
start_list = [start_list]

if isinstance(start_list, list):
start_list = cudf.Series(start_list)
if start_list.dtype != 'int32':
if start_list.dtype != "int32":
raise ValueError(f"'start_list' must have int32 values, "
f"got: {start_list.dtype}")
if isinstance(info_list, list):
info_list = cudf.Series(info_list)
if info_list.dtype != 'int32':
raise ValueError(f"'info_list' must have int32 values, "
f"got: {info_list.dtype}")

# fanout_vals must be a host array!
# FIXME: ensure other sequence types (eg. cudf Series) can be handled.
if isinstance(fanout_vals, list):
Expand All @@ -156,12 +154,18 @@ def EXPERIMENTAL__uniform_neighborhood(input_graph,
f"got: {type(fanout_vals)}")

ddf = input_graph.edgelist.edgelist_df
num_edges = len(ddf)
data = get_distributed_data(ddf)

src_col_name = input_graph.renumber_map.renumbered_src_col_name
dst_col_name = input_graph.renumber_map.renumbered_dst_col_name

weight_t = ddf["value"].dtype
if weight_t == "int32":
ddf = ddf.astype({"value": "float32"})
elif weight_t == "int64":
ddf = ddf.astype({"value": "float64"})

num_edges = len(ddf)
data = get_distributed_data(ddf)

# start_list uses "external" vertex IDs, but if the graph has been
# renumbered, the start vertex IDs must also be renumbered.
if input_graph.renumbered:
Expand All @@ -177,7 +181,6 @@ def EXPERIMENTAL__uniform_neighborhood(input_graph,
num_edges,
do_expensive_check,
start_list,
info_list,
fanout_vals,
with_replacement,
workers=[wf[0]])
Expand All @@ -186,14 +189,14 @@ def EXPERIMENTAL__uniform_neighborhood(input_graph,
wait(result)

cudf_result = [client.submit(convert_to_cudf,
cp_arrays)
cp_arrays, weight_t)
for cp_arrays in result]

wait(cudf_result)

ddf = dask_cudf.from_delayed(cudf_result)
if input_graph.renumbered:
ddf = input_graph.unrenumber(ddf, "sources")
ddf = input_graph.unrenumber(ddf, "destinations")
ddf = input_graph.unrenumber(ddf, "sources", preserve_order=True)
ddf = input_graph.unrenumber(ddf, "destinations", preserve_order=True)

return ddf
7 changes: 0 additions & 7 deletions python/cugraph/cugraph/experimental/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.utilities.api_tools import experimental_warning_wrapper

from cugraph.dask.sampling.neighborhood_sampling import \
EXPERIMENTAL__uniform_neighborhood
uniform_neighborhood_sampling = \
experimental_warning_wrapper(EXPERIMENTAL__uniform_neighborhood)
2 changes: 1 addition & 1 deletion python/cugraph/cugraph/proto/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down
2 changes: 2 additions & 0 deletions python/cugraph/cugraph/sampling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@

from cugraph.sampling.random_walks import random_walks, rw_path
from cugraph.sampling.node2vec import node2vec
from cugraph.sampling.uniform_neighbor_sample import \
uniform_neighbor_sample
132 changes: 132 additions & 0 deletions python/cugraph/cugraph/sampling/uniform_neighbor_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pylibcugraph import (ResourceHandle,
GraphProperties,
SGGraph,
)
from pylibcugraph import uniform_neighbor_sample as \
pylibcugraph_uniform_neighbor_sample

import numpy

import cudf


def uniform_neighbor_sample(G,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might want to add both this SG and the MG uniform_neighbor_sample to the API docs

start_list,
fanout_vals,
with_replacement=True,
is_edge_ids=False):
"""
Does neighborhood sampling, which samples nodes from a graph based on the
current node's neighbors, with a corresponding fanout value at each hop.

Parameters
----------
G : cugraph.Graph
cuGraph graph, which contains connectivity information as dask cudf
edge list dataframe

start_list : list or cudf.Series (int32)
a list of starting vertices for sampling

fanout_vals : list (int32)
List of branching out (fan-out) degrees per starting vertex for each
hop level.

with_replacement: bool, optional (default=True)
Flag to specify if the random sampling is done with replacement

Returns
-------
result : cudf.DataFrame
GPU data frame containing two cudf.Series

df['sources']: cudf.Series
Contains the source vertices from the sampling result
df['destinations']: cudf.Series
Contains the destination vertices from the sampling result
df['indices']: cudf.Series
Contains the indices from the sampling result for path
reconstruction
"""

if isinstance(start_list, int):
start_list = [start_list]

if isinstance(start_list, list):
start_list = cudf.Series(start_list, dtype="int32")
if start_list.dtype != "int32":
raise ValueError(f"'start_list' must have int32 values, "
f"got: {start_list.dtype}")

# fanout_vals must be a host array!
# FIXME: ensure other sequence types (eg. cudf Series) can be handled.
if isinstance(fanout_vals, list):
fanout_vals = numpy.asarray(fanout_vals, dtype="int32")
else:
raise TypeError("fanout_vals must be a list, "
f"got: {type(fanout_vals)}")

if G.renumbered is True:
if isinstance(start_list, cudf.DataFrame):
start_list = G.lookup_internal_vertex_id(
start_list, start_list.columns)
else:
start_list = G.lookup_internal_vertex_id(start_list)

srcs = G.edgelist.edgelist_df['src']
dsts = G.edgelist.edgelist_df['dst']
weights = G.edgelist.edgelist_df['weights']
weight_t = weights.dtype

if weight_t == "int32":
weights = weights.astype("float32")
if weight_t == "int64":
weights = weights.astype("float64")

if srcs.dtype != 'int32':
raise ValueError(f"Graph vertices must have int32 values, "
f"got: {srcs.dtype}")

resource_handle = ResourceHandle()
graph_props = GraphProperties(is_multigraph=G.is_multigraph())
store_transposed = False
renumber = False
do_expensive_check = False

sg = SGGraph(resource_handle, graph_props, srcs, dsts, weights,
store_transposed, renumber, do_expensive_check)

sources, destinations, indices = \
pylibcugraph_uniform_neighbor_sample(resource_handle, sg, start_list,
fanout_vals, with_replacement,
do_expensive_check)

df = cudf.DataFrame()
df["sources"] = sources
df["destinations"] = destinations
df["indices"] = indices
if weight_t == "int32":
df["indices"] = indices.astype("int32")
elif weight_t == "int64":
df["indices"] = indices.astype("int64")
else:
df["indices"] = indices

if G.renumbered:
df = G.unrenumber(df, "sources", preserve_order=True)
df = G.unrenumber(df, "destinations", preserve_order=True)

return df
15 changes: 9 additions & 6 deletions python/cugraph/cugraph/structure/graph_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ def from_cudf_edgelist(
source="source",
destination="destination",
edge_attr=None,
renumber=True
renumber=True,
legacy_renum_only=False
):
"""
Initialize a graph from the edge list. It is an error to call this
Expand Down Expand Up @@ -150,11 +151,13 @@ def from_cudf_edgelist(
elif (self._Impl.edgelist is not None or
self._Impl.adjlist is not None):
raise RuntimeError("Graph already has values")
self._Impl._simpleGraphImpl__from_edgelist(input_df,
source=source,
destination=destination,
edge_attr=edge_attr,
renumber=renumber)
self._Impl._simpleGraphImpl__from_edgelist(
input_df,
source=source,
destination=destination,
edge_attr=edge_attr,
renumber=renumber,
legacy_renum_only=legacy_renum_only)

def from_cudf_adjlist(self, offset_col, index_col, value_col=None):
"""
Expand Down
Loading