Skip to content

Commit 1e6da2b

Browse files
[BUG] Fix Calls to cudf.DataFrame/Series.unique that relied on old behavior (#3616)
Updates code that relied on `unique()` returning values in sorted order so that it now sorts the unique values explicitly. Closes #3615. Authors: Alex Barghi (https://github.com/alexbarghi-nv), Brad Rees (https://github.com/BradReesWork). Approvers: Rick Ratzel (https://github.com/rlratzel), Vibhu Jawa (https://github.com/VibhuJawa). URL: #3616
1 parent aa00704 commit 1e6da2b

File tree

7 files changed

+21
-15
lines changed

7 files changed

+21
-15
lines changed

python/cugraph-service/tests/test_remote_graph.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -392,9 +392,11 @@ def test_extract_subgraph(
392392

393393
assert remote_sg.get_num_vertices() == sg.number_of_vertices()
394394

395-
expected_vertex_ids = cudf.concat(
396-
[sg.edgelist.edgelist_df["src"], sg.edgelist.edgelist_df["dst"]]
397-
).unique()
395+
expected_vertex_ids = (
396+
cudf.concat([sg.edgelist.edgelist_df["src"], sg.edgelist.edgelist_df["dst"]])
397+
.unique()
398+
.sort_values()
399+
)
398400
if renumber:
399401
expected_vertex_ids = sg.unrenumber(
400402
cudf.DataFrame({"v": expected_vertex_ids}), "v"

python/cugraph/cugraph/components/connectivity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def _convert_df_to_output_type(df, input_type, return_labels):
8484
# The number of connected components (number of unique labels).
8585
# labels: ndarray
8686
# The length-N array of labels of the connected components.
87-
n_components = len(df["labels"].unique())
87+
n_components = df["labels"].nunique()
8888
sorted_df = df.sort_values("vertex")
8989
if return_labels:
9090
if is_cp_matrix_type(input_type):

python/cugraph/cugraph/dask/structure/mg_property_graph.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,11 @@ def get_vertices(self, selection=None):
379379
vert_sers = self.__get_all_vertices_series()
380380
if vert_sers:
381381
if self.__series_type is dask_cudf.Series:
382-
return dask_cudf.concat(vert_sers, ignore_index=True).unique()
382+
return (
383+
dask_cudf.concat(vert_sers, ignore_index=True)
384+
.unique()
385+
.sort_values()
386+
)
383387
else:
384388
raise TypeError("dataframe must be a CUDF Dask dataframe.")
385389
return self.__series_type()

python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ def view_edge_list(self):
397397
df[dst] : cudf.Series
398398
contains the destination index for each edge
399399
400-
df[weight] : cusd.Series
400+
df[weight] : cudf.Series
401401
Column is only present for weighted Graph,
402402
then containing the weight value for each edge
403403
"""
@@ -1179,7 +1179,8 @@ def edges(self):
11791179

11801180
def nodes(self):
11811181
"""
1182-
Returns all the nodes in the graph as a cudf.Series.
1182+
Returns all the nodes in the graph as a cudf.Series, in order of appearance
1183+
in the edgelist (source column first, then destination column).
11831184
If multi columns vertices, return a cudf.DataFrame.
11841185
"""
11851186
if self.edgelist is not None:

python/cugraph/cugraph/structure/hypergraph.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
22
# Licensed under the Apache License, Version 2.0 (the "License");
33
# you may not use this file except in compliance with the License.
44
# You may obtain a copy of the License at
@@ -325,7 +325,7 @@ def _create_entity_nodes(
325325

326326
for key, col in events[columns].items():
327327
cat = categories.get(key, key)
328-
col = col.unique()
328+
col = col.unique().sort_values()
329329
col = col.nans_to_nulls().dropna() if dropna else col
330330
if len(col) == 0:
331331
continue

python/cugraph/cugraph/structure/property_graph.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,7 @@ def get_num_edges(self, type=None):
488488
def get_vertices(self, selection=None):
489489
"""
490490
Return a Series containing the unique vertex IDs contained in both
491-
the vertex and edge property data.
491+
the vertex and edge property data in ascending order.
492492
Selection is not yet supported.
493493
494494
Parameters
@@ -530,12 +530,11 @@ def get_vertices(self, selection=None):
530530
if vert_sers:
531531
if self.__series_type is cudf.Series:
532532
return self.__series_type(
533-
cudf.concat(vert_sers, ignore_index=True).unique()
533+
cudf.concat(vert_sers, ignore_index=True).unique().sort_values()
534534
)
535535
else:
536-
return self.__series_type(
537-
pd.concat(vert_sers, ignore_index=True).unique()
538-
)
536+
x = pd.Series(pd.concat(vert_sers, ignore_index=True).unique())
537+
return self.__series_type(x.sort_values())
539538
return self.__series_type()
540539

541540
def vertices_ids(self):

python/cugraph/cugraph/tests/structure/test_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,7 @@ def test_bipartite_api(graph_file):
646646
# This test only tests the functionality of adding set of nodes and
647647
# retrieving them. The datasets currently used are not truly bipartite.
648648
cu_M = utils.read_csv_file(graph_file)
649-
nodes = cudf.concat([cu_M["0"], cu_M["1"]]).unique()
649+
nodes = cudf.concat([cu_M["0"], cu_M["1"]]).unique().sort_values()
650650

651651
# Create set of nodes for partition
652652
set1_exp = cudf.Series(nodes[0 : int(len(nodes) / 2)])

0 commit comments

Comments
 (0)