rapidsai
diff --git a/notebooks/structure/Renumber-2.ipynb b/notebooks/structure/Renumber-2.ipynb
Lines changed: 3 additions & 1 deletion
diff --git a/notebooks/structure/Renumber.ipynb b/notebooks/structure/Renumber.ipynb
Lines changed: 8 additions & 6 deletions
diff --git a/python/cugraph/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/cugraph/dask/centrality/katz_centrality.py
Lines changed: 9 additions & 0 deletions
diff --git a/python/cugraph/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx
Lines changed: 7 additions & 5 deletions
diff --git a/python/cugraph/cugraph/dask/common/input_utils.py b/python/cugraph/cugraph/dask/common/input_utils.py
Lines changed: 4 additions & 2 deletions
diff --git a/python/cugraph/cugraph/dask/community/louvain.py b/python/cugraph/cugraph/dask/community/louvain.py
Lines changed: 9 additions & 0 deletions
diff --git a/python/cugraph/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/cugraph/dask/community/louvain_wrapper.pyx
Lines changed: 5 additions & 3 deletions
diff --git a/python/cugraph/cugraph/dask/components/connectivity.py b/python/cugraph/cugraph/dask/components/connectivity.py
Lines changed: 9 additions & 0 deletions
diff --git a/python/cugraph/cugraph/dask/components/mg_connectivity_wrapper.pyx b/python/cugraph/cugraph/dask/components/mg_connectivity_wrapper.pyx
Lines changed: 7 additions & 5 deletions
diff --git a/python/cugraph/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx
Lines changed: 7 additions & 5 deletions
@@ -155,9 +155,11 @@
     "gdf['order'] = gdf.index\n",
     "\n",
     "tmp_df, numbering = NumberMap.renumber(gdf, ['src_ip'], ['dst_ip'])\n",
+    "new_src_col_name = numbering.renumbered_src_col_name\n",
+    "new_dst_col_name = numbering.renumbered_dst_col_name\n",
     "\n",
     "gdf = gdf.merge(tmp_df, on='order').sort_values('order').set_index(keys='order', drop=True)\n",
-    "gdf = gdf.rename(columns={'src': 'src_r', 'dst': 'dst_r'})"
+    "gdf = gdf.rename(columns={new_src_col_name: 'src_r', new_dst_col_name: 'dst_r'})"
    ]
   },
   {
 
@@ -126,7 +126,7 @@
    "source": [
     "# Run renumbering\n",
     "\n",
-    "Output from renumbering is a data frame and a NumberMap object.  The data frame contains the renumbered sources and destinations.  The NumberMap will allow you to translate from external to internal vertex identifiers.\n",
+    "Output from renumbering is a data frame and a NumberMap object.  The data frame contains the renumbered sources and destinations.  The NumberMap will allow you to translate from external to internal vertex identifiers.  The renumbering call renames the specified source and destination columns to indicate that they were renumbered and no longer contain the original data.  The new column names are guaranteed to be unique and will not collide with other column names; they are available from the NumberMap as `renumbered_src_col_name` and `renumbered_dst_col_name`.\n",
     "\n",
     "Note that renumbering does not guarantee that the output data frame is in the same order as the input data frame (although in our simple example it will match).  To address this we will add the index as a column of gdf before renumbering.\n"
    ]
@@ -140,6 +140,8 @@
     "gdf['order'] = gdf.index\n",
     "\n",
     "renumbered_df, numbering = NumberMap.renumber(gdf, ['source_as_int'], ['dest_as_int'])\n",
+    "new_src_col_name = numbering.renumbered_src_col_name\n",
+    "new_dst_col_name = numbering.renumbered_dst_col_name\n",
     "\n",
     "renumbered_df"
    ]
@@ -204,10 +206,10 @@
     "for i in range(len(renumbered_df)):\n",
     "    print(\" \", i,\n",
     "          \": (\",  source_as_int[i], \",\", dest_as_int[i],\n",
-    "          \"), renumbered: (\", renumbered_df['src'][i], \",\", renumbered_df['dst'][i], \n",
+    "          \"), renumbered: (\", renumbered_df[new_src_col_name][i], \",\", renumbered_df[new_dst_col_name][i], \n",
     "          \"), translate back: (\",\n",
-    "          numbering.from_internal_vertex_id(cudf.Series([renumbered_df['src'][i]]))['0'][0], \",\",\n",
-    "          numbering.from_internal_vertex_id(cudf.Series([renumbered_df['dst'][i]]))['0'][0], \")\"\n",
+    "          numbering.from_internal_vertex_id(cudf.Series([renumbered_df[new_src_col_name][i]]))['0'][0], \",\",\n",
+    "          numbering.from_internal_vertex_id(cudf.Series([renumbered_df[new_dst_col_name][i]]))['0'][0], \")\"\n",
     "         )\n"
    ]
   },
@@ -230,8 +232,8 @@
    "source": [
     "G = cugraph.Graph()\n",
     "gdf_r = cudf.DataFrame()\n",
-    "gdf_r[\"src\"] = renumbered_df[\"src\"]\n",
-    "gdf_r[\"dst\"] = renumbered_df[\"dst\"]\n",
+    "gdf_r[\"src\"] = renumbered_df[new_src_col_name]\n",
+    "gdf_r[\"dst\"] = renumbered_df[new_dst_col_name]\n",
     "\n",
     "G.from_cudf_edgelist(gdf_r, source='src', destination='dst', renumber=False)\n",
     "\n",
 
@@ -24,6 +24,8 @@
 
 def call_katz_centrality(sID,
                          data,
+                         src_col_name,
+                         dst_col_name,
                          num_verts,
                          num_edges,
                          vertex_partition_offsets,
@@ -40,6 +42,8 @@ def call_katz_centrality(sID,
     segment_offsets = \
         aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)]
     return mg_katz_centrality.mg_katz_centrality(data[0],
+                                                 src_col_name,
+                                                 dst_col_name,
                                                  num_verts,
                                                  num_edges,
                                                  vertex_partition_offsets,
@@ -153,9 +157,14 @@ def katz_centrality(input_graph,
     num_edges = len(ddf)
     data = get_distributed_data(ddf)
 
+    src_col_name = input_graph.renumber_map.renumbered_src_col_name
+    dst_col_name = input_graph.renumber_map.renumbered_dst_col_name
+
     result = [client.submit(call_katz_centrality,
                             Comms.get_session_id(),
                             wf[1],
+                            src_col_name,
+                            dst_col_name,
                             num_verts,
                             num_edges,
                             vertex_partition_offsets,
 
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -25,6 +25,8 @@ import numpy as np
 
 
 def mg_katz_centrality(input_df,
+                       src_col_name,
+                       dst_col_name,
                        num_global_verts,
                        num_global_edges,
                        vertex_partition_offsets,
@@ -43,8 +45,8 @@ def mg_katz_centrality(input_df,
     cdef size_t handle_size_t = <size_t>handle.getHandle()
     handle_ = <c_katz_centrality.handle_t*>handle_size_t
 
-    src = input_df['src']
-    dst = input_df['dst']
+    src = input_df[src_col_name]
+    dst = input_df[dst_col_name]
     vertex_t = src.dtype
     if num_global_edges > (2**31 - 1):
         edge_t = np.dtype("int64")
@@ -79,7 +81,7 @@ def mg_katz_centrality(input_df,
     cdef uintptr_t c_edge_weights = <uintptr_t>NULL
     if weights is not None:
       c_edge_weights = weights.__cuda_array_interface__['data'][0]
-    
+
     # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C
     vertex_partition_offsets_host = vertex_partition_offsets.values_host
     cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0]
@@ -109,7 +111,7 @@ def mg_katz_centrality(input_df,
                              num_global_verts, num_global_edges,
                              is_weighted,
                              False,
-                             True, True) 
+                             True, True)
 
     df = cudf.DataFrame()
     df['vertex'] = cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t)
 
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -229,7 +229,9 @@ def get_vertex_partition_offsets(input_graph):
     renumber_vertex_count = input_graph.renumber_map.implementation.ddf.\
         map_partitions(len).compute()
     renumber_vertex_cumsum = renumber_vertex_count.cumsum()
-    vertex_dtype = input_graph.edgelist.edgelist_df['src'].dtype
+    # Assume the input_graph edgelist was renumbered
+    src_col_name = input_graph.renumber_map.renumbered_src_col_name
+    vertex_dtype = input_graph.edgelist.edgelist_df[src_col_name].dtype
     vertex_partition_offsets = cudf.Series([0], dtype=vertex_dtype)
     vertex_partition_offsets = vertex_partition_offsets.append(cudf.Series(
         renumber_vertex_cumsum, dtype=vertex_dtype))
 
@@ -26,6 +26,8 @@
 
 def call_louvain(sID,
                  data,
+                 src_col_name,
+                 dst_col_name,
                  num_verts,
                  num_edges,
                  vertex_partition_offsets,
@@ -38,6 +40,8 @@ def call_louvain(sID,
     segment_offsets = \
         aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)]
     return c_mg_louvain.louvain(data[0],
+                                src_col_name,
+                                dst_col_name,
                                 num_verts,
                                 num_edges,
                                 vertex_partition_offsets,
@@ -130,9 +134,14 @@ def louvain(input_graph, max_iter=100, resolution=1.0):
     num_edges = len(ddf)
     data = get_distributed_data(ddf)
 
+    src_col_name = input_graph.renumber_map.renumbered_src_col_name
+    dst_col_name = input_graph.renumber_map.renumbered_dst_col_name
+
     futures = [client.submit(call_louvain,
                              Comms.get_session_id(),
                              wf[1],
+                             src_col_name,
+                             dst_col_name,
                              num_verts,
                              num_edges,
                              vertex_partition_offsets,
 
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -33,6 +33,8 @@ numberTypeMap = {np.dtype("int32") : <int>numberTypeEnum.int32Type,
 
 
 def louvain(input_df,
+            src_col_name,
+            dst_col_name,
             num_global_verts,
             num_global_edges,
             vertex_partition_offsets,
@@ -55,8 +57,8 @@ def louvain(input_df,
     # FIXME: much of this code is common to other algo wrappers, consider adding
     #        this to a shared utility as well
 
-    src = input_df['src']
-    dst = input_df['dst']
+    src = input_df[src_col_name]
+    dst = input_df[dst_col_name]
     num_local_edges = len(src)
 
     if "value" in input_df.columns:
 
@@ -21,6 +21,8 @@
 
 def call_wcc(sID,
              data,
+             src_col_name,
+             dst_col_name,
              num_verts,
              num_edges,
              vertex_partition_offsets,
@@ -31,6 +33,8 @@ def call_wcc(sID,
     segment_offsets = \
         aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)]
     return mg_connectivity.mg_wcc(data[0],
+                                  src_col_name,
+                                  dst_col_name,
                                   num_verts,
                                   num_edges,
                                   vertex_partition_offsets,
@@ -62,9 +66,14 @@ def weakly_connected_components(input_graph):
     num_edges = len(ddf)
     data = get_distributed_data(ddf)
 
+    src_col_name = input_graph.renumber_map.renumbered_src_col_name
+    dst_col_name = input_graph.renumber_map.renumbered_dst_col_name
+
     result = [client.submit(call_wcc,
                             Comms.get_session_id(),
                             wf[1],
+                            src_col_name,
+                            dst_col_name,
                             num_verts,
                             num_edges,
                             vertex_partition_offsets,
 
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -25,6 +25,8 @@ import numpy as np
 
 
 def mg_wcc(input_df,
+           src_col_name,
+           dst_col_name,
            num_global_verts,
            num_global_edges,
            vertex_partition_offsets,
@@ -35,8 +37,8 @@ def mg_wcc(input_df,
     cdef size_t handle_size_t = <size_t>handle.getHandle()
     handle_ = <c_connectivity.handle_t*>handle_size_t
 
-    src = input_df['src']
-    dst = input_df['dst']
+    src = input_df[src_col_name]
+    dst = input_df[dst_col_name]
     vertex_t = src.dtype
     if num_global_edges > (2**31 - 1):
         edge_t = np.dtype("int64")
@@ -91,15 +93,15 @@ def mg_wcc(input_df,
                              is_weighted,
                              True,
                              False,
-                             True) 
+                             True)
 
     df = cudf.DataFrame()
     df['vertex'] = cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t)
     df['labels'] = cudf.Series(np.zeros(len(df['vertex']), dtype=vertex_t))
 
     cdef uintptr_t c_labels_val = df['labels'].__cuda_array_interface__['data'][0];
 
-    if vertex_t == np.int32:    
+    if vertex_t == np.int32:
         c_connectivity.call_wcc[int, float](handle_[0],
                                             graph_container,
                                             <int*>c_labels_val)
 
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -25,6 +25,8 @@ import numpy as np
 
 
 def mg_pagerank(input_df,
+                src_col_name,
+                dst_col_name,
                 num_global_verts,
                 num_global_edges,
                 vertex_partition_offsets,
@@ -42,8 +44,8 @@ def mg_pagerank(input_df,
     cdef size_t handle_size_t = <size_t>handle.getHandle()
     handle_ = <c_pagerank.handle_t*>handle_size_t
 
-    src = input_df['src']
-    dst = input_df['dst']
+    src = input_df[src_col_name]
+    dst = input_df[dst_col_name]
     vertex_t = src.dtype
     if num_global_edges > (2**31 - 1):
         edge_t = np.dtype("int64")
@@ -74,7 +76,7 @@ def mg_pagerank(input_df,
     cdef uintptr_t c_edge_weights = <uintptr_t>NULL
     if weights is not None:
       c_edge_weights = weights.__cuda_array_interface__['data'][0]
-    
+
     # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C
     vertex_partition_offsets_host = vertex_partition_offsets.values_host
     cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0]
@@ -104,7 +106,7 @@ def mg_pagerank(input_df,
                              num_global_verts, num_global_edges,
                              is_weighted,
                              False,
-                             True, True) 
+                             True, True)
 
     df = cudf.DataFrame()
     df['vertex'] = cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t)
Original file line number	Diff line number	Diff line change
`@@ -155,9 +155,11 @@`
`155`	`155`	`"gdf['order'] = gdf.index\n",`
`156`	`156`	`"\n",`
`157`	`157`	`"tmp_df, numbering = NumberMap.renumber(gdf, ['src_ip'], ['dst_ip'])\n",`
	`158`	`+ "new_src_col_name = numbering.renumbered_src_col_name\n",`
	`159`	`+ "new_dst_col_name = numbering.renumbered_dst_col_name\n",`
`158`	`160`	`"\n",`
`159`	`161`	`"gdf = gdf.merge(tmp_df, on='order').sort_values('order').set_index(keys='order', drop=True)\n",`
`160`		`- "gdf = gdf.rename(columns={'src': 'src_r', 'dst': 'dst_r'})"`
	`162`	`+ "gdf = gdf.rename(columns={new_src_col_name: 'src_r', new_dst_col_name: 'dst_r'})"`
`161`	`163`	`]`
`162`	`164`	`},`
`163`	`165`	`{`