[REVIEW]Fix out of index errors encountered with sampling on out of index samples #2825
Conversation
…raph into dgl_graphstore_fix
| class CuFeatureStorage: | ||
| """ | ||
| Storage for node/edge feature data. | ||
| """ | ||
|
|
There was a problem hiding this comment.
Removed out of the graph_store file, see below:
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 503 to 583 in fbd5f20
VibhuJawa
left a comment
There was a problem hiding this comment.
Added comments to help reviewers on where each file is coming from
| def _update_feature_map( | ||
| pg_feature_map, feat_name_obj, contains_vector_features, columns | ||
| ): | ||
| """ | ||
| Update the existing feature map `pg_feature_map` based on `feat_name_obj` | ||
| """ |
There was a problem hiding this comment.
Just moved from the graph_store file
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 707 to 749 in fbd5f20
| vid_n = PropertyGraph.vertex_col_name | ||
|
|
||
|
|
||
| def get_subgraph_and_src_range_from_edgelist(edge_list, is_mg, reverse_edges=False): |
There was a problem hiding this comment.
Just moved from this file:
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 676 to 704 in fbd5f20
| def sample_single_sg( | ||
| sg, | ||
| sample_f, | ||
| start_list, | ||
| start_list_dtype, | ||
| start_list_range, | ||
| fanout, | ||
| with_replacement, | ||
| ): | ||
| if isinstance(start_list, dict): | ||
| start_list = cudf.concat(list(start_list.values())) | ||
|
|
||
| # Uniform sampling fails when the dtype | ||
| # of the seed dtype is not same as the node dtype | ||
| start_list = start_list.astype(start_list_dtype) | ||
| # Filter start list by ranges | ||
| # https://github.com/rapidsai/cugraph/blob/branch-22.12/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh | ||
|
|
||
| start_list = start_list[ | ||
| (start_list >= start_list_range[0]) & (start_list <= start_list_range[1]) | ||
| ] | ||
|
|
||
| sampled_df = sample_f( | ||
| sg, | ||
| start_list=start_list, | ||
| fanout_vals=[fanout], | ||
| with_replacement=with_replacement, | ||
| # FIXME: is_edge_ids=True does not seem to do anything | ||
| # issue https://github.com/rapidsai/cugraph/issues/2562 | ||
| ) | ||
| return sampled_df |
There was a problem hiding this comment.
Moved from graph_store:
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 601 to 618 in fbd5f20
| def create_dlpack_d(d): | ||
| dlpack_d = {} | ||
| for k, df in d.items(): | ||
| if len(df) == 0: | ||
| dlpack_d[k] = (None, None, None) | ||
| else: | ||
| dlpack_d[k] = ( | ||
| df[src_n].to_dlpack(), | ||
| df[dst_n].to_dlpack(), | ||
| df[eid_n].to_dlpack(), | ||
| ) | ||
|
|
||
| return dlpack_d |
There was a problem hiding this comment.
Moved from cugraph_store and renamed to create_dlpack_d.
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 586 to 598 in fbd5f20
| def _convert_can_etype_s_to_tup(canonical_etype_s): | ||
| src_type, etype, dst_type = canonical_etype_s.split(",") | ||
| src_type = src_type[2:-1] | ||
| dst_type = dst_type[2:-2] | ||
| etype = etype[2:-1] | ||
| return (src_type, etype, dst_type) |
There was a problem hiding this comment.
Moved from graph_store, see below:
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 668 to 673 in fbd5f20
| def _edge_types_contains_canonical_etype(can_etype, edge_types, edge_dir): | ||
| src_type, _, dst_type = can_etype | ||
| if edge_dir == "in": | ||
| return dst_type in edge_types | ||
| else: | ||
| return src_type in edge_types |
There was a problem hiding this comment.
Moved from graph_store:
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 660 to 665 in fbd5f20
| # FIXME: Remove after we have consistent naming | ||
| # https://github.com/rapidsai/cugraph/issues/2618 | ||
| sg_columns = sg.edgelist.edgelist_df.columns | ||
| if "src" in sg_columns: | ||
| # src for single node graph | ||
| sg_node_dtype = sg.edgelist.edgelist_df["src"].dtype | ||
| elif src_n in sg_columns: | ||
| # _SRC_ for multi-node graphs | ||
| sg_node_dtype = sg.edgelist.edgelist_df[src_n].dtype | ||
| else: | ||
| raise ValueError(f"Source column {src_n} not found in the subgraph") | ||
|
|
||
| return sg_node_dtype |
There was a problem hiding this comment.
Moved from cugraph_store and put in a function.
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 409 to 417 in fbd5f20
| def sample_multiple_sgs( | ||
| sgs, | ||
| sample_f, | ||
| start_list_d, | ||
| start_list_dtype, | ||
| edge_dir, | ||
| fanout, | ||
| with_replacement, | ||
| ): |
There was a problem hiding this comment.
Moved from graph_store:
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 621 to 658 in fbd5f20
| def sample_single_sg( | ||
| sg, | ||
| sample_f, | ||
| start_list, | ||
| start_list_dtype, | ||
| start_list_range, | ||
| fanout, | ||
| with_replacement, | ||
| ): |
There was a problem hiding this comment.
Moved from:
cugraph/python/cugraph/cugraph/gnn/graph_store.py
Lines 601 to 618 in fbd5f20
…/cugraph into fix_out_of_index_error
| # Filter start list by ranges | ||
| # to ensure the seed is within index values | ||
| # see below: | ||
| # https://github.com/rapidsai/cugraph/blob/branch-22.12/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh | ||
| start_list = start_list[ | ||
| (start_list >= start_list_range[0]) & (start_list <= start_list_range[1]) | ||
| ] |
There was a problem hiding this comment.
This is the logic change in this PR that fixes #2828. Also see the test added in this PR.
rlratzel
left a comment
There was a problem hiding this comment.
Just a couple of minor questions/suggestions.
Co-authored-by: Rick Ratzel <3039903+rlratzel@users.noreply.github.com>
Co-authored-by: Rick Ratzel <3039903+rlratzel@users.noreply.github.com>
|
@gpucibot merge |
…ples (#2825) This PR does the following - [x] Ensure we don't sample on out-of-range values Issue: rapidsai/cugraph#2828 - [x] Add tests for the sampling error - [x] Ensure all the DGL examples here pass https://github.com/rapidsai/dgl/blob/6ece904c69687adcd35a5ea41d1f5ca4ea01c0e2/examples/cugraph-pytorch/cugraph-local/rgcn-hetero/README.MD - [x] Refactor out the non-class-specific utilities in preparation for DGL graph service class Authors: - Vibhu Jawa (https://github.com/VibhuJawa) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Joseph Nke (https://github.com/jnke2016) URL: rapidsai/cugraph#2825
This PR does the following:
Issue: [BUG] cugraph storage fails with out of index samples #2828
https://github.com/rapidsai/dgl/blob/6ece904c69687adcd35a5ea41d1f5ca4ea01c0e2/examples/cugraph-pytorch/cugraph-local/rgcn-hetero/README.MD