From 6cc869c126e55674a34934e67cf155fbe07c299c Mon Sep 17 00:00:00 2001 From: baohan <44225751+BaoHhhhhan@users.noreply.github.com> Date: Tue, 6 Jul 2021 17:57:26 +0800 Subject: [PATCH 1/7] Update prod_env_mat.hip.cu speed up format_nlist_b --- source/lib/src/rocm/prod_env_mat.hip.cu | 61 +++++++++++++++++-------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu index af1f8022f7..1ba909ec93 100644 --- a/source/lib/src/rocm/prod_env_mat.hip.cu +++ b/source/lib/src/rocm/prod_env_mat.hip.cu @@ -146,6 +146,29 @@ __global__ void format_nlist_fill_a( } } +template <typename FPTYPE> +__global__ void fill_nei_iter( + int * nei_iter_dev, + const FPTYPE * key, + const int nloc, + const int max_nbor_size, + const int sec_size) +{ + int row = blockIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + const FPTYPE * key_out = key + nloc * max_nbor_size + row * max_nbor_size; + int nei_type_cur = -1, nbor_idx_cur = 0; + int nei_type_pre = -1, nbor_idx_pre = 0; + if (col < max_nbor_size && key_out[col] != key_out[max_nbor_size - 1]){ + if (col >= 1) + decoding_nbor_info(nei_type_pre, nbor_idx_pre, key_out[col - 1]); + decoding_nbor_info(nei_type_cur, nbor_idx_cur, key_out[col]); + } + if (nei_type_cur != nei_type_pre){ + nei_iter_dev[row * sec_size + nei_type_cur] = col; + } +} + template <typename FPTYPE> __global__ void format_nlist_fill_b( int * nlist, @@ -157,23 +180,19 @@ __global__ void format_nlist_fill_b( int * nei_iter_dev, const int max_nbor_size) { - const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - if(idx >= nloc) { - return; - } - - int * row_nlist = nlist + idx * nlist_size; - int * nei_iter = nei_iter_dev + idx * sec_size; - FPTYPE * key_out = key + nloc * max_nbor_size + idx * max_nbor_size; - for (int ii = 0; ii < sec_size; ii++) { - nei_iter[ii] = sec[ii]; - } - - int nei_type = 0, nbor_idx = 0; - for (unsigned int kk = 0; key_out[kk] != key_out[max_nbor_size - 1]; kk++) { 
decoding_nbor_info(nei_type, nbor_idx, key_out[kk]); - if (nei_iter[nei_type] < sec[nei_type + 1]) { - row_nlist[nei_iter[nei_type]++] = nbor_idx; + int row = blockIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + int * nei_iter = nei_iter_dev + row * sec_size; + FPTYPE * key_out = key + nloc * max_nbor_size + row * max_nbor_size; + int * row_nlist = nlist + row * nlist_size; + if (col < max_nbor_size){ + if (key_out[col] != key_out[max_nbor_size - 1]){ + int nei_type = 0, nbor_idx = 0; + decoding_nbor_info(nei_type, nbor_idx, key_out[col]); + int out_indx = col - nei_iter[nei_type] + sec[nei_type]; + if (out_indx < sec[nei_type + 1]){ + row_nlist[out_indx] = nbor_idx; + } } } } @@ -474,8 +493,12 @@ void format_nbor_list_gpu_rocm( key, coord, type, gpu_inlist, nloc, rcut, i_idx); } - - hipLaunchKernelGGL(format_nlist_fill_b, nblock, LEN, 0, 0, + + hipLaunchKernelGGL(fill_nei_iter, dim3((max_nbor_size + LEN - 1) / LEN, nloc) , LEN, 0, 0, + nei_iter, + key, nloc, max_nbor_size, sec.size()); + + hipLaunchKernelGGL(format_nlist_fill_b, dim3((max_nbor_size + LEN - 1) / LEN, nloc), LEN, 0, 0, nlist, nnei, nloc, key, sec_dev, sec.size(), nei_iter, max_nbor_size); } From 802936a51f6d3a7daf5b8d91339095275d41c4e0 Mon Sep 17 00:00:00 2001 From: baohan <44225751+BaoHhhhhan@users.noreply.github.com> Date: Fri, 9 Jul 2021 15:56:32 +0800 Subject: [PATCH 2/7] Update source/lib/src/rocm/prod_env_mat.hip.cu Co-authored-by: Denghui Lu --- source/lib/src/rocm/prod_env_mat.hip.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu index 1ba909ec93..c9b1c9c0f2 100644 --- a/source/lib/src/rocm/prod_env_mat.hip.cu +++ b/source/lib/src/rocm/prod_env_mat.hip.cu @@ -154,7 +154,7 @@ __global__ void fill_nei_iter( const int max_nbor_size, const int sec_size) { - int row = blockIdx.y; + int row = blockIdx.x; int col = blockIdx.x * blockDim.x + threadIdx.x; const FPTYPE * key_out = key + 
nloc * max_nbor_size + row * max_nbor_size; int nei_type_cur = -1, nbor_idx_cur = 0; From 86e4659849f0fab670fc0ffb24ba4ecf240a945c Mon Sep 17 00:00:00 2001 From: baohan <44225751+BaoHhhhhan@users.noreply.github.com> Date: Fri, 9 Jul 2021 15:56:41 +0800 Subject: [PATCH 3/7] Update source/lib/src/rocm/prod_env_mat.hip.cu Co-authored-by: Denghui Lu --- source/lib/src/rocm/prod_env_mat.hip.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu index c9b1c9c0f2..1a23c73464 100644 --- a/source/lib/src/rocm/prod_env_mat.hip.cu +++ b/source/lib/src/rocm/prod_env_mat.hip.cu @@ -155,7 +155,7 @@ __global__ void fill_nei_iter( const int sec_size) { int row = blockIdx.x; - int col = blockIdx.x * blockDim.x + threadIdx.x; + int col = blockIdx.y * blockDim.x + threadIdx.x; const FPTYPE * key_out = key + nloc * max_nbor_size + row * max_nbor_size; int nei_type_cur = -1, nbor_idx_cur = 0; int nei_type_pre = -1, nbor_idx_pre = 0; From 9b4ba5f9171539839468dfa5e968011686d756e1 Mon Sep 17 00:00:00 2001 From: baohan <44225751+BaoHhhhhan@users.noreply.github.com> Date: Fri, 9 Jul 2021 15:56:50 +0800 Subject: [PATCH 4/7] Update source/lib/src/rocm/prod_env_mat.hip.cu Co-authored-by: Denghui Lu --- source/lib/src/rocm/prod_env_mat.hip.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu index 1a23c73464..1c640a81b4 100644 --- a/source/lib/src/rocm/prod_env_mat.hip.cu +++ b/source/lib/src/rocm/prod_env_mat.hip.cu @@ -180,7 +180,7 @@ __global__ void format_nlist_fill_b( int * nei_iter_dev, const int max_nbor_size) { - int row = blockIdx.y; + int row = blockIdx.x; int col = blockIdx.x * blockDim.x + threadIdx.x; int * nei_iter = nei_iter_dev + row * sec_size; FPTYPE * key_out = key + nloc * max_nbor_size + row * max_nbor_size; From 1e1375bcd8a012501b9076ef2a42439116926131 Mon Sep 17 00:00:00 2001 From: 
baohan <44225751+BaoHhhhhan@users.noreply.github.com> Date: Fri, 9 Jul 2021 15:56:57 +0800 Subject: [PATCH 5/7] Update source/lib/src/rocm/prod_env_mat.hip.cu Co-authored-by: Denghui Lu --- source/lib/src/rocm/prod_env_mat.hip.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu index 1c640a81b4..68384b4767 100644 --- a/source/lib/src/rocm/prod_env_mat.hip.cu +++ b/source/lib/src/rocm/prod_env_mat.hip.cu @@ -181,7 +181,7 @@ __global__ void format_nlist_fill_b( const int max_nbor_size) { int row = blockIdx.x; - int col = blockIdx.x * blockDim.x + threadIdx.x; + int col = blockIdx.y * blockDim.x + threadIdx.x; int * nei_iter = nei_iter_dev + row * sec_size; FPTYPE * key_out = key + nloc * max_nbor_size + row * max_nbor_size; int * row_nlist = nlist + row * nlist_size; From 297fdc2a0489b1a5a34a153160e676bd0f7ee572 Mon Sep 17 00:00:00 2001 From: baohan <44225751+BaoHhhhhan@users.noreply.github.com> Date: Fri, 9 Jul 2021 15:57:06 +0800 Subject: [PATCH 6/7] Update source/lib/src/rocm/prod_env_mat.hip.cu Co-authored-by: Denghui Lu --- source/lib/src/rocm/prod_env_mat.hip.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu index 68384b4767..9a3c79d988 100644 --- a/source/lib/src/rocm/prod_env_mat.hip.cu +++ b/source/lib/src/rocm/prod_env_mat.hip.cu @@ -494,7 +494,7 @@ void format_nbor_list_gpu_rocm( coord, type, gpu_inlist, nloc, rcut, i_idx); } - hipLaunchKernelGGL(fill_nei_iter, dim3((max_nbor_size + LEN - 1) / LEN, nloc) , LEN, 0, 0, + hipLaunchKernelGGL(fill_nei_iter, dim3(nloc, (max_nbor_size + LEN - 1) / LEN) , LEN, 0, 0, nei_iter, key, nloc, max_nbor_size, sec.size()); From fccfaadccd38c4449ab379bf5acd1773ac311c47 Mon Sep 17 00:00:00 2001 From: baohan <44225751+BaoHhhhhan@users.noreply.github.com> Date: Fri, 9 Jul 2021 15:57:15 +0800 Subject: [PATCH 7/7] Update 
source/lib/src/rocm/prod_env_mat.hip.cu Co-authored-by: Denghui Lu --- source/lib/src/rocm/prod_env_mat.hip.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu index 9a3c79d988..b1251c6a42 100644 --- a/source/lib/src/rocm/prod_env_mat.hip.cu +++ b/source/lib/src/rocm/prod_env_mat.hip.cu @@ -498,7 +498,7 @@ void format_nbor_list_gpu_rocm( nei_iter, key, nloc, max_nbor_size, sec.size()); - hipLaunchKernelGGL(format_nlist_fill_b, dim3((max_nbor_size + LEN - 1) / LEN, nloc), LEN, 0, 0, + hipLaunchKernelGGL(format_nlist_fill_b, dim3(nloc, (max_nbor_size + LEN - 1) / LEN), LEN, 0, 0, nlist, nnei, nloc, key, sec_dev, sec.size(), nei_iter, max_nbor_size); }