From 70cb2263c40fcd1e0a9c9a72c24f1ec831c0650e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=B3=BD=E5=AE=87?= Date: Wed, 30 Jun 2021 02:48:02 +0800 Subject: [PATCH 1/3] Synchronize CUDA _r modifications to ROCM --- source/lib/src/rocm/prod_force.hip.cu | 10 +++++----- source/lib/src/rocm/prod_virial.hip.cu | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/source/lib/src/rocm/prod_force.hip.cu b/source/lib/src/rocm/prod_force.hip.cu index 9a0b07e282..48b12dfa50 100644 --- a/source/lib/src/rocm/prod_force.hip.cu +++ b/source/lib/src/rocm/prod_force.hip.cu @@ -80,11 +80,11 @@ __global__ void force_deriv_wrt_neighbors_r( const int nnei) { // idy -> nnei - const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int idy = blockIdx.y; + const unsigned int idx = blockIdx.x; + const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x; const unsigned int idz = threadIdx.y; const int ndescrpt = nnei * 1; - if (idx >= nloc) { + if (idy >= nnei) { return; } // deriv wrt neighbors @@ -146,8 +146,8 @@ namespace deepmd { net_deriv, in_deriv, ndescrpt); const int LEN = 64; - const int nblock = (nloc + LEN -1) / LEN; - dim3 block_grid(nblock, nnei); + const int nblock = (nnei + LEN -1) / LEN; + dim3 block_grid(nloc, nblock); dim3 thread_grid(LEN, 3); hipLaunchKernelGGL(force_deriv_wrt_neighbors_r, block_grid, thread_grid, 0, 0, force, diff --git a/source/lib/src/rocm/prod_virial.hip.cu b/source/lib/src/rocm/prod_virial.hip.cu index d6ef5546e1..ff8017a687 100644 --- a/source/lib/src/rocm/prod_virial.hip.cu +++ b/source/lib/src/rocm/prod_virial.hip.cu @@ -80,12 +80,12 @@ __global__ void virial_deriv_wrt_neighbors_r( // idz = dd0 * 3 + dd1 // dd0 = idz / 3 // dd1 = idz % 3 - const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int idy = blockIdx.y; + const unsigned int idx = blockIdx.x; + const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x; const unsigned int idz = threadIdx.y; const int ndescrpt = nnei * 1; - if (idx >= nloc) { + if (idy >= nnei) { return; } int j_idx = nlist[idx * nnei + idy]; @@ -154,8 +154,8 @@ void prod_virial_r_gpu_rocm( 0.0, sizeof(FPTYPE) * 9 * nall)); const int LEN = 16; - int nblock = (nloc + LEN -1) / LEN; - dim3 block_grid(nblock, nnei); + int nblock = (nnei + LEN -1) / LEN; + dim3 block_grid(nloc, nblock); dim3 thread_grid(LEN, 9); // compute virial of a frame hipLaunchKernelGGL(virial_deriv_wrt_neighbors_r, block_grid, thread_grid, 0, 0, From 8588bbb106c98ccd3aa88cf354a8f316c9867405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=B3=BD=E5=AE=87?= Date: Mon, 5 Jul 2021 16:36:09 +0800 Subject: [PATCH 2/3] fix bug 824 and Synchronize updates to CUDA code bug 824 Fixed it in ROCM because of a bug caused by an array going out of bounds --- source/lib/src/rocm/coord.hip.cu | 4 ++-- source/lib/src/rocm/gelu.hip.cu | 12 ++++++++++++ source/lib/src/rocm/prod_env_mat.hip.cu | 3 +-- source/lib/src/rocm/tabulate.hip.cu | 2 ++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/source/lib/src/rocm/coord.hip.cu b/source/lib/src/rocm/coord.hip.cu index b5516d3603..12c797ab3e 100644 --- a/source/lib/src/rocm/coord.hip.cu +++ b/source/lib/src/rocm/coord.hip.cu @@ -66,8 +66,8 @@ __global__ void normalize_one( FPTYPE inter[3]; phys2Inter(inter,out_c+idy*3,rec_boxt); for (int dd = 0; dd < 3; ++dd) { - while(inter[dd] >= 1.) inter[dd] -= 1.; - while(inter[dd] < 0.) inter[dd] += 1.; + inter[dd]=(FPTYPE)fmod((double)inter[dd], 1.); + if (inter[dd] < 0.) inter[dd] += 1.; } inter2Phys(out_c+idy*3,inter,boxt); } diff --git a/source/lib/src/rocm/gelu.hip.cu b/source/lib/src/rocm/gelu.hip.cu index d8f02ae9fc..83e7a3be6d 100644 --- a/source/lib/src/rocm/gelu.hip.cu +++ b/source/lib/src/rocm/gelu.hip.cu @@ -56,6 +56,10 @@ namespace deepmd { const FPTYPE * xx, const int size) { + if(size <= 0) + { + return ; + } const int THREAD_ITEMS = 1024; const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS; @@ -69,6 +73,10 @@ namespace deepmd { const FPTYPE * dy, const int size) { + if(size <= 0) + { + return; + } const int THREAD_ITEMS = 1024; const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS; @@ -83,6 +91,10 @@ namespace deepmd { const FPTYPE * dy_2, const int size) { + if(size <= 0) + { + return; + } const int THREAD_ITEMS = 1024; const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS; diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu index 3d1e66c006..ecca85600b 100644 --- a/source/lib/src/rocm/prod_env_mat.hip.cu +++ b/source/lib/src/rocm/prod_env_mat.hip.cu @@ -504,7 +504,7 @@ void prod_env_mat_a_gpu_rocm( const int ndescrpt = nnei * 4; hipErrcheck(hipMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt)); hipErrcheck(hipMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3)); - hipErrcheck(hipMemset(rij, 0., sizeof(FPTYPE) * nloc * nnei * 3)); + hipErrcheck(hipMemset(rij, 0.0, sizeof(FPTYPE) * nloc * nnei * 3)); format_nbor_list_gpu_rocm( nlist, @@ -541,7 +541,6 @@ void prod_env_mat_r_gpu_rocm( const int ndescrpt = nnei * 1; hipErrcheck(hipMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt)); hipErrcheck(hipMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3)); - hipErrcheck(hipMemset(rij, 0., sizeof(FPTYPE) * nloc * nnei * 3)); format_nbor_list_gpu_rocm( nlist, diff --git a/source/lib/src/rocm/tabulate.hip.cu b/source/lib/src/rocm/tabulate.hip.cu index 80aec8af8f..b4097083b0 100644 --- a/source/lib/src/rocm/tabulate.hip.cu +++ b/source/lib/src/rocm/tabulate.hip.cu @@ -209,6 +209,7 @@ template const int nnei, const int last_layer_size) { + if(nloc <= 0){return;} hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_fifth_order_polynomial), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, out, table, em_x, em, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size); @@ -227,6 +228,7 @@ template const int nnei, const int last_layer_size) { + if( nloc<=0 ) { return;} hipErrcheck(hipMemset( dy_dem_x, 0.0, sizeof(FPTYPE) * nloc * nnei)); From 1f0cb9a03db65fe6c5e542f6d30566001b407142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=B3=BD=E5=AE=87?= Date: Mon, 5 Jul 2021 16:54:06 +0800 Subject: [PATCH 3/3] Update prod_env_mat.hip.cu --- source/lib/src/rocm/prod_env_mat.hip.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu index ecca85600b..af1f8022f7 100644 --- a/source/lib/src/rocm/prod_env_mat.hip.cu +++ b/source/lib/src/rocm/prod_env_mat.hip.cu @@ -541,6 +541,7 @@ void prod_env_mat_r_gpu_rocm( const int ndescrpt = nnei * 1; hipErrcheck(hipMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt)); hipErrcheck(hipMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3)); + hipErrcheck(hipMemset(rij, 0.0, sizeof(FPTYPE) * nloc * nnei * 3)); format_nbor_list_gpu_rocm( nlist,