Skip to content
22 changes: 10 additions & 12 deletions source/api_cc/src/DeepPotPT.cc
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
nlist_data.copy_from_nlist(lmp_list);
nlist_data.shuffle_exclude_empty(fwd_map);
nlist_data.padding();
if (do_message_passing == 1 && nghost > 0) {
if (do_message_passing == 1) {
int nswap = lmp_list.nswap;
torch::Tensor sendproc_tensor =
torch::from_blob(lmp_list.sendproc, {nswap}, int32_option);
Expand All @@ -180,10 +180,14 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
torch::from_blob(lmp_list.recvnum, {nswap}, int32_option);
torch::Tensor sendnum_tensor =
torch::from_blob(lmp_list.sendnum, {nswap}, int32_option);
torch::Tensor communicator_tensor = torch::from_blob(
const_cast<void*>(lmp_list.world), {1}, torch::kInt64);
// torch::Tensor communicator_tensor =
// torch::tensor(lmp_list.world, int32_option);
torch::Tensor communicator_tensor;
if (lmp_list.world == 0) {
communicator_tensor = torch::empty({1}, torch::kInt64);
} else {
communicator_tensor = torch::from_blob(
const_cast<void*>(lmp_list.world), {1}, torch::kInt64);
}

torch::Tensor nswap_tensor = torch::tensor(nswap, int32_option);
int total_send =
std::accumulate(lmp_list.sendnum, lmp_list.sendnum + nswap, 0);
Expand All @@ -196,12 +200,6 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
comm_dict.insert("recv_num", recvnum_tensor);
comm_dict.insert("communicator", communicator_tensor);
}
if (do_message_passing == 1 && nghost == 0) {
// When there are no ghost atoms (e.g. serial nopbc),
// an arange(nloc) mapping is sufficient.
auto option = torch::TensorOptions().device(device).dtype(torch::kInt64);
mapping_tensor = at::arange(nloc_real, option).unsqueeze(0);
}
}
at::Tensor firstneigh = createNlistTensor(nlist_data.jlist);
firstneigh_tensor = firstneigh.to(torch::kInt64).to(device);
Expand All @@ -224,7 +222,7 @@ void DeepPotPT::compute(ENERGYVTYPE& ener,
.to(device);
}
c10::Dict<c10::IValue, c10::IValue> outputs =
(do_message_passing == 1 && nghost > 0)
(do_message_passing == 1)
? module
.run_method("forward_lower", coord_wrapped_Tensor, atype_Tensor,
firstneigh_tensor, mapping_tensor, fparam_tensor,
Expand Down
8 changes: 0 additions & 8 deletions source/lmp/tests/test_lammps_dpa_pt_nopbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,14 +681,6 @@ def test_pair_deepmd_si(lammps_si):
[(["--balance"],), ([],)],
)
def test_pair_deepmd_mpi(balance_args: list):
if balance_args == []:
# python:5331 terminated with signal 11 at PC=7f3e940e3806 SP=7ffd5787edc0. Backtrace:
# /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x95806)[0x7f3e940e3806]
# /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x8f76e)[0x7f3e940dd76e]
# /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0x9a38a)[0x7f3e940e838a]
# /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(_Z9border_opRKN2at6TensorES2_S2_S2_S2_S2_S2_S2_S2_+0x8e)[0x7f3e940dda63]
# /home/runner/work/deepmd-kit/deepmd-kit/dp_test/lib/libdeepmd_op_pt.so(+0xaeac3)[0x7f3e940fcac3]
pytest.skip(reason="Known segfault, see comments for details")
with tempfile.NamedTemporaryFile() as f:
sp.check_call(
[
Expand Down
63 changes: 37 additions & 26 deletions source/op/pt/comm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,18 @@ class Border : public torch::autograd::Function<Border> {
int mpi_init = 0;
MPI_Initialized(&mpi_init);
int cuda_aware = 1;
int me;
int me = 0;
MPI_Comm world;
int world_size = 0;
unpack_communicator(communicator_tensor, world);
MPI_Comm_rank(world, &me);
MPI_Comm_size(world, &world_size);
if (mpi_init) {
unpack_communicator(communicator_tensor, world);
MPI_Comm_rank(world, &me);
MPI_Comm_size(world, &world_size);
}
MPI_Datatype mpi_type = get_mpi_type<FPTYPE>();
MPI_Request request;
#if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
if (world_size != 1) {
if (world_size >= 1) {
int version, subversion;
MPI_Get_version(&version, &subversion);
if (version >= 4) {
Expand All @@ -120,11 +122,15 @@ class Border : public torch::autograd::Function<Border> {
for (int iswap = 0; iswap < nswap; ++iswap) {
int nrecv = recvnum[iswap];
int nsend = sendnum[iswap];
torch::Tensor isendlist =
torch::from_blob(sendlist[iswap], {nsend}, int32_options)
.to(recv_g1_tensor.device());
torch::Tensor send_g1_tensor = recv_g1_tensor.index_select(0, isendlist);
FPTYPE* send_g1 = send_g1_tensor.data_ptr<FPTYPE>();
torch::Tensor isendlist;
torch::Tensor send_g1_tensor;
FPTYPE* send_g1;
if (nsend != 0) {
isendlist = torch::from_blob(sendlist[iswap], {nsend}, int32_options)
.to(recv_g1_tensor.device());
send_g1_tensor = recv_g1_tensor.index_select(0, isendlist);
send_g1 = send_g1_tensor.data_ptr<FPTYPE>();
}
#ifdef USE_MPI
if (sendproc[iswap] != me) {
if (nrecv) {
Expand Down Expand Up @@ -207,15 +213,17 @@ class Border : public torch::autograd::Function<Border> {
MPI_Initialized(&mpi_init);
int world_size = 0;
int cuda_aware = 1;
int me = 0;
MPI_Comm world;
unpack_communicator(communicator_tensor, world);
int me;
MPI_Comm_rank(world, &me);
MPI_Comm_size(world, &world_size);
if (mpi_init) {
unpack_communicator(communicator_tensor, world);
MPI_Comm_rank(world, &me);
MPI_Comm_size(world, &world_size);
}
MPI_Datatype mpi_type = get_mpi_type<FPTYPE>();
MPI_Request request;
#if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM)
if (world_size != 1) {
if (world_size >= 1) {
int version, subversion;
MPI_Get_version(&version, &subversion);
if (version >= 4) {
Expand Down Expand Up @@ -248,17 +256,20 @@ class Border : public torch::autograd::Function<Border> {
int nlocal = nlocal_tensor.item<int>();
int nghost = nghost_tensor.item<int>();
int ntotal = nlocal + nghost;

torch::Tensor send_g1_tensor = d_local_g1_tensor;

int max_recvnum = sendnum_tensor.max().item<int>();
auto options = torch::TensorOptions()
.dtype(d_local_g1_tensor.dtype())
.device(d_local_g1_tensor.device());
torch::Tensor recv_g1_tensor =
torch::empty({max_recvnum, tensor_size}, options);
FPTYPE* recv_g1 = recv_g1_tensor.data_ptr<FPTYPE>();
FPTYPE* send_g1 = send_g1_tensor.data_ptr<FPTYPE>() + ntotal * tensor_size;
torch::Tensor send_g1_tensor;
torch::Tensor recv_g1_tensor;
FPTYPE* recv_g1;
FPTYPE* send_g1;
if (nswap != 0) {
send_g1_tensor = d_local_g1_tensor;
int max_recvnum = sendnum_tensor.max().item<int>();
auto options = torch::TensorOptions()
.dtype(d_local_g1_tensor.dtype())
.device(d_local_g1_tensor.device());
recv_g1_tensor = torch::empty({max_recvnum, tensor_size}, options);
recv_g1 = recv_g1_tensor.data_ptr<FPTYPE>();
send_g1 = send_g1_tensor.data_ptr<FPTYPE>() + ntotal * tensor_size;
}

int end = ntotal;
auto int32_options = torch::TensorOptions().dtype(torch::kInt32);
Expand Down