Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion source/api_cc/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ configure_file(
set(opname "deepmd_op")
set(OP_BASE_DIR ${CMAKE_SOURCE_DIR}/../../op)
# file(GLOB OP_SRC ${OP_BASE_DIR}/*.cc)
file(GLOB OP_SRC ${OP_BASE_DIR}/prod_force.cc ${OP_BASE_DIR}/prod_virial.cc ${OP_BASE_DIR}/descrpt.cc ${OP_BASE_DIR}/descrpt_se_a_ef.cc ${OP_BASE_DIR}/descrpt_se_a_ef.cc ${OP_BASE_DIR}/descrpt_se_a_ef_para.cc ${OP_BASE_DIR}/descrpt_se_a_ef_vert.cc ${OP_BASE_DIR}/pair_tab.cc ${OP_BASE_DIR}/prod_force_multi_device.cc ${OP_BASE_DIR}/prod_virial_multi_device.cc ${OP_BASE_DIR}/soft_min.cc ${OP_BASE_DIR}/soft_min_force.cc ${OP_BASE_DIR}/soft_min_virial.cc ${OP_BASE_DIR}/ewald_recp.cc ${OP_BASE_DIR}/gelu_multi_device.cc ${OP_BASE_DIR}/map_aparam.cc ${OP_BASE_DIR}/neighbor_stat.cc ${OP_BASE_DIR}/unaggregated_grad.cc ${OP_BASE_DIR}/tabulate_multi_device.cc ${OP_BASE_DIR}/prod_env_mat_multi_device.cc)
file(GLOB OP_SRC ${OP_BASE_DIR}/custom_op.cc ${OP_BASE_DIR}/prod_force.cc ${OP_BASE_DIR}/prod_virial.cc ${OP_BASE_DIR}/descrpt.cc ${OP_BASE_DIR}/descrpt_se_a_ef.cc ${OP_BASE_DIR}/descrpt_se_a_ef.cc ${OP_BASE_DIR}/descrpt_se_a_ef_para.cc ${OP_BASE_DIR}/descrpt_se_a_ef_vert.cc ${OP_BASE_DIR}/pair_tab.cc ${OP_BASE_DIR}/prod_force_multi_device.cc ${OP_BASE_DIR}/prod_virial_multi_device.cc ${OP_BASE_DIR}/soft_min.cc ${OP_BASE_DIR}/soft_min_force.cc ${OP_BASE_DIR}/soft_min_virial.cc ${OP_BASE_DIR}/ewald_recp.cc ${OP_BASE_DIR}/gelu_multi_device.cc ${OP_BASE_DIR}/map_aparam.cc ${OP_BASE_DIR}/neighbor_stat.cc ${OP_BASE_DIR}/unaggregated_grad.cc ${OP_BASE_DIR}/tabulate_multi_device.cc ${OP_BASE_DIR}/prod_env_mat_multi_device.cc)
add_library(${opname} SHARED ${OP_SRC})

list (APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/../../cmake/)
Expand Down
3 changes: 2 additions & 1 deletion source/lib/include/SimulationRegion_Impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <limits>
#include <typeinfo>
#include <stdexcept>
#include "errors.h"

// using namespace std;

Expand Down Expand Up @@ -502,7 +503,7 @@ computeVolume()
boxt[0*3+2] * (boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]);
volumei = static_cast<double>(1.)/volume;
if (volume < 0) {
throw std::runtime_error("Negative volume detected. Please make sure the simulation cell obeys the right-hand rule.");
throw deepmd::deepmd_exception("Negative volume detected. Please make sure the simulation cell obeys the right-hand rule.");
}
}

Expand Down
20 changes: 20 additions & 0 deletions source/lib/include/errors.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

#include <stdexcept>
#include <string>

namespace deepmd {

/// Generic DeePMD-kit failure.
///
/// Derives from std::runtime_error, so handlers written for
/// std::runtime_error or std::exception also receive it.
struct deepmd_exception : public std::runtime_error {
  /// Creates an exception whose what() is the fixed text "DeePMD-kit Error!".
  deepmd_exception()
      : std::runtime_error("DeePMD-kit Error!") {}

  /// Creates an exception whose what() is "DeePMD-kit Error: " followed by msg.
  deepmd_exception(const std::string& msg)
      : std::runtime_error("DeePMD-kit Error: " + msg) {}
};

/// Out-of-memory failure.
///
/// Derives directly from std::runtime_error and NOT from deepmd_exception,
/// so a handler for deepmd_exception does not receive it; it needs its own
/// handler (or one for std::runtime_error).
struct deepmd_exception_oom : public std::runtime_error {
  /// Creates an exception whose what() is the fixed text "DeePMD-kit OOM!".
  deepmd_exception_oom()
      : std::runtime_error("DeePMD-kit OOM!") {}

  /// Creates an exception whose what() is "DeePMD-kit OOM: " followed by msg.
  deepmd_exception_oom(const std::string& msg)
      : std::runtime_error("DeePMD-kit OOM: " + msg) {}
};

}  // namespace deepmd
9 changes: 5 additions & 4 deletions source/lib/include/gpu_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <stdio.h>
#include <assert.h>
#include <cuda_runtime.h>
#include "errors.h"

#define GPU_MAX_NBOR_SIZE 4096
#define DPErrcheck(res) {DPAssert((res), __FILE__, __LINE__);}
Expand All @@ -12,7 +13,6 @@ inline void DPAssert(cudaError_t code, const char *file, int line, bool abort=tr
fprintf(stderr,"cuda assert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (code == 2) {
// out of memory
// TODO: I have no idea how to thorw errors back to Python interface
fprintf(stderr, "Your memory is not enough, thus an error has been raised " \
"above. You need to take the following actions:\n" \
"1. Check if the network size of the model is too large.\n" \
Expand All @@ -22,8 +22,9 @@ inline void DPAssert(cudaError_t code, const char *file, int line, bool abort=tr
"4. Check if another program is using the same GPU by execuating `nvidia-smi`. " \
"The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` " \
"environment variable.\n");
if (abort) throw deepmd::deepmd_exception_oom("CUDA Assert");
}
if (abort) exit(code);
if (abort) throw deepmd::deepmd_exception("CUDA Assert");
}
}

Expand All @@ -34,7 +35,6 @@ inline void nborAssert(cudaError_t code, const char *file, int line, bool abort=
fprintf(stderr,"cuda assert: %s %s %d\n", "DeePMD-kit:\tillegal nbor list sorting", file, line);
if (code == 2) {
// out of memory
// TODO: I have no idea how to thorw errors back to Python interface
fprintf(stderr, "Your memory is not enough, thus an error has been raised " \
"above. You need to take the following actions:\n" \
"1. Check if the network size of the model is too large.\n" \
Expand All @@ -44,8 +44,9 @@ inline void nborAssert(cudaError_t code, const char *file, int line, bool abort=
"4. Check if another program is using the same GPU by execuating `nvidia-smi`. " \
"The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` " \
"environment variable.\n");
if (abort) throw deepmd::deepmd_exception_oom("CUDA Assert");
}
if (abort) exit(code);
if (abort) throw deepmd::deepmd_exception("CUDA Assert");
}
}

Expand Down
5 changes: 3 additions & 2 deletions source/lib/include/gpu_rocm.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,23 @@
#include<hip/hip_runtime.h>
//#include<rocprim/rocprim.hpp>
//#include <hipcub/hipcub.hpp>
#include "errors.h"

#define GPU_MAX_NBOR_SIZE 4096

#define DPErrcheck(res) { DPAssert((res), __FILE__, __LINE__); }
// Checks the status of a HIP runtime call (normally invoked through the
// DPErrcheck macro, which supplies __FILE__ and __LINE__).
//
// code:  status to check; hipSuccess makes this a no-op.
// file:  source file of the call site, used only in the diagnostic.
// line:  source line of the call site, used only in the diagnostic.
// abort: when true (the default) a failure is raised as
//        deepmd::deepmd_exception; when false the failure is only logged.
//
// The failure is reported by throwing rather than by calling exit(), so the
// caller can translate it into a recoverable error. A preceding exit(code)
// would terminate the process and make the throw unreachable.
inline void DPAssert(hipError_t code, const char *file, int line, bool abort=true) {
  if (code == hipSuccess) {
    return;
  }
  fprintf(stderr,"hip assert: %s %s %d\n", hipGetErrorString(code), file, line);
  // This is the HIP backend, so the message names HIP rather than CUDA.
  if (abort) throw deepmd::deepmd_exception("HIP Assert");
}

#define nborErrcheck(res) {nborAssert((res), __FILE__, __LINE__);}
// Checks the status of a neighbor-list related HIP call (normally invoked
// through the nborErrcheck macro, which supplies __FILE__ and __LINE__).
//
// code:  status to check; hipSuccess makes this a no-op.
// file:  source file of the call site, used only in the diagnostic.
// line:  source line of the call site, used only in the diagnostic.
// abort: when true (the default) a failure is raised as
//        deepmd::deepmd_exception; when false the failure is only logged.
//
// The diagnostic always prints the fixed "illegal nbor list sorting" text
// instead of the HIP error string. The failure is reported by throwing
// rather than by calling exit(), which would terminate the process and make
// the throw unreachable.
inline void nborAssert(hipError_t code, const char *file, int line, bool abort=true) {
  if (code == hipSuccess) {
    return;
  }
  fprintf(stderr,"hip assert: %s %s %d\n", "DeePMD-kit:\tillegal nbor list sorting", file, line);
  // This is the HIP backend, so the message names HIP rather than CUDA.
  if (abort) throw deepmd::deepmd_exception("HIP Assert");
}

Expand Down
3 changes: 2 additions & 1 deletion source/lib/src/fmt_nlist.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "fmt_nlist.h"
#include "SimulationRegion.h"
#include <iostream>
#include "errors.h"

using namespace deepmd;

Expand Down Expand Up @@ -185,7 +186,7 @@ format_nlist_cpu (
<< fmt_ilist.size()
<< " which does not match "
<< nnei << std::endl;
exit(1);
throw deepmd::deepmd_exception();
}
std::copy(fmt_ilist.begin(), fmt_ilist.end(), cur_nlist);
}
Expand Down
3 changes: 2 additions & 1 deletion source/lib/src/pair_tab.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <cassert>
#include <vector>
#include "pair_tab.h"
#include "errors.h"

inline
void _pair_tabulated_inter (
Expand All @@ -25,7 +26,7 @@ void _pair_tabulated_inter (
// std::cout << rr << " " << rmin << " " << hh << " " << uu << std::endl;
if (uu < 0) {
std::cerr << "coord go beyond table lower boundary" << std::endl;
exit(1);
throw deepmd::deepmd_exception();
}
int idx = uu;
if (idx >= nspline) {
Expand Down
3 changes: 2 additions & 1 deletion source/lib/src/prod_force.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <stdexcept>
#include <cstring>
#include "prod_force.h"
#include "errors.h"

inline void
make_index_range (
Expand All @@ -14,7 +15,7 @@ make_index_range (
idx_end = nei_idx * 4 + 4;
}
else {
throw std::runtime_error("should no reach here");
throw deepmd::deepmd_exception("should no reach here");
}
}

Expand Down
3 changes: 2 additions & 1 deletion source/lib/src/prod_force_grad.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <stdexcept>
#include <cstring>
#include "prod_force_grad.h"
#include "errors.h"

inline void
make_index_range (
Expand All @@ -15,7 +16,7 @@ make_index_range (
idx_end = nei_idx * 4 + 4;
}
else {
throw std::runtime_error("should no reach here");
throw deepmd::deepmd_exception("should no reach here");
}
}

Expand Down
3 changes: 2 additions & 1 deletion source/lib/src/prod_virial.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <stdexcept>
#include <cstring>
#include "prod_virial.h"
#include "errors.h"

inline void
make_index_range (
Expand All @@ -15,7 +16,7 @@ make_index_range (
idx_end = nei_idx * 4 + 4;
}
else {
throw std::runtime_error("should no reach here");
throw deepmd::deepmd_exception("should no reach here");
}
}

Expand Down
3 changes: 2 additions & 1 deletion source/lib/src/prod_virial_grad.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <stdexcept>
#include <cstring>
#include "prod_virial_grad.h"
#include "errors.h"

inline void
make_index_range (
Expand All @@ -14,7 +15,7 @@ make_index_range (
idx_end = nei_idx * 4 + 4;
}
else {
throw std::runtime_error("should no reach here");
throw deepmd::deepmd_exception("should no reach here");
}
}

Expand Down
3 changes: 2 additions & 1 deletion source/lib/src/region.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <stdexcept>
#include <algorithm>
#include "region.h"
#include "errors.h"
#define BOXT_DIM 9

using namespace deepmd;
Expand Down Expand Up @@ -33,7 +34,7 @@ compute_volume(const FPTYPE * boxt)
boxt[0*3+1] * (boxt[1*3+0]*boxt[2*3+2] - boxt[2*3+0]*boxt[1*3+2]) +
boxt[0*3+2] * (boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]);
if (volume < 0) {
throw std::runtime_error("Negative volume detected. Please make sure the simulation cell obeys the right-hand rule.");
throw deepmd::deepmd_exception("Negative volume detected. Please make sure the simulation cell obeys the right-hand rule.");
}
return volume;
}
Expand Down
8 changes: 4 additions & 4 deletions source/op/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
set(OP_LIB ${PROJECT_SOURCE_DIR}/lib/src/SimulationRegion.cpp ${PROJECT_SOURCE_DIR}/lib/src/neighbor_list.cc)

set (OP_CXX_FLAG -D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI} )
file(GLOB OP_SRC prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a_ef.cc descrpt_se_a_ef.cc descrpt_se_a_ef_para.cc descrpt_se_a_ef_vert.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ewald_recp.cc gelu_multi_device.cc map_aparam.cc neighbor_stat.cc unaggregated_grad.cc tabulate_multi_device.cc prod_env_mat_multi_device.cc)
file(GLOB OP_CUDA_SRC prod_force.cc prod_virial.cc descrpt.cc prod_env_mat_multi_device.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc gelu_multi_device.cc tabulate_multi_device.cc)
file(GLOB OP_ROCM_SRC prod_force.cc prod_virial.cc descrpt.cc prod_env_mat_multi_device.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc gelu_multi_device.cc tabulate_multi_device.cc)
file(GLOB OP_GRADS_SRC prod_force_grad.cc prod_force_grad_multi_device.cc prod_virial_grad.cc prod_virial_grad_multi_device.cc soft_min_force_grad.cc soft_min_virial_grad.cc )
file(GLOB OP_SRC custom_op.cc prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a_ef.cc descrpt_se_a_ef.cc descrpt_se_a_ef_para.cc descrpt_se_a_ef_vert.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ewald_recp.cc gelu_multi_device.cc map_aparam.cc neighbor_stat.cc unaggregated_grad.cc tabulate_multi_device.cc prod_env_mat_multi_device.cc)
file(GLOB OP_CUDA_SRC custom_op.cc prod_force.cc prod_virial.cc descrpt.cc prod_env_mat_multi_device.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc gelu_multi_device.cc tabulate_multi_device.cc)
file(GLOB OP_ROCM_SRC custom_op.cc prod_force.cc prod_virial.cc descrpt.cc prod_env_mat_multi_device.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc gelu_multi_device.cc tabulate_multi_device.cc)
file(GLOB OP_GRADS_SRC custom_op.cc prod_force_grad.cc prod_force_grad_multi_device.cc prod_virial_grad.cc prod_virial_grad_multi_device.cc soft_min_force_grad.cc soft_min_virial_grad.cc )
file(GLOB OP_PY *.py)

if (BUILD_CPP_IF)
Expand Down
20 changes: 20 additions & 0 deletions source/op/custom_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include "custom_op.h"
#include "errors.h"

namespace deepmd {

// Runs ff(context) and turns DeePMD-kit exceptions into a failure status
// recorded on the kernel context instead of letting them escape.
//
// context: kernel context handed to ff and used to report the failure.
// ff:      the computation to run.
//
// Mapping:
//   deepmd_exception_oom -> errors::ResourceExhausted
//   deepmd_exception     -> errors::Internal
// Exceptions of any other type are not handled here and propagate to the
// caller.
//
// Note: __FILE__ / __LINE__ in the messages expand at this spot, so they
// identify this translation unit, not the place where the exception was
// thrown.
void safe_compute(OpKernelContext* context, std::function<void(OpKernelContext*)> ff)
{
  try {
    ff(context);
  }
  catch (const deepmd::deepmd_exception_oom& oom_error) {
    OP_REQUIRES_OK(
        context,
        errors::ResourceExhausted("Operation received an exception: ", oom_error.what(),
                                  ", in file ",__FILE__, ":", __LINE__));
  }
  catch (const deepmd::deepmd_exception& generic_error) {
    OP_REQUIRES_OK(
        context,
        errors::Internal("Operation received an exception: ", generic_error.what(),
                         ", in file ",__FILE__, ":", __LINE__));
  }
}

}  // namespace deepmd
4 changes: 4 additions & 0 deletions source/op/custom_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,8 @@ struct DeviceFunctor {
device = "GPU";
}
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
};

namespace deepmd {
// Calls ff(context). A deepmd_exception_oom or deepmd_exception thrown by ff
// is caught by the implementation in custom_op.cc and reported on `context`
// (as a ResourceExhausted or Internal error respectively) instead of being
// propagated. Other exception types are not caught.
void safe_compute(OpKernelContext* context, std::function<void(OpKernelContext*)> ff);
};
9 changes: 7 additions & 2 deletions source/op/descrpt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "ComputeDescriptor.h"
#include "neighbor_list.h"
#include "fmt_nlist.h"
#include "errors.h"

typedef double boxtensor_t ;
typedef double compute_t;
Expand Down Expand Up @@ -49,6 +50,10 @@ class DescrptOp : public OpKernel {
}

// Kernel entry point. The actual work lives in _Compute; it is run through
// deepmd::safe_compute, which receives the context and the callable to
// invoke with it.
void Compute(OpKernelContext* context) override {
  auto run_kernel = [this](OpKernelContext* ctx) { _Compute(ctx); };
  deepmd::safe_compute(context, run_kernel);
}

void _Compute(OpKernelContext* context) {
// Grab the input tensor
const Tensor& coord_tensor = context->input(0);
const Tensor& type_tensor = context->input(1);
Expand Down Expand Up @@ -105,7 +110,7 @@ class DescrptOp : public OpKernel {
nei_mode = -1;
}
else {
throw std::runtime_error("invalid mesh tensor");
throw deepmd::deepmd_exception("invalid mesh tensor");
}
bool b_pbc = true;
// if region is given extended, do not use pbc
Expand Down Expand Up @@ -254,7 +259,7 @@ class DescrptOp : public OpKernel {
::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
}
else {
throw std::runtime_error("unknow neighbor mode");
throw deepmd::deepmd_exception("unknow neighbor mode");
}

// loop over atoms, compute descriptors for each atom
Expand Down
9 changes: 7 additions & 2 deletions source/op/descrpt_se_a_ef.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "ComputeDescriptor.h"
#include "neighbor_list.h"
#include "fmt_nlist.h"
#include "errors.h"

typedef double boxtensor_t ;
typedef double compute_t;
Expand Down Expand Up @@ -49,6 +50,10 @@ class DescrptSeAEfOp : public OpKernel {
}

// Kernel entry point. The actual work lives in _Compute; it is run through
// deepmd::safe_compute, which receives the context and the callable to
// invoke with it.
void Compute(OpKernelContext* context) override {
  auto run_kernel = [this](OpKernelContext* ctx) { _Compute(ctx); };
  deepmd::safe_compute(context, run_kernel);
}

void _Compute(OpKernelContext* context) {
// Grab the input tensor
int context_input_index = 0;
const Tensor& coord_tensor = context->input(context_input_index++);
Expand Down Expand Up @@ -112,7 +117,7 @@ class DescrptSeAEfOp : public OpKernel {
nei_mode = -1;
}
else {
throw std::runtime_error("invalid mesh tensor");
throw deepmd::deepmd_exception("invalid mesh tensor");
}
bool b_pbc = true;
// if region is given extended, do not use pbc
Expand Down Expand Up @@ -267,7 +272,7 @@ class DescrptSeAEfOp : public OpKernel {
::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
}
else {
throw std::runtime_error("unknow neighbor mode");
throw deepmd::deepmd_exception("unknow neighbor mode");
}

// loop over atoms, compute descriptors for each atom
Expand Down
9 changes: 7 additions & 2 deletions source/op/descrpt_se_a_ef_para.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "ComputeDescriptor.h"
#include "neighbor_list.h"
#include "fmt_nlist.h"
#include "errors.h"

typedef double boxtensor_t ;
typedef double compute_t;
Expand Down Expand Up @@ -48,6 +49,10 @@ class DescrptSeAEfParaOp : public OpKernel {
}

// Kernel entry point. The actual work lives in _Compute; it is run through
// deepmd::safe_compute, which receives the context and the callable to
// invoke with it.
void Compute(OpKernelContext* context) override {
  auto run_kernel = [this](OpKernelContext* ctx) { _Compute(ctx); };
  deepmd::safe_compute(context, run_kernel);
}

void _Compute(OpKernelContext* context) {
// Grab the input tensor
int context_input_index = 0;
const Tensor& coord_tensor = context->input(context_input_index++);
Expand Down Expand Up @@ -111,7 +116,7 @@ class DescrptSeAEfParaOp : public OpKernel {
nei_mode = -1;
}
else {
throw std::runtime_error("invalid mesh tensor");
throw deepmd::deepmd_exception("invalid mesh tensor");
}
bool b_pbc = true;
// if region is given extended, do not use pbc
Expand Down Expand Up @@ -266,7 +271,7 @@ class DescrptSeAEfParaOp : public OpKernel {
::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
}
else {
throw std::runtime_error("unknow neighbor mode");
throw deepmd::deepmd_exception("unknow neighbor mode");
}

// loop over atoms, compute descriptors for each atom
Expand Down
Loading