diff --git a/source/api_cc/tests/CMakeLists.txt b/source/api_cc/tests/CMakeLists.txt index 6768ff2ee6..1a5b56fca0 100644 --- a/source/api_cc/tests/CMakeLists.txt +++ b/source/api_cc/tests/CMakeLists.txt @@ -37,7 +37,7 @@ configure_file( set(opname "deepmd_op") set(OP_BASE_DIR ${CMAKE_SOURCE_DIR}/../../op) # file(GLOB OP_SRC ${OP_BASE_DIR}/*.cc) -file(GLOB OP_SRC ${OP_BASE_DIR}/prod_force.cc ${OP_BASE_DIR}/prod_virial.cc ${OP_BASE_DIR}/descrpt.cc ${OP_BASE_DIR}/descrpt_se_a_ef.cc ${OP_BASE_DIR}/descrpt_se_a_ef.cc ${OP_BASE_DIR}/descrpt_se_a_ef_para.cc ${OP_BASE_DIR}/descrpt_se_a_ef_vert.cc ${OP_BASE_DIR}/pair_tab.cc ${OP_BASE_DIR}/prod_force_multi_device.cc ${OP_BASE_DIR}/prod_virial_multi_device.cc ${OP_BASE_DIR}/soft_min.cc ${OP_BASE_DIR}/soft_min_force.cc ${OP_BASE_DIR}/soft_min_virial.cc ${OP_BASE_DIR}/ewald_recp.cc ${OP_BASE_DIR}/gelu_multi_device.cc ${OP_BASE_DIR}/map_aparam.cc ${OP_BASE_DIR}/neighbor_stat.cc ${OP_BASE_DIR}/unaggregated_grad.cc ${OP_BASE_DIR}/tabulate_multi_device.cc ${OP_BASE_DIR}/prod_env_mat_multi_device.cc) +file(GLOB OP_SRC ${OP_BASE_DIR}/custom_op.cc ${OP_BASE_DIR}/prod_force.cc ${OP_BASE_DIR}/prod_virial.cc ${OP_BASE_DIR}/descrpt.cc ${OP_BASE_DIR}/descrpt_se_a_ef.cc ${OP_BASE_DIR}/descrpt_se_a_ef.cc ${OP_BASE_DIR}/descrpt_se_a_ef_para.cc ${OP_BASE_DIR}/descrpt_se_a_ef_vert.cc ${OP_BASE_DIR}/pair_tab.cc ${OP_BASE_DIR}/prod_force_multi_device.cc ${OP_BASE_DIR}/prod_virial_multi_device.cc ${OP_BASE_DIR}/soft_min.cc ${OP_BASE_DIR}/soft_min_force.cc ${OP_BASE_DIR}/soft_min_virial.cc ${OP_BASE_DIR}/ewald_recp.cc ${OP_BASE_DIR}/gelu_multi_device.cc ${OP_BASE_DIR}/map_aparam.cc ${OP_BASE_DIR}/neighbor_stat.cc ${OP_BASE_DIR}/unaggregated_grad.cc ${OP_BASE_DIR}/tabulate_multi_device.cc ${OP_BASE_DIR}/prod_env_mat_multi_device.cc) add_library(${opname} SHARED ${OP_SRC}) list (APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/../../cmake/) diff --git a/source/lib/include/SimulationRegion_Impl.h b/source/lib/include/SimulationRegion_Impl.h index 5b7b8248fd..528402b7d6 100644 --- a/source/lib/include/SimulationRegion_Impl.h +++ b/source/lib/include/SimulationRegion_Impl.h @@ -6,6 +6,7 @@ #include #include #include +#include "errors.h" // using namespace std; @@ -502,7 +503,7 @@ computeVolume() boxt[0*3+2] * (boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]); volumei = static_cast(1.)/volume; if (volume < 0) { - throw std::runtime_error("Negative volume detected. Please make sure the simulation cell obeys the right-hand rule."); + throw deepmd::deepmd_exception("Negative volume detected. Please make sure the simulation cell obeys the right-hand rule."); } } diff --git a/source/lib/include/errors.h b/source/lib/include/errors.h new file mode 100644 index 0000000000..fe0a21fc50 --- /dev/null +++ b/source/lib/include/errors.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include + +namespace deepmd{ + struct + deepmd_exception: public std::runtime_error { + public: + deepmd_exception(): runtime_error("DeePMD-kit Error!") {}; + deepmd_exception(const std::string& msg): runtime_error(std::string("DeePMD-kit Error: ") + msg) {}; + }; + + struct + deepmd_exception_oom: public std::runtime_error{ + public: + deepmd_exception_oom(): runtime_error("DeePMD-kit OOM!") {}; + deepmd_exception_oom(const std::string& msg): runtime_error(std::string("DeePMD-kit OOM: ") + msg) {}; + }; +}; \ No newline at end of file diff --git a/source/lib/include/gpu_cuda.h b/source/lib/include/gpu_cuda.h index cd82ee4657..8a2b617c95 100644 --- a/source/lib/include/gpu_cuda.h +++ b/source/lib/include/gpu_cuda.h @@ -3,6 +3,7 @@ #include #include #include +#include "errors.h" #define GPU_MAX_NBOR_SIZE 4096 #define DPErrcheck(res) {DPAssert((res), __FILE__, __LINE__);} @@ -12,7 +13,6 @@ inline void DPAssert(cudaError_t code, const char *file, int line, bool abort=tr fprintf(stderr,"cuda assert: %s %s %d\n", cudaGetErrorString(code), file, line); if (code == 2) { // out of memory - // TODO: I have no idea how to thorw errors back to Python interface fprintf(stderr, "Your memory is not enough, thus an error has been raised " \ "above. You need to take the following actions:\n" \ "1. Check if the network size of the model is too large.\n" \ @@ -22,8 +22,9 @@ inline void DPAssert(cudaError_t code, const char *file, int line, bool abort=tr "4. Check if another program is using the same GPU by execuating `nvidia-smi`. " \ "The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` " \ "environment variable.\n"); + if (abort) throw deepmd::deepmd_exception_oom("CUDA Assert"); } - if (abort) exit(code); + if (abort) throw deepmd::deepmd_exception("CUDA Assert"); } } @@ -34,7 +35,6 @@ inline void nborAssert(cudaError_t code, const char *file, int line, bool abort= fprintf(stderr,"cuda assert: %s %s %d\n", "DeePMD-kit:\tillegal nbor list sorting", file, line); if (code == 2) { // out of memory - // TODO: I have no idea how to thorw errors back to Python interface fprintf(stderr, "Your memory is not enough, thus an error has been raised " \ "above. You need to take the following actions:\n" \ "1. Check if the network size of the model is too large.\n" \ @@ -44,8 +44,9 @@ inline void nborAssert(cudaError_t code, const char *file, int line, bool abort= "4. Check if another program is using the same GPU by execuating `nvidia-smi`. " \ "The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` " \ "environment variable.\n"); + if (abort) throw deepmd::deepmd_exception_oom("CUDA Assert"); } - if (abort) exit(code); + if (abort) throw deepmd::deepmd_exception("CUDA Assert"); } } diff --git a/source/lib/include/gpu_rocm.h b/source/lib/include/gpu_rocm.h index 955ffe5bf7..b6439c3bb8 100644 --- a/source/lib/include/gpu_rocm.h +++ b/source/lib/include/gpu_rocm.h @@ -5,6 +5,7 @@ #include //#include //#include +#include "errors.h" #define GPU_MAX_NBOR_SIZE 4096 @@ -12,7 +13,7 @@ inline void DPAssert(hipError_t code, const char *file, int line, bool abort=true) { if (code != hipSuccess) { fprintf(stderr,"hip assert: %s %s %d\n", hipGetErrorString(code), file, line); - if (abort) exit(code); + if (abort) throw deepmd::deepmd_exception("CUDA Assert"); } } @@ -20,7 +21,7 @@ inline void DPAssert(hipError_t code, const char *file, int line, bool abort=tru inline void nborAssert(hipError_t code, const char *file, int line, bool abort=true) { if (code != hipSuccess) { fprintf(stderr,"hip assert: %s %s %d\n", "DeePMD-kit:\tillegal nbor list sorting", file, line); - if (abort) exit(code); + if (abort) throw deepmd::deepmd_exception("CUDA Assert"); } } diff --git a/source/lib/src/fmt_nlist.cc b/source/lib/src/fmt_nlist.cc index add83dadcf..35155d77d1 100644 --- a/source/lib/src/fmt_nlist.cc +++ b/source/lib/src/fmt_nlist.cc @@ -4,6 +4,7 @@ #include "fmt_nlist.h" #include "SimulationRegion.h" #include +#include "errors.h" using namespace deepmd; @@ -185,7 +186,7 @@ format_nlist_cpu ( << fmt_ilist.size() << " which does not match " << nnei << std::endl; - exit(1); + throw deepmd::deepmd_exception(); } std::copy(fmt_ilist.begin(), fmt_ilist.end(), cur_nlist); } diff --git a/source/lib/src/pair_tab.cc b/source/lib/src/pair_tab.cc index 5137e17ac9..2c48ce957a 100644 --- a/source/lib/src/pair_tab.cc +++ b/source/lib/src/pair_tab.cc @@ -3,6 +3,7 @@ #include #include #include "pair_tab.h" +#include "errors.h" inline void _pair_tabulated_inter ( @@ -25,7 +26,7 @@ void _pair_tabulated_inter ( // std::cout << rr << " " << rmin << " " << hh << " " << uu << std::endl; if (uu < 0) { std::cerr << "coord go beyond table lower boundary" << std::endl; - exit(1); + throw deepmd::deepmd_exception(); } int idx = uu; if (idx >= nspline) { diff --git a/source/lib/src/prod_force.cc b/source/lib/src/prod_force.cc index ffe177e16c..e9784d3409 100644 --- a/source/lib/src/prod_force.cc +++ b/source/lib/src/prod_force.cc @@ -1,6 +1,7 @@ #include #include #include "prod_force.h" +#include "errors.h" inline void make_index_range ( @@ -14,7 +15,7 @@ make_index_range ( idx_end = nei_idx * 4 + 4; } else { - throw std::runtime_error("should no reach here"); + throw deepmd::deepmd_exception("should no reach here"); } } diff --git a/source/lib/src/prod_force_grad.cc b/source/lib/src/prod_force_grad.cc index 7872ea5c55..110bf790f4 100644 --- a/source/lib/src/prod_force_grad.cc +++ b/source/lib/src/prod_force_grad.cc @@ -2,6 +2,7 @@ #include #include #include "prod_force_grad.h" +#include "errors.h" inline void make_index_range ( @@ -15,7 +16,7 @@ make_index_range ( idx_end = nei_idx * 4 + 4; } else { - throw std::runtime_error("should no reach here"); + throw deepmd::deepmd_exception("should no reach here"); } } diff --git a/source/lib/src/prod_virial.cc b/source/lib/src/prod_virial.cc index 086bc94245..f1c598c807 100644 --- a/source/lib/src/prod_virial.cc +++ b/source/lib/src/prod_virial.cc @@ -2,6 +2,7 @@ #include #include #include "prod_virial.h" +#include "errors.h" inline void make_index_range ( @@ -15,7 +16,7 @@ make_index_range ( idx_end = nei_idx * 4 + 4; } else { - throw std::runtime_error("should no reach here"); + throw deepmd::deepmd_exception("should no reach here"); } } diff --git a/source/lib/src/prod_virial_grad.cc b/source/lib/src/prod_virial_grad.cc index 59c3192fc0..8e225c0793 100644 --- a/source/lib/src/prod_virial_grad.cc +++ b/source/lib/src/prod_virial_grad.cc @@ -1,6 +1,7 @@ #include #include #include "prod_virial_grad.h" +#include "errors.h" inline void make_index_range ( @@ -14,7 +15,7 @@ make_index_range ( idx_end = nei_idx * 4 + 4; } else { - throw std::runtime_error("should no reach here"); + throw deepmd::deepmd_exception("should no reach here"); } } diff --git a/source/lib/src/region.cc b/source/lib/src/region.cc index 62dcdb9b68..90704016c2 100644 --- a/source/lib/src/region.cc +++ b/source/lib/src/region.cc @@ -1,6 +1,7 @@ #include #include #include "region.h" +#include "errors.h" #define BOXT_DIM 9 using namespace deepmd; @@ -33,7 +34,7 @@ compute_volume(const FPTYPE * boxt) boxt[0*3+1] * (boxt[1*3+0]*boxt[2*3+2] - boxt[2*3+0]*boxt[1*3+2]) + boxt[0*3+2] * (boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]); if (volume < 0) { - throw std::runtime_error("Negative volume detected. Please make sure the simulation cell obeys the right-hand rule."); + throw deepmd::deepmd_exception("Negative volume detected. Please make sure the simulation cell obeys the right-hand rule."); } return volume; } diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt index 340c5601fb..1075847953 100644 --- a/source/op/CMakeLists.txt +++ b/source/op/CMakeLists.txt @@ -3,10 +3,10 @@ set(OP_LIB ${PROJECT_SOURCE_DIR}/lib/src/SimulationRegion.cpp ${PROJECT_SOURCE_DIR}/lib/src/neighbor_list.cc) set (OP_CXX_FLAG -D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI} ) -file(GLOB OP_SRC prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a_ef.cc descrpt_se_a_ef.cc descrpt_se_a_ef_para.cc descrpt_se_a_ef_vert.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ewald_recp.cc gelu_multi_device.cc map_aparam.cc neighbor_stat.cc unaggregated_grad.cc tabulate_multi_device.cc prod_env_mat_multi_device.cc) -file(GLOB OP_CUDA_SRC prod_force.cc prod_virial.cc descrpt.cc prod_env_mat_multi_device.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc gelu_multi_device.cc tabulate_multi_device.cc) -file(GLOB OP_ROCM_SRC prod_force.cc prod_virial.cc descrpt.cc prod_env_mat_multi_device.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc gelu_multi_device.cc tabulate_multi_device.cc) -file(GLOB OP_GRADS_SRC prod_force_grad.cc prod_force_grad_multi_device.cc prod_virial_grad.cc prod_virial_grad_multi_device.cc soft_min_force_grad.cc soft_min_virial_grad.cc ) +file(GLOB OP_SRC custom_op.cc prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a_ef.cc descrpt_se_a_ef.cc descrpt_se_a_ef_para.cc descrpt_se_a_ef_vert.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ewald_recp.cc gelu_multi_device.cc map_aparam.cc neighbor_stat.cc unaggregated_grad.cc tabulate_multi_device.cc prod_env_mat_multi_device.cc) +file(GLOB OP_CUDA_SRC custom_op.cc prod_force.cc prod_virial.cc descrpt.cc prod_env_mat_multi_device.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc gelu_multi_device.cc tabulate_multi_device.cc) +file(GLOB OP_ROCM_SRC custom_op.cc prod_force.cc prod_virial.cc descrpt.cc prod_env_mat_multi_device.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc gelu_multi_device.cc tabulate_multi_device.cc) +file(GLOB OP_GRADS_SRC custom_op.cc prod_force_grad.cc prod_force_grad_multi_device.cc prod_virial_grad.cc prod_virial_grad_multi_device.cc soft_min_force_grad.cc soft_min_virial_grad.cc ) file(GLOB OP_PY *.py) if (BUILD_CPP_IF) diff --git a/source/op/custom_op.cc b/source/op/custom_op.cc new file mode 100644 index 0000000000..741fb3ace6 --- /dev/null +++ b/source/op/custom_op.cc @@ -0,0 +1,20 @@ +#include "custom_op.h" +#include "errors.h" + +namespace deepmd { + void safe_compute(OpKernelContext* context, std::function ff) { + try{ + ff(context); + } catch (deepmd::deepmd_exception_oom& e){ + OP_REQUIRES_OK( + context, + errors::ResourceExhausted("Operation received an exception: ", e.what(), + ", in file ",__FILE__, ":", __LINE__)); + } catch (deepmd::deepmd_exception& e) { + OP_REQUIRES_OK( + context, + errors::Internal("Operation received an exception: ", e.what(), + ", in file ",__FILE__, ":", __LINE__)); + } + } +}; \ No newline at end of file diff --git a/source/op/custom_op.h b/source/op/custom_op.h index e4f9211e61..8482e92b03 100644 --- a/source/op/custom_op.h +++ b/source/op/custom_op.h @@ -26,4 +26,8 @@ struct DeviceFunctor { device = "GPU"; } #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM +}; + +namespace deepmd { + void safe_compute(OpKernelContext* context, std::function ff); }; \ No newline at end of file diff --git a/source/op/descrpt.cc b/source/op/descrpt.cc index 10ba125594..7fdf81d986 100644 --- a/source/op/descrpt.cc +++ b/source/op/descrpt.cc @@ -2,6 +2,7 @@ #include "ComputeDescriptor.h" #include "neighbor_list.h" #include "fmt_nlist.h" +#include "errors.h" typedef double boxtensor_t ; typedef double compute_t; @@ -49,6 +50,10 @@ class DescrptOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor const Tensor& coord_tensor = context->input(0); const Tensor& type_tensor = context->input(1); @@ -105,7 +110,7 @@ class DescrptOp : public OpKernel { nei_mode = -1; } else { - throw std::runtime_error("invalid mesh tensor"); + throw deepmd::deepmd_exception("invalid mesh tensor"); } bool b_pbc = true; // if region is given extended, do not use pbc @@ -254,7 +259,7 @@ class DescrptOp : public OpKernel { ::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL); } else { - throw std::runtime_error("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknow neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/descrpt_se_a_ef.cc b/source/op/descrpt_se_a_ef.cc index 3ba41624d9..121205c9cf 100644 --- a/source/op/descrpt_se_a_ef.cc +++ b/source/op/descrpt_se_a_ef.cc @@ -3,6 +3,7 @@ #include "ComputeDescriptor.h" #include "neighbor_list.h" #include "fmt_nlist.h" +#include "errors.h" typedef double boxtensor_t ; typedef double compute_t; @@ -49,6 +50,10 @@ class DescrptSeAEfOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& coord_tensor = context->input(context_input_index++); @@ -112,7 +117,7 @@ class DescrptSeAEfOp : public OpKernel { nei_mode = -1; } else { - throw std::runtime_error("invalid mesh tensor"); + throw deepmd::deepmd_exception("invalid mesh tensor"); } bool b_pbc = true; // if region is given extended, do not use pbc @@ -267,7 +272,7 @@ class DescrptSeAEfOp : public OpKernel { ::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL); } else { - throw std::runtime_error("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknow neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/descrpt_se_a_ef_para.cc b/source/op/descrpt_se_a_ef_para.cc index 2cb3b3445c..952c53d473 100644 --- a/source/op/descrpt_se_a_ef_para.cc +++ b/source/op/descrpt_se_a_ef_para.cc @@ -2,6 +2,7 @@ #include "ComputeDescriptor.h" #include "neighbor_list.h" #include "fmt_nlist.h" +#include "errors.h" typedef double boxtensor_t ; typedef double compute_t; @@ -48,6 +49,10 @@ class DescrptSeAEfParaOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& coord_tensor = context->input(context_input_index++); @@ -111,7 +116,7 @@ class DescrptSeAEfParaOp : public OpKernel { nei_mode = -1; } else { - throw std::runtime_error("invalid mesh tensor"); + throw deepmd::deepmd_exception("invalid mesh tensor"); } bool b_pbc = true; // if region is given extended, do not use pbc @@ -266,7 +271,7 @@ class DescrptSeAEfParaOp : public OpKernel { ::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL); } else { - throw std::runtime_error("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknow neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/descrpt_se_a_ef_vert.cc b/source/op/descrpt_se_a_ef_vert.cc index 615b153bf3..4ef76f8e0f 100644 --- a/source/op/descrpt_se_a_ef_vert.cc +++ b/source/op/descrpt_se_a_ef_vert.cc @@ -2,6 +2,7 @@ #include "ComputeDescriptor.h" #include "neighbor_list.h" #include "fmt_nlist.h" +#include "errors.h" typedef double boxtensor_t ; typedef double compute_t; @@ -48,6 +49,10 @@ class DescrptSeAEfVertOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& coord_tensor = context->input(context_input_index++); @@ -111,7 +116,7 @@ class DescrptSeAEfVertOp : public OpKernel { nei_mode = -1; } else { - throw std::runtime_error("invalid mesh tensor"); + throw deepmd::deepmd_exception("invalid mesh tensor"); } bool b_pbc = true; // if region is given extended, do not use pbc @@ -266,7 +271,7 @@ class DescrptSeAEfVertOp : public OpKernel { ::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL); } else { - throw std::runtime_error("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknow neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/ewald_recp.cc b/source/op/ewald_recp.cc index 9159dc5931..c9cc22b480 100644 --- a/source/op/ewald_recp.cc +++ b/source/op/ewald_recp.cc @@ -28,6 +28,10 @@ class EwaldRecpOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int cc = 0; const Tensor& coord_tensor = context->input(cc++); diff --git a/source/op/gelu_multi_device.cc b/source/op/gelu_multi_device.cc index 508f60ccef..dc86ab6c8d 100644 --- a/source/op/gelu_multi_device.cc +++ b/source/op/gelu_multi_device.cc @@ -26,6 +26,10 @@ class GeluOp : public OpKernel { public : explicit GeluOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor const Tensor& x_tensor = context->input(0); Tensor * output_tensor = NULL; @@ -73,6 +77,10 @@ class GeluGradOp : public OpKernel { public : explicit GeluGradOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor const Tensor& dy_tensor = context->input(0); const Tensor& x_tensor = context->input(1); @@ -122,6 +130,10 @@ class GeluGradGradOp : public OpKernel { public : explicit GeluGradGradOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor const Tensor& dy_tensor = context->input(0); const Tensor& dy_2_tensor = context->input(1); diff --git a/source/op/legacy/descrpt_se_a.cc b/source/op/legacy/descrpt_se_a.cc index 51b8e26e0f..cd7abf8a76 100644 --- a/source/op/legacy/descrpt_se_a.cc +++ b/source/op/legacy/descrpt_se_a.cc @@ -3,6 +3,7 @@ #include "neighbor_list.h" #include "fmt_nlist.h" #include "env_mat.h" +#include "errors.h" typedef double boxtensor_t ; typedef double compute_t; @@ -107,7 +108,7 @@ class DescrptSeAOp : public OpKernel { nei_mode = -1; } else { - throw std::runtime_error("invalid mesh tensor"); + throw deepmd::deepmd_exception("invalid mesh tensor"); } bool b_pbc = true; // if region is given extended, do not use pbc @@ -253,7 +254,7 @@ class DescrptSeAOp : public OpKernel { ::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL); } else { - throw std::runtime_error("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknow neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/legacy/descrpt_se_r.cc b/source/op/legacy/descrpt_se_r.cc index 7031ed20e8..408818fbee 100644 --- a/source/op/legacy/descrpt_se_r.cc +++ b/source/op/legacy/descrpt_se_r.cc @@ -3,6 +3,7 @@ #include "neighbor_list.h" #include "fmt_nlist.h" #include "env_mat.h" +#include "errors.h" typedef double boxtensor_t ; typedef double compute_t; @@ -99,7 +100,7 @@ class DescrptSeROp : public OpKernel { nei_mode = -1; } else { - throw std::runtime_error("invalid mesh tensor"); + throw deepmd::deepmd_exception("invalid mesh tensor"); } bool b_pbc = true; // if region is given extended, do not use pbc @@ -238,7 +239,7 @@ class DescrptSeROp : public OpKernel { ::build_nlist (d_nlist_null, d_nlist, d_coord3, -1, rcut, NULL); } else { - throw std::runtime_error("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknow neighbor mode"); } // loop over atoms, compute descriptors for each atom diff --git a/source/op/map_aparam.cc b/source/op/map_aparam.cc index f1c98bdc9c..cd70435f99 100644 --- a/source/op/map_aparam.cc +++ b/source/op/map_aparam.cc @@ -20,6 +20,10 @@ class MapAparamOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& aparam_tensor = context->input(context_input_index++); diff --git a/source/op/neighbor_stat.cc b/source/op/neighbor_stat.cc index 11f991b4b7..fad4617cc5 100644 --- a/source/op/neighbor_stat.cc +++ b/source/op/neighbor_stat.cc @@ -1,5 +1,6 @@ #include "custom_op.h" #include "neighbor_list.h" +#include "errors.h" typedef double boxtensor_t ; typedef double compute_t; @@ -23,6 +24,10 @@ class NeighborStatOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& coord_tensor = context->input(context_input_index++); @@ -60,7 +65,7 @@ class NeighborStatOp : public OpKernel { nei_mode = -1; } else { - throw std::runtime_error("invalid mesh tensor"); + throw deepmd::deepmd_exception("invalid mesh tensor"); } // if region is given extended, do not use pbc bool b_pbc = (nei_mode >= 1 || nei_mode == -1) ? false : true; @@ -139,7 +144,7 @@ class NeighborStatOp : public OpKernel { ::build_nlist (d_nlist_a, d_nlist_r, d_coord3, -1, rcut, NULL); } else { - throw std::runtime_error("unknow neighbor mode"); + throw deepmd::deepmd_exception("unknow neighbor mode"); } int MAX_NNEI = 0; diff --git a/source/op/pair_tab.cc b/source/op/pair_tab.cc index e09ef460b4..2a22e17102 100644 --- a/source/op/pair_tab.cc +++ b/source/op/pair_tab.cc @@ -34,6 +34,10 @@ class PairTabOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int tmp_idx = 0; const Tensor& table_info_tensor = context->input(tmp_idx++); diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc index 22fb223289..69e08eaa5e 100644 --- a/source/op/prod_env_mat_multi_device.cc +++ b/source/op/prod_env_mat_multi_device.cc @@ -4,6 +4,7 @@ #include "region.h" #include "neighbor_list.h" #include "prod_env_mat.h" +#include "errors.h" REGISTER_OP("ProdEnvMatA") .Attr("T: {float, double} = DT_DOUBLE") @@ -321,6 +322,10 @@ class ProdEnvMatAOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& coord_tensor = context->input(context_input_index++); @@ -382,7 +387,7 @@ class ProdEnvMatAOp : public OpKernel { nei_mode = -1; } else { - throw std::runtime_error("invalid mesh tensor"); + throw deepmd::deepmd_exception("invalid mesh tensor"); } // Create output tensors @@ -584,6 +589,10 @@ class ProdEnvMatROp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& coord_tensor = context->input(context_input_index++); @@ -642,7 +651,7 @@ class ProdEnvMatROp : public OpKernel { nei_mode = -1; } else { - throw std::runtime_error("invalid mesh tensor"); + throw deepmd::deepmd_exception("invalid mesh tensor"); } // Create an output tensor diff --git a/source/op/prod_force.cc b/source/op/prod_force.cc index 307d00a85d..a97fb6c575 100644 --- a/source/op/prod_force.cc +++ b/source/op/prod_force.cc @@ -26,6 +26,10 @@ class ProdForceOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor const Tensor& net_deriv_tensor = context->input(0); const Tensor& in_deriv_tensor = context->input(1); diff --git a/source/op/prod_force_grad.cc b/source/op/prod_force_grad.cc index 52c8ed845f..67423d7489 100644 --- a/source/op/prod_force_grad.cc +++ b/source/op/prod_force_grad.cc @@ -25,6 +25,10 @@ class ProdForceGradOp : public OpKernel } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor const Tensor& grad_tensor = context->input(0); const Tensor& net_deriv_tensor = context->input(1); diff --git a/source/op/prod_force_grad_multi_device.cc b/source/op/prod_force_grad_multi_device.cc index 5aff4bbbef..533f6cbf14 100644 --- a/source/op/prod_force_grad_multi_device.cc +++ b/source/op/prod_force_grad_multi_device.cc @@ -31,6 +31,10 @@ class ProdForceSeAGradOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); @@ -139,6 +143,10 @@ class ProdForceSeRGradOp : public OpKernel explicit ProdForceSeRGradOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); diff --git a/source/op/prod_force_multi_device.cc b/source/op/prod_force_multi_device.cc index 63e6945906..8df25636f6 100644 --- a/source/op/prod_force_multi_device.cc +++ b/source/op/prod_force_multi_device.cc @@ -25,6 +25,10 @@ class ProdForceSeAOp : public OpKernel { explicit ProdForceSeAOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& net_deriv_tensor = context->input(context_input_index++); diff --git a/source/op/prod_force_se_a_grad.cc b/source/op/prod_force_se_a_grad.cc index 7617c244ed..84b2a7ed3b 100644 --- a/source/op/prod_force_se_a_grad.cc +++ b/source/op/prod_force_se_a_grad.cc @@ -25,6 +25,10 @@ class ProdForceSeAGradOp : public OpKernel } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); diff --git a/source/op/prod_force_se_r_grad.cc b/source/op/prod_force_se_r_grad.cc index 9fff3724ed..e02f0c8750 100644 --- a/source/op/prod_force_se_r_grad.cc +++ b/source/op/prod_force_se_r_grad.cc @@ -20,6 +20,10 @@ class ProdForceSeRGradOp : public OpKernel } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); diff --git a/source/op/prod_virial.cc b/source/op/prod_virial.cc index d83ab27225..a8df2bc848 100644 --- a/source/op/prod_virial.cc +++ b/source/op/prod_virial.cc @@ -28,6 +28,10 @@ class ProdVirialOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor const Tensor& net_deriv_tensor = context->input(0); const Tensor& in_deriv_tensor = context->input(1); diff --git a/source/op/prod_virial_grad.cc b/source/op/prod_virial_grad.cc index d07a661cb9..33fa0348dc 100644 --- a/source/op/prod_virial_grad.cc +++ b/source/op/prod_virial_grad.cc @@ -26,6 +26,10 @@ class ProdVirialGradOp : public OpKernel } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor const Tensor& grad_tensor = context->input(0); const Tensor& net_deriv_tensor = context->input(1); diff --git a/source/op/prod_virial_grad_multi_device.cc b/source/op/prod_virial_grad_multi_device.cc index 7a37da9b38..9afd4462eb 100644 --- a/source/op/prod_virial_grad_multi_device.cc +++ b/source/op/prod_virial_grad_multi_device.cc @@ -34,6 +34,10 @@ class ProdVirialSeAGradOp : public OpKernel } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); @@ -153,6 +157,10 @@ class ProdVirialSeRGradOp : public OpKernel explicit ProdVirialSeRGradOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); diff --git a/source/op/prod_virial_multi_device.cc b/source/op/prod_virial_multi_device.cc index 02c212a2d9..33c263ef84 100644 --- a/source/op/prod_virial_multi_device.cc +++ b/source/op/prod_virial_multi_device.cc @@ -28,6 +28,10 @@ class ProdVirialSeAOp : public OpKernel { public: explicit ProdVirialSeAOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& net_deriv_tensor = context->input(context_input_index++); @@ -120,6 +124,10 @@ class ProdVirialSeROp : public OpKernel { public: explicit ProdVirialSeROp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& net_deriv_tensor = context->input(context_input_index++); diff --git a/source/op/prod_virial_se_a_grad.cc b/source/op/prod_virial_se_a_grad.cc index cb76d29512..00a88e0f76 100644 --- a/source/op/prod_virial_se_a_grad.cc +++ b/source/op/prod_virial_se_a_grad.cc @@ -26,6 +26,10 @@ class ProdVirialSeAGradOp : public OpKernel } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); diff --git a/source/op/prod_virial_se_r_grad.cc b/source/op/prod_virial_se_r_grad.cc index 247f2ee909..7f9005abe4 100644 --- a/source/op/prod_virial_se_r_grad.cc +++ b/source/op/prod_virial_se_r_grad.cc @@ -21,6 +21,10 @@ class ProdVirialSeRGradOp : public OpKernel } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); diff --git a/source/op/soft_min.cc b/source/op/soft_min.cc index c30d9c409a..f7770ab58b 100644 --- a/source/op/soft_min.cc +++ b/source/op/soft_min.cc @@ -37,6 +37,10 @@ class SoftMinSwitchOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int tmp_idx = 0; const Tensor& type_tensor = context->input(tmp_idx++); diff --git a/source/op/soft_min_force.cc b/source/op/soft_min_force.cc index 7d09da6613..f10a48dc26 100644 --- a/source/op/soft_min_force.cc +++ b/source/op/soft_min_force.cc @@ -24,6 +24,10 @@ class SoftMinForceOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor const Tensor& du_tensor = context->input(0); const Tensor& sw_deriv_tensor = context->input(1); diff --git a/source/op/soft_min_force_grad.cc b/source/op/soft_min_force_grad.cc index a7328734b6..d5095d1005 100644 --- a/source/op/soft_min_force_grad.cc +++ b/source/op/soft_min_force_grad.cc @@ -24,6 +24,10 @@ class SoftMinForceGradOp : public OpKernel } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); diff --git a/source/op/soft_min_virial.cc b/source/op/soft_min_virial.cc index 3273160fe3..72d4a21e55 100644 --- a/source/op/soft_min_virial.cc +++ b/source/op/soft_min_virial.cc @@ -26,6 +26,10 @@ class SoftMinVirialOp : public OpKernel { } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& du_tensor = context->input(context_input_index++); diff --git a/source/op/soft_min_virial_grad.cc b/source/op/soft_min_virial_grad.cc index 034aeb7a09..f92ac2a5c9 100644 --- a/source/op/soft_min_virial_grad.cc +++ b/source/op/soft_min_virial_grad.cc @@ -25,6 +25,10 @@ class SoftMinVirialGradOp : public OpKernel } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& grad_tensor = context->input(context_input_index++); diff --git a/source/op/tabulate_multi_device.cc b/source/op/tabulate_multi_device.cc index 6fafa5698e..3d5765b843 100644 --- a/source/op/tabulate_multi_device.cc +++ b/source/op/tabulate_multi_device.cc @@ -28,6 +28,10 @@ class TabulateFusionOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("last_layer_size", &last_layer_size)); } void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& table_tensor = context->input(context_input_index++); @@ -90,6 +94,10 @@ class TabulateFusionGradOp : public OpKernel { public: explicit TabulateFusionGradOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& table_tensor = context->input(context_input_index++); diff --git a/source/op/unaggregated_grad.cc b/source/op/unaggregated_grad.cc index 56502efc55..343a339a92 100644 --- a/source/op/unaggregated_grad.cc +++ b/source/op/unaggregated_grad.cc @@ -136,6 +136,10 @@ class UnaggregatedDyDxSOp : public OpKernel { explicit UnaggregatedDyDxSOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& y = context->input(context_input_index++); @@ -169,6 +173,10 @@ class UnaggregatedDy2DxSOp : public OpKernel { explicit UnaggregatedDy2DxSOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& y = context->input(context_input_index++); @@ -205,6 +213,10 @@ class UnaggregatedDyDxOp : public OpKernel { explicit UnaggregatedDyDxOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& z = context->input(context_input_index++); @@ -242,6 +254,10 @@ class UnaggregatedDy2DxOp : public OpKernel { explicit UnaggregatedDy2DxOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);}); + } + + void _Compute(OpKernelContext* context) { // Grab the input tensor int context_input_index = 0; const Tensor& z = context->input(context_input_index++);