diff --git a/deepmd/common.py b/deepmd/common.py index 6a18cda677..1146f291d5 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -34,7 +34,7 @@ from typing import Literal # python >3.6 except ImportError: from typing_extensions import Literal # type: ignore - _ACTIVATION = Literal["relu", "relu6", "softplus", "sigmoid", "tanh", "gelu"] + _ACTIVATION = Literal["relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf"] _PRECISION = Literal["default", "float16", "float32", "float64"] # define constants @@ -49,7 +49,7 @@ def gelu(x: tf.Tensor) -> tf.Tensor: """Gaussian Error Linear Unit. - This is a smoother version of the RELU. + This is a smoother version of the RELU, implemented by custom operator. Parameters ---------- @@ -58,7 +58,31 @@ def gelu(x: tf.Tensor) -> tf.Tensor: Returns ------- - `x` with the GELU activation applied + tf.Tensor + `x` with the GELU activation applied + + References + ---------- + Original paper + https://arxiv.org/abs/1606.08415 + """ + return op_module.gelu(x) + + +def gelu_tf(x: tf.Tensor) -> tf.Tensor: + """Gaussian Error Linear Unit. + + This is a smoother version of the RELU, implemented by TF. + + Parameters + ---------- + x : tf.Tensor + float Tensor to perform activation + + Returns + ------- + tf.Tensor + `x` with the GELU activation applied References ---------- @@ -69,10 +93,10 @@ def gelu_wrapper(x): try: return tensorflow.nn.gelu(x, approximate=True) except AttributeError: + warnings.warn("TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator.") return op_module.gelu(x) return (lambda x: gelu_wrapper(x))(x) - # TODO this is not a good way to do things. This is some global variable to which # TODO anyone can write and there is no good way to keep track of the changes data_requirement = {} @@ -84,6 +108,7 @@ def gelu_wrapper(x): "sigmoid": tf.sigmoid, "tanh": tf.nn.tanh, "gelu": gelu, + "gelu_tf": gelu_tf, } diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 9784e1cf1a..e7c7edb170 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -28,7 +28,7 @@ def type_embedding_args(): doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_seed = 'Random seed for parameter initialization' - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_trainable = 'If the parameters in the embedding net are trainable' @@ -128,7 +128,7 @@ def descrpt_se_a_args(): doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`' doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' doc_axis_neuron = 'Size of the submatrix of G (embedding matrix).' - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = 'Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets' doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' @@ -162,7 +162,7 @@ def descrpt_se_t_args(): doc_rcut = 'The cut-off radius.' doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`' doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_trainable = 'If the parameters in the embedding net are trainable' @@ -205,7 +205,7 @@ def descrpt_se_r_args(): doc_rcut = 'The cut-off radius.' doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`' doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = 'Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets' doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' @@ -262,7 +262,7 @@ def fitting_ener(): doc_numb_fparam = 'The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams.' doc_numb_aparam = 'The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams.' doc_neuron = 'The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.' - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_trainable = 'Whether the parameters in the fitting net are trainable. This option can be\n\n\ @@ -288,7 +288,7 @@ def fitting_ener(): def fitting_polar(): doc_neuron = 'The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.' - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_scale = 'The output of the fitting net (polarizability matrix) will be scaled by ``scale``' @@ -320,7 +320,7 @@ def fitting_polar(): def fitting_dipole(): doc_neuron = 'The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.' - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_sel_type = 'The atom types for which the atomic dipole will be provided. If not set, all types will be selected.'