From d8897a9eb48e984b173c8647adbec64dc07f3ed1 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Tue, 28 Jun 2022 23:03:14 +0800 Subject: [PATCH 1/4] support optional custom gelu implementation --- deepmd/common.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/deepmd/common.py b/deepmd/common.py index 6a18cda677..3185f2da2f 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -34,7 +34,7 @@ from typing import Literal # python >3.6 except ImportError: from typing_extensions import Literal # type: ignore - _ACTIVATION = Literal["relu", "relu6", "softplus", "sigmoid", "tanh", "gelu"] + _ACTIVATION = Literal["relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf"] _PRECISION = Literal["default", "float16", "float32", "float64"] # define constants @@ -49,7 +49,29 @@ def gelu(x: tf.Tensor) -> tf.Tensor: """Gaussian Error Linear Unit. - This is a smoother version of the RELU. + This is a smoother version of the RELU, implemented by custom operator. + + Parameters + ---------- + x : tf.Tensor + float Tensor to perform activation + + Returns + ------- + `x` with the GELU activation applied + + References + ---------- + Original paper + https://arxiv.org/abs/1606.08415 + """ + return op_module.gelu(x) + + +def gelu_tf(x: tf.Tensor) -> tf.Tensor: + """Gaussian Error Linear Unit. + + This is a smoother version of the RELU, implemented by TF. Parameters ---------- @@ -69,10 +91,10 @@ def gelu_wrapper(x): try: return tensorflow.nn.gelu(x, approximate=True) except AttributeError: + warnings.warn("TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator.") return op_module.gelu(x) return (lambda x: gelu_wrapper(x))(x) - # TODO this is not a good way to do things. This is some global variable to which # TODO anyone can write and there is no good way to keep track of the changes data_requirement = {} @@ -84,6 +106,7 @@ def gelu_wrapper(x): "sigmoid": tf.sigmoid, "tanh": tf.nn.tanh, "gelu": gelu, + "gelu_tf": gelu_tf, } From d6dfdfbc40973e78fd900bfa792c09ad3988aa20 Mon Sep 17 00:00:00 2001 From: Denghui Lu Date: Tue, 28 Jun 2022 23:18:23 +0800 Subject: [PATCH 2/4] add doc for gelu_tf --- doc/train-input-auto.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/train-input-auto.rst b/doc/train-input-auto.rst index 9201809549..2eb0ed6caa 100644 --- a/doc/train-input-auto.rst +++ b/doc/train-input-auto.rst @@ -268,7 +268,7 @@ model: | type: ``str``, optional, default: ``tanh`` | argument path: ``model/descriptor[se_e2_a]/activation_function`` - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". + The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf". Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF statdard version. .. _`model/descriptor[se_e2_a]/resnet_dt`: @@ -373,7 +373,7 @@ model: | type: ``str``, optional, default: ``tanh`` | argument path: ``model/descriptor[se_e2_r]/activation_function`` - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". + The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf". Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF statdard version. .. _`model/descriptor[se_e2_r]/resnet_dt`: From e9592c6f245a307e239e4a7d418dcb37cb7e4a34 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Wed, 29 Jun 2022 10:38:19 +0800 Subject: [PATCH 3/4] address doc issue --- deepmd/common.py | 6 ++++-- deepmd/utils/argcheck.py | 14 +++++++------- doc/train-input-auto.rst | 4 ++-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/deepmd/common.py b/deepmd/common.py index 3185f2da2f..1146f291d5 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -58,7 +58,8 @@ def gelu(x: tf.Tensor) -> tf.Tensor: Returns ------- - `x` with the GELU activation applied + tf.Tensor + `x` with the GELU activation applied References ---------- @@ -80,7 +81,8 @@ def gelu_tf(x: tf.Tensor) -> tf.Tensor: Returns ------- - `x` with the GELU activation applied + tf.Tensor + `x` with the GELU activation applied References ---------- diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 9784e1cf1a..e7c7edb170 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -28,7 +28,7 @@ def type_embedding_args(): doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_seed = 'Random seed for parameter initialization' - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_trainable = 'If the parameters in the embedding net are trainable' @@ -128,7 +128,7 @@ def descrpt_se_a_args(): doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`' doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' doc_axis_neuron = 'Size of the submatrix of G (embedding matrix).' - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = 'Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets' doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' @@ -162,7 +162,7 @@ def descrpt_se_t_args(): doc_rcut = 'The cut-off radius.' doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`' doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_trainable = 'If the parameters in the embedding net are trainable' @@ -205,7 +205,7 @@ def descrpt_se_r_args(): doc_rcut = 'The cut-off radius.' doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`' doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' - doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_type_one_side = 'Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets' doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' @@ -262,7 +262,7 @@ def fitting_ener(): doc_numb_fparam = 'The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams.' doc_numb_aparam = 'The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams.' doc_neuron = 'The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.' - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_trainable = 'Whether the parameters in the fitting net are trainable. This option can be\n\n\ @@ -288,7 +288,7 @@ def fitting_ener(): def fitting_polar(): doc_neuron = 'The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.' - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_scale = 'The output of the fitting net (polarizability matrix) will be scaled by ``scale``' @@ -320,7 +320,7 @@ def fitting_polar(): def fitting_dipole(): doc_neuron = 'The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.' - doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}' + doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())}. Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' doc_sel_type = 'The atom types for which the atomic dipole will be provided. If not set, all types will be selected.' diff --git a/doc/train-input-auto.rst b/doc/train-input-auto.rst index 2eb0ed6caa..f1de54f2f4 100644 --- a/doc/train-input-auto.rst +++ b/doc/train-input-auto.rst @@ -268,7 +268,7 @@ model: | type: ``str``, optional, default: ``tanh`` | argument path: ``model/descriptor[se_e2_a]/activation_function`` - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf". Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF statdard version. + The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". .. _`model/descriptor[se_e2_a]/resnet_dt`: @@ -373,7 +373,7 @@ model: | type: ``str``, optional, default: ``tanh`` | argument path: ``model/descriptor[se_e2_r]/activation_function`` - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf". Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF statdard version. + The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". .. _`model/descriptor[se_e2_r]/resnet_dt`: From e28ddbe9e36d8b36797c6a331d80fff8b457deef Mon Sep 17 00:00:00 2001 From: denghuilu Date: Wed, 29 Jun 2022 10:39:54 +0800 Subject: [PATCH 4/4] Update train-input-auto.rst --- doc/train-input-auto.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/train-input-auto.rst b/doc/train-input-auto.rst index f1de54f2f4..9201809549 100644 --- a/doc/train-input-auto.rst +++ b/doc/train-input-auto.rst @@ -373,7 +373,7 @@ model: | type: ``str``, optional, default: ``tanh`` | argument path: ``model/descriptor[se_e2_r]/activation_function`` - The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". + The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu". .. _`model/descriptor[se_e2_r]/resnet_dt`: