From 716c8f1fb9d80f31e8535c256e01adda91ab46ea Mon Sep 17 00:00:00 2001 From: denghuilu Date: Mon, 10 Jan 2022 21:45:36 +0800 Subject: [PATCH 1/3] fix gelu grad multi definitions --- deepmd/common.py | 7 ++++++- source/op/_gelu.py | 17 ++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/deepmd/common.py b/deepmd/common.py index 695fee1a93..aec1f7f544 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -2,6 +2,7 @@ import json import warnings +import tensorflow from functools import wraps from pathlib import Path from typing import ( @@ -64,7 +65,11 @@ def gelu(x: tf.Tensor) -> tf.Tensor: Original paper https://arxiv.org/abs/1606.08415 """ - return op_module.gelu(x) + try: + gelu = tensorflow.nn.gelu + except AttributeError: + gelu = op_module.gelu + return gelu(x) # TODO this is not a good way to do things. This is some global variable to which diff --git a/source/op/_gelu.py b/source/op/_gelu.py index ac0585da78..db45ef798e 100644 --- a/source/op/_gelu.py +++ b/source/op/_gelu.py @@ -2,14 +2,17 @@ """ First-order derivatives and second-order derivatives for gelu function. """ - +import tensorflow from tensorflow.python.framework import ops from deepmd.env import op_module -@ops.RegisterGradient("Gelu") -def _gelu_cc (op, dy) : - return op_module.gelu_grad(dy, op.inputs[0]) +try: + gelu = tensorflow.nn.gelu +except AttributeError: + @ops.RegisterGradient("Gelu") + def _gelu_cc (op, dy) : + return op_module.gelu_grad(dy, op.inputs[0]) -@ops.RegisterGradient("GeluGrad") -def _gelu_grad_cc (op, dy) : - return [op_module.gelu_grad(dy, op.inputs[1]), op_module.gelu_grad_grad(dy, op.inputs[0], op.inputs[1])] + @ops.RegisterGradient("GeluGrad") + def _gelu_grad_cc (op, dy) : + return [op_module.gelu_grad(dy, op.inputs[1]), op_module.gelu_grad_grad(dy, op.inputs[0], op.inputs[1])] From 863fc20499918bafc9d501fb305fabcd7886fd01 Mon Sep 17 00:00:00 2001 From: Denghui Lu Date: Wed, 12 Jan 2022 11:38:55 +0800 Subject: [PATCH 2/3] use approximate gelu function Co-authored-by: Jinzhe Zeng --- deepmd/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/common.py b/deepmd/common.py index aec1f7f544..08785281d7 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -66,7 +66,7 @@ def gelu(x: tf.Tensor) -> tf.Tensor: https://arxiv.org/abs/1606.08415 """ try: - gelu = tensorflow.nn.gelu + gelu = lambda x: tensorflow.nn.gelu(x, approximate=True) except AttributeError: gelu = op_module.gelu return gelu(x) From 86c1021fc3eb760292fd41702398a1e1048597c3 Mon Sep 17 00:00:00 2001 From: denghuilu Date: Wed, 12 Jan 2022 17:01:50 +0800 Subject: [PATCH 3/3] fix UT error --- deepmd/common.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/deepmd/common.py b/deepmd/common.py index 08785281d7..1f9d3afb0c 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -65,11 +65,12 @@ def gelu(x: tf.Tensor) -> tf.Tensor: Original paper https://arxiv.org/abs/1606.08415 """ - try: - gelu = lambda x: tensorflow.nn.gelu(x, approximate=True) - except AttributeError: - gelu = op_module.gelu - return gelu(x) + def gelu_wrapper(x): + try: + return tensorflow.nn.gelu(x, approximate=True) + except AttributeError: + return op_module.gelu(x) + return (lambda x: gelu_wrapper(x))(x) # TODO this is not a good way to do things. This is some global variable to which