Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
a6ecd4b
Merge pull request #11 from deepmodeling/devel
iProzd Jun 1, 2021
aab1931
add gpu op unittest in source/tests
iProzd Jun 3, 2021
d5dca99
fix bug in #713
iProzd Jun 14, 2021
91b68f5
Merge pull request #12 from deepmodeling/devel
iProzd Jun 17, 2021
9f9fdae
Fix bug of empty input in gelu.cu
iProzd Jun 26, 2021
a958bcb
Merge pull request #13 from deepmodeling/devel
iProzd Jun 26, 2021
5144a53
Merge pull request #14 from deepmodeling/devel
iProzd Jul 1, 2021
46ab4e8
Merge pull request #15 from deepmodeling/devel
iProzd Jul 16, 2021
7372db6
Merge pull request #16 from deepmodeling/devel
iProzd Jul 26, 2021
7072344
Merge pull request #17 from deepmodeling/devel
iProzd Aug 9, 2021
978b37c
Merge pull request #18 from deepmodeling/devel
iProzd Aug 29, 2021
1cdf4c1
Merge pull request #19 from deepmodeling/devel
iProzd Sep 8, 2021
42ce5b3
Merge pull request #21 from deepmodeling/devel
iProzd Jan 30, 2022
b3ad9a5
Merge pull request #22 from deepmodeling/devel
iProzd May 28, 2022
d807db9
Merge pull request #23 from deepmodeling/devel
iProzd Jul 27, 2022
cb035d4
Upload attention based model
iProzd Aug 19, 2022
c31b283
Fix bugs in DPA-1 when trying to compress other model
iProzd Aug 19, 2022
c5d9d68
Add layernorm support for tf early version
iProzd Aug 20, 2022
b66c2ff
Upload the model image
iProzd Aug 20, 2022
fbfa5fe
Create train-se-atten.md
iProzd Aug 20, 2022
7854dd4
Update the download links of DPA-1 example
iProzd Aug 20, 2022
055401e
Update train-se-atten.md
iProzd Aug 20, 2022
3961e0d
Update train-se-atten.md
iProzd Aug 20, 2022
f04567f
Update train-se-atten.md
iProzd Aug 20, 2022
c0247c1
Update train-se-atten.md
iProzd Aug 20, 2022
087b464
Deal with the required changes
iProzd Aug 23, 2022
9203f02
Fix typo in data_system.py
iProzd Aug 23, 2022
a54524f
Add docs in toc
iProzd Aug 23, 2022
56e0386
Add docs to ntype and nmask
iProzd Aug 24, 2022
6a77184
Fix duplicated period in each doc_activation_function
iProzd Aug 24, 2022
4bbccfe
Optimized mixed_type format
iProzd Aug 24, 2022
798d864
Update common.py
iProzd Aug 24, 2022
3912df5
Git reset common.py of format editing.
iProzd Aug 25, 2022
a6f8258
Match default args of tebd in se_atten
iProzd Aug 25, 2022
becf0cb
Change default activation_function back to 'tanh' in tebd.
iProzd Aug 25, 2022
07326ec
Fix bugs when init_frz_model using tebd
iProzd Sep 2, 2022
54ef470
Revert "Fix bugs when init_frz_model using tebd"
iProzd Sep 3, 2022
cf58504
Merge pull request #24 from deepmodeling/devel
iProzd Sep 3, 2022
93781ad
Fix bugs when init_frz_model using tebd.
iProzd Sep 3, 2022
8bbec3d
Skip test_init_frz_model_se_atten when tf1.15
iProzd Sep 3, 2022
0eab5be
Skip test_init_frz_model_se_atten when tf1.15
iProzd Sep 3, 2022
6e803e7
Fix numb_steps and add GraphWithoutTensorError.
iProzd Sep 4, 2022
019902b
Revert "Fix numb_steps and add GraphWithoutTensorError."
iProzd Sep 4, 2022
70640c5
Fix numb_steps and add GraphWithoutTensorError.
iProzd Sep 4, 2022
2dd21b8
Fix UTs
iProzd Sep 4, 2022
641d5cc
Fix UTs
iProzd Sep 4, 2022
b70a2c7
Revert "Fix UTs"
iProzd Sep 6, 2022
1f9e57a
Revert "Fix UTs"
iProzd Sep 6, 2022
ba79e5d
Fix ckpt path.
iProzd Sep 6, 2022
0a779ba
Fix freeze -c path.
iProzd Sep 6, 2022
97d0b47
Change dtype of self.nei_type to np.int32.
iProzd Sep 7, 2022
451e285
Revert "Change dtype of self.nei_type to np.int32."
iProzd Sep 7, 2022
3019d9b
Simplify the self.nei_type
iProzd Sep 8, 2022
2055c08
Merge branch 'devel' into devel
njzjz Sep 8, 2022
fa055fa
Update test_compat_input.py
iProzd Sep 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions deepmd/descriptor/se_a.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,10 +164,7 @@ def __init__ (self,
self.embedding_net_variables = None
self.mixed_prec = None
self.place_holders = {}
nei_type = np.array([])
for ii in range(self.ntypes):
nei_type = np.append(nei_type, ii * np.ones(self.sel_a[ii])) # like a mask
self.nei_type = tf.constant(nei_type, dtype = tf.int32)
self.nei_type = np.repeat(np.arange(self.ntypes), self.sel_a) # like a mask

avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
Expand Down Expand Up @@ -673,8 +670,9 @@ def _concat_type_embedding(
embedding:
environment of each atom represented by embedding.
'''
te_out_dim = type_embedding.get_shape().as_list()[-1]
nei_embed = tf.nn.embedding_lookup(type_embedding,tf.cast(self.nei_type,dtype=tf.int32)) # shape is [self.nnei, 1+te_out_dim]
te_out_dim = type_embedding.get_shape().as_list()[-1]
self.t_nei_type = tf.constant(self.nei_type, dtype=tf.int32)
nei_embed = tf.nn.embedding_lookup(type_embedding,tf.cast(self.t_nei_type,dtype=tf.int32)) # shape is [self.nnei, 1+te_out_dim]
nei_embed = tf.tile(nei_embed,(nframes*natoms[0],1)) # shape is [nframes*natoms[0]*self.nnei, te_out_dim]
nei_embed = tf.reshape(nei_embed,[-1,te_out_dim])
embedding_input = tf.concat([xyz_scatter,nei_embed],1) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim]
Expand Down
86 changes: 59 additions & 27 deletions deepmd/descriptor/se_atten.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from deepmd.utils.type_embed import embed_atom_type
from deepmd.utils.sess import run_sess
from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph, get_tensor_by_name
from deepmd.utils.graph import get_attention_layer_variables_from_graph_def
from deepmd.utils.errors import GraphWithoutTensorError
from .descriptor import Descriptor
from .se_a import DescrptSeA
Expand Down Expand Up @@ -117,6 +118,9 @@ def __init__(self,
self.sel_all_r = [0]
avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
std_ones = np.ones([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
self.beta = np.zeros([self.attn_layer, self.filter_neuron[-1]]).astype(GLOBAL_NP_FLOAT_PRECISION)
self.gamma = np.ones([self.attn_layer, self.filter_neuron[-1]]).astype(GLOBAL_NP_FLOAT_PRECISION)
self.attention_layer_variables = None
sub_graph = tf.Graph()
with sub_graph.as_default():
name_pfx = 'd_sea_'
Expand Down Expand Up @@ -305,10 +309,6 @@ def build(self,
self.attn_weight = [None for i in range(self.attn_layer)]
self.angular_weight = [None for i in range(self.attn_layer)]
self.attn_weight_final = [None for i in range(self.attn_layer)]
self.G = None
self.qs = [None for i in range(self.attn_layer)]
self.ks = [None for i in range(self.attn_layer)]
self.vs = [None for i in range(self.attn_layer)]

self.descrpt, self.descrpt_deriv, self.rij, self.nlist, self.nei_type_vec, self.nmask \
= op_module.prod_env_mat_a_mix(coord,
Expand Down Expand Up @@ -365,8 +365,8 @@ def _pass_filter(self,
inputs_i = inputs
inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt])
type_i = -1
layer, qmat = self._filter(inputs_i, type_i, natoms, name='filter_type_all' + suffix, reuse=reuse,
trainable=trainable, activation_fn=self.filter_activation_fn,
layer, qmat = self._filter(inputs_i, type_i, natoms, name='filter_type_all' + suffix, suffix=suffix,
reuse=reuse, trainable=trainable, activation_fn=self.filter_activation_fn,
type_embedding=type_embedding, atype=atype)
layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()])
qmat = tf.reshape(qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3])
Expand Down Expand Up @@ -508,7 +508,8 @@ def _feedforward(self, input_xyz, d_in, d_mid):
activation_fn=None,
precision=self.filter_precision,
trainable=True,
uniform_seed=self.uniform_seed))
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables))
input_xyz = one_layer(
input_xyz,
d_in,
Expand All @@ -518,7 +519,8 @@ def _feedforward(self, input_xyz, d_in, d_mid):
activation_fn=None,
precision=self.filter_precision,
trainable=True,
uniform_seed=self.uniform_seed)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
input_xyz += residual
input_xyz = tf.keras.layers.LayerNormalization()(input_xyz)
return input_xyz
Expand Down Expand Up @@ -553,75 +555,75 @@ def _attention_layers(
input_r,
dotr=False,
do_mask=False,
trainable=True
trainable=True,
suffix=''
):
sd_k = tf.sqrt(tf.cast(1., dtype=self.filter_precision))
self.G = tf.reshape(input_xyz, (-1, shape_i[1] // 4, outputs_size[-1]))[0]
for i in range(layer_num):
with tf.variable_scope('attention_layer{}_'.format(i), reuse=tf.AUTO_REUSE):
name = 'attention_layer_{}{}'.format(i, suffix)
with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
# input_xyz_in = tf.nn.l2_normalize(input_xyz, -1)
Q_c = one_layer(
input_xyz,
self.att_n,
name='c_query',
scope=name+'/',
reuse=tf.AUTO_REUSE,
seed=self.seed,
activation_fn=None,
precision=self.filter_precision,
trainable=trainable,
uniform_seed=self.uniform_seed)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
K_c = one_layer(
input_xyz,
self.att_n,
name='c_key',
scope=name+'/',
reuse=tf.AUTO_REUSE,
seed=self.seed,
activation_fn=None,
precision=self.filter_precision,
trainable=trainable,
uniform_seed=self.uniform_seed)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
V_c = one_layer(
input_xyz,
self.att_n,
name='c_value',
scope=name+'/',
reuse=tf.AUTO_REUSE,
seed=self.seed,
activation_fn=None,
precision=self.filter_precision,
trainable=trainable,
uniform_seed=self.uniform_seed)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
# # natom x nei_type_i x out_size
# xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1]))
# natom x nei_type_i x att_n
Q_c = tf.nn.l2_normalize(tf.reshape(Q_c, (-1, shape_i[1] // 4, self.att_n)), -1)
K_c = tf.nn.l2_normalize(tf.reshape(K_c, (-1, shape_i[1] // 4, self.att_n)), -1)
V_c = tf.nn.l2_normalize(tf.reshape(V_c, (-1, shape_i[1] // 4, self.att_n)), -1)
# Q_c = tf.reshape(Q_c, (-1, shape_i[1] // 4, self.att_n))
# K_c = tf.reshape(K_c, (-1, shape_i[1] // 4, self.att_n))
# V_c = tf.reshape(V_c, (-1, shape_i[1] // 4, self.att_n))
self.qs[i] = Q_c[0]
self.ks[i] = K_c[0]
self.vs[i] = V_c[0]

input_att = self._scaled_dot_attn(Q_c, K_c, V_c, sd_k, input_r, dotr=dotr, do_mask=do_mask, layer=i)
input_att = tf.reshape(input_att, (-1, self.att_n))

# A_c = tf.nn.softmax(tf.matmul(Q_c, K_c, transpose_b=True)/sd_k)
# # (natom x nei_type_i) x att_n
# input_att = tf.reshape(tf.matmul(A_c, V_c), (-1, self.att_n))

# (natom x nei_type_i) x out_size
input_xyz += one_layer(
input_att,
outputs_size[-1],
name='c_out',
scope=name+'/',
reuse=tf.AUTO_REUSE,
seed=self.seed,
activation_fn=None,
precision=self.filter_precision,
trainable=trainable,
uniform_seed=self.uniform_seed)
input_xyz = tf.keras.layers.LayerNormalization()(input_xyz)
uniform_seed=self.uniform_seed,
initial_variables=self.attention_layer_variables)
input_xyz = tf.keras.layers.LayerNormalization(beta_initializer=tf.constant_initializer(self.beta[i]),
gamma_initializer=tf.constant_initializer(self.gamma[i]))(input_xyz)
# input_xyz = self._feedforward(input_xyz, outputs_size[-1], self.att_n)
return input_xyz

Expand Down Expand Up @@ -688,7 +690,7 @@ def _filter_lower(
# natom x nei_type_i x out_size
xyz_scatter_att = tf.reshape(
self._attention_layers(xyz_scatter, self.attn_layer, shape_i, outputs_size, input_r,
dotr=self.attn_dotr, do_mask=self.attn_mask, trainable=trainable),
dotr=self.attn_dotr, do_mask=self.attn_mask, trainable=trainable, suffix=suffix),
(-1, shape_i[1] // 4, outputs_size[-1]))
# xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1]))
else:
Expand All @@ -712,6 +714,7 @@ def _filter(
activation_fn=tf.nn.tanh,
stddev=1.0,
bavg=0.0,
suffix='',
name='linear',
reuse=None,
trainable=True):
Expand Down Expand Up @@ -745,6 +748,7 @@ def _filter(
stddev=stddev,
bavg=bavg,
trainable=trainable,
suffix=suffix,
name=name,
reuse=reuse,
atype=atype)
Expand Down Expand Up @@ -775,3 +779,31 @@ def _filter(
result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]])

return result, qmat

def init_variables(self,
                   graph: tf.Graph,
                   graph_def: tf.GraphDef,
                   suffix: str = "",
                   ) -> None:
    """
    Restore the descriptor state, including the attention layers, from a frozen model.

    The parent class' ``init_variables`` is invoked first. Afterwards the
    attention layer variables are extracted from ``graph_def`` and the
    layer-normalization ``beta``/``gamma`` of every attention layer are
    copied into ``self.beta`` and ``self.gamma``.

    Parameters
    ----------
    graph : tf.Graph
        The input frozen model graph
    graph_def : tf.GraphDef
        The input frozen model graph_def
    suffix : str, optional
        The suffix of the scope
    """
    super().init_variables(graph=graph, graph_def=graph_def, suffix=suffix)
    attn_vars = get_attention_layer_variables_from_graph_def(graph_def, suffix=suffix)
    self.attention_layer_variables = attn_vars
    for layer_idx in range(self.attn_layer):
        if layer_idx == 0:
            # the normalization of the first attention layer carries no numeric suffix
            beta_key = 'attention_layer_0{}/layer_normalization/beta'.format(suffix)
            gamma_key = 'attention_layer_0{}/layer_normalization/gamma'.format(suffix)
        else:
            # later layers are looked up as ``layer_normalization_<layer index>``
            beta_key = 'attention_layer_{}{}/layer_normalization_{}/beta'.format(
                layer_idx, suffix, layer_idx)
            gamma_key = 'attention_layer_{}{}/layer_normalization_{}/gamma'.format(
                layer_idx, suffix, layer_idx)
        self.beta[layer_idx] = attn_vars[beta_key]
        self.gamma[layer_idx] = attn_vars[gamma_key]
25 changes: 25 additions & 0 deletions deepmd/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
"TRANSFER_PATTERN",
"FITTING_NET_PATTERN",
"EMBEDDING_NET_PATTERN",
"TYPE_EMBEDDING_PATTERN",
"ATTENTION_LAYER_PATTERN",
"TF_VERSION"
]

Expand All @@ -59,18 +61,26 @@
r"filter_type_\d+/matrix_\d+_\d+|"
r"filter_type_\d+/bias_\d+_\d+|"
r"filter_type_\d+/idt_\d+_\d+|"
r"filter_type_all/matrix_\d+|"
r"filter_type_all/matrix_\d+_\d+|"
r"filter_type_all/matrix_\d+_\d+_\d+|"
r"filter_type_all/bias_\d+|"
r"filter_type_all/bias_\d+_\d+|"
r"filter_type_all/bias_\d+_\d+_\d+|"
r"filter_type_all/idt_\d+|"
r"filter_type_all/idt_\d+_\d+|"
)

# Regex alternatives naming the fitting-net variables: hidden layers and the
# final layer, each with and without a ``_type_<n>`` part in the name.
# Every alternative, including the last one, ends with ``|`` so that the
# string can be concatenated directly with further patterns (TRANSFER_PATTERN
# is built that way).
FITTING_NET_PATTERN = "".join((
    # hidden layers
    r"layer_\d+/matrix|",
    r"layer_\d+_type_\d+/matrix|",
    r"layer_\d+/bias|",
    r"layer_\d+_type_\d+/bias|",
    r"layer_\d+/idt|",
    r"layer_\d+_type_\d+/idt|",
    # final layer
    r"final_layer/matrix|",
    r"final_layer_type_\d+/matrix|",
    r"final_layer/bias|",
    r"final_layer_type_\d+/bias|",
))

Expand All @@ -80,6 +90,21 @@
r"type_embed_net+/idt_\d+|"
)

# Regex alternatives naming the variables of the attention layers: the
# query/key/value/output projections (matrix and bias) and the layer
# normalization parameters (beta and gamma), the latter with and without a
# numeric suffix on ``layer_normalization``.
# Every alternative, including the last one, ends with ``|``, the same
# convention the other pattern constants in this module follow.
ATTENTION_LAYER_PATTERN = "".join((
    # projections
    r"attention_layer_\d+/c_query/matrix|",
    r"attention_layer_\d+/c_query/bias|",
    r"attention_layer_\d+/c_key/matrix|",
    r"attention_layer_\d+/c_key/bias|",
    r"attention_layer_\d+/c_value/matrix|",
    r"attention_layer_\d+/c_value/bias|",
    r"attention_layer_\d+/c_out/matrix|",
    r"attention_layer_\d+/c_out/bias|",
    # layer normalization
    r"attention_layer_\d+/layer_normalization/beta|",
    r"attention_layer_\d+/layer_normalization/gamma|",
    r"attention_layer_\d+/layer_normalization_\d+/beta|",
    r"attention_layer_\d+/layer_normalization_\d+/gamma|",
))

TRANSFER_PATTERN = \
EMBEDDING_NET_PATTERN + \
FITTING_NET_PATTERN + \
Expand Down
17 changes: 12 additions & 5 deletions deepmd/fit/ener.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from deepmd.utils.network import one_layer as one_layer_deepmd
from deepmd.utils.type_embed import embed_atom_type
from deepmd.utils.graph import get_fitting_net_variables_from_graph_def, load_graph_def, get_tensor_by_name_from_graph
from deepmd.utils.errors import GraphWithoutTensorError
from deepmd.fit.fitting import Fitting

from deepmd.env import global_cvt_2_tf_float
Expand Down Expand Up @@ -400,6 +401,8 @@ def build (self,
if input_dict is None:
input_dict = {}
bias_atom_e = self.bias_atom_e
type_embedding = input_dict.get('type_embedding', None)
atype = input_dict.get('atype', None)
if self.numb_fparam > 0:
if self.fparam_avg is None:
self.fparam_avg = 0.
Expand All @@ -418,9 +421,10 @@ def build (self,
t_daparam = tf.constant(self.numb_aparam,
name = 'daparam',
dtype = tf.int32)
self.t_bias_atom_e = tf.get_variable('t_bias_atom_e',
if type_embedding is not None:
self.t_bias_atom_e = tf.get_variable('t_bias_atom_e',
self.bias_atom_e.shape,
dtype=GLOBAL_TF_FLOAT_PRECISION,
dtype=self.fitting_precision,
trainable=False,
initializer=tf.constant_initializer(self.bias_atom_e))
if self.numb_fparam > 0:
Expand Down Expand Up @@ -471,9 +475,7 @@ def build (self,
aparam = tf.reshape(aparam, [-1, self.numb_aparam])
aparam = (aparam - t_aparam_avg) * t_aparam_istd
aparam = tf.reshape(aparam, [-1, self.numb_aparam * natoms[0]])

type_embedding = input_dict.get('type_embedding', None)
atype = input_dict.get('atype', None)

if type_embedding is not None:
atype_nall = tf.reshape(atype, [-1, natoms[1]])
self.atype_nloc = tf.reshape(tf.slice(atype_nall, [0, 0], [-1, natoms[0]]), [-1]) ## lammps will make error
Expand Down Expand Up @@ -570,6 +572,11 @@ def init_variables(self,
if self.numb_aparam > 0:
self.aparam_avg = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_aparam_avg' % suffix)
self.aparam_inv_std = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_aparam_istd' % suffix)
try:
self.bias_atom_e = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_bias_atom_e' % suffix)
except GraphWithoutTensorError:
# model without type_embedding has no t_bias_atom_e
pass

def enable_compression(self,
model_file: str,
Expand Down
9 changes: 5 additions & 4 deletions deepmd/train/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,8 @@ def _init_param(self, jdata):

def build (self,
data = None,
stop_batch = 0) :
stop_batch = 0,
suffix = "") :
self.ntypes = self.model.get_ntypes()
self.stop_batch = stop_batch

Expand Down Expand Up @@ -348,7 +349,7 @@ def build (self,
self.fitting.enable_mixed_precision(self.mixed_prec)

self._build_lr()
self._build_network(data)
self._build_network(data, suffix)
self._build_training()


Expand All @@ -358,7 +359,7 @@ def _build_lr(self):
self.learning_rate = self.lr.build(self.global_step, self.stop_batch)
log.info("built lr")

def _build_network(self, data):
def _build_network(self, data, suffix=""):
self.place_holders = {}
if self.is_compress :
for kk in ['coord', 'box']:
Expand All @@ -379,7 +380,7 @@ def _build_network(self, data):
self.place_holders['default_mesh'],
self.place_holders,
self.frz_model,
suffix = "",
suffix = suffix,
reuse = False)

self.l2_l, self.l2_more\
Expand Down
6 changes: 3 additions & 3 deletions deepmd/utils/argcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,9 @@ def descrpt_se_atten_args():
Argument("seed", [int, None], optional=True, doc=doc_seed),
Argument("exclude_types", list, optional=True, default=[], doc=doc_exclude_types),
Argument("set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero),
Argument("attn", int, optional=True, default=100, doc=doc_attn),
Argument("attn_layer", int, optional=True, default=4, doc=doc_attn_layer),
Argument("attn_dotr", bool, optional=True, default=False, doc=doc_attn_dotr),
Argument("attn", int, optional=True, default=128, doc=doc_attn),
Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer),
Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr),
Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask)
]

Expand Down
Loading