From 45cf4e73f95272026e17480ba083733548779d31 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 13 Jun 2024 20:06:00 -0400 Subject: [PATCH 01/13] dpmodel seed Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/descriptor/dpa1.py | 4 ++++ deepmd/dpmodel/descriptor/se_e2_a.py | 1 + deepmd/dpmodel/descriptor/se_r.py | 1 + deepmd/dpmodel/descriptor/se_t.py | 1 + deepmd/dpmodel/fitting/general_fitting.py | 4 ++++ deepmd/dpmodel/utils/type_embed.py | 1 + 6 files changed, 12 insertions(+) diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index 876062cce6..177f2fe718 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -294,6 +294,7 @@ def __init__( env_protection=env_protection, trainable_ln=trainable_ln, ln_eps=ln_eps, + seed=seed, ) self.use_econf_tebd = use_econf_tebd self.type_map = type_map @@ -625,6 +626,7 @@ def __init__( trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, smooth: bool = True, + seed: Optional[int] = None, ) -> None: self.rcut = rcut self.rcut_smth = rcut_smth @@ -674,6 +676,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=seed, ) if self.tebd_input_mode in ["strip"]: self.embeddings_strip = NetworkCollection( @@ -687,6 +690,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=seed, ) else: self.embeddings_strip = None diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index 504e357aeb..52531512c6 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -198,6 +198,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=seed, ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index 938826d16c..68968bbe89 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -156,6 +156,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=seed, ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/descriptor/se_t.py b/deepmd/dpmodel/descriptor/se_t.py index b91f9a6c6e..5c4664e1f9 100644 --- a/deepmd/dpmodel/descriptor/se_t.py +++ b/deepmd/dpmodel/descriptor/se_t.py @@ -136,6 +136,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=seed, ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index 2f0b3c7ac6..8fe462a2f6 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -82,6 +82,8 @@ class GeneralFitting(NativeOP, BaseFitting): length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list. type_map: List[str], Optional A list of strings. Give the name to each type of atoms. + seed: Optional[int] + Random seed for initializing the network parameters. """ def __init__( @@ -106,6 +108,7 @@ def __init__( exclude_types: List[int] = [], remove_vaccum_contribution: Optional[List[bool]] = None, type_map: Optional[List[str]] = None, + seed: Optional[int] = None, ): self.var_name = var_name self.ntypes = ntypes @@ -166,6 +169,7 @@ def __init__( self.resnet_dt, self.precision, bias_out=True, + seed=seed, ) for ii in range(self.ntypes if not self.mixed_types else 1) ], diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py index 99508ea7b3..6d3f6054cb 100644 --- a/deepmd/dpmodel/utils/type_embed.py +++ b/deepmd/dpmodel/utils/type_embed.py @@ -83,6 +83,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, + seed=self.seed, ) def call(self) -> np.ndarray: From 7f5580a4f36b810b226bb92fa2c3c2da8cd83595 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 16:06:58 -0400 Subject: [PATCH 02/13] different seeds for different layer Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/utils/network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 7a17cc459d..6e8c4958de 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -581,7 +581,7 @@ def __init__( activation_function=activation_function, resnet=True, precision=precision, - seed=seed, + seed=seed + idx, ).serialize() ) i_in = i_ot @@ -688,7 +688,7 @@ def __init__( activation_function=None, resnet=False, precision=precision, - seed=seed, + seed=seed + len(self.layers), ) ) self.out_dim = out_dim From 87d8fe157522d79caab88aa68a1397295347aefa Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 16:09:17 -0400 Subject: [PATCH 03/13] fix missing docstring Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/utils/network.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 6e8c4958de..6bc8075772 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -61,6 +61,10 @@ class NativeLayer(NativeOP): The activation function of the layer. resnet : bool, optional Whether the layer is a residual layer. + precision : str, optional + The precision of the layer. + seed : int, optional + Random seed. """ def __init__( @@ -299,6 +303,12 @@ class LayerNorm(NativeLayer): A small value added to prevent division by zero in calculations. uni_init : bool, optional If initialize the weights to be zeros and ones. + trainable : bool, optional + If the weights are trainable. + precision : str, optional + The precision of the layer. + seed : int, optional + Random seed. """ def __init__( @@ -556,7 +566,8 @@ class EN(T_Network): Use time step at the resnet architecture. precision Floating point precision for the model paramters. - + seed : int, optional + Random seed. """ def __init__( @@ -656,7 +667,8 @@ class FN(T_EmbeddingNet): Floating point precision for the model paramters. bias_out The last linear layer has bias. - + seed : int, optional + Random seed. """ def __init__( From aa29d16e5475dc757cc67752e9ce69f7f8367063 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 16:15:24 -0400 Subject: [PATCH 04/13] descriptor Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/descriptor/se_e2_a.py | 6 +++--- deepmd/dpmodel/descriptor/se_r.py | 2 +- deepmd/dpmodel/descriptor/se_t.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index 52531512c6..c1fefaf9eb 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -189,8 +189,8 @@ def __init__( ndim=(1 if self.type_one_side else 2), network_type="embedding_network", ) - for embedding_idx in itertools.product( - range(self.ntypes), repeat=self.embeddings.ndim + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=self.embeddings.ndim) ): self.embeddings[embedding_idx] = EmbeddingNet( in_dim, @@ -198,7 +198,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed, + seed=seed + len(self.neuron) * ii if seed is not None else None, ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index 68968bbe89..eadd8f1001 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -156,7 +156,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed, + seed=seed + len(self.neuron) * ii if seed is not None else None, ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/descriptor/se_t.py b/deepmd/dpmodel/descriptor/se_t.py index 5c4664e1f9..1b1845bf34 100644 --- a/deepmd/dpmodel/descriptor/se_t.py +++ b/deepmd/dpmodel/descriptor/se_t.py @@ -127,8 +127,8 @@ def __init__( ndim=2, network_type="embedding_network", ) - for embedding_idx in itertools.product( - range(self.ntypes), repeat=self.embeddings.ndim + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=self.embeddings.ndim) ): self.embeddings[embedding_idx] = EmbeddingNet( in_dim, @@ -136,7 +136,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed, + seed=seed + len(self.neuron) * ii if seed is not None else None, ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) From 25b9d6edc0b55bfbdd18ba4616e5777f371ad781 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 16:16:27 -0400 Subject: [PATCH 05/13] fitting Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/fitting/general_fitting.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index 8fe462a2f6..5f73e0b1aa 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -169,7 +169,9 @@ def __init__( self.resnet_dt, self.precision, bias_out=True, - seed=seed, + seed=seed + ii * (len(self.neuron) + 1) + if seed is not None + else None, ) for ii in range(self.ntypes if not self.mixed_types else 1) ], From 0794e152bda6f4a6610a84bca42479bf6f71de72 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 16:19:03 -0400 Subject: [PATCH 06/13] dpa1 Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/descriptor/dpa1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index 177f2fe718..8d4d425052 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -690,7 +690,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed, + seed=seed + 1 if seed is not None else None, ) else: self.embeddings_strip = None From 18b27a9d4b43e77a2053f0e13aefd15a1b6f0ac5 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 16:19:58 -0400 Subject: [PATCH 07/13] seed may be none Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/utils/network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 6bc8075772..3a49ef2be7 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -592,7 +592,7 @@ def __init__( activation_function=activation_function, resnet=True, precision=precision, - seed=seed + idx, + seed=seed + idx if seed is not None else None, ).serialize() ) i_in = i_ot @@ -700,7 +700,7 @@ def __init__( activation_function=None, resnet=False, precision=precision, - seed=seed + len(self.layers), + seed=seed + len(self.layers) if seed is not None else None, ) ) self.out_dim = out_dim From 3d2d4bc43826f441227eb614d17297b11f9142bf Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 16:53:21 -0400 Subject: [PATCH 08/13] pt & dp Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/descriptor/dpa1.py | 19 +- deepmd/dpmodel/descriptor/dpa2.py | 8 + deepmd/dpmodel/descriptor/repformers.py | 232 +++++++++++------- deepmd/pt/model/descriptor/dpa1.py | 2 +- deepmd/pt/model/descriptor/dpa2.py | 9 +- deepmd/pt/model/descriptor/repformer_layer.py | 47 +++- deepmd/pt/model/descriptor/repformers.py | 2 +- deepmd/pt/model/descriptor/se_a.py | 6 +- deepmd/pt/model/descriptor/se_atten.py | 12 +- deepmd/pt/model/descriptor/se_r.py | 2 +- deepmd/pt/model/descriptor/se_t.py | 6 +- deepmd/pt/model/task/fitting.py | 2 +- 12 files changed, 227 insertions(+), 120 deletions(-) diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index 8d4d425052..f4b5d77cfe 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -306,6 +306,7 @@ def __init__( precision=precision, use_econf_tebd=use_econf_tebd, type_map=type_map, + seed=seed + len(neuron) * 2 + attn_layer * 3 if seed is not None else None, ) self.tebd_dim = tebd_dim self.concat_output_tebd = concat_output_tebd @@ -690,7 +691,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed + 1 if seed is not None else None, + seed=seed + len(self.neuron) if seed is not None else None, ) else: self.embeddings_strip = None @@ -707,6 +708,7 @@ def __init__( ln_eps=self.ln_eps, smooth=self.smooth, precision=self.precision, + seed=seed + len(self.neuron) * 2 if seed is not None else None, ) wanted_shape = (self.ntypes, self.nnei, 4) @@ -954,6 +956,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, + seed: Optional[int] = None, ): """Construct a neighbor-wise attention net.""" super().__init__() @@ -986,8 +989,9 @@ def __init__( ln_eps=ln_eps, smooth=smooth, precision=precision, + seed=seed + ii * 3 if seed is not None else None, ) - for _ in range(layer_num) + for ii in range(layer_num) ] def call( @@ -1080,6 +1084,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, + seed: Optional[int] = None, ): """Construct a neighbor-wise attention layer.""" super().__init__() @@ -1105,9 +1110,14 @@ def __init__( temperature=temperature, smooth=smooth, precision=precision, + seed=seed, ) self.attn_layer_norm = LayerNorm( - self.embed_dim, eps=ln_eps, trainable=self.trainable_ln, precision=precision + self.embed_dim, + eps=ln_eps, + trainable=self.trainable_ln, + precision=precision, + seed=seed + 2 if seed is not None else None, ) def call( @@ -1180,6 +1190,7 @@ def __init__( bias: bool = True, smooth: bool = True, precision: str = DEFAULT_PRECISION, + seed: Optional[int] = None, ): """Construct a multi-head neighbor-wise attention net.""" super().__init__() @@ -1208,6 +1219,7 @@ def __init__( bias=bias, use_timestep=False, precision=precision, + seed=seed, ) self.out_proj = NativeLayer( hidden_dim, @@ -1215,6 +1227,7 @@ def __init__( bias=bias, use_timestep=False, precision=precision, + seed=seed + 1 if seed is not None else None, ) def call(self, query, nei_mask, input_r=None, sw=None, attnw_shift=20.0): diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py index 766fe19302..c4e92780fa 100644 --- a/deepmd/dpmodel/descriptor/dpa2.py +++ b/deepmd/dpmodel/descriptor/dpa2.py @@ -408,6 +408,7 @@ def init_subclass_params(sub_data, sub_class): resnet_dt=self.repinit_args.resnet_dt, smooth=smooth, type_one_side=self.repinit_args.type_one_side, + seed=seed, ) self.repformers = DescrptBlockRepformers( self.repformer_args.rcut, @@ -442,6 +443,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, trainable_ln=self.repformer_args.trainable_ln, ln_eps=self.repformer_args.ln_eps, + seed=seed + len(self.repinit_args.neuron) * 2 if seed is not None else None, ) self.use_econf_tebd = use_econf_tebd self.type_map = type_map @@ -453,6 +455,12 @@ def init_subclass_params(sub_data, sub_class): precision=precision, use_econf_tebd=use_econf_tebd, type_map=type_map, + seed=seed + + len(self.repinit_args.neuron) * 2 + + 1 + + self.repformer_args.nlayers * 14 + if seed is not None + else None, ) self.concat_output_tebd = concat_output_tebd self.precision = precision diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py index db11268eca..6c8649c66b 100644 --- a/deepmd/dpmodel/descriptor/repformers.py +++ b/deepmd/dpmodel/descriptor/repformers.py @@ -40,6 +40,87 @@ @DescriptorBlock.register("se_repformer") @DescriptorBlock.register("se_uni") class DescrptBlockRepformers(NativeOP, DescriptorBlock): + r""" + The repformer descriptor block. + + Parameters + ---------- + rcut : float + The cut-off radius. + rcut_smth : float + Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth. + sel : int + Maximally possible number of selected neighbors. + ntypes : int + Number of element types + nlayers : int, optional + Number of repformer layers. + g1_dim : int, optional + Dimension of the first graph convolution layer. + g2_dim : int, optional + Dimension of the second graph convolution layer. + axis_neuron : int, optional + Size of the submatrix of G (embedding matrix). + direct_dist : bool, optional + Whether to use direct distance information (1/r term) in the repformer block. + update_g1_has_conv : bool, optional + Whether to update the g1 rep with convolution term. + update_g1_has_drrd : bool, optional + Whether to update the g1 rep with the drrd term. + update_g1_has_grrg : bool, optional + Whether to update the g1 rep with the grrg term. + update_g1_has_attn : bool, optional + Whether to update the g1 rep with the localized self-attention. + update_g2_has_g1g1 : bool, optional + Whether to update the g2 rep with the g1xg1 term. + update_g2_has_attn : bool, optional + Whether to update the g2 rep with the gated self-attention. + update_h2 : bool, optional + Whether to update the h2 rep. + attn1_hidden : int, optional + The hidden dimension of localized self-attention to update the g1 rep. + attn1_nhead : int, optional + The number of heads in localized self-attention to update the g1 rep. + attn2_hidden : int, optional + The hidden dimension of gated self-attention to update the g2 rep. + attn2_nhead : int, optional + The number of heads in gated self-attention to update the g2 rep. + attn2_has_gate : bool, optional + Whether to use gate in the gated self-attention to update the g2 rep. + activation_function : str, optional + The activation function in the embedding net. + update_style : str, optional + Style to update a representation. + Supported options are: + -'res_avg': Updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n) + -'res_incr': Updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... + u_n) + -'res_residual': Updates a rep `u` with: u = u + (r1*u_1 + r2*u_2 + ... + r3*u_n) + where `r1`, `r2` ... `r3` are residual weights defined by `update_residual` + and `update_residual_init`. + update_residual : float, optional + When update using residual mode, the initial std of residual vector weights. + update_residual_init : str, optional + When update using residual mode, the initialization mode of residual vector weights. + set_davg_zero : bool, optional + Set the normalization average to zero. + precision : str, optional + The precision of the embedding net parameters. + smooth : bool, optional + Whether to use smoothness in processes such as attention weights calculation. + exclude_types : List[List[int]], optional + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection : float, optional + Protection parameter to prevent division by zero errors during environment matrix calculations. + For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. + trainable_ln : bool, optional + Whether to use trainable shift and scale weights in layer normalization. + ln_eps : float, optional + The epsilon value for layer normalization. + seed : int, optional + The random seed for initialization. + """ + def __init__( self, rcut, @@ -74,85 +155,8 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, + seed: Optional[int] = None, ): - r""" - The repformer descriptor block. - - Parameters - ---------- - rcut : float - The cut-off radius. - rcut_smth : float - Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth. - sel : int - Maximally possible number of selected neighbors. - ntypes : int - Number of element types - nlayers : int, optional - Number of repformer layers. - g1_dim : int, optional - Dimension of the first graph convolution layer. - g2_dim : int, optional - Dimension of the second graph convolution layer. - axis_neuron : int, optional - Size of the submatrix of G (embedding matrix). - direct_dist : bool, optional - Whether to use direct distance information (1/r term) in the repformer block. - update_g1_has_conv : bool, optional - Whether to update the g1 rep with convolution term. - update_g1_has_drrd : bool, optional - Whether to update the g1 rep with the drrd term. - update_g1_has_grrg : bool, optional - Whether to update the g1 rep with the grrg term. - update_g1_has_attn : bool, optional - Whether to update the g1 rep with the localized self-attention. - update_g2_has_g1g1 : bool, optional - Whether to update the g2 rep with the g1xg1 term. - update_g2_has_attn : bool, optional - Whether to update the g2 rep with the gated self-attention. - update_h2 : bool, optional - Whether to update the h2 rep. - attn1_hidden : int, optional - The hidden dimension of localized self-attention to update the g1 rep. - attn1_nhead : int, optional - The number of heads in localized self-attention to update the g1 rep. - attn2_hidden : int, optional - The hidden dimension of gated self-attention to update the g2 rep. - attn2_nhead : int, optional - The number of heads in gated self-attention to update the g2 rep. - attn2_has_gate : bool, optional - Whether to use gate in the gated self-attention to update the g2 rep. - activation_function : str, optional - The activation function in the embedding net. - update_style : str, optional - Style to update a representation. - Supported options are: - -'res_avg': Updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n) - -'res_incr': Updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... + u_n) - -'res_residual': Updates a rep `u` with: u = u + (r1*u_1 + r2*u_2 + ... + r3*u_n) - where `r1`, `r2` ... `r3` are residual weights defined by `update_residual` - and `update_residual_init`. - update_residual : float, optional - When update using residual mode, the initial std of residual vector weights. - update_residual_init : str, optional - When update using residual mode, the initialization mode of residual vector weights. - set_davg_zero : bool, optional - Set the normalization average to zero. - precision : str, optional - The precision of the embedding net parameters. - smooth : bool, optional - Whether to use smoothness in processes such as attention weights calculation. - exclude_types : List[List[int]], optional - The excluded pairs of types which have no interaction with each other. - For example, `[[0, 1]]` means no interaction between type 0 and type 1. - env_protection : float, optional - Protection parameter to prevent division by zero errors during environment matrix calculations. - For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. - trainable_ln : bool, optional - Whether to use trainable shift and scale weights in layer normalization. - ln_eps : float, optional - The epsilon value for layer normalization. - """ super().__init__() self.rcut = rcut self.rcut_smth = rcut_smth @@ -196,7 +200,7 @@ def __init__( self.ln_eps = ln_eps self.epsilon = 1e-4 - self.g2_embd = NativeLayer(1, self.g2_dim, precision=precision) + self.g2_embd = NativeLayer(1, self.g2_dim, precision=precision, seed=seed) layers = [] for ii in range(nlayers): layers.append( @@ -229,6 +233,7 @@ def __init__( trainable_ln=self.trainable_ln, ln_eps=self.ln_eps, precision=precision, + seed=seed + 1 + ii * 14 if seed is not None else None, ) ) self.layers = layers @@ -399,6 +404,7 @@ def get_residual( _mode: str = "norm", trainable: bool = True, precision: str = "float64", + seed: Optional[int] = None, ) -> np.ndarray: """ Get residual tensor for one update vector. @@ -419,7 +425,7 @@ def get_residual( The precision of the residual tensor. """ residual = np.zeros(_dim, dtype=PRECISION_DICT[precision]) - rng = np.random.default_rng() + rng = np.random.default_rng(seed=seed) if trainable: if _mode == "norm": residual = rng.normal(scale=_scale, size=_dim).astype( @@ -634,6 +640,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", + seed: Optional[int] = None, ): """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -641,7 +648,11 @@ def __init__( self.hidden_dim = hidden_dim self.head_num = head_num self.mapqk = NativeLayer( - input_dim, hidden_dim * 2 * head_num, bias=False, precision=precision + input_dim, + hidden_dim * 2 * head_num, + bias=False, + precision=precision, + seed=seed, ) self.has_gate = has_gate self.smooth = smooth @@ -894,22 +905,31 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", + seed: Optional[int] = None, ): super().__init__() self.input_dim = input_dim self.hidden_dim = hidden_dim self.head_num = head_num self.mapq = NativeLayer( - input_dim, hidden_dim * 1 * head_num, bias=False, precision=precision + input_dim, + hidden_dim * 1 * head_num, + bias=False, + precision=precision, + seed=seed, ) self.mapkv = NativeLayer( input_dim, (hidden_dim + input_dim) * head_num, bias=False, precision=precision, + seed=seed + 1 if seed is not None else None, ) self.head_map = NativeLayer( - input_dim * head_num, input_dim, precision=precision + input_dim * head_num, + input_dim, + precision=precision, + seed=seed + 2 if seed is not None else None, ) self.smooth = smooth self.attnw_shift = attnw_shift @@ -1044,6 +1064,7 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, + seed: Optional[int] = None, ): super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -1099,11 +1120,17 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=seed, ) ) g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, self.axis_neuron) - self.linear1 = NativeLayer(g1_in_dim, g1_dim, precision=precision) + self.linear1 = NativeLayer( + g1_in_dim, + g1_dim, + precision=precision, + seed=seed + 1 if seed is not None else None, + ) self.linear2 = None self.proj_g1g2 = None self.proj_g1g1g2 = None @@ -1114,7 +1141,12 @@ def __init__( self.loc_attn = None if self.update_chnnl_2: - self.linear2 = NativeLayer(g2_dim, g2_dim, precision=precision) + self.linear2 = NativeLayer( + g2_dim, + g2_dim, + precision=precision, + seed=seed + 2 if seed is not None else None, + ) if self.update_style == "res_residual": self.g2_residual.append( get_residual( @@ -1122,15 +1154,24 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=seed + 3 if seed is not None else None, ) ) if self.update_g1_has_conv: self.proj_g1g2 = NativeLayer( - g1_dim, g2_dim, bias=False, precision=precision + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=seed + 4 if seed is not None else None, ) if self.update_g2_has_g1g1: self.proj_g1g1g2 = NativeLayer( - g1_dim, g2_dim, bias=False, precision=precision + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=seed + 5 if seed is not None else None, ) if self.update_style == "res_residual": self.g2_residual.append( @@ -1139,6 +1180,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=seed + 6 if seed is not None else None, ) ) if self.update_g2_has_attn or self.update_h2: @@ -1149,13 +1191,18 @@ def __init__( attn2_has_gate, self.smooth, precision=precision, + seed=seed + 7 if seed is not None else None, ) if self.update_g2_has_attn: self.attn2_mh_apply = Atten2MultiHeadApply( g2_dim, attn2_nhead, precision=precision ) self.attn2_lm = LayerNorm( - g2_dim, eps=ln_eps, trainable=trainable_ln, precision=precision + g2_dim, + eps=ln_eps, + trainable=trainable_ln, + precision=precision, + seed=seed + 8 if seed is not None else None, ) if self.update_style == "res_residual": self.g2_residual.append( @@ -1178,11 +1225,17 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=seed + 9 if seed is not None else None, ) ) if self.update_g1_has_attn: self.loc_attn = LocalAtten( - g1_dim, attn1_hidden, attn1_nhead, self.smooth, precision=precision + g1_dim, + attn1_hidden, + attn1_nhead, + self.smooth, + precision=precision, + seed=seed + 10 if seed is not None else None, ) if self.update_style == "res_residual": self.g1_residual.append( @@ -1191,6 +1244,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=seed + 13 if seed is not None else None, ) ) diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index ff29d14e1d..5ec66bdacb 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -295,7 +295,7 @@ def __init__( ntypes, tebd_dim, precision=precision, - seed=seed, + seed=seed + len(neuron) * 2 + attn_layer * 3 if seed is not None else None, use_econf_tebd=use_econf_tebd, type_map=type_map, ) diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index ae8c924e9a..457d83329f 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -204,7 +204,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, trainable_ln=self.repformer_args.trainable_ln, ln_eps=self.repformer_args.ln_eps, - seed=seed, + seed=seed + len(self.repinit_args.neuron) * 2 if seed is not None else None, old_impl=old_impl, ) self.use_econf_tebd = use_econf_tebd @@ -213,7 +213,12 @@ def init_subclass_params(sub_data, sub_class): ntypes, self.repinit_args.tebd_dim, precision=precision, - seed=seed, + seed=seed + + len(self.repinit_args.neuron) * 2 + + 1 + + self.repformer_args.nlayers * 14 + if seed is not None + else None, use_econf_tebd=self.use_econf_tebd, type_map=type_map, ) diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py index 3f377f9de5..d3854e0196 100644 --- a/deepmd/pt/model/descriptor/repformer_layer.py +++ b/deepmd/pt/model/descriptor/repformer_layer.py @@ -454,10 +454,13 @@ def __init__( (hidden_dim + input_dim) * head_num, bias=False, precision=precision, - seed=seed, + seed=seed + 1 if seed is not None else None, ) self.head_map = MLPLayer( - input_dim * head_num, input_dim, precision=precision, seed=seed + input_dim * head_num, + input_dim, + precision=precision, + seed=seed + 2 if seed is not None else None, ) self.smooth = smooth self.attnw_shift = attnw_shift @@ -651,7 +654,12 @@ def __init__( ) g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, self.axis_neuron) - self.linear1 = MLPLayer(g1_in_dim, g1_dim, precision=precision, seed=seed) + self.linear1 = MLPLayer( + g1_in_dim, + g1_dim, + precision=precision, + seed=seed + 1 if seed is not None else None, + ) self.linear2 = None self.proj_g1g2 = None self.proj_g1g1g2 = None @@ -662,7 +670,12 @@ def __init__( self.loc_attn = None if self.update_chnnl_2: - self.linear2 = MLPLayer(g2_dim, g2_dim, precision=precision, seed=seed) + self.linear2 = MLPLayer( + g2_dim, + g2_dim, + precision=precision, + seed=seed + 2 if seed is not None else None, + ) if self.update_style == "res_residual": self.g2_residual.append( get_residual( @@ -675,11 +688,19 @@ def __init__( ) if self.update_g1_has_conv: self.proj_g1g2 = MLPLayer( - g1_dim, g2_dim, bias=False, precision=precision, seed=seed + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=seed + 3 if seed is not None else None, ) if self.update_g2_has_g1g1: self.proj_g1g1g2 = MLPLayer( - g1_dim, g2_dim, bias=False, precision=precision, seed=seed + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=seed + 4 if seed is not None else None, ) if self.update_style == "res_residual": self.g2_residual.append( @@ -688,7 +709,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=seed + 5 if seed is not None else None, ) ) if self.update_g2_has_attn or self.update_h2: @@ -699,7 +720,7 @@ def __init__( attn2_has_gate, self.smooth, precision=precision, - seed=seed, + seed=seed + 6 if seed is not None else None, ) if self.update_g2_has_attn: self.attn2_mh_apply = Atten2MultiHeadApply( @@ -710,7 +731,7 @@ def __init__( eps=ln_eps, trainable=trainable_ln, precision=precision, - seed=seed, + seed=seed + 7 if seed is not None else None, ) if self.update_style == "res_residual": self.g2_residual.append( @@ -719,7 +740,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=seed + 8 if seed is not None else None, ) ) @@ -734,7 +755,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=seed + 9 if seed is not None else None, ) ) if self.update_g1_has_attn: @@ -744,7 +765,7 @@ def __init__( attn1_nhead, self.smooth, precision=precision, - seed=seed, + seed=seed + 10 if seed is not None else None, ) if self.update_style == "res_residual": self.g1_residual.append( @@ -753,7 +774,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=seed + 13 if seed is not None else None, ) ) diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index 54fbc2d216..e3e8d85b94 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -291,7 +291,7 @@ def __init__( trainable_ln=self.trainable_ln, ln_eps=self.ln_eps, precision=precision, - seed=seed, + seed=seed + 1 + ii * 14 if seed is not None else None, ) ) self.layers = torch.nn.ModuleList(layers) diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index e771c03e52..81e8ddd04f 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -448,14 +448,16 @@ def __init__( filter_layers = NetworkCollection( ndim=ndim, ntypes=len(sel), network_type="embedding_network" ) - for embedding_idx in itertools.product(range(self.ntypes), repeat=ndim): + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=ndim) + ): filter_layers[embedding_idx] = EmbeddingNet( 1, self.filter_neuron, activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=self.seed + ii if self.seed is not None else None, ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index a2ab5dd9cf..3a7991388e 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -271,7 +271,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=self.seed + self.attn_layer * 3 if self.seed is not None else None, ) self.filter_layers = filter_layers if self.tebd_input_mode in ["strip"]: @@ -284,7 +284,9 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=self.seed + self.attn_layer * 3 + len(self.filter_neuron) + if self.seed is not None + else None, ) self.filter_layers_strip = filter_layers_strip self.stats = None @@ -640,7 +642,7 @@ def __init__( ln_eps=ln_eps, smooth=smooth, precision=precision, - seed=seed, + seed=seed + i * 3 if seed is not None else None, ) ) self.attention_layers = nn.ModuleList(attention_layers) @@ -782,7 +784,7 @@ def __init__( eps=ln_eps, trainable=trainable_ln, precision=precision, - seed=seed, + seed=seed + 2 if seed is not None else None, ) def forward( @@ -897,7 +899,7 @@ def __init__( bavg=0.0, stddev=1.0, precision=precision, - seed=seed, + seed=seed + 1 if seed is not None else None, ) def forward( diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index e6ebe53c26..50e450ba24 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -120,7 +120,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=self.seed + ii if self.seed is not None else None, ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/descriptor/se_t.py b/deepmd/pt/model/descriptor/se_t.py index caa4c9ce45..3436791f53 100644 --- a/deepmd/pt/model/descriptor/se_t.py +++ b/deepmd/pt/model/descriptor/se_t.py @@ -474,14 +474,16 @@ def __init__( filter_layers = NetworkCollection( ndim=ndim, ntypes=len(sel), network_type="embedding_network" ) - for embedding_idx in itertools.product(range(self.ntypes), repeat=ndim): + for ii, embedding_idx in enumerate( + itertools.product(range(self.ntypes), repeat=ndim) + ): filter_layers[embedding_idx] = EmbeddingNet( 1, self.filter_neuron, activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed, + seed=self.seed + ii if self.seed is not None else None, ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 0ca2c5c896..f5901c897a 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -238,7 +238,7 @@ def __init__( self.resnet_dt, self.precision, bias_out=True, - seed=seed, + seed=seed + ii if seed is not None else None, ) for ii in range(self.ntypes if not self.mixed_types else 1) ], From 2ef21410953b632dbe813b63163c8aa405a44c1c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 17:06:53 -0400 Subject: [PATCH 09/13] fix TF seeds Signed-off-by: Jinzhe Zeng --- deepmd/tf/descriptor/se_a.py | 7 ++++++- deepmd/tf/descriptor/se_atten.py | 22 ++++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/deepmd/tf/descriptor/se_a.py b/deepmd/tf/descriptor/se_a.py index babec2d68e..1248e3cfc8 100644 --- a/deepmd/tf/descriptor/se_a.py +++ b/deepmd/tf/descriptor/se_a.py @@ -1036,6 +1036,8 @@ def _filter_lower( mixed_prec=self.mixed_prec, ) net_output = tf.nn.embedding_lookup(net_output, idx) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift net_output = tf.reshape(net_output, [-1, self.filter_neuron[-1]]) else: xyz_scatter = self._concat_type_embedding( @@ -1047,7 +1049,7 @@ def _filter_lower( ) # natom x 4 x outputs_size if nvnmd_cfg.enable: - return filter_lower_R42GR( + oo = filter_lower_R42GR( type_i, type_input, inputs_i, @@ -1065,6 +1067,9 @@ def _filter_lower( self.filter_resnet_dt, self.embedding_net_variables, ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift + return oo if self.compress and (not is_exclude): if self.stripped_type_embedding: net_output = tf.nn.embedding_lookup( diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py index 312a7481ba..8df4cb0db2 100644 --- a/deepmd/tf/descriptor/se_atten.py +++ b/deepmd/tf/descriptor/se_atten.py @@ -63,6 +63,7 @@ ) from deepmd.tf.utils.network import ( embedding_net, + embedding_net_rand_seed_shift, layernorm, one_layer, ) @@ -997,6 +998,8 @@ def _attention_layers( uniform_seed=self.uniform_seed, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 K_c = one_layer( input_xyz, self.att_n, @@ -1010,6 +1013,8 @@ def _attention_layers( uniform_seed=self.uniform_seed, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 V_c = one_layer( input_xyz, self.att_n, @@ -1023,6 +1028,8 @@ def _attention_layers( uniform_seed=self.uniform_seed, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 # # natom x nei_type_i x out_size # xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1])) # natom x nei_type_i x att_n @@ -1055,6 +1062,8 @@ def _attention_layers( uniform_seed=self.uniform_seed, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 input_xyz = layernorm( input_xyz, outputs_size[-1], @@ -1068,6 +1077,8 @@ def _attention_layers( eps=self.ln_eps, initial_variables=self.attention_layer_variables, ) + if not self.uniform_seed and self.seed is not None: + self.seed += 1 return input_xyz def _filter_lower( @@ -1125,6 +1136,8 @@ def _filter_lower( initial_variables=self.embedding_net_variables, mixed_prec=self.mixed_prec, ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift else: if self.attn_layer == 0: log.info( @@ -1164,6 +1177,8 @@ def _filter_lower( initial_variables=self.embedding_net_variables, mixed_prec=self.mixed_prec, ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift else: net = "filter_net" info = [ @@ -1221,6 +1236,8 @@ def _filter_lower( initial_variables=self.two_side_embeeding_net_variables, mixed_prec=self.mixed_prec, ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift two_embd = tf.nn.embedding_lookup( embedding_of_two_side_type_embedding, index_of_two_side ) @@ -1239,8 +1256,6 @@ def _filter_lower( is_sorted=len(self.exclude_types) == 0, ) - if (not self.uniform_seed) and (self.seed is not None): - self.seed += self.seed_shift input_r = tf.slice( tf.reshape(inputs_i, (-1, shape_i[1] // 4, 4)), [0, 0, 1], [-1, -1, 3] ) @@ -2163,6 +2178,7 @@ def __init__( use_econf_tebd=use_econf_tebd, type_map=type_map, # precision=precision, + seed=seed, ) self.concat_output_tebd = concat_output_tebd if self.tebd_input_mode in ["concat"]: @@ -2185,6 +2201,8 @@ def build( suffix: str = "", ) -> tf.Tensor: type_embedding = self.type_embedding.build(self.ntypes, suffix=suffix) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += embedding_net_rand_seed_shift(1) input_dict["type_embedding"] = type_embedding # nf x nloc x out_dim From 6ba273404d1553b33d1a3084ecda58469df96bed Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 17:20:39 -0400 Subject: [PATCH 10/13] fix embedding_net_rand_seed_shift Signed-off-by: Jinzhe Zeng --- deepmd/tf/descriptor/se_atten.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py index 8df4cb0db2..de652bc91d 100644 --- a/deepmd/tf/descriptor/se_atten.py +++ b/deepmd/tf/descriptor/se_atten.py @@ -2202,7 +2202,7 @@ def build( ) -> tf.Tensor: type_embedding = self.type_embedding.build(self.ntypes, suffix=suffix) if (not self.uniform_seed) and (self.seed is not None): - self.seed += embedding_net_rand_seed_shift(1) + self.seed += embedding_net_rand_seed_shift([self.tebd_dim]) input_dict["type_embedding"] = type_embedding # nf x nloc x out_dim From fa2ea3ddae5ef0fa6ba911c127f9280d4f65eddb Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 14 Jun 2024 17:45:26 -0400 Subject: [PATCH 11/13] update test results Signed-off-by: Jinzhe Zeng --- source/tests/tf/test_model_se_a_ebd_v2.py | 56 +++++++++++------------ source/tests/tf/test_pairwise_dprc.py | 4 +- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/source/tests/tf/test_model_se_a_ebd_v2.py b/source/tests/tf/test_model_se_a_ebd_v2.py index 1aa4fdb92b..ffa558da35 100644 --- a/source/tests/tf/test_model_se_a_ebd_v2.py +++ b/source/tests/tf/test_model_se_a_ebd_v2.py @@ -139,37 +139,37 @@ def test_model(self): f = f.reshape([-1]) v = v.reshape([-1]) - refe = [5.435394596262052014e-01] + refe = [6.100037044296185e-01] reff = [ - 6.583728125594628944e-02, - 7.228993116083935744e-02, - 1.971543579114074483e-03, - 6.567474563776359853e-02, - 7.809421727465599983e-02, - -4.866958849094786890e-03, - -8.670511901715304004e-02, - 3.525374157021862048e-02, - 1.415748959800727487e-03, - 6.375813001810648473e-02, - -1.139053242798149790e-01, - -4.178593754384440744e-03, - -1.471737787218250215e-01, - 4.189712704724830872e-02, - 7.011731363309440038e-03, - 3.860874082716164030e-02, - -1.136296927731473005e-01, - -1.353471298745012206e-03, + 8.448651008616304e-02, + 8.613568658155157e-02, + 4.377711655236228e-03, + 9.264613309788312e-02, + 9.351200240060925e-02, + -6.743918515275118e-03, + -1.268078358219972e-01, + 4.855965861982662e-02, + 1.361334787979757e-04, + 4.193213089916692e-02, + -1.324120032345251e-01, + -4.507320444374342e-03, + -1.314595297986654e-01, + 4.120567370248839e-02, + 7.896917575801866e-03, + 3.920259153744955e-02, + -1.370010180699507e-01, + -1.159523750186610e-03, ] refv = [ - -4.243979601186427253e-01, - 1.097173849143971286e-01, - 1.227299373463585502e-02, - 1.097173849143970314e-01, - -2.462891443164323124e-01, - -5.711664180530139426e-03, - 1.227299373463585502e-02, - -5.711664180530143763e-03, - -6.217348853341628408e-04, + -0.277134219204478, + 0.088897922530779, + 0.008633318264458, + 0.088897922530779, + -0.292191560546969, + -0.005709595520904, + 0.008633318264458, + -0.005709595520904, + -0.000682136341924, ] refe = np.reshape(refe, [-1]) reff = np.reshape(reff, [-1]) diff --git a/source/tests/tf/test_pairwise_dprc.py b/source/tests/tf/test_pairwise_dprc.py index 3a5836ce45..22c73beb3b 100644 --- a/source/tests/tf/test_pairwise_dprc.py +++ b/source/tests/tf/test_pairwise_dprc.py @@ -522,8 +522,8 @@ def test_model_ener(self): # the model is pairwise! self.assertAllClose(e[1] + e[2] + e[3] - 3 * e[0], e[4] - e[0]) self.assertAllClose(f[1] + f[2] + f[3] - 3 * f[0], f[4] - f[0]) - self.assertAllClose(e[0], 0.189075, 1e-6) - self.assertAllClose(f[0, 0], 0.060047, 1e-6) + self.assertAllClose(e[0], 4.82969, 1e-6) + self.assertAllClose(f[0, 0], -0.104339, 1e-6) # test input requirement for the model self.assertCountEqual( From 4c396ba84eee08516c73aaf862ec78218799ac45 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 17 Jun 2024 19:23:31 -0400 Subject: [PATCH 12/13] use List[int] as the input seed Signed-off-by: Jinzhe Zeng --- deepmd/dpmodel/descriptor/dpa1.py | 33 +++++---- deepmd/dpmodel/descriptor/dpa2.py | 16 ++--- deepmd/dpmodel/descriptor/repformers.py | 71 ++++++++++++------- deepmd/dpmodel/descriptor/se_atten_v2.py | 2 +- deepmd/dpmodel/descriptor/se_e2_a.py | 8 ++- deepmd/dpmodel/descriptor/se_r.py | 8 ++- deepmd/dpmodel/descriptor/se_t.py | 8 ++- deepmd/dpmodel/fitting/dipole_fitting.py | 3 +- deepmd/dpmodel/fitting/dos_fitting.py | 2 +- deepmd/dpmodel/fitting/ener_fitting.py | 3 +- deepmd/dpmodel/fitting/general_fitting.py | 12 ++-- .../dpmodel/fitting/polarizability_fitting.py | 3 +- deepmd/dpmodel/utils/network.py | 15 ++-- deepmd/dpmodel/utils/seed.py | 41 +++++++++++ deepmd/dpmodel/utils/type_embed.py | 3 +- deepmd/pt/model/descriptor/dpa1.py | 9 ++- deepmd/pt/model/descriptor/dpa2.py | 20 +++--- deepmd/pt/model/descriptor/repformer_layer.py | 59 ++++++++------- deepmd/pt/model/descriptor/repformers.py | 11 ++- deepmd/pt/model/descriptor/se_a.py | 9 ++- deepmd/pt/model/descriptor/se_atten.py | 29 ++++---- deepmd/pt/model/descriptor/se_atten_v2.py | 2 +- deepmd/pt/model/descriptor/se_r.py | 7 +- deepmd/pt/model/descriptor/se_t.py | 9 ++- deepmd/pt/model/network/layernorm.py | 4 +- deepmd/pt/model/network/mlp.py | 4 +- deepmd/pt/model/network/network.py | 5 +- deepmd/pt/model/task/dipole.py | 2 +- deepmd/pt/model/task/dos.py | 2 +- deepmd/pt/model/task/ener.py | 3 +- deepmd/pt/model/task/fitting.py | 7 +- deepmd/pt/model/task/invar_fitting.py | 3 +- deepmd/pt/model/task/polarizability.py | 2 +- deepmd/pt/utils/utils.py | 9 ++- 34 files changed, 272 insertions(+), 152 deletions(-) create mode 100644 deepmd/dpmodel/utils/seed.py diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index f4b5d77cfe..b633bc6807 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -25,6 +25,9 @@ LayerNorm, NativeLayer, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.type_embed import ( TypeEmbedNet, ) @@ -251,7 +254,7 @@ def __init__( use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, # consistent with argcheck, not used though - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: ## seed, uniform_seed, not included. # Ensure compatibility with the deprecated stripped_type_embedding option. @@ -294,7 +297,7 @@ def __init__( env_protection=env_protection, trainable_ln=trainable_ln, ln_eps=ln_eps, - seed=seed, + seed=child_seed(seed, 0), ) self.use_econf_tebd = use_econf_tebd self.type_map = type_map @@ -306,7 +309,7 @@ def __init__( precision=precision, use_econf_tebd=use_econf_tebd, type_map=type_map, - seed=seed + len(neuron) * 2 + attn_layer * 3 if seed is not None else None, + seed=child_seed(seed, 1), ) self.tebd_dim = tebd_dim self.concat_output_tebd = concat_output_tebd @@ -627,7 +630,7 @@ def __init__( trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, smooth: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: self.rcut = rcut self.rcut_smth = rcut_smth @@ -677,7 +680,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed, + seed=child_seed(seed, 0), ) if self.tebd_input_mode in ["strip"]: self.embeddings_strip = NetworkCollection( @@ -691,7 +694,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed + len(self.neuron) if seed is not None else None, + seed=child_seed(seed, 1), ) else: self.embeddings_strip = None @@ -708,7 +711,7 @@ def __init__( ln_eps=self.ln_eps, smooth=self.smooth, precision=self.precision, - seed=seed + len(self.neuron) * 2 if seed is not None else None, + seed=child_seed(seed, 2), ) wanted_shape = (self.ntypes, self.nnei, 4) @@ -956,7 +959,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a neighbor-wise attention net.""" super().__init__() @@ -989,7 +992,7 @@ def __init__( ln_eps=ln_eps, smooth=smooth, precision=precision, - seed=seed + ii * 3 if seed is not None else None, + seed=child_seed(seed, ii), ) for ii in range(layer_num) ] @@ -1084,7 +1087,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a neighbor-wise attention layer.""" super().__init__() @@ -1110,14 +1113,14 @@ def __init__( temperature=temperature, smooth=smooth, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.attn_layer_norm = LayerNorm( self.embed_dim, eps=ln_eps, trainable=self.trainable_ln, precision=precision, - seed=seed + 2 if seed is not None else None, + seed=child_seed(seed, 1), ) def call( @@ -1190,7 +1193,7 @@ def __init__( bias: bool = True, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a multi-head neighbor-wise attention net.""" super().__init__() @@ -1219,7 +1222,7 @@ def __init__( bias=bias, use_timestep=False, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.out_proj = NativeLayer( hidden_dim, @@ -1227,7 +1230,7 @@ def __init__( bias=bias, use_timestep=False, precision=precision, - seed=seed + 1 if seed is not None else None, + seed=child_seed(seed, 1), ) def call(self, query, nei_mask, input_r=None, sw=None, attnw_shift=20.0): diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py index c4e92780fa..d42a9da26e 100644 --- a/deepmd/dpmodel/descriptor/dpa2.py +++ b/deepmd/dpmodel/descriptor/dpa2.py @@ -23,6 +23,9 @@ build_multiple_neighbor_list, get_multiple_nlist_key, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.type_embed import ( TypeEmbedNet, ) @@ -325,7 +328,7 @@ def __init__( exclude_types: List[Tuple[int, int]] = [], env_protection: float = 0.0, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, add_tebd_to_repinit_out: bool = False, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, @@ -408,7 +411,7 @@ def init_subclass_params(sub_data, sub_class): resnet_dt=self.repinit_args.resnet_dt, smooth=smooth, type_one_side=self.repinit_args.type_one_side, - seed=seed, + seed=child_seed(seed, 0), ) self.repformers = DescrptBlockRepformers( self.repformer_args.rcut, @@ -443,7 +446,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, trainable_ln=self.repformer_args.trainable_ln, ln_eps=self.repformer_args.ln_eps, - seed=seed + len(self.repinit_args.neuron) * 2 if seed is not None else None, + seed=child_seed(seed, 1), ) self.use_econf_tebd = use_econf_tebd self.type_map = type_map @@ -455,12 +458,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, use_econf_tebd=use_econf_tebd, type_map=type_map, - seed=seed - + len(self.repinit_args.neuron) * 2 - + 1 - + self.repformer_args.nlayers * 14 - if seed is not None - else None, + seed=child_seed(seed, 2), ) self.concat_output_tebd = concat_output_tebd self.precision = precision diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py index 6c8649c66b..67c72e8d31 100644 --- a/deepmd/dpmodel/descriptor/repformers.py +++ b/deepmd/dpmodel/descriptor/repformers.py @@ -22,6 +22,9 @@ NativeLayer, get_activation_fn, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.utils.path import ( DPPath, ) @@ -155,7 +158,7 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.rcut = rcut @@ -200,7 +203,9 @@ def __init__( self.ln_eps = ln_eps self.epsilon = 1e-4 - self.g2_embd = NativeLayer(1, self.g2_dim, precision=precision, seed=seed) + self.g2_embd = NativeLayer( + 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) + ) layers = [] for ii in range(nlayers): layers.append( @@ -233,7 +238,7 @@ def __init__( trainable_ln=self.trainable_ln, ln_eps=self.ln_eps, precision=precision, - seed=seed + 1 + ii * 14 if seed is not None else None, + seed=child_seed(child_seed(seed, 1), ii), ) ) self.layers = layers @@ -404,7 +409,7 @@ def get_residual( _mode: str = "norm", trainable: bool = True, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> np.ndarray: """ Get residual tensor for one update vector. @@ -640,7 +645,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -758,15 +763,23 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim self.head_num = head_num self.mapv = NativeLayer( - input_dim, input_dim * head_num, bias=False, precision=precision + input_dim, + input_dim * head_num, + bias=False, + precision=precision, + seed=child_seed(seed, 0), ) self.head_map = NativeLayer( - input_dim * head_num, input_dim, precision=precision + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 1), ) self.precision = precision @@ -835,11 +848,14 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim self.head_num = head_num - self.head_map = NativeLayer(head_num, 1, bias=False, precision=precision) + self.head_map = NativeLayer( + head_num, 1, bias=False, precision=precision, seed=seed + ) self.precision = precision def call( @@ -905,7 +921,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -916,20 +932,20 @@ def __init__( hidden_dim * 1 * head_num, bias=False, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.mapkv = NativeLayer( input_dim, (hidden_dim + input_dim) * head_num, bias=False, precision=precision, - seed=seed + 1 if seed is not None else None, + seed=child_seed(seed, 1), ) self.head_map = NativeLayer( input_dim * head_num, input_dim, precision=precision, - seed=seed + 2 if seed is not None else None, + seed=child_seed(seed, 2), ) self.smooth = smooth self.attnw_shift = attnw_shift @@ -1064,7 +1080,7 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -1120,7 +1136,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) ) @@ -1129,7 +1145,7 @@ def __init__( g1_in_dim, g1_dim, precision=precision, - seed=seed + 1 if seed is not None else None, + seed=child_seed(seed, 1), ) self.linear2 = None self.proj_g1g2 = None @@ -1145,7 +1161,7 @@ def __init__( g2_dim, g2_dim, precision=precision, - seed=seed + 2 if seed is not None else None, + seed=child_seed(seed, 2), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -1154,7 +1170,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed + 3 if seed is not None else None, + seed=child_seed(seed, 3), ) ) if self.update_g1_has_conv: @@ -1163,7 +1179,7 @@ def __init__( g2_dim, bias=False, precision=precision, - seed=seed + 4 if seed is not None else None, + seed=child_seed(seed, 4), ) if self.update_g2_has_g1g1: self.proj_g1g1g2 = NativeLayer( @@ -1171,7 +1187,7 @@ def __init__( g2_dim, bias=False, precision=precision, - seed=seed + 5 if seed is not None else None, + seed=child_seed(seed, 5), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -1180,7 +1196,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed + 6 if seed is not None else None, + seed=child_seed(seed, 6), ) ) if self.update_g2_has_attn or self.update_h2: @@ -1191,18 +1207,18 @@ def __init__( attn2_has_gate, self.smooth, precision=precision, - seed=seed + 7 if seed is not None else None, + seed=child_seed(seed, 7), ) if self.update_g2_has_attn: self.attn2_mh_apply = Atten2MultiHeadApply( - g2_dim, attn2_nhead, precision=precision + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) ) self.attn2_lm = LayerNorm( g2_dim, eps=ln_eps, trainable=trainable_ln, precision=precision, - seed=seed + 8 if seed is not None else None, + seed=child_seed(seed, 9), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -1211,12 +1227,13 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, + seed=child_seed(seed, 10), ) ) if self.update_h2: self.attn2_ev_apply = Atten2EquiVarApply( - g2_dim, attn2_nhead, precision=precision + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) ) if self.update_style == "res_residual": self.h2_residual.append( @@ -1225,7 +1242,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed + 9 if seed is not None else None, + seed=child_seed(seed, 12), ) ) if self.update_g1_has_attn: @@ -1235,7 +1252,7 @@ def __init__( attn1_nhead, self.smooth, precision=precision, - seed=seed + 10 if seed is not None else None, + seed=child_seed(seed, 13), ) if self.update_style == "res_residual": self.g1_residual.append( @@ -1244,7 +1261,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed + 13 if seed is not None else None, + seed=child_seed(seed, 14), ) ) diff --git a/deepmd/dpmodel/descriptor/se_atten_v2.py b/deepmd/dpmodel/descriptor/se_atten_v2.py index 1375d2265f..037222076e 100644 --- a/deepmd/dpmodel/descriptor/se_atten_v2.py +++ b/deepmd/dpmodel/descriptor/se_atten_v2.py @@ -66,7 +66,7 @@ def __init__( use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, # consistent with argcheck, not used though - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: DescrptDPA1.__init__( self, diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index c1fefaf9eb..75ac11dbed 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -6,6 +6,7 @@ List, Optional, Tuple, + Union, ) import numpy as np @@ -21,6 +22,9 @@ NetworkCollection, PairExcludeMask, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.update_sel import ( UpdateSel, ) @@ -158,7 +162,7 @@ def __init__( type_map: Optional[List[str]] = None, ntypes: Optional[int] = None, # to be compat with input # consistent with argcheck, not used though - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: del ntypes ## seed, uniform_seed, not included. @@ -198,7 +202,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed + len(self.neuron) * ii if seed is not None else None, + seed=child_seed(seed, ii), ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index eadd8f1001..20a6fe49dd 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -5,6 +5,7 @@ List, Optional, Tuple, + Union, ) import numpy as np @@ -20,6 +21,9 @@ NetworkCollection, PairExcludeMask, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.update_sel import ( UpdateSel, ) @@ -115,7 +119,7 @@ def __init__( type_map: Optional[List[str]] = None, ntypes: Optional[int] = None, # to be compat with input # consistent with argcheck, not used though - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: del ntypes ## seed, uniform_seed, not included. @@ -156,7 +160,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed + len(self.neuron) * ii if seed is not None else None, + seed=child_seed(seed, ii), ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/descriptor/se_t.py b/deepmd/dpmodel/descriptor/se_t.py index 1b1845bf34..72d8a24bd9 100644 --- a/deepmd/dpmodel/descriptor/se_t.py +++ b/deepmd/dpmodel/descriptor/se_t.py @@ -5,6 +5,7 @@ List, Optional, Tuple, + Union, ) import numpy as np @@ -20,6 +21,9 @@ NetworkCollection, PairExcludeMask, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.dpmodel.utils.update_sel import ( UpdateSel, ) @@ -98,7 +102,7 @@ def __init__( exclude_types: List[Tuple[int, int]] = [], precision: str = DEFAULT_PRECISION, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type_map: Optional[List[str]] = None, ntypes: Optional[int] = None, # to be compat with input ) -> None: @@ -136,7 +140,7 @@ def __init__( self.activation_function, self.resnet_dt, self.precision, - seed=seed + len(self.neuron) * ii if seed is not None else None, + seed=child_seed(self.seed, ii), ) self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) self.nnei = np.sum(self.sel) diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index f922b57367..6932b0d1e2 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -5,6 +5,7 @@ Dict, List, Optional, + Union, ) import numpy as np @@ -108,7 +109,7 @@ def __init__( type_map: Optional[List[str]] = None, old_impl=False, # not used - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): # seed, uniform_seed are not included if tot_ener_zero: diff --git a/deepmd/dpmodel/fitting/dos_fitting.py b/deepmd/dpmodel/fitting/dos_fitting.py index 2c113c1f7d..5a5eb8840a 100644 --- a/deepmd/dpmodel/fitting/dos_fitting.py +++ b/deepmd/dpmodel/fitting/dos_fitting.py @@ -46,7 +46,7 @@ def __init__( exclude_types: List[int] = [], type_map: Optional[List[str]] = None, # not used - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): if bias_dos is not None: self.bias_dos = bias_dos diff --git a/deepmd/dpmodel/fitting/ener_fitting.py b/deepmd/dpmodel/fitting/ener_fitting.py index 7c262209d9..9e639c1de2 100644 --- a/deepmd/dpmodel/fitting/ener_fitting.py +++ b/deepmd/dpmodel/fitting/ener_fitting.py @@ -5,6 +5,7 @@ Any, List, Optional, + Union, ) from deepmd.dpmodel.common import ( @@ -46,7 +47,7 @@ def __init__( exclude_types: List[int] = [], type_map: Optional[List[str]] = None, # not used - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__( var_name="energy", diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index 5f73e0b1aa..6d45d3ac29 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -8,6 +8,7 @@ Dict, List, Optional, + Union, ) import numpy as np @@ -21,6 +22,9 @@ FittingNet, NetworkCollection, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.utils.finetune import ( get_index_between_two_maps, map_atom_exclude_types, @@ -82,7 +86,7 @@ class GeneralFitting(NativeOP, BaseFitting): length as `ntypes` signaling if or not removing the vaccum contribution for the atom types in the list. type_map: List[str], Optional A list of strings. Give the name to each type of atoms. - seed: Optional[int] + seed: Optional[Union[int, List[int]]] Random seed for initializing the network parameters. """ @@ -108,7 +112,7 @@ def __init__( exclude_types: List[int] = [], remove_vaccum_contribution: Optional[List[bool]] = None, type_map: Optional[List[str]] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): self.var_name = var_name self.ntypes = ntypes @@ -169,9 +173,7 @@ def __init__( self.resnet_dt, self.precision, bias_out=True, - seed=seed + ii * (len(self.neuron) + 1) - if seed is not None - else None, + seed=child_seed(seed, ii), ) for ii in range(self.ntypes if not self.mixed_types else 1) ], diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index 67b4888c67..05f4aa349d 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -5,6 +5,7 @@ Dict, List, Optional, + Union, ) import numpy as np @@ -114,7 +115,7 @@ def __init__( shift_diag: bool = True, type_map: Optional[List[str]] = None, # not used - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): # seed, uniform_seed are not included if tot_ener_zero: diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index 3a49ef2be7..53a170ac4a 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -22,6 +22,9 @@ PRECISION_DICT, NativeOP, ) +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.utils.version import ( check_version_compatibility, ) @@ -76,7 +79,7 @@ def __init__( activation_function: Optional[str] = None, resnet: bool = False, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: prec = PRECISION_DICT[precision.lower()] self.precision = precision @@ -318,7 +321,7 @@ def __init__( uni_init: bool = True, trainable: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> None: self.eps = eps self.uni_init = uni_init @@ -577,7 +580,7 @@ def __init__( activation_function: str = "tanh", resnet_dt: bool = False, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): layers = [] i_in = in_dim @@ -592,7 +595,7 @@ def __init__( activation_function=activation_function, resnet=True, precision=precision, - seed=seed + idx if seed is not None else None, + seed=child_seed(seed, idx), ).serialize() ) i_in = i_ot @@ -680,7 +683,7 @@ def __init__( resnet_dt: bool = False, precision: str = DEFAULT_PRECISION, bias_out: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__( in_dim, @@ -700,7 +703,7 @@ def __init__( activation_function=None, resnet=False, precision=precision, - seed=seed + len(self.layers) if seed is not None else None, + seed=child_seed(seed, len(neuron)), ) ) self.out_dim = out_dim diff --git a/deepmd/dpmodel/utils/seed.py b/deepmd/dpmodel/utils/seed.py new file mode 100644 index 0000000000..4ceab80066 --- /dev/null +++ b/deepmd/dpmodel/utils/seed.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + List, + Optional, + Union, + overload, +) + + +@overload +def child_seed(seed: None, idx: int) -> None: ... + + +@overload +def child_seed(seed: Union[int, List[int]], idx: int) -> List[int]: ... + + +def child_seed(seed: Optional[Union[int, List[int]]], idx: int) -> Optional[List[int]]: + """Generate a child seed from a parent seed. + + Parameters + ---------- + seed + The parent seed. + idx + The index of the child seed. + + Returns + ------- + Optional[List[int]] + The child seed. + """ + # See https://numpy.org/doc/stable/reference/random/parallel.html#sequence-of-integer-seeds + if seed is None: + return None + elif isinstance(seed, int): + return [idx, seed] + elif isinstance(seed, list): + return [idx, *seed] + else: + raise TypeError(f"seed must be int or list, not {type(seed)}") diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py index 6d3f6054cb..0db064fb5e 100644 --- a/deepmd/dpmodel/utils/type_embed.py +++ b/deepmd/dpmodel/utils/type_embed.py @@ -2,6 +2,7 @@ from typing import ( List, Optional, + Union, ) import numpy as np @@ -57,7 +58,7 @@ def __init__( activation_function: str = "tanh", precision: str = "default", trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, padding: bool = False, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 5ec66bdacb..e6dba7f88f 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -11,6 +11,9 @@ import torch from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.network.mlp import ( NetworkCollection, ) @@ -236,7 +239,7 @@ def __init__( smooth_type_embedding: bool = True, type_one_side: bool = False, stripped_type_embedding: Optional[bool] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, # not implemented @@ -286,7 +289,7 @@ def __init__( env_protection=env_protection, trainable_ln=trainable_ln, ln_eps=ln_eps, - seed=seed, + seed=child_seed(seed, 1), old_impl=old_impl, ) self.use_econf_tebd = use_econf_tebd @@ -295,7 +298,7 @@ def __init__( ntypes, tebd_dim, precision=precision, - seed=seed + len(neuron) * 2 + attn_layer * 3 if seed is not None else None, + seed=child_seed(seed, 2), use_econf_tebd=use_econf_tebd, type_map=type_map, ) diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index 457d83329f..aa5aa11f67 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -15,6 +15,9 @@ RepinitArgs, ) from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.network.mlp import ( Identity, MLPLayer, @@ -84,7 +87,7 @@ def __init__( exclude_types: List[Tuple[int, int]] = [], env_protection: float = 0.0, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, add_tebd_to_repinit_out: bool = False, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, @@ -169,7 +172,7 @@ def init_subclass_params(sub_data, sub_class): resnet_dt=self.repinit_args.resnet_dt, smooth=smooth, type_one_side=self.repinit_args.type_one_side, - seed=seed, + seed=child_seed(seed, 0), ) self.repformers = DescrptBlockRepformers( self.repformer_args.rcut, @@ -204,7 +207,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, trainable_ln=self.repformer_args.trainable_ln, ln_eps=self.repformer_args.ln_eps, - seed=seed + len(self.repinit_args.neuron) * 2 if seed is not None else None, + seed=child_seed(seed, 1), old_impl=old_impl, ) self.use_econf_tebd = use_econf_tebd @@ -213,12 +216,7 @@ def init_subclass_params(sub_data, sub_class): ntypes, self.repinit_args.tebd_dim, precision=precision, - seed=seed - + len(self.repinit_args.neuron) * 2 - + 1 - + self.repformer_args.nlayers * 14 - if seed is not None - else None, + seed=child_seed(seed, 2), use_econf_tebd=self.use_econf_tebd, type_map=type_map, ) @@ -239,7 +237,7 @@ def init_subclass_params(sub_data, sub_class): bias=False, precision=precision, init="glorot", - seed=seed, + seed=child_seed(seed, 3), ) self.tebd_transform = None if self.add_tebd_to_repinit_out: @@ -248,7 +246,7 @@ def init_subclass_params(sub_data, sub_class): self.repformers.dim_in, bias=False, precision=precision, - seed=seed, + seed=child_seed(seed, 4), ) assert self.repinit.rcut > self.repformers.rcut assert self.repinit.sel[0] > self.repformers.sel[0] diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py index d3854e0196..85a9800c73 100644 --- a/deepmd/pt/model/descriptor/repformer_layer.py +++ b/deepmd/pt/model/descriptor/repformer_layer.py @@ -2,11 +2,15 @@ from typing import ( List, Optional, + Union, ) import torch import torch.nn as nn +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.network.init import ( constant_, normal_, @@ -40,7 +44,7 @@ def get_residual( _mode: str = "norm", trainable: bool = True, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ) -> torch.Tensor: r""" Get residual tensor for one update vector. @@ -156,7 +160,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -281,7 +285,7 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -291,10 +295,13 @@ def __init__( input_dim * head_num, bias=False, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.head_map = MLPLayer( - input_dim * head_num, input_dim, precision=precision, seed=seed + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 1), ) self.precision = precision @@ -363,7 +370,7 @@ def __init__( input_dim: int, head_num: int, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -436,7 +443,7 @@ def __init__( smooth: bool = True, attnw_shift: float = 20.0, precision: str = "float64", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.input_dim = input_dim @@ -447,20 +454,20 @@ def __init__( hidden_dim * 1 * head_num, bias=False, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.mapkv = MLPLayer( input_dim, (hidden_dim + input_dim) * head_num, bias=False, precision=precision, - seed=seed + 1 if seed is not None else None, + seed=child_seed(seed, 1), ) self.head_map = MLPLayer( input_dim * head_num, input_dim, precision=precision, - seed=seed + 2 if seed is not None else None, + seed=child_seed(seed, 2), ) self.smooth = smooth self.attnw_shift = attnw_shift @@ -592,7 +599,7 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -649,7 +656,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) ) @@ -658,7 +665,7 @@ def __init__( g1_in_dim, g1_dim, precision=precision, - seed=seed + 1 if seed is not None else None, + seed=child_seed(seed, 1), ) self.linear2 = None self.proj_g1g2 = None @@ -674,7 +681,7 @@ def __init__( g2_dim, g2_dim, precision=precision, - seed=seed + 2 if seed is not None else None, + seed=child_seed(seed, 2), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -683,7 +690,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed, + seed=child_seed(seed, 3), ) ) if self.update_g1_has_conv: @@ -692,7 +699,7 @@ def __init__( g2_dim, bias=False, precision=precision, - seed=seed + 3 if seed is not None else None, + seed=child_seed(seed, 4), ) if self.update_g2_has_g1g1: self.proj_g1g1g2 = MLPLayer( @@ -700,7 +707,7 @@ def __init__( g2_dim, bias=False, precision=precision, - seed=seed + 4 if seed is not None else None, + seed=child_seed(seed, 5), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -709,7 +716,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed + 5 if seed is not None else None, + seed=child_seed(seed, 6), ) ) if self.update_g2_has_attn or self.update_h2: @@ -720,18 +727,18 @@ def __init__( attn2_has_gate, self.smooth, precision=precision, - seed=seed + 6 if seed is not None else None, + seed=child_seed(seed, 7), ) if self.update_g2_has_attn: self.attn2_mh_apply = Atten2MultiHeadApply( - g2_dim, attn2_nhead, precision=precision, seed=seed + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) ) self.attn2_lm = LayerNorm( g2_dim, eps=ln_eps, trainable=trainable_ln, precision=precision, - seed=seed + 7 if seed is not None else None, + seed=child_seed(seed, 9), ) if self.update_style == "res_residual": self.g2_residual.append( @@ -740,13 +747,13 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed + 8 if seed is not None else None, + seed=child_seed(seed, 10), ) ) if self.update_h2: self.attn2_ev_apply = Atten2EquiVarApply( - g2_dim, attn2_nhead, precision=precision, seed=seed + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) ) if self.update_style == "res_residual": self.h2_residual.append( @@ -755,7 +762,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed + 9 if seed is not None else None, + seed=child_seed(seed, 12), ) ) if self.update_g1_has_attn: @@ -765,7 +772,7 @@ def __init__( attn1_nhead, self.smooth, precision=precision, - seed=seed + 10 if seed is not None else None, + seed=child_seed(seed, 13), ) if self.update_style == "res_residual": self.g1_residual.append( @@ -774,7 +781,7 @@ def __init__( self.update_residual, self.update_residual_init, precision=precision, - seed=seed + 13 if seed is not None else None, + seed=child_seed(seed, 14), ) ) diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index e3e8d85b94..8653d79140 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -10,6 +10,9 @@ import torch +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor.descriptor import ( DescriptorBlock, ) @@ -101,7 +104,7 @@ def __init__( precision: str = "float64", trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, old_impl: bool = False, ): r""" @@ -229,7 +232,9 @@ def __init__( self.seed = seed self.old_impl = old_impl - self.g2_embd = MLPLayer(1, self.g2_dim, precision=precision, seed=seed) + self.g2_embd = MLPLayer( + 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) + ) layers = [] for ii in range(nlayers): if self.old_impl: @@ -291,7 +296,7 @@ def __init__( trainable_ln=self.trainable_ln, ln_eps=self.ln_eps, precision=precision, - seed=seed + 1 + ii * 14 if seed is not None else None, + seed=child_seed(child_seed(seed, 1), ii), ) ) self.layers = torch.nn.ModuleList(layers) diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index 81e8ddd04f..8e51b03fc2 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -13,6 +13,9 @@ import numpy as np import torch +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor import ( DescriptorBlock, prod_env_mat, @@ -86,7 +89,7 @@ def __init__( old_impl: bool = False, type_one_side: bool = True, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ntypes: Optional[int] = None, # to be compat with input type_map: Optional[List[str]] = None, # not implemented @@ -384,7 +387,7 @@ def __init__( old_impl: bool = False, type_one_side: bool = True, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, **kwargs, ): """Construct an embedding net of type `se_a`. @@ -457,7 +460,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed + ii if self.seed is not None else None, + seed=child_seed(self.seed, ii), ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index 3a7991388e..a30869f24a 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -12,6 +12,9 @@ import torch.nn as nn import torch.nn.functional as torch_func +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor.descriptor import ( DescriptorBlock, ) @@ -83,7 +86,7 @@ def __init__( env_protection: float = 0.0, trainable_ln: bool = True, ln_eps: Optional[float] = 1e-5, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type: Optional[str] = None, old_impl: bool = False, ): @@ -227,7 +230,7 @@ def __init__( ln_eps=self.ln_eps, smooth=self.smooth, precision=self.precision, - seed=self.seed, + seed=child_seed(self.seed, 0), ) wanted_shape = (self.ntypes, self.nnei, 4) @@ -271,7 +274,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed + self.attn_layer * 3 if self.seed is not None else None, + seed=child_seed(self.seed, 1), ) self.filter_layers = filter_layers if self.tebd_input_mode in ["strip"]: @@ -284,9 +287,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed + self.attn_layer * 3 + len(self.filter_neuron) - if self.seed is not None - else None, + seed=child_seed(self.seed, 2), ) self.filter_layers_strip = filter_layers_strip self.stats = None @@ -607,7 +608,7 @@ def __init__( ln_eps: float = 1e-5, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a neighbor-wise attention net.""" super().__init__() @@ -642,7 +643,7 @@ def __init__( ln_eps=ln_eps, smooth=smooth, precision=precision, - seed=seed + i * 3 if seed is not None else None, + seed=child_seed(seed, i), ) ) self.attention_layers = nn.ModuleList(attention_layers) @@ -750,7 +751,7 @@ def __init__( trainable_ln: bool = True, ln_eps: float = 1e-5, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a neighbor-wise attention layer.""" super().__init__() @@ -777,14 +778,14 @@ def __init__( temperature=temperature, smooth=smooth, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.attn_layer_norm = LayerNorm( self.embed_dim, eps=ln_eps, trainable=trainable_ln, precision=precision, - seed=seed + 2 if seed is not None else None, + seed=child_seed(seed, 1), ) def forward( @@ -857,7 +858,7 @@ def __init__( bias: bool = True, smooth: bool = True, precision: str = DEFAULT_PRECISION, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): """Construct a multi-head neighbor-wise attention net.""" super().__init__() @@ -889,7 +890,7 @@ def __init__( bavg=0.0, stddev=1.0, precision=precision, - seed=seed, + seed=child_seed(seed, 0), ) self.out_proj = MLPLayer( hidden_dim, @@ -899,7 +900,7 @@ def __init__( bavg=0.0, stddev=1.0, precision=precision, - seed=seed + 1 if seed is not None else None, + seed=child_seed(seed, 1), ) def forward( diff --git a/deepmd/pt/model/descriptor/se_atten_v2.py b/deepmd/pt/model/descriptor/se_atten_v2.py index 3b350ded98..8fc6271665 100644 --- a/deepmd/pt/model/descriptor/se_atten_v2.py +++ b/deepmd/pt/model/descriptor/se_atten_v2.py @@ -66,7 +66,7 @@ def __init__( ln_eps: Optional[float] = 1e-5, type_one_side: bool = False, stripped_type_embedding: Optional[bool] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, # not implemented diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index 50e450ba24..d27aba5725 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -12,6 +12,9 @@ import torch from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor import ( prod_env_mat, ) @@ -70,7 +73,7 @@ def __init__( env_protection: float = 0.0, old_impl: bool = False, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type_map: Optional[List[str]] = None, **kwargs, ): @@ -120,7 +123,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed + ii if self.seed is not None else None, + seed=child_seed(self.seed, ii), ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/descriptor/se_t.py b/deepmd/pt/model/descriptor/se_t.py index 3436791f53..2fe5c16059 100644 --- a/deepmd/pt/model/descriptor/se_t.py +++ b/deepmd/pt/model/descriptor/se_t.py @@ -13,6 +13,9 @@ import numpy as np import torch +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.descriptor import ( DescriptorBlock, prod_env_mat, @@ -118,7 +121,7 @@ def __init__( exclude_types: List[Tuple[int, int]] = [], precision: str = "float64", trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type_map: Optional[List[str]] = None, ntypes: Optional[int] = None, # to be compat with input # not implemented @@ -406,7 +409,7 @@ def __init__( exclude_types: List[Tuple[int, int]] = [], precision: str = "float64", trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): r"""Construct an embedding net of type `se_e3`. @@ -483,7 +486,7 @@ def __init__( activation_function=self.activation_function, precision=self.precision, resnet_dt=self.resnet_dt, - seed=self.seed + ii if self.seed is not None else None, + seed=child_seed(self.seed, ii), ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/network/layernorm.py b/deepmd/pt/model/network/layernorm.py index 385bbaf270..f5cd6b965f 100644 --- a/deepmd/pt/model/network/layernorm.py +++ b/deepmd/pt/model/network/layernorm.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + List, Optional, + Union, ) import numpy as np @@ -43,7 +45,7 @@ def __init__( stddev: float = 1.0, precision: str = DEFAULT_PRECISION, trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() self.eps = eps diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py index dada7ed1b8..090d64fbcf 100644 --- a/deepmd/pt/model/network/mlp.py +++ b/deepmd/pt/model/network/mlp.py @@ -2,7 +2,9 @@ from typing import ( ClassVar, Dict, + List, Optional, + Union, ) import numpy as np @@ -81,7 +83,7 @@ def __init__( stddev: float = 1.0, precision: str = DEFAULT_PRECISION, init: str = "default", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, ): super().__init__() # only use_timestep when skip connection is established. diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py index 0475c35750..0879daf6ec 100644 --- a/deepmd/pt/model/network/network.py +++ b/deepmd/pt/model/network/network.py @@ -2,6 +2,7 @@ from typing import ( List, Optional, + Union, ) import numpy as np @@ -570,7 +571,7 @@ def __init__( bavg=0.0, stddev=1.0, precision="default", - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, use_econf_tebd=False, type_map=None, ): @@ -667,7 +668,7 @@ def __init__( activation_function: str = "tanh", precision: str = "default", trainable: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, padding: bool = False, use_econf_tebd: bool = False, type_map: Optional[List[str]] = None, diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index 917af1bdcc..782af05d3c 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -87,7 +87,7 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, exclude_types: List[int] = [], r_differentiable: bool = True, c_differentiable: bool = True, diff --git a/deepmd/pt/model/task/dos.py b/deepmd/pt/model/task/dos.py index c6a533ce7e..c78b589939 100644 --- a/deepmd/pt/model/task/dos.py +++ b/deepmd/pt/model/task/dos.py @@ -52,7 +52,7 @@ def __init__( rcond: Optional[float] = None, bias_dos: Optional[torch.Tensor] = None, trainable: Union[bool, List[bool]] = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, exclude_types: List[int] = [], diff --git a/deepmd/pt/model/task/ener.py b/deepmd/pt/model/task/ener.py index 6db937f72c..e129221a2a 100644 --- a/deepmd/pt/model/task/ener.py +++ b/deepmd/pt/model/task/ener.py @@ -5,6 +5,7 @@ List, Optional, Tuple, + Union, ) import numpy as np @@ -55,7 +56,7 @@ def __init__( activation_function: str = "tanh", precision: str = DEFAULT_PRECISION, mixed_types: bool = True, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, type_map: Optional[List[str]] = None, **kwargs, ): diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index f5901c897a..95242eb67c 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -13,6 +13,9 @@ import numpy as np import torch +from deepmd.dpmodel.utils.seed import ( + child_seed, +) from deepmd.pt.model.network.mlp import ( FittingNet, NetworkCollection, @@ -143,7 +146,7 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, exclude_types: List[int] = [], trainable: Union[bool, List[bool]] = True, remove_vaccum_contribution: Optional[List[bool]] = None, @@ -238,7 +241,7 @@ def __init__( self.resnet_dt, self.precision, bias_out=True, - seed=seed + ii if seed is not None else None, + seed=child_seed(self.seed, ii), ) for ii in range(self.ntypes if not self.mixed_types else 1) ], diff --git a/deepmd/pt/model/task/invar_fitting.py b/deepmd/pt/model/task/invar_fitting.py index 2a8aab9734..ac0c4d59e5 100644 --- a/deepmd/pt/model/task/invar_fitting.py +++ b/deepmd/pt/model/task/invar_fitting.py @@ -4,6 +4,7 @@ from typing import ( List, Optional, + Union, ) import torch @@ -95,7 +96,7 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, exclude_types: List[int] = [], atom_ener: Optional[List[Optional[torch.Tensor]]] = None, type_map: Optional[List[str]] = None, diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 66120a1523..a4592a8890 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -91,7 +91,7 @@ def __init__( precision: str = DEFAULT_PRECISION, mixed_types: bool = True, rcond: Optional[float] = None, - seed: Optional[int] = None, + seed: Optional[Union[int, List[int]]] = None, exclude_types: List[int] = [], fit_diag: bool = True, scale: Optional[Union[List[float], float]] = None, diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index 86cede347a..6892cc4047 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later from typing import ( + List, Optional, + Union, overload, ) @@ -113,8 +115,13 @@ def dict_to_device(sample_dict): sample_dict[key] = sample_dict[key].to(DEVICE) -def get_generator(seed: Optional[int] = None) -> Optional[torch.Generator]: +def get_generator( + seed: Optional[Union[int, List[int]]] = None, +) -> Optional[torch.Generator]: if seed is not None: + if isinstance(seed, list): + # suggested by GitHub Copilot + seed = hash(tuple(seed)) generator = torch.Generator(device=DEVICE) generator.manual_seed(seed) return generator From 8aacdea6204a395336ced7dd9e7fb25b21d6a6bb Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 17 Jun 2024 20:39:42 -0400 Subject: [PATCH 13/13] mix_entropy without hash Signed-off-by: Jinzhe Zeng --- deepmd/pt/utils/utils.py | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/utils/utils.py b/deepmd/pt/utils/utils.py index 6892cc4047..9ccdbfdb5d 100644 --- a/deepmd/pt/utils/utils.py +++ b/deepmd/pt/utils/utils.py @@ -115,13 +115,48 @@ def dict_to_device(sample_dict): sample_dict[key] = sample_dict[key].to(DEVICE) +# https://github.com/numpy/numpy/blob/a4cddb60489f821a1a4dffc16cd5c69755d43bdb/numpy/random/bit_generator.pyx#L58-L63 +INIT_A = 0x43B0D7E5 +MULT_A = 0x931E8875 +MIX_MULT_L = 0xCA01F9DD +MIX_MULT_R = 0x4973F715 +XSHIFT = 16 + + +def hashmix(value: int, hash_const: List[int]): + value ^= INIT_A + hash_const[0] *= MULT_A + value *= INIT_A + # prevent overflow + hash_const[0] &= 0xFFFF_FFFF_FFFF_FFFF + value &= 0xFFFF_FFFF_FFFF_FFFF + value ^= value >> XSHIFT + return value + + +def mix(x: int, y: int): + result = MIX_MULT_L * x - MIX_MULT_R * y + # prevent overflow + result &= 0xFFFF_FFFF_FFFF_FFFF + result ^= result >> XSHIFT + return result + + +def mix_entropy(entropy_array: List[int]) -> int: + # https://github.com/numpy/numpy/blob/a4cddb60489f821a1a4dffc16cd5c69755d43bdb/numpy/random/bit_generator.pyx#L341-L374 + hash_const = [INIT_A] + mixer = hashmix(entropy_array[0], hash_const) + for i_src in range(1, len(entropy_array)): + mixer = mix(mixer, hashmix(entropy_array[i_src], hash_const)) + return mixer + + def get_generator( seed: Optional[Union[int, List[int]]] = None, ) -> Optional[torch.Generator]: if seed is not None: if isinstance(seed, list): - # suggested by GitHub Copilot - seed = hash(tuple(seed)) + seed = mix_entropy(seed) generator = torch.Generator(device=DEVICE) generator.manual_seed(seed) return generator