diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index 51c56e9681..9e9da46aec 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -319,6 +319,7 @@ def __init__( trainable_ln=trainable_ln, ln_eps=ln_eps, seed=child_seed(seed, 0), + trainable=trainable, ) self.use_econf_tebd = use_econf_tebd self.use_tebd_bias = use_tebd_bias @@ -333,6 +334,7 @@ def __init__( use_tebd_bias=use_tebd_bias, type_map=type_map, seed=child_seed(seed, 1), + trainable=trainable, ) self.tebd_dim = tebd_dim self.concat_output_tebd = concat_output_tebd @@ -691,6 +693,7 @@ def __init__( ln_eps: Optional[float] = 1e-5, smooth: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: self.rcut = rcut self.rcut_smth = rcut_smth @@ -741,6 +744,7 @@ def __init__( self.resnet_dt, self.precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.embeddings = embeddings if self.tebd_input_mode in ["strip"]: @@ -756,6 +760,7 @@ def __init__( self.resnet_dt, self.precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.embeddings_strip = embeddings_strip else: @@ -774,6 +779,7 @@ def __init__( smooth=self.smooth, precision=self.precision, seed=child_seed(seed, 2), + trainable=trainable, ) wanted_shape = (self.ntypes, self.nnei, 4) @@ -1186,6 +1192,7 @@ def __init__( smooth: bool = True, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Construct a neighbor-wise attention net.""" super().__init__() @@ -1219,6 +1226,7 @@ def __init__( smooth=smooth, precision=precision, seed=child_seed(seed, ii), + trainable=trainable, ) for ii in range(layer_num) ] @@ -1314,6 +1322,7 @@ def __init__( smooth: bool = True, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Construct a neighbor-wise attention layer.""" super().__init__() @@ -1340,6 +1349,7 @@ def __init__( smooth=smooth, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.attn_layer_norm = LayerNorm( self.embed_dim, @@ -1420,6 +1430,7 @@ def __init__( smooth: bool = True, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Construct a multi-head neighbor-wise attention net.""" super().__init__() @@ -1449,6 +1460,7 @@ def __init__( use_timestep=False, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.out_proj = NativeLayer( hidden_dim, @@ -1457,6 +1469,7 @@ def __init__( use_timestep=False, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) def call(self, query, nei_mask, input_r=None, sw=None, attnw_shift=20.0): diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py index 70accefa30..da39afdc23 100644 --- a/deepmd/dpmodel/descriptor/dpa2.py +++ b/deepmd/dpmodel/descriptor/dpa2.py @@ -474,6 +474,7 @@ def init_subclass_params(sub_data, sub_class): smooth=smooth, type_one_side=self.repinit_args.type_one_side, seed=child_seed(seed, 0), + trainable=trainable, ) self.use_three_body = self.repinit_args.use_three_body if self.use_three_body: @@ -493,6 +494,7 @@ def init_subclass_params(sub_data, sub_class): resnet_dt=self.repinit_args.resnet_dt, smooth=smooth, seed=child_seed(seed, 5), + trainable=trainable, ) else: self.repinit_three_body = None @@ -533,6 +535,7 @@ def init_subclass_params(sub_data, sub_class): g1_out_mlp=self.repformer_args.g1_out_mlp, ln_eps=self.repformer_args.ln_eps, seed=child_seed(seed, 1), + trainable=trainable, ) self.rcsl_list = [ (self.repformers.get_rcut(), self.repformers.get_nsel()), @@ -562,6 +565,7 @@ def init_subclass_params(sub_data, sub_class): use_tebd_bias=use_tebd_bias, type_map=type_map, seed=child_seed(seed, 2), + trainable=trainable, ) self.concat_output_tebd = concat_output_tebd self.precision = precision @@ -585,6 +589,7 @@ def init_subclass_params(sub_data, sub_class): bias=False, precision=precision, seed=child_seed(seed, 3), + trainable=trainable, ) self.tebd_transform = None if self.add_tebd_to_repinit_out: @@ -594,6 +599,7 @@ def init_subclass_params(sub_data, sub_class): bias=False, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) assert self.repinit.rcut > self.repformers.rcut assert self.repinit.sel[0] > self.repformers.sel[0] diff --git a/deepmd/dpmodel/descriptor/dpa3.py b/deepmd/dpmodel/descriptor/dpa3.py index f9210b0574..3a03b2a9ad 100644 --- a/deepmd/dpmodel/descriptor/dpa3.py +++ b/deepmd/dpmodel/descriptor/dpa3.py @@ -357,6 +357,7 @@ def init_subclass_params(sub_data, sub_class): env_protection=env_protection, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.use_econf_tebd = use_econf_tebd @@ -374,6 +375,7 @@ def init_subclass_params(sub_data, sub_class): use_tebd_bias=use_tebd_bias, type_map=type_map, seed=child_seed(seed, 2), + trainable=trainable, ) self.concat_output_tebd = concat_output_tebd self.precision = precision diff --git a/deepmd/dpmodel/descriptor/repflows.py b/deepmd/dpmodel/descriptor/repflows.py index f8c329b515..43fe844262 100644 --- a/deepmd/dpmodel/descriptor/repflows.py +++ b/deepmd/dpmodel/descriptor/repflows.py @@ -167,6 +167,8 @@ class DescrptBlockRepflows(NativeOP, DescriptorBlock): For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. seed : int, optional Random seed for parameter initialization. + trainable : bool, default: True + Whether the block is trainable """ def __init__( @@ -205,6 +207,7 @@ def __init__( sel_reduce_factor: float = 10.0, use_loc_mapping: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.e_rcut = float(e_rcut) @@ -269,10 +272,19 @@ def __init__( self.seed = seed self.edge_embd = NativeLayer( - 1, self.e_dim, precision=precision, seed=child_seed(seed, 0) + 1, + self.e_dim, + precision=precision, + seed=child_seed(seed, 0), + trainable=trainable, ) self.angle_embd = NativeLayer( - 1, self.a_dim, precision=precision, bias=False, seed=child_seed(seed, 1) + 1, + self.a_dim, + precision=precision, + bias=False, + seed=child_seed(seed, 1), + trainable=trainable, ) layers = [] for ii in range(nlayers): @@ -304,6 +316,7 @@ def __init__( sel_reduce_factor=self.sel_reduce_factor, smooth_edge_update=self.smooth_edge_update, seed=child_seed(child_seed(seed, 1), ii), + trainable=trainable, ) ) self.layers = layers @@ -860,6 +873,7 @@ def __init__( update_residual_init: str = "const", precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -922,6 +936,7 @@ def __init__( n_dim, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) if self.update_style == "res_residual": self.n_residual.append( @@ -931,6 +946,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) ) @@ -941,6 +957,7 @@ def __init__( n_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, ) if self.update_style == "res_residual": self.n_residual.append( @@ -950,6 +967,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 3), + trainable=trainable, ) ) @@ -959,6 +977,7 @@ def __init__( self.n_multi_edge_message * n_dim, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) if self.update_style == "res_residual": for head_index in range(self.n_multi_edge_message): @@ -969,6 +988,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(child_seed(seed, 5), head_index), + trainable=trainable, ) ) @@ -978,6 +998,7 @@ def __init__( e_dim, precision=precision, seed=child_seed(seed, 6), + trainable=trainable, ) if self.update_style == "res_residual": self.e_residual.append( @@ -987,6 +1008,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 7), + trainable=trainable, ) ) @@ -1015,6 +1037,7 @@ def __init__( precision=precision, bias=False, seed=child_seed(seed, 8), + trainable=trainable, ) self.a_compress_e_linear = NativeLayer( self.e_dim, @@ -1022,6 +1045,7 @@ def __init__( precision=precision, bias=False, seed=child_seed(seed, 9), + trainable=trainable, ) else: self.a_compress_n_linear = None @@ -1033,12 +1057,14 @@ def __init__( self.e_dim, precision=precision, seed=child_seed(seed, 10), + trainable=trainable, ) self.edge_angle_linear2 = NativeLayer( self.e_dim, self.e_dim, precision=precision, seed=child_seed(seed, 11), + trainable=trainable, ) if self.update_style == "res_residual": self.e_residual.append( @@ -1048,6 +1074,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 12), + trainable=trainable, ) ) @@ -1057,6 +1084,7 @@ def __init__( self.a_dim, precision=precision, seed=child_seed(seed, 13), + trainable=trainable, ) if self.update_style == "res_residual": self.a_residual.append( @@ -1066,6 +1094,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 14), + trainable=trainable, ) ) else: diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py index 3d02054350..6ac9675d28 100644 --- a/deepmd/dpmodel/descriptor/repformers.py +++ b/deepmd/dpmodel/descriptor/repformers.py @@ -164,6 +164,8 @@ class DescrptBlockRepformers(NativeOP, DescriptorBlock): The epsilon value for layer normalization. seed : int, optional The random seed for initialization. + trainable : bool, default: True + Whether the block is trainable """ def __init__( @@ -204,6 +206,7 @@ def __init__( g1_out_mlp: bool = True, ln_eps: Optional[float] = 1e-5, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.rcut = rcut @@ -252,7 +255,11 @@ def __init__( self.epsilon = 1e-4 self.g2_embd = NativeLayer( - 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) + 1, + self.g2_dim, + precision=precision, + seed=child_seed(seed, 0), + trainable=trainable, ) layers = [] for ii in range(nlayers): @@ -290,6 +297,7 @@ def __init__( g1_out_conv=self.g1_out_conv, g1_out_mlp=self.g1_out_mlp, seed=child_seed(child_seed(seed, 1), ii), + trainable=trainable, ) ) self.layers = layers @@ -847,6 +855,7 @@ def __init__( attnw_shift: float = 20.0, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -859,6 +868,7 @@ def __init__( bias=False, precision=precision, seed=seed, + trainable=trainable, ) self.has_gate = has_gate self.smooth = smooth @@ -970,6 +980,7 @@ def __init__( head_num: int, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.input_dim = input_dim @@ -980,12 +991,14 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.head_map = NativeLayer( input_dim * head_num, input_dim, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.precision = precision @@ -1058,12 +1071,18 @@ def __init__( head_num: int, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.input_dim = input_dim self.head_num = head_num self.head_map = NativeLayer( - head_num, 1, bias=False, precision=precision, seed=seed + head_num, + 1, + bias=False, + precision=precision, + seed=seed, + trainable=trainable, ) self.precision = precision @@ -1133,6 +1152,7 @@ def __init__( attnw_shift: float = 20.0, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.input_dim = input_dim @@ -1144,6 +1164,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.mapkv = NativeLayer( input_dim, @@ -1151,12 +1172,14 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.head_map = NativeLayer( input_dim * head_num, input_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, ) self.smooth = smooth self.attnw_shift = attnw_shift @@ -1295,6 +1318,7 @@ def __init__( g1_out_mlp: bool = True, ln_eps: Optional[float] = 1e-5, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -1354,6 +1378,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) ) @@ -1363,6 +1388,7 @@ def __init__( g1_dim, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.linear2 = None self.proj_g1g2 = None @@ -1379,6 +1405,7 @@ def __init__( g2_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, ) if self.update_style == "res_residual": g2_residual.append( @@ -1388,6 +1415,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 3), + trainable=trainable, ) ) if self.g1_out_mlp: @@ -1396,6 +1424,7 @@ def __init__( g1_dim, precision=precision, seed=child_seed(seed, 15), + trainable=trainable, ) if self.update_style == "res_residual": g1_residual.append( @@ -1405,6 +1434,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 16), + trainable=trainable, ) ) else: @@ -1417,6 +1447,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) else: self.proj_g1g2 = NativeLayer( @@ -1425,6 +1456,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) if self.update_style == "res_residual": g1_residual.append( @@ -1434,6 +1466,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 17), + trainable=trainable, ) ) if self.update_g2_has_g1g1: @@ -1443,6 +1476,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 5), + trainable=trainable, ) if self.update_style == "res_residual": g2_residual.append( @@ -1452,6 +1486,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 6), + trainable=trainable, ) ) if self.update_g2_has_attn or self.update_h2: @@ -1463,10 +1498,15 @@ def __init__( self.smooth, precision=precision, seed=child_seed(seed, 7), + trainable=trainable, ) if self.update_g2_has_attn: self.attn2_mh_apply = Atten2MultiHeadApply( - g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) + g2_dim, + attn2_nhead, + precision=precision, + seed=child_seed(seed, 8), + trainable=trainable, ) self.attn2_lm = LayerNorm( g2_dim, @@ -1483,12 +1523,17 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 10), + trainable=trainable, ) ) if self.update_h2: self.attn2_ev_apply = Atten2EquiVarApply( - g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) + g2_dim, + attn2_nhead, + precision=precision, + seed=child_seed(seed, 11), + trainable=trainable, ) if self.update_style == "res_residual": h2_residual.append( @@ -1498,6 +1543,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 12), + trainable=trainable, ) ) if self.update_g1_has_attn: @@ -1508,6 +1554,7 @@ def __init__( self.smooth, precision=precision, seed=child_seed(seed, 13), + trainable=trainable, ) if self.update_style == "res_residual": g1_residual.append( @@ -1517,6 +1564,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 14), + trainable=trainable, ) ) diff --git a/deepmd/dpmodel/descriptor/se_e2_a.py b/deepmd/dpmodel/descriptor/se_e2_a.py index bd72d936e3..5bcffc6c53 100644 --- a/deepmd/dpmodel/descriptor/se_e2_a.py +++ b/deepmd/dpmodel/descriptor/se_e2_a.py @@ -207,6 +207,7 @@ def __init__( self.resnet_dt, self.precision, seed=child_seed(seed, ii), + trainable=trainable, ) self.embeddings = embeddings self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index 5b2931b23f..9d485b15a9 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -166,6 +166,7 @@ def __init__( self.resnet_dt, self.precision, seed=child_seed(seed, ii), + trainable=trainable, ) self.embeddings = embeddings self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) diff --git a/deepmd/dpmodel/descriptor/se_t.py b/deepmd/dpmodel/descriptor/se_t.py index fb30f04961..496dd3e090 100644 --- a/deepmd/dpmodel/descriptor/se_t.py +++ b/deepmd/dpmodel/descriptor/se_t.py @@ -147,6 +147,7 @@ def __init__( self.resnet_dt, self.precision, seed=child_seed(self.seed, ii), + trainable=trainable, ) self.embeddings = embeddings self.env_mat = EnvMat(self.rcut, self.rcut_smth, protection=self.env_protection) diff --git a/deepmd/dpmodel/descriptor/se_t_tebd.py b/deepmd/dpmodel/descriptor/se_t_tebd.py index ff26024aad..ae8f1280d2 100644 --- a/deepmd/dpmodel/descriptor/se_t_tebd.py +++ b/deepmd/dpmodel/descriptor/se_t_tebd.py @@ -157,6 +157,7 @@ def __init__( env_protection=env_protection, smooth=smooth, seed=child_seed(seed, 0), + trainable=trainable, ) self.use_econf_tebd = use_econf_tebd self.type_map = type_map @@ -171,6 +172,7 @@ def __init__( use_tebd_bias=use_tebd_bias, type_map=type_map, seed=child_seed(seed, 1), + trainable=trainable, ) self.tebd_dim = tebd_dim self.concat_output_tebd = concat_output_tebd @@ -497,6 +499,7 @@ def __init__( env_protection: float = 0.0, smooth: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: self.rcut = rcut self.rcut_smth = rcut_smth @@ -542,6 +545,7 @@ def __init__( self.resnet_dt, self.precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.embeddings = embeddings if self.tebd_input_mode in ["strip"]: @@ -557,6 +561,7 @@ def __init__( self.resnet_dt, self.precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.embeddings_strip = embeddings_strip else: diff --git a/deepmd/dpmodel/fitting/general_fitting.py b/deepmd/dpmodel/fitting/general_fitting.py index cd0d4e72d4..db94580243 100644 --- a/deepmd/dpmodel/fitting/general_fitting.py +++ b/deepmd/dpmodel/fitting/general_fitting.py @@ -198,6 +198,7 @@ def __init__( self.precision, bias_out=True, seed=child_seed(seed, ii), + trainable=trainable, ) for ii in range(self.ntypes if not self.mixed_types else 1) ], diff --git a/deepmd/dpmodel/utils/network.py b/deepmd/dpmodel/utils/network.py index bf28b66b7b..4e8e662865 100644 --- a/deepmd/dpmodel/utils/network.py +++ b/deepmd/dpmodel/utils/network.py @@ -87,6 +87,8 @@ class NativeLayer(NativeOP): The precision of the layer. seed : int, optional Random seed. + trainable : bool, default=True + Whether the layer is trainable. """ def __init__( @@ -99,7 +101,10 @@ def __init__( resnet: bool = False, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: + # trainable must be set before any array attribute is set + self.trainable = trainable prec = PRECISION_DICT[precision.lower()] self.precision = precision # only use_timestep when skip connection is established. @@ -139,13 +144,14 @@ def serialize(self) -> dict: } return { "@class": "Layer", - "@version": 1, + "@version": 2, "bias": self.b is not None, "use_timestep": self.idt is not None, "activation_function": self.activation_function, "resnet": self.resnet, # make deterministic "precision": np.dtype(PRECISION_DICT[self.precision]).name, + "trainable": self.trainable, "@variables": data, } @@ -159,7 +165,7 @@ def deserialize(cls, data: dict) -> "NativeLayer": The dict to deserialize from. """ data = data.copy() - check_version_compatibility(data.pop("@version", 1), 1, 1) + check_version_compatibility(data.pop("@version", 1), 2, 1) data.pop("@class", None) variables = data.pop("@variables") assert variables["w"] is not None and len(variables["w"].shape) == 2 @@ -240,6 +246,8 @@ def __getitem__(self, key): return self.resnet elif key == "precision": return self.precision + elif key == "trainable": + return self.trainable else: raise KeyError(key) @@ -429,6 +437,7 @@ def __init__( resnet=False, precision=precision, seed=seed, + trainable=trainable, ) xp = array_api_compat.array_namespace(self.w, self.b) self.w = xp.squeeze(self.w, 0) # keep the weight shape to be [num_in] @@ -681,9 +690,12 @@ def __init__( precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, bias: bool = True, + trainable: Union[bool, list[bool]] = True, ) -> None: layers = [] i_in = in_dim + if isinstance(trainable, bool): + trainable = [trainable] * len(neuron) for idx, ii in enumerate(neuron): i_ot = ii layers.append( @@ -696,6 +708,7 @@ def __init__( resnet=True, precision=precision, seed=child_seed(seed, idx), + trainable=trainable[idx], ).serialize() ) i_in = i_ot @@ -786,7 +799,14 @@ def __init__( precision: str = DEFAULT_PRECISION, bias_out: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: Union[bool, list[bool]] = True, ) -> None: + if trainable is None: + trainable = [True] * (len(neuron) + 1) + elif isinstance(trainable, bool): + trainable = [trainable] * (len(neuron) + 1) + else: + pass super().__init__( in_dim, neuron=neuron, @@ -794,6 +814,7 @@ def __init__( resnet_dt=resnet_dt, precision=precision, seed=seed, + trainable=trainable[:-1], ) i_in = neuron[-1] if len(neuron) > 0 else in_dim i_ot = out_dim @@ -807,6 +828,7 @@ def __init__( resnet=False, precision=precision, seed=child_seed(seed, len(neuron)), + trainable=trainable[-1], ) ) self.out_dim = out_dim diff --git a/deepmd/dpmodel/utils/type_embed.py b/deepmd/dpmodel/utils/type_embed.py index 17e40f3592..d533d71ee9 100644 --- a/deepmd/dpmodel/utils/type_embed.py +++ b/deepmd/dpmodel/utils/type_embed.py @@ -93,6 +93,7 @@ def __init__( self.precision, seed=self.seed, bias=self.use_tebd_bias, + trainable=trainable, ) @support_array_api(version="2022.12") diff --git a/deepmd/jax/utils/network.py b/deepmd/jax/utils/network.py index 2c406095cd..78da4c96f5 100644 --- a/deepmd/jax/utils/network.py +++ b/deepmd/jax/utils/network.py @@ -16,6 +16,7 @@ make_multilayer_network, ) from deepmd.jax.common import ( + ArrayAPIVariable, flax_module, to_jax_array, ) @@ -44,7 +45,10 @@ def __setattr__(self, name: str, value: Any) -> None: if name in {"w", "b", "idt"}: value = to_jax_array(value) if value is not None: - value = ArrayAPIParam(value) + if self.trainable: + value = ArrayAPIParam(value) + else: + value = ArrayAPIVariable(value) return super().__setattr__(name, value) diff --git a/deepmd/pd/model/descriptor/dpa1.py b/deepmd/pd/model/descriptor/dpa1.py index 6942b096c9..ad45c13d1d 100644 --- a/deepmd/pd/model/descriptor/dpa1.py +++ b/deepmd/pd/model/descriptor/dpa1.py @@ -292,6 +292,7 @@ def __init__( trainable_ln=trainable_ln, ln_eps=ln_eps, seed=child_seed(seed, 1), + trainable=trainable, ) self.use_econf_tebd = use_econf_tebd self.use_tebd_bias = use_tebd_bias @@ -305,6 +306,7 @@ def __init__( use_econf_tebd=use_econf_tebd, use_tebd_bias=use_tebd_bias, type_map=type_map, + trainable=trainable, ) self.prec = PRECISION_DICT[precision] self.tebd_dim = tebd_dim diff --git a/deepmd/pd/model/descriptor/dpa2.py b/deepmd/pd/model/descriptor/dpa2.py index 0e3b24397f..44b3229f66 100644 --- a/deepmd/pd/model/descriptor/dpa2.py +++ b/deepmd/pd/model/descriptor/dpa2.py @@ -184,6 +184,7 @@ def init_subclass_params(sub_data, sub_class): smooth=smooth, type_one_side=self.repinit_args.type_one_side, seed=child_seed(seed, 0), + trainable=trainable, ) self.use_three_body = self.repinit_args.use_three_body if self.use_three_body: @@ -203,6 +204,7 @@ def init_subclass_params(sub_data, sub_class): resnet_dt=self.repinit_args.resnet_dt, smooth=smooth, seed=child_seed(seed, 5), + trainable=trainable, ) else: self.repinit_three_body = None @@ -243,6 +245,7 @@ def init_subclass_params(sub_data, sub_class): g1_out_conv=self.repformer_args.g1_out_conv, g1_out_mlp=self.repformer_args.g1_out_mlp, seed=child_seed(seed, 1), + trainable=trainable, ) self.rcsl_list = [ (self.repformers.get_rcut(), self.repformers.get_nsel()), @@ -270,6 +273,7 @@ def init_subclass_params(sub_data, sub_class): use_econf_tebd=self.use_econf_tebd, use_tebd_bias=use_tebd_bias, type_map=type_map, + trainable=trainable, ) self.concat_output_tebd = concat_output_tebd self.precision = precision @@ -295,6 +299,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, init="glorot", seed=child_seed(seed, 3), + trainable=trainable, ) self.tebd_transform = None if self.add_tebd_to_repinit_out: @@ -304,6 +309,7 @@ def init_subclass_params(sub_data, sub_class): bias=False, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) assert self.repinit.rcut > self.repformers.rcut assert self.repinit.sel[0] > self.repformers.sel[0] diff --git a/deepmd/pd/model/descriptor/dpa3.py b/deepmd/pd/model/descriptor/dpa3.py index 0f1a8f4c2f..e022169930 100644 --- a/deepmd/pd/model/descriptor/dpa3.py +++ b/deepmd/pd/model/descriptor/dpa3.py @@ -167,6 +167,7 @@ def init_subclass_params(sub_data, sub_class): env_protection=env_protection, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.use_econf_tebd = use_econf_tebd @@ -182,6 +183,7 @@ def init_subclass_params(sub_data, sub_class): use_econf_tebd=self.use_econf_tebd, use_tebd_bias=use_tebd_bias, type_map=type_map, + trainable=trainable, ) self.concat_output_tebd = concat_output_tebd self.precision = precision diff --git a/deepmd/pd/model/descriptor/repflow_layer.py b/deepmd/pd/model/descriptor/repflow_layer.py index f1bdd0439d..d059e13775 100644 --- a/deepmd/pd/model/descriptor/repflow_layer.py +++ b/deepmd/pd/model/descriptor/repflow_layer.py @@ -61,6 +61,7 @@ def __init__( update_residual_init: str = "const", precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -123,6 +124,7 @@ def __init__( n_dim, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) if self.update_style == "res_residual": self.n_residual.append( @@ -132,6 +134,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) ) @@ -142,6 +145,7 @@ def __init__( n_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, ) if self.update_style == "res_residual": self.n_residual.append( @@ -151,6 +155,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 3), + trainable=trainable, ) ) @@ -160,6 +165,7 @@ def __init__( self.n_multi_edge_message * n_dim, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) if self.update_style == "res_residual": for head_index in range(self.n_multi_edge_message): @@ -170,6 +176,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(child_seed(seed, 5), head_index), + trainable=trainable, ) ) @@ -179,6 +186,7 @@ def __init__( e_dim, precision=precision, seed=child_seed(seed, 6), + trainable=trainable, ) if self.update_style == "res_residual": self.e_residual.append( @@ -188,6 +196,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 7), + trainable=trainable, ) ) @@ -216,6 +225,7 @@ def __init__( precision=precision, bias=False, seed=child_seed(seed, 8), + trainable=trainable, ) self.a_compress_e_linear = MLPLayer( self.e_dim, @@ -223,6 +233,7 @@ def __init__( precision=precision, bias=False, seed=child_seed(seed, 9), + trainable=trainable, ) else: self.a_compress_n_linear = None @@ -234,12 +245,14 @@ def __init__( self.e_dim, precision=precision, seed=child_seed(seed, 10), + trainable=trainable, ) self.edge_angle_linear2 = MLPLayer( self.e_dim, self.e_dim, precision=precision, seed=child_seed(seed, 11), + trainable=trainable, ) if self.update_style == "res_residual": self.e_residual.append( @@ -249,6 +262,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 12), + trainable=trainable, ) ) @@ -258,6 +272,7 @@ def __init__( self.a_dim, precision=precision, seed=child_seed(seed, 13), + trainable=trainable, ) if self.update_style == "res_residual": self.a_residual.append( @@ -267,6 +282,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 14), + trainable=trainable, ) ) else: diff --git a/deepmd/pd/model/descriptor/repflows.py b/deepmd/pd/model/descriptor/repflows.py index 3200c26dba..f00bdd2cb5 100644 --- a/deepmd/pd/model/descriptor/repflows.py +++ b/deepmd/pd/model/descriptor/repflows.py @@ -131,6 +131,8 @@ class DescrptBlockRepflows(DescriptorBlock): For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. seed : int, optional Random seed for parameter initialization. + trainable : bool, default: True + Whether this block is trainable """ def __init__( @@ -167,6 +169,7 @@ def __init__( use_loc_mapping: bool = False, optim_update: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.e_rcut = float(e_rcut) @@ -223,10 +226,19 @@ def __init__( self.seed = seed self.edge_embd = MLPLayer( - 1, self.e_dim, precision=precision, seed=child_seed(seed, 0) + 1, + self.e_dim, + precision=precision, + seed=child_seed(seed, 0), + trainable=trainable, ) self.angle_embd = MLPLayer( - 1, self.a_dim, precision=precision, bias=False, seed=child_seed(seed, 1) + 1, + self.a_dim, + precision=precision, + bias=False, + seed=child_seed(seed, 1), + trainable=trainable, ) layers = [] for ii in range(nlayers): @@ -258,6 +270,7 @@ def __init__( sel_reduce_factor=self.sel_reduce_factor, smooth_edge_update=self.smooth_edge_update, seed=child_seed(child_seed(seed, 1), ii), + trainable=trainable, ) ) self.layers = paddle.nn.LayerList(layers) diff --git a/deepmd/pd/model/descriptor/repformer_layer.py b/deepmd/pd/model/descriptor/repformer_layer.py index b4d93d8301..4dad08fff8 100644 --- a/deepmd/pd/model/descriptor/repformer_layer.py +++ b/deepmd/pd/model/descriptor/repformer_layer.py @@ -163,6 +163,7 @@ def __init__( attnw_shift: float = 20.0, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -175,6 +176,7 @@ def __init__( bias=False, precision=precision, seed=seed, + trainable=trainable, ) self.has_gate = has_gate self.smooth = smooth @@ -288,6 +290,7 @@ def __init__( head_num: int, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.input_dim = input_dim @@ -298,12 +301,14 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.head_map = MLPLayer( input_dim * head_num, input_dim, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.precision = precision @@ -375,12 +380,18 @@ def __init__( head_num: int, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.input_dim = input_dim self.head_num = head_num self.head_map = MLPLayer( - head_num, 1, bias=False, precision=precision, seed=seed + head_num, + 1, + bias=False, + precision=precision, + seed=seed, + trainable=trainable, ) self.precision = precision @@ -448,6 +459,7 @@ def __init__( attnw_shift: float = 20.0, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.input_dim = input_dim @@ -459,6 +471,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.mapkv = MLPLayer( input_dim, @@ -466,12 +479,14 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.head_map = MLPLayer( input_dim * head_num, input_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, ) self.smooth = smooth self.attnw_shift = attnw_shift @@ -612,6 +627,7 @@ def __init__( g1_out_conv: bool = True, g1_out_mlp: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -672,6 +688,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) ) @@ -681,6 +698,7 @@ def __init__( g1_dim, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.linear2 = None self.proj_g1g2 = None @@ -697,6 +715,7 @@ def __init__( g2_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, ) if self.update_style == "res_residual": self.g2_residual.append( @@ -706,6 +725,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 3), + trainable=trainable, ) ) if self.g1_out_mlp: @@ -714,6 +734,7 @@ def __init__( g1_dim, precision=precision, seed=child_seed(seed, 15), + trainable=trainable, ) if self.update_style == "res_residual": self.g1_residual.append( @@ -723,6 +744,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 16), + trainable=trainable, ) ) else: @@ -735,6 +757,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) else: self.proj_g1g2 = MLPLayer( @@ -743,6 +766,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) if self.update_style == "res_residual": self.g1_residual.append( @@ -752,6 +776,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 17), + trainable=trainable, ) ) if self.update_g2_has_g1g1: @@ -761,6 +786,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 5), + trainable=trainable, ) if self.update_style == "res_residual": self.g2_residual.append( @@ -770,6 +796,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 6), + trainable=trainable, ) ) if self.update_g2_has_attn or self.update_h2: @@ -781,10 +808,15 @@ def __init__( self.smooth, precision=precision, seed=child_seed(seed, 7), + trainable=trainable, ) if self.update_g2_has_attn: self.attn2_mh_apply = Atten2MultiHeadApply( - g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) + g2_dim, + attn2_nhead, + precision=precision, + seed=child_seed(seed, 8), + trainable=trainable, ) self.attn2_lm = LayerNorm( g2_dim, @@ -801,12 +833,17 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 10), + trainable=trainable, ) ) if self.update_h2: self.attn2_ev_apply = Atten2EquiVarApply( - g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) + g2_dim, + attn2_nhead, + precision=precision, + seed=child_seed(seed, 11), + trainable=trainable, ) if self.update_style == "res_residual": self.h2_residual.append( @@ -816,6 +853,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 12), + trainable=trainable, ) ) if self.update_g1_has_attn: @@ -826,6 +864,7 @@ def __init__( self.smooth, precision=precision, seed=child_seed(seed, 13), + trainable=trainable, ) if self.update_style == "res_residual": self.g1_residual.append( @@ -835,6 +874,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 14), + trainable=trainable, ) ) diff --git a/deepmd/pd/model/descriptor/repformers.py b/deepmd/pd/model/descriptor/repformers.py index 32f88dd1d3..0c197b3092 100644 --- a/deepmd/pd/model/descriptor/repformers.py +++ b/deepmd/pd/model/descriptor/repformers.py @@ -87,6 +87,7 @@ def __init__( use_sqrt_nnei: bool = True, g1_out_conv: bool = True, g1_out_mlp: bool = True, + trainable: bool = True, ) -> None: r""" The repformer descriptor block. @@ -173,6 +174,8 @@ def __init__( The epsilon value for layer normalization. seed : int, optional Random seed for parameter initialization. + trainable : bool, default: True + Whether this block is trainable """ super().__init__() self.rcut = float(rcut) @@ -223,7 +226,11 @@ def __init__( self.seed = seed self.g2_embd = MLPLayer( - 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) + 1, + self.g2_dim, + precision=precision, + seed=child_seed(seed, 0), + trainable=trainable, ) layers = [] for ii in range(nlayers): @@ -261,6 +268,7 @@ def __init__( g1_out_conv=self.g1_out_conv, g1_out_mlp=self.g1_out_mlp, seed=child_seed(child_seed(seed, 1), ii), + trainable=trainable, ) ) self.layers = paddle.nn.LayerList(layers) diff --git a/deepmd/pd/model/descriptor/se_a.py b/deepmd/pd/model/descriptor/se_a.py index 7b70a742ce..9cd9f7b0b7 100644 --- a/deepmd/pd/model/descriptor/se_a.py +++ b/deepmd/pd/model/descriptor/se_a.py @@ -481,6 +481,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, ii), + trainable=trainable, ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pd/model/descriptor/se_atten.py b/deepmd/pd/model/descriptor/se_atten.py index 6bec47b12e..788ab211a7 100644 --- a/deepmd/pd/model/descriptor/se_atten.py +++ b/deepmd/pd/model/descriptor/se_atten.py @@ -81,6 +81,7 @@ def __init__( ln_eps: Optional[float] = 1e-5, seed: Optional[Union[int, list[int]]] = None, type: Optional[str] = None, + trainable: bool = True, ) -> None: r"""Construct an embedding net of type `se_atten`. @@ -146,6 +147,8 @@ def __init__( If not None, the scaling of attention weights is `temperature` itself. seed : int, Optional Random seed for parameter initialization. + trainable : bool, default: True + Whether this block is trainable """ super().__init__() del type @@ -205,6 +208,7 @@ def __init__( smooth=self.smooth, precision=self.precision, seed=child_seed(self.seed, 0), + trainable=trainable, ) wanted_shape = (self.ntypes, self.nnei, 4) @@ -229,6 +233,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, 1), + trainable=trainable, ) self.filter_layers = filter_layers if self.tebd_input_mode in ["strip"]: @@ -242,6 +247,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, 2), + trainable=trainable, ) self.filter_layers_strip = filter_layers_strip self.stats = None @@ -655,6 +661,7 @@ def __init__( smooth: bool = True, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Construct a neighbor-wise attention net.""" super().__init__() @@ -690,6 +697,7 @@ def __init__( smooth=smooth, precision=precision, seed=child_seed(seed, i), + trainable=trainable, ) ) self.attention_layers = nn.LayerList(attention_layers) @@ -797,6 +805,7 @@ def __init__( ln_eps: float = 1e-5, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Construct a neighbor-wise attention layer.""" super().__init__() @@ -824,6 +833,7 @@ def __init__( smooth=smooth, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.attn_layer_norm = LayerNorm( self.embed_dim, @@ -904,6 +914,7 @@ def __init__( smooth: bool = True, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Construct a multi-head neighbor-wise attention net.""" super().__init__() @@ -936,6 +947,7 @@ def __init__( stddev=1.0, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.out_proj = MLPLayer( hidden_dim, @@ -946,6 +958,7 @@ def __init__( stddev=1.0, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) def forward( diff --git a/deepmd/pd/model/descriptor/se_t_tebd.py b/deepmd/pd/model/descriptor/se_t_tebd.py index 2898283f0c..3ebf62d7a5 100644 --- a/deepmd/pd/model/descriptor/se_t_tebd.py +++ b/deepmd/pd/model/descriptor/se_t_tebd.py @@ -160,6 +160,7 @@ def __init__( env_protection=env_protection, smooth=smooth, seed=child_seed(seed, 1), + trainable=trainable, ) self.prec = PRECISION_DICT[precision] self.use_econf_tebd = use_econf_tebd @@ -173,6 +174,7 @@ def __init__( use_econf_tebd=use_econf_tebd, type_map=type_map, use_tebd_bias=use_tebd_bias, + trainable=trainable, ) self.tebd_dim = tebd_dim self.tebd_input_mode = tebd_input_mode @@ -529,6 +531,7 @@ def __init__( env_protection: float = 0.0, smooth: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.rcut = float(rcut) @@ -585,6 +588,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, 1), + trainable=trainable, ) self.filter_layers = filter_layers if self.tebd_input_mode in ["strip"]: @@ -598,6 +602,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, 2), + trainable=trainable, ) self.filter_layers_strip = filter_layers_strip self.stats = None diff --git a/deepmd/pd/model/network/mlp.py b/deepmd/pd/model/network/mlp.py index 41286fbbae..ee408b8719 100644 --- a/deepmd/pd/model/network/mlp.py +++ b/deepmd/pd/model/network/mlp.py @@ -85,8 +85,10 @@ def __init__( precision: str = DEFAULT_PRECISION, init: str = "default", seed: int | list[int] | None = None, + trainable: bool = True, ): super().__init__() + self.trainable = trainable # only use_timestep when skip connection is established. self.use_timestep = use_timestep and ( num_out == num_in or num_out == num_in * 2 @@ -251,6 +253,7 @@ def serialize(self) -> dict: activation_function=self.activate_name, resnet=self.resnet, precision=self.precision, + trainable=self.trainable, ) nl.w, nl.b, nl.idt = ( to_numpy_array(self.matrix), @@ -277,6 +280,7 @@ def deserialize(cls, data: dict) -> MLPLayer: activation_function=nl["activation_function"], resnet=nl["resnet"], precision=nl["precision"], + trainable=nl["trainable"], ) prec = PRECISION_DICT[obj.precision] diff --git a/deepmd/pd/model/network/network.py b/deepmd/pd/model/network/network.py index 9cdb7b3adc..320fc55eed 100644 --- a/deepmd/pd/model/network/network.py +++ b/deepmd/pd/model/network/network.py @@ -45,6 +45,7 @@ def __init__( use_econf_tebd=False, use_tebd_bias: bool = False, type_map=None, + trainable: bool = True, ) -> None: """Construct a type embedding net.""" super().__init__() @@ -65,6 +66,7 @@ def __init__( type_map=type_map, precision=precision, seed=seed, + trainable=trainable, ) # init.normal_(self.embedding.weight[:-1], mean=bavg, std=stddev) @@ -195,6 +197,7 @@ def __init__( self.precision, self.seed, bias=self.use_tebd_bias, + trainable=trainable, ) for param in self.parameters(): param.stop_gradient = not trainable diff --git a/deepmd/pt/model/descriptor/dpa1.py b/deepmd/pt/model/descriptor/dpa1.py index 9c1e144f48..16603dc75d 100644 --- a/deepmd/pt/model/descriptor/dpa1.py +++ b/deepmd/pt/model/descriptor/dpa1.py @@ -298,6 +298,7 @@ def __init__( trainable_ln=trainable_ln, ln_eps=ln_eps, seed=child_seed(seed, 1), + trainable=trainable, ) self.use_econf_tebd = use_econf_tebd self.use_tebd_bias = use_tebd_bias @@ -311,6 +312,7 @@ def __init__( use_econf_tebd=use_econf_tebd, use_tebd_bias=use_tebd_bias, type_map=type_map, + trainable=trainable, ) self.prec = PRECISION_DICT[precision] self.tebd_dim = tebd_dim diff --git a/deepmd/pt/model/descriptor/dpa2.py b/deepmd/pt/model/descriptor/dpa2.py index 969fdca5fc..0d6fbd84e5 100644 --- a/deepmd/pt/model/descriptor/dpa2.py +++ b/deepmd/pt/model/descriptor/dpa2.py @@ -188,6 +188,7 @@ def init_subclass_params(sub_data, sub_class): smooth=smooth, type_one_side=self.repinit_args.type_one_side, seed=child_seed(seed, 0), + trainable=trainable, ) self.use_three_body = self.repinit_args.use_three_body if self.use_three_body: @@ -207,6 +208,7 @@ def init_subclass_params(sub_data, sub_class): resnet_dt=self.repinit_args.resnet_dt, smooth=smooth, seed=child_seed(seed, 5), + trainable=trainable, ) else: self.repinit_three_body = None @@ -247,6 +249,7 @@ def init_subclass_params(sub_data, sub_class): g1_out_conv=self.repformer_args.g1_out_conv, g1_out_mlp=self.repformer_args.g1_out_mlp, seed=child_seed(seed, 1), + trainable=trainable, ) self.rcsl_list = [ (self.repformers.get_rcut(), self.repformers.get_nsel()), @@ -274,6 +277,7 @@ def init_subclass_params(sub_data, sub_class): use_econf_tebd=self.use_econf_tebd, use_tebd_bias=use_tebd_bias, type_map=type_map, + trainable=trainable, ) self.concat_output_tebd = concat_output_tebd self.precision = precision @@ -299,6 +303,7 @@ def init_subclass_params(sub_data, sub_class): precision=precision, init="glorot", seed=child_seed(seed, 3), + trainable=trainable, ) self.tebd_transform = None if self.add_tebd_to_repinit_out: @@ -308,6 +313,7 @@ def init_subclass_params(sub_data, sub_class): bias=False, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) assert self.repinit.rcut > self.repformers.rcut assert self.repinit.sel[0] > self.repformers.sel[0] diff --git a/deepmd/pt/model/descriptor/dpa3.py b/deepmd/pt/model/descriptor/dpa3.py index 5d45c0633a..36b09230de 100644 --- a/deepmd/pt/model/descriptor/dpa3.py +++ b/deepmd/pt/model/descriptor/dpa3.py @@ -169,6 +169,7 @@ def init_subclass_params(sub_data, sub_class): env_protection=env_protection, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.use_econf_tebd = use_econf_tebd @@ -184,6 +185,7 @@ def init_subclass_params(sub_data, sub_class): use_econf_tebd=self.use_econf_tebd, use_tebd_bias=use_tebd_bias, type_map=type_map, + trainable=trainable, ) self.concat_output_tebd = concat_output_tebd self.precision = precision diff --git a/deepmd/pt/model/descriptor/repflow_layer.py b/deepmd/pt/model/descriptor/repflow_layer.py index 37d4f07bb4..a52e5eba30 100644 --- a/deepmd/pt/model/descriptor/repflow_layer.py +++ b/deepmd/pt/model/descriptor/repflow_layer.py @@ -64,6 +64,7 @@ def __init__( update_residual_init: str = "const", precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -126,6 +127,7 @@ def __init__( n_dim, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) if self.update_style == "res_residual": self.n_residual.append( @@ -135,6 +137,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) ) @@ -145,6 +148,7 @@ def __init__( n_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, ) if self.update_style == "res_residual": self.n_residual.append( @@ -154,6 +158,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 3), + trainable=trainable, ) ) @@ -163,6 +168,7 @@ def __init__( self.n_multi_edge_message * n_dim, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) if self.update_style == "res_residual": for head_index in range(self.n_multi_edge_message): @@ -173,6 +179,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(child_seed(seed, 5), head_index), + trainable=trainable, ) ) @@ -182,6 +189,7 @@ def __init__( e_dim, precision=precision, seed=child_seed(seed, 6), + trainable=trainable, ) if self.update_style == "res_residual": self.e_residual.append( @@ -191,6 +199,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 7), + trainable=trainable, ) ) @@ -219,6 +228,7 @@ def __init__( precision=precision, bias=False, seed=child_seed(seed, 8), + trainable=trainable, ) self.a_compress_e_linear = MLPLayer( self.e_dim, @@ -226,6 +236,7 @@ def __init__( precision=precision, bias=False, seed=child_seed(seed, 9), + trainable=trainable, ) else: self.a_compress_n_linear = None @@ -237,12 +248,14 @@ def __init__( self.e_dim, precision=precision, seed=child_seed(seed, 10), + trainable=trainable, ) self.edge_angle_linear2 = MLPLayer( self.e_dim, self.e_dim, precision=precision, seed=child_seed(seed, 11), + trainable=trainable, ) if self.update_style == "res_residual": self.e_residual.append( @@ -252,6 +265,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 12), + trainable=trainable, ) ) @@ -261,6 +275,7 @@ def __init__( self.a_dim, precision=precision, seed=child_seed(seed, 13), + trainable=trainable, ) if self.update_style == "res_residual": self.a_residual.append( @@ -270,6 +285,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 14), + trainable=trainable, ) ) else: diff --git a/deepmd/pt/model/descriptor/repflows.py b/deepmd/pt/model/descriptor/repflows.py index 5889b0a819..d6b38a7f20 100644 --- a/deepmd/pt/model/descriptor/repflows.py +++ b/deepmd/pt/model/descriptor/repflows.py @@ -181,6 +181,8 @@ class DescrptBlockRepflows(DescriptorBlock): For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. seed : int, optional Random seed for parameter initialization. + trainable : bool, default: True + Whether this block is trainable """ def __init__( @@ -219,6 +221,7 @@ def __init__( use_loc_mapping: bool = True, optim_update: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.e_rcut = float(e_rcut) @@ -283,10 +286,19 @@ def __init__( self.seed = seed self.edge_embd = MLPLayer( - 1, self.e_dim, precision=precision, seed=child_seed(seed, 0) + 1, + self.e_dim, + precision=precision, + seed=child_seed(seed, 0), + trainable=trainable, ) self.angle_embd = MLPLayer( - 1, self.a_dim, precision=precision, bias=False, seed=child_seed(seed, 1) + 1, + self.a_dim, + precision=precision, + bias=False, + seed=child_seed(seed, 1), + trainable=trainable, ) layers = [] for ii in range(nlayers): @@ -318,6 +330,7 @@ def __init__( sel_reduce_factor=self.sel_reduce_factor, smooth_edge_update=self.smooth_edge_update, seed=child_seed(child_seed(seed, 1), ii), + trainable=trainable, ) ) self.layers = torch.nn.ModuleList(layers) diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py index 1e2cba66d6..9715b7479b 100644 --- a/deepmd/pt/model/descriptor/repformer_layer.py +++ b/deepmd/pt/model/descriptor/repformer_layer.py @@ -160,6 +160,7 @@ def __init__( attnw_shift: float = 20.0, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" super().__init__() @@ -172,6 +173,7 @@ def __init__( bias=False, precision=precision, seed=seed, + trainable=trainable, ) self.has_gate = has_gate self.smooth = smooth @@ -285,6 +287,7 @@ def __init__( head_num: int, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.input_dim = input_dim @@ -295,12 +298,14 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.head_map = MLPLayer( input_dim * head_num, input_dim, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.precision = precision @@ -370,12 +375,18 @@ def __init__( head_num: int, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.input_dim = input_dim self.head_num = head_num self.head_map = MLPLayer( - head_num, 1, bias=False, precision=precision, seed=seed + head_num, + 1, + bias=False, + precision=precision, + seed=seed, + trainable=trainable, ) self.precision = precision @@ -443,6 +454,7 @@ def __init__( attnw_shift: float = 20.0, precision: str = "float64", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.input_dim = input_dim @@ -454,6 +466,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.mapkv = MLPLayer( input_dim, @@ -461,12 +474,14 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.head_map = MLPLayer( input_dim * head_num, input_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, ) self.smooth = smooth self.attnw_shift = attnw_shift @@ -602,6 +617,7 @@ def __init__( g1_out_conv: bool = True, g1_out_mlp: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.epsilon = 1e-4 # protection of 1./nnei @@ -662,6 +678,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) ) @@ -671,6 +688,7 @@ def __init__( g1_dim, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) self.linear2 = None self.proj_g1g2 = None @@ -687,6 +705,7 @@ def __init__( g2_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, ) if self.update_style == "res_residual": self.g2_residual.append( @@ -696,6 +715,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 3), + trainable=trainable, ) ) if self.g1_out_mlp: @@ -704,6 +724,7 @@ def __init__( g1_dim, precision=precision, seed=child_seed(seed, 15), + trainable=trainable, ) if self.update_style == "res_residual": self.g1_residual.append( @@ -713,6 +734,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 16), + trainable=trainable, ) ) else: @@ -725,6 +747,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) else: self.proj_g1g2 = MLPLayer( @@ -733,6 +756,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 4), + trainable=trainable, ) if self.update_style == "res_residual": self.g1_residual.append( @@ -742,6 +766,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 17), + trainable=trainable, ) ) if self.update_g2_has_g1g1: @@ -751,6 +776,7 @@ def __init__( bias=False, precision=precision, seed=child_seed(seed, 5), + trainable=trainable, ) if self.update_style == "res_residual": self.g2_residual.append( @@ -760,6 +786,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 6), + trainable=trainable, ) ) if self.update_g2_has_attn or self.update_h2: @@ -771,10 +798,15 @@ def __init__( self.smooth, precision=precision, seed=child_seed(seed, 7), + trainable=trainable, ) if self.update_g2_has_attn: self.attn2_mh_apply = Atten2MultiHeadApply( - g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) + g2_dim, + attn2_nhead, + precision=precision, + seed=child_seed(seed, 8), + trainable=trainable, ) self.attn2_lm = LayerNorm( g2_dim, @@ -791,12 +823,17 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 10), + trainable=trainable, ) ) if self.update_h2: self.attn2_ev_apply = Atten2EquiVarApply( - g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) + g2_dim, + attn2_nhead, + precision=precision, + seed=child_seed(seed, 11), + trainable=trainable, ) if self.update_style == "res_residual": self.h2_residual.append( @@ -806,6 +843,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 12), + trainable=trainable, ) ) if self.update_g1_has_attn: @@ -816,6 +854,7 @@ def __init__( self.smooth, precision=precision, seed=child_seed(seed, 13), + trainable=trainable, ) if self.update_style == "res_residual": self.g1_residual.append( @@ -825,6 +864,7 @@ def __init__( self.update_residual_init, precision=precision, seed=child_seed(seed, 14), + trainable=trainable, ) ) diff --git a/deepmd/pt/model/descriptor/repformers.py b/deepmd/pt/model/descriptor/repformers.py index 82773d1a78..022c7510df 100644 --- a/deepmd/pt/model/descriptor/repformers.py +++ b/deepmd/pt/model/descriptor/repformers.py @@ -111,6 +111,7 @@ def __init__( use_sqrt_nnei: bool = True, g1_out_conv: bool = True, g1_out_mlp: bool = True, + trainable: bool = True, ) -> None: r""" The repformer descriptor block. @@ -197,6 +198,8 @@ def __init__( The epsilon value for layer normalization. seed : int, optional Random seed for parameter initialization. + trainable : bool + Whether the block is trainable """ super().__init__() self.rcut = float(rcut) @@ -247,7 +250,11 @@ def __init__( self.seed = seed self.g2_embd = MLPLayer( - 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) + 1, + self.g2_dim, + precision=precision, + seed=child_seed(seed, 0), + trainable=trainable, ) layers = [] for ii in range(nlayers): @@ -285,6 +292,7 @@ def __init__( g1_out_conv=self.g1_out_conv, g1_out_mlp=self.g1_out_mlp, seed=child_seed(child_seed(seed, 1), ii), + trainable=trainable, ) ) self.layers = torch.nn.ModuleList(layers) diff --git a/deepmd/pt/model/descriptor/se_a.py b/deepmd/pt/model/descriptor/se_a.py index fc3e14bd25..f49b5a1276 100644 --- a/deepmd/pt/model/descriptor/se_a.py +++ b/deepmd/pt/model/descriptor/se_a.py @@ -525,6 +525,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, ii), + trainable=trainable, ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/descriptor/se_atten.py b/deepmd/pt/model/descriptor/se_atten.py index 1ce6ad4583..27c5716919 100644 --- a/deepmd/pt/model/descriptor/se_atten.py +++ b/deepmd/pt/model/descriptor/se_atten.py @@ -100,6 +100,7 @@ def __init__( ln_eps: Optional[float] = 1e-5, seed: Optional[Union[int, list[int]]] = None, type: Optional[str] = None, + trainable: bool = True, ) -> None: r"""Construct an embedding net of type `se_atten`. @@ -165,6 +166,8 @@ def __init__( If not None, the scaling of attention weights is `temperature` itself. seed : int, Optional Random seed for parameter initialization. + trainable : bool, default: True + Whether this block is trainable """ super().__init__() del type @@ -224,6 +227,7 @@ def __init__( smooth=self.smooth, precision=self.precision, seed=child_seed(self.seed, 0), + trainable=trainable, ) wanted_shape = (self.ntypes, self.nnei, 4) @@ -248,6 +252,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, 1), + trainable=trainable, ) self.filter_layers = filter_layers if self.tebd_input_mode in ["strip"]: @@ -261,6 +266,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, 2), + trainable=trainable, ) self.filter_layers_strip = filter_layers_strip self.stats = None @@ -680,6 +686,7 @@ def __init__( smooth: bool = True, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Construct a neighbor-wise attention net.""" super().__init__() @@ -715,6 +722,7 @@ def __init__( smooth=smooth, precision=precision, seed=child_seed(seed, i), + trainable=trainable, ) ) self.attention_layers = nn.ModuleList(attention_layers) @@ -823,6 +831,7 @@ def __init__( ln_eps: float = 1e-5, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Construct a neighbor-wise attention layer.""" super().__init__() @@ -850,6 +859,7 @@ def __init__( smooth=smooth, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.attn_layer_norm = LayerNorm( self.embed_dim, @@ -930,6 +940,7 @@ def __init__( smooth: bool = True, precision: str = DEFAULT_PRECISION, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: """Construct a multi-head neighbor-wise attention net.""" super().__init__() @@ -962,6 +973,7 @@ def __init__( stddev=1.0, precision=precision, seed=child_seed(seed, 0), + trainable=trainable, ) self.out_proj = MLPLayer( hidden_dim, @@ -972,6 +984,7 @@ def __init__( stddev=1.0, precision=precision, seed=child_seed(seed, 1), + trainable=trainable, ) def forward( diff --git a/deepmd/pt/model/descriptor/se_r.py b/deepmd/pt/model/descriptor/se_r.py index a91757460c..9ce92fb8b4 100644 --- a/deepmd/pt/model/descriptor/se_r.py +++ b/deepmd/pt/model/descriptor/se_r.py @@ -142,6 +142,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, ii), + trainable=trainable, ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/descriptor/se_t.py b/deepmd/pt/model/descriptor/se_t.py index 6e075a04e4..f3bd0f65ef 100644 --- a/deepmd/pt/model/descriptor/se_t.py +++ b/deepmd/pt/model/descriptor/se_t.py @@ -575,6 +575,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, ii), + trainable=trainable, ) self.filter_layers = filter_layers self.stats = None diff --git a/deepmd/pt/model/descriptor/se_t_tebd.py b/deepmd/pt/model/descriptor/se_t_tebd.py index 7e27805bd5..3ee7929151 100644 --- a/deepmd/pt/model/descriptor/se_t_tebd.py +++ b/deepmd/pt/model/descriptor/se_t_tebd.py @@ -160,6 +160,7 @@ def __init__( env_protection=env_protection, smooth=smooth, seed=child_seed(seed, 1), + trainable=trainable, ) self.prec = PRECISION_DICT[precision] self.use_econf_tebd = use_econf_tebd @@ -170,6 +171,7 @@ def __init__( tebd_dim, precision=precision, seed=child_seed(seed, 2), + trainable=trainable, use_econf_tebd=use_econf_tebd, type_map=type_map, use_tebd_bias=use_tebd_bias, @@ -525,6 +527,7 @@ def __init__( env_protection: float = 0.0, smooth: bool = True, seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() self.rcut = float(rcut) @@ -577,6 +580,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, 1), + trainable=trainable, ) self.filter_layers = filter_layers if self.tebd_input_mode in ["strip"]: @@ -590,6 +594,7 @@ def __init__( precision=self.precision, resnet_dt=self.resnet_dt, seed=child_seed(self.seed, 2), + trainable=trainable, ) self.filter_layers_strip = filter_layers_strip self.stats = None diff --git a/deepmd/pt/model/network/mlp.py b/deepmd/pt/model/network/mlp.py index 22675d6163..ea07f617d4 100644 --- a/deepmd/pt/model/network/mlp.py +++ b/deepmd/pt/model/network/mlp.py @@ -83,8 +83,10 @@ def __init__( precision: str = DEFAULT_PRECISION, init: str = "default", seed: Optional[Union[int, list[int]]] = None, + trainable: bool = True, ) -> None: super().__init__() + self.trainable = trainable # only use_timestep when skip connection is established. self.use_timestep = use_timestep and ( num_out == num_in or num_out == num_in * 2 @@ -233,6 +235,7 @@ def serialize(self) -> dict: activation_function=self.activate_name, resnet=self.resnet, precision=self.precision, + trainable=self.trainable, ) nl.w, nl.b, nl.idt = ( to_numpy_array(self.matrix), @@ -259,6 +262,7 @@ def deserialize(cls, data: dict) -> "MLPLayer": activation_function=nl["activation_function"], resnet=nl["resnet"], precision=nl["precision"], + trainable=nl["trainable"], ) prec = PRECISION_DICT[obj.precision] diff --git a/deepmd/pt/model/network/network.py b/deepmd/pt/model/network/network.py index ab01a90774..71f335e446 100644 --- a/deepmd/pt/model/network/network.py +++ b/deepmd/pt/model/network/network.py @@ -253,6 +253,7 @@ def __init__( use_econf_tebd=False, use_tebd_bias: bool = False, type_map=None, + trainable: bool = True, ) -> None: """Construct a type embedding net.""" super().__init__() @@ -273,6 +274,7 @@ def __init__( type_map=type_map, precision=precision, seed=seed, + trainable=trainable, ) # nn.init.normal_(self.embedding.weight[:-1], mean=bavg, std=stddev) diff --git a/deepmd/pt/model/task/fitting.py b/deepmd/pt/model/task/fitting.py index 0865b61f52..6d0f3041dc 100644 --- a/deepmd/pt/model/task/fitting.py +++ b/deepmd/pt/model/task/fitting.py @@ -320,6 +320,7 @@ def __init__( self.precision, bias_out=True, seed=child_seed(self.seed, ii), + trainable=trainable, ) for ii in range(self.ntypes if not self.mixed_types else 1) ], diff --git a/deepmd/tf/descriptor/se.py b/deepmd/tf/descriptor/se.py index 2863704143..5b04c5ba00 100644 --- a/deepmd/tf/descriptor/se.py +++ b/deepmd/tf/descriptor/se.py @@ -192,6 +192,7 @@ def serialize_network( resnet_dt: bool, variables: dict, excluded_types: set[tuple[int, int]] = set(), + trainable: bool = True, suffix: str = "", ) -> dict: """Serialize network. @@ -214,6 +215,8 @@ def serialize_network( The input variables excluded_types : set[tuple[int, int]], optional The excluded types + trainable : bool + Whether the network is trainable suffix : str, optional The suffix of the scope @@ -236,6 +239,7 @@ def serialize_network( activation_function=activation_function, resnet_dt=resnet_dt, precision=self.precision.name, + trainable=trainable, ) embeddings[(type_j, type_i)] = EmbeddingNet( in_dim=in_dim, @@ -243,6 +247,7 @@ def serialize_network( activation_function=activation_function, resnet_dt=resnet_dt, precision=self.precision.name, + trainable=trainable, ) embeddings[(type_i, type_j)].clear() embeddings[(type_j, type_i)].clear() @@ -278,6 +283,7 @@ def serialize_network( activation_function=activation_function, resnet_dt=resnet_dt, precision=self.precision.name, + trainable=trainable, ) assert embeddings[network_idx] is not None if weight_name == "idt": diff --git a/deepmd/tf/descriptor/se_a.py b/deepmd/tf/descriptor/se_a.py index e3ae7bf99c..ba21925b13 100644 --- a/deepmd/tf/descriptor/se_a.py +++ b/deepmd/tf/descriptor/se_a.py @@ -1462,6 +1462,7 @@ def serialize(self, suffix: str = "") -> dict: resnet_dt=self.filter_resnet_dt, variables=self.embedding_net_variables, excluded_types=self.exclude_types, + trainable=self.trainable, suffix=suffix, ), "env_mat": EnvMat(self.rcut_r, self.rcut_r_smth).serialize(), diff --git a/deepmd/tf/descriptor/se_atten.py b/deepmd/tf/descriptor/se_atten.py index 3a9b86a0d6..002e7bd3d3 100644 --- a/deepmd/tf/descriptor/se_atten.py +++ b/deepmd/tf/descriptor/se_atten.py @@ -1593,6 +1593,7 @@ def serialize_attention_layers( bias=bias, use_timestep=False, precision=self.precision.name, + trainable=self.trainable, ) matrix_list = [ attention_layer_params[layer_idx][key]["matrix"] @@ -1611,6 +1612,7 @@ def serialize_attention_layers( bias=bias, use_timestep=False, precision=self.precision.name, + trainable=self.trainable, ) out_proj["matrix"] = attention_layer_params[layer_idx]["c_out"]["matrix"] if bias: @@ -1654,6 +1656,7 @@ def serialize_network_strip( variables: dict, suffix: str = "", type_one_side: bool = False, + trainable: bool = True, ) -> dict: """Serialize network. @@ -1679,6 +1682,8 @@ def serialize_network_strip( If 'False', type embeddings of both neighbor and central atoms are considered. If 'True', only type embeddings of neighbor atoms are considered. Default is 'False'. + trainable : bool + Whether the network is trainable Returns ------- @@ -1719,6 +1724,7 @@ def serialize_network_strip( activation_function=activation_function, resnet_dt=resnet_dt, precision=self.precision.name, + trainable=trainable, ) assert embeddings[network_idx] is not None if weight_name == "idt": @@ -1983,6 +1989,7 @@ def serialize(self, suffix: str = "") -> dict: resnet_dt=self.filter_resnet_dt, variables=self.embedding_net_variables, excluded_types=self.exclude_types, + trainable=self.trainable, suffix=suffix, ), "attention_layers": self.serialize_attention_layers( @@ -2032,6 +2039,7 @@ def serialize(self, suffix: str = "") -> dict: variables=self.two_side_embeeding_net_variables, suffix=suffix, type_one_side=self.type_one_side, + trainable=self.trainable, ) } ) diff --git a/deepmd/tf/descriptor/se_r.py b/deepmd/tf/descriptor/se_r.py index ed66d6ad25..9613a9fa9b 100644 --- a/deepmd/tf/descriptor/se_r.py +++ b/deepmd/tf/descriptor/se_r.py @@ -795,6 +795,7 @@ def serialize(self, suffix: str = "") -> dict: resnet_dt=self.filter_resnet_dt, variables=self.embedding_net_variables, excluded_types=self.exclude_types, + trainable=self.trainable, suffix=suffix, ), "env_mat": EnvMat(self.rcut, self.rcut_smth).serialize(), diff --git a/deepmd/tf/descriptor/se_t.py b/deepmd/tf/descriptor/se_t.py index c5d50744af..ec6a1122d6 100644 --- a/deepmd/tf/descriptor/se_t.py +++ b/deepmd/tf/descriptor/se_t.py @@ -726,6 +726,7 @@ def serialize_network( resnet_dt: bool, variables: dict, excluded_types: set[tuple[int, int]] = set(), + trainable: bool = True, suffix: str = "", ) -> dict: """Serialize network. @@ -748,6 +749,8 @@ def serialize_network( The input variables excluded_types : set[tuple[int, int]], optional The excluded types + trainable : bool, optional + Whether the network is trainable suffix : str, optional The suffix of the scope @@ -771,6 +774,7 @@ def clear_ij(type_i, type_j) -> None: activation_function=activation_function, resnet_dt=resnet_dt, precision=self.precision.name, + trainable=trainable, ) embeddings[(type_i, type_j)].clear() @@ -805,6 +809,7 @@ def clear_ij(type_i, type_j) -> None: activation_function=activation_function, resnet_dt=resnet_dt, precision=self.precision.name, + trainable=trainable, ) assert embeddings[network_idx] is not None if weight_name == "idt": @@ -941,6 +946,7 @@ def serialize(self, suffix: str = "") -> dict: resnet_dt=self.filter_resnet_dt, variables=self.embedding_net_variables, excluded_types=self.exclude_types, + trainable=self.trainable, suffix=suffix, ), "env_mat": EnvMat(self.rcut_r, self.rcut_r_smth).serialize(), diff --git a/deepmd/tf/fit/dipole.py b/deepmd/tf/fit/dipole.py index 4428d06536..d9cb0002cb 100644 --- a/deepmd/tf/fit/dipole.py +++ b/deepmd/tf/fit/dipole.py @@ -75,6 +75,10 @@ class DipoleFittingSeA(Fitting): different fitting nets for different atom types. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. + trainable : list[bool], Optional + If the weights of fitting net are trainable. + Suppose that we have :math:`N_l` hidden layers in the fitting net, + this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. """ def __init__( @@ -94,6 +98,7 @@ def __init__( uniform_seed: bool = False, mixed_types: bool = False, type_map: Optional[list[str]] = None, # to be compat with input + trainable: Optional[list[bool]] = None, **kwargs, ) -> None: """Constructor.""" @@ -135,6 +140,15 @@ def __init__( self.aparam_avg = None self.aparam_std = None self.aparam_inv_std = None + if trainable is None: + self.trainable = [True for _ in range(len(self.n_neuron) + 1)] + elif isinstance(trainable, bool): + self.trainable = [trainable] * (len(self.n_neuron) + 1) + else: + self.trainable = trainable + assert len(self.trainable) == len(self.n_neuron) + 1, ( + "length of trainable should be that of n_neuron + 1" + ) def get_sel_type(self) -> int: """Get selected type.""" @@ -166,6 +180,7 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No uniform_seed=self.uniform_seed, initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec, + trainable=self.trainable[ii], ) else: layer = one_layer( @@ -179,6 +194,7 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No uniform_seed=self.uniform_seed, initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec, + trainable=self.trainable[ii], ) if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift @@ -195,6 +211,7 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec, final_layer=True, + trainable=self.trainable[-1], ) if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift @@ -414,6 +431,7 @@ def serialize(self, suffix: str) -> dict: activation_function=self.activation_function_name, resnet_dt=self.resnet_dt, variables=self.fitting_net_variables, + trainable=self.trainable, suffix=suffix, ), "type_map": self.type_map, diff --git a/deepmd/tf/fit/dos.py b/deepmd/tf/fit/dos.py index 8fa3167bfc..96e9470692 100644 --- a/deepmd/tf/fit/dos.py +++ b/deepmd/tf/fit/dos.py @@ -730,6 +730,7 @@ def serialize(self, suffix: str = "") -> dict: activation_function=self.activation_function, resnet_dt=self.resnet_dt, variables=self.fitting_net_variables, + trainable=self.trainable, suffix=suffix, ), "@variables": { diff --git a/deepmd/tf/fit/ener.py b/deepmd/tf/fit/ener.py index e10468df32..2458081a88 100644 --- a/deepmd/tf/fit/ener.py +++ b/deepmd/tf/fit/ener.py @@ -944,6 +944,7 @@ def serialize(self, suffix: str = "") -> dict: activation_function=self.activation_function_name, resnet_dt=self.resnet_dt, variables=self.fitting_net_variables, + trainable=self.trainable, suffix=suffix, ), "@variables": { diff --git a/deepmd/tf/fit/fitting.py b/deepmd/tf/fit/fitting.py index f159de1628..4f7436a52c 100644 --- a/deepmd/tf/fit/fitting.py +++ b/deepmd/tf/fit/fitting.py @@ -135,6 +135,7 @@ def serialize_network( resnet_dt: bool, variables: dict, out_dim: Optional[int] = 1, + trainable: Optional[list[bool]] = None, suffix: str = "", ) -> dict: """Serialize network. @@ -155,6 +156,8 @@ def serialize_network( Whether to use resnet variables : dict The input variables + trainable : list[bool] + Whether the network is trainable suffix : str, optional The suffix of the scope out_dim : int, optional @@ -191,6 +194,8 @@ def serialize_network( raise ValueError(f"Invalid ndim: {ndim}") if fittings[network_idx] is None: # initialize the network if it is not initialized + if trainable is None: + trainable = [True for _ in range(len(neuron) + 1)] fittings[network_idx] = FittingNet( in_dim=in_dim, out_dim=out_dim, @@ -199,6 +204,7 @@ def serialize_network( resnet_dt=resnet_dt, precision=self.precision.name, bias_out=True, + trainable=trainable, ) assert fittings[network_idx] is not None if weight_name == "idt": diff --git a/deepmd/tf/fit/polar.py b/deepmd/tf/fit/polar.py index c8fd4e86e8..c44af58a5a 100644 --- a/deepmd/tf/fit/polar.py +++ b/deepmd/tf/fit/polar.py @@ -90,6 +90,10 @@ class PolarFittingSeA(Fitting): different fitting nets for different atom types. type_map: list[str], Optional A list of strings. Give the name to each type of atoms. + trainable : list[bool], Optional + If the weights of fitting net are trainable. + Suppose that we have :math:`N_l` hidden layers in the fitting net, + this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. """ def __init__( @@ -113,6 +117,7 @@ def __init__( uniform_seed: bool = False, mixed_types: bool = False, type_map: Optional[list[str]] = None, # to be compat with input + trainable: Optional[list[bool]] = None, **kwargs, ) -> None: """Constructor.""" @@ -182,6 +187,15 @@ def __init__( self.aparam_avg = None self.aparam_std = None self.aparam_inv_std = None + if trainable is None: + self.trainable = [True for _ in range(len(self.n_neuron) + 1)] + elif isinstance(trainable, bool): + self.trainable = [trainable] * (len(self.n_neuron) + 1) + else: + self.trainable = trainable + assert len(self.trainable) == len(self.n_neuron) + 1, ( + "length of trainable should be that of n_neuron + 1" + ) def get_sel_type(self) -> list[int]: """Get selected atom types.""" @@ -312,6 +326,7 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No uniform_seed=self.uniform_seed, initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec, + trainable=self.trainable[ii], ) else: layer = one_layer( @@ -325,6 +340,7 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No uniform_seed=self.uniform_seed, initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec, + trainable=self.trainable[ii], ) if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift @@ -347,6 +363,7 @@ def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=No initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec, final_layer=True, + trainable=self.trainable[-1], ) if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift @@ -638,6 +655,7 @@ def serialize(self, suffix: str) -> dict: activation_function=self.activation_function_name, resnet_dt=self.resnet_dt, variables=self.fitting_net_variables, + trainable=self.trainable, suffix=suffix, ), "@variables": { diff --git a/source/tests/consistent/descriptor/test_dpa1.py b/source/tests/consistent/descriptor/test_dpa1.py index db5fe4dae0..d31cf289b9 100644 --- a/source/tests/consistent/descriptor/test_dpa1.py +++ b/source/tests/consistent/descriptor/test_dpa1.py @@ -127,6 +127,7 @@ def data(self) -> dict: "use_tebd_bias": use_tebd_bias, "type_map": ["O", "H"] if use_econf_tebd else None, "seed": 1145141919810, + "trainable": False, } def is_meaningless_zero_attention_layer_tests( diff --git a/source/tests/consistent/descriptor/test_dpa2.py b/source/tests/consistent/descriptor/test_dpa2.py index ef840bf9d7..6864d91f26 100644 --- a/source/tests/consistent/descriptor/test_dpa2.py +++ b/source/tests/consistent/descriptor/test_dpa2.py @@ -181,7 +181,7 @@ def data(self) -> dict: "smooth": smooth, "exclude_types": exclude_types, "env_protection": 0.0, - "trainable": True, + "trainable": False, "use_econf_tebd": use_econf_tebd, "use_tebd_bias": use_tebd_bias, "type_map": ["O", "H"] if use_econf_tebd else None, diff --git a/source/tests/consistent/descriptor/test_dpa3.py b/source/tests/consistent/descriptor/test_dpa3.py index b99117b9e7..47cc4e1004 100644 --- a/source/tests/consistent/descriptor/test_dpa3.py +++ b/source/tests/consistent/descriptor/test_dpa3.py @@ -130,7 +130,7 @@ def data(self) -> dict: "exclude_types": exclude_types, "env_protection": 0.0, "use_loc_mapping": use_loc_mapping, - "trainable": True, + "trainable": False, } @property