From c0bc1016379b7d3ac06f8ae149ec891032823a20 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 7 Mar 2022 18:45:49 -0500
Subject: [PATCH 01/29] add the deepmodeling banner to doc (#1529)

---
 doc/conf.py | 6 ++++--
 setup.py    | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/doc/conf.py b/doc/conf.py
index 3803691044..5dc12be1f7 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -15,6 +15,7 @@
 import sys
 import recommonmark
 from recommonmark.transform import AutoStructify
+from datetime import date
 
 def mkindex(dirname):
     dirname = dirname + "/"
@@ -106,8 +107,8 @@ def classify_index_TS():
 # -- Project information -----------------------------------------------------
 
 project = 'DeePMD-kit'
-copyright = '2017-2021, Deep Modeling'
-author = 'Deep Modeling'
+copyright = '2017-%d, DeepModeling' % date.today().year
+author = 'DeepModeling'
 
 def run_doxygen(folder):
     """Run the doxygen make command in the designated folder"""
@@ -167,6 +168,7 @@ def setup(app):
 #classify_index_TS()
 
 extensions = [
+    "deepmodeling_sphinx",
     "sphinx_rtd_theme",
     'myst_parser',
     'sphinx.ext.autosummary',
diff --git a/setup.py b/setup.py
index f843395ed3..2623f0f149 100644
--- a/setup.py
+++ b/setup.py
@@ -127,7 +127,7 @@
     cmake_minimum_required_version="3.0",
     extras_require={
         "test": ["dpdata>=0.1.9", "ase", "pytest", "pytest-cov", "pytest-sugar"],
-        "docs": ["sphinx>=3.1.1", "recommonmark", "sphinx_rtd_theme>=1.0.0rc1", "sphinx_markdown_tables", "myst-parser", "breathe", "exhale", "numpydoc", "ase"],
+        "docs": ["sphinx>=3.1.1", "recommonmark", "sphinx_rtd_theme>=1.0.0rc1", "sphinx_markdown_tables", "myst-parser", "breathe", "exhale", "numpydoc", "ase", "deepmodeling-sphinx"],
         **extras_require,
     },
     entry_points={"console_scripts": ["dp = deepmd.entrypoints.main:main"]},

From 69989b4c948b24ab885e2743bd0cf51e24fa85c2 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 7 Mar 2022 19:09:59 -0500
Subject: [PATCH 02/29] fix the name of deeptensor/atom and dplr plugin (#1548)

---
 source/lmp/plugin/deepmdplugin.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/source/lmp/plugin/deepmdplugin.cpp b/source/lmp/plugin/deepmdplugin.cpp
index ace1dc93bd..bc54b8ea46 100644
--- a/source/lmp/plugin/deepmdplugin.cpp
+++ b/source/lmp/plugin/deepmdplugin.cpp
@@ -39,12 +39,14 @@ extern "C" void lammpsplugin_init(void *lmp, void *handle, void *regfunc)
   (*register_plugin)(&plugin, lmp);
 
   plugin.style = "compute";
+  plugin.name = "deeptensor/atom";
   plugin.info = "compute deeptensor/atom v2.0";
   plugin.creator.v1 = (lammpsplugin_factory1 *) &computedeepmdtensoratom;
   (*register_plugin)(&plugin, lmp);
 
   plugin.style = "fix";
+  plugin.name = "dplr";
   plugin.info = "fix dplr v2.0";
   plugin.creator.v1 = (lammpsplugin_factory1 *) &fixdplr;
   (*register_plugin)(&plugin, lmp);
-}
\ No newline at end of file
+}

From 70fcda6f8beb7f301849dc791bc82d9e5d373830 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 12 Mar 2022 00:19:00 -0500
Subject: [PATCH 03/29] fix macos library name (#1566)

The library type was changed from SHARED to MODULE in #1384.

Fixes errors in conda-forge/deepmd-kit-feedstock#31
---
 deepmd/env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deepmd/env.py b/deepmd/env.py
index 5942b4c062..422092b635 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -208,8 +208,8 @@ def get_module(module_name: str) -> "ModuleType":
     """
     if platform.system() == "Windows":
         ext = ".dll"
-    elif platform.system() == "Darwin":
-        ext = ".dylib"
+    #elif platform.system() == "Darwin":
+    #    ext = ".dylib"
     else:
         ext = ".so"
 

From d8a0844aba5ac64aed569c45b233b4e6fae92e47 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 14 Mar 2022 05:27:26 -0400
Subject: [PATCH 04/29] support `type_one_side` along with `exclude_types`
 (#1423)

* support `type_one_side` with `exclude_types`

* add support for se_r

* fix compress issues

* rewrite comments

* fix compatibility with Python 3.6

* add unittests

* rename model name to prevent conflict

Co-authored-by: Jinzhe Zeng <jz748@syrah.rutgers.edu>
---
 deepmd/descriptor/se_a.py                     |  12 +-
 deepmd/descriptor/se_r.py                     |  10 +-
 deepmd/utils/tabulate.py                      |  61 ++++++--
 ...ession_se_a_type_one_side_exclude_types.py | 147 ++++++++++++++++++
 source/tests/test_type_one_side.py            | 135 ++++++++++++++++
 5 files changed, 346 insertions(+), 19 deletions(-)
 create mode 100644 source/tests/test_model_compression_se_a_type_one_side_exclude_types.py
 create mode 100644 source/tests/test_type_one_side.py

diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index cf218309bd..69fef5a274 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -142,8 +142,6 @@ def __init__ (self,
             self.exclude_types.add((tt[1], tt[0]))
         self.set_davg_zero = set_davg_zero
         self.type_one_side = type_one_side
-        if self.type_one_side and len(exclude_types) != 0:
-            raise RuntimeError('"type_one_side" is not compatible with "exclude_types"')
 
         # descrpt config
         self.sel_r = [ 0 for ii in range(len(self.sel_a)) ]
@@ -552,13 +550,19 @@ def _pass_filter(self,
         inputs = tf.reshape(inputs, [-1, self.ndescrpt * natoms[0]])
         output = []
         output_qmat = []
-        if not self.type_one_side and type_embedding is None:
+        if not (self.type_one_side and len(self.exclude_types) == 0) and type_embedding is None:
             for type_i in range(self.ntypes):
                 inputs_i = tf.slice (inputs,
                                      [ 0, start_index*      self.ndescrpt],
                                      [-1, natoms[2+type_i]* self.ndescrpt] )
                 inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt])
-                layer, qmat = self._filter(inputs_i, type_i, name='filter_type_'+str(type_i)+suffix, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn)
+                if self.type_one_side:
+                    # reuse NN parameters for all types to support type_one_side along with exclude_types
+                    reuse = tf.AUTO_REUSE
+                    filter_name = 'filter_type_all'+suffix
+                else:
+                    filter_name = 'filter_type_'+str(type_i)+suffix
+                layer, qmat = self._filter(inputs_i, type_i, name=filter_name, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn)
                 layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[2+type_i] * self.get_dim_out()])
                 qmat  = tf.reshape(qmat,  [tf.shape(inputs)[0], natoms[2+type_i] * self.get_dim_rot_mat_1() * 3])
                 output.append(layer)
diff --git a/deepmd/descriptor/se_r.py b/deepmd/descriptor/se_r.py
index c92c51225d..b9e5d5aabd 100644
--- a/deepmd/descriptor/se_r.py
+++ b/deepmd/descriptor/se_r.py
@@ -443,13 +443,19 @@ def _pass_filter(self,
         start_index = 0
         inputs = tf.reshape(inputs, [-1, self.ndescrpt * natoms[0]])
         output = []
-        if not self.type_one_side:
+        if not (self.type_one_side and len(self.exclude_types) == 0):
             for type_i in range(self.ntypes):
                 inputs_i = tf.slice (inputs,
                                      [ 0, start_index*      self.ndescrpt],
                                      [-1, natoms[2+type_i]* self.ndescrpt] )
                 inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt])
-                layer = self._filter_r(inputs_i, type_i, name='filter_type_'+str(type_i)+suffix, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn)
+                if self.type_one_side:
+                    # reuse NN parameters for all types to support type_one_side along with exclude_types
+                    reuse = tf.AUTO_REUSE
+                    filter_name = 'filter_type_all'+suffix
+                else:
+                    filter_name = 'filter_type_'+str(type_i)+suffix
+                layer = self._filter_r(inputs_i, type_i, name=filter_name, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn)
                 layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[2+type_i] * self.get_dim_out()])
                 output.append(layer)
                 start_index += natoms[2+type_i]
diff --git a/deepmd/utils/tabulate.py b/deepmd/utils/tabulate.py
index e3107821a8..ee1088bd3c 100644
--- a/deepmd/utils/tabulate.py
+++ b/deepmd/utils/tabulate.py
@@ -4,6 +4,7 @@
 import deepmd
 from typing import Callable
 from typing import Tuple, List
+from functools import lru_cache
 from scipy.special import comb
 from deepmd.env import tf
 from deepmd.env import op_module
@@ -174,7 +175,7 @@ def build(self,
             xx = np.append(xx, np.array([extrapolate * upper], dtype = self.data_type))
             self.nspline = int((upper - lower) / stride0 + (extrapolate * upper - upper) / stride1)
             for ii in range(self.table_size):
-                if self.type_one_side or (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types:
+                if (self.type_one_side and not self._all_excluded(ii)) or (not self.type_one_side and (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types):
                     if self.type_one_side:
                         net = "filter_-1_net_" + str(ii)
                     else:
@@ -198,7 +199,7 @@ def build(self,
             xx = np.append(xx, np.array([extrapolate * upper], dtype = self.data_type))
             self.nspline = int((upper - lower) / stride0 + (extrapolate * upper - upper) / stride1)
             for ii in range(self.table_size):
-                if self.type_one_side or (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types:
+                if (self.type_one_side and not self._all_excluded(ii)) or (not self.type_one_side and (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types):
                     if self.type_one_side:
                         net = "filter_-1_net_" + str(ii)
                     else:
@@ -249,8 +250,11 @@ def _get_bias(self):
             if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
                 if self.type_one_side:
                     for ii in range(0, self.ntypes):
-                        node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/bias_{layer}_{ii}"]
-                        bias["layer_" + str(layer)].append(tf.make_ndarray(node))
+                        if not self._all_excluded(ii):
+                            node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/bias_{layer}_{ii}"]
+                            bias["layer_" + str(layer)].append(tf.make_ndarray(node))
+                        else:
+                            bias["layer_" + str(layer)].append(np.array([]))
                 else:
                     for ii in range(0, self.ntypes * self.ntypes):
                         if (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types:
@@ -266,8 +270,11 @@ def _get_bias(self):
             elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
                 if self.type_one_side:
                     for ii in range(0, self.ntypes):
-                        node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/bias_{layer}_{ii}"]
-                        bias["layer_" + str(layer)].append(tf.make_ndarray(node))
+                        if not self._all_excluded(ii):
+                            node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/bias_{layer}_{ii}"]
+                            bias["layer_" + str(layer)].append(tf.make_ndarray(node))
+                        else:
+                            bias["layer_" + str(layer)].append(np.array([]))
                 else:
                     for ii in range(0, self.ntypes * self.ntypes):
                         if (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types:
@@ -286,8 +293,11 @@ def _get_matrix(self):
             if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
                 if self.type_one_side:
                     for ii in range(0, self.ntypes):
-                        node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/matrix_{layer}_{ii}"]
-                        matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
+                        if not self._all_excluded(ii):
+                            node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/matrix_{layer}_{ii}"]
+                            matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
+                        else:
+                            matrix["layer_" + str(layer)].append(np.array([]))
                 else:
                     for ii in range(0, self.ntypes * self.ntypes):
                         if (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types:
@@ -303,8 +313,11 @@ def _get_matrix(self):
             elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
                 if self.type_one_side:
                     for ii in range(0, self.ntypes):
-                        node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/matrix_{layer}_{ii}"]
-                        matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
+                        if not self._all_excluded(ii):
+                            node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/matrix_{layer}_{ii}"]
+                            matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
+                        else:
+                            matrix["layer_" + str(layer)].append(np.array([]))
                 else:
                     for ii in range(0, self.ntypes * self.ntypes):
                         if (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types:
@@ -411,16 +424,38 @@ def _get_layer_size(self):
         if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
             layer_size = len(self.embedding_net_nodes) // ((self.ntypes * self.ntypes - len(self.exclude_types)) * 2)
             if self.type_one_side :
-                layer_size = len(self.embedding_net_nodes) // (self.ntypes * 2)
+                layer_size = len(self.embedding_net_nodes) // ((self.ntypes - self._n_all_excluded) * 2)
         elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
             layer_size = len(self.embedding_net_nodes) // int(comb(self.ntypes + 1, 2) * 2)
         elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
             layer_size = len(self.embedding_net_nodes) // ((self.ntypes * self.ntypes - len(self.exclude_types)) * 2)
             if self.type_one_side :
-                layer_size = len(self.embedding_net_nodes) // (self.ntypes * 2)
+                layer_size = len(self.embedding_net_nodes) // ((self.ntypes - self._n_all_excluded) * 2)
         else:
             raise RuntimeError("Unsupported descriptor")
         return layer_size
+    
+    @property
+    @lru_cache()
+    def _n_all_excluded(self) -> int:
+        """The number of types that exclude all types."""
+        return sum((int(self._all_excluded(ii)) for ii in range(0, self.ntypes)))
+
+    @lru_cache()
+    def _all_excluded(self, ii: int) -> bool:
+        """Check if type ii excludes all types.
+        
+        Parameters
+        ----------
+        ii : int
+            type index
+
+        Returns
+        -------
+        bool
+            if type ii excludes all types
+        """
+        return all([(ii, type_i) in self.exclude_types for type_i in range(self.ntypes)])
 
     def _get_table_size(self):
         table_size = 0
@@ -448,4 +483,4 @@ def _get_last_layer_size(self):
         for item in self.matrix["layer_" + str(self.layer_size)]:
             if len(item) != 0:
                 return item.shape[1]
-        return 0
\ No newline at end of file
+        return 0
diff --git a/source/tests/test_model_compression_se_a_type_one_side_exclude_types.py b/source/tests/test_model_compression_se_a_type_one_side_exclude_types.py
new file mode 100644
index 0000000000..b223742d2b
--- /dev/null
+++ b/source/tests/test_model_compression_se_a_type_one_side_exclude_types.py
@@ -0,0 +1,147 @@
+import os,sys,platform,shutil,dpdata,json
+import numpy as np
+import unittest
+import subprocess as sp
+
+from deepmd.infer import DeepPot
+from deepmd.env import MODEL_VERSION
+# from deepmd.entrypoints.compress import compress
+from common import j_loader, tests_path
+
+from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
+if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+    default_places = 4
+else :
+    default_places = 10
+
+def _file_delete(file) :
+    if os.path.isdir(file):
+        os.rmdir(file)
+    elif os.path.isfile(file):
+        os.remove(file)
+
+def _subprocess_run(command):
+    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
+    for line in iter(popen.stdout.readline, b''):
+        if hasattr(line, 'decode'):
+            line = line.decode('utf-8')
+        line = line.rstrip()
+        print(line)
+    popen.wait()
+    return popen.returncode
+
+def _init_models():
+    data_file  = str(tests_path / os.path.join("model_compression", "data"))
+    frozen_model = str(tests_path / "dp-original-type-one-side-exclude-types.pb")
+    compressed_model = str(tests_path / "dp-compressed-type-one-side-exclude-types.pb")
+    INPUT = str(tests_path / "input.json")
+    jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+    jdata["training"]["training_data"]["systems"] = data_file
+    jdata["training"]["validation_data"]["systems"] = data_file
+    jdata["model"]["descriptor"]["type_one_side"] = True
+    jdata["model"]["descriptor"]["exclude_types"] = [[0, 0]]
+    with open(INPUT, "w") as fp:
+        json.dump(jdata, fp, indent=4)
+
+    ret = _subprocess_run("dp train " + INPUT)
+    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    ret = _subprocess_run("dp freeze -o " + frozen_model)
+    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    ret = _subprocess_run("dp compress " + " -i " + frozen_model + " -o " + compressed_model)
+    np.testing.assert_equal(ret, 0, 'DP model compression failed!')
+    return INPUT, frozen_model, compressed_model
+
+INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
+
+class TestDeepPotAPBCTypeOneSideExcludeTypes(unittest.TestCase) :
+    @classmethod
+    def setUpClass(self):
+        self.dp_original = DeepPot(FROZEN_MODEL)
+        self.dp_compressed = DeepPot(COMPRESSED_MODEL)
+        self.coords = np.array([12.83, 2.56, 2.18,
+                                12.09, 2.87, 2.74,
+                                00.25, 3.32, 1.68,
+                                3.36, 3.00, 1.81,
+                                3.51, 2.51, 2.60,
+                                4.27, 3.22, 1.56])
+        self.atype = [0, 1, 1, 0, 1, 1]
+        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+
+    def test_attrs(self):
+        self.assertEqual(self.dp_original.get_ntypes(), 2)
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertEqual(self.dp_original.get_dim_fparam(), 0)
+        self.assertEqual(self.dp_original.get_dim_aparam(), 0)
+
+        self.assertEqual(self.dp_compressed.get_ntypes(), 2)
+        self.assertAlmostEqual(self.dp_compressed.get_rcut(), 6.0, places = default_places)
+        self.assertEqual(self.dp_compressed.get_type_map(), ['O', 'H'])
+        self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
+        self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
+
+    def test_1frame(self):
+        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
+        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        # check shape of the returns
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes,1))
+        self.assertEqual(ff0.shape, (nframes,natoms,3))
+        self.assertEqual(vv0.shape, (nframes,9))
+        self.assertEqual(ee1.shape, (nframes,1))
+        self.assertEqual(ff1.shape, (nframes,natoms,3))
+        self.assertEqual(vv1.shape, (nframes,9))
+        # check values
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+    def test_1frame_atm(self):
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        # check shape of the returns
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes,1))
+        self.assertEqual(ff0.shape, (nframes,natoms,3))
+        self.assertEqual(vv0.shape, (nframes,9))
+        self.assertEqual(ae0.shape, (nframes,natoms,1))
+        self.assertEqual(av0.shape, (nframes,natoms,9))
+        self.assertEqual(ee1.shape, (nframes,1))
+        self.assertEqual(ff1.shape, (nframes,natoms,3))
+        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ae1.shape, (nframes,natoms,1))
+        self.assertEqual(av1.shape, (nframes,natoms,9))
+        # check values
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ae0, ae1, default_places)
+        np.testing.assert_almost_equal(av0, av1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+
+    def test_2frame_atm(self):
+        coords2 = np.concatenate((self.coords, self.coords))
+        box2 = np.concatenate((self.box, self.box))
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, box2, self.atype, atomic = True)
+        # check shape of the returns
+        nframes = 2
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes,1))
+        self.assertEqual(ff0.shape, (nframes,natoms,3))
+        self.assertEqual(vv0.shape, (nframes,9))
+        self.assertEqual(ae0.shape, (nframes,natoms,1))
+        self.assertEqual(av0.shape, (nframes,natoms,9))
+        self.assertEqual(ee1.shape, (nframes,1))
+        self.assertEqual(ff1.shape, (nframes,natoms,3))
+        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ae1.shape, (nframes,natoms,1))
+        self.assertEqual(av1.shape, (nframes,natoms,9))
+
+        # check values
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ae0, ae1, default_places)
+        np.testing.assert_almost_equal(av0, av1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
diff --git a/source/tests/test_type_one_side.py b/source/tests/test_type_one_side.py
new file mode 100644
index 0000000000..aa1548f637
--- /dev/null
+++ b/source/tests/test_type_one_side.py
@@ -0,0 +1,135 @@
+
+import numpy as np
+from deepmd.env import tf
+from common import gen_data, j_loader
+
+from deepmd.utils.data_system import DataSystem
+from deepmd.descriptor import Descriptor
+from deepmd.common import j_must_have
+
+GLOBAL_ENER_FLOAT_PRECISION = tf.float64
+GLOBAL_TF_FLOAT_PRECISION = tf.float64
+GLOBAL_NP_FLOAT_PRECISION = np.float64
+
+class TestModel(tf.test.TestCase):
+    def setUp(self):
+        gen_data(nframes=2)
+
+    def test_descriptor_one_side_exclude_types(self):
+        """When we enable type_one_side, the descriptor should be the same
+        for different types, when its environments are the same.
+
+        Here we generate two data sets. The only difference is the type:
+        (1) 0 1 1 1 1 1
+        (2) 1 1 1 1 1 1
+
+        When type_one_side is true, the first atom should have the same descriptor.
+        Otherwise, it should be different (with random initial variables). We test
+        both situations.
+        """
+        jfile = 'water_se_a.json'
+        jdata = j_loader(jfile)
+
+        systems = j_must_have(jdata, 'systems')
+        set_pfx = j_must_have(jdata, 'set_prefix')
+        batch_size = j_must_have(jdata, 'batch_size')
+        test_size = j_must_have(jdata, 'numb_test')
+        batch_size = 1
+        test_size = 1
+        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
+        sel = j_must_have (jdata['model']['descriptor'], 'sel')
+        ntypes=len(sel)
+        
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
+        
+        test_data = data.get_test ()
+        numb_test = 1
+
+        # set parameters
+        jdata['model']['descriptor']['neuron'] = [5, 5, 5]
+        jdata['model']['descriptor']['axis_neuron'] = 2
+        jdata['model']['descriptor']['type_one_side'] = True
+        jdata['model']['descriptor']['exclude_types'] = [[0, 0]]
+
+        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
+        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
+        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
+        t_natoms           = tf.placeholder(tf.int32,   [ntypes+2], name='i_natoms')
+        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
+        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
+        is_training        = tf.placeholder(tf.bool)
+
+
+        # successful
+        descrpt = Descriptor(**jdata['model']['descriptor'])
+        dout \
+            = descrpt.build(
+                t_coord,
+                t_type,
+                t_natoms,
+                t_box,
+                t_mesh,
+                {},
+                reuse = False,
+                suffix = "_se_a_1side_exclude_types"
+            )
+        # failed
+        descrpt_failed = Descriptor(**{**jdata['model']['descriptor'], "type_one_side": False})
+        dout_failed \
+            = descrpt_failed.build(
+                t_coord,
+                t_type,
+                t_natoms,
+                t_box,
+                t_mesh,
+                {},
+                reuse = False,
+                suffix = "_se_a_1side_exclude_types_failed"
+            )
+
+        feed_dict_test1 = {t_prop_c:        test_data['prop_c'],
+                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
+                          t_box:           test_data['box']                 [:numb_test, :],
+                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
+                          t_natoms:        test_data['natoms_vec'],
+                          t_mesh:          test_data['default_mesh'],
+                          is_training:     False}
+        feed_dict_test2 = feed_dict_test1.copy()
+        # original type: 0 0 1 1 1 1
+        # current: 0 1 1 1 1 1
+        # current: 1 1 1 1 1 1
+        new_natoms1 = test_data['natoms_vec'].copy()
+        new_natoms1[2] = 1
+        new_natoms1[3] = 5
+        new_type1 = test_data['type'].copy()
+        new_type1[:numb_test, 0] = 0
+        new_type1[:numb_test, 1:6] = 1
+        new_natoms2 = test_data['natoms_vec'].copy()
+        new_natoms2[2] = 0
+        new_natoms2[3] = 6
+        new_type2 = test_data['type'].copy()
+        new_type2[:numb_test] = 1
+        feed_dict_test1[t_type] = np.reshape(new_type1[:numb_test, :], [-1])
+        feed_dict_test1[t_natoms] = new_natoms1
+        feed_dict_test2[t_type] = np.reshape(new_type2[:numb_test, :], [-1])
+        feed_dict_test2[t_natoms] = new_natoms2
+        print(feed_dict_test1,feed_dict_test2)
+
+        with self.test_session() as sess:
+            sess.run(tf.global_variables_initializer())
+            [model_dout1] = sess.run([dout], 
+                                feed_dict = feed_dict_test1)
+            [model_dout2] = sess.run([dout], 
+                                feed_dict = feed_dict_test2)
+            [model_dout1_failed] = sess.run([dout_failed], 
+                                feed_dict = feed_dict_test1)
+            [model_dout2_failed] = sess.run([dout_failed], 
+                                feed_dict = feed_dict_test2)
+        model_dout1 = model_dout1.reshape([6, -1])
+        model_dout2 = model_dout2.reshape([6, -1])
+        model_dout1_failed = model_dout1_failed.reshape([6, -1])
+        model_dout2_failed = model_dout2_failed.reshape([6, -1])
+
+        np.testing.assert_almost_equal(model_dout1[0], model_dout2[0], 10)
+        with self.assertRaises(AssertionError):
+            np.testing.assert_almost_equal(model_dout1_failed[0], model_dout2_failed[0], 10)

From b4603e3cacc121eab6fa77ecf15bee8b20b72369 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 15 Mar 2022 01:18:55 -0400
Subject: [PATCH 05/29] adjust sel of frozen models (#1573)

* save checkpoints after training if it's not saved

* fix a typo in #1482

* support recovering sel<original_sel

* supports increasing sel

* add unittests

* adjust variable name

* add doc
---
 deepmd/descriptor/loc_frame.py  |   2 +-
 deepmd/descriptor/se.py         |   2 +-
 deepmd/descriptor/se_a.py       |  62 +++++++++-
 deepmd/train/trainer.py         |  45 ++++---
 doc/train/training-advanced.md  |  28 +++++
 source/tests/test_adjust_sel.py | 210 ++++++++++++++++++++++++++++++++
 6 files changed, 324 insertions(+), 25 deletions(-)
 create mode 100644 source/tests/test_adjust_sel.py

diff --git a/deepmd/descriptor/loc_frame.py b/deepmd/descriptor/loc_frame.py
index b85e70823a..1ea8e97760 100644
--- a/deepmd/descriptor/loc_frame.py
+++ b/deepmd/descriptor/loc_frame.py
@@ -383,4 +383,4 @@ def init_variables(self,
             The suffix of the scope
         """
         self.davg = get_tensor_by_name(model_file, 'descrpt_attr%s/t_avg' % suffix)
-        self.tavg = get_tensor_by_name(model_file, 'descrpt_attr%s/t_std' % suffix)
+        self.dstd = get_tensor_by_name(model_file, 'descrpt_attr%s/t_std' % suffix)
diff --git a/deepmd/descriptor/se.py b/deepmd/descriptor/se.py
index 832dcfcd58..2092a59da1 100644
--- a/deepmd/descriptor/se.py
+++ b/deepmd/descriptor/se.py
@@ -107,7 +107,7 @@ def init_variables(self,
         """
         self.embedding_net_variables = get_embedding_net_variables(model_file, suffix = suffix)
         self.davg = get_tensor_by_name(model_file, 'descrpt_attr%s/t_avg' % suffix)
-        self.tavg = get_tensor_by_name(model_file, 'descrpt_attr%s/t_std' % suffix)
+        self.dstd = get_tensor_by_name(model_file, 'descrpt_attr%s/t_std' % suffix)
 
     @property
     def precision(self) -> tf.DType:
diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index 69fef5a274..284d434d32 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -13,7 +13,8 @@
 from deepmd.utils.tabulate import DPTabulate
 from deepmd.utils.type_embed import embed_atom_type
 from deepmd.utils.sess import run_sess
-from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph
+from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph, get_tensor_by_name
+from deepmd.utils.errors import GraphWithoutTensorError
 from .descriptor import Descriptor
 from .se import DescrptSe
 
@@ -191,6 +192,7 @@ def __init__ (self,
                                          sel_a = self.sel_a,
                                          sel_r = self.sel_r)
         self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config)
+        self.original_sel = None
 
 
     def get_rcut (self) -> float:
@@ -440,7 +442,8 @@ def build (self,
                                          trainable = False,
                                          initializer = tf.constant_initializer(dstd))
 
-        coord = tf.reshape (coord_, [-1, natoms[1] * 3])
+        with tf.control_dependencies([t_sel]):
+            coord = tf.reshape (coord_, [-1, natoms[1] * 3])
         box   = tf.reshape (box_, [-1, 9])
         atype = tf.reshape (atype_, [-1, natoms[1]])
 
@@ -827,7 +830,12 @@ def _filter(
           # inputs_reshape = tf.reshape(inputs, [-1, shape[1]//4, 4])
           # natom x 4 x outputs_size
           # xyz_scatter_1 = tf.matmul(inputs_reshape, xyz_scatter, transpose_a = True)
-          xyz_scatter_1 = xyz_scatter_1 * (4.0 / shape[1])
+          if self.original_sel is None:
+              # shape[1] = nnei * 4
+              nnei = shape[1] / 4
+          else:
+              nnei = np.sum(self.original_sel)
+          xyz_scatter_1 = xyz_scatter_1 / nnei
           # natom x 4 x outputs_size_2
           xyz_scatter_2 = tf.slice(xyz_scatter_1, [0,0,0],[-1,-1,outputs_size_2])
           # # natom x 3 x outputs_size_2
@@ -842,3 +850,51 @@ def _filter(
           result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]])
 
         return result, qmat
+
+    def init_variables(self,
+                       model_file : str,
+                       suffix : str = "",
+    ) -> None:
+        """
+        Init the embedding net variables with the given frozen model
+
+        Parameters
+        ----------
+        model_file : str
+            The input frozen model file
+        suffix : str, optional
+            The suffix of the scope
+        """
+        super().init_variables(model_file=model_file, suffix=suffix)
+        # check sel == original sel?
+        try:
+            sel = get_tensor_by_name(model_file, 'descrpt_attr%s/sel' % suffix)
+        except GraphWithoutTensorError:
+            # sel is not restored in old graphs
+            pass
+        else:
+            if not np.array_equal(np.array(self.sel_a), sel):
+                if not self.set_davg_zero:
+                    raise RuntimeError("Adjusting sel is only supported when `set_davg_zero` is true!")
+                # as set_davg_zero, self.davg is safely zero
+                self.davg = np.zeros([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
+                new_dstd = np.ones([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION)
+                # shape of davg and dstd is (ntypes, ndescrpt), ndescrpt = 4*sel
+                n_descpt = np.array(self.sel_a) * 4
+                n_descpt_old = np.array(sel) * 4
+                end_index = np.cumsum(n_descpt)
+                end_index_old = np.cumsum(n_descpt_old)
+                start_index = np.roll(end_index, 1)
+                start_index[0] = 0
+                start_index_old = np.roll(end_index_old, 1)
+                start_index_old[0] = 0
+
+                for nn, oo, ii, jj in zip(n_descpt, n_descpt_old, start_index, start_index_old):
+                    if nn < oo:
+                        # new size is smaller, copy part of std
+                        new_dstd[:, ii:ii+nn] = self.dstd[:, jj:jj+nn]
+                    else:
+                        # new size is larger, copy all, the rest remains 1
+                        new_dstd[:, ii:ii+oo] = self.dstd[:, jj:jj+oo]
+                self.dstd = new_dstd
+                self.original_sel = sel
diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index 6009759781..3a26b53b18 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -531,26 +531,9 @@ def train (self, train_data = None, valid_data=None) :
                                   % (cur_batch, train_time, test_time))
                     train_time = 0
                 if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.saver is not None:
-                    try:
-                        ckpt_prefix = self.saver.save (self.sess, os.path.join(os.getcwd(), self.save_ckpt), global_step=cur_batch)
-                    except google.protobuf.message.DecodeError as e:
-                        raise GraphTooLargeError(
-                            "The graph size exceeds 2 GB, the hard limitation of protobuf."
-                            " Then a DecodeError was raised by protobuf. You should "
-                            "reduce the size of your model."
-                        ) from e
-                    # make symlinks from prefix with step to that without step to break nothing
-                    # get all checkpoint files
-                    original_files = glob.glob(ckpt_prefix + ".*")
-                    for ori_ff in original_files:
-                        new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix):]
-                        try:
-                            # remove old one
-                            os.remove(new_ff)
-                        except OSError:
-                            pass
-                        os.symlink(ori_ff, new_ff)
-                    log.info("saved checkpoint %s" % self.save_ckpt)
+                    self.save_checkpoint(cur_batch)
+        if (self.save_freq == 0 or cur_batch == 0 or cur_batch % self.save_freq != 0) and self.saver is not None:
+            self.save_checkpoint(cur_batch)
         if self.run_opt.is_chief: 
             fp.close ()
         if self.profiling and self.run_opt.is_chief :
@@ -561,6 +544,28 @@ def train (self, train_data = None, valid_data=None) :
         if self.enable_profiler and self.run_opt.is_chief:
             tfv2.profiler.experimental.stop()
 
+    def save_checkpoint(self, cur_batch: int):
+        try:
+            ckpt_prefix = self.saver.save (self.sess, os.path.join(os.getcwd(), self.save_ckpt), global_step=cur_batch)
+        except google.protobuf.message.DecodeError as e:
+            raise GraphTooLargeError(
+                "The graph size exceeds 2 GB, the hard limitation of protobuf."
+                " Then a DecodeError was raised by protobuf. You should "
+                "reduce the size of your model."
+            ) from e
+        # make symlinks from prefix with step to that without step to break nothing
+        # get all checkpoint files
+        original_files = glob.glob(ckpt_prefix + ".*")
+        for ori_ff in original_files:
+            new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix):]
+            try:
+                # remove old one
+                os.remove(new_ff)
+            except OSError:
+                pass
+            os.symlink(ori_ff, new_ff)
+        log.info("saved checkpoint %s" % self.save_ckpt)
+
     def get_feed_dict(self, batch, is_training):
         feed_dict = {}
         for kk in batch.keys():
diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md
index 7b4c475ae7..bb6171b2ff 100644
--- a/doc/train/training-advanced.md
+++ b/doc/train/training-advanced.md
@@ -150,3 +150,31 @@ One can set other environmental variables:
 | Environment variables | Allowed value          | Default value | Usage                      |
 | --------------------- | ---------------------- | ------------- | -------------------------- |
 | DP_INTERFACE_PREC     | `high`, `low`          | `high`        | Control high (double) or low (float) precision of training. |
+
+
+## Adjust `sel` of a frozen model
+
+One can use the `--init-frz-model` feature to adjust (increase or decrease) the [`sel`](../model/sel.md) of an existing model. First, one needs to adjust [`sel`](./train-input.rst) in `input.json`, for example from `[46, 92]` to `[23, 46]`.
+```json
+"model": {
+	"descriptor": {
+		"sel": [23, 46]
+	}
+}
+```
+To obtain the new model at once, [`numb_steps`](./train-input.rst) should be set to zero:
+```json
+"training": {
+	"numb_steps": 0
+}
+```
+
+Then, one can initialize the training from the frozen model and freeze the new model at once:
+```sh
+dp train input.json --init-frz-model frozen_model.pb
+dp freeze -o frozen_model_adjusted_sel.pb
+```
+
+The two models should give the same result when the input satisfies the `sel` constraints of both models.
+
+Note: At this time, this feature is only supported by the [`se_e2_a`](../model/train-se-e2-a.md) descriptor with [`set_davg_zero`](./train-input.rst) enabled, or by a `hybrid` descriptor composed of such descriptors.
diff --git a/source/tests/test_adjust_sel.py b/source/tests/test_adjust_sel.py
new file mode 100644
index 0000000000..d16cffe6ce
--- /dev/null
+++ b/source/tests/test_adjust_sel.py
@@ -0,0 +1,210 @@
+import os, json
+import numpy as np
+import unittest
+import subprocess as sp
+
+from deepmd.infer import DeepPot
+from deepmd.env import MODEL_VERSION
+# from deepmd.entrypoints.compress import compress
+from common import j_loader, tests_path
+
+from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
+if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+    default_places = 4
+else :
+    default_places = 10
+
+def _file_delete(file) :
+    if os.path.isdir(file):
+        os.rmdir(file)
+    elif os.path.isfile(file):
+        os.remove(file)
+
+def _subprocess_run(command):
+    popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
+    for line in iter(popen.stdout.readline, b''):
+        if hasattr(line, 'decode'):
+            line = line.decode('utf-8')
+        line = line.rstrip()
+        print(line)
+    popen.wait()
+    return popen.returncode
+
+def _init_models():
+    # we use the setting for model compression
+    data_file  = str(tests_path / os.path.join("model_compression", "data"))
+    frozen_model = str(tests_path / "dp-adjust-sel-original.pb")
+    decreased_model = str(tests_path / "dp-adjust-sel-original-decreased.pb")
+    increased_model = str(tests_path / "dp-adjust-sel-original-increased.pb")
+    INPUT = str(tests_path / "input.json")
+    jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+    jdata["training"]["training_data"]["systems"] = data_file
+    jdata["training"]["validation_data"]["systems"] = data_file
+    jdata["model"]["descriptor"]["set_davg_zero"] = True
+    with open(INPUT, "w") as fp:
+        json.dump(jdata, fp, indent=4)
+
+    ret = _subprocess_run("dp train " + INPUT + " --skip-neighbor-stat")
+    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    ret = _subprocess_run("dp freeze -o " + frozen_model)
+    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+
+    jdata["training"]["numb_steps"] = 0
+    jdata["model"]["descriptor"]["sel"] = [2, 4] # equal to data
+    with open(INPUT, "w") as fp:
+        json.dump(jdata, fp, indent=4)
+    ret = _subprocess_run("dp train " + INPUT + " -f " + frozen_model + " --skip-neighbor-stat")
+    np.testing.assert_equal(ret, 0, 'DP model adjust sel failed!')
+    ret = _subprocess_run("dp freeze -o " + decreased_model)
+    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+
+    jdata["model"]["descriptor"]["sel"] = [300, 300] # equal to data
+    with open(INPUT, "w") as fp:
+        json.dump(jdata, fp, indent=4)
+    ret = _subprocess_run("dp train " + INPUT + " -f " + frozen_model + " --skip-neighbor-stat")
+    np.testing.assert_equal(ret, 0, 'DP model adjust sel failed!')
+    ret = _subprocess_run("dp freeze -o " + increased_model)
+    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    return INPUT, frozen_model, decreased_model, increased_model
+
+INPUT, FROZEN_MODEL, DECREASED_MODEL, INCREASED_MODEL = _init_models()
+
+class TestDeepPotAAdjustSel(unittest.TestCase) :
+    @classmethod
+    def setUpClass(self):
+        self.dp_original = DeepPot(FROZEN_MODEL)
+        self.dp_decreased = DeepPot(DECREASED_MODEL)
+        self.dp_increased = DeepPot(INCREASED_MODEL)
+        self.coords = np.array([12.83, 2.56, 2.18,
+                                12.09, 2.87, 2.74,
+                                00.25, 3.32, 1.68,
+                                3.36, 3.00, 1.81,
+                                3.51, 2.51, 2.60,
+                                4.27, 3.22, 1.56])
+        self.atype = [0, 1, 1, 0, 1, 1]
+        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+
+    def test_attrs(self):
+        self.assertEqual(self.dp_original.get_ntypes(), 2)
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertEqual(self.dp_original.get_dim_fparam(), 0)
+        self.assertEqual(self.dp_original.get_dim_aparam(), 0)
+
+        self.assertEqual(self.dp_decreased.get_ntypes(), 2)
+        self.assertAlmostEqual(self.dp_decreased.get_rcut(), 6.0, places = default_places)
+        self.assertEqual(self.dp_decreased.get_type_map(), ['O', 'H'])
+        self.assertEqual(self.dp_decreased.get_dim_fparam(), 0)
+        self.assertEqual(self.dp_decreased.get_dim_aparam(), 0)
+
+        self.assertEqual(self.dp_increased.get_ntypes(), 2)
+        self.assertAlmostEqual(self.dp_increased.get_rcut(), 6.0, places = default_places)
+        self.assertEqual(self.dp_increased.get_type_map(), ['O', 'H'])
+        self.assertEqual(self.dp_increased.get_dim_fparam(), 0)
+        self.assertEqual(self.dp_increased.get_dim_aparam(), 0)
+
+    def test_1frame(self):
+        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
+        ee1, ff1, vv1 = self.dp_decreased.eval(self.coords, self.box, self.atype, atomic = False)
+        ee2, ff2, vv2 = self.dp_increased.eval(self.coords, self.box, self.atype, atomic = False)
+        # check shape of the returns
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes,1))
+        self.assertEqual(ff0.shape, (nframes,natoms,3))
+        self.assertEqual(vv0.shape, (nframes,9))
+        self.assertEqual(ee1.shape, (nframes,1))
+        self.assertEqual(ff1.shape, (nframes,natoms,3))
+        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee2.shape, (nframes,1))
+        self.assertEqual(ff2.shape, (nframes,natoms,3))
+        self.assertEqual(vv2.shape, (nframes,9))
+        # check values
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+        np.testing.assert_almost_equal(ff0, ff2, default_places)
+        np.testing.assert_almost_equal(ee0, ee2, default_places)
+        np.testing.assert_almost_equal(vv0, vv2, default_places)
+
+    def test_1frame_atm(self):
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
+        ee1, ff1, vv1, ae1, av1 = self.dp_decreased.eval(self.coords, self.box, self.atype, atomic = True)
+        ee2, ff2, vv2, ae2, av2 = self.dp_increased.eval(self.coords, self.box, self.atype, atomic = True)
+        # check shape of the returns
+        nframes = 1
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes,1))
+        self.assertEqual(ff0.shape, (nframes,natoms,3))
+        self.assertEqual(vv0.shape, (nframes,9))
+        self.assertEqual(ae0.shape, (nframes,natoms,1))
+        self.assertEqual(av0.shape, (nframes,natoms,9))
+        self.assertEqual(ee1.shape, (nframes,1))
+        self.assertEqual(ff1.shape, (nframes,natoms,3))
+        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ae1.shape, (nframes,natoms,1))
+        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee2.shape, (nframes,1))
+        self.assertEqual(ff2.shape, (nframes,natoms,3))
+        self.assertEqual(vv2.shape, (nframes,9))
+        self.assertEqual(ae2.shape, (nframes,natoms,1))
+        self.assertEqual(av2.shape, (nframes,natoms,9))
+        # check values
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ae0, ae1, default_places)
+        np.testing.assert_almost_equal(av0, av1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+        np.testing.assert_almost_equal(ff0, ff2, default_places)
+        np.testing.assert_almost_equal(ae0, ae2, default_places)
+        np.testing.assert_almost_equal(av0, av2, default_places)
+        np.testing.assert_almost_equal(ee0, ee2, default_places)
+        np.testing.assert_almost_equal(vv0, vv2, default_places)
+
+    def test_2frame_atm(self):
+        coords2 = np.concatenate((self.coords, self.coords))
+        box2 = np.concatenate((self.box, self.box))
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
+        ee1, ff1, vv1, ae1, av1 = self.dp_decreased.eval(coords2, box2, self.atype, atomic = True)
+        ee2, ff2, vv2, ae2, av2 = self.dp_increased.eval(coords2, box2, self.atype, atomic = True)
+        # check shape of the returns
+        nframes = 2
+        natoms = len(self.atype)
+        self.assertEqual(ee0.shape, (nframes,1))
+        self.assertEqual(ff0.shape, (nframes,natoms,3))
+        self.assertEqual(vv0.shape, (nframes,9))
+        self.assertEqual(ae0.shape, (nframes,natoms,1))
+        self.assertEqual(av0.shape, (nframes,natoms,9))
+        self.assertEqual(ee1.shape, (nframes,1))
+        self.assertEqual(ff1.shape, (nframes,natoms,3))
+        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ae1.shape, (nframes,natoms,1))
+        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee2.shape, (nframes,1))
+        self.assertEqual(ff2.shape, (nframes,natoms,3))
+        self.assertEqual(vv2.shape, (nframes,9))
+        self.assertEqual(ae2.shape, (nframes,natoms,1))
+        self.assertEqual(av2.shape, (nframes,natoms,9))
+
+        # check values
+        np.testing.assert_almost_equal(ff0, ff1, default_places)
+        np.testing.assert_almost_equal(ae0, ae1, default_places)
+        np.testing.assert_almost_equal(av0, av1, default_places)
+        np.testing.assert_almost_equal(ee0, ee1, default_places)
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
+        np.testing.assert_almost_equal(ff0, ff2, default_places)
+        np.testing.assert_almost_equal(ae0, ae2, default_places)
+        np.testing.assert_almost_equal(av0, av2, default_places)
+        np.testing.assert_almost_equal(ee0, ee2, default_places)
+        np.testing.assert_almost_equal(vv0, vv2, default_places)
+
+    def test_descriptor(self):
+        dd0 = self.dp_original.eval_descriptor(self.coords, self.box, self.atype)
+        dd1 = self.dp_decreased.eval_descriptor(self.coords, self.box, self.atype)
+        dd2 = self.dp_increased.eval_descriptor(self.coords, self.box, self.atype)
+        # check shape of the returns
+        self.assertEqual(dd0.shape, dd1.shape)
+        self.assertEqual(dd0.shape, dd2.shape)
+        # check values
+        np.testing.assert_almost_equal(dd0, dd1, default_places)
+        np.testing.assert_almost_equal(dd0, dd2, default_places)

From dfc2bdcd0cfe229d1aa8336608bc8f217939cb30 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Wed, 16 Mar 2022 19:04:55 -0400
Subject: [PATCH 06/29] restore original_sel in the graph (#1574)

#1573 has an issue: when the frozen graph is recovered again, `original_sel` is not the initial one (but the new one).
In this commit, `original_sel` will be saved into the graph.
---
 deepmd/descriptor/se_a.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index 284d434d32..a4a316b839 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -431,6 +431,9 @@ def build (self,
             t_sel = tf.constant(self.sel_a, 
                                 name = 'sel', 
                                 dtype = tf.int32)            
+            t_original_sel = tf.constant(self.original_sel if self.original_sel is not None else self.sel_a,
+                name = 'original_sel',
+                dtype = tf.int32)
             self.t_avg = tf.get_variable('t_avg', 
                                          davg.shape, 
                                          dtype = GLOBAL_TF_FLOAT_PRECISION,
@@ -442,7 +445,7 @@ def build (self,
                                          trainable = False,
                                          initializer = tf.constant_initializer(dstd))
 
-        with tf.control_dependencies([t_sel]):
+        with tf.control_dependencies([t_sel, t_original_sel]):
             coord = tf.reshape (coord_, [-1, natoms[1] * 3])
         box   = tf.reshape (box_, [-1, 9])
         atype = tf.reshape (atype_, [-1, natoms[1]])
@@ -866,6 +869,11 @@ def init_variables(self,
             The suffix of the scope
         """
         super().init_variables(model_file=model_file, suffix=suffix)
+        try:
+            self.original_sel = get_tensor_by_name(model_file, 'descrpt_attr%s/original_sel' % suffix)
+        except GraphWithoutTensorError:
+            # original_sel is not restored in old graphs, assume sel never changed before
+            pass
         # check sel == original sel?
         try:
             sel = get_tensor_by_name(model_file, 'descrpt_attr%s/sel' % suffix)
@@ -897,4 +905,5 @@ def init_variables(self,
                         # new size is larger, copy all, the rest remains 1
                         new_dstd[:, ii:ii+oo] = self.dstd[:, jj:jj+oo]
                 self.dstd = new_dstd
-                self.original_sel = sel
+                if self.original_sel is None:
+                    self.original_sel = sel

From 1503fc3a10eb4b875431016d05285e51a5f22af9 Mon Sep 17 00:00:00 2001
From: Denghui Lu <denghuilu@pku.edu.cn>
Date: Thu, 17 Mar 2022 07:06:24 +0800
Subject: [PATCH 07/29] fix model compression bug of nan output (#1575)

---
 source/lib/src/cuda/tabulate.cu | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/source/lib/src/cuda/tabulate.cu b/source/lib/src/cuda/tabulate.cu
index 538f750a57..b9b1f80a0e 100644
--- a/source/lib/src/cuda/tabulate.cu
+++ b/source/lib/src/cuda/tabulate.cu
@@ -164,7 +164,7 @@ __global__ void tabulate_fusion_se_a_fifth_order_polynomial(
   int breakpoint = nnei - 1;
 
   FPTYPE sum[MTILE] = {0.f};
-  int mark_table_idx = 0;
+  int mark_table_idx = -1;
   FPTYPE var[6];
   for (int ii = 0; ii < nnei; ii++) {
     FPTYPE xx = em_x[block_idx * nnei + ii];
@@ -518,7 +518,7 @@ __global__ void tabulate_fusion_se_r_fifth_order_polynomial(
   const int block_idx = blockIdx.x;   // nloc
   const int thread_idx = threadIdx.x; // last_layer_size
 
-  int mark_table_idx = 0;
+  int mark_table_idx = -1;
   FPTYPE var[6];
   for (int ii = 0; ii < nnei; ii++) {
     FPTYPE xx = em[block_idx * nnei + ii];
@@ -528,6 +528,7 @@ __global__ void tabulate_fusion_se_r_fifth_order_polynomial(
       load_polynomial_params(var, table, table_idx, thread_idx, last_layer_size);
     }
     out[block_idx * nnei * last_layer_size + ii * last_layer_size + thread_idx] = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
+    mark_table_idx = table_idx;
   }
 }
 

From c49533eadb4502a173f7b5a79aeccfca3c216811 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 22 Mar 2022 21:11:11 -0400
Subject: [PATCH 08/29] support `dp convert-from 1.1` (#1587)

* support `dp convert-from 1.1`

#1583
It looks like there are no breaking changes between v1.1 and v1.2.

* update doc
---
 deepmd/entrypoints/convert.py              | 3 ++-
 deepmd/entrypoints/main.py                 | 2 +-
 doc/troubleshooting/model-compatability.md | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/deepmd/entrypoints/convert.py b/deepmd/entrypoints/convert.py
index 3f277c5134..782bb89241 100644
--- a/deepmd/entrypoints/convert.py
+++ b/deepmd/entrypoints/convert.py
@@ -7,7 +7,8 @@ def convert(
     output_model: str,
     **kwargs,
 ):
-    if FROM == '1.2':
+    if FROM in ['1.1', '1.2']:
+        # no difference between 1.1 and 1.2
         convert_12_to_21(input_model, output_model)
     elif FROM == '1.3':
         convert_13_to_21(input_model, output_model)
diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py
index 3a9d3991ce..043e6523df 100644
--- a/deepmd/entrypoints/main.py
+++ b/deepmd/entrypoints/main.py
@@ -392,7 +392,7 @@ def parse_args(args: Optional[List[str]] = None):
     parser_transform.add_argument(
         'FROM',
         type = str,
-        choices = ['1.2', '1.3', '2.0'],
+        choices = ['1.1', '1.2', '1.3', '2.0'],
         help="The original model compatibility",
     )
     parser_transform.add_argument(
diff --git a/doc/troubleshooting/model-compatability.md b/doc/troubleshooting/model-compatability.md
index 2b7e46a4b7..5c0aa11889 100644
--- a/doc/troubleshooting/model-compatability.md
+++ b/doc/troubleshooting/model-compatability.md
@@ -8,7 +8,7 @@ One can execute `dp convert-from` to convert an old model to a new one.
 
 | Model version | v0.12 | v1.0 | v1.1 | v1.2 | v1.3 | v2.0 | v2.1 |
 |:-:|:-----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
-| Compatibility  | 😢 | 😢 | 😢 | 😊 | 😊 | 😄 | 😄 |
+| Compatibility  | 😢 | 😢 | 😊 | 😊 | 😊 | 😄 | 😄 |
 
 **Legend**:
 - 😄: The model is compatible with the DeePMD-kit package.

From 470f829dfbd6e9adb9d021ded1e410097a6f8213 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 24 Mar 2022 21:53:18 -0400
Subject: [PATCH 09/29] bump default LAMMPS version to
 `stable_29Sep2021_update3` (#1596)

replace `stable_29Sep2021_update2` with `stable_29Sep2021_update3`
---
 doc/install/install-lammps.md  | 18 +++++++++---------
 source/install/build_cc.sh     |  2 +-
 source/install/build_lammps.sh |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/doc/install/install-lammps.md b/doc/install/install-lammps.md
index 97122a3bac..3371708c13 100644
--- a/doc/install/install-lammps.md
+++ b/doc/install/install-lammps.md
@@ -12,12 +12,12 @@ make lammps
 DeePMD-kit will generate a module called `USER-DEEPMD` in the `build` directory. If you need the low precision version, move `env_low.sh` to `env.sh` in the directory. Now download the LAMMPS code (`29Oct2020` or later), and uncompress it:
 ```bash
 cd /some/workspace
-wget https://github.com/lammps/lammps/archive/stable_29Sep2021_update2.tar.gz
-tar xf stable_29Sep2021_update2.tar.gz
+wget https://github.com/lammps/lammps/archive/stable_29Sep2021_update3.tar.gz
+tar xf stable_29Sep2021_update3.tar.gz
 ```
-The source code of LAMMPS is stored in directory `lammps-stable_29Sep2021_update2`. Now go into the LAMMPS code and copy the DeePMD-kit module like this
+The source code of LAMMPS is stored in directory `lammps-stable_29Sep2021_update3`. Now go into the LAMMPS code and copy the DeePMD-kit module like this
 ```bash
-cd lammps-stable_29Sep2021_update2/src/
+cd lammps-stable_29Sep2021_update3/src/
 cp -r $deepmd_source_dir/source/build/USER-DEEPMD .
 ```
 Now build LAMMPS
@@ -43,15 +43,15 @@ Starting from `8Apr2021`, LAMMPS also provides a plugin mode, allowing one to bu
 Now download the LAMMPS code (`8Apr2021` or later), and uncompress it:
 ```bash
 cd /some/workspace
-wget https://github.com/lammps/lammps/archive/stable_29Sep2021_update2.tar.gz
-tar xf stable_29Sep2021_update2.tar.gz
+wget https://github.com/lammps/lammps/archive/stable_29Sep2021_update3.tar.gz
+tar xf stable_29Sep2021_update3.tar.gz
 ```
 
-The source code of LAMMPS is stored in directory `lammps-stable_29Sep2021_update2`. Now go into the LAMMPS directory and create a directory called `build`
+The source code of LAMMPS is stored in directory `lammps-stable_29Sep2021_update3`. Now go into the LAMMPS directory and create a directory called `build`
 
 ```bash
-mkdir -p lammps-stable_29Sep2021_update2/build/
-cd lammps-stable_29Sep2021_update2/build/
+mkdir -p lammps-stable_29Sep2021_update3/build/
+cd lammps-stable_29Sep2021_update3/build/
 ```
 Now build LAMMPS. Note that `PLUGIN` and `KSPACE` package must be enabled, and `BUILD_SHARED_LIBS` must be set to `yes`. You can install any other package you want.
 ```bash
diff --git a/source/install/build_cc.sh b/source/install/build_cc.sh
index a4da73651f..19686b83e7 100755
--- a/source/install/build_cc.sh
+++ b/source/install/build_cc.sh
@@ -20,7 +20,7 @@ NPROC=$(nproc --all)
 BUILD_TMP_DIR=${SCRIPT_PATH}/../build
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
-cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DINSTALL_TENSORFLOW=TRUE ${CUDA_ARGS} -DLAMMPS_VERSION=stable_29Sep2021_update2 -DUSE_TTM=TRUE ..
+cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DINSTALL_TENSORFLOW=TRUE ${CUDA_ARGS} -DLAMMPS_VERSION=stable_29Sep2021_update3 -DUSE_TTM=TRUE ..
 make -j${NPROC}
 make install
 
diff --git a/source/install/build_lammps.sh b/source/install/build_lammps.sh
index 8ab3475e64..91c2fc1cd4 100755
--- a/source/install/build_lammps.sh
+++ b/source/install/build_lammps.sh
@@ -15,7 +15,7 @@ BUILD_TMP_DIR=${SCRIPT_PATH}/../build_lammps
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
 # download LAMMMPS
-LAMMPS_VERSION=stable_29Sep2021_update2
+LAMMPS_VERSION=stable_29Sep2021_update3
 if [ ! -d "lammps-${LAMMPS_VERSION}" ]
 then
 	curl -L -o lammps.tar.gz https://github.com/lammps/lammps/archive/refs/tags/${LAMMPS_VERSION}.tar.gz

From c17873dec592d7c592a10c634e917eae88e3c731 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 24 Mar 2022 22:15:55 -0400
Subject: [PATCH 10/29] add atom energy bias to type embedding energy (#1592)

* add atom energy bias to type embedding energy

Fix #684, where systems have different `atom_numb`. After this fix, RMSE should be quickly decreased in the very beginning.

* looks like tf.repeat is unavailable in old TF...

* add statement `self.bias_atom_e is not None`
---
 deepmd/fit/ener.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
index d1e2fb655f..ac207b4df9 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/fit/ener.py
@@ -1,6 +1,7 @@
 import warnings
 import numpy as np
 from typing import Tuple, List
+from packaging.version import Version
 
 from deepmd.env import tf
 from deepmd.common import add_data_requirement, get_activation_func, get_precision, ACTIVATION_FN_DICT, PRECISION_DICT, docstring_parameter, cast_precision
@@ -11,7 +12,7 @@
 from deepmd.fit.fitting import Fitting
 
 from deepmd.env import global_cvt_2_tf_float
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
+from deepmd.env import GLOBAL_TF_FLOAT_PRECISION, TF_VERSION
 
 class EnerFitting (Fitting):
     r"""Fitting the energy of the system. The force and the virial can also be trained.
@@ -490,6 +491,11 @@ def build (self,
                 bias_atom_e=0.0, suffix=suffix, reuse=reuse
             )
             outs = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms[0]])
+            # add atom energy bias; TF will broadcast to all batches
+            # tf.repeat is avaiable in TF>=2.1 or TF 1.15
+            _TF_VERSION = Version(TF_VERSION)
+            if (Version('1.15') <= _TF_VERSION < Version('2') or _TF_VERSION >= Version('2.1')) and self.bias_atom_e is not None:
+                outs += tf.repeat(tf.constant(self.bias_atom_e, dtype=self.fitting_precision), natoms[2:])
 
         if self.tot_ener_zero:
             force_tot_ener = 0.0

From 5d794718fb9114b0b91e830f7772835549287dda Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Fri, 25 Mar 2022 00:58:10 -0400
Subject: [PATCH 11/29] compile CUDA code for all archs (#1595)

---
 source/lib/src/cuda/CMakeLists.txt | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/source/lib/src/cuda/CMakeLists.txt b/source/lib/src/cuda/CMakeLists.txt
index 6941ba9532..ee9791083e 100644
--- a/source/lib/src/cuda/CMakeLists.txt
+++ b/source/lib/src/cuda/CMakeLists.txt
@@ -24,14 +24,9 @@ endif ()
 
 message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})
 
-if (${CUDA_VERSION_MAJOR} GREATER "11")
+if (${CUDA_VERSION_MAJOR} GREATER "11" OR (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} STREQUAL "5"))
     # nvcc flags
-    set(CUDA_NVCC_FLAGS -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
-                        -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
-                        -gencode arch=compute_70,code=sm_70; # Volta  - GV100/Tesla V100, GTX 1180 (GV104)
-                        -gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-                        -gencode arch=compute_80,code=sm_80; # Anpere - A100
-                        -gencode arch=compute_86,code=sm_86; # Anpere - RTX 3090
+    set(CUDA_NVCC_FLAGS -arch=all; # embeds a compiled code image for all supported architectures (sm_*), and a PTX program for the highest major virtual architecture
                         -O3; -Xcompiler -fPIC;
         )
 elseif (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} GREATER "0")

From 27c297c9b0b8cbe6fa63aca757dedbb1a4d171a4 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Fri, 25 Mar 2022 04:44:13 -0400
Subject: [PATCH 12/29] support dp convert-from 1.0 (#1597)

#1583
The commit also contains some common docstring fixes.
---
 deepmd/entrypoints/convert.py              |   6 +-
 deepmd/entrypoints/main.py                 |   2 +-
 deepmd/utils/convert.py                    | 139 ++++++++++++++++++++-
 deepmd/utils/plugin.py                     |   2 +-
 doc/troubleshooting/model-compatability.md |   2 +-
 5 files changed, 145 insertions(+), 6 deletions(-)

diff --git a/deepmd/entrypoints/convert.py b/deepmd/entrypoints/convert.py
index 782bb89241..aa602dbed4 100644
--- a/deepmd/entrypoints/convert.py
+++ b/deepmd/entrypoints/convert.py
@@ -1,4 +1,4 @@
-from deepmd.utils.convert import convert_20_to_21, convert_13_to_21, convert_12_to_21 
+from deepmd.utils.convert import convert_10_to_21, convert_20_to_21, convert_13_to_21, convert_12_to_21 
 
 def convert(
     *,
@@ -7,7 +7,9 @@ def convert(
     output_model: str,
     **kwargs,
 ):
-    if FROM in ['1.1', '1.2']:
+    if FROM == '1.0':
+        convert_10_to_21(input_model, output_model)
+    elif FROM in ['1.1', '1.2']:
         # no difference between 1.1 and 1.2
         convert_12_to_21(input_model, output_model)
     elif FROM == '1.3':
diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py
index 043e6523df..46bdad05de 100644
--- a/deepmd/entrypoints/main.py
+++ b/deepmd/entrypoints/main.py
@@ -392,7 +392,7 @@ def parse_args(args: Optional[List[str]] = None):
     parser_transform.add_argument(
         'FROM',
         type = str,
-        choices = ['1.1', '1.2', '1.3', '2.0'],
+        choices = ['1.0', '1.1', '1.2', '1.3', '2.0'],
         help="The original model compatibility",
     )
     parser_transform.add_argument(
diff --git a/deepmd/utils/convert.py b/deepmd/utils/convert.py
index b17178c761..2c9a653002 100644
--- a/deepmd/utils/convert.py
+++ b/deepmd/utils/convert.py
@@ -3,7 +3,36 @@
 from google.protobuf import text_format
 from tensorflow.python.platform import gfile
 
+
+def convert_13_to_21(input_model: str, output_model: str):
+    """Convert DP 1.3 graph to 2.1 graph.
+    
+    Parameters
+    ----------
+    input_model : str
+        filename of the input graph
+    output_model : str
+        filename of the output graph
+    """
+    convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt')
+    convert_dp13_to_dp20('frozen_model.pbtxt')
+    convert_dp20_to_dp21('frozen_model.pbtxt')
+    convert_pbtxt_to_pb('frozen_model.pbtxt', output_model)
+    if os.path.isfile('frozen_model.pbtxt'):
+        os.remove('frozen_model.pbtxt')
+    print("the converted output model (2.1 support) is saved in %s" % output_model)
+
+
 def convert_13_to_21(input_model: str, output_model: str):
+    """Convert DP 1.3 graph to 2.1 graph.
+    
+    Parameters
+    ----------
+    input_model : str
+        filename of the input graph
+    output_model : str
+        filename of the output graph
+    """
     convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt')
     convert_dp13_to_dp20('frozen_model.pbtxt')
     convert_dp20_to_dp21('frozen_model.pbtxt')
@@ -12,8 +41,39 @@ def convert_13_to_21(input_model: str, output_model: str):
         os.remove('frozen_model.pbtxt')
     print("the converted output model (2.1 support) is saved in %s" % output_model)
 
+
 def convert_12_to_21(input_model: str, output_model: str):
+    """Convert DP 1.2 graph to 2.1 graph.
+    
+    Parameters
+    ----------
+    input_model : str
+        filename of the input graph
+    output_model : str
+        filename of the output graph
+    """
+    convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt')
+    convert_dp12_to_dp13('frozen_model.pbtxt')
+    convert_dp13_to_dp20('frozen_model.pbtxt')
+    convert_dp20_to_dp21('frozen_model.pbtxt')
+    convert_pbtxt_to_pb('frozen_model.pbtxt', output_model)
+    if os.path.isfile('frozen_model.pbtxt'):
+        os.remove('frozen_model.pbtxt')
+    print("the converted output model (2.1 support) is saved in %s" % output_model)
+
+
+def convert_10_to_21(input_model: str, output_model: str):
+    """Convert DP 1.0 graph to 2.1 graph.
+    
+    Parameters
+    ----------
+    input_model : str
+        filename of the input graph
+    output_model : str
+        filename of the output graph
+    """
     convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt')
+    convert_dp10_to_dp11('frozen_model.pbtxt')
     convert_dp12_to_dp13('frozen_model.pbtxt')
     convert_dp13_to_dp20('frozen_model.pbtxt')
     convert_dp20_to_dp21('frozen_model.pbtxt')
@@ -22,7 +82,17 @@ def convert_12_to_21(input_model: str, output_model: str):
         os.remove('frozen_model.pbtxt')
     print("the converted output model (2.1 support) is saved in %s" % output_model)
 
+
 def convert_20_to_21(input_model: str, output_model: str):
+    """Convert DP 2.0 graph to 2.1 graph.
+    
+    Parameters
+    ----------
+    input_model : str
+        filename of the input graph
+    output_model : str
+        filename of the output graph
+    """
     convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt')
     convert_dp20_to_dp21('frozen_model.pbtxt')
     convert_pbtxt_to_pb('frozen_model.pbtxt', output_model)
@@ -31,6 +101,15 @@ def convert_20_to_21(input_model: str, output_model: str):
     print("the converted output model (2.1 support) is saved in %s" % output_model)
 
 def convert_pb_to_pbtxt(pbfile: str, pbtxtfile: str):
+    """Convert DP graph to graph text.
+    
+    Parameters
+    ----------
+    pbfile : str
+        filename of the input graph
+    pbtxtfile : str
+        filename of the output graph text
+    """
     with gfile.FastGFile(pbfile, 'rb') as f:
         graph_def = tf.GraphDef()
         graph_def.ParseFromString(f.read())
@@ -38,6 +117,15 @@ def convert_pb_to_pbtxt(pbfile: str, pbtxtfile: str):
         tf.train.write_graph(graph_def, './', pbtxtfile, as_text=True)
 
 def convert_pbtxt_to_pb(pbtxtfile: str, pbfile: str):
+    """Convert DP graph text to graph.
+    
+    Parameters
+    ----------
+    pbtxtfile : str
+        filename of the input graph text
+    pbfile : str
+        filename of the output graph
+    """
     with tf.gfile.FastGFile(pbtxtfile, 'r') as f:
         graph_def = tf.GraphDef()
         file_content = f.read()
@@ -45,7 +133,48 @@ def convert_pbtxt_to_pb(pbtxtfile: str, pbfile: str):
         text_format.Merge(file_content, graph_def)
         tf.train.write_graph(graph_def, './', pbfile, as_text=False)
 
-def convert_dp12_to_dp13(file):
+
+def convert_dp10_to_dp11(file: str):
+    """Convert DP 1.0 graph text to 1.1 graph text.
+    
+    Parameters
+    ----------
+    file : str
+        filename of the graph text
+    """
+    with open(file, 'a') as f:
+        f.write("""
+node {
+  name: "fitting_attr/daparam"
+  op: "Const"
+  attr {
+    key: "dtype"
+    value {
+      type: DT_INT32
+    }
+  }
+  attr {
+    key: "value"
+    value {
+      tensor {
+        dtype: DT_INT32
+        tensor_shape {
+        }
+        int_val: 0
+      }
+    }                                                                                                                                                 }
+}
+""")
+
+
+def convert_dp12_to_dp13(file: str):
+    """Convert DP 1.2 graph text to 1.3 graph text.
+    
+    Parameters
+    ----------
+    file : str
+        filename of the graph text
+    """
     file_data = ""
     with open(file, "r", encoding="utf-8") as f:
         ii = 0
@@ -67,7 +196,15 @@ def convert_dp12_to_dp13(file):
     with open(file, "w", encoding="utf-8") as f:
         f.write(file_data)
 
+
 def convert_dp13_to_dp20(fname: str):
+    """Convert DP 1.3 graph text to 2.0 graph text.
+    
+    Parameters
+    ----------
+    fname : str
+        filename of the graph text
+    """
     with open(fname) as fp:
         file_content = fp.read()
     file_content += """
diff --git a/deepmd/utils/plugin.py b/deepmd/utils/plugin.py
index f195b7808c..6a40e69fab 100644
--- a/deepmd/utils/plugin.py
+++ b/deepmd/utils/plugin.py
@@ -52,7 +52,7 @@ def get_plugin(self, key) -> object:
         
         Parameters
         ----------
-        key ： str
+        key : str
             key of the plugin
         
         Returns
diff --git a/doc/troubleshooting/model-compatability.md b/doc/troubleshooting/model-compatability.md
index 5c0aa11889..820a79210f 100644
--- a/doc/troubleshooting/model-compatability.md
+++ b/doc/troubleshooting/model-compatability.md
@@ -8,7 +8,7 @@ One can execute `dp convert-from` to convert an old model to a new one.
 
 | Model version | v0.12 | v1.0 | v1.1 | v1.2 | v1.3 | v2.0 | v2.1 |
 |:-:|:-----------:|:----------:|:----------:|:----------:|:----------:|:----------:|:----------:|
-| Compatibility  | 😢 | 😢 | 😊 | 😊 | 😊 | 😄 | 😄 |
+| Compatibility  | 😢 | 😊 | 😊 | 😊 | 😊 | 😄 | 😄 |
 
 **Legend**:
 - 😄: The model is compatible with the DeePMD-kit package.

From ce1ad2a71034f1ed3d50bb3a696b79baa2feaa51 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Fri, 25 Mar 2022 04:44:40 -0400
Subject: [PATCH 13/29] patch #1595: GREATER_EQUAL (#1598)

the version should be >=11.5 instead of ==11.5
---
 source/lib/src/cuda/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/lib/src/cuda/CMakeLists.txt b/source/lib/src/cuda/CMakeLists.txt
index ee9791083e..2cf2d59a1a 100644
--- a/source/lib/src/cuda/CMakeLists.txt
+++ b/source/lib/src/cuda/CMakeLists.txt
@@ -1,5 +1,5 @@
 # required cmake version
-cmake_minimum_required(VERSION 3.3)
+cmake_minimum_required(VERSION 3.7)
 # project name
 project(deepmd_op_cuda)
 
@@ -24,7 +24,7 @@ endif ()
 
 message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})
 
-if (${CUDA_VERSION_MAJOR} GREATER "11" OR (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} STREQUAL "5"))
+if (${CUDA_VERSION_MAJOR} GREATER "11" OR (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} GREATER_EQUAL "5"))
     # nvcc flags
     set(CUDA_NVCC_FLAGS -arch=all; # embeds a compiled code image for all supported architectures (sm_*), and a PTX program for the highest major virtual architecture
                         -O3; -Xcompiler -fPIC;

From 1aa5041d023af44c4269668c7bfa4a232a2ba5e8 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 26 Mar 2022 23:41:01 -0400
Subject: [PATCH 14/29] fix lammps plugin creator pointer (#1602)

`compute` and `fix` style should use `creator.v2` - see https://docs.lammps.org/stable/Developer_plugins.html
(however, it looks like there is nothing wrong with v1)
---
 source/lmp/plugin/deepmdplugin.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/lmp/plugin/deepmdplugin.cpp b/source/lmp/plugin/deepmdplugin.cpp
index bc54b8ea46..181889fd63 100644
--- a/source/lmp/plugin/deepmdplugin.cpp
+++ b/source/lmp/plugin/deepmdplugin.cpp
@@ -41,12 +41,12 @@ extern "C" void lammpsplugin_init(void *lmp, void *handle, void *regfunc)
   plugin.style = "compute";
   plugin.name = "deeptensor/atom";
   plugin.info = "compute deeptensor/atom v2.0";
-  plugin.creator.v1 = (lammpsplugin_factory1 *) &computedeepmdtensoratom;
+  plugin.creator.v2 = (lammpsplugin_factory2 *) &computedeepmdtensoratom;
   (*register_plugin)(&plugin, lmp);
 
   plugin.style = "fix";
   plugin.name = "dplr";
   plugin.info = "fix dplr v2.0";
-  plugin.creator.v1 = (lammpsplugin_factory1 *) &fixdplr;
+  plugin.creator.v2 = (lammpsplugin_factory2 *) &fixdplr;
   (*register_plugin)(&plugin, lmp);
 }

From 785cfa72a88d4a57d39b04a381e5eb46c2770b65 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 27 Mar 2022 00:25:44 -0400
Subject: [PATCH 15/29] follow API changes from latest LAMMPS (#1601)

Fix #1599.
See: lammps/lammps#3148, lammps/lammps#3159.
---
 source/lmp/compute_deeptensor_atom.cpp |  4 ++++
 source/lmp/fix_dplr.cpp                |  4 ++++
 source/lmp/pair_deepmd.cpp             | 10 +++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/source/lmp/compute_deeptensor_atom.cpp b/source/lmp/compute_deeptensor_atom.cpp
index e29897266a..6030dee3fb 100644
--- a/source/lmp/compute_deeptensor_atom.cpp
+++ b/source/lmp/compute_deeptensor_atom.cpp
@@ -66,12 +66,16 @@ void ComputeDeeptensorAtom::init()
 {
   // need an occasional full neighbor list
 
+#if LAMMPS_VERSION_NUMBER>=20220324
+  neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
+#else
   int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
+#endif
 }
 
 void ComputeDeeptensorAtom::init_list(int /*id*/, NeighList *ptr)
diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp
index 1d2323a027..21f0179625 100644
--- a/source/lmp/fix_dplr.cpp
+++ b/source/lmp/fix_dplr.cpp
@@ -442,7 +442,11 @@ void FixDPLR::post_force(int vflag)
   // backward communication of fcorr
   dfcorr_buff.resize(dfcorr.size());
   copy(dfcorr.begin(), dfcorr.end(), dfcorr_buff.begin());
+#if LAMMPS_VERSION_NUMBER>=20220324
+  comm->reverse_comm(this,3);
+#else
   comm->reverse_comm_fix(this,3);
+#endif
   copy(dfcorr_buff.begin(), dfcorr_buff.end(), dfcorr.begin());
   // // check and print
   // cout << "-------------------- fix/dplr: post force " << endl;
diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 17ea766ae2..096f379ca0 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -558,7 +558,11 @@ void PairDeepMD::compute(int eflag, int vflag)
 	int rank = comm->me;
 	// std force 
 	if (newton_pair) {
-	  comm->reverse_comm_pair(this);
+#if LAMMPS_VERSION_NUMBER>=20220324
+	  comm->reverse_comm(this);
+#else
+    comm->reverse_comm_pair(this);
+#endif
 	}
 	vector<double> std_f;
 #ifdef HIGH_PREC
@@ -1035,10 +1039,14 @@ void PairDeepMD::coeff(int narg, char **arg)
 
 void PairDeepMD::init_style()
 {
+#if LAMMPS_VERSION_NUMBER>=20220324
+  neighbor->add_request(this, NeighConst::REQ_FULL);
+#else
   int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;  
   // neighbor->requests[irequest]->newton = 2;  
+#endif
   if (out_each == 1){
     int ntotal = atom->natoms;
     int nprocs = comm->nprocs;

From 8d4d93d1dc4101bf03176d94106b2f8ba5408a02 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sun, 27 Mar 2022 20:44:31 -0400
Subject: [PATCH 16/29] add another way to load LAMMPS plugins (#1604)

* install plugin symlink

* fix symlink

* add doc
---
 doc/third-party/lammps-command.md |  8 ++++++++
 source/lmp/plugin/CMakeLists.txt  | 17 ++++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md
index 96c40bbb5d..c32b018535 100644
--- a/doc/third-party/lammps-command.md
+++ b/doc/third-party/lammps-command.md
@@ -8,6 +8,14 @@ If you are using the plugin mode, enable DeePMD-kit package in LAMMPS with `plug
 plugin load libdeepmd_lmp.so
 ```
 
+After LAMMPS version `patch_24Mar2022`, another way to load plugins is to set the environmental variable `LAMMPS_PLUGIN_PATH`:
+
+```sh
+LAMMPS_PLUGIN_PATH=$deepmd_root/lib/deepmd_lmp
+```
+
+where `$deepmd_root` is the directory where you [installed the C++ interface](../install/install-from-source.md).
+
 The built-in mode doesn't need this step.
 
 ## pair_style `deepmd`
diff --git a/source/lmp/plugin/CMakeLists.txt b/source/lmp/plugin/CMakeLists.txt
index 9cca8ca771..f6ce774ca5 100644
--- a/source/lmp/plugin/CMakeLists.txt
+++ b/source/lmp/plugin/CMakeLists.txt
@@ -78,10 +78,25 @@ if (DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION)
 
   install(TARGETS ${libname} DESTINATION lib/)
 
+  if (${LAMMPS_VERSION_NUMBER} GREATER_EQUAL 20220324)
+    set(PLUGINNAME "dpplugin.so")
+    INSTALL(CODE "execute_process( \
+        COMMAND ${CMAKE_COMMAND} -E make_directory \
+		${CMAKE_INSTALL_PREFIX}/lib/${libname}/   \
+        )"
+	)
+    INSTALL(CODE "execute_process( \
+        COMMAND ${CMAKE_COMMAND} -E create_symlink \
+		../${CMAKE_SHARED_LIBRARY_PREFIX}${libname}${CMAKE_SHARED_LIBRARY_SUFFIX} \
+        ${CMAKE_INSTALL_PREFIX}/lib/${libname}/${PLUGINNAME}   \
+        )"
+    )
+  endif()
+
   endfunction()
   _add_lmp_plugin_variant("${HIGH_PREC_VARIANT}" "${HIGH_PREC_DEF}")
   _add_lmp_plugin_variant("${LOW_PREC_VARIANT}" "${LOW_PREC_DEF}")
 
 else()
   message(STATUS "disable LAMMPS plugin mode")
-endif()
\ No newline at end of file
+endif()

From c91a2bdc3ae7a093585e91efe7b8e6854a68b812 Mon Sep 17 00:00:00 2001
From: Han Wang <amcadmus@gmail.com>
Date: Mon, 28 Mar 2022 10:12:11 +0800
Subject: [PATCH 17/29] fix the bug introduced by lammps PR #2481 (#1605)

* fix the bug introduced by lammps PR #2481

* compatible with versions before 20201029

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
---
 source/lmp/pair_deepmd.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 096f379ca0..b9793ff7f9 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -238,7 +238,11 @@ PairDeepMD::PairDeepMD(LAMMPS *lmp)
     error->all(FLERR,"Pair deepmd requires metal unit, please set it by \"units metal\"");
   }
   restartinfo = 1;
+#if LAMMPS_VERSION_NUMBER>=20201130
+  centroidstressflag = CENTROID_AVAIL ; // set centroidstressflag = CENTROID_AVAIL to allow the use of the centroid/stress/atom. Added by Davide Tisi
+#else 
   centroidstressflag = 2 ; // set centroidstressflag = 2 to allow the use of the centroid/stress/atom. Added by Davide Tisi
+#endif
   pppmflag = 1;
   respa_enable = 0;
   writedata = 0;

From cec13a1783b710a4c614ce77a350812ae14cf402 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 28 Mar 2022 21:41:02 -0400
Subject: [PATCH 18/29] add kspace pppm/dplr to lmp plugin library (#1603)

* add kspace pppm/dplr to lmp plugin library

* update LAMMPS_VERSION_NUMBER
---
 source/lmp/plugin/deepmdplugin.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/source/lmp/plugin/deepmdplugin.cpp b/source/lmp/plugin/deepmdplugin.cpp
index 181889fd63..41f2e8557e 100644
--- a/source/lmp/plugin/deepmdplugin.cpp
+++ b/source/lmp/plugin/deepmdplugin.cpp
@@ -6,6 +6,9 @@
 #include "pair_deepmd.h"
 #include "fix_dplr.h"
 #include "compute_deeptensor_atom.h"
+#if LAMMPS_VERSION_NUMBER>=20220328
+#include "pppm_dplr.h"
+#endif
 
 using namespace LAMMPS_NS;
 
@@ -24,6 +27,13 @@ static Fix *fixdplr(LAMMPS *lmp, int narg, char **arg)
   return new FixDPLR(lmp, narg, arg);
 }
 
+#if LAMMPS_VERSION_NUMBER>=20220328
+static KSpace *pppmdplr(LAMMPS *lmp)
+{
+  return new PPPMDPLR(lmp);
+}
+#endif
+
 extern "C" void lammpsplugin_init(void *lmp, void *handle, void *regfunc)
 {
   lammpsplugin_t plugin;
@@ -49,4 +59,13 @@ extern "C" void lammpsplugin_init(void *lmp, void *handle, void *regfunc)
   plugin.info = "fix dplr v2.0";
   plugin.creator.v2 = (lammpsplugin_factory2 *) &fixdplr;
   (*register_plugin)(&plugin, lmp);
+
+#if LAMMPS_VERSION_NUMBER>=20220328
+  // lammps/lammps#
+  plugin.style = "kspace";
+  plugin.name = "pppm/dplr";
+  plugin.info = "kspace pppm/dplr v2.0";
+  plugin.creator.v1 = (lammpsplugin_factory1 *) &pppmdplr;
+  (*register_plugin)(&plugin, lmp);
+#endif
 }

From 398258a834a1235259c79c95241b0e073f86bf9a Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Mon, 28 Mar 2022 21:45:17 -0400
Subject: [PATCH 19/29] convert constants (which may change) to variables
 (#1606)

* convert constants (which may change) to variables

Following #1592, I noticed that constants cannot be recovered when restarting training.

* add dtype to variables

* cast int32

* bugfix

* init atom_ener
---
 deepmd/descriptor/se_a.py | 2 +-
 deepmd/fit/ener.py        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index a4a316b839..a71dee2709 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -837,7 +837,7 @@ def _filter(
               # shape[1] = nnei * 4
               nnei = shape[1] / 4
           else:
-              nnei = np.sum(self.original_sel)
+              nnei = tf.cast(tf.Variable(np.sum(self.original_sel), dtype=tf.int32, trainable=False, name="nnei"), self.filter_precision)
           xyz_scatter_1 = xyz_scatter_1 / nnei
           # natom x 4 x outputs_size_2
           xyz_scatter_2 = tf.slice(xyz_scatter_1, [0,0,0],[-1,-1,outputs_size_2])
diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
index ac207b4df9..5ac4354766 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/fit/ener.py
@@ -137,7 +137,7 @@ def __init__ (self,
             else:
                 self.atom_ener.append(None)
         self.useBN = False
-        self.bias_atom_e = None
+        self.bias_atom_e = np.zeros(self.ntypes, dtype=np.float64)
         # data requirement
         if self.numb_fparam > 0 :
             add_data_requirement('fparam', self.numb_fparam, atomic=False, must=True, high_prec=False)
@@ -495,7 +495,7 @@ def build (self,
             # tf.repeat is avaiable in TF>=2.1 or TF 1.15
             _TF_VERSION = Version(TF_VERSION)
             if (Version('1.15') <= _TF_VERSION < Version('2') or _TF_VERSION >= Version('2.1')) and self.bias_atom_e is not None:
-                outs += tf.repeat(tf.constant(self.bias_atom_e, dtype=self.fitting_precision), natoms[2:])
+                outs += tf.repeat(tf.Variable(self.bias_atom_e, dtype=self.fitting_precision, trainable=False, name="bias_atom_ei"), natoms[2:])
 
         if self.tot_ener_zero:
             force_tot_ener = 0.0

From c7d87431508c75ef25612ee38ba91786b750e48b Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Tue, 29 Mar 2022 21:19:21 -0400
Subject: [PATCH 20/29] add a graph optimizer to parallelize prod_force_a_cpu
 (#1429)

* add a graph optimizer to parallelize prod_force_a_cpu

This commit adds a custom graph optimizer to enable the parallelization for prod_force_a_cpu. The optimizer is on-the-fly and will not affect the saved graph, so one can continue training or inference without the optimizer enabled.
The OP is split into multiple OPs, which are then added together, so it can be run across multiple streams, by controlling TF_INTER_OP_PARALLELISM_THREADS. Without the optimizer, there's only one stream working and the others are resting. A performance improvement of this OP can be observed in the water example and also in my own case.
The optimizer is disabled by default and the new attribute of the OP has a default value, so there's no breaking change. One can enable it by environment variables.
A UT is added to confirm the result is not changed.

* only support TF v2 or v1.15

* fix test errors

* fix detecting TF version

* rename the optimizing OP, so optimizing attrs will not be saved into the graph
---
 deepmd/env.py                        |  15 ++-
 doc/train/training-advanced.md       |   1 +
 source/lib/include/prod_force.h      |   3 +-
 source/lib/src/prod_force.cc         |  11 +-
 source/op/CMakeLists.txt             |   3 +-
 source/op/optimizer/parallel.cc      | 151 +++++++++++++++++++++++++++
 source/op/optimizer/parallel.h       |  21 ++++
 source/op/prod_force_multi_device.cc |  42 +++++++-
 source/tests/test_prod_force.py      |  36 ++++++-
 9 files changed, 271 insertions(+), 12 deletions(-)
 create mode 100644 source/op/optimizer/parallel.cc
 create mode 100644 source/op/optimizer/parallel.h

diff --git a/deepmd/env.py b/deepmd/env.py
index 422092b635..a99dde53dd 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -8,6 +8,7 @@
 from imp import reload
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+from packaging.version import Version
 
 import numpy as np
 
@@ -48,6 +49,12 @@
 
 SHARED_LIB_MODULE = "op"
 
+# Python library version
+try:
+    tf_py_version = tf.version.VERSION
+except AttributeError:
+    tf_py_version = tf.__version__
+
 EMBEDDING_NET_PATTERN = str(
     r"filter_type_\d+/matrix_\d+_\d+|"
     r"filter_type_\d+/bias_\d+_\d+|"
@@ -171,6 +178,8 @@ def get_tf_session_config() -> Any:
         gpu_options=tf.GPUOptions(allow_growth=True),
         intra_op_parallelism_threads=intra, inter_op_parallelism_threads=inter
     )
+    if Version(tf_py_version) >= Version('1.15') and int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
+        config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
     return config
 
 
@@ -248,7 +257,7 @@ def get_module(module_name: str) -> "ModuleType":
             # different versions may cause incompatibility
             # see #406, #447, #557, #774, and #796 for example
             # throw a message if versions are different
-            if TF_VERSION != tf.version.VERSION:
+            if TF_VERSION != tf_py_version:
                 raise RuntimeError(
                     "The version of TensorFlow used to compile this "
                     "deepmd-kit package is %s, but the version of TensorFlow "
@@ -260,10 +269,10 @@ def get_module(module_name: str) -> "ModuleType":
                     "`pip install deepmd-kit --no-binary deepmd-kit` "
                     "instead." % (
                         TF_VERSION,
-                        tf.version.VERSION,
+                        tf_py_version,
                         module_name,
                         TF_VERSION,
-                        tf.version.VERSION,
+                        tf_py_version,
                     )) from e
             raise RuntimeError(
                 "This deepmd-kit package is inconsitent with TensorFlow "
diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md
index bb6171b2ff..004c6709b7 100644
--- a/doc/train/training-advanced.md
+++ b/doc/train/training-advanced.md
@@ -150,6 +150,7 @@ One can set other environmental variables:
 | Environment variables | Allowed value          | Default value | Usage                      |
 | --------------------- | ---------------------- | ------------- | -------------------------- |
 | DP_INTERFACE_PREC     | `high`, `low`          | `high`        | Control high (double) or low (float) precision of training. |
+| DP_AUTO_PARALLELIZATION | 0, 1                 | 0             | Enable auto parallelization for CPU operators. |
 
 
 ## Adjust `sel` of a frozen model
diff --git a/source/lib/include/prod_force.h b/source/lib/include/prod_force.h
index 49530a1952..4d3629f2bf 100644
--- a/source/lib/include/prod_force.h
+++ b/source/lib/include/prod_force.h
@@ -10,7 +10,8 @@ void prod_force_a_cpu(
     const int * nlist, 
     const int nloc, 
     const int nall, 
-    const int nnei);
+    const int nnei,
+    const int start_index=0);
 
 template<typename FPTYPE>
 void prod_force_r_cpu(
diff --git a/source/lib/src/prod_force.cc b/source/lib/src/prod_force.cc
index a21d33cb91..6859a5bae3 100644
--- a/source/lib/src/prod_force.cc
+++ b/source/lib/src/prod_force.cc
@@ -30,14 +30,15 @@ prod_force_a_cpu(
     const int * nlist, 
     const int nloc, 
     const int nall, 
-    const int nnei) 
+    const int nnei,
+    const int start_index) 
 {
   const int ndescrpt = 4 * nnei;
 
   memset(force, 0, sizeof(FPTYPE) * nall * 3);
   // compute force of a frame
   #pragma omp parallel
-  for (int i_idx = 0; i_idx < nloc; ++i_idx) {
+  for (int i_idx = start_index; i_idx < start_index + nloc; ++i_idx) {
     // deriv wrt center atom
     #pragma omp single
     for (int aa = 0; aa < ndescrpt; ++aa) {
@@ -71,7 +72,8 @@ prod_force_a_cpu<double>(
     const int * nlist, 
     const int nloc, 
     const int nall, 
-    const int nnei);
+    const int nnei,
+    const int start_index);
 
 template
 void 
@@ -83,7 +85,8 @@ prod_force_a_cpu<float>(
     const int * nlist, 
     const int nloc, 
     const int nall, 
-    const int nnei);
+    const int nnei,
+    const int start_index);
 
 
 template<typename FPTYPE>
diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt
index 8b483b2e5d..e43c66da13 100644
--- a/source/op/CMakeLists.txt
+++ b/source/op/CMakeLists.txt
@@ -6,6 +6,7 @@ set (OP_CXX_FLAG -D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI} )
 file(GLOB OP_SRC custom_op.cc prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a_ef.cc descrpt_se_a_ef.cc descrpt_se_a_ef_para.cc descrpt_se_a_ef_vert.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ewald_recp.cc gelu_multi_device.cc map_aparam.cc neighbor_stat.cc unaggregated_grad.cc tabulate_multi_device.cc prod_env_mat_multi_device.cc)
 file(GLOB OP_GRADS_SRC custom_op.cc prod_force_grad.cc prod_force_grad_multi_device.cc prod_virial_grad.cc prod_virial_grad_multi_device.cc soft_min_force_grad.cc soft_min_virial_grad.cc )
 file(GLOB OP_PY *.py)
+file(GLOB OP_REMAPPER_SRC optimizer/parallel.cc)
 
 if (BUILD_CPP_IF) 
   add_library(${LIB_DEEPMD_OP} MODULE ${OP_SRC})
@@ -17,7 +18,7 @@ if (BUILD_CPP_IF)
 endif (BUILD_CPP_IF)
 
 if (BUILD_PY_IF)
-  add_library(op_abi MODULE ${OP_SRC} ${OP_LIB})
+  add_library(op_abi MODULE ${OP_SRC} ${OP_LIB} ${OP_REMAPPER_SRC})
   add_library(op_grads MODULE ${OP_GRADS_SRC})
   
   message(STATUS ${TensorFlowFramework_LIBRARY})
diff --git a/source/op/optimizer/parallel.cc b/source/op/optimizer/parallel.cc
new file mode 100644
index 0000000000..3244226514
--- /dev/null
+++ b/source/op/optimizer/parallel.cc
@@ -0,0 +1,151 @@
+// only support TensorFlow 1.x from v1.15 on, or any v2 (or later) release
+#if TF_MAJOR_VERSION >= 2 || (TF_MAJOR_VERSION == 1 && TF_MINOR_VERSION >= 15)
+
+#include "parallel.h"
+
+#include "tensorflow/core/grappler/devices.h"
+#include "tensorflow/core/grappler/graph_view.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h"
+#include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/grappler/utils/graph_view.h"
+#include "tensorflow/core/platform/cpu_info.h"
+#include "tensorflow/core/protobuf/rewriter_config.pb.h"
+#include "tensorflow/core/util/env_var.h"
+#include "tensorflow/core/util/util.h"
+
+// based on tensorflow/core/grappler/optimizers/remapper.cc
+
+struct RemapperContext {
+  explicit RemapperContext(GrapplerItem *item, Status *status)
+      : nodes_to_preserve(item->NodesToPreserve()),
+        graph_view(&item->graph, status) {}
+
+  std::unordered_set<std::string> nodes_to_preserve;
+  utils::MutableGraphView graph_view;
+};
+
+bool IsProdForce(const NodeDef &node) { return node.op() == "ProdForceSeA"; }
+
+bool FindProdForce(RemapperContext *ctx, int node_index) {
+  const auto *node_view = ctx->graph_view.GetNode(node_index);
+  const auto *node_def = node_view->node();
+  return IsProdForce(*node_def);
+}
+
+int64_t GetNThreads() {
+  // the number of threads is based on the session...
+  // For convenience, we use environment variable directly
+  int64_t tot = 1;
+  Status status =
+      ReadInt64FromEnvVar("TF_INTER_OP_PARALLELISM_THREADS", 1, &tot);
+  if (!status.ok()) {
+    tot = 1;
+  }
+  return tot;
+}
+
+Status ParallelProdForce(RemapperContext *ctx, int node_index,
+                         std::vector<bool> *invalidated_nodes,
+                         std::vector<bool> *nodes_to_delete) {
+  // leave the graph untouched when any GPU is available
+  if (GetNumAvailableGPUs() > 0)
+    return Status::OK();
+
+  const NodeDef *ori_node = ctx->graph_view.GetNode(node_index)->node();
+  auto &src_attr = ori_node->attr();
+  int64_t tot = GetNThreads();
+  if (tot <= 1)
+    return Status::OK();
+
+  NodeDef sum_node;
+  sum_node.set_name(ori_node->name());
+  sum_node.set_op("AddN");
+  sum_node.set_device(ori_node->device());
+  auto *sum_attr = sum_node.mutable_attr();
+  (*sum_attr)["N"].set_i(tot);
+  (*sum_attr)["T"] = src_attr.at("T");
+
+  utils::Mutation *mutation = ctx->graph_view.GetMutationBuilder();
+  Status status;
+
+  for (int ii = 0; ii < tot; ++ii) {
+    NodeDef sub_node;
+    sub_node.set_name(ori_node->name() + "/sub_" + std::to_string(ii));
+    sub_node.set_op("ParallelProdForceSeA");
+    sub_node.set_device(ori_node->device());
+    // forward the first four inputs of the original node unchanged
+    for (int jj = 0; jj < 4; ++jj)
+      sub_node.add_input(ori_node->input(jj));
+    // copy attributes; sub-node ii covers the fraction ii/tot .. (ii+1)/tot
+    auto *sub_attr = sub_node.mutable_attr();
+    (*sub_attr)["T"] = src_attr.at("T");
+    (*sub_attr)["n_a_sel"] = src_attr.at("n_a_sel");
+    (*sub_attr)["n_r_sel"] = src_attr.at("n_r_sel");
+    (*sub_attr)["parallel"].set_b(true);
+    (*sub_attr)["start_frac"].set_f((float)ii / (float)tot);
+    (*sub_attr)["end_frac"].set_f((float)(ii + 1) / (float)tot);
+    sum_node.add_input(sub_node.name());
+    mutation->AddNode(std::move(sub_node), &status);
+  }
+
+  mutation->AddNode(std::move(sum_node), &status);
+  TF_RETURN_IF_ERROR(status);
+  TF_RETURN_IF_ERROR(mutation->Apply());
+  (*invalidated_nodes)[node_index] = true;
+
+  return Status::OK();
+}
+
+Status DPParallel::Optimize(Cluster *cluster, const GrapplerItem &item,
+                            GraphDef *optimized_graph) {
+  GrapplerItem mutable_item = item;
+  Status status;
+  RemapperContext ctx(&mutable_item, &status);
+  TF_RETURN_IF_ERROR(status);
+  // Processing graph in reverse-topological sorted order allows to remap
+  // longer chains of dependent ops in one pass.
+  TF_RETURN_IF_ERROR(
+      ctx.graph_view.SortTopologically(/*ignore_cycles=*/false, {}));
+
+  const int num_nodes = item.graph.node_size();
+  // Per-node flags: invalidated_nodes marks nodes already rewritten by
+  // ParallelProdForce; nodes_to_delete marks nodes removed after the loop.
+  std::vector<bool> invalidated_nodes(num_nodes);
+  std::vector<bool> nodes_to_delete(num_nodes);
+
+  for (int i = num_nodes - 1; i >= 0; --i) {
+    // Check if node was invalidated by one of the previous remaps.
+    if (invalidated_nodes[i] || nodes_to_delete[i]) {
+      continue;
+    }
+    if (!item.optimization_options().is_eager_mode) {
+
+      // Replace ProdForceSeA with parallel sub-nodes summed by AddN
+      std::map<std::string, int> matched_nodes_map;
+      std::set<int> remove_node_indices;
+      if (FindProdForce(&ctx, i)) {
+        TF_RETURN_IF_ERROR(
+            ParallelProdForce(&ctx, i, &invalidated_nodes, &nodes_to_delete));
+        continue;
+      }
+    }
+  }
+
+  // Remove nodes flagged in nodes_to_delete.
+  utils::Mutation *mutation = ctx.graph_view.GetMutationBuilder();
+  for (int i = 0; i < num_nodes; ++i) {
+    if (nodes_to_delete[i]) {
+      mutation->RemoveNode(ctx.graph_view.GetNode(i));
+    }
+  }
+  TF_RETURN_IF_ERROR(mutation->Apply());
+
+  *optimized_graph = std::move(mutable_item.graph);
+
+  return Status::OK();
+}
+
+REGISTER_GRAPH_OPTIMIZER_AS(DPParallel, "dpparallel");
+
+#endif
\ No newline at end of file
diff --git a/source/op/optimizer/parallel.h b/source/op/optimizer/parallel.h
new file mode 100644
index 0000000000..7de9f0b7ea
--- /dev/null
+++ b/source/op/optimizer/parallel.h
@@ -0,0 +1,21 @@
+#ifndef DP_REMAPPER_H_
+#define DP_REMAPPER_H_
+
+#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h"
+
+using namespace tensorflow;
+using namespace tensorflow::grappler;
+
+class DPParallel : public CustomGraphOptimizer {
+ public:
+  Status Init(
+      const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override {
+    return Status::OK();
+  }
+  std::string name() const override { return "dpparallel"; };
+  bool UsesFunctionLibrary() const override { return false; }
+  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+                  GraphDef* optimized_graph) override;
+};
+
+#endif  // DP_REMAPPER_H_
\ No newline at end of file
diff --git a/source/op/prod_force_multi_device.cc b/source/op/prod_force_multi_device.cc
index 8df25636f6..38a2ee88a6 100644
--- a/source/op/prod_force_multi_device.cc
+++ b/source/op/prod_force_multi_device.cc
@@ -1,5 +1,6 @@
 #include "custom_op.h"
 #include "prod_force.h"
+#include "errors.h"
 
 REGISTER_OP("ProdForceSeA")
     .Attr("T: {float, double} = DT_DOUBLE")
@@ -11,6 +12,20 @@ REGISTER_OP("ProdForceSeA")
     .Attr("n_r_sel: int")
     .Output("force: T");
 
+// rename temp op
+REGISTER_OP("ParallelProdForceSeA")
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Attr("parallel: bool = false")
+    .Attr("start_frac: float = 0.")
+    .Attr("end_frac: float = 1.")
+    .Output("force: T");
+
 REGISTER_OP("ProdForceSeR")
     .Attr("T: {float, double} = DT_DOUBLE")
     .Input("net_deriv: T")
@@ -22,7 +37,11 @@ REGISTER_OP("ProdForceSeR")
 template<typename Device, typename FPTYPE>
 class ProdForceSeAOp : public OpKernel {
 public:
-  explicit ProdForceSeAOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit ProdForceSeAOp(OpKernelConstruction* context) : OpKernel(context) {
+    if(context->HasAttr("parallel")) OP_REQUIRES_OK(context, context->GetAttr("parallel", &parallel));
+    if(context->HasAttr("start_frac")) OP_REQUIRES_OK(context, context->GetAttr("start_frac", &start_frac));
+    if(context->HasAttr("end_frac")) OP_REQUIRES_OK(context, context->GetAttr("end_frac", &end_frac));
+  }
 
   void Compute(OpKernelContext* context) override {
     deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
@@ -81,6 +100,19 @@ class ProdForceSeAOp : public OpKernel {
     const FPTYPE * p_in_deriv = in_deriv_tensor.flat<FPTYPE>().data();
     const int * p_nlist = nlist_tensor.flat<int>().data();
 
+    int start_index = 0, end_index = nloc, nloc_loc = nloc;
+    if (parallel) {
+      if (device != "CPU")
+        throw deepmd::deepmd_exception("Auto parallelization for ProdForceA is not supported on GPUs!");
+      // we split in_deriv, net_deriv, and nlist along nloc
+      // compute start and end index along nloc
+      // frac belongs to [0, 1]
+      // end_index will be not visited, only visit end_index-1
+      start_index = lround(start_frac * nloc);
+      end_index = lround(end_frac * nloc);
+      nloc_loc = end_index - start_index;
+    }
+
     for(int kk = 0; kk < nframes; ++kk){
       FPTYPE * force = p_force + kk * nall * 3;
       const FPTYPE * net_deriv = p_net_deriv + kk * nloc * ndescrpt;
@@ -102,12 +134,15 @@ class ProdForceSeAOp : public OpKernel {
     else if (device == "CPU") {
       deepmd::prod_force_a_cpu(    
           force, 
-          net_deriv, in_deriv, nlist, nloc, nall, nnei);
+          net_deriv, in_deriv, nlist, nloc_loc, nall, nnei, start_index=start_index);
     }
     }
   }
  private:
   std::string device;
+  bool parallel = false;
+  float start_frac = 0.f;
+  float end_frac = 1.f;
 };
 
 template<typename Device, typename FPTYPE>
@@ -200,6 +235,9 @@ class ProdForceSeROp : public OpKernel {
 REGISTER_KERNEL_BUILDER(                                                                 \
     Name("ProdForceSeA").Device(DEVICE_CPU).TypeConstraint<T>("T"),                      \
     ProdForceSeAOp<CPUDevice, T>);                                                       \
+REGISTER_KERNEL_BUILDER(                                                                 \
+    Name("ParallelProdForceSeA").Device(DEVICE_CPU).TypeConstraint<T>("T"),             \
+    ProdForceSeAOp<CPUDevice, T>);                                                       \
 REGISTER_KERNEL_BUILDER(                                                                 \
     Name("ProdForceSeR").Device(DEVICE_CPU).TypeConstraint<T>("T"),                      \
     ProdForceSeROp<CPUDevice, T>); 
diff --git a/source/tests/test_prod_force.py b/source/tests/test_prod_force.py
index c0aaa29ea2..e6703b6614 100644
--- a/source/tests/test_prod_force.py
+++ b/source/tests/test_prod_force.py
@@ -11,7 +11,10 @@
 
 class TestProdForce(tf.test.TestCase):
     def setUp(self):
-        self.sess = self.test_session().__enter__()
+        config = tf.ConfigProto()
+        if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
+            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+        self.sess = self.test_session(config=config).__enter__()
         self.nframes = 2
         self.dcoord = [
             12.83, 2.56, 2.18,
@@ -104,3 +107,34 @@ def test_prod_force(self):
         self.assertEqual(dforce.shape, (self.nframes, self.nall*3))
         for ff in range(self.nframes):
             np.testing.assert_almost_equal(dforce[ff], self.expected_force, 5)
+    
+    @unittest.skipIf(tf.test.is_gpu_available(), reason="Not supported in GPUs")
+    def test_prod_force_parallel(self):
+        forces = []
+        for ii in range(4):
+            tforce \
+                = op_module.parallel_prod_force_se_a(
+                    self.tnet_deriv,
+                    self.tem_deriv,
+                    self.tnlist,
+                    self.tnatoms, 
+                    n_a_sel=self.nnei,
+                    n_r_sel=0,
+                    parallel=True,
+                    start_frac = ii/4,
+                    end_frac = (ii+1)/4,
+                    )
+            forces.append(tforce)
+        tforce = tf.add_n(forces)
+        self.sess.run (tf.global_variables_initializer())
+        dforce = self.sess.run(
+            tforce,
+            feed_dict = {
+                self.tnet_deriv: self.dnet_deriv,
+                self.tem_deriv: self.dem_deriv,
+                self.tnlist: self.dnlist,
+                self.tnatoms: self.dnatoms}
+        )
+        self.assertEqual(dforce.shape, (self.nframes, self.nall*3))
+        for ff in range(self.nframes):
+            np.testing.assert_almost_equal(dforce[ff], self.expected_force, 5)

From 10fdc930d25673ac979f78c8f43148c3fad4d47f Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 31 Mar 2022 23:01:01 -0400
Subject: [PATCH 21/29] refactor init_variable and support type embedding
 (#1610)

* refactor init_variable and support type embedding

Fix #1578. This commit refactors init_variables for both Descriptor and Fitting so that they accept graph and graph_def instead of a filename. Opening the same file multiple times is bad behavior.
Move some code from Trainer to Model, since Trainer should not operate on Descriptor and Fitting directly.
Add an init_variables method to the type embedding.

* fix compression error
---
 deepmd/descriptor/descriptor.py |  9 +++--
 deepmd/descriptor/hybrid.py     | 11 +++---
 deepmd/descriptor/loc_frame.py  | 17 +++++----
 deepmd/descriptor/se.py         | 19 +++++-----
 deepmd/descriptor/se_a.py       | 17 +++++----
 deepmd/env.py                   |  7 ++++
 deepmd/fit/dipole.py            | 18 ++++++----
 deepmd/fit/ener.py              | 18 ++++++----
 deepmd/fit/fitting.py           | 24 +++++++++++++
 deepmd/fit/polar.py             | 34 ++++++++++++------
 deepmd/model/ener.py            | 40 +++++++++++++++++++--
 deepmd/model/model.py           | 25 ++++++++++++++
 deepmd/model/tensor.py          | 34 +++++++++++++++++-
 deepmd/train/trainer.py         | 45 +++++++++---------------
 deepmd/utils/graph.py           | 61 ++++++++++++++++++++++++++++++++-
 deepmd/utils/type_embed.py      | 21 +++++++++++-
 16 files changed, 315 insertions(+), 85 deletions(-)
 create mode 100644 deepmd/model/model.py

diff --git a/deepmd/descriptor/descriptor.py b/deepmd/descriptor/descriptor.py
index 231f3abe1e..219b21172b 100644
--- a/deepmd/descriptor/descriptor.py
+++ b/deepmd/descriptor/descriptor.py
@@ -351,7 +351,8 @@ def get_feed_dict(self,
         return feed_dict
 
     def init_variables(self,
-                       model_file: str,
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
                        suffix : str = "",
     ) -> None:
         """
@@ -359,8 +360,10 @@ def init_variables(self,
 
         Parameters
         ----------
-        model_file : str
-            The input model file
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
         suffix : str, optional
             The suffix of the scope
         
diff --git a/deepmd/descriptor/hybrid.py b/deepmd/descriptor/hybrid.py
index 39cb9fed0e..cfee332b78 100644
--- a/deepmd/descriptor/hybrid.py
+++ b/deepmd/descriptor/hybrid.py
@@ -279,7 +279,8 @@ def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
 
 
     def init_variables(self,
-                       model_file : str,
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
                        suffix : str = "",
     ) -> None:
         """
@@ -287,13 +288,15 @@ def init_variables(self,
 
         Parameters
         ----------
-        model_file : str
-            The input frozen model file
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
         suffix : str, optional
             The suffix of the scope
         """
         for idx, ii in enumerate(self.descrpt_list):
-            ii.init_variables(model_file, suffix=f"{suffix}_{idx}")
+            ii.init_variables(graph, graph_def, suffix=f"{suffix}_{idx}")
 
     def get_tensor_names(self, suffix : str = "") -> Tuple[str]:
         """Get names of tensors.
diff --git a/deepmd/descriptor/loc_frame.py b/deepmd/descriptor/loc_frame.py
index 1ea8e97760..d8063505bd 100644
--- a/deepmd/descriptor/loc_frame.py
+++ b/deepmd/descriptor/loc_frame.py
@@ -8,7 +8,7 @@
 from deepmd.env import default_tf_session_config
 from deepmd.utils.sess import run_sess
 from .descriptor import Descriptor
-from deepmd.utils.graph import get_tensor_by_name
+from deepmd.utils.graph import get_tensor_by_name_from_graph
 
 @Descriptor.register("loc_frame")
 class DescrptLocFrame (Descriptor) :
@@ -369,18 +369,21 @@ def _compute_std (self,sumv2, sumv, sumn) :
         return np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn))
 
     def init_variables(self,
-                       model_file : str,
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
                        suffix : str = "",
     ) -> None:
         """
-        Init the embedding net variables with the given frozen model
+        Init the embedding net variables with the given dict
 
         Parameters
         ----------
-        model_file : str
-            The input frozen model file
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
         suffix : str, optional
             The suffix of the scope
         """
-        self.davg = get_tensor_by_name(model_file, 'descrpt_attr%s/t_avg' % suffix)
-        self.dstd = get_tensor_by_name(model_file, 'descrpt_attr%s/t_std' % suffix)
+        self.davg = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_avg' % suffix)
+        self.dstd = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_std' % suffix)
diff --git a/deepmd/descriptor/se.py b/deepmd/descriptor/se.py
index 2092a59da1..059924071f 100644
--- a/deepmd/descriptor/se.py
+++ b/deepmd/descriptor/se.py
@@ -1,7 +1,7 @@
 from typing import Tuple, List
 
 from deepmd.env import tf
-from deepmd.utils.graph import get_embedding_net_variables, get_tensor_by_name
+from deepmd.utils.graph import get_embedding_net_variables_from_graph_def, get_tensor_by_name_from_graph
 from .descriptor import Descriptor
 
 
@@ -92,22 +92,25 @@ def pass_tensors_from_frz_model(self,
         self.descrpt_reshape = descrpt_reshape
 
     def init_variables(self,
-                       model_file : str,
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
                        suffix : str = "",
     ) -> None:
         """
-        Init the embedding net variables with the given frozen model
+        Init the embedding net variables with the given dict
 
         Parameters
         ----------
-        model_file : str
-            The input frozen model file
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
         suffix : str, optional
             The suffix of the scope
         """
-        self.embedding_net_variables = get_embedding_net_variables(model_file, suffix = suffix)
-        self.davg = get_tensor_by_name(model_file, 'descrpt_attr%s/t_avg' % suffix)
-        self.dstd = get_tensor_by_name(model_file, 'descrpt_attr%s/t_std' % suffix)
+        self.embedding_net_variables = get_embedding_net_variables_from_graph_def(graph_def, suffix = suffix)
+        self.davg = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_avg' % suffix)
+        self.dstd = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_std' % suffix)
 
     @property
     def precision(self) -> tf.DType:
diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index a71dee2709..bdc2b37d7b 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -855,28 +855,31 @@ def _filter(
         return result, qmat
 
     def init_variables(self,
-                       model_file : str,
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
                        suffix : str = "",
     ) -> None:
         """
-        Init the embedding net variables with the given frozen model
+        Init the embedding net variables with the given dict
 
         Parameters
         ----------
-        model_file : str
-            The input frozen model file
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
         suffix : str, optional
             The suffix of the scope
         """
-        super().init_variables(model_file=model_file, suffix=suffix)
+        super().init_variables(graph=graph, graph_def=graph_def, suffix=suffix)
         try:
-            self.original_sel = get_tensor_by_name(model_file, 'descrpt_attr%s/original_sel' % suffix)
+            self.original_sel = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/original_sel' % suffix)
         except GraphWithoutTensorError:
             # original_sel is not restored in old graphs, assume sel never changed before
             pass
         # check sel == original sel?
         try:
-            sel = get_tensor_by_name(model_file, 'descrpt_attr%s/sel' % suffix)
+            sel = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/sel' % suffix)
         except GraphWithoutTensorError:
             # sel is not restored in old graphs
             pass
diff --git a/deepmd/env.py b/deepmd/env.py
index a99dde53dd..db19fd86c6 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -74,9 +74,16 @@
     r"final_layer_type_\d+/bias|"
 )
 
+TYPE_EMBEDDING_PATTERN = str(
+    r"type_embed_net+/matrix_\d+|"
+    r"type_embed_net+/bias_\d+|"
+    r"type_embed_net+/idt_\d+|"
+)
+
 TRANSFER_PATTERN = \
     EMBEDDING_NET_PATTERN + \
     FITTING_NET_PATTERN + \
+    TYPE_EMBEDDING_PATTERN + \
     str(
         r"descrpt_attr/t_avg|"
         r"descrpt_attr/t_std|"
diff --git a/deepmd/fit/dipole.py b/deepmd/fit/dipole.py
index bed93c181a..80ea7178da 100644
--- a/deepmd/fit/dipole.py
+++ b/deepmd/fit/dipole.py
@@ -6,7 +6,7 @@
 from deepmd.common import add_data_requirement, get_activation_func, get_precision, ACTIVATION_FN_DICT, PRECISION_DICT, docstring_parameter, cast_precision
 from deepmd.utils.argcheck import list_to_doc
 from deepmd.utils.network import one_layer, one_layer_rand_seed_shift
-from deepmd.utils.graph import get_fitting_net_variables
+from deepmd.utils.graph import get_fitting_net_variables_from_graph_def
 from deepmd.descriptor import DescrptSeA
 from deepmd.fit.fitting import Fitting
 
@@ -168,17 +168,23 @@ def build (self,
         # return tf.reshape(outs, [tf.shape(inputs)[0] * natoms[0] * 3 // 3])
 
     def init_variables(self,
-                       model_file: str
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
+                       suffix : str = "",
     ) -> None:
         """
-        Init the fitting net variables with the given frozen model
+        Init the fitting net variables with the given dict
 
         Parameters
         ----------
-        model_file : str
-            The input frozen model file
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        suffix : str
+            suffix to name scope
         """
-        self.fitting_net_variables = get_fitting_net_variables(model_file)
+        self.fitting_net_variables = get_fitting_net_variables_from_graph_def(graph_def)
 
 
     def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
index 5ac4354766..bb1a3844b6 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/fit/ener.py
@@ -8,7 +8,7 @@
 from deepmd.utils.argcheck import list_to_doc
 from deepmd.utils.network import one_layer, one_layer_rand_seed_shift
 from deepmd.utils.type_embed import embed_atom_type
-from deepmd.utils.graph import get_fitting_net_variables, load_graph_def, get_tensor_by_name_from_graph
+from deepmd.utils.graph import get_fitting_net_variables_from_graph_def, load_graph_def, get_tensor_by_name_from_graph
 from deepmd.fit.fitting import Fitting
 
 from deepmd.env import global_cvt_2_tf_float
@@ -510,17 +510,23 @@ def build (self,
 
 
     def init_variables(self,
-                       model_file: str
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
+                       suffix : str = "",
     ) -> None:
         """
-        Init the fitting net variables with the given frozen model
+        Init the fitting net variables with the given dict
 
         Parameters
         ----------
-        model_file : str
-            The input frozen model file
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        suffix : str
+            suffix to name scope
         """
-        self.fitting_net_variables = get_fitting_net_variables(model_file)
+        self.fitting_net_variables = get_fitting_net_variables_from_graph_def(graph_def)
 
 
     def enable_compression(self,
diff --git a/deepmd/fit/fitting.py b/deepmd/fit/fitting.py
index 69c2c96e52..034def72d4 100644
--- a/deepmd/fit/fitting.py
+++ b/deepmd/fit/fitting.py
@@ -6,3 +6,27 @@ class Fitting:
     def precision(self) -> tf.DType:
         """Precision of fitting network."""
         return self.fitting_precision
+
+    def init_variables(self,
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
+                       suffix : str = "",
+    ) -> None:
+        """
+        Init the fitting net variables with the given frozen model graph
+
+        Parameters
+        ----------
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        suffix : str
+            suffix to name scope
+        
+        Notes
+        -----
+        This base implementation always raises; fittings that support initialization from a frozen model are expected to override it.
+        """
+        raise NotImplementedError(
+            "Fitting %s doesn't support initialization from the given variables!" % type(self).__name__)
diff --git a/deepmd/fit/polar.py b/deepmd/fit/polar.py
index f139fcbd99..725a276028 100644
--- a/deepmd/fit/polar.py
+++ b/deepmd/fit/polar.py
@@ -6,7 +6,7 @@
 from deepmd.common import add_data_requirement, cast_precision, get_activation_func, get_precision, ACTIVATION_FN_DICT, PRECISION_DICT, docstring_parameter
 from deepmd.utils.argcheck import list_to_doc
 from deepmd.utils.network import one_layer, one_layer_rand_seed_shift
-from deepmd.utils.graph import get_fitting_net_variables
+from deepmd.utils.graph import get_fitting_net_variables_from_graph_def
 from deepmd.descriptor import DescrptLocFrame
 from deepmd.descriptor import DescrptSeA
 from deepmd.fit.fitting import Fitting
@@ -375,17 +375,23 @@ def build (self,
         return tf.reshape(outs, [-1])
 
     def init_variables(self,
-                       model_file: str
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
+                       suffix : str = "",
     ) -> None:
         """
-        Init the fitting net variables with the given frozen model
+        Init the fitting net variables with the given dict
 
         Parameters
         ----------
-        model_file : str
-            The input frozen model file
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        suffix : str
+            suffix to name scope
         """
-        self.fitting_net_variables = get_fitting_net_variables(model_file)
+        self.fitting_net_variables = get_fitting_net_variables_from_graph_def(graph_def)
 
 
     def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
@@ -511,17 +517,23 @@ def build (self,
         return tf.reshape(outs, [-1])
     
     def init_variables(self,
-                       model_file: str
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
+                       suffix : str = "",
     ) -> None:
         """
-        Init the fitting net variables with the given frozen model
+        Init the fitting net variables with the given dict
 
         Parameters
         ----------
-        model_file : str
-            The input frozen model file
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        suffix : str
+            suffix to name scope
         """
-        self.polar_fitting.init_variables(model_file)
+        self.polar_fitting.init_variables(graph=graph, graph_def=graph_def, suffix=suffix)
 
 
     def enable_mixed_precision(self, mixed_prec : dict = None) -> None:
diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py
index 574fbd9f16..471a0b4743 100644
--- a/deepmd/model/ener.py
+++ b/deepmd/model/ener.py
@@ -3,13 +3,15 @@
 
 from deepmd.env import tf
 from deepmd.utils.pair_tab import PairTab
-from deepmd.utils.graph import load_graph_def
+from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph
+from deepmd.utils.errors import GraphWithoutTensorError
 from deepmd.common import ClassArg
 from deepmd.env import global_cvt_2_ener_float, MODEL_VERSION, GLOBAL_TF_FLOAT_PRECISION
 from deepmd.env import op_module
+from .model import Model
 from .model_stat import make_stat_input, merge_sys_stat
 
-class EnerModel() :
+class EnerModel(Model) :
     """Energy model.
     
     Parameters
@@ -274,3 +276,37 @@ def build (self,
     def _import_graph_def_from_frz_model(self, frz_model, feed_dict, return_elements):
         graph, graph_def = load_graph_def(frz_model)
         return tf.import_graph_def(graph_def, input_map = feed_dict, return_elements = return_elements, name = "")
+
+    def init_variables(self,
+                       graph : tf.Graph,
+                       graph_def : tf.GraphDef,
+                       model_type : str = "original_model",
+                       suffix : str = "",
+    ) -> None:
+        """
+        Init the descriptor, fitting net and type embedding variables with the given frozen model
+
+        Parameters
+        ----------
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        model_type : str
+            the type of the model, either 'original_model' or 'compressed_model'
+        suffix : str
+            suffix to name scope
+        """
+        # an original model restores both the descriptor and the fitting net variables;
+        # a compressed model restores only the fitting net variables
+        if model_type == 'original_model':
+            self.descrpt.init_variables(graph, graph_def, suffix=suffix)
+            self.fitting.init_variables(graph, graph_def, suffix=suffix)
+            tf.constant("original_model", name = 'model_type', dtype = tf.string)
+        elif model_type == 'compressed_model':
+            self.fitting.init_variables(graph, graph_def, suffix=suffix)
+            tf.constant("compressed_model", name = 'model_type', dtype = tf.string)
+        else:
+            raise RuntimeError("Unknown model type %s" % model_type)
+        if self.typeebd is not None:
+            self.typeebd.init_variables(graph, graph_def, suffix=suffix)
diff --git a/deepmd/model/model.py b/deepmd/model/model.py
new file mode 100644
index 0000000000..f7c026fa84
--- /dev/null
+++ b/deepmd/model/model.py
@@ -0,0 +1,25 @@
+from deepmd.env import tf
+
+
+class Model:
+    def init_variables(self,
+                       graph : tf.Graph,
+                       graph_def : tf.GraphDef,
+                       model_type : str = "original_model",
+                       suffix : str = "",
+    ) -> None:
+        """
+        Init the embedding net variables with the given frozen model
+
+        Parameters
+        ----------
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        model_type : str
+            the type of the model
+        suffix : str
+            suffix to name scope
+        """
+        raise RuntimeError("The 'dp train init-frz-model' command do not support this model!")
diff --git a/deepmd/model/tensor.py b/deepmd/model/tensor.py
index 17b9ace29d..141904e9cf 100644
--- a/deepmd/model/tensor.py
+++ b/deepmd/model/tensor.py
@@ -6,9 +6,10 @@
 from deepmd.env import global_cvt_2_ener_float, MODEL_VERSION, GLOBAL_TF_FLOAT_PRECISION
 from deepmd.env import op_module
 from deepmd.utils.graph import load_graph_def
+from .model import Model
 from .model_stat import make_stat_input, merge_sys_stat
 
-class TensorModel() :
+class TensorModel(Model) :
     """Tensor model.
 
     Parameters
@@ -193,6 +194,37 @@ def _import_graph_def_from_frz_model(self, frz_model, feed_dict, return_elements
         graph, graph_def = load_graph_def(frz_model)
         return tf.import_graph_def(graph_def, input_map = feed_dict, return_elements = return_elements, name = "")
 
+    def init_variables(self,
+                       graph : tf.Graph,
+                       graph_def : tf.GraphDef,
+                       model_type : str = "original_model",
+                       suffix : str = "",
+    ) -> None:
+        """
+        Init the embedding net variables with the given frozen model
+
+        Parameters
+        ----------
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        model_type : str
+            the type of the model
+        suffix : str
+            suffix to name scope
+        """
+        if model_type == 'original_model':
+            self.descrpt.init_variables(graph, graph_def, suffix=suffix)
+            self.fitting.init_variables(graph, graph_def, suffix=suffix)
+            tf.constant("original_model", name = 'model_type', dtype = tf.string)
+        elif model_type == 'compressed_model':
+            self.fitting.init_variables(graph, graph_def, suffix=suffix)
+            tf.constant("compressed_model", name = 'model_type', dtype = tf.string)
+        else:
+            raise RuntimeError("Unknown model type %s" % model_type)
+
+
 class WFCModel(TensorModel):
     def __init__(
             self, 
diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index 3a26b53b18..40583e6223 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -22,7 +22,7 @@
 from deepmd.utils.neighbor_stat import NeighborStat
 from deepmd.utils.sess import run_sess
 from deepmd.utils.type_embed import TypeEmbedNet
-from deepmd.utils.graph import get_tensor_by_name
+from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph
 
 from tensorflow.python.client import timeline
 from deepmd.env import op_module, TF_VERSION
@@ -296,8 +296,9 @@ def build (self,
             # TODO: this is a simple fix but we should have a clear
             #       architecture to call neighbor stat
         else :
+            graph, graph_def = load_graph_def(self.model_param['compress']['model_file'])
             self.descrpt.enable_compression(self.model_param['compress']["min_nbor_dist"], self.model_param['compress']['model_file'], self.model_param['compress']['table_config'][0], self.model_param['compress']['table_config'][1], self.model_param['compress']['table_config'][2], self.model_param['compress']['table_config'][3])
-            self.fitting.init_variables(self.model_param['compress']['model_file'])
+            self.fitting.init_variables(graph, graph_def)
             # for fparam or aparam settings in 'ener' type fitting net
             if self.fitting_type == 'ener':
                 self.fitting.enable_compression(self.model_param['compress']['model_file'])
@@ -680,34 +681,22 @@ def _get_place_horders(self, data_dict):
             self.place_holders['find_' + kk] = tf.placeholder(tf.float32, name = 't_find_' + kk)
 
     def _init_from_frz_model(self):
+        try:
+            graph, graph_def = load_graph_def(self.run_opt.init_frz_model)
+        except FileNotFoundError as e:
+            # throw runtime error if there's no frozen model
+            raise RuntimeError(
+                "The input frozen model %s (%s) does not exist! Please check the path of the frozen model. " % (self.run_opt.init_frz_model, os.path.abspath(self.run_opt.init_frz_model))
+            ) from e
         # get the model type from the frozen model(self.run_opt.init_frz_model)
         try:
-            t_model_type = get_tensor_by_name(self.run_opt.init_frz_model, 'model_type')
-            self.model_type = bytes.decode(t_model_type)
+            t_model_type = get_tensor_by_name_from_graph(graph, 'model_type')
         except GraphWithoutTensorError as e:
-            # throw runtime error if there's no frozen model
-            if not os.path.exists(self.run_opt.init_frz_model):
-                raise RuntimeError(
-                    "The input frozen model %s (%s) does not exist! Please check the path of the frozen model. " % (self.run_opt.init_frz_model, os.path.abspath(self.run_opt.init_frz_model))
-                ) from e
             # throw runtime error if the frozen_model has no model type information...
-            else:
-                raise RuntimeError(
-                    "The input frozen model: %s has no 'model_type' information, "
-                    "which is not supported by the 'dp train init-frz-model' interface. " % self.run_opt.init_frz_model
-                ) from e
-        
-        if self.fitting_type != 'ener':
-            raise RuntimeError("The 'dp train init-frz-model' command only supports the 'ener' type fitting net currently!")
-        # self.frz_model will control the self.model to import the descriptor from the given frozen model instead of building from scratch...
-        # initialize fitting net with the given compressed frozen model
-        if self.model_type == 'original_model':
-            self.descrpt.init_variables(self.run_opt.init_frz_model)
-            self.fitting.init_variables(self.run_opt.init_frz_model)
-            tf.constant("original_model", name = 'model_type', dtype = tf.string)
-        elif self.model_type == 'compressed_model':
-            self.frz_model = self.run_opt.init_frz_model
-            self.fitting.init_variables(self.frz_model)
-            tf.constant("compressed_model", name = 'model_type', dtype = tf.string)
+            raise RuntimeError(
+                "The input frozen model: %s has no 'model_type' information, "
+                "which is not supported by the 'dp train init-frz-model' interface. " % self.run_opt.init_frz_model
+            ) from e
         else:
-            raise RuntimeError("Unknown model type %s" % self.model_type)
+            self.model_type = bytes.decode(t_model_type)
+        self.model.init_variables(graph, graph_def, model_type=self.model_type)
diff --git a/deepmd/utils/graph.py b/deepmd/utils/graph.py
index e6ff47b21f..fafca75f20 100644
--- a/deepmd/utils/graph.py
+++ b/deepmd/utils/graph.py
@@ -1,10 +1,11 @@
 import re
 import numpy as np
 from typing import Tuple, Dict
-from deepmd.env import tf, EMBEDDING_NET_PATTERN, FITTING_NET_PATTERN
+from deepmd.env import tf, EMBEDDING_NET_PATTERN, FITTING_NET_PATTERN, TYPE_EMBEDDING_PATTERN
 from deepmd.utils.sess import run_sess
 from deepmd.utils.errors import GraphWithoutTensorError
 
+# TODO (JZ): I think in this file we can merge some duplicated lines into one method... 
 def load_graph_def(model_file: str) -> Tuple[tf.Graph, tf.GraphDef]:
     """
     Load graph as well as the graph_def from the frozen model(model_file)
@@ -319,3 +320,61 @@ def get_fitting_net_variables(model_file : str) -> Dict:
     """
     _, graph_def = load_graph_def(model_file)
     return get_fitting_net_variables_from_graph_def(graph_def)
+
+
+def get_type_embedding_net_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: str = "") -> Dict:
+    """
+    Get the type embedding net nodes with the given tf.GraphDef object
+
+    Parameters
+    ----------
+    graph_def
+        The input tf.GraphDef object
+    suffix : str, optional
+        The scope suffix
+    
+    Returns
+    ----------
+    Dict
+        The type embedding net nodes within the given tf.GraphDef object
+    """
+    if suffix != "":
+        type_embedding_net_pattern = TYPE_EMBEDDING_PATTERN\
+            .replace('/idt',    suffix + '/idt')\
+            .replace('/bias',   suffix + '/bias')\
+            .replace('/matrix', suffix + '/matrix')
+    else:
+        type_embedding_net_pattern = TYPE_EMBEDDING_PATTERN
+
+    type_embedding_net_nodes = get_pattern_nodes_from_graph_def(graph_def, type_embedding_net_pattern)
+    return type_embedding_net_nodes
+
+
+def get_type_embedding_net_variables_from_graph_def(graph_def: tf.GraphDef, suffix: str = "") -> Dict:
+    """
+    Get the type embedding net variables with the given tf.GraphDef object
+
+    Parameters
+    ----------
+    graph_def : tf.GraphDef
+        The input tf.GraphDef object
+    suffix : str, optional
+        The suffix of the scope
+    
+    Returns
+    ----------
+    Dict
+        The embedding net variables within the given tf.GraphDef object 
+    """
+    type_embedding_net_variables = {}
+    type_embedding_net_nodes = get_type_embedding_net_nodes_from_graph_def(graph_def, suffix=suffix)
+    for item in type_embedding_net_nodes:
+        node = type_embedding_net_nodes[item]
+        dtype = tf.as_dtype(node.dtype).as_numpy_dtype
+        tensor_shape = tf.TensorShape(node.tensor_shape).as_list()
+        if (len(tensor_shape) != 1) or (tensor_shape[0] != 1):
+            tensor_value = np.frombuffer(node.tensor_content, dtype = tf.as_dtype(node.dtype).as_numpy_dtype)
+        else:
+            tensor_value = get_tensor_by_type(node, dtype)
+        type_embedding_net_variables[item] = np.reshape(tensor_value, tensor_shape)
+    return type_embedding_net_variables
diff --git a/deepmd/utils/type_embed.py b/deepmd/utils/type_embed.py
index 1bc09855ce..80808c4e8e 100644
--- a/deepmd/utils/type_embed.py
+++ b/deepmd/utils/type_embed.py
@@ -9,7 +9,7 @@
 from deepmd.env import default_tf_session_config
 from deepmd.utils.network import  embedding_net
 
-import math
+from deepmd.utils.graph import get_type_embedding_net_variables_from_graph_def
 from deepmd.common import get_activation_func, get_precision, ACTIVATION_FN_DICT, PRECISION_DICT, docstring_parameter, get_np_precision
 from deepmd.utils.argcheck import list_to_doc
 
@@ -95,6 +95,7 @@ def __init__(
         self.filter_activation_fn = get_activation_func(activation_function)
         self.trainable = trainable
         self.uniform_seed = uniform_seed
+        self.type_embedding_net_variables = None
 
 
     def build(
@@ -136,9 +137,27 @@ def build(
                 resnet_dt = self.filter_resnet_dt,
                 seed = self.seed,
                 trainable = self.trainable, 
+                initial_variables = self.type_embedding_net_variables,
                 uniform_seed = self.uniform_seed)
         ebd_type = tf.reshape(ebd_type, [-1, self.neuron[-1]]) # nnei * neuron[-1]
         self.ebd_type = tf.identity(ebd_type, name ='t_typeebd')
         return self.ebd_type 
 
+    def init_variables(self,
+                       graph: tf.Graph,
+                       graph_def: tf.GraphDef,
+                       suffix = '',
+    ) -> None:
+        """
+        Init the type embedding net variables with the given dict
 
+        Parameters
+        ----------
+        graph : tf.Graph
+            The input frozen model graph
+        graph_def : tf.GraphDef
+            The input frozen model graph_def
+        suffix
+            Name suffix to identify this descriptor
+        """
+        self.type_embedding_net_variables = get_type_embedding_net_variables_from_graph_def(graph_def, suffix = suffix)

From 1cdf527d75ba050831493f981bff6f5d6e17084f Mon Sep 17 00:00:00 2001
From: Yixiao Chen <19890787+y1xiaoc@users.noreply.github.com>
Date: Fri, 1 Apr 2022 23:25:21 -0400
Subject: [PATCH 22/29] optimize dplr data modifier (#1615)

---
 deepmd/infer/data_modifier.py | 46 +++++++++--------------------------
 1 file changed, 11 insertions(+), 35 deletions(-)

diff --git a/deepmd/infer/data_modifier.py b/deepmd/infer/data_modifier.py
index 224b8fbd06..0aa8d9fcc1 100644
--- a/deepmd/infer/data_modifier.py
+++ b/deepmd/infer/data_modifier.py
@@ -109,8 +109,7 @@ def _build_fv_graph_inner(self):
         nfxna = -1
         self.t_box_reshape = tf.reshape(self.t_box, [-1, 9])
         t_nframes = tf.shape(self.t_box_reshape)[0]
-        # (nframes x natoms_sel) x 1 x 3
-        self.t_ef_reshape = tf.reshape(self.t_ef, [nfxnas, 1, 3])
+
         # (nframes x natoms) x ndescrpt
         self.descrpt = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_rmat:0'))
         self.descrpt_deriv = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_rmat_deriv:0'))
@@ -120,43 +119,20 @@ def _build_fv_graph_inner(self):
         # self.descrpt_deriv = tf.reshape(self.descrpt_deriv, [nf, 192 * self.ndescrpt * 3])
 
         # nframes x (natoms_sel x 3)
-        self.t_tensor_reshpe = tf.reshape(self.t_tensor, [t_nframes, -1])
+        self.t_ef_reshape = tf.reshape(self.t_ef,  [t_nframes, -1])
         # nframes x (natoms x 3)
-        self.t_tensor_reshpe = self._enrich(self.t_tensor_reshpe, dof = 3)
+        self.t_ef_reshape = self._enrich(self.t_ef_reshape, dof = 3)
         # (nframes x natoms) x 3
-        self.t_tensor_reshpe = tf.reshape(self.t_tensor_reshpe, [nfxna, 3])
-        # (nframes x natoms) x 1
-        self.t_dipole_x = tf.slice(self.t_tensor_reshpe, [0, 0], [nfxna, 1])
-        self.t_dipole_y = tf.slice(self.t_tensor_reshpe, [0, 1], [nfxna, 1])
-        self.t_dipole_z = tf.slice(self.t_tensor_reshpe, [0, 2], [nfxna, 1])
-        self.t_dipole_z = tf.reshape(self.t_dipole_z, [nfxna, 1])
+        self.t_ef_reshape = tf.reshape(self.t_ef_reshape, [nfxna, 3])
+        # nframes x (natoms_sel x 3)
+        self.t_tensor_reshape = tf.reshape(self.t_tensor, [t_nframes, -1])
+        # nframes x (natoms x 3)
+        self.t_tensor_reshape = self._enrich(self.t_tensor_reshape, dof = 3)
+        # (nframes x natoms) x 3
+        self.t_tensor_reshape = tf.reshape(self.t_tensor_reshape, [nfxna, 3])
         # (nframes x natoms) x ndescrpt
-        [self.t_dipole_x_d] = tf.gradients(self.t_dipole_x, self.descrpt)
-        [self.t_dipole_y_d] = tf.gradients(self.t_dipole_y, self.descrpt)
-        [self.t_dipole_z_d] = tf.gradients(self.t_dipole_z, self.descrpt)
-        # nframes x (natoms x ndescrpt)
-        self.t_dipole_x_d = tf.reshape(self.t_dipole_x_d, [-1, self.t_natoms[0] * self.ndescrpt])
-        self.t_dipole_y_d = tf.reshape(self.t_dipole_y_d, [-1, self.t_natoms[0] * self.ndescrpt])
-        self.t_dipole_z_d = tf.reshape(self.t_dipole_z_d, [-1, self.t_natoms[0] * self.ndescrpt])
-        # nframes x (natoms_sel x ndescrpt)
-        self.t_dipole_x_d = self._slice_descrpt_deriv(self.t_dipole_x_d)
-        self.t_dipole_y_d = self._slice_descrpt_deriv(self.t_dipole_y_d)
-        self.t_dipole_z_d = self._slice_descrpt_deriv(self.t_dipole_z_d)
-        # (nframes x natoms_sel) x ndescrpt
-        self.t_dipole_x_d = tf.reshape(self.t_dipole_x_d, [nfxnas, self.ndescrpt])
-        self.t_dipole_y_d = tf.reshape(self.t_dipole_y_d, [nfxnas, self.ndescrpt])
-        self.t_dipole_z_d = tf.reshape(self.t_dipole_z_d, [nfxnas, self.ndescrpt])
-        # (nframes x natoms_sel) x 3 x ndescrpt
-        self.t_dipole_d = tf.concat([self.t_dipole_x_d, self.t_dipole_y_d, self.t_dipole_z_d], axis = 1)
-        self.t_dipole_d = tf.reshape(self.t_dipole_d, [nfxnas, 3*self.ndescrpt])
-        # (nframes x natoms_sel) x 3 x ndescrpt
-        self.t_dipole_d = tf.reshape(self.t_dipole_d, [-1, 3, self.ndescrpt])
-        # (nframes x natoms_sel) x 1 x ndescrpt
-        self.t_ef_d = tf.matmul(self.t_ef_reshape, self.t_dipole_d)
-        # nframes x (natoms_sel x ndescrpt)
-        self.t_ef_d = tf.reshape(self.t_ef_d, [t_nframes, -1])
+        [self.t_ef_d] = tf.gradients(self.t_tensor_reshape, self.descrpt, self.t_ef_reshape)
         # nframes x (natoms x ndescrpt)
-        self.t_ef_d = self._enrich(self.t_ef_d, dof = self.ndescrpt)
         self.t_ef_d = tf.reshape(self.t_ef_d, [nf, self.t_natoms[0] * self.ndescrpt])
         # t_ef_d is force (with -1), prod_forc takes deriv, so we need the opposite
         self.t_ef_d_oppo = -self.t_ef_d

From bdd339e03f040af7924f49dc3560a39d13c61be7 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Sat, 2 Apr 2022 21:03:19 -0400
Subject: [PATCH 23/29] add system names to model devi header (#1618)

* add system names to model devi header

Otherwise it's difficult to know which system the model deviation belongs to when there are multiple systems.

* add a line break after header
---
 deepmd/infer/model_devi.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py
index a859aadbc6..46b22b326b 100644
--- a/deepmd/infer/model_devi.py
+++ b/deepmd/infer/model_devi.py
@@ -44,7 +44,7 @@ def calc_model_devi_v(vs: np.ndarray):
     avg_devi_v = np.linalg.norm(vs_devi, axis=-1) / 3
     return max_devi_v, min_devi_v, avg_devi_v
 
-def write_model_devi_out(devi: np.ndarray, fname: str):
+def write_model_devi_out(devi: np.ndarray, fname: str, header: str=""):
     '''
     Parameters
     ----------
@@ -52,9 +52,11 @@ def write_model_devi_out(devi: np.ndarray, fname: str):
         the first column is the steps index
     fname : str
         the file name to dump
+    header : str, default=""
+        the header to dump
     '''
     assert devi.shape[1] == 7
-    header = "%10s" % "step"
+    header = "%s\n%10s" % (header, "step")
     for item in 'vf':
         header += "%19s%19s%19s" % (f"max_devi_{item}", f"min_devi_{item}", f"avg_devi_{item}")
     with open(fname, "ab") as fp:
@@ -212,6 +214,6 @@ def make_model_devi(
             devis.append(devi)
         devis = np.vstack(devis)
         devis[:, 0] = np.arange(nframes_tot) * frequency
-        write_model_devi_out(devis, output)
+        write_model_devi_out(devis, output, header=system)
         devis_coll.append(devis)
     return devis_coll

From c3cbd7eacf587911ba645d18a4770f381bf7b612 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 14 Apr 2022 02:34:48 -0400
Subject: [PATCH 24/29] update compress cli input file (#1633)

Fix #1549. Close #1630.
---
 deepmd/entrypoints/compress.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/deepmd/entrypoints/compress.py b/deepmd/entrypoints/compress.py
index caf19cef63..742c10ed98 100644
--- a/deepmd/entrypoints/compress.py
+++ b/deepmd/entrypoints/compress.py
@@ -85,6 +85,7 @@ def compress(
         else:
             log.info("stage 0: compute the min_nbor_dist")
             jdata = j_loader(training_script)
+            jdata = update_deepmd_input(jdata)
             t_min_nbor_dist = get_min_nbor_dist(jdata, get_rcut(jdata))
 
     _check_compress_type(input)

From d799306611b7bc522cabd2b901278a53393a2630 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Thu, 14 Apr 2022 20:55:11 -0400
Subject: [PATCH 25/29] add tips for easy installation (#1634)

* add tips for easy installation

Resolves #1362.

* remove "important"

it's not supported by markdown
---
 doc/install/easy-install.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md
index b84033fafd..779ae7ff7c 100644
--- a/doc/install/easy-install.md
+++ b/doc/install/easy-install.md
@@ -4,6 +4,8 @@ There are various easy methods to install DeePMD-kit. Choose one that you prefer
 
 After your easy installation, DeePMD-kit (`dp`) and LAMMPS (`lmp`) will be available to execute. You can try `dp -h` and `lmp -h` to see the help. `mpirun` is also available considering you may want to train models or run LAMMPS in parallel.
 
+Note: The off-line packages and conda packages require the [GNU C Library](https://www.gnu.org/software/libc/) 2.17 or above. The GPU version requires a [compatible NVIDIA driver](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#minor-version-compatibility) to be installed in advance. It is possible to force conda to [override detection](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-virtual.html#overriding-detected-packages) during installation, but these requirements are still necessary at runtime.
+
 - [Install off-line packages](#install-off-line-packages)
 - [Install with conda](#install-with-conda)
 - [Install with docker](#install-with-docker)

From e0ca6addf38f3d72fcef80bb907afaf82d35ca40 Mon Sep 17 00:00:00 2001
From: likefallwind <likefallwind@pku.edu.cn>
Date: Fri, 15 Apr 2022 10:26:14 +0800
Subject: [PATCH 26/29] add the order of box.raw in data-conv.md (#1635)

---
 doc/data/data-conv.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/data/data-conv.md b/doc/data/data-conv.md
index 4458e4eb17..6f25e36ba4 100644
--- a/doc/data/data-conv.md
+++ b/doc/data/data-conv.md
@@ -20,7 +20,7 @@ $ cat force.raw
  6.737  1.554 -5.587 -2.803  0.062  2.222
 -1.968 -0.163  1.020 -0.225 -0.789  0.343
 ```
-This `force.raw` contains 3 frames with each frame having the forces of 2 atoms, thus it has 3 lines and 6 columns. Each line provides all the 3 force components of 2 atoms in 1 frame. The first three numbers are the 3 force components of the first atom, while the second three numbers are the 3 force components of the second atom. The coordinate file `coord.raw` is organized similarly. In `box.raw`, the 9 components of the box vectors should be provided on each line. In `virial.raw`, the 9 components of the virial tensor should be provided on each line in the order `XX XY XZ YX YY YZ ZX ZY ZZ`. The number of lines of all raw files should be identical.
+This `force.raw` contains 3 frames with each frame having the forces of 2 atoms, thus it has 3 lines and 6 columns. Each line provides all the 3 force components of 2 atoms in 1 frame. The first three numbers are the 3 force components of the first atom, while the second three numbers are the 3 force components of the second atom. The coordinate file `coord.raw` is organized similarly. In `box.raw`, the 9 components of the box vectors should be provided on each line in the order `XX XY XZ YX YY YZ ZX ZY ZZ`. In `virial.raw`, the 9 components of the virial tensor should be provided on each line in the order `XX XY XZ YX YY YZ ZX ZY ZZ`. The number of lines of all raw files should be identical.
 
 We assume that the atom types do not change in all frames. It is provided by `type.raw`, which has one line with the types of atoms written one by one. The atom types should be integers. For example the `type.raw` of a system that has 2 atoms with 0 and 1:
 ```bash

From 92d24d0e9e379ee20665464fc33fa94d665d381e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yifan=20Li=E6=9D=8E=E4=B8=80=E5=B8=86?=
 <yifanl0716@gmail.com>
Date: Thu, 14 Apr 2022 22:46:58 -0400
Subject: [PATCH 27/29] Correct the forward communication at ik differentiation
 mode in pppm_dplr (#1637)

* explicitly set neighbor request to full in compute deeptensor/atom to fix bug #1381

* Corrected the pppm_dplr forward communication for ik differentiation mode at LAMMPS version >= 20210831.
---
 source/lmp/pppm_dplr.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/lmp/pppm_dplr.cpp b/source/lmp/pppm_dplr.cpp
index dc3782e058..1a1070c04f 100644
--- a/source/lmp/pppm_dplr.cpp
+++ b/source/lmp/pppm_dplr.cpp
@@ -138,7 +138,7 @@ void PPPMDPLR::compute(int eflag, int vflag)
 #endif
   else
 #if LAMMPS_VERSION_NUMBER>=20210831
-    gc->forward_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR),FORWARD_IK,
+    gc->forward_comm(GridComm::KSPACE,this,3,sizeof(FFT_SCALAR),FORWARD_IK,
                             gc_buf1,gc_buf2,MPI_FFT_SCALAR);
 #else
     gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,

From 5b62a7a95af77de963c566efd566e33f66a9c128 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Date: Fri, 15 Apr 2022 05:22:12 -0400
Subject: [PATCH 28/29] fix TF version bug in #1429 (#1638)

1. "and" should be "or" and "or" should be "and"
2. forgot to introduce `TF_MAJOR_VERSION` and `TF_MINOR_VERSION` from `version.h`
So it couldn't be compiled...
---
 source/op/optimizer/parallel.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/source/op/optimizer/parallel.cc b/source/op/optimizer/parallel.cc
index 3244226514..55e120e989 100644
--- a/source/op/optimizer/parallel.cc
+++ b/source/op/optimizer/parallel.cc
@@ -1,5 +1,6 @@
 // only support v1.15 or v2
-#if TF_MAJOR_VERSION >= 2 && (TF_MAJOR_VERSION == 1 || TF_MINOR_VERSION >= 15)
+#include "tensorflow/core/public/version.h"
+#if TF_MAJOR_VERSION >= 2 || (TF_MAJOR_VERSION == 1 && TF_MINOR_VERSION >= 15)
 
 #include "parallel.h"
 
@@ -148,4 +149,4 @@ Status DPParallel::Optimize(Cluster *cluster, const GrapplerItem &item,
 
 REGISTER_GRAPH_OPTIMIZER_AS(DPParallel, "dpparallel");
 
-#endif
\ No newline at end of file
+#endif

From 4994b52735362fae2739bd16d6a7d1d74dc97c2f Mon Sep 17 00:00:00 2001
From: Denghui Lu <denghuilu@pku.edu.cn>
Date: Sat, 16 Apr 2022 08:47:44 +0800
Subject: [PATCH 29/29] Fix compilation error while in a ROCm environment
 (#1628)

* fix compilation error while in a ROCm environment

* fix error of variable declaration
---
 source/lib/src/rocm/tabulate.hip.cu    | 9 ---------
 source/lib/tests/test_tabulate_se_r.cc | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/source/lib/src/rocm/tabulate.hip.cu b/source/lib/src/rocm/tabulate.hip.cu
index 4152668dfc..0354bc68b3 100644
--- a/source/lib/src/rocm/tabulate.hip.cu
+++ b/source/lib/src/rocm/tabulate.hip.cu
@@ -524,15 +524,6 @@ __global__ void tabulate_fusion_se_r_grad_fifth_order_polynomial(
   int warp_idx = __shfl(threadIdx.x / 64, 0);
   int lane_idx = threadIdx.x % 64;
 
-  bool unloop = false;
-  FPTYPE * iteratorA = (FPTYPE *)&_data[0]; // dy
-  for (int ii = 0; ii < MTILE; ii++) {
-    for (int jj = thread_idx; jj < last_layer_size; jj += blockDim.x) {
-      iteratorA[ii * last_layer_size + jj] = ;
-    }
-  }
-  __syncthreads();
-
   for (int ii = 0; ii < nnei; ii += KTILE) {
     FPTYPE xx = em[block_idx * nnei + ii + warp_idx];
     
diff --git a/source/lib/tests/test_tabulate_se_r.cc b/source/lib/tests/test_tabulate_se_r.cc
index 6cc59b02d1..b7173b0123 100644
--- a/source/lib/tests/test_tabulate_se_r.cc
+++ b/source/lib/tests/test_tabulate_se_r.cc
@@ -170,7 +170,7 @@ TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_gpu_rocm)
   std::vector<double> dy_dem(em.size(), 0.0);
   std::vector<double> dy(nloc * nnei * last_layer_size, 1.0);
 
-  * dy_dem_dev = NULL, * table_dev = NULL, * em_dev = NULL, * dy_dev = NULL;
+  double * dy_dem_dev = NULL, * table_dev = NULL, * em_dev = NULL, * dy_dev = NULL;
   deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_dev, em);