From 2508b58f0df2365a4dc93e24e52d983f124e45f9 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 3 Jun 2026 15:39:39 +0800
Subject: [PATCH 01/12] feat(dpmodel/pt_expt): pluggable NeighborList strategy
 + O(N) vesin builder

Make neighbor-list construction pluggable via an optional `NeighborList`
strategy injected at `forward_common`/`call_common` (the layer where the
system is extended), keeping the exported `forward_common_lower` untouched.

- dpmodel (torch-free core): `NeighborList` base + `DefaultNeighborList`
  (the historical dense extend+build). `neighbor_list=None` reproduces the
  current behavior byte-identically.
- pt_expt: `VesinNeighborList`, a device-aware `vesin.torch` O(N) cell list
  (on the input device for torch; CPU-bridged for numpy/dpmodel). It emits
  the same extended quartet, so force/global-virial/atomic-virial all come
  out of the existing autograd routines unchanged.
- inference: `nlist_backend="auto"|"vesin"|"native"` on the pt_expt DeepEval
  and the ASE calculator; `auto` falls back to native when vesin is
  unavailable/unsupported, `vesin` is strict.
- pyproject: depend on `vesin[torch]`.

Tested: builder equivalence (numpy+torch, PBC/noPBC, device) and full model
equivalence across 8 descriptor families (energy/force/virial/atomic virial)
vs the dense builder, plus nlist_backend dispatch and vesin-vs-native equality
through the compiled .pte.
---
 deepmd/calculator.py                          |   2 +
 deepmd/dpmodel/model/ener_model.py            |   2 +
 deepmd/dpmodel/model/make_model.py            |  42 +-
 deepmd/dpmodel/utils/__init__.py              |   8 +
 deepmd/dpmodel/utils/default_neighbor_list.py |  59 +++
 deepmd/dpmodel/utils/neighbor_list.py         |  64 +++
 deepmd/pt_expt/infer/deep_eval.py             |  57 +++
 deepmd/pt_expt/model/ener_model.py            |   2 +
 deepmd/pt_expt/utils/vesin_neighbor_list.py   | 213 +++++++++
 pyproject.toml                                |   2 +
 source/tests/pt_expt/infer/test_deep_eval.py  |  75 +++
 .../tests/pt_expt/utils/test_neighbor_list.py | 452 ++++++++++++++++++
 12 files changed, 955 insertions(+), 23 deletions(-)
 create mode 100644 deepmd/dpmodel/utils/default_neighbor_list.py
 create mode 100644 deepmd/dpmodel/utils/neighbor_list.py
 create mode 100644 deepmd/pt_expt/utils/vesin_neighbor_list.py
 create mode 100644 source/tests/pt_expt/utils/test_neighbor_list.py

diff --git a/deepmd/calculator.py b/deepmd/calculator.py
index 356bfeb9ce..8c679acf3c 100644
--- a/deepmd/calculator.py
+++ b/deepmd/calculator.py
@@ -91,6 +91,7 @@ def __init__(
         type_dict: dict[str, int] | None = None,
         neighbor_list: Optional["NeighborList"] = None,
         head: str | None = None,
+        nlist_backend: str = "auto",
         **kwargs: Any,
     ) -> None:
         Calculator.__init__(self, label=label, **kwargs)
@@ -98,6 +99,7 @@ def __init__(
             str(Path(model).resolve()),
             neighbor_list=neighbor_list,
             head=head,
+            nlist_backend=nlist_backend,
         )
         if type_dict:
             self.type_dict = type_dict
diff --git a/deepmd/dpmodel/model/ener_model.py b/deepmd/dpmodel/model/ener_model.py
index c8c75d3cca..80619f60de 100644
--- a/deepmd/dpmodel/model/ener_model.py
+++ b/deepmd/dpmodel/model/ener_model.py
@@ -88,6 +88,7 @@ def call(
         aparam: Array | None = None,
         do_atomic_virial: bool = False,
         charge_spin: Array | None = None,
+        neighbor_list: Any = None,
     ) -> dict[str, Array]:
         model_ret = self.call_common(
             coord,
@@ -97,6 +98,7 @@ def call(
             aparam=aparam,
             charge_spin=charge_spin,
             do_atomic_virial=do_atomic_virial,
+            neighbor_list=neighbor_list,
         )
         model_predict = {}
         model_predict["atom_energy"] = model_ret["energy"]
diff --git a/deepmd/dpmodel/model/make_model.py b/deepmd/dpmodel/model/make_model.py
index ebcc671f62..28e1118718 100644
--- a/deepmd/dpmodel/model/make_model.py
+++ b/deepmd/dpmodel/model/make_model.py
@@ -38,10 +38,9 @@
     check_operation_applied,
 )
 from deepmd.dpmodel.utils import (
-    build_neighbor_list,
-    extend_coord_with_ghosts,
+    DefaultNeighborList,
+    NeighborList,
     nlist_distinguish_types,
-    normalize_coord,
 )
 from deepmd.utils.path import (
     DPPath,
@@ -78,6 +77,7 @@ def model_call_from_call_lower(
     do_atomic_virial: bool = False,
     coord_corr_for_virial: Array | None = None,
     charge_spin: Array | None = None,
+    neighbor_list: NeighborList | None = None,
 ) -> dict[str, Array]:
     """Return model prediction from lower interface.
 
@@ -96,6 +96,12 @@ def model_call_from_call_lower(
         atomic parameter. nf x nloc x nda
     do_atomic_virial
         If calculate the atomic virial.
+    neighbor_list
+        The neighbor-list construction strategy.  ``None`` uses the default
+        all-pairs builder (:class:`DefaultNeighborList`), reproducing the
+        historical behavior.  An alternative strategy (e.g. an O(N) cell list)
+        may be injected to speed up neighbor-list construction; it returns the
+        same extended representation, so model outputs are unchanged.
 
     Returns
     -------
@@ -107,26 +113,9 @@ def model_call_from_call_lower(
     nframes, nloc = atype.shape[:2]
     cc, bb, fp, ap = coord, box, fparam, aparam
     del coord, box, fparam, aparam
-    if bb is not None:
-        coord_normalized = normalize_coord(
-            cc.reshape(nframes, nloc, 3),
-            bb.reshape(nframes, 3, 3),
-        )
-    else:
-        xp = array_api_compat.array_namespace(cc)
-        coord_normalized = xp.reshape(cc, (nframes, nloc, 3))
-    extended_coord, extended_atype, mapping = extend_coord_with_ghosts(
-        coord_normalized, atype, bb, rcut
-    )
-    nlist = build_neighbor_list(
-        extended_coord,
-        extended_atype,
-        nloc,
-        rcut,
-        sel,
-        # types will be distinguished in the lower interface,
-        # so it doesn't need to be distinguished here
-        distinguish_types=False,
+    builder = neighbor_list if neighbor_list is not None else DefaultNeighborList()
+    extended_coord, extended_atype, nlist, mapping = builder.build(
+        cc, atype, bb, rcut, sel
     )
     extended_coord = extended_coord.reshape(nframes, -1, 3)
     if coord_corr_for_virial is not None:
@@ -269,6 +258,7 @@ def call_common(
             do_atomic_virial: bool = False,
             coord_corr_for_virial: Array | None = None,
             charge_spin: Array | None = None,
+            neighbor_list: NeighborList | None = None,
         ) -> dict[str, Array]:
             """Return model prediction.
 
@@ -290,6 +280,11 @@ def call_common(
             coord_corr_for_virial
                 The coordinates correction for virial.
                 shape: nf x (nloc x 3)
+            neighbor_list
+                The neighbor-list construction strategy.  ``None`` uses the
+                default all-pairs builder; an alternative strategy (e.g. an O(N)
+                cell list) may be injected to speed up neighbor-list construction
+                without changing model outputs.
 
             Returns
             -------
@@ -316,6 +311,7 @@ def call_common(
                 do_atomic_virial=do_atomic_virial,
                 coord_corr_for_virial=coord_corr_for_virial,
                 charge_spin=cs,
+                neighbor_list=neighbor_list,
             )
             model_predict = self._output_type_cast(model_predict, input_prec)
             return model_predict
diff --git a/deepmd/dpmodel/utils/__init__.py b/deepmd/dpmodel/utils/__init__.py
index 588c8ea1ae..a9af7a50e5 100644
--- a/deepmd/dpmodel/utils/__init__.py
+++ b/deepmd/dpmodel/utils/__init__.py
@@ -1,4 +1,7 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+from .default_neighbor_list import (
+    DefaultNeighborList,
+)
 from .env_mat import (
     EnvMat,
 )
@@ -15,6 +18,9 @@
     is_lmdb,
     make_neighbor_stat_data,
 )
+from .neighbor_list import (
+    NeighborList,
+)
 from .network import (
     EmbeddingNet,
     FittingNet,
@@ -53,6 +59,7 @@
 
 __all__ = [
     "AtomExcludeMask",
+    "DefaultNeighborList",
     "DistributedSameNlocBatchSampler",
     "EmbeddingNet",
     "EnvMat",
@@ -62,6 +69,7 @@
     "LmdbTestDataNlocView",
     "NativeLayer",
     "NativeNet",
+    "NeighborList",
     "NetworkCollection",
     "PairExcludeMask",
     "SameNlocBatchSampler",
diff --git a/deepmd/dpmodel/utils/default_neighbor_list.py b/deepmd/dpmodel/utils/default_neighbor_list.py
new file mode 100644
index 0000000000..03b4289795
--- /dev/null
+++ b/deepmd/dpmodel/utils/default_neighbor_list.py
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Default all-pairs neighbor-list builder (historical deepmd behavior)."""
+
+import array_api_compat
+
+from deepmd.dpmodel.array_api import (
+    Array,
+)
+
+from .neighbor_list import (
+    NeighborList,
+)
+from .nlist import (
+    build_neighbor_list,
+    extend_coord_with_ghosts,
+)
+from .region import (
+    normalize_coord,
+)
+
+
+class DefaultNeighborList(NeighborList):
+    """Default builder, used when no strategy is supplied: reshape (and, with a
+    box, ``normalize_coord``) the local coordinates, then chain
+    :func:`~deepmd.dpmodel.utils.nlist.extend_coord_with_ghosts` and
+    :func:`~deepmd.dpmodel.utils.nlist.build_neighbor_list`.
+    """
+
+    def build(
+        self,
+        coord: Array,
+        atype: Array,
+        box: Array | None,
+        rcut: float,
+        sel: list[int],
+    ) -> tuple[Array, Array, Array, Array]:
+        xp = array_api_compat.array_namespace(coord, atype)
+        nframes, nloc = atype.shape[:2]
+        if box is not None:
+            coord_normalized = normalize_coord(
+                xp.reshape(coord, (nframes, nloc, 3)),
+                xp.reshape(box, (nframes, 3, 3)),
+            )
+        else:
+            coord_normalized = xp.reshape(coord, (nframes, nloc, 3))
+        extended_coord, extended_atype, mapping = extend_coord_with_ghosts(
+            coord_normalized, atype, box, rcut
+        )
+        # types are distinguished in the lower interface, so keep them merged here
+        nlist = build_neighbor_list(
+            extended_coord,
+            extended_atype,
+            nloc,
+            rcut,
+            sel,
+            distinguish_types=False,
+        )
+        extended_coord = xp.reshape(extended_coord, (nframes, -1, 3))
+        return extended_coord, extended_atype, nlist, mapping
diff --git a/deepmd/dpmodel/utils/neighbor_list.py b/deepmd/dpmodel/utils/neighbor_list.py
new file mode 100644
index 0000000000..49504cc7c3
--- /dev/null
+++ b/deepmd/dpmodel/utils/neighbor_list.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Pluggable neighbor-list construction strategies.
+
+A :class:`NeighborList` turns local coordinates (and an optional cell) into the
+*extended* representation consumed by the model's lower interface.  The default
+all-pairs builder lives in :mod:`deepmd.dpmodel.utils.default_neighbor_list`;
+backend-specific O(N) builders (e.g. the ``vesin``-based one in
+``deepmd.pt_expt.utils.vesin_neighbor_list``) subclass :class:`NeighborList`
+and are injected into the model, so the rest of the model is agnostic to how the
+neighbor list was built.
+"""
+
+from deepmd.dpmodel.array_api import (
+    Array,
+)
+
+
+class NeighborList:
+    """Strategy that builds the extended neighbor environment from local atoms.
+
+    Implementations turn local coordinates into the extended representation: the
+    coordinates and atom types of local-plus-ghost (periodic-image) atoms, a
+    candidate neighbor list indexing the extended atoms, and a mapping from each
+    extended atom to its local owner.  Subclasses must override :meth:`build`
+    (the base raises ``NotImplementedError``); ``rcut``/``sel`` arrive per call.
+    """
+
+    def build(
+        self,
+        coord: Array,
+        atype: Array,
+        box: Array | None,
+        rcut: float,
+        sel: list[int],
+    ) -> tuple[Array, Array, Array, Array]:
+        """Build the extended system and a candidate neighbor list.
+
+        Parameters
+        ----------
+        coord
+            local coordinates, shape (nf, nloc, 3) or (nf, nloc*3).
+        atype
+            local atom types, shape (nf, nloc).
+        box
+            simulation cell, shape (nf, 3, 3) or (nf, 9); ``None`` for non-periodic.
+        rcut
+            cutoff radius.
+        sel
+            number of selected neighbors per type.
+
+        Returns
+        -------
+        extended_coord
+            shape (nf, nall, 3).
+        extended_atype
+            shape (nf, nall).
+        nlist
+            shape (nf, nloc, nnei), type-undistinguished candidate neighbors
+            indexing the extended atoms (the lower interface re-formats it:
+            distance sort, truncate to ``sel``, split by type).
+        mapping
+            shape (nf, nall), mapping each extended atom to its local owner.
+        """
+        raise NotImplementedError
diff --git a/deepmd/pt_expt/infer/deep_eval.py b/deepmd/pt_expt/infer/deep_eval.py
index 18ca3e4038..9db3e5de38 100644
--- a/deepmd/pt_expt/infer/deep_eval.py
+++ b/deepmd/pt_expt/infer/deep_eval.py
@@ -54,6 +54,10 @@
 from deepmd.pt.utils.auto_batch_size import (
     AutoBatchSize,
 )
+from deepmd.pt_expt.utils.vesin_neighbor_list import (
+    VesinNeighborList,
+    is_vesin_torch_available,
+)
 
 if TYPE_CHECKING:
     import ase.neighborlist
@@ -103,6 +107,7 @@ def __init__(
         *args: Any,
         auto_batch_size: bool | int | AutoBatchSize = True,
         neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None,
+        nlist_backend: str = "auto",
         **kwargs: Any,
     ) -> None:
         self.output_def = output_def
@@ -123,6 +128,8 @@ def __init__(
                 "`.pt` (training checkpoint)."
             )
 
+        self._setup_nlist_backend(nlist_backend)
+
         if isinstance(auto_batch_size, bool):
             if auto_batch_size:
                 self.auto_batch_size = AutoBatchSize()
@@ -135,6 +142,50 @@ def __init__(
         else:
             raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
 
+    def _setup_nlist_backend(self, nlist_backend: str) -> None:
+        """Resolve the neighbor-list construction strategy from a user choice.
+
+        ``"native"`` never uses vesin; ``"vesin"`` requires ``vesin.torch`` and
+        raises ``ImportError`` if it is missing, or ``ValueError`` for a spin
+        model or an explicitly supplied ASE ``neighbor_list``; ``"auto"`` picks
+        vesin only when none of those obstacles apply.  Any other value raises
+        ``ValueError``.  Sets ``self._use_vesin`` and ``self._nlist_builder``.
+        """
+        if nlist_backend not in ("auto", "vesin", "native"):
+            raise ValueError(
+                f"Unknown nlist_backend '{nlist_backend}'; "
+                "expected 'auto', 'vesin', or 'native'."
+            )
+        is_spin = bool(getattr(self, "_is_spin", False))
+        ase_provided = self.neighbor_list is not None
+        # why vesin cannot serve this model; None means nothing rules it out
+        unsupported = "spin models" if is_spin else None
+        if nlist_backend == "native":
+            self._use_vesin = False
+        elif nlist_backend == "vesin":
+            if not is_vesin_torch_available():
+                raise ImportError(
+                    "nlist_backend='vesin' was requested but 'vesin.torch' is "
+                    "not installed. Install it (`pip install vesin[torch]`) or "
+                    "use nlist_backend='native' (or 'auto')."
+                )
+            if unsupported is not None:
+                raise ValueError(
+                    f"nlist_backend='vesin' is not supported for {unsupported}; "
+                    "use nlist_backend='native' (or 'auto')."
+                )
+            if ase_provided:
+                raise ValueError(
+                    "nlist_backend='vesin' conflicts with an explicitly "
+                    "supplied ASE neighbor_list; pass only one."
+                )
+            self._use_vesin = True
+        else:  # auto: same three checks as strict mode, but never raises
+            self._use_vesin = (
+                is_vesin_torch_available() and unsupported is None and not ase_provided
+            )
+        self._nlist_builder = VesinNeighborList() if self._use_vesin else None
+
     def _init_from_model_json(self, model_json_str: str) -> None:
         """Deserialize model.json and derive model API from the dpmodel instance."""
         from deepmd.pt_expt.model.model import (
@@ -831,6 +882,12 @@ def _build_nlist_native(
         sel = self._sel
         mixed_types = self._mixed_types
 
+        if getattr(self, "_nlist_builder", None) is not None:
+            # O(N) cell-list strategy (e.g. vesin): builds the same extended
+            # representation; the compiled forward_common_lower re-formats the
+            # candidate nlist (sort, truncate, type-split).
+            return self._nlist_builder.build(coords, atom_types, cells, rcut, sel)
+
         if cells is not None:
             box_input = cells.reshape(nframes, 3, 3)
             coord_normalized = normalize_coord(coords, box_input)
diff --git a/deepmd/pt_expt/model/ener_model.py b/deepmd/pt_expt/model/ener_model.py
index 1fdef5eaad..5d5409b028 100644
--- a/deepmd/pt_expt/model/ener_model.py
+++ b/deepmd/pt_expt/model/ener_model.py
@@ -59,6 +59,7 @@ def forward(
         aparam: torch.Tensor | None = None,
         do_atomic_virial: bool = False,
         charge_spin: torch.Tensor | None = None,
+        neighbor_list: Any = None,
     ) -> dict[str, torch.Tensor]:
         model_ret = self.call_common(
             coord,
@@ -68,6 +69,7 @@ def forward(
             aparam=aparam,
             charge_spin=charge_spin,
             do_atomic_virial=do_atomic_virial,
+            neighbor_list=neighbor_list,
         )
         model_predict = {}
         model_predict["atom_energy"] = model_ret["energy"]
diff --git a/deepmd/pt_expt/utils/vesin_neighbor_list.py b/deepmd/pt_expt/utils/vesin_neighbor_list.py
new file mode 100644
index 0000000000..92dcdd01a7
--- /dev/null
+++ b/deepmd/pt_expt/utils/vesin_neighbor_list.py
@@ -0,0 +1,213 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Device-resident O(N) neighbor-list strategy backed by ``vesin.torch``.
+
+This provides a :class:`~deepmd.dpmodel.utils.neighbor_list.NeighborList`
+strategy that replaces the historical all-pairs ghost expansion (~27*N images +
+a dense ``[N, 27N]`` distance matrix) with a cell list.  ``vesin.torch`` returns
+an ``(i, j, S)`` edge list (local neighbor index ``j`` plus integer periodic
+image ``S``); we materialize only the *real-neighbor* ghosts ``coord[j] + S@box``
+and hand back the standard extended quartet ``(extended_coord, extended_atype,
+nlist, mapping)``, so the rest of the model is unchanged.
+
+The neighbor search runs on the device of the input coordinates (CPU or CUDA),
+so on a GPU the whole build stays on the GPU.  When the inputs are numpy arrays
+(the array-API ``dpmodel`` backend) the build is bridged through a CPU torch
+tensor and the result converted back to numpy.  The search itself is
+non-differentiable -- it runs on detached coordinates -- while the returned
+``extended_coord`` is rebuilt from the (possibly grad-carrying) input
+coordinates so autograd for forces/virials flows through unchanged.
+"""
+
+from typing import (
+    Any,
+)
+
+import torch
+
+from deepmd.dpmodel.utils.neighbor_list import (
+    NeighborList,
+)
+
+
+def is_vesin_torch_available() -> bool:
+    """Report whether ``vesin.torch`` imports cleanly (``False`` on ImportError)."""
+    try:
+        import vesin.torch  # noqa: F401
+    except ImportError:
+        return False
+    return True
+
+
+class VesinNeighborList(NeighborList):
+    """O(N) neighbor-list strategy using the ``vesin.torch`` cell list.
+
+    Implements the :class:`~deepmd.dpmodel.utils.neighbor_list.NeighborList`
+    interface.  Works on torch tensors (on their own device) and on numpy arrays
+    (bridged through a CPU torch tensor); the returned quartet matches the
+    namespace and device of the input coordinates.
+    """
+
+    def build(
+        self,
+        coord: Any,
+        atype: Any,
+        box: Any,
+        rcut: float,
+        sel: list[int],
+    ) -> tuple[Any, Any, Any, Any]:
+        """Build the extended system + candidate neighbor list with vesin.
+
+        See :meth:`deepmd.dpmodel.utils.neighbor_list.NeighborList.build`.  The
+        returned ``nlist`` is distance-sorted and truncated to ``sum(sel)``;
+        ``atype``/``box`` are moved to the device of ``coord`` before the search,
+        and frames are padded to a common ``nall`` (atype ``-1``, mapping ``0``).
+        """
+        is_numpy = not isinstance(coord, torch.Tensor)
+        # Bridge numpy (dpmodel) through CPU torch; keep torch tensors on-device.
+        coord_t = torch.as_tensor(coord)
+        # atype/box follow the coordinates' device so mixed inputs cannot clash
+        atype_t = torch.as_tensor(atype, dtype=torch.int64, device=coord_t.device)
+        nframes, nloc = atype_t.shape[:2]
+        coord_t = coord_t.reshape(nframes, nloc, 3)
+        box_t = None
+        if box is not None:
+            box_t = torch.as_tensor(box, dtype=coord_t.dtype, device=coord_t.device)
+            box_t = box_t.reshape(nframes, 3, 3)
+
+        frame_results = [
+            _build_single(
+                coord_t[ff],
+                box_t[ff] if box_t is not None else None,
+                atype_t[ff],
+                rcut,
+                sel,
+            )
+            for ff in range(nframes)
+        ]
+        max_nall = max(ec.shape[0] for ec, _, _, _ in frame_results)
+        device = coord_t.device
+        ext_coords, ext_atypes, nlists, mappings = [], [], [], []
+        for ec, ea, nl, mp in frame_results:
+            pad = max_nall - ec.shape[0]
+            if pad > 0:
+                ec = torch.cat(
+                    [ec, torch.zeros((pad, 3), dtype=ec.dtype, device=device)], dim=0
+                )
+                ea = torch.cat(
+                    [ea, torch.full((pad,), -1, dtype=ea.dtype, device=device)], dim=0
+                )
+                mp = torch.cat(
+                    [mp, torch.zeros((pad,), dtype=mp.dtype, device=device)], dim=0
+                )
+            ext_coords.append(ec)
+            ext_atypes.append(ea)
+            nlists.append(nl)
+            mappings.append(mp)
+        extended_coord = torch.stack(ext_coords, dim=0)
+        extended_atype = torch.stack(ext_atypes, dim=0)
+        nlist = torch.stack(nlists, dim=0)
+        mapping = torch.stack(mappings, dim=0)
+
+        if is_numpy:
+            return (
+                extended_coord.detach().cpu().numpy(),
+                extended_atype.cpu().numpy(),
+                nlist.cpu().numpy(),
+                mapping.cpu().numpy(),
+            )
+        return extended_coord, extended_atype, nlist, mapping
+
+
+def _build_single(
+    positions: torch.Tensor,
+    cell: torch.Tensor | None,
+    atype: torch.Tensor,
+    rcut: float,
+    sel: list[int],
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+    """Single-frame ``(i,j,S)`` -> minimal-extended conversion.
+
+    The cell list runs on detached coordinates; ``extended_coord`` is rebuilt
+    from ``positions`` so gradients reach the local atoms and box.  Candidates
+    are ranked with a stable sort, so equidistant ties resolve reproducibly.
+    """
+    import vesin.torch
+
+    device = positions.device
+    nsel = sum(sel)
+    nloc = positions.shape[0]
+    periodic = cell is not None
+    box = (
+        cell if periodic else torch.zeros((3, 3), dtype=positions.dtype, device=device)
+    )
+
+    # Pin the default device to the input's device: vesin.torch allocates some
+    # internal tensors on the ambient default device, which may be a fake/other
+    # device in some contexts (e.g. tests set a placeholder CUDA default).  The
+    # search runs on detached inputs -- it is non-differentiable.
+    nl = vesin.torch.NeighborList(cutoff=rcut, full_list=True)
+    with torch.device(device):
+        ii, jj, ss = nl.compute(
+            points=positions.detach(),
+            box=box.detach(),
+            periodic=periodic,
+            quantities="ijS",
+        )
+    ii = ii.to(torch.int64)
+    jj = jj.to(torch.int64)
+    ss = ss.to(positions.dtype)
+
+    # ghost atoms: neighbors reached through a non-zero periodic shift.  Rebuild
+    # their coordinates from the grad-carrying `positions`/`box` so autograd for
+    # forces/virials flows through the extended coordinates unchanged.
+    out_mask = torch.any(ss != 0, dim=1)
+    out_idx = jj[out_mask]
+    out_coords = positions[out_idx] + ss[out_mask] @ box
+    nghost = int(out_idx.shape[0])
+
+    extended_coord = torch.cat([positions, out_coords], dim=0)
+    extended_atype = torch.cat([atype, atype[out_idx]], dim=0)
+    mapping = torch.cat(
+        [torch.arange(nloc, dtype=torch.int64, device=device), out_idx], dim=0
+    )
+
+    # remap neighbor column indices: ghosts -> [nloc, nloc + nghost)
+    neigh_idx = jj.clone()
+    neigh_idx[out_mask] = torch.arange(
+        nloc, nloc + nghost, dtype=torch.int64, device=device
+    )
+
+    # group pairs by center atom (vesin does not guarantee CSR ordering)
+    counts = torch.bincount(ii, minlength=nloc)
+    max_nn = int(counts.max()) if counts.numel() > 0 else 0
+    order = torch.argsort(ii, stable=True)
+    rows = ii[order]
+    cols = torch.arange(ii.shape[0], dtype=torch.int64, device=device) - (
+        torch.repeat_interleave(torch.cumsum(counts, 0) - counts, counts)
+    )
+    dense_idx = torch.full((nloc, max_nn), -1, dtype=torch.int64, device=device)
+    if ii.shape[0] > 0:
+        dense_idx[rows, cols] = neigh_idx[order]
+
+    # stable distance sort (ties keep pair order), keep nsel nearest, pad with -1
+    valid = dense_idx >= 0
+    lookup = torch.where(valid, dense_idx, torch.zeros_like(dense_idx))
+    dists = torch.linalg.norm(
+        extended_coord.detach()[lookup] - positions.detach()[:, None, :], dim=-1
+    )
+    valid &= dists <= rcut
+    dists = torch.where(valid, dists, torch.full_like(dists, float("inf")))
+    sort_order = torch.argsort(dists, dim=-1, stable=True)
+    sorted_idx = torch.take_along_dim(dense_idx, sort_order, dim=-1)
+    sorted_valid = torch.take_along_dim(valid, sort_order, dim=-1)
+
+    nlist = torch.full((nloc, nsel), -1, dtype=torch.int64, device=device)
+    keep = min(nsel, max_nn)
+    if keep > 0:
+        nlist[:, :keep] = torch.where(
+            sorted_valid[:, :keep],
+            sorted_idx[:, :keep],
+            torch.full_like(sorted_idx[:, :keep], -1),
+        )
+
+    return extended_coord, extended_atype, nlist, mapping
diff --git a/pyproject.toml b/pyproject.toml
index 35fc0fdb18..f2a89155fa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,6 +58,8 @@ dependencies = [
     'array-api-compat',
     'lmdb',
     'msgpack',
+    # O(N) cell-list neighbor list (vesin.torch) for fast Python/ASE inference
+    'vesin[torch]',
 ]
 requires-python = ">=3.10"
 keywords = ["deepmd"]
diff --git a/source/tests/pt_expt/infer/test_deep_eval.py b/source/tests/pt_expt/infer/test_deep_eval.py
index 7537575f1a..2ce6b7b209 100644
--- a/source/tests/pt_expt/infer/test_deep_eval.py
+++ b/source/tests/pt_expt/infer/test_deep_eval.py
@@ -2203,5 +2203,80 @@ def test_eval_fitting_last_layer_ase_vs_native(self) -> None:
         np.testing.assert_allclose(f_native, f_ase, rtol=1e-10, atol=1e-10)
 
 
+class TestDeepEvalNlistBackend(unittest.TestCase):
+    """Selection and equivalence checks for ``nlist_backend`` on a ``.pte`` model.
+
+    A ``DescrptSeA`` energy model is exported once per class; the tests check
+    that the default follows vesin availability, ``native`` disables vesin, an
+    unknown name raises ``ValueError``, strict ``vesin`` raises ``ImportError``
+    when vesin is missing, and vesin and native outputs agree to 1e-10.
+    """
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.rcut = 4.0
+        cls.rcut_smth = 0.5
+        cls.sel = [12, 10]
+        cls.nt = 2
+        cls.type_map = ["foo", "bar"]
+        ds = DescrptSeA(cls.rcut, cls.rcut_smth, cls.sel)
+        ft = EnergyFittingNet(
+            cls.nt, ds.get_dim_out(), mixed_types=ds.mixed_types(), seed=GLOBAL_SEED
+        )
+        cls.model = EnergyModel(ds, ft, type_map=cls.type_map).to(torch.float64)
+        cls.model.eval()
+        cls.tmpfile = tempfile.NamedTemporaryFile(suffix=".pte", delete=False)
+        cls.tmpfile.close()
+        deserialize_to_file(
+            cls.tmpfile.name, {"model": cls.model.serialize()}, do_atomic_virial=True
+        )
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        os.unlink(cls.tmpfile.name)
+
+    def _vesin_available(self) -> bool:
+        from deepmd.pt_expt.utils.vesin_neighbor_list import (
+            is_vesin_torch_available,
+        )
+
+        return is_vesin_torch_available()
+
+    def test_default_is_auto(self) -> None:
+        dp = DeepPot(self.tmpfile.name)
+        self.assertEqual(dp.deep_eval._use_vesin, self._vesin_available())
+
+    def test_native_disables_vesin(self) -> None:
+        dp = DeepPot(self.tmpfile.name, nlist_backend="native")
+        self.assertFalse(dp.deep_eval._use_vesin)
+
+    def test_invalid_raises(self) -> None:
+        with self.assertRaises(ValueError):
+            DeepPot(self.tmpfile.name, nlist_backend="bogus")
+
+    def test_vesin_requested_but_unavailable_raises(self) -> None:
+        if self._vesin_available():
+            self.skipTest("vesin.torch is installed; cannot test the missing path")
+        with self.assertRaises(ImportError):
+            DeepPot(self.tmpfile.name, nlist_backend="vesin")
+
+    def test_vesin_matches_native(self) -> None:
+        if not self._vesin_available():
+            self.skipTest("vesin.torch is not installed")
+        rng = np.random.default_rng(GLOBAL_SEED + 21)
+        natoms = 6
+        coords = rng.random((1, natoms, 3)) * 8.0
+        atom_types = np.array([i % self.nt for i in range(natoms)], dtype=np.int32)
+        dp_native = DeepPot(self.tmpfile.name, nlist_backend="native")
+        dp_vesin = DeepPot(self.tmpfile.name, nlist_backend="vesin")
+        for cells in (np.eye(3).reshape(1, 9) * 10.0, None):  # 10x10x10 cell, then no cell
+            ref = dp_native.eval(coords, cells, atom_types, atomic=True)
+            out = dp_vesin.eval(coords, cells, atom_types, atomic=True)
+            for a, b, name in zip(ref, out, ["e", "f", "v", "ae", "av"], strict=True):
+                np.testing.assert_allclose(
+                    a, b, rtol=1e-10, atol=1e-10, err_msg=f"vesin vs native: {name}"
+                )
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/pt_expt/utils/test_neighbor_list.py b/source/tests/pt_expt/utils/test_neighbor_list.py
new file mode 100644
index 0000000000..0a972b05fd
--- /dev/null
+++ b/source/tests/pt_expt/utils/test_neighbor_list.py
@@ -0,0 +1,452 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""Equivalence of the vesin O(N) ``NeighborList`` strategy with the default builder.
+
+A ``NeighborList`` strategy is injected at ``forward_common``/``call_common``,
+replacing the dense all-pairs ghost expansion (~27*N images + an O(N^2) distance
+matrix) with vesin's O(N) cell list.  Both strategies hand the *same* extended
+representation to the downstream model, so every model output (energy, force,
+virial, atomic virial) must match the default builder to fp round-off.
+
+Two layers are covered:
+
+* :class:`TestNeighborListBuilder` -- the builder in isolation, asserting the
+  per-atom neighbor *distance multisets* match the default, for the numpy
+  (dpmodel) and torch (pt/pt_expt) namespaces, periodic and non-periodic, and
+  that the returned tensors live on the input device.
+* :class:`TestNeighborListModelEquivalence` -- full model equivalence across
+  descriptor families (non-mixed, attention/mixed-types, message-passing with
+  single and multiple cutoffs, repflows, hybrid), for dpmodel (energy/atomic
+  energy) and pt_expt (energy/force/virial/atomic virial), periodic and
+  non-periodic, including the ``neighbor_list=None`` default falling back to the
+  dense builder byte-identically.
+"""
+
+import copy
+import unittest
+
+import numpy as np
+import torch
+
+from deepmd.dpmodel.model.model import get_model as get_model_dp
+from deepmd.dpmodel.utils import (
+    DefaultNeighborList,
+    NeighborList,
+)
+from deepmd.pt_expt.model import (
+    get_model,
+)
+from deepmd.pt_expt.utils.vesin_neighbor_list import (
+    VesinNeighborList,
+    is_vesin_torch_available,
+)
+
+from ...seed import (
+    GLOBAL_SEED,
+)
+
+# --- compact model configs (3-type type_map), reduced layers for test speed ---
+TYPE_MAP = ["O", "H", "B"]
+
+model_se_e2_a = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "se_e2_a",
+        "sel": [20, 20, 8],
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [6, 12],
+        "resnet_dt": False,
+        "axis_neuron": 4,
+        "seed": 1,
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+model_se_r = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "se_e2_r",
+        "sel": [20, 20, 8],
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [6, 12],
+        "resnet_dt": False,
+        "seed": 1,
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+model_se_e3 = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "se_e3",
+        "sel": [12, 12, 4],
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [6, 12],
+        "resnet_dt": False,
+        "seed": 1,
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+model_dpa1 = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "se_atten",
+        "sel": 40,
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [6, 12, 24],
+        "axis_neuron": 4,
+        "attn": 16,
+        "attn_layer": 2,
+        "attn_dotr": True,
+        "attn_mask": False,
+        "activation_function": "tanh",
+        "scaling_factor": 1.0,
+        "normalize": False,
+        "temperature": 1.0,
+        "set_davg_zero": True,
+        "type_one_side": True,
+        "seed": 1,
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+model_se_atten_v2 = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "se_atten_v2",
+        "sel": 40,
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [6, 12, 24],
+        "axis_neuron": 4,
+        "attn": 16,
+        "attn_layer": 2,
+        "attn_dotr": True,
+        "attn_mask": False,
+        "activation_function": "tanh",
+        "set_davg_zero": False,
+        "seed": 1,
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+model_dpa2 = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "dpa2",
+        "repinit": {
+            "rcut": 6.0,
+            "rcut_smth": 2.0,
+            "nsel": 30,
+            "neuron": [2, 4, 8],
+            "axis_neuron": 4,
+            "activation_function": "tanh",
+        },
+        "repformer": {
+            "rcut": 4.0,
+            "rcut_smth": 0.5,
+            "nsel": 20,
+            "nlayers": 2,
+            "g1_dim": 8,
+            "g2_dim": 5,
+            "attn2_hidden": 3,
+            "attn2_nhead": 1,
+            "attn1_hidden": 5,
+            "attn1_nhead": 1,
+            "axis_neuron": 4,
+            "update_h2": False,
+            "update_g1_has_conv": True,
+            "update_g1_has_grrg": True,
+            "update_g1_has_drrd": True,
+            "update_g1_has_attn": True,
+            "update_g2_has_g1g1": True,
+            "update_g2_has_attn": True,
+            "attn2_has_gate": True,
+        },
+        "seed": 1,
+        "add_tebd_to_repinit_out": False,
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+model_dpa3 = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "dpa3",
+        "repflow": {
+            "n_dim": 12,
+            "e_dim": 8,
+            "a_dim": 6,
+            "nlayers": 2,
+            "e_rcut": 6.0,
+            "e_rcut_smth": 3.0,
+            "e_sel": 20,
+            "a_rcut": 4.0,
+            "a_rcut_smth": 2.0,
+            "a_sel": 10,
+            "axis_neuron": 4,
+            "update_angle": True,
+            "update_style": "res_residual",
+            "update_residual": 0.1,
+            "update_residual_init": "const",
+        },
+        "activation_function": "tanh",
+        "use_tebd_bias": False,
+        "concat_output_tebd": False,
+        "seed": 1,
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+model_hybrid = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "hybrid",
+        "list": [
+            {
+                "type": "se_e2_a",
+                "sel": [20, 20, 8],
+                "rcut_smth": 0.5,
+                "rcut": 4.0,
+                "neuron": [6, 12],
+                "resnet_dt": False,
+                "axis_neuron": 4,
+                "seed": 1,
+            },
+            {
+                "type": "se_e2_r",
+                "sel": [20, 20, 8],
+                "rcut_smth": 0.5,
+                "rcut": 4.0,
+                "neuron": [6, 12],
+                "resnet_dt": False,
+                "seed": 1,
+            },
+        ],
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+ALL_MODELS = {
+    "se_e2_a": model_se_e2_a,
+    "se_r": model_se_r,
+    "se_e3": model_se_e3,
+    "dpa1": model_dpa1,
+    "se_atten_v2": model_se_atten_v2,
+    "dpa2": model_dpa2,
+    "dpa3": model_dpa3,
+    "hybrid": model_hybrid,
+}
+
+
+def _system(natoms: int = 6, box_len: float = 10.0, seed: int = GLOBAL_SEED):
+    """A small 3-type periodic system; returns numpy (coord, atype, box)."""
+    rng = np.random.default_rng(seed)
+    coord = (rng.random((1, natoms, 3)) * box_len).astype(np.float64)
+    atype = np.array([[0, 0, 1, 1, 2, 0]], dtype=np.int64)[:, :natoms]
+    box = (np.eye(3) * box_len).reshape(1, 9).astype(np.float64)
+    return coord, atype, box
+
+
+def _per_atom_neighbor_dists(ext_coord, nlist, coord):
+    """Sorted, rounded valid-neighbor distances for each local atom."""
+    ext_coord = np.asarray(ext_coord).reshape(-1, 3)
+    coord = np.asarray(coord).reshape(-1, 3)
+    out = []
+    for i in range(coord.shape[0]):
+        ds = [
+            round(float(np.linalg.norm(ext_coord[j] - coord[i])), 6)
+            for j in np.asarray(nlist)[i]
+            if j >= 0
+        ]
+        out.append(sorted(ds))
+    return out
+
+
+@unittest.skipIf(not is_vesin_torch_available(), "vesin.torch is not installed")
+class TestNeighborListBuilder(unittest.TestCase):
+    """The vesin builder must produce the same neighbor relationships as the
+    default all-pairs builder, in both namespaces and on the input device.
+    """
+
+    def setUp(self) -> None:
+        self.coord_np, self.atype_np, self.box_np = _system()
+        self.rcut = 4.0
+        self.sel = [20, 20, 8]
+
+    def _compare(self, box_np) -> None:
+        default = DefaultNeighborList()
+        vesin = VesinNeighborList()
+        # numpy (dpmodel) namespace
+        ec_d, _, nl_d, _ = default.build(
+            self.coord_np, self.atype_np, box_np, self.rcut, self.sel
+        )
+        ec_v, _, nl_v, _ = vesin.build(
+            self.coord_np, self.atype_np, box_np, self.rcut, self.sel
+        )
+        self.assertEqual(
+            _per_atom_neighbor_dists(ec_d, nl_d[0], self.coord_np[0]),
+            _per_atom_neighbor_dists(ec_v, nl_v[0], self.coord_np[0]),
+        )
+        # torch namespace
+        coord_t = torch.tensor(self.coord_np, dtype=torch.float64)
+        atype_t = torch.tensor(self.atype_np, dtype=torch.int64)
+        box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
+        ec_vt, _, nl_vt, _ = vesin.build(coord_t, atype_t, box_t, self.rcut, self.sel)
+        self.assertTrue(torch.is_tensor(ec_vt))
+        self.assertEqual(
+            _per_atom_neighbor_dists(ec_d, nl_d[0], self.coord_np[0]),
+            _per_atom_neighbor_dists(
+                ec_vt[0].cpu().numpy(), nl_vt[0].cpu().numpy(), self.coord_np[0]
+            ),
+        )
+
+    def test_pbc(self) -> None:
+        self._compare(self.box_np)
+
+    def test_nopbc(self) -> None:
+        self._compare(None)
+
+    def test_outputs_on_input_device(self) -> None:
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        coord_t = torch.tensor(self.coord_np, dtype=torch.float64, device=device)
+        atype_t = torch.tensor(self.atype_np, dtype=torch.int64, device=device)
+        box_t = torch.tensor(self.box_np, dtype=torch.float64, device=device)
+        outs = VesinNeighborList().build(coord_t, atype_t, box_t, self.rcut, self.sel)
+        for t in outs:
+            self.assertEqual(t.device.type, device.type)
+
+    def test_isinstance(self) -> None:
+        self.assertIsInstance(VesinNeighborList(), NeighborList)
+        self.assertIsInstance(DefaultNeighborList(), NeighborList)
+
+
+@unittest.skipIf(not is_vesin_torch_available(), "vesin.torch is not installed")
+class TestNeighborListModelEquivalence(unittest.TestCase):
+    """Full model outputs must be invariant to the neighbor-list strategy."""
+
+    def setUp(self) -> None:
+        self.coord_np, self.atype_np, self.box_np = _system()
+
+    def _tol(self, model_dict) -> dict:
+        prec = str(model_dict["descriptor"].get("precision", "float64"))
+        if "float32" in prec or "32" in prec:
+            return {"rtol": 1e-5, "atol": 1e-5}
+        return {"rtol": 1e-9, "atol": 1e-9}
+
+    def _run_dpmodel(self, name, model_dict, box_np) -> None:
+        md = get_model_dp(copy.deepcopy(model_dict))
+        tol = self._tol(model_dict)
+        r0 = md.call(
+            self.coord_np,
+            self.atype_np,
+            box=box_np,
+            neighbor_list=DefaultNeighborList(),
+        )
+        r1 = md.call(
+            self.coord_np, self.atype_np, box=box_np, neighbor_list=VesinNeighborList()
+        )
+        for k in ("energy", "atom_energy"):
+            np.testing.assert_allclose(
+                r0[k], r1[k], err_msg=f"dpmodel {name} {k}", **tol
+            )
+
+    def _run_pt_expt(self, name, model_dict, box_np) -> None:
+        md = get_model(copy.deepcopy(model_dict))
+        md.eval()
+        tol = self._tol(model_dict)
+        box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
+        atype_t = torch.tensor(self.atype_np, dtype=torch.int64)
+        results = {}
+        for tag, nl in (("def", DefaultNeighborList()), ("ves", VesinNeighborList())):
+            coord_t = torch.tensor(self.coord_np, dtype=torch.float64).requires_grad_(
+                True
+            )
+            results[tag] = md.forward(
+                coord_t, atype_t, box=box_t, do_atomic_virial=True, neighbor_list=nl
+            )
+        for k in ("energy", "atom_energy", "force", "virial", "atom_virial"):
+            a, b = results["def"].get(k), results["ves"].get(k)
+            if a is None or b is None:
+                continue
+            np.testing.assert_allclose(
+                a.detach().cpu().numpy(),
+                b.detach().cpu().numpy(),
+                err_msg=f"pt_expt {name} {k}",
+                **tol,
+            )
+
+    def _run_default_fallback(self, name, model_dict, box_np) -> None:
+        """``neighbor_list=None`` must equal an explicit DefaultNeighborList."""
+        md = get_model(copy.deepcopy(model_dict))
+        md.eval()
+        box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
+        atype_t = torch.tensor(self.atype_np, dtype=torch.int64)
+        outs = {}
+        for tag, kw in (
+            ("none", {}),
+            ("explicit", {"neighbor_list": DefaultNeighborList()}),
+        ):
+            coord_t = torch.tensor(self.coord_np, dtype=torch.float64).requires_grad_(
+                True
+            )
+            outs[tag] = md.forward(
+                coord_t, atype_t, box=box_t, do_atomic_virial=True, **kw
+            )
+        for k in ("energy", "force", "virial"):
+            np.testing.assert_array_equal(
+                outs["none"][k].detach().cpu().numpy(),
+                outs["explicit"][k].detach().cpu().numpy(),
+                err_msg=f"default fallback {name} {k}",
+            )
+
+
+def _make_dpmodel_test(name, model_dict, periodic):
+    def test(self) -> None:
+        box = self.box_np if periodic else None
+        self._run_dpmodel(name, model_dict, box)
+
+    return test
+
+
+def _make_pt_expt_test(name, model_dict, periodic):
+    def test(self) -> None:
+        box = self.box_np if periodic else None
+        self._run_pt_expt(name, model_dict, box)
+
+    return test
+
+
+def _make_fallback_test(name, model_dict):
+    def test(self) -> None:
+        self._run_default_fallback(name, model_dict, self.box_np)
+
+    return test
+
+
+# generate one test per (family, pbc/nopbc) so failures pinpoint the family
+for _name, _dict in ALL_MODELS.items():
+    for _pbc, _suffix in ((True, "pbc"), (False, "nopbc")):
+        setattr(
+            TestNeighborListModelEquivalence,
+            f"test_dpmodel_{_name}_{_suffix}",
+            _make_dpmodel_test(_name, _dict, _pbc),
+        )
+        setattr(
+            TestNeighborListModelEquivalence,
+            f"test_pt_expt_{_name}_{_suffix}",
+            _make_pt_expt_test(_name, _dict, _pbc),
+        )
+    setattr(
+        TestNeighborListModelEquivalence,
+        f"test_default_fallback_{_name}",
+        _make_fallback_test(_name, _dict),
+    )
+
+
+if __name__ == "__main__":
+    unittest.main()

From 6fd4ee579926c740d8224ac913b9bd99077edb36 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 3 Jun 2026 16:13:22 +0800
Subject: [PATCH 02/12] test(pt_expt): use pytest.mark.parametrize for
 neighbor-list equivalence tests

Replace the dynamic setattr metaprogramming over (descriptor family, pbc) with
pytest.mark.parametrize, per project test conventions: one parametrized case
per (family, periodic) combination, so cases can be selected individually with
-k and failures pinpoint the family. No coverage change (44 cases).
---
 .../tests/pt_expt/utils/test_neighbor_list.py | 316 +++++++-----------
 1 file changed, 130 insertions(+), 186 deletions(-)

diff --git a/source/tests/pt_expt/utils/test_neighbor_list.py b/source/tests/pt_expt/utils/test_neighbor_list.py
index 0a972b05fd..603c209da3 100644
--- a/source/tests/pt_expt/utils/test_neighbor_list.py
+++ b/source/tests/pt_expt/utils/test_neighbor_list.py
@@ -9,22 +9,23 @@
 
 Two layers are covered:
 
-* :class:`TestNeighborListBuilder` -- the builder in isolation, asserting the
-  per-atom neighbor *distance multisets* match the default, for the numpy
-  (dpmodel) and torch (pt/pt_expt) namespaces, periodic and non-periodic, and
-  that the returned tensors live on the input device.
-* :class:`TestNeighborListModelEquivalence` -- full model equivalence across
-  descriptor families (non-mixed, attention/mixed-types, message-passing with
-  single and multiple cutoffs, repflows, hybrid), for dpmodel (energy/atomic
-  energy) and pt_expt (energy/force/virial/atomic virial), periodic and
-  non-periodic, including the ``neighbor_list=None`` default falling back to the
-  dense builder byte-identically.
+* ``test_builder_*`` -- the builder in isolation, asserting the per-atom
+  neighbor *distance multisets* match the default, for the numpy (dpmodel) and
+  torch (pt/pt_expt) namespaces, periodic and non-periodic, and that the
+  returned tensors live on the input device.
+* ``test_dpmodel_equivalence`` / ``test_pt_expt_equivalence`` /
+  ``test_default_fallback`` -- full model equivalence across descriptor families
+  (non-mixed, attention/mixed-types, message-passing with single and multiple
+  cutoffs, repflows, hybrid), for dpmodel (energy/atomic energy) and pt_expt
+  (energy/force/virial/atomic virial), periodic and non-periodic, including the
+  ``neighbor_list=None`` default falling back to the dense builder
+  byte-identically.
 """
 
 import copy
-import unittest
 
 import numpy as np
+import pytest
 import torch
 
 from deepmd.dpmodel.model.model import get_model as get_model_dp
@@ -267,186 +268,129 @@ def _per_atom_neighbor_dists(ext_coord, nlist, coord):
     return out
 
 
-@unittest.skipIf(not is_vesin_torch_available(), "vesin.torch is not installed")
-class TestNeighborListBuilder(unittest.TestCase):
-    """The vesin builder must produce the same neighbor relationships as the
-    default all-pairs builder, in both namespaces and on the input device.
-    """
+pytestmark = pytest.mark.skipif(
+    not is_vesin_torch_available(), reason="vesin.torch is not installed"
+)
+
+
+def _tol(model_dict: dict) -> dict:
+    """Equivalence tolerance; loosened only for float32 models."""
+    prec = str(model_dict["descriptor"].get("precision", "float64"))
+    if "32" in prec:
+        return {"rtol": 1e-5, "atol": 1e-5}
+    return {"rtol": 1e-9, "atol": 1e-9}
+
+
+def test_builder_isinstance() -> None:
+    assert isinstance(VesinNeighborList(), NeighborList)
+    assert isinstance(DefaultNeighborList(), NeighborList)
 
-    def setUp(self) -> None:
-        self.coord_np, self.atype_np, self.box_np = _system()
-        self.rcut = 4.0
-        self.sel = [20, 20, 8]
-
-    def _compare(self, box_np) -> None:
-        default = DefaultNeighborList()
-        vesin = VesinNeighborList()
-        # numpy (dpmodel) namespace
-        ec_d, _, nl_d, _ = default.build(
-            self.coord_np, self.atype_np, box_np, self.rcut, self.sel
-        )
-        ec_v, _, nl_v, _ = vesin.build(
-            self.coord_np, self.atype_np, box_np, self.rcut, self.sel
-        )
-        self.assertEqual(
-            _per_atom_neighbor_dists(ec_d, nl_d[0], self.coord_np[0]),
-            _per_atom_neighbor_dists(ec_v, nl_v[0], self.coord_np[0]),
-        )
-        # torch namespace
-        coord_t = torch.tensor(self.coord_np, dtype=torch.float64)
-        atype_t = torch.tensor(self.atype_np, dtype=torch.int64)
-        box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
-        ec_vt, _, nl_vt, _ = vesin.build(coord_t, atype_t, box_t, self.rcut, self.sel)
-        self.assertTrue(torch.is_tensor(ec_vt))
-        self.assertEqual(
-            _per_atom_neighbor_dists(ec_d, nl_d[0], self.coord_np[0]),
-            _per_atom_neighbor_dists(
-                ec_vt[0].cpu().numpy(), nl_vt[0].cpu().numpy(), self.coord_np[0]
-            ),
-        )
 
-    def test_pbc(self) -> None:
-        self._compare(self.box_np)
-
-    def test_nopbc(self) -> None:
-        self._compare(None)
-
-    def test_outputs_on_input_device(self) -> None:
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        coord_t = torch.tensor(self.coord_np, dtype=torch.float64, device=device)
-        atype_t = torch.tensor(self.atype_np, dtype=torch.int64, device=device)
-        box_t = torch.tensor(self.box_np, dtype=torch.float64, device=device)
-        outs = VesinNeighborList().build(coord_t, atype_t, box_t, self.rcut, self.sel)
-        for t in outs:
-            self.assertEqual(t.device.type, device.type)
-
-    def test_isinstance(self) -> None:
-        self.assertIsInstance(VesinNeighborList(), NeighborList)
-        self.assertIsInstance(DefaultNeighborList(), NeighborList)
-
-
-@unittest.skipIf(not is_vesin_torch_available(), "vesin.torch is not installed")
-class TestNeighborListModelEquivalence(unittest.TestCase):
-    """Full model outputs must be invariant to the neighbor-list strategy."""
-
-    def setUp(self) -> None:
-        self.coord_np, self.atype_np, self.box_np = _system()
-
-    def _tol(self, model_dict) -> dict:
-        prec = str(model_dict["descriptor"].get("precision", "float64"))
-        if "float32" in prec or "32" in prec:
-            return {"rtol": 1e-5, "atol": 1e-5}
-        return {"rtol": 1e-9, "atol": 1e-9}
-
-    def _run_dpmodel(self, name, model_dict, box_np) -> None:
-        md = get_model_dp(copy.deepcopy(model_dict))
-        tol = self._tol(model_dict)
-        r0 = md.call(
-            self.coord_np,
-            self.atype_np,
-            box=box_np,
-            neighbor_list=DefaultNeighborList(),
+@pytest.mark.parametrize("periodic", [False, True])  # non-PBC vs PBC
+def test_builder_matches_default(periodic: bool) -> None:
+    """The vesin builder produces the same per-atom neighbor distance multisets
+    as the default all-pairs builder, in both the numpy (dpmodel) and torch
+    (pt/pt_expt) namespaces.
+    """
+    coord_np, atype_np, box_np = _system()
+    box_np = box_np if periodic else None
+    rcut, sel = 4.0, [20, 20, 8]
+    ec_d, _, nl_d, _ = DefaultNeighborList().build(
+        coord_np, atype_np, box_np, rcut, sel
+    )
+    ref = _per_atom_neighbor_dists(ec_d, nl_d[0], coord_np[0])
+    # numpy (dpmodel) namespace
+    ec_v, _, nl_v, _ = VesinNeighborList().build(coord_np, atype_np, box_np, rcut, sel)
+    assert _per_atom_neighbor_dists(ec_v, nl_v[0], coord_np[0]) == ref
+    # torch (pt/pt_expt) namespace
+    coord_t = torch.tensor(coord_np, dtype=torch.float64)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64)
+    box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
+    ec_vt, _, nl_vt, _ = VesinNeighborList().build(coord_t, atype_t, box_t, rcut, sel)
+    assert torch.is_tensor(ec_vt)
+    assert (
+        _per_atom_neighbor_dists(
+            ec_vt[0].cpu().numpy(), nl_vt[0].cpu().numpy(), coord_np[0]
         )
-        r1 = md.call(
-            self.coord_np, self.atype_np, box=box_np, neighbor_list=VesinNeighborList()
+        == ref
+    )
+
+
+def test_builder_outputs_on_input_device() -> None:
+    coord_np, atype_np, box_np = _system()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    coord_t = torch.tensor(coord_np, dtype=torch.float64, device=device)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=device)
+    box_t = torch.tensor(box_np, dtype=torch.float64, device=device)
+    for t in VesinNeighborList().build(coord_t, atype_t, box_t, 4.0, [20, 20, 8]):
+        assert t.device.type == device.type
+
+
+@pytest.mark.parametrize("name", list(ALL_MODELS))  # descriptor family
+@pytest.mark.parametrize("periodic", [False, True])  # non-PBC vs PBC
+def test_dpmodel_equivalence(name: str, periodic: bool) -> None:
+    """Energy / atomic energy (dpmodel) are invariant to the nlist strategy."""
+    coord_np, atype_np, box_np = _system()
+    box_np = box_np if periodic else None
+    model_dict = ALL_MODELS[name]
+    md = get_model_dp(copy.deepcopy(model_dict))
+    r0 = md.call(coord_np, atype_np, box=box_np, neighbor_list=DefaultNeighborList())
+    r1 = md.call(coord_np, atype_np, box=box_np, neighbor_list=VesinNeighborList())
+    for k in ("energy", "atom_energy"):
+        np.testing.assert_allclose(
+            r0[k], r1[k], err_msg=f"{name} {k}", **_tol(model_dict)
         )
-        for k in ("energy", "atom_energy"):
-            np.testing.assert_allclose(
-                r0[k], r1[k], err_msg=f"dpmodel {name} {k}", **tol
-            )
-
-    def _run_pt_expt(self, name, model_dict, box_np) -> None:
-        md = get_model(copy.deepcopy(model_dict))
-        md.eval()
-        tol = self._tol(model_dict)
-        box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
-        atype_t = torch.tensor(self.atype_np, dtype=torch.int64)
-        results = {}
-        for tag, nl in (("def", DefaultNeighborList()), ("ves", VesinNeighborList())):
-            coord_t = torch.tensor(self.coord_np, dtype=torch.float64).requires_grad_(
-                True
-            )
-            results[tag] = md.forward(
-                coord_t, atype_t, box=box_t, do_atomic_virial=True, neighbor_list=nl
-            )
-        for k in ("energy", "atom_energy", "force", "virial", "atom_virial"):
-            a, b = results["def"].get(k), results["ves"].get(k)
-            if a is None or b is None:
-                continue
-            np.testing.assert_allclose(
-                a.detach().cpu().numpy(),
-                b.detach().cpu().numpy(),
-                err_msg=f"pt_expt {name} {k}",
-                **tol,
-            )
-
-    def _run_default_fallback(self, name, model_dict, box_np) -> None:
-        """``neighbor_list=None`` must equal an explicit DefaultNeighborList."""
-        md = get_model(copy.deepcopy(model_dict))
-        md.eval()
-        box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
-        atype_t = torch.tensor(self.atype_np, dtype=torch.int64)
-        outs = {}
-        for tag, kw in (
-            ("none", {}),
-            ("explicit", {"neighbor_list": DefaultNeighborList()}),
-        ):
-            coord_t = torch.tensor(self.coord_np, dtype=torch.float64).requires_grad_(
-                True
-            )
-            outs[tag] = md.forward(
-                coord_t, atype_t, box=box_t, do_atomic_virial=True, **kw
-            )
-        for k in ("energy", "force", "virial"):
-            np.testing.assert_array_equal(
-                outs["none"][k].detach().cpu().numpy(),
-                outs["explicit"][k].detach().cpu().numpy(),
-                err_msg=f"default fallback {name} {k}",
-            )
-
-
-def _make_dpmodel_test(name, model_dict, periodic):
-    def test(self) -> None:
-        box = self.box_np if periodic else None
-        self._run_dpmodel(name, model_dict, box)
-
-    return test
-
-
-def _make_pt_expt_test(name, model_dict, periodic):
-    def test(self) -> None:
-        box = self.box_np if periodic else None
-        self._run_pt_expt(name, model_dict, box)
-
-    return test
-
-
-def _make_fallback_test(name, model_dict):
-    def test(self) -> None:
-        self._run_default_fallback(name, model_dict, self.box_np)
-
-    return test
-
-
-# generate one test per (family, pbc/nopbc) so failures pinpoint the family
-for _name, _dict in ALL_MODELS.items():
-    for _pbc, _suffix in ((True, "pbc"), (False, "nopbc")):
-        setattr(
-            TestNeighborListModelEquivalence,
-            f"test_dpmodel_{_name}_{_suffix}",
-            _make_dpmodel_test(_name, _dict, _pbc),
+
+
+@pytest.mark.parametrize("name", list(ALL_MODELS))  # descriptor family
+@pytest.mark.parametrize("periodic", [False, True])  # non-PBC vs PBC
+def test_pt_expt_equivalence(name: str, periodic: bool) -> None:
+    """pt_expt energy / force / virial / atomic virial are invariant to the
+    nlist strategy (force/virial come from the existing autograd routines).
+    """
+    coord_np, atype_np, box_np = _system()
+    box_np = box_np if periodic else None
+    model_dict = ALL_MODELS[name]
+    md = get_model(copy.deepcopy(model_dict))
+    md.eval()
+    box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64)
+    results = {}
+    for tag, nl in (("def", DefaultNeighborList()), ("ves", VesinNeighborList())):
+        coord_t = torch.tensor(coord_np, dtype=torch.float64).requires_grad_(True)
+        results[tag] = md.forward(
+            coord_t, atype_t, box=box_t, do_atomic_virial=True, neighbor_list=nl
         )
-        setattr(
-            TestNeighborListModelEquivalence,
-            f"test_pt_expt_{_name}_{_suffix}",
-            _make_pt_expt_test(_name, _dict, _pbc),
+    for k in ("energy", "atom_energy", "force", "virial", "atom_virial"):
+        a, b = results["def"].get(k), results["ves"].get(k)
+        if a is None or b is None:
+            continue
+        np.testing.assert_allclose(
+            a.detach().cpu().numpy(),
+            b.detach().cpu().numpy(),
+            err_msg=f"{name} {k}",
+            **_tol(model_dict),
         )
-    setattr(
-        TestNeighborListModelEquivalence,
-        f"test_default_fallback_{_name}",
-        _make_fallback_test(_name, _dict),
-    )
 
 
-if __name__ == "__main__":
-    unittest.main()
+@pytest.mark.parametrize("name", list(ALL_MODELS))  # descriptor family
+def test_default_fallback(name: str) -> None:
+    """``neighbor_list=None`` equals an explicit DefaultNeighborList byte-for-byte."""
+    coord_np, atype_np, box_np = _system()
+    md = get_model(copy.deepcopy(ALL_MODELS[name]))
+    md.eval()
+    box_t = torch.tensor(box_np, dtype=torch.float64)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64)
+    outs = {}
+    for tag, kw in (
+        ("none", {}),
+        ("explicit", {"neighbor_list": DefaultNeighborList()}),
+    ):
+        coord_t = torch.tensor(coord_np, dtype=torch.float64).requires_grad_(True)
+        outs[tag] = md.forward(coord_t, atype_t, box=box_t, do_atomic_virial=True, **kw)
+    for k in ("energy", "force", "virial"):
+        np.testing.assert_array_equal(
+            outs["none"][k].detach().cpu().numpy(),
+            outs["explicit"][k].detach().cpu().numpy(),
+            err_msg=f"{name} {k}",
+        )

From 4a64333fea4ebf1e2968f3e612a047e5d8944197 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 3 Jun 2026 16:38:43 +0800
Subject: [PATCH 03/12] feat(pt): support O(N) vesin nlist_backend for the pt
 backend

Extend the pluggable neighbor list to the pt (torch.jit) backend. The pt model
is reconstructed eagerly in DeepEval, so when nlist_backend selects vesin we
build the (i,j,S) extended representation with VesinNeighborList and run the
model's forward_common_lower + communicate_extended_output directly, leaving the
TorchScript graph untouched. The vesin path is gated off for spin/hessian models
and for ASE neighbor_list conflicts; in those cases nlist_backend="auto" falls
back to the native builder, while nlist_backend="vesin" is strict and raises.

Also:
- VesinNeighborList: avoid torch.as_tensor without an explicit device (under a
  non-CPU ambient default device it triggers CUDA init even for CPU tensors);
  bridge numpy via from_numpy and use torch tensors directly. Makes the builder
  device-robust.
- drop the getattr(self, "_use_vesin"/"_nlist_builder", ...) defensive defaults;
  both attributes are always initialized in __init__ via _setup_nlist_backend.

Tested: source/tests/pt/model/test_nlist_backend.py (dispatch + vesin-vs-native
energy/force/virial/atomic-virial equivalence for se_e2_a and dpa1, PBC/non-PBC).
---
 deepmd/pt/infer/deep_eval.py                | 136 ++++++++++++++++++--
 deepmd/pt_expt/infer/deep_eval.py           |   2 +-
 deepmd/pt_expt/utils/vesin_neighbor_list.py |  21 ++-
 source/tests/pt/model/test_nlist_backend.py | 128 ++++++++++++++++++
 4 files changed, 271 insertions(+), 16 deletions(-)
 create mode 100644 source/tests/pt/model/test_nlist_backend.py

diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py
index 0887ceb9df..44c4cf30ca 100644
--- a/deepmd/pt/infer/deep_eval.py
+++ b/deepmd/pt/infer/deep_eval.py
@@ -46,6 +46,9 @@
 from deepmd.pt.model.model import (
     get_model,
 )
+from deepmd.pt.model.model.transform_output import (
+    communicate_extended_output,
+)
 from deepmd.pt.model.network.network import (
     TypeEmbedNetConsistent,
 )
@@ -67,6 +70,10 @@
     to_numpy_array,
     to_torch_tensor,
 )
+from deepmd.pt_expt.utils.vesin_neighbor_list import (
+    VesinNeighborList,
+    is_vesin_torch_available,
+)
 from deepmd.utils.batch_size import (
     RetrySignal,
 )
@@ -129,10 +136,12 @@ def __init__(
         neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None,
         head: str | int | None = None,
         no_jit: bool = False,
+        nlist_backend: str = "auto",
         **kwargs: Any,
     ) -> None:
         self.output_def = output_def
         self.model_path = model_file
+        self.neighbor_list = neighbor_list
         if str(self.model_path).endswith(".pt"):
             state_dict = torch.load(
                 model_file, map_location=env.DEVICE, weights_only=True
@@ -239,6 +248,54 @@ def __init__(
         if callable(self._has_spin):
             self._has_spin = self._has_spin()
         self._has_hessian = self.model_def_script.get("hessian_mode", False)
+        self._setup_nlist_backend(nlist_backend)
+
+    def _setup_nlist_backend(self, nlist_backend: str) -> None:
+        """Resolve the neighbor-list construction strategy from a user choice.
+
+        ``"native"`` uses the dense all-pairs builder; ``"vesin"`` forces the
+        O(N) ``vesin.torch`` cell list (raising if it is unavailable or the
+        model/inputs are unsupported); ``"auto"`` uses vesin when applicable and
+        silently falls back to the native builder otherwise.  Results are
+        unchanged either way -- only the neighbor-search cost differs.
+        """
+        if nlist_backend not in ("auto", "vesin", "native"):
+            raise ValueError(
+                f"Unknown nlist_backend '{nlist_backend}'; "
+                "expected 'auto', 'vesin', or 'native'."
+            )
+        # reason vesin cannot be used (None means it can)
+        unsupported = None
+        if self._has_spin:
+            unsupported = "spin models"
+        elif self._has_hessian:
+            unsupported = "hessian models"
+        ase_provided = self.neighbor_list is not None
+        if nlist_backend == "native":
+            self._use_vesin = False
+        elif nlist_backend == "vesin":
+            if not is_vesin_torch_available():
+                raise ImportError(
+                    "nlist_backend='vesin' was requested but 'vesin.torch' is "
+                    "not installed. Install it (`pip install vesin[torch]`) or "
+                    "use nlist_backend='native' (or 'auto')."
+                )
+            if unsupported is not None:
+                raise ValueError(
+                    f"nlist_backend='vesin' is not supported for {unsupported}; "
+                    "use nlist_backend='native' (or 'auto')."
+                )
+            if ase_provided:
+                raise ValueError(
+                    "nlist_backend='vesin' conflicts with an explicitly "
+                    "supplied ASE neighbor_list; pass only one."
+                )
+            self._use_vesin = True
+        else:  # auto: use vesin when possible, otherwise fall back silently
+            self._use_vesin = (
+                is_vesin_torch_available() and unsupported is None and not ase_provided
+            )
+        self._nlist_builder = VesinNeighborList() if self._use_vesin else None
 
     def get_rcut(self) -> float:
         """Get the cutoff radius of this model."""
@@ -586,17 +643,28 @@ def _eval_model(
         do_atomic_virial = any(
             x.category == OutputVariableCategory.DERV_C for x in request_defs
         )
-        batch_output = model(
-            coord_input,
-            type_input,
-            box=box_input,
-            do_atomic_virial=do_atomic_virial,
-            fparam=fparam_input,
-            aparam=aparam_input,
-            charge_spin=charge_spin_input,
-        )
-        if isinstance(batch_output, tuple):
-            batch_output = batch_output[0]
+        if self._use_vesin:
+            batch_output = self._eval_lower_vesin(
+                coord_input,
+                type_input,
+                box_input,
+                fparam_input,
+                aparam_input,
+                charge_spin_input,
+                do_atomic_virial,
+            )
+        else:
+            batch_output = model(
+                coord_input,
+                type_input,
+                box=box_input,
+                do_atomic_virial=do_atomic_virial,
+                fparam=fparam_input,
+                aparam=aparam_input,
+                charge_spin=charge_spin_input,
+            )
+            if isinstance(batch_output, tuple):
+                batch_output = batch_output[0]
 
         results = []
         for odef in request_defs:
@@ -612,6 +680,52 @@ def _eval_model(
                 )  # this is kinda hacky
         return tuple(results)
 
+    def _eval_lower_vesin(
+        self,
+        coord: torch.Tensor,
+        atype: torch.Tensor,
+        box: torch.Tensor | None,
+        fparam: torch.Tensor | None,
+        aparam: torch.Tensor | None,
+        charge_spin: torch.Tensor | None,
+        do_atomic_virial: bool,
+    ) -> dict[str, torch.Tensor]:
+        """Evaluate via the O(N) vesin-built ``(i,j,S)`` extended neighbor list.
+
+        Builds the extended representation with the vesin cell list, runs the
+        model's ``forward_common_lower``, and maps the extended outputs back to
+        local atoms with ``communicate_extended_output``.  Returns a dict keyed
+        by backend names, matching the normal ``model()`` output so the caller's
+        extraction is unchanged.  ``forward_common_atomic`` sets
+        ``requires_grad`` on the extended coordinates internally, exactly as on
+        the native path, so forces/virials are produced identically.
+        """
+        inner = self.dp.model["Default"]
+        ext_coord, ext_atype, nlist, mapping = self._nlist_builder.build(
+            coord, atype, box, self.rcut, list(inner.get_sel())
+        )
+        model_lower = inner.forward_common_lower(
+            ext_coord,
+            ext_atype,
+            nlist,
+            mapping,
+            fparam=fparam,
+            aparam=aparam,
+            do_atomic_virial=do_atomic_virial,
+            charge_spin=charge_spin,
+        )
+        predict = communicate_extended_output(
+            model_lower,
+            inner.model_output_def(),
+            mapping,
+            do_atomic_virial=do_atomic_virial,
+        )
+        return {
+            backend: predict[internal]
+            for internal, backend in self._OUTDEF_DP2BACKEND.items()
+            if predict.get(internal) is not None
+        }
+
     def _eval_model_spin(
         self,
         coords: np.ndarray,
diff --git a/deepmd/pt_expt/infer/deep_eval.py b/deepmd/pt_expt/infer/deep_eval.py
index 9db3e5de38..a45c92c96a 100644
--- a/deepmd/pt_expt/infer/deep_eval.py
+++ b/deepmd/pt_expt/infer/deep_eval.py
@@ -882,7 +882,7 @@ def _build_nlist_native(
         sel = self._sel
         mixed_types = self._mixed_types
 
-        if getattr(self, "_nlist_builder", None) is not None:
+        if self._nlist_builder is not None:
             # O(N) cell-list strategy (e.g. vesin): builds the same extended
             # representation; the compiled forward_common_lower re-formats the
             # candidate nlist (sort, truncate, type-split).
diff --git a/deepmd/pt_expt/utils/vesin_neighbor_list.py b/deepmd/pt_expt/utils/vesin_neighbor_list.py
index 92dcdd01a7..3ab7acd303 100644
--- a/deepmd/pt_expt/utils/vesin_neighbor_list.py
+++ b/deepmd/pt_expt/utils/vesin_neighbor_list.py
@@ -22,6 +22,7 @@
     Any,
 )
 
+import numpy as np
 import torch
 
 from deepmd.dpmodel.utils.neighbor_list import (
@@ -63,10 +64,22 @@ def build(
         type-splits it.
         """
         is_numpy = not isinstance(coord, torch.Tensor)
-        # Bridge numpy (dpmodel) through CPU torch; keep torch tensors on-device.
-        coord_t = torch.as_tensor(coord)
-        atype_t = torch.as_tensor(atype).to(torch.int64)
-        box_t = None if box is None else torch.as_tensor(box, dtype=coord_t.dtype)
+        # Bridge numpy (dpmodel) through CPU torch; keep torch tensors on their
+        # own device.  Avoid ``torch.as_tensor`` without an explicit device: under
+        # a non-CPU ambient default device (e.g. tests set a placeholder CUDA
+        # default) it can trigger CUDA init even for an already-CPU tensor.
+        if is_numpy:
+            coord_t = torch.from_numpy(np.ascontiguousarray(coord))
+            atype_t = torch.from_numpy(np.ascontiguousarray(atype)).to(torch.int64)
+            box_t = (
+                None
+                if box is None
+                else torch.from_numpy(np.ascontiguousarray(box)).to(coord_t.dtype)
+            )
+        else:
+            coord_t = coord
+            atype_t = atype.to(torch.int64)
+            box_t = None if box is None else box.to(coord_t.dtype)
 
         nframes = atype_t.shape[0]
         nloc = atype_t.shape[1]
diff --git a/source/tests/pt/model/test_nlist_backend.py b/source/tests/pt/model/test_nlist_backend.py
new file mode 100644
index 0000000000..5a9c83f2cf
--- /dev/null
+++ b/source/tests/pt/model/test_nlist_backend.py
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+"""``nlist_backend`` dispatch + vesin/native equivalence for the pt backend.
+
+The pt model is reconstructed eagerly in ``DeepEval`` and evaluated via
+``forward_common_lower`` when the O(N) vesin neighbor list is selected (the
+exported TorchScript graph is untouched).  native and vesin must give identical
+results, and the ``nlist_backend`` choice must dispatch / validate correctly.
+"""
+
+import copy
+
+import numpy as np
+import pytest
+import torch
+
+from deepmd.infer.deep_pot import (
+    DeepPot,
+)
+from deepmd.pt.model.model import (
+    get_model,
+)
+from deepmd.pt.train.wrapper import (
+    ModelWrapper,
+)
+from deepmd.pt_expt.utils.vesin_neighbor_list import (
+    is_vesin_torch_available,
+)
+
+pytestmark = pytest.mark.skipif(
+    not is_vesin_torch_available(), reason="vesin.torch is not installed"
+)
+
+TYPE_MAP = ["O", "H", "B"]
+
+model_se_e2_a = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "se_e2_a",
+        "sel": [20, 20, 8],
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [6, 12],
+        "resnet_dt": False,
+        "axis_neuron": 4,
+        "seed": 1,
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+model_dpa1 = {
+    "type_map": TYPE_MAP,
+    "descriptor": {
+        "type": "se_atten",
+        "sel": 40,
+        "rcut_smth": 0.5,
+        "rcut": 4.0,
+        "neuron": [6, 12, 24],
+        "axis_neuron": 4,
+        "attn": 16,
+        "attn_layer": 2,
+        "attn_dotr": True,
+        "attn_mask": False,
+        "activation_function": "tanh",
+        "set_davg_zero": True,
+        "type_one_side": True,
+        "seed": 1,
+    },
+    "fitting_net": {"neuron": [8, 8], "resnet_dt": True, "seed": 1},
+}
+
+ALL_MODELS = {"se_e2_a": model_se_e2_a, "dpa1": model_dpa1}
+
+
+def _save_pt(md_dict: dict, path: str) -> None:
+    """Write a minimal loadable .pt (state_dict + model_params) for DeepPot."""
+    model = get_model(copy.deepcopy(md_dict)).to(torch.float64)
+    wrapper = ModelWrapper(model, model_params=copy.deepcopy(md_dict))
+    torch.save(wrapper.state_dict(), path)
+
+
+def _system():
+    rng = np.random.default_rng(20240604)
+    coords = (rng.random((1, 8, 3)) * 6.0).astype(np.float64)
+    atype = np.array([0, 0, 1, 1, 2, 0, 1, 2], dtype=np.int64)
+    box = (np.eye(3) * 6.0).reshape(1, 9).astype(np.float64)
+    return coords, atype, box
+
+
+@pytest.fixture(scope="module")
+def pt_files(tmp_path_factory):
+    d = tmp_path_factory.mktemp("nlist_backend")
+    files = {}
+    for name, md in ALL_MODELS.items():
+        p = str(d / f"{name}.pt")
+        _save_pt(md, p)
+        files[name] = p
+    return files
+
+
+def test_default_is_auto(pt_files) -> None:
+    # vesin is available (module skip guard), non-spin/non-hessian -> auto picks it
+    assert DeepPot(pt_files["se_e2_a"]).deep_eval._use_vesin is True
+
+
+def test_native_disables_vesin(pt_files) -> None:
+    dp = DeepPot(pt_files["se_e2_a"], nlist_backend="native")
+    assert dp.deep_eval._use_vesin is False
+
+
+def test_invalid_raises(pt_files) -> None:
+    with pytest.raises(ValueError):
+        DeepPot(pt_files["se_e2_a"], nlist_backend="bogus")
+
+
+@pytest.mark.parametrize("name", list(ALL_MODELS))  # descriptor family
+@pytest.mark.parametrize("periodic", [False, True])  # non-PBC vs PBC
+def test_vesin_matches_native(pt_files, name: str, periodic: bool) -> None:
+    """Vesin and native give identical energy/force/virial/atomic-virial."""
+    coords, atype, box = _system()
+    cells = box if periodic else None
+    dp_native = DeepPot(pt_files[name], nlist_backend="native")
+    dp_vesin = DeepPot(pt_files[name], nlist_backend="vesin")
+    ref = dp_native.eval(coords, cells, atype, atomic=True)
+    out = dp_vesin.eval(coords, cells, atype, atomic=True)
+    for a, b, label in zip(ref, out, ["e", "f", "v", "ae", "av"], strict=True):
+        np.testing.assert_allclose(
+            a, b, rtol=1e-9, atol=1e-9, err_msg=f"{name} {label}"
+        )

From 6a00c4c2951afefb70f449dce201e0753877bdd5 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 3 Jun 2026 16:47:19 +0800
Subject: [PATCH 04/12] test(pt/pt_expt): add multi-frame vesin neighbor-list
 coverage

Add multi-frame tests (frames with different box sizes -> different per-frame
ghost counts) that exercise the VesinNeighborList per-frame loop +
pad-to-common-nall + stack path, which was previously only covered with
nframes=1:

- pt_expt: builder-level multi-frame neighbor-multiset equivalence (numpy +
  torch) and end-to-end model multi-frame equivalence (se_e2_a, dpa1).
- pt: multi-frame DeepPot.eval vesin-vs-native equivalence (se_e2_a, dpa1).

The pt suite now runs green on CPU under the cuda:9999999 sentinel default
device (the builder was made device-clean in the previous commit), so it no
longer depends on a GPU runner.
---
 source/tests/pt/model/test_nlist_backend.py   | 28 +++++++
 .../tests/pt_expt/utils/test_neighbor_list.py | 74 +++++++++++++++++++
 2 files changed, 102 insertions(+)

diff --git a/source/tests/pt/model/test_nlist_backend.py b/source/tests/pt/model/test_nlist_backend.py
index 5a9c83f2cf..ffc4b3e64f 100644
--- a/source/tests/pt/model/test_nlist_backend.py
+++ b/source/tests/pt/model/test_nlist_backend.py
@@ -86,6 +86,20 @@ def _system():
     return coords, atype, box
 
 
+def _multiframe_system(nframes: int = 3):
+    """Frames with different box sizes -> different per-frame ghost counts,
+    exercising the vesin builder's pad-to-common-nall + stack path.
+    """
+    rng = np.random.default_rng(20240604)
+    atype = np.array([0, 0, 1, 1, 2, 0, 1, 2], dtype=np.int64)
+    coords, boxes = [], []
+    for ff in range(nframes):
+        box_len = 6.0 + 1.5 * ff
+        coords.append((rng.random((len(atype), 3)) * box_len).astype(np.float64))
+        boxes.append((np.eye(3) * box_len).reshape(9).astype(np.float64))
+    return np.stack(coords, axis=0), atype, np.stack(boxes, axis=0)
+
+
 @pytest.fixture(scope="module")
 def pt_files(tmp_path_factory):
     d = tmp_path_factory.mktemp("nlist_backend")
@@ -126,3 +140,17 @@ def test_vesin_matches_native(pt_files, name: str, periodic: bool) -> None:
         np.testing.assert_allclose(
             a, b, rtol=1e-9, atol=1e-9, err_msg=f"{name} {label}"
         )
+
+
+@pytest.mark.parametrize("name", list(ALL_MODELS))  # descriptor family
+def test_vesin_matches_native_multiframe(pt_files, name: str) -> None:
+    """Multi-frame eval (frames with differing ghost counts) matches native."""
+    coords, atype, box = _multiframe_system()
+    dp_native = DeepPot(pt_files[name], nlist_backend="native")
+    dp_vesin = DeepPot(pt_files[name], nlist_backend="vesin")
+    ref = dp_native.eval(coords, box, atype, atomic=True)
+    out = dp_vesin.eval(coords, box, atype, atomic=True)
+    for a, b, label in zip(ref, out, ["e", "f", "v", "ae", "av"], strict=True):
+        np.testing.assert_allclose(
+            a, b, rtol=1e-9, atol=1e-9, err_msg=f"{name} {label}"
+        )
diff --git a/source/tests/pt_expt/utils/test_neighbor_list.py b/source/tests/pt_expt/utils/test_neighbor_list.py
index 603c209da3..517bc4d692 100644
--- a/source/tests/pt_expt/utils/test_neighbor_list.py
+++ b/source/tests/pt_expt/utils/test_neighbor_list.py
@@ -253,6 +253,25 @@ def _system(natoms: int = 6, box_len: float = 10.0, seed: int = GLOBAL_SEED):
     return coord, atype, box
 
 
+def _multiframe_system(nframes: int = 3, natoms: int = 6, seed: int = GLOBAL_SEED):
+    """Multi-frame 3-type system whose frames have *different* geometries (and
+    box sizes), so the per-frame ghost counts differ and the builder's
+    pad-to-common-nall + stack path is exercised.
+    """
+    rng = np.random.default_rng(seed)
+    coords, boxes = [], []
+    for ff in range(nframes):
+        box_len = 6.0 + 1.5 * ff  # vary box -> vary ghost count per frame
+        coords.append((rng.random((natoms, 3)) * box_len).astype(np.float64))
+        boxes.append((np.eye(3) * box_len).reshape(9).astype(np.float64))
+    coord = np.stack(coords, axis=0)
+    atype = np.tile(
+        np.array([[0, 0, 1, 1, 2, 0]], dtype=np.int64)[:, :natoms], (nframes, 1)
+    )
+    box = np.stack(boxes, axis=0)
+    return coord, atype, box
+
+
 def _per_atom_neighbor_dists(ext_coord, nlist, coord):
     """Sorted, rounded valid-neighbor distances for each local atom."""
     ext_coord = np.asarray(ext_coord).reshape(-1, 3)
@@ -326,6 +345,34 @@ def test_builder_outputs_on_input_device() -> None:
         assert t.device.type == device.type
 
 
+@pytest.mark.parametrize("periodic", [False, True])  # non-PBC vs PBC
+def test_builder_multiframe_matches_default(periodic: bool) -> None:
+    """Multi-frame build (frames with differing ghost counts) exercises the
+    pad-to-common-nall + stack path; every frame's neighbor multiset must still
+    match the default builder, in numpy and torch namespaces.
+    """
+    coord_np, atype_np, box_np = _multiframe_system()
+    box_np = box_np if periodic else None
+    rcut, sel = 4.0, [20, 20, 8]
+    ec_d, _, nl_d, _ = DefaultNeighborList().build(
+        coord_np, atype_np, box_np, rcut, sel
+    )
+    ec_v, _, nl_v, _ = VesinNeighborList().build(coord_np, atype_np, box_np, rcut, sel)
+    coord_t = torch.tensor(coord_np, dtype=torch.float64)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64)
+    box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
+    ec_vt, _, nl_vt, _ = VesinNeighborList().build(coord_t, atype_t, box_t, rcut, sel)
+    for ff in range(coord_np.shape[0]):
+        ref = _per_atom_neighbor_dists(ec_d[ff], nl_d[ff], coord_np[ff])
+        assert _per_atom_neighbor_dists(ec_v[ff], nl_v[ff], coord_np[ff]) == ref
+        assert (
+            _per_atom_neighbor_dists(
+                ec_vt[ff].cpu().numpy(), nl_vt[ff].cpu().numpy(), coord_np[ff]
+            )
+            == ref
+        )
+
+
 @pytest.mark.parametrize("name", list(ALL_MODELS))  # descriptor family
 @pytest.mark.parametrize("periodic", [False, True])  # non-PBC vs PBC
 def test_dpmodel_equivalence(name: str, periodic: bool) -> None:
@@ -373,6 +420,33 @@ def test_pt_expt_equivalence(name: str, periodic: bool) -> None:
         )
 
 
+@pytest.mark.parametrize("name", ["se_e2_a", "dpa1"])  # non-mixed + attention
+def test_pt_expt_multiframe_equivalence(name: str) -> None:
+    """Multi-frame (frames with differing ghost counts) pt_expt outputs are
+    invariant to the nlist strategy -- exercises the builder's per-frame pad +
+    stack feeding the batched model forward.
+    """
+    coord_np, atype_np, box_np = _multiframe_system()
+    model_dict = ALL_MODELS[name]
+    md = get_model(copy.deepcopy(model_dict))
+    md.eval()
+    atype_t = torch.tensor(atype_np, dtype=torch.int64)
+    box_t = torch.tensor(box_np, dtype=torch.float64)
+    results = {}
+    for tag, nl in (("def", DefaultNeighborList()), ("ves", VesinNeighborList())):
+        coord_t = torch.tensor(coord_np, dtype=torch.float64).requires_grad_(True)
+        results[tag] = md.forward(
+            coord_t, atype_t, box=box_t, do_atomic_virial=True, neighbor_list=nl
+        )
+    for k in ("energy", "force", "virial", "atom_virial"):
+        np.testing.assert_allclose(
+            results["def"][k].detach().cpu().numpy(),
+            results["ves"][k].detach().cpu().numpy(),
+            err_msg=f"{name} {k}",
+            **_tol(model_dict),
+        )
+
+
 @pytest.mark.parametrize("name", list(ALL_MODELS))  # descriptor family
 def test_default_fallback(name: str) -> None:
     """``neighbor_list=None`` equals an explicit DefaultNeighborList byte-for-byte."""

From 1ce1b796778a02c8e8ea64dd022a4ffa23c33e8f Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 3 Jun 2026 16:50:00 +0800
Subject: [PATCH 05/12] refactor(pt_expt): pin vesin input device with `with
 torch.device(...)`

Replace the from_numpy/ascontiguousarray branch with a clean `torch.as_tensor`
inside a `with torch.device(device)` block (device = CPU for numpy/dpmodel
inputs, the input tensor's device for torch), matching the existing
`with torch.device(...)` guard around `nl.compute` and the project convention
of pinning the device on tensor creation. Fixes the CUDA-init-under-placeholder-
default-device issue without the clunky numpy bridge.
---
 deepmd/pt_expt/utils/vesin_neighbor_list.py | 28 ++++++++-------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/deepmd/pt_expt/utils/vesin_neighbor_list.py b/deepmd/pt_expt/utils/vesin_neighbor_list.py
index 3ab7acd303..915c6859e3 100644
--- a/deepmd/pt_expt/utils/vesin_neighbor_list.py
+++ b/deepmd/pt_expt/utils/vesin_neighbor_list.py
@@ -22,7 +22,6 @@
     Any,
 )
 
-import numpy as np
 import torch
 
 from deepmd.dpmodel.utils.neighbor_list import (
@@ -64,22 +63,17 @@ def build(
         type-splits it.
         """
         is_numpy = not isinstance(coord, torch.Tensor)
-        # Bridge numpy (dpmodel) through CPU torch; keep torch tensors on their
-        # own device.  Avoid ``torch.as_tensor`` without an explicit device: under
-        # a non-CPU ambient default device (e.g. tests set a placeholder CUDA
-        # default) it can trigger CUDA init even for an already-CPU tensor.
-        if is_numpy:
-            coord_t = torch.from_numpy(np.ascontiguousarray(coord))
-            atype_t = torch.from_numpy(np.ascontiguousarray(atype)).to(torch.int64)
-            box_t = (
-                None
-                if box is None
-                else torch.from_numpy(np.ascontiguousarray(box)).to(coord_t.dtype)
-            )
-        else:
-            coord_t = coord
-            atype_t = atype.to(torch.int64)
-            box_t = None if box is None else box.to(coord_t.dtype)
+        # vesin runs on the device of the inputs: numpy (the dpmodel backend) is
+        # bridged through CPU torch; torch tensors stay on their own device.  Pin
+        # the ambient default device (cf. the ``with torch.device(...)`` guard
+        # around ``nl.compute`` below) so ``as_tensor`` is not affected by a
+        # placeholder default device -- e.g. tests set a CUDA default, under
+        # which a device-less ``as_tensor`` triggers CUDA init even for CPU input.
+        device = torch.device("cpu") if is_numpy else coord.device
+        with torch.device(device):
+            coord_t = torch.as_tensor(coord)
+            atype_t = torch.as_tensor(atype).to(torch.int64)
+            box_t = None if box is None else torch.as_tensor(box, dtype=coord_t.dtype)
 
         nframes = atype_t.shape[0]
         nloc = atype_t.shape[1]

From 2f57119f8d7aa185601e880251c4c6335af0edc5 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 3 Jun 2026 17:37:32 +0800
Subject: [PATCH 06/12] fix(pt_expt): handle zero-atom frames in
 VesinNeighborList

vesin's nl.compute rejects an empty `points` array ("`points` can not be a NULL
pointer"), so an all-empty system (e.g. test_zero_input, coords shape [nf,0,3])
crashed once vesin became the default builder. Return an empty extended
representation directly for a zero-atom frame, matching the native builder.

Fixes the CI failures in test_models.py::TestDeepPot_fparam_aparam_*::test_zero_input
(.pth and .pte). Adds a builder-level empty-system regression test.
---
 deepmd/pt_expt/utils/vesin_neighbor_list.py      | 12 ++++++++++++
 source/tests/pt_expt/utils/test_neighbor_list.py | 16 ++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/deepmd/pt_expt/utils/vesin_neighbor_list.py b/deepmd/pt_expt/utils/vesin_neighbor_list.py
index 915c6859e3..1d55548c56 100644
--- a/deepmd/pt_expt/utils/vesin_neighbor_list.py
+++ b/deepmd/pt_expt/utils/vesin_neighbor_list.py
@@ -143,6 +143,18 @@ def _build_single(
     device = positions.device
     nsel = sum(sel)
     nloc = positions.shape[0]
+
+    # Empty system: vesin rejects an empty `points` array ("NULL pointer").
+    # Return an empty extended representation directly, matching the native
+    # builder's handling of a zero-atom frame.
+    if nloc == 0:
+        return (
+            positions,
+            atype,
+            torch.full((0, nsel), -1, dtype=torch.int64, device=device),
+            torch.zeros((0,), dtype=torch.int64, device=device),
+        )
+
     periodic = cell is not None
     box = (
         cell if periodic else torch.zeros((3, 3), dtype=positions.dtype, device=device)
diff --git a/source/tests/pt_expt/utils/test_neighbor_list.py b/source/tests/pt_expt/utils/test_neighbor_list.py
index 517bc4d692..2dec0396ac 100644
--- a/source/tests/pt_expt/utils/test_neighbor_list.py
+++ b/source/tests/pt_expt/utils/test_neighbor_list.py
@@ -335,6 +335,22 @@ def test_builder_matches_default(periodic: bool) -> None:
     )
 
 
+@pytest.mark.parametrize("periodic", [False, True])  # non-PBC vs PBC
+def test_builder_empty_system(periodic: bool) -> None:
+    """A zero-atom frame must not crash vesin (which rejects empty points); the
+    builder returns an empty extended representation, matching the native path.
+    """
+    coord = np.zeros((1, 0, 3), dtype=np.float64)
+    atype = np.zeros((1, 0), dtype=np.int64)
+    box = (np.eye(3) * 10.0).reshape(1, 9).astype(np.float64) if periodic else None
+    sel = [20, 20, 8]
+    ec, ea, nl, mp = VesinNeighborList().build(coord, atype, box, 4.0, sel)
+    assert ec.shape == (1, 0, 3)
+    assert ea.shape == (1, 0)
+    assert nl.shape == (1, 0, sum(sel))
+    assert mp.shape == (1, 0)
+
+
 def test_builder_outputs_on_input_device() -> None:
     coord_np, atype_np, box_np = _system()
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

From def1a963d8ea0fdfd749e2e424b659bfc3552184 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 3 Jun 2026 18:36:44 +0800
Subject: [PATCH 07/12] fix(pt_expt): type-distinguish the vesin nlist for the
 eval_descriptor path

eval_descriptor calls the descriptor directly, bypassing forward_common_lower's
format_nlist. The native _build_nlist_native builds with
distinguish_types=not mixed_types, so for a non-mixed-type model it hands the
descriptor a type-blocked nlist; the vesin branch returned a non-distinguished
list, giving a wrong descriptor on this path (CI: TestEvalDescriptorASE).

Apply nlist_distinguish_types to the vesin output when not mixed_types, matching
the native builder. The main eval path is unaffected (its format_nlist re-formats;
energy/force/virial already matched native to ~1e-19). Adds a direct
eval_descriptor vesin-vs-native regression test.
---
 deepmd/pt_expt/infer/deep_eval.py            | 15 ++++++++++++---
 source/tests/pt_expt/infer/test_deep_eval.py | 19 +++++++++++++++++++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/deepmd/pt_expt/infer/deep_eval.py b/deepmd/pt_expt/infer/deep_eval.py
index a45c92c96a..42e4181d65 100644
--- a/deepmd/pt_expt/infer/deep_eval.py
+++ b/deepmd/pt_expt/infer/deep_eval.py
@@ -884,9 +884,18 @@ def _build_nlist_native(
 
         if self._nlist_builder is not None:
             # O(N) cell-list strategy (e.g. vesin): builds the same extended
-            # representation; the compiled forward_common_lower re-formats the
-            # candidate nlist (sort, truncate, type-split).
-            return self._nlist_builder.build(coords, atom_types, cells, rcut, sel)
+            # representation.  Match the native builder's type handling
+            # (``distinguish_types=not mixed_types``) so consumers that bypass
+            # ``forward_common_lower``'s ``format_nlist`` -- e.g.
+            # ``eval_descriptor`` calling the descriptor directly -- receive the
+            # type-distinguished nlist a non-mixed-type descriptor expects.  The
+            # main eval path is unaffected (its ``format_nlist`` re-formats).
+            extended_coord, extended_atype, nlist, mapping = self._nlist_builder.build(
+                coords, atom_types, cells, rcut, sel
+            )
+            if not mixed_types:
+                nlist = nlist_distinguish_types(nlist, extended_atype, sel)
+            return extended_coord, extended_atype, nlist, mapping
 
         if cells is not None:
             box_input = cells.reshape(nframes, 3, 3)
diff --git a/source/tests/pt_expt/infer/test_deep_eval.py b/source/tests/pt_expt/infer/test_deep_eval.py
index 2ce6b7b209..a921a94a7f 100644
--- a/source/tests/pt_expt/infer/test_deep_eval.py
+++ b/source/tests/pt_expt/infer/test_deep_eval.py
@@ -2277,6 +2277,25 @@ def test_vesin_matches_native(self) -> None:
                     a, b, rtol=1e-10, atol=1e-10, err_msg=f"vesin vs native: {name}"
                 )
 
+    def test_eval_descriptor_vesin_matches_native(self) -> None:
+        """eval_descriptor bypasses forward_common_lower's format_nlist, so the
+        vesin builder must apply the same type-distinguishing as the native
+        builder for a non-mixed-type model (regression for the eval_descriptor
+        mismatch).
+        """
+        if not self._vesin_available():
+            self.skipTest("vesin.torch is not installed")
+        rng = np.random.default_rng(GLOBAL_SEED + 31)
+        natoms = 6
+        coords = rng.random((1, natoms, 3)) * 8.0
+        atom_types = np.array([i % self.nt for i in range(natoms)], dtype=np.int32)
+        dp_native = DeepPot(self.tmpfile.name, nlist_backend="native")
+        dp_vesin = DeepPot(self.tmpfile.name, nlist_backend="vesin")
+        for cells in (np.eye(3).reshape(1, 9) * 10.0, None):  # PBC and non-PBC
+            d_native = dp_native.deep_eval.eval_descriptor(coords, cells, atom_types)
+            d_vesin = dp_vesin.deep_eval.eval_descriptor(coords, cells, atom_types)
+            np.testing.assert_allclose(d_native, d_vesin, rtol=1e-10, atol=1e-10)
+
 
 if __name__ == "__main__":
     unittest.main()

From 803ccb7d98069c3043cec5d958bf472e418a19e7 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Wed, 3 Jun 2026 22:15:30 +0800
Subject: [PATCH 08/12] fix(pt): disable vesin nlist for models carrying a data
 modifier

The pt vesin path runs forward_common_lower directly, bypassing
ModelWrapper.forward -- which is where a frozen model's data modifier is
applied. With auto->vesin as the default, frozen .pth models that carry a
modifier returned the un-modified (raw) prediction, breaking
test_data_modifier::test_inference.

Gate vesin off when a data modifier is present (auto falls back to native,
strict vesin raises), so the modifier is applied via ModelWrapper.forward as
before. `self.modifier` is now always initialized in __init__ (was only set on
the .pth branch) so the check needs no getattr fallback. Verified the dispatch
gating directly; the end-to-end behavior is covered by test_data_modifier.
---
 deepmd/pt/infer/deep_eval.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py
index 44c4cf30ca..9ebe91bdb2 100644
--- a/deepmd/pt/infer/deep_eval.py
+++ b/deepmd/pt/infer/deep_eval.py
@@ -142,6 +142,8 @@ def __init__(
         self.output_def = output_def
         self.model_path = model_file
         self.neighbor_list = neighbor_list
+        # data modifier, populated only for frozen .pth models that carry one
+        self.modifier = None
         if str(self.model_path).endswith(".pt"):
             state_dict = torch.load(
                 model_file, map_location=env.DEVICE, weights_only=True
@@ -270,6 +272,11 @@ def _setup_nlist_backend(self, nlist_backend: str) -> None:
             unsupported = "spin models"
         elif self._has_hessian:
             unsupported = "hessian models"
+        elif self.modifier is not None:
+            # the vesin path runs forward_common_lower directly, bypassing
+            # ModelWrapper.forward (which applies the data modifier); fall back
+            # to the native path so the modifier is still applied.
+            unsupported = "models with a data modifier"
         ase_provided = self.neighbor_list is not None
         if nlist_backend == "native":
             self._use_vesin = False

From b871e91800c96cb295f10feb2bb071fe28060dcb Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Thu, 4 Jun 2026 00:44:30 +0800
Subject: [PATCH 09/12] fix(pt): restrict vesin nlist to energy models

_eval_lower_vesin reconstructs the backend output from forward_common_lower /
communicate keys via _OUTDEF_DP2BACKEND, which only matches the model's own
output translation for the energy model. For other fitting types the keys
differ (e.g. the polar fitting output is "polarizability" but the backend name
is "polar"), so the vesin path dropped those outputs and returned NaN -- breaking
test_polarizability_fitting once auto->vesin became the default.

Gate vesin off (auto -> native, strict vesin -> raise) when the model is not an
energy model. Energy is the large-system inference target; polar/dipole/dos/
property fall back to the native builder, which goes through the model's own
forward and is correct.
---
 deepmd/pt/infer/deep_eval.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py
index 9ebe91bdb2..e398ac9210 100644
--- a/deepmd/pt/infer/deep_eval.py
+++ b/deepmd/pt/infer/deep_eval.py
@@ -277,6 +277,15 @@ def _setup_nlist_backend(self, nlist_backend: str) -> None:
             # ModelWrapper.forward (which applies the data modifier); fall back
             # to the native path so the modifier is still applied.
             unsupported = "models with a data modifier"
+        elif "energy" not in self.dp.model["Default"].model_output_type():
+            # _eval_lower_vesin reconstructs the backend output from the
+            # forward_common_lower / communicate keys via _OUTDEF_DP2BACKEND,
+            # which matches the model's own translation only for the energy
+            # model (e.g. the polar fitting key is "polarizability" but the
+            # backend output is "polar").  Restrict vesin to energy models --
+            # the large-system inference target -- and fall back to native
+            # for the other fitting types.
+            unsupported = "non-energy models"
         ase_provided = self.neighbor_list is not None
         if nlist_backend == "native":
             self._use_vesin = False

From 1c7f2ef1c480ffb9c819d89dd96d0e104e21db11 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Fri, 5 Jun 2026 00:26:51 +0800
Subject: [PATCH 10/12] docs(model): type and document neighbor_list on
 ener_model call/forward

Replace the meaningless 'Any' annotation on the neighbor_list parameter of
EnergyModel.call (dpmodel) and EnergyModel.forward (pt_expt) with
'NeighborList | None' and add a docstring describing it, addressing the
review feedback.
---
 deepmd/dpmodel/model/ener_model.py | 20 +++++++++++++++++++-
 deepmd/pt_expt/model/ener_model.py | 19 ++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/deepmd/dpmodel/model/ener_model.py b/deepmd/dpmodel/model/ener_model.py
index 80619f60de..a8280dbebf 100644
--- a/deepmd/dpmodel/model/ener_model.py
+++ b/deepmd/dpmodel/model/ener_model.py
@@ -21,6 +21,9 @@
 from deepmd.dpmodel.output_def import (
     FittingOutputDef,
 )
+from deepmd.dpmodel.utils.neighbor_list import (
+    NeighborList,
+)
 
 from .dp_model import (
     DPModelCommon,
@@ -88,8 +91,23 @@ def call(
         aparam: Array | None = None,
         do_atomic_virial: bool = False,
         charge_spin: Array | None = None,
-        neighbor_list: Any = None,
+        neighbor_list: NeighborList | None = None,
     ) -> dict[str, Array]:
+        """Evaluate the energy model.
+
+        Most arguments share the meaning of :meth:`call_common`.
+
+        Parameters
+        ----------
+        neighbor_list
+            The neighbor-list construction strategy forwarded to
+            :meth:`call_common`.  ``None`` uses the default all-pairs builder
+            (:class:`~deepmd.dpmodel.utils.default_neighbor_list.DefaultNeighborList`,
+            a :class:`~deepmd.dpmodel.utils.neighbor_list.NeighborList` subclass),
+            reproducing the historical behavior; an alternative strategy may be
+            injected to accelerate neighbor-list construction without changing
+            the model outputs.
+        """
         model_ret = self.call_common(
             coord,
             atype,
diff --git a/deepmd/pt_expt/model/ener_model.py b/deepmd/pt_expt/model/ener_model.py
index 5d5409b028..4f868043b6 100644
--- a/deepmd/pt_expt/model/ener_model.py
+++ b/deepmd/pt_expt/model/ener_model.py
@@ -18,6 +18,9 @@
 from deepmd.dpmodel.model.make_hessian_model import (
     make_hessian_model,
 )
+from deepmd.dpmodel.utils.neighbor_list import (
+    NeighborList,
+)
 
 from .make_model import (
     make_model,
@@ -59,8 +62,22 @@ def forward(
         aparam: torch.Tensor | None = None,
         do_atomic_virial: bool = False,
         charge_spin: torch.Tensor | None = None,
-        neighbor_list: Any = None,
+        neighbor_list: NeighborList | None = None,
     ) -> dict[str, torch.Tensor]:
+        """Evaluate the energy model.
+
+        Most arguments share the meaning of :meth:`call_common`.
+
+        Parameters
+        ----------
+        neighbor_list
+            The neighbor-list construction strategy forwarded to
+            :meth:`call_common`.  ``None`` uses the default all-pairs builder
+            (:class:`~deepmd.dpmodel.utils.default_neighbor_list.DefaultNeighborList`),
+            reproducing the historical behavior; an alternative strategy (e.g.
+            the ``vesin`` O(N) cell list) may be injected to accelerate
+            neighbor-list construction without changing the model outputs.
+        """
         model_ret = self.call_common(
             coord,
             atype,

From a78a564e811162237457b7675907b208c869de83 Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Sat, 6 Jun 2026 09:36:43 +0800
Subject: [PATCH 11/12] test(pt_expt): place neighbor-list equivalence inputs
 on DEVICE

The pt_expt model-equivalence tests in test_neighbor_list.py built their
input tensors as plain CPU tensors while pt_expt registers all model buffers
on env.DEVICE (torch.as_tensor(..., device=env.DEVICE)).  On a CUDA runner the
buffers live on cuda:0 but the inputs stayed on cpu, so env_mat's
index_select(davg, atype) raised a device-mismatch RuntimeError -- failing the
merge-queue CUDA gate (26 tests) while the CPU CI was green.

Move the model and all input tensors onto DEVICE (matching the existing
suite idiom in test_finetune.py): get_model(...).to(DEVICE) plus device=DEVICE
on coord/atype/box.  No-op on CPU; fixes the CUDA path.
---
 .../tests/pt_expt/utils/test_neighbor_list.py | 37 +++++++++++++------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/source/tests/pt_expt/utils/test_neighbor_list.py b/source/tests/pt_expt/utils/test_neighbor_list.py
index 2dec0396ac..79922940d3 100644
--- a/source/tests/pt_expt/utils/test_neighbor_list.py
+++ b/source/tests/pt_expt/utils/test_neighbor_list.py
@@ -36,6 +36,9 @@
 from deepmd.pt_expt.model import (
     get_model,
 )
+from deepmd.pt_expt.utils.env import (
+    DEVICE,
+)
 from deepmd.pt_expt.utils.vesin_neighbor_list import (
     VesinNeighborList,
     is_vesin_torch_available,
@@ -414,13 +417,19 @@ def test_pt_expt_equivalence(name: str, periodic: bool) -> None:
     coord_np, atype_np, box_np = _system()
     box_np = box_np if periodic else None
     model_dict = ALL_MODELS[name]
-    md = get_model(copy.deepcopy(model_dict))
+    md = get_model(copy.deepcopy(model_dict)).to(DEVICE)
     md.eval()
-    box_t = None if box_np is None else torch.tensor(box_np, dtype=torch.float64)
-    atype_t = torch.tensor(atype_np, dtype=torch.int64)
+    box_t = (
+        None
+        if box_np is None
+        else torch.tensor(box_np, dtype=torch.float64, device=DEVICE)
+    )
+    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=DEVICE)
     results = {}
     for tag, nl in (("def", DefaultNeighborList()), ("ves", VesinNeighborList())):
-        coord_t = torch.tensor(coord_np, dtype=torch.float64).requires_grad_(True)
+        coord_t = torch.tensor(
+            coord_np, dtype=torch.float64, device=DEVICE
+        ).requires_grad_(True)
         results[tag] = md.forward(
             coord_t, atype_t, box=box_t, do_atomic_virial=True, neighbor_list=nl
         )
@@ -444,13 +453,15 @@ def test_pt_expt_multiframe_equivalence(name: str) -> None:
     """
     coord_np, atype_np, box_np = _multiframe_system()
     model_dict = ALL_MODELS[name]
-    md = get_model(copy.deepcopy(model_dict))
+    md = get_model(copy.deepcopy(model_dict)).to(DEVICE)
     md.eval()
-    atype_t = torch.tensor(atype_np, dtype=torch.int64)
-    box_t = torch.tensor(box_np, dtype=torch.float64)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=DEVICE)
+    box_t = torch.tensor(box_np, dtype=torch.float64, device=DEVICE)
     results = {}
     for tag, nl in (("def", DefaultNeighborList()), ("ves", VesinNeighborList())):
-        coord_t = torch.tensor(coord_np, dtype=torch.float64).requires_grad_(True)
+        coord_t = torch.tensor(
+            coord_np, dtype=torch.float64, device=DEVICE
+        ).requires_grad_(True)
         results[tag] = md.forward(
             coord_t, atype_t, box=box_t, do_atomic_virial=True, neighbor_list=nl
         )
@@ -467,16 +478,18 @@ def test_pt_expt_multiframe_equivalence(name: str) -> None:
 def test_default_fallback(name: str) -> None:
     """``neighbor_list=None`` equals an explicit DefaultNeighborList byte-for-byte."""
     coord_np, atype_np, box_np = _system()
-    md = get_model(copy.deepcopy(ALL_MODELS[name]))
+    md = get_model(copy.deepcopy(ALL_MODELS[name])).to(DEVICE)
     md.eval()
-    box_t = torch.tensor(box_np, dtype=torch.float64)
-    atype_t = torch.tensor(atype_np, dtype=torch.int64)
+    box_t = torch.tensor(box_np, dtype=torch.float64, device=DEVICE)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=DEVICE)
     outs = {}
     for tag, kw in (
         ("none", {}),
         ("explicit", {"neighbor_list": DefaultNeighborList()}),
     ):
-        coord_t = torch.tensor(coord_np, dtype=torch.float64).requires_grad_(True)
+        coord_t = torch.tensor(
+            coord_np, dtype=torch.float64, device=DEVICE
+        ).requires_grad_(True)
         outs[tag] = md.forward(coord_t, atype_t, box=box_t, do_atomic_virial=True, **kw)
     for k in ("energy", "force", "virial"):
         np.testing.assert_array_equal(

From cc3ef42e21ab87d75e11ee4fa46d557b0350d10c Mon Sep 17 00:00:00 2001
From: Han Wang <wang_han@iapcm.ac.cn>
Date: Sat, 6 Jun 2026 09:44:08 +0800
Subject: [PATCH 12/12] test(pt_expt): use env.DEVICE instead of
 value-importing DEVICE

CodeQL flagged the value-import `from deepmd.pt_expt.utils.env import DEVICE`
(import of a mutable module attribute): DEVICE is rebound at runtime by the
CPU-forcing context manager in serialization.py, so a value-import would not
observe the change.  Switch to the dominant suite convention -- import the env
module and reference env.DEVICE -- which both silences the alert and tracks any
rebinding.  No behavioral change.
---
 .../tests/pt_expt/utils/test_neighbor_list.py | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/source/tests/pt_expt/utils/test_neighbor_list.py b/source/tests/pt_expt/utils/test_neighbor_list.py
index 79922940d3..ca959eabe8 100644
--- a/source/tests/pt_expt/utils/test_neighbor_list.py
+++ b/source/tests/pt_expt/utils/test_neighbor_list.py
@@ -36,8 +36,8 @@
 from deepmd.pt_expt.model import (
     get_model,
 )
-from deepmd.pt_expt.utils.env import (
-    DEVICE,
+from deepmd.pt_expt.utils import (
+    env,
 )
 from deepmd.pt_expt.utils.vesin_neighbor_list import (
     VesinNeighborList,
@@ -417,18 +417,18 @@ def test_pt_expt_equivalence(name: str, periodic: bool) -> None:
     coord_np, atype_np, box_np = _system()
     box_np = box_np if periodic else None
     model_dict = ALL_MODELS[name]
-    md = get_model(copy.deepcopy(model_dict)).to(DEVICE)
+    md = get_model(copy.deepcopy(model_dict)).to(env.DEVICE)
     md.eval()
     box_t = (
         None
         if box_np is None
-        else torch.tensor(box_np, dtype=torch.float64, device=DEVICE)
+        else torch.tensor(box_np, dtype=torch.float64, device=env.DEVICE)
     )
-    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=DEVICE)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=env.DEVICE)
     results = {}
     for tag, nl in (("def", DefaultNeighborList()), ("ves", VesinNeighborList())):
         coord_t = torch.tensor(
-            coord_np, dtype=torch.float64, device=DEVICE
+            coord_np, dtype=torch.float64, device=env.DEVICE
         ).requires_grad_(True)
         results[tag] = md.forward(
             coord_t, atype_t, box=box_t, do_atomic_virial=True, neighbor_list=nl
@@ -453,14 +453,14 @@ def test_pt_expt_multiframe_equivalence(name: str) -> None:
     """
     coord_np, atype_np, box_np = _multiframe_system()
     model_dict = ALL_MODELS[name]
-    md = get_model(copy.deepcopy(model_dict)).to(DEVICE)
+    md = get_model(copy.deepcopy(model_dict)).to(env.DEVICE)
     md.eval()
-    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=DEVICE)
-    box_t = torch.tensor(box_np, dtype=torch.float64, device=DEVICE)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=env.DEVICE)
+    box_t = torch.tensor(box_np, dtype=torch.float64, device=env.DEVICE)
     results = {}
     for tag, nl in (("def", DefaultNeighborList()), ("ves", VesinNeighborList())):
         coord_t = torch.tensor(
-            coord_np, dtype=torch.float64, device=DEVICE
+            coord_np, dtype=torch.float64, device=env.DEVICE
         ).requires_grad_(True)
         results[tag] = md.forward(
             coord_t, atype_t, box=box_t, do_atomic_virial=True, neighbor_list=nl
@@ -478,17 +478,17 @@ def test_pt_expt_multiframe_equivalence(name: str) -> None:
 def test_default_fallback(name: str) -> None:
     """``neighbor_list=None`` equals an explicit DefaultNeighborList byte-for-byte."""
     coord_np, atype_np, box_np = _system()
-    md = get_model(copy.deepcopy(ALL_MODELS[name])).to(DEVICE)
+    md = get_model(copy.deepcopy(ALL_MODELS[name])).to(env.DEVICE)
     md.eval()
-    box_t = torch.tensor(box_np, dtype=torch.float64, device=DEVICE)
-    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=DEVICE)
+    box_t = torch.tensor(box_np, dtype=torch.float64, device=env.DEVICE)
+    atype_t = torch.tensor(atype_np, dtype=torch.int64, device=env.DEVICE)
     outs = {}
     for tag, kw in (
         ("none", {}),
         ("explicit", {"neighbor_list": DefaultNeighborList()}),
     ):
         coord_t = torch.tensor(
-            coord_np, dtype=torch.float64, device=DEVICE
+            coord_np, dtype=torch.float64, device=env.DEVICE
         ).requires_grad_(True)
         outs[tag] = md.forward(coord_t, atype_t, box=box_t, do_atomic_virial=True, **kw)
     for k in ("energy", "force", "virial"):