Skip to content
62 changes: 62 additions & 0 deletions deepmd/dpmodel/atomic_model/base_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,45 @@ def __init__(
self.rcond = rcond
self.preset_out_bias = preset_out_bias
self.data_stat_protect = data_stat_protect
self._observed_type: list[str] | None = None

@property
def observed_type(self) -> list[str] | None:
"""Get the observed element type list from data statistics."""
return self._observed_type

def _collect_and_set_observed_type(
self,
sampled_func: Callable[[], list[dict]],
stat_file_path: DPPath | None,
preset_observed_type: list[str] | None,
) -> None:
"""Collect observed types with priority: preset > stat_file > compute.

Parameters
----------
sampled_func
The lazy sampled function to get data frames.
stat_file_path
The path to the statistics files (should already include type_map suffix).
preset_observed_type
User-specified observed types that take highest priority.
"""
from deepmd.dpmodel.utils.stat import (
_restore_observed_type_from_file,
_save_observed_type_to_file,
collect_observed_types,
)

if preset_observed_type is not None:
self._observed_type = preset_observed_type
else:
observed = _restore_observed_type_from_file(stat_file_path)
if observed is None:
sampled = sampled_func()
observed = collect_observed_types(sampled, self.type_map)
_save_observed_type_to_file(stat_file_path, observed)
self._observed_type = observed

def init_out_stat(self) -> None:
"""Initialize the output bias."""
Expand Down Expand Up @@ -271,6 +310,29 @@ def get_compute_stats_distinguish_types(self) -> bool:
"""Get whether the fitting net computes stats which are not distinguished between different types of atoms."""
return True

def compute_or_load_stat(
    self,
    sampled_func: Callable[[], list[dict]],
    stat_file_path: DPPath | None = None,
    compute_or_load_out_stat: bool = True,
    preset_observed_type: list[str] | None = None,
) -> None:
    """Compute or load the statistics parameters of the model.

    Examples of such parameters are the mean and standard deviation of
    descriptors, or the energy bias of the fitting net. This
    implementation does no work itself and only raises.

    Parameters
    ----------
    sampled_func
        The lazy sampled function to get data frames from different data
        systems.
    stat_file_path
        The path to the stat file.
    compute_or_load_out_stat : bool
        Whether to compute the output statistics. If False, only the input
        statistics (e.g. mean and standard deviation of descriptors) are
        computed.
    preset_observed_type
        User-specified observed types; unused by this implementation.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError

def compute_or_load_out_stat(
self,
merged: Callable[[], list[dict]] | list[dict],
Expand Down
5 changes: 5 additions & 0 deletions deepmd/dpmodel/atomic_model/dp_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ def compute_or_load_stat(
sampled_func: Callable[[], list[dict]],
stat_file_path: DPPath | None = None,
compute_or_load_out_stat: bool = True,
preset_observed_type: list[str] | None = None,
) -> None:
"""Compute or load the statistics parameters of the model,
such as mean and standard deviation of descriptors or the energy bias of the fitting net.
Expand All @@ -227,6 +228,10 @@ def compute_or_load_stat(
if compute_or_load_out_stat:
self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)

self._collect_and_set_observed_type(
wrapped_sampler, stat_file_path, preset_observed_type
)

def change_type_map(
self, type_map: list[str], model_with_new_type_stat: Any | None = None
) -> None:
Expand Down
15 changes: 14 additions & 1 deletion deepmd/dpmodel/atomic_model/linear_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ def compute_or_load_stat(
sampled_func: Callable[[], list[dict]],
stat_file_path: DPPath | None = None,
compute_or_load_out_stat: bool = True,
preset_observed_type: list[str] | None = None,
) -> None:
"""Compute or load the statistics parameters of the model.

Expand All @@ -364,9 +365,21 @@ def compute_or_load_stat(
compute_or_load_out_stat : bool
Whether to compute the output statistics.
"""
# Compute observed type once at parent level, then propagate to
# sub-models via preset_observed_type to avoid redundant computation.
obs_stat_path = stat_file_path
if obs_stat_path is not None and self.type_map is not None:
obs_stat_path = obs_stat_path / " ".join(self.type_map)
self._collect_and_set_observed_type(
sampled_func, obs_stat_path, preset_observed_type
)

for md in self.models:
md.compute_or_load_stat(
sampled_func, stat_file_path, compute_or_load_out_stat=False
sampled_func,
stat_file_path,
compute_or_load_out_stat=False,
preset_observed_type=self._observed_type,
)

if stat_file_path is not None and self.type_map is not None:
Expand Down
10 changes: 10 additions & 0 deletions deepmd/dpmodel/atomic_model/pairtab_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def compute_or_load_stat(
sampled_func: Callable[[], list[dict]],
stat_file_path: DPPath | None = None,
compute_or_load_out_stat: bool = True,
preset_observed_type: list[str] | None = None,
) -> None:
"""Compute or load the statistics parameters of the model.

Expand All @@ -235,6 +236,15 @@ def compute_or_load_stat(
wrapped_sampler = self._make_wrapped_sampler(sampled_func)
self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)

if stat_file_path is not None and self.type_map is not None:
stat_file_path /= " ".join(self.type_map)

self._collect_and_set_observed_type(
sampled_func if callable(sampled_func) else lambda: sampled_func,
stat_file_path,
preset_observed_type,
)

def forward_atomic(
self,
extended_coord: Array,
Expand Down
28 changes: 28 additions & 0 deletions deepmd/dpmodel/infer/deep_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@
from deepmd.infer.deep_wfc import (
DeepWFC,
)
from deepmd.utils.econf_embd import (
sort_element_type,
)

if TYPE_CHECKING:
import ase.neighborlist
Expand Down Expand Up @@ -403,6 +406,31 @@ def get_model_def_script(self) -> dict:
"""Get model definition script."""
return json.loads(self.dp.get_model_def_script())

def get_observed_types(self) -> dict:
    """Report the element types the model observed during data statistics.

    Returns
    -------
    dict
        A dictionary with two entries:
        - 'type_num': the total number of observed types in this model.
        - 'observed_type': a list of the observed types in this model.
    """
    # Preferred source: the list recorded under info/observed_type in the
    # model definition script; it is returned as stored.
    recorded = self.get_model_def_script().get("info", {}).get("observed_type")
    if recorded is not None:
        return {"type_num": len(recorded), "observed_type": recorded}

    # No recorded list (old models): fall back to the bias-based list
    # provided by the model and sort it by element.
    from_model = self.dp.get_observed_type_list()
    return {
        "type_num": len(from_model),
        "observed_type": sort_element_type(from_model),
    }

def get_model(self) -> "BaseModel":
"""Get the dpmodel BaseModel.

Expand Down
9 changes: 8 additions & 1 deletion deepmd/dpmodel/model/make_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,8 @@ def get_out_bias(self) -> Array:
def get_observed_type_list(self) -> list[str]:
"""Get observed types (elements) of the model during data statistics.

Bias-based fallback for old models without metadata.

Returns
-------
list[str]
Expand Down Expand Up @@ -718,6 +720,7 @@ def compute_or_load_stat(
self,
sampled_func: Callable[[], Any],
stat_file_path: DPPath | None = None,
preset_observed_type: list[str] | None = None,
) -> None:
"""Compute or load the statistics parameters of the model.

Expand All @@ -728,8 +731,12 @@ def compute_or_load_stat(
data systems.
stat_file_path
The path to the stat file.
preset_observed_type
User-specified observed types that take highest priority.
"""
self.atomic_model.compute_or_load_stat(sampled_func, stat_file_path)
self.atomic_model.compute_or_load_stat(
sampled_func, stat_file_path, preset_observed_type=preset_observed_type
)

def get_model_def_script(self) -> str:
"""Get the model definition script."""
Expand Down
56 changes: 56 additions & 0 deletions deepmd/dpmodel/utils/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,62 @@
log = logging.getLogger(__name__)


def collect_observed_types(sampled: list[dict], type_map: list[str]) -> list[str]:
    """Collect observed element types from sampled training data.

    Parameters
    ----------
    sampled : list[dict]
        Sampled data from different data systems. Each dict must contain
        ``"atype"`` with shape ``[nframes, natoms]``.
    type_map : list[str]
        Mapping from type index to element symbol.

    Returns
    -------
    list[str]
        Observed element symbols, ordered by ``sort_element_type``.
        Type indices outside ``range(len(type_map))`` are ignored.
    """
    from deepmd.utils.econf_embd import (
        sort_element_type,
    )

    observed_indices: set[int] = set()
    for system in sampled:
        atype = to_numpy_array(system["atype"])  # shape: [nframes, natoms]
        observed_indices.update(np.unique(atype).tolist())
    n_types = len(type_map)
    # The lower bound matters: a negative index (e.g. a placeholder type in
    # the data) would otherwise wrap around through Python's negative
    # indexing and report an element from the end of type_map as observed.
    observed_types = [
        type_map[i] for i in sorted(observed_indices) if 0 <= i < n_types
    ]
    return sort_element_type(observed_types)


def _restore_observed_type_from_file(
stat_file_path: DPPath | None,
) -> list[str] | None:
"""Try to load observed_type from stat file."""
if stat_file_path is None:
return None
fp = stat_file_path / "observed_type"
if fp.is_file():
arr = fp.load_numpy()
# Decode bytes back to str if stored as bytes (for h5py compatibility)
return [x.decode() if isinstance(x, bytes) else x for x in arr.tolist()]
return None


def _save_observed_type_to_file(
stat_file_path: DPPath | None, observed_type: list[str]
) -> None:
"""Save observed_type to stat file."""
if stat_file_path is None:
return
stat_file_path.mkdir(exist_ok=True, parents=True)
fp = stat_file_path / "observed_type"
# Use bytes dtype for h5py compatibility (h5py cannot store Unicode strings)
fp.save_numpy(np.array(observed_type, dtype="S"))


def _restore_from_file(
stat_file_path: DPPath,
keys: list[str],
Expand Down
9 changes: 8 additions & 1 deletion deepmd/entrypoints/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ def show(
)
else:
log.info("The observed types for this model: ")
observed_types = model.get_observed_types()
observed_type_list = model_params.get("info", {}).get("observed_type")
if observed_type_list is not None:
observed_types = {
"type_num": len(observed_type_list),
"observed_type": observed_type_list,
}
else:
observed_types = model.get_observed_types()
log.info(f"Number of observed types: {observed_types['type_num']} ")
log.info(f"Observed types: {observed_types['observed_type']} ")
8 changes: 8 additions & 0 deletions deepmd/pt/infer/deep_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,14 @@ def get_observed_types(self) -> dict:
- 'type_num': the total number of observed types in this model.
- 'observed_type': a list of the observed types in this model.
"""
# Try metadata first (from model_def_script, already a dict)
observed_type_list = self.model_def_script.get("info", {}).get("observed_type")
if observed_type_list is not None:
return {
"type_num": len(observed_type_list),
"observed_type": observed_type_list,
}
# Fallback: bias-based approach for old models
observed_type_list = self.dp.model["Default"].get_observed_type_list()
return {
"type_num": len(observed_type_list),
Expand Down
40 changes: 40 additions & 0 deletions deepmd/pt/model/atomic_model/base_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,45 @@ def __init__(
self.rcond = rcond
self.preset_out_bias = preset_out_bias
self.data_stat_protect = data_stat_protect
self._observed_type: list[str] | None = None

@property
def observed_type(self) -> list[str] | None:
"""Get the observed element type list from data statistics."""
return self._observed_type

def _collect_and_set_observed_type(
self,
sampled_func: Callable[[], list[dict]],
stat_file_path: "DPPath | None",
preset_observed_type: list[str] | None,
) -> None:
"""Collect observed types with priority: preset > stat_file > compute.

Parameters
----------
sampled_func
The lazy sampled function to get data frames.
stat_file_path
The path to the statistics files (should already include type_map suffix).
preset_observed_type
User-specified observed types that take highest priority.
"""
from deepmd.dpmodel.utils.stat import (
_restore_observed_type_from_file,
_save_observed_type_to_file,
collect_observed_types,
)

if preset_observed_type is not None:
self._observed_type = preset_observed_type
else:
observed = _restore_observed_type_from_file(stat_file_path)
if observed is None:
sampled = sampled_func()
observed = collect_observed_types(sampled, self.type_map)
_save_observed_type_to_file(stat_file_path, observed)
self._observed_type = observed

def init_out_stat(self) -> None:
"""Initialize the output bias."""
Expand Down Expand Up @@ -376,6 +415,7 @@ def compute_or_load_stat(
merged: Callable[[], list[dict]] | list[dict],
stat_file_path: DPPath | None = None,
compute_or_load_out_stat: bool = True,
preset_observed_type: list[str] | None = None,
) -> NoReturn:
"""
Compute or load the statistics parameters of the model,
Expand Down
5 changes: 5 additions & 0 deletions deepmd/pt/model/atomic_model/dp_atomic_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ def compute_or_load_stat(
sampled_func: Callable[[], list[dict]],
stat_file_path: DPPath | None = None,
compute_or_load_out_stat: bool = True,
preset_observed_type: list[str] | None = None,
) -> None:
"""
Compute or load the statistics parameters of the model,
Expand Down Expand Up @@ -355,6 +356,10 @@ def wrapped_sampler() -> list[dict]:
if compute_or_load_out_stat:
self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)

self._collect_and_set_observed_type(
wrapped_sampler, stat_file_path, preset_observed_type
)

def compute_fitting_input_stat(
self,
sample_merged: Callable[[], list[dict]] | list[dict],
Expand Down
Loading