diff --git a/data/raw/raw_to_set.sh b/data/raw/raw_to_set.sh index 3971a9c279..c27b6e27e9 100755 --- a/data/raw/raw_to_set.sh +++ b/data/raw/raw_to_set.sh @@ -37,6 +37,8 @@ for ii in $(seq 0 $nset_1); do test -f virial.raw$pi && mv virial.raw$pi set.$pi/virial.raw test -f atom_ener.raw$pi && mv atom_ener.raw$pi set.$pi/atom_ener.raw test -f fparam.raw$pi && mv fparam.raw$pi set.$pi/fparam.raw + test -f dipole.raw$pi && mv dipole.raw$pi set.$pi/dipole.raw + test -f polarizability.raw$pi && mv polarizability.raw$pi set.$pi/polarizability.raw test -f atomic_dipole.raw$pi && mv atomic_dipole.raw$pi set.$pi/atomic_dipole.raw test -f atomic_polarizability.raw$pi && mv atomic_polarizability.raw$pi set.$pi/atomic_polarizability.raw diff --git a/source/tests/common/test_raw_to_set.py b/source/tests/common/test_raw_to_set.py new file mode 100644 index 0000000000..1c1cdb0378 --- /dev/null +++ b/source/tests/common/test_raw_to_set.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Tests for ``data/raw/raw_to_set.sh``. + +The script splits ``*.raw`` label files into per-set chunks and converts each +chunk into ``*.npy`` inside ``set.<index>/``. Historically the per-set *move* block +omitted the global ``dipole.raw``/``polarizability.raw`` chunks, so datasets +carrying global dipole/polarizability labels silently lost them (the ``*.npy`` +files were never generated). These tests lock in the move/convert symmetry for +every tensor label the script splits. 
+""" + +import shutil +import subprocess +from pathlib import ( + Path, +) + +import numpy as np +import pytest + +# repo root: source/tests/common/test_raw_to_set.py -> parents[3] +RAW_TO_SET = Path(__file__).parents[3] / "data" / "raw" / "raw_to_set.sh" + + +@pytest.mark.skipif( + shutil.which("bash") is None or shutil.which("split") is None, + reason="raw_to_set.sh requires bash and split on PATH", +) +@pytest.mark.parametrize( + "label,ncol", + [ + ("dipole", 3), # global dipole (regression: previously dropped) + ("polarizability", 9), # global polarizability (regression: previously dropped) + ("atomic_dipole", 3), # already-working path, documents intended symmetry + ("atomic_polarizability", 9), # already-working path + ], +) +def test_raw_to_set_preserves_tensor_labels( + tmp_path: Path, label: str, ncol: int +) -> None: + """Every split tensor label must be converted to ``set./