From f216b9b0b85105fae4711f550d7bcdf244c8cf1f Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 1 Jul 2026 13:06:42 +0800 Subject: [PATCH] fix(data): move split global dipole/polarizability into set dirs raw_to_set.sh splits global dipole.raw/polarizability.raw into per-set chunks and the in-set conversion block expects dipole.raw/polarizability.raw inside set.$pi/, but the move block never moved those chunks in. Datasets with global dipole/polarizability labels silently lost them: the .npy files were never generated and the split chunks were orphaned in the raw dir. Add the missing moves, mirroring the existing split/convert order, and add a test covering the move/convert symmetry for every tensor label the script splits (the script previously had no test at all). Fix #5692 --- data/raw/raw_to_set.sh | 2 + source/tests/common/test_raw_to_set.py | 76 ++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 source/tests/common/test_raw_to_set.py diff --git a/data/raw/raw_to_set.sh b/data/raw/raw_to_set.sh index 3971a9c279..c27b6e27e9 100755 --- a/data/raw/raw_to_set.sh +++ b/data/raw/raw_to_set.sh @@ -37,6 +37,8 @@ for ii in $(seq 0 $nset_1); do test -f virial.raw$pi && mv virial.raw$pi set.$pi/virial.raw test -f atom_ener.raw$pi && mv atom_ener.raw$pi set.$pi/atom_ener.raw test -f fparam.raw$pi && mv fparam.raw$pi set.$pi/fparam.raw + test -f dipole.raw$pi && mv dipole.raw$pi set.$pi/dipole.raw + test -f polarizability.raw$pi && mv polarizability.raw$pi set.$pi/polarizability.raw test -f atomic_dipole.raw$pi && mv atomic_dipole.raw$pi set.$pi/atomic_dipole.raw test -f atomic_polarizability.raw$pi && mv atomic_polarizability.raw$pi set.$pi/atomic_polarizability.raw diff --git a/source/tests/common/test_raw_to_set.py b/source/tests/common/test_raw_to_set.py new file mode 100644 index 0000000000..1c1cdb0378 --- /dev/null +++ b/source/tests/common/test_raw_to_set.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Tests for 
``data/raw/raw_to_set.sh``. + +The script splits ``*.raw`` label files into per-set chunks and converts each +chunk into ``*.npy`` inside ``set./``. Historically the per-set *move* block +omitted the global ``dipole.raw``/``polarizability.raw`` chunks, so datasets +carrying global dipole/polarizability labels silently lost them (the ``*.npy`` +files were never generated). These tests lock in the move/convert symmetry for +every tensor label the script splits. +""" + +import shutil +import subprocess +from pathlib import ( + Path, +) + +import numpy as np +import pytest + +# repo root: source/tests/common/test_raw_to_set.py -> parents[3] +RAW_TO_SET = Path(__file__).parents[3] / "data" / "raw" / "raw_to_set.sh" + + +@pytest.mark.skipif( + shutil.which("bash") is None or shutil.which("split") is None, + reason="raw_to_set.sh requires bash and split on PATH", +) +@pytest.mark.parametrize( + "label,ncol", + [ + ("dipole", 3), # global dipole (regression: previously dropped) + ("polarizability", 9), # global polarizability (regression: previously dropped) + ("atomic_dipole", 3), # already-working path, documents intended symmetry + ("atomic_polarizability", 9), # already-working path + ], +) +def test_raw_to_set_preserves_tensor_labels( + tmp_path: Path, label: str, ncol: int +) -> None: + """Every split tensor label must be converted to ``set./