From 4835440bf5af9c3f82eadd56be50e10ee53f994b Mon Sep 17 00:00:00 2001 From: Domenico Marson Date: Thu, 11 Sep 2025 17:36:55 +0200 Subject: [PATCH 1/6] changed logic for sort_backbone --- package/CHANGELOG | 2 + package/MDAnalysis/analysis/polymer.py | 49 ++++++++++--------- .../analysis/test_persistencelength.py | 19 ++++--- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 75dcd5f534d..56c11b65ece 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -81,6 +81,8 @@ Changes * Removed undocumented and unused attribute `analysis.lineardensity.LinearDensity.totalmass` (PR #5007) * Remove `default` channel from RTD conda env. (Issue # 5036, PR # 5037) + * Changed the fragment detection logic in `analysis.polymer.sort_backbone` (Issue #5112, PR #) + * Replaced the while loop in `analysis.polymer.sort_backbone` with a for loop (Issue #5112, PR #) Deprecations * The RDKit converter parameter `NoImplicit` has been deprecated in favour of diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py index 2bee3839710..1aad03c325e 100644 --- a/package/MDAnalysis/analysis/polymer.py +++ b/package/MDAnalysis/analysis/polymer.py @@ -67,37 +67,32 @@ def sort_backbone(backbone): .. versionadded:: 0.20.0 """ - if not backbone.n_fragments == 1: - raise ValueError( - "{} fragments found in backbone. " - "backbone must be a single contiguous AtomGroup" - "".format(backbone.n_fragments) - ) + degrees = [len(atom.bonded_atoms & backbone) for atom in backbone] + deg1_atoms = [atom for atom, d in zip(backbone, degrees) if d == 1] + wrong_atoms = [ + atom for atom, d in zip(backbone, degrees) if d not in (1, 2) + ] - branches = [at for at in backbone if len(at.bonded_atoms & backbone) > 2] - if branches: - # find which atom has too many bonds for easier debug + if len(wrong_atoms) > 0: raise ValueError( - "Backbone is not linear. " - "The following atoms have more than two bonds in backbone: {}." - "".format(",".join(str(a) for a in branches)) + "Backbone contains atoms with connectivity degree not equal to 1 or 2. " + "This suggests branches or isolated atoms. Problematic atoms: {}." + "".format(", ".join(str(a) for a in wrong_atoms)) ) - caps = [ - atom for atom in backbone if len(atom.bonded_atoms & backbone) == 1 - ] - if not caps: - # cyclical structure + if len(deg1_atoms) != 2: raise ValueError( - "Could not find starting point of backbone, " - "is the backbone cyclical?" + "Backbone connectivity invalid: " + "expected exactly 2 atoms with connectivity degree 1 (caps). " + "Cyclical structures are not supported. " + "Atoms with connectivity degree 1 found: {}." + "".format(", ".join(str(a) for a in deg1_atoms)) ) # arbitrarily choose one of the capping atoms to be the startpoint - sorted_backbone = AtomGroup([caps[0]]) + sorted_backbone = AtomGroup([deg1_atoms[0]]) - # iterate until the sorted chain length matches the backbone size - while len(sorted_backbone) < len(backbone): + for _ in range(len(backbone) - 1): # current end of the chain end_atom = sorted_backbone[-1] @@ -108,8 +103,16 @@ def sort_backbone(backbone): # append this to the sorted backbone sorted_backbone += next_atom - return sorted_backbone + # final sanity check to see if we missed some atoms + if len(sorted_backbone) != len(backbone): + raise ValueError( + "Backbone traversal did not visit all atoms. " + "Expected {} atoms, got {}.".format( + len(backbone), len(sorted_backbone) + ) + ) + return sorted_backbone class PersistenceLength(AnalysisBase): r"""Calculate the persistence length for polymer chains diff --git a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py index e17482dd3d4..d6e42702162 100644 --- a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py +++ b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py @@ -149,18 +149,21 @@ def test_sortbb(self, u): assert_equal(s_ag.ids, [0, 1, 4, 6, 8]) - def test_not_fragment(self, u): - # two fragments don't work - bad_ag = u.residues[0].atoms[:2] + u.residues[1].atoms[:2] - with pytest.raises(ValueError): - polymer.sort_backbone(bad_ag) - def test_branches(self, u): # includes side branches, can't sort bad_ag = u.atoms[:10] # include -H etc - with pytest.raises(ValueError): + with pytest.raises(ValueError) as ex: polymer.sort_backbone(bad_ag) + assert "branches or isolated atoms" in str(ex.value) + + def test_isolated(self, u): + u = mda.Universe.empty(4, trajectory=True) + bondlist = [(0, 1), (1, 2)] + u.add_TopologyAttr(Bonds(bondlist)) + with pytest.raises(ValueError) as ex: + polymer.sort_backbone(u.atoms) + assert "branches or isolated atoms" in str(ex.value) def test_circular(self): u = mda.Universe.empty(6, trajectory=True) @@ -170,4 +173,4 @@ def test_circular(self): with pytest.raises(ValueError) as ex: polymer.sort_backbone(u.atoms) - assert "cyclical" in str(ex.value) + assert "Cyclical" in str(ex.value) From 5ca68e5055cab2a12f439fb018b21c91481ec983 Mon Sep 17 00:00:00 2001 From: Domenico Marson Date: Thu, 11 Sep 2025 17:47:30 +0200 Subject: [PATCH 2/6] added test for internal missing --- .../MDAnalysisTests/analysis/test_persistencelength.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py index d6e42702162..d12701f0432 100644 --- a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py +++ b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py @@ -165,6 +165,14 @@ def test_isolated(self, u): polymer.sort_backbone(u.atoms) assert "branches or isolated atoms" in str(ex.value) + def test_missing_internal(self, u): + u = mda.Universe.empty(4, trajectory=True) + bondlist = [(0, 1), (2, 3)] + u.add_TopologyAttr(Bonds(bondlist)) + with pytest.raises(ValueError) as ex: + polymer.sort_backbone(u.atoms) + assert "Backbone connectivity invalid" in str(ex.value) + def test_circular(self): u = mda.Universe.empty(6, trajectory=True) # circular structure From b26705c5776676c375670c98fb5c3dcc90b684e8 Mon Sep 17 00:00:00 2001 From: Domenico Marson Date: Thu, 11 Sep 2025 17:53:42 +0200 Subject: [PATCH 3/6] added PR# to changelog --- package/CHANGELOG | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 56c11b65ece..db8f01a8216 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -81,8 +81,8 @@ Changes * Removed undocumented and unused attribute `analysis.lineardensity.LinearDensity.totalmass` (PR #5007) * Remove `default` channel from RTD conda env. (Issue # 5036, PR # 5037) - * Changed the fragment detection logic in `analysis.polymer.sort_backbone` (Issue #5112, PR #) - * Replaced the while loop in `analysis.polymer.sort_backbone` with a for loop (Issue #5112, PR #) + * Changed the fragment detection logic in `analysis.polymer.sort_backbone` (Issue #5112, PR #5113) + * Replaced the while loop in `analysis.polymer.sort_backbone` with a for loop (Issue #5112, PR #5113) Deprecations * The RDKit converter parameter `NoImplicit` has been deprecated in favour of From d07d8e8d6ab45529c2651dc19bbc0e8ce637315a Mon Sep 17 00:00:00 2001 From: Domenico Marson Date: Wed, 1 Oct 2025 10:35:19 +0200 Subject: [PATCH 4/6] tests now use match --- package/CHANGELOG | 5 +++-- .../analysis/test_persistencelength.py | 13 ++++--------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index db8f01a8216..d0049fc206e 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -40,6 +40,8 @@ Fixes * Fixes the benchmark `SimpleRmsBench` in `benchmarks/analysis/rms.py` by changing the way weights for RMSD are calculated, instead of directly passing them. (Issue #3520, PR #5006) + * `analysis.polymer.sort_backbone` is now working for discontinuous polymers + and does not result in an infinite loop (Issue #5112, PR #5113) Enhancements * Added capability to calculate MSD from frames with irregular (non-linear) @@ -81,8 +83,7 @@ Changes * Removed undocumented and unused attribute `analysis.lineardensity.LinearDensity.totalmass` (PR #5007) * Remove `default` channel from RTD conda env. (Issue # 5036, PR # 5037) - * Changed the fragment detection logic in `analysis.polymer.sort_backbone` (Issue #5112, PR #5113) - * Replaced the while loop in `analysis.polymer.sort_backbone` with a for loop (Issue #5112, PR #5113) + Deprecations * The RDKit converter parameter `NoImplicit` has been deprecated in favour of diff --git a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py index d12701f0432..54005c8a667 100644 --- a/testsuite/MDAnalysisTests/analysis/test_persistencelength.py +++ b/testsuite/MDAnalysisTests/analysis/test_persistencelength.py @@ -153,32 +153,27 @@ def test_branches(self, u): # includes side branches, can't sort bad_ag = u.atoms[:10] # include -H etc - with pytest.raises(ValueError) as ex: + with pytest.raises(ValueError, match="branches or isolated atoms"): polymer.sort_backbone(bad_ag) - assert "branches or isolated atoms" in str(ex.value) def test_isolated(self, u): u = mda.Universe.empty(4, trajectory=True) bondlist = [(0, 1), (1, 2)] u.add_TopologyAttr(Bonds(bondlist)) - with pytest.raises(ValueError) as ex: + with pytest.raises(ValueError, match="branches or isolated atoms"): polymer.sort_backbone(u.atoms) - assert "branches or isolated atoms" in str(ex.value) def test_missing_internal(self, u): u = mda.Universe.empty(4, trajectory=True) bondlist = [(0, 1), (2, 3)] u.add_TopologyAttr(Bonds(bondlist)) - with pytest.raises(ValueError) as ex: + with pytest.raises(ValueError, match="Backbone connectivity invalid"): polymer.sort_backbone(u.atoms) - assert "Backbone connectivity invalid" in str(ex.value) def test_circular(self): u = mda.Universe.empty(6, trajectory=True) # circular structure bondlist = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 0)] u.add_TopologyAttr(Bonds(bondlist)) - - with pytest.raises(ValueError) as ex: + with pytest.raises(ValueError, match="Cyclical"): polymer.sort_backbone(u.atoms) - assert "Cyclical" in str(ex.value) From daa17b7e0085e111283c0857563dda8b2de1715c Mon Sep 17 00:00:00 2001 From: Domenico Marson Date: Sat, 4 Oct 2025 10:02:04 +0200 Subject: [PATCH 5/6] made black happy --- package/MDAnalysis/analysis/polymer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py index 1aad03c325e..606ad2b7c8a 100644 --- a/package/MDAnalysis/analysis/polymer.py +++ b/package/MDAnalysis/analysis/polymer.py @@ -114,6 +114,7 @@ def sort_backbone(backbone): return sorted_backbone + class PersistenceLength(AnalysisBase): r"""Calculate the persistence length for polymer chains From 800cbdfe6add26c498113b9198f64e171e7f086a Mon Sep 17 00:00:00 2001 From: Domenico Marson Date: Sat, 4 Oct 2025 10:03:33 +0200 Subject: [PATCH 6/6] removed unreachable code --- package/MDAnalysis/analysis/polymer.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py index 606ad2b7c8a..7a0510eab03 100644 --- a/package/MDAnalysis/analysis/polymer.py +++ b/package/MDAnalysis/analysis/polymer.py @@ -103,15 +103,6 @@ def sort_backbone(backbone): # append this to the sorted backbone sorted_backbone += next_atom - # final sanity check to see if we missed some atoms - if len(sorted_backbone) != len(backbone): - raise ValueError( - "Backbone traversal did not visit all atoms. " - "Expected {} atoms, got {}.".format( - len(backbone), len(sorted_backbone) - ) - ) - return sorted_backbone