From ecfd0e5a2d0789a73e63c94d5b4de6e8ada8f66b Mon Sep 17 00:00:00 2001
From: Hamid Arian <harian@yorku.ca>
Date: Sat, 20 Jun 2026 02:47:43 -0400
Subject: [PATCH] fix(hrp): symmetrise correlation distance before squareform
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

hrp() raised "ValueError: Distance matrix must be symmetric" when the
correlation matrix was only symmetric to floating-point tolerance — the
normal case for matrices coming from cov_to_corr or denoise_cov, where
sub-epsilon asymmetry is unavoidable. Average the distance with its
transpose and zero the diagonal before squareform; clustering is
unchanged. Mirrors the same fix in RiskLabAI.jl v0.5.1.

Adds a regression test (test_hrp_asymmetric_correlation) and bumps the
patch version to 2.0.1.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                  | 10 ++++++++++
 RiskLabAI/optimization/hrp.py |  7 +++++++
 pyproject.toml                |  2 +-
 test/optimization/test_hrp.py | 24 ++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 25de7f1..f613a01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,16 @@
 All notable changes to RiskLabAI.py are documented here.
 Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); versioning: [SemVer](https://semver.org/).
 
+## [2.0.1]
+
+### Fixed
+- `optimization.hrp.hrp` no longer raises `ValueError: Distance matrix must be
+  symmetric` when the correlation matrix is only symmetric to floating-point
+  tolerance (as produced by `cov_to_corr` or by denoising). The correlation
+  distance is now symmetrised (`(d + dᵀ)/2`, zero diagonal) before
+  `squareform`. Regression test added (`test_hrp_asymmetric_correlation`).
+  Mirrors the same fix in RiskLabAI.jl v0.5.1.
+
 ## [2.0.0]
 
 A **breaking** release that standardises the public API on PEP 8 names and makes
diff --git a/RiskLabAI/optimization/hrp.py b/RiskLabAI/optimization/hrp.py
index d1e7527..4290140 100644
--- a/RiskLabAI/optimization/hrp.py
+++ b/RiskLabAI/optimization/hrp.py
@@ -193,6 +193,13 @@ def hrp(cov: pd.DataFrame, corr: pd.DataFrame) -> pd.Series:
     # 1. Calculate distance
     distance = distance_corr(corr_df.values)
 
+    # Enforce exact symmetry: a correlation matrix from ``cov_to_corr`` or denoising
+    # can be asymmetric at the floating-point level, which makes ``squareform`` reject
+    # it ("Distance matrix must be symmetric"). Averaging with the transpose removes
+    # that without changing the clustering; the diagonal must also be exactly 0.
+    distance = (distance + distance.T) / 2.0
+    np.fill_diagonal(distance, 0.0)
+
     dist_condensed = scd.squareform(distance, force="tovector")
 
     # 2. Cluster
diff --git a/pyproject.toml b/pyproject.toml
index 4ae26ea..c21dd19 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "RiskLabAI"
-version = "2.0.0"
+version = "2.0.1"
 authors = [
     { name = "RiskLab", email = "arian@risklab.ai" },
 ]
diff --git a/test/optimization/test_hrp.py b/test/optimization/test_hrp.py
index 7d3a2c6..72c6c83 100644
--- a/test/optimization/test_hrp.py
+++ b/test/optimization/test_hrp.py
@@ -88,3 +88,27 @@ def test_hrp(mock_cov_matrix):
     pd.testing.assert_series_equal(
         weights, expected_weights, atol=1e-5, check_names=False
     )
+
+
+def test_hrp_asymmetric_correlation():
+    """A correlation matrix that is only symmetric to floating-point tolerance
+    (as produced by ``cov_to_corr`` or denoising) must not break ``hrp``.
+
+    Regression test: ``squareform`` previously rejected the tiny asymmetry with
+    "Distance matrix must be symmetric"."""
+    rng = np.random.default_rng(0)
+    n = 8
+    a = rng.normal(size=(n, n))
+    corr = np.corrcoef(a @ a.T)
+    # Inject a rounding-level asymmetry, the kind cov_to_corr leaves behind.
+    corr[0, 1] += 1e-15
+    names = [f"A{i}" for i in range(n)]
+    corr_df = pd.DataFrame(corr, index=names, columns=names)
+    cov_df = corr_df  # unit variances → cov == corr
+
+    assert (corr != corr.T).any()  # genuinely asymmetric (not bit-identical)
+    weights = hrp(cov_df, corr_df)
+    assert isinstance(weights, pd.Series)
+    assert weights.shape == (n,)
+    assert np.isclose(weights.sum(), 1.0)
+    assert (weights >= 0).all()