From ecfd0e5a2d0789a73e63c94d5b4de6e8ada8f66b Mon Sep 17 00:00:00 2001 From: Hamid Arian Date: Sat, 20 Jun 2026 02:47:43 -0400 Subject: [PATCH] fix(hrp): symmetrise correlation distance before squareform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hrp() raised "ValueError: Distance matrix must be symmetric" when the correlation matrix was only symmetric to floating-point tolerance — the normal case for matrices coming from cov_to_corr or denoise_cov, where sub-epsilon asymmetry is unavoidable. Average the distance with its transpose and zero the diagonal before squareform; clustering is unchanged. Mirrors the same fix in RiskLabAI.jl v0.5.1. Adds a regression test (test_hrp_asymmetric_correlation) and bumps the patch version to 2.0.1. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 10 ++++++++++ RiskLabAI/optimization/hrp.py | 7 +++++++ pyproject.toml | 2 +- test/optimization/test_hrp.py | 24 ++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25de7f1..f613a01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ All notable changes to RiskLabAI.py are documented here. Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/); versioning: [SemVer](https://semver.org/). +## [2.0.1] + +### Fixed +- `optimization.hrp.hrp` no longer raises `ValueError: Distance matrix must be + symmetric` when the correlation matrix is only symmetric to floating-point + tolerance (as produced by `cov_to_corr` or by denoising). The correlation + distance is now symmetrised (`(d + dᵀ)/2`, zero diagonal) before + `squareform`. Regression test added (`test_hrp_asymmetric_correlation`). + Mirrors the same fix in RiskLabAI.jl v0.5.1. 
+ ## [2.0.0] A **breaking** release that standardises the public API on PEP 8 names and makes diff --git a/RiskLabAI/optimization/hrp.py b/RiskLabAI/optimization/hrp.py index d1e7527..4290140 100644 --- a/RiskLabAI/optimization/hrp.py +++ b/RiskLabAI/optimization/hrp.py @@ -193,6 +193,13 @@ def hrp(cov: pd.DataFrame, corr: pd.DataFrame) -> pd.Series: # 1. Calculate distance distance = distance_corr(corr_df.values) + # Enforce exact symmetry: a correlation matrix produced by ``cov_to_corr`` or + # by denoising can be asymmetric at the floating-point level, which makes + # ``squareform`` reject it ("Distance matrix must be symmetric"). Averaging + # with the transpose removes that asymmetry without changing the clustering. + distance = (distance + distance.T) / 2.0 + np.fill_diagonal(distance, 0.0) + dist_condensed = scd.squareform(distance, force="tovector") # 2. Cluster diff --git a/pyproject.toml b/pyproject.toml index 4ae26ea..c21dd19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "RiskLabAI" -version = "2.0.0" +version = "2.0.1" authors = [ { name = "RiskLab", email = "arian@risklab.ai" }, ] diff --git a/test/optimization/test_hrp.py b/test/optimization/test_hrp.py index 7d3a2c6..72c6c83 100644 --- a/test/optimization/test_hrp.py +++ b/test/optimization/test_hrp.py @@ -88,3 +88,27 @@ def test_hrp(mock_cov_matrix): pd.testing.assert_series_equal( weights, expected_weights, atol=1e-5, check_names=False ) + + +def test_hrp_asymmetric_correlation(): + """A correlation matrix that is only symmetric to floating-point tolerance + (as produced by ``cov_to_corr`` or denoising) must not break ``hrp``. + + Regression test: ``squareform`` previously rejected the tiny asymmetry with + "Distance matrix must be symmetric".""" + rng = np.random.default_rng(0) + n = 8 + a = rng.normal(size=(n, n)) + corr = np.corrcoef(a @ a.T) + # Inject a sub-epsilon asymmetry, the kind cov_to_corr leaves behind. 
+ corr[0, 1] += 1e-15 + names = [f"A{i}" for i in range(n)] + corr_df = pd.DataFrame(corr, index=names, columns=names) + cov_df = corr_df # unit variances → cov == corr + + assert (corr != corr.T).any() # genuinely asymmetric (not bit-identical) + weights = hrp(cov_df, corr_df) + assert isinstance(weights, pd.Series) + assert weights.shape == (n,) + assert np.isclose(weights.sum(), 1.0) + assert (weights >= 0).all()