From 86bed717493aecfffe4ac746d8f9a7097bf4dc35 Mon Sep 17 00:00:00 2001 From: Yan Wong Date: Mon, 9 Jan 2023 13:35:07 +0000 Subject: [PATCH 1/2] Add spearman's R to the accuracy tests And use better r2 estimates --- tests/test_accuracy.py | 67 ++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/tests/test_accuracy.py b/tests/test_accuracy.py index 1ec68eff..d68d40c9 100644 --- a/tests/test_accuracy.py +++ b/tests/test_accuracy.py @@ -29,6 +29,7 @@ import msprime import numpy as np import pytest +import scipy import tskit import tsdate @@ -46,7 +47,7 @@ def test_make_static_files(self, request): So that we are assured of using the same tree sequence, regardless of the version and random number generator used in msprime, we keep these as static files and only run this function when explicitly specified, e.g. via - pytest test_accuracy.py::TestAccuracy::create_static_files + pytest test_accuracy.py::TestAccuracy::test_make_static_files """ mu = 1e-6 Ne = 1e4 @@ -74,14 +75,22 @@ def test_make_static_files(self, request): ts.dump(os.path.join(request.fspath.dirname, "data", f"{name}.trees")) @pytest.mark.parametrize( - "ts_name,min_r2_ts,min_r2_posterior", + "ts_name,min_r2_ts,min_r2_unconstrained,min_spear_ts,min_spear_unconstrained", [ - ("one_tree", 0.94776615238, 0.94776615238), - ("few_trees", 0.96605244, 0.96605244), - ("many_trees", 0.92646, 0.92646), + ("one_tree", 0.98601, 0.98601, 0.97719, 0.97719), + ("few_trees", 0.98220, 0.98220, 0.97744, 0.97744), + ("many_trees", 0.93449, 0.93449, 0.964547, 0.964547), ], ) - def test_basic(self, ts_name, min_r2_ts, min_r2_posterior, request): + def test_basic( + self, + ts_name, + min_r2_ts, + min_r2_unconstrained, + min_spear_ts, + min_spear_unconstrained, + request, + ): ts = tskit.load( os.path.join(request.fspath.dirname, "data", ts_name + ".trees") ) @@ -97,29 +106,29 @@ def test_basic(self, ts_name, min_r2_ts, min_r2_posterior, request): dts, posteriors = tsdate.date( ts, Ne=Ne, mutation_rate=mu, return_posteriors=True ) + # make sure we can read node metadata - old tsdate versions didn't set a schema + if dts.table_metadata_schemas.node.schema is None: + tables = dts.dump_tables() + tables.nodes.metadata_schema = tskit.MetadataSchema.permissive_json() + dts = tables.tree_sequence() + # Only test nonsample node times - nonsample_nodes = np.ones(ts.num_nodes, dtype=bool) - nonsample_nodes[ts.samples()] = False + nonsamples = np.ones(ts.num_nodes, dtype=bool) + nonsamples[ts.samples()] = False - # Test the tree sequence times - r_sq = ( - np.corrcoef( - np.log(ts.nodes_time[nonsample_nodes]), - np.log(dts.nodes_time[nonsample_nodes]), - )[0, 1] - ** 2 - ) - assert r_sq >= min_r2_ts + min_vals = { + "r_sq": {"ts": min_r2_ts, "unconstr": min_r2_unconstrained}, + "spearmans_r": {"ts": min_spear_ts, "unconstr": min_spear_unconstrained}, + } - # Test the posterior means too. - post_mean = np.array( - [ - np.sum(posteriors[i] * posteriors["start_time"]) / np.sum(posteriors[i]) - for i in np.where(nonsample_nodes)[0] - ] - ) - r_sq = ( - np.corrcoef(np.log(ts.nodes_time[nonsample_nodes]), np.log(post_mean))[0, 1] - ** 2 - ) - assert r_sq >= min_r2_posterior + expected = ts.nodes_time[nonsamples] + for (observed, src) in [ + (dts.nodes_time[nonsamples], "ts"), + ([dts.node(i).metadata["mn"] for i in np.where(nonsamples)[0]], "unconstr"), + ]: + # Test the tree sequence times + r_sq = np.corrcoef(expected, observed)[0, 1] ** 2 + assert r_sq >= min_vals["r_sq"][src] + + spearmans_r = scipy.stats.spearmanr(expected, observed).correlation + assert spearmans_r >= min_vals["spearmans_r"][src] From 24dbca813f98f61bfb3ea53069c5e8c0d1dd608b Mon Sep 17 00:00:00 2001 From: Yan Wong Date: Mon, 9 Jan 2023 13:45:44 +0000 Subject: [PATCH 2/2] Add a test to check we properly scale for Ne --- tests/test_accuracy.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_accuracy.py b/tests/test_accuracy.py index d68d40c9..77c53638 100644 --- a/tests/test_accuracy.py +++ b/tests/test_accuracy.py @@ -132,3 +132,13 @@ def test_basic( spearmans_r = scipy.stats.spearmanr(expected, observed).correlation assert spearmans_r >= min_vals["spearmans_r"][src] + + @pytest.mark.parametrize("Ne", [0.1, 1, 400]) + def test_scaling(self, Ne): + """ + Test that we are in the right theoretical ballpark given known Ne + """ + ts = tskit.Tree.generate_comb(2).tree_sequence + dts = tsdate.date(ts, Ne=Ne, mutation_rate=None) + # Check the date is within 10% of the expected + assert 0.9 < dts.node(dts.first().root).time / (2 * Ne) < 1.1