Skip to content

Commit 7b4180c

Browse files
committed
test: stochastic test for multinomial - need another pair of eyes
I'm still unsure if I'm sampling incorrectly or am using bad verification technique, should rely on a common GoF test instead of my homebrew statistics
1 parent b159ac0 commit 7b4180c

File tree

1 file changed

+38
-20
lines changed

1 file changed

+38
-20
lines changed

src/distribution/multinomial.rs

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -567,33 +567,51 @@ mod tests {
567567
#[test]
568568
#[ignore = "this test is designed to assess approximately normal results within 2σ"]
569569
fn test_stochastic_uniform_samples() {
570-
use crate::statistics::Statistics;
571570
use ::rand::{distributions::Distribution, prelude::thread_rng};
571+
use crate::statistics::Statistics;
572+
573+
// describe multinomial such that each binomial variable is viable normal approximation
574+
let k: f64 = 60.0;
572575
let n: f64 = 1000.0;
573-
let k: usize = 20;
574-
let weights = vec![1.0; k];
576+
let weights = vec![1.0; k as usize];
575577
let multinomial = Multinomial::new(weights, n as u64).unwrap();
576-
let samples: OVector<f64, Dyn> = multinomial.sample(&mut thread_rng());
577-
eprintln!("{samples}");
578578

579-
samples.iter().for_each(|&s| {
579+
// obtain sample statistics for multiple draws from this multinomial distribution
580+
// during iteration, verify that each event is ~ Binom(n, 1/k) under normal approx
581+
let n_trials = 20;
582+
let stats_across_multinom_events = std::iter::repeat_with(|| {
583+
let samples: OVector<f64, Dyn> = multinomial.sample(&mut thread_rng());
584+
samples.iter().enumerate().for_each(|(i, &s)| {
585+
assert_abs_diff_eq!(s, n / k, epsilon = multinomial.variance().unwrap()[(i, i)],)
586+
});
587+
samples
588+
})
589+
.take(n_trials)
590+
.map(|sample| (sample.mean(), sample.population_variance()));
591+
592+
println!("{:#?}", stats_across_multinom_events.clone().collect::<Vec<_>>());
593+
594+
// claim: for X from a given trial, Var[X] ~ χ^2(k-1)
595+
// the variance across this multinomial sample should be against the true mean
596+
// Var[X] = sum (X_i - bar{X})^2 = sum (X_i - n/k)^2
597+
// alternatively, variance is linear, so we should also have
598+
// Var[X] = k Var[X_i] = k npq = k n (k-1)/k^2 = n (k-1)/k as these are iid Binom(n, 1/k)
599+
//
600+
// since parameters of the binomial variable are around np ~ 20, each binomial variable is approximately normal
601+
//
602+
// therefore, population variance should be a sum of squares of normal variables from their mean.
603+
// i.e. population variances of these multinomial samples should be a scaling of χ^2 squared variables
604+
// with k-1 dof
605+
//
606+
// normal approximation of χ^2(k-1) should be valid for k = 50, so assert against 2σ_normal
607+
for (_mu, var) in stats_across_multinom_events {
580608
assert_abs_diff_eq!(
581-
s,
582-
n / k as f64,
583-
epsilon = 2.0 * multinomial.variance().unwrap()[(0, 0)].sqrt(),
609+
var,
610+
k - 1.0,
611+
epsilon = ((2.0*k - 2.0)/n_trials as f64).sqrt()
584612
)
585-
});
586-
587-
assert_abs_diff_eq!(
588-
samples.iter().population_variance(),
589-
multinomial.variance().unwrap()[(0, 0)],
590-
epsilon = n.sqrt()
591-
);
613+
}
592614

593-
assert_eq!(
594-
samples.into_iter().map(|&x| x as u64).sum::<u64>(),
595-
n as u64
596-
);
597615
}
598616
}
599617

0 commit comments

Comments
 (0)