Skip to content

Commit 82501a1

Browse files
committed
test: stochastic test for multinomial - need another pair of eyes
I'm still unsure if I'm sampling incorrectly or am using bad verification technique, should rely on a common GoF test instead of my homebrew statistics
1 parent 41e32c5 commit 82501a1

File tree

1 file changed

+37
-20
lines changed

1 file changed

+37
-20
lines changed

src/distribution/multinomial.rs

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -545,33 +545,50 @@ mod tests {
545545
#[test]
546546
#[ignore = "this test is designed to assess approximately normal results within 2σ"]
547547
fn test_stochastic_uniform_samples() {
548-
use crate::statistics::Statistics;
549548
use ::rand::{distributions::Distribution, prelude::thread_rng};
549+
use crate::statistics::Statistics;
550+
// describe multinomial such that each binomial variable is viable normal approximation
551+
let k: f64 = 60.0;
550552
let n: f64 = 1000.0;
551-
let k: usize = 20;
552-
let weights = vec![1.0; k];
553+
let weights = vec![1.0; k as usize];
553554
let multinomial = Multinomial::new(weights, n as u64).unwrap();
554-
let samples = multinomial.sample(&mut thread_rng());
555-
eprintln!("{samples}");
556555

557-
samples.iter().for_each(|&s| {
556+
// obtain sample statistics for multiple draws from this multinomial distribution
557+
// during iteration, verify that each event is ~ Binom(n, 1/k) under normal approx
558+
let n_trials = 20;
559+
let stats_across_multinom_events = std::iter::repeat_with(|| {
560+
let samples = multinomial.sample(&mut thread_rng());
561+
samples.iter().enumerate().for_each(|(i, &s)| {
562+
assert_abs_diff_eq!(s, n / k, epsilon = multinomial.variance().unwrap()[(i, i)],)
563+
});
564+
samples
565+
})
566+
.take(n_trials)
567+
.map(|sample| (sample.mean(), sample.population_variance()));
568+
569+
println!("{:#?}", stats_across_multinom_events.clone().collect::<Vec<_>>());
570+
571+
// claim: for X from a given trial, Var[X] ~ χ^2(k-1)
572+
// the variance across this multinomial sample should be against the true mean
573+
// Var[X] = sum (X_i - bar{X})^2 = sum (X_i - n/k)^2
574+
// alternatively, variance is linear, so we should also have
575+
// Var[X] = k Var[X_i] = k npq = k n (k-1)/k^2 = n (k-1)/k as these are iid Binom(n, 1/k)
576+
//
577+
// since parameters of the binomial variable are around np ~ 20, each binomial variable is approximately normal
578+
//
579+
// therefore, population variance should be a sum of squares of normal variables from their mean.
580+
// i.e. population variances of these multinomial samples should be a scaling of χ^2 squared variables
581+
// with k-1 dof
582+
//
583+
// normal approximation of χ^2(k-1) should be valid for k = 50, so assert against 2σ_normal
584+
for (_mu, var) in stats_across_multinom_events {
558585
assert_abs_diff_eq!(
559-
s,
560-
n / k as f64,
561-
epsilon = 2.0 * multinomial.variance().unwrap()[(0, 0)].sqrt(),
586+
var,
587+
k - 1.0,
588+
epsilon = ((2.0*k - 2.0)/n_trials as f64).sqrt()
562589
)
563-
});
564-
565-
assert_abs_diff_eq!(
566-
samples.iter().population_variance(),
567-
multinomial.variance().unwrap()[(0, 0)],
568-
epsilon = n.sqrt()
569-
);
590+
}
570591

571-
assert_eq!(
572-
samples.into_iter().map(|&x| x as u64).sum::<u64>(),
573-
n as u64
574-
);
575592
}
576593
}
577594

0 commit comments

Comments
 (0)