@@ -567,33 +567,51 @@ mod tests {
567567 #[ test]
568568 #[ ignore = "this test is designed to assess approximately normal results within 2σ" ]
569569 fn test_stochastic_uniform_samples ( ) {
570- use crate :: statistics:: Statistics ;
571570 use :: rand:: { distributions:: Distribution , prelude:: thread_rng} ;
571+ use crate :: statistics:: Statistics ;
572+
573+ // describe multinomial such that each binomial variable is viable normal approximation
574+ let k: f64 = 60.0 ;
572575 let n: f64 = 1000.0 ;
573- let k: usize = 20 ;
574- let weights = vec ! [ 1.0 ; k] ;
576+ let weights = vec ! [ 1.0 ; k as usize ] ;
575577 let multinomial = Multinomial :: new ( weights, n as u64 ) . unwrap ( ) ;
576- let samples: OVector < f64 , Dyn > = multinomial. sample ( & mut thread_rng ( ) ) ;
577- eprintln ! ( "{samples}" ) ;
578578
579- samples. iter ( ) . for_each ( |& s| {
579+ // obtain sample statistics for multiple draws from this multinomial distribution
580+ // during iteration, verify that each event is ~ Binom(n, 1/k) under normal approx
581+ let n_trials = 20 ;
582+ let stats_across_multinom_events = std:: iter:: repeat_with ( || {
583+ let samples: OVector < f64 , Dyn > = multinomial. sample ( & mut thread_rng ( ) ) ;
584+ samples. iter ( ) . enumerate ( ) . for_each ( |( i, & s) | {
585+ assert_abs_diff_eq ! ( s, n / k, epsilon = multinomial. variance( ) . unwrap( ) [ ( i, i) ] , )
586+ } ) ;
587+ samples
588+ } )
589+ . take ( n_trials)
590+ . map ( |sample| ( sample. mean ( ) , sample. population_variance ( ) ) ) ;
591+
592+ println ! ( "{:#?}" , stats_across_multinom_events. clone( ) . collect:: <Vec <_>>( ) ) ;
593+
594+ // claim: for X from a given trial, Var[X] ~ χ^2(k-1)
595+ // the variance across this multinomial sample should be against the true mean
596+ // Var[X] = sum (X_i - bar{X})^2 = sum (X_i - n/k)^2
597+ // alternatively, variance is linear, so we should also have
598+ // Var[X] = k Var[X_i] = k npq = k n (k-1)/k^2 = n (k-1)/k as these are iid Binom(n, 1/k)
599+ //
600+ // since parameters of the binomial variable are around np ~ 20, each binomial variable is approximately normal
601+ //
602+ // therefore, population variance should be a sum of squares of normal variables from their mean.
603+ // i.e. population variances of these multinomial samples should be a scaling of χ^2 squared variables
604+ // with k-1 dof
605+ //
606+ // normal approximation of χ^2(k-1) should be valid for k = 50, so assert against 2σ_normal
607+ for ( _mu, var) in stats_across_multinom_events {
580608 assert_abs_diff_eq ! (
581- s ,
582- n / k as f64 ,
583- epsilon = 2.0 * multinomial . variance ( ) . unwrap ( ) [ ( 0 , 0 ) ] . sqrt( ) ,
609+ var ,
610+ k - 1.0 ,
611+ epsilon = ( ( 2.0 * k - 2.0 ) /n_trials as f64 ) . sqrt( )
584612 )
585- } ) ;
586-
587- assert_abs_diff_eq ! (
588- samples. iter( ) . population_variance( ) ,
589- multinomial. variance( ) . unwrap( ) [ ( 0 , 0 ) ] ,
590- epsilon = n. sqrt( )
591- ) ;
613+ }
592614
593- assert_eq ! (
594- samples. into_iter( ) . map( |& x| x as u64 ) . sum:: <u64 >( ) ,
595- n as u64
596- ) ;
597615 }
598616}
599617
0 commit comments