@@ -545,33 +545,50 @@ mod tests {
545545 #[ test]
546546 #[ ignore = "this test is designed to assess approximately normal results within 2σ" ]
547547 fn test_stochastic_uniform_samples ( ) {
548- use crate :: statistics:: Statistics ;
549548 use :: rand:: { distributions:: Distribution , prelude:: thread_rng} ;
549+ use crate :: statistics:: Statistics ;
550+ // describe multinomial such that each binomial variable is viable normal approximation
551+ let k: f64 = 60.0 ;
550552 let n: f64 = 1000.0 ;
551- let k: usize = 20 ;
552- let weights = vec ! [ 1.0 ; k] ;
553+ let weights = vec ! [ 1.0 ; k as usize ] ;
553554 let multinomial = Multinomial :: new ( weights, n as u64 ) . unwrap ( ) ;
554- let samples = multinomial. sample ( & mut thread_rng ( ) ) ;
555- eprintln ! ( "{samples}" ) ;
556555
557- samples. iter ( ) . for_each ( |& s| {
556+ // obtain sample statistics for multiple draws from this multinomial distribution
557+ // during iteration, verify that each event is ~ Binom(n, 1/k) under normal approx
558+ let n_trials = 20 ;
559+ let stats_across_multinom_events = std:: iter:: repeat_with ( || {
560+ let samples = multinomial. sample ( & mut thread_rng ( ) ) ;
561+ samples. iter ( ) . enumerate ( ) . for_each ( |( i, & s) | {
562+ assert_abs_diff_eq ! ( s, n / k, epsilon = multinomial. variance( ) . unwrap( ) [ ( i, i) ] , )
563+ } ) ;
564+ samples
565+ } )
566+ . take ( n_trials)
567+ . map ( |sample| ( sample. mean ( ) , sample. population_variance ( ) ) ) ;
568+
569+ println ! ( "{:#?}" , stats_across_multinom_events. clone( ) . collect:: <Vec <_>>( ) ) ;
570+
571+ // claim: for X from a given trial, Var[X] ~ χ^2(k-1)
572+ // the variance across this multinomial sample should be against the true mean
573+ // Var[X] = sum (X_i - bar{X})^2 = sum (X_i - n/k)^2
574+ // alternatively, variance is linear, so we should also have
575+ // Var[X] = k Var[X_i] = k npq = k n (k-1)/k^2 = n (k-1)/k as these are iid Binom(n, 1/k)
576+ //
577+ // since parameters of the binomial variable are around np ~ 20, each binomial variable is approximately normal
578+ //
579+ // therefore, population variance should be a sum of squares of normal variables from their mean.
580+ // i.e. population variances of these multinomial samples should be a scaling of χ^2 squared variables
581+ // with k-1 dof
582+ //
583+ // normal approximation of χ^2(k-1) should be valid for k = 50, so assert against 2σ_normal
584+ for ( _mu, var) in stats_across_multinom_events {
558585 assert_abs_diff_eq ! (
559- s ,
560- n / k as f64 ,
561- epsilon = 2.0 * multinomial . variance ( ) . unwrap ( ) [ ( 0 , 0 ) ] . sqrt( ) ,
586+ var ,
587+ k - 1.0 ,
588+ epsilon = ( ( 2.0 * k - 2.0 ) /n_trials as f64 ) . sqrt( )
562589 )
563- } ) ;
564-
565- assert_abs_diff_eq ! (
566- samples. iter( ) . population_variance( ) ,
567- multinomial. variance( ) . unwrap( ) [ ( 0 , 0 ) ] ,
568- epsilon = n. sqrt( )
569- ) ;
590+ }
570591
571- assert_eq ! (
572- samples. into_iter( ) . map( |& x| x as u64 ) . sum:: <u64 >( ) ,
573- n as u64
574- ) ;
575592 }
576593}
577594
0 commit comments