diff --git a/docs/software.rst b/docs/software.rst index 186e2020..e5974ffd 100644 --- a/docs/software.rst +++ b/docs/software.rst @@ -238,4 +238,4 @@ Release Notes * v0.4.1 - package updates * v0.4.2 - validation script in Python * v0.4.3 - allow non-binary incidence - * v0.5 - support for multiprocessing \ No newline at end of file + * v0.5 - support for multiprocessing diff --git a/example_survey_weighting/configs/settings.yaml b/example_survey_weighting/configs/settings.yaml index 1254a73c..4072ab16 100755 --- a/example_survey_weighting/configs/settings.yaml +++ b/example_survey_weighting/configs/settings.yaml @@ -18,7 +18,8 @@ USE_SIMUL_INTEGERIZER: True USE_CVXPY: False max_expansion_factor: 4 # Default is 30 min_expansion_factor: 0.5 - +absolute_upper_bound: 20000 +absolute_lower_bound: 1 # Geographic Settings # ------------------------------------------------------------------ diff --git a/populationsim/balancer.py b/populationsim/balancer.py index fef1c172..e0e3702d 100644 --- a/populationsim/balancer.py +++ b/populationsim/balancer.py @@ -242,6 +242,7 @@ def np_balancer( def do_balancing(control_spec, total_hh_control_col, max_expansion_factor, min_expansion_factor, + absolute_upper_bound, absolute_lower_bound, incidence_df, control_totals, initial_weights): # incidence table should only have control columns @@ -262,14 +263,21 @@ def do_balancing(control_spec, if min_expansion_factor: - # number_of_households in this seed geograpy as specified in seed_controlss + # number_of_households in this seed geography as specified in seed_controls number_of_households = control_totals[total_hh_control_index] total_weights = initial_weights.sum() lb_ratio = min_expansion_factor * float(number_of_households) / float(total_weights) lb_weights = initial_weights * lb_ratio - lb_weights = lb_weights.clip(lower=0) + + if absolute_lower_bound: + lb_weights = lb_weights.clip(lower=absolute_lower_bound) + else: + lb_weights = lb_weights.clip(lower=0) + + elif 
absolute_lower_bound: + lb_weights = initial_weights.clip(lower=absolute_lower_bound) else: lb_weights = None @@ -283,7 +291,14 @@ def do_balancing(control_spec, ub_ratio = max_expansion_factor * float(number_of_households) / float(total_weights) ub_weights = initial_weights * ub_ratio - ub_weights = ub_weights.round().clip(lower=1).astype(int) + + if absolute_upper_bound: + ub_weights = ub_weights.round().clip(upper=absolute_upper_bound, lower=1).astype(int) + else: + ub_weights = ub_weights.round().clip(lower=1).astype(int) + + elif absolute_upper_bound: + ub_weights = initial_weights.round().clip(upper=absolute_upper_bound, lower=1).astype(int) else: ub_weights = None diff --git a/populationsim/steps/final_seed_balancing.py b/populationsim/steps/final_seed_balancing.py index c21d2df6..aea0ccb6 100644 --- a/populationsim/steps/final_seed_balancing.py +++ b/populationsim/steps/final_seed_balancing.py @@ -68,6 +68,8 @@ def final_seed_balancing(settings, crosswalk, control_spec, incidence_table): max_expansion_factor = settings.get('max_expansion_factor', None) min_expansion_factor = settings.get('min_expansion_factor', None) + absolute_upper_bound = settings.get('absolute_upper_bound', None) + absolute_lower_bound = settings.get('absolute_lower_bound', None) relaxation_factors = pd.DataFrame(index=seed_controls_df.columns.tolist()) @@ -86,6 +88,8 @@ def final_seed_balancing(settings, crosswalk, control_spec, incidence_table): total_hh_control_col=total_hh_control_col, max_expansion_factor=max_expansion_factor, min_expansion_factor=min_expansion_factor, + absolute_lower_bound=absolute_lower_bound, + absolute_upper_bound=absolute_upper_bound, incidence_df=seed_incidence_df, control_totals=seed_controls_df.loc[seed_id], initial_weights=seed_incidence_df['sample_weight']) diff --git a/populationsim/steps/initial_seed_balancing.py b/populationsim/steps/initial_seed_balancing.py index 96bf0f70..9bc384ca 100644 --- a/populationsim/steps/initial_seed_balancing.py +++ 
b/populationsim/steps/initial_seed_balancing.py @@ -65,6 +65,8 @@ def initial_seed_balancing(settings, crosswalk, control_spec, incidence_table): max_expansion_factor = settings.get('max_expansion_factor', None) min_expansion_factor = settings.get('min_expansion_factor', None) + absolute_upper_bound = settings.get('absolute_upper_bound', None) + absolute_lower_bound = settings.get('absolute_lower_bound', None) # run balancer for each seed geography weight_list = [] @@ -82,6 +84,8 @@ def initial_seed_balancing(settings, crosswalk, control_spec, incidence_table): total_hh_control_col=total_hh_control_col, max_expansion_factor=max_expansion_factor, min_expansion_factor=min_expansion_factor, + absolute_upper_bound=absolute_upper_bound, + absolute_lower_bound=absolute_lower_bound, incidence_df=seed_incidence_df, control_totals=seed_controls_df.loc[seed_id], initial_weights=seed_incidence_df['sample_weight']) diff --git a/populationsim/steps/repop_balancing.py b/populationsim/steps/repop_balancing.py index 6a46376c..9299facd 100644 --- a/populationsim/steps/repop_balancing.py +++ b/populationsim/steps/repop_balancing.py @@ -60,6 +60,8 @@ def repop_balancing(settings, crosswalk, control_spec, incidence_table): max_expansion_factor = settings.get('max_expansion_factor', None) min_expansion_factor = settings.get('min_expansion_factor', None) + absolute_upper_bound = settings.get('absolute_upper_bound', None) + absolute_lower_bound = settings.get('absolute_lower_bound', None) # run balancer for each low geography low_weight_list = [] @@ -101,6 +103,8 @@ def repop_balancing(settings, crosswalk, control_spec, incidence_table): total_hh_control_col=total_hh_control_col, max_expansion_factor=max_expansion_factor, min_expansion_factor=min_expansion_factor, + absolute_upper_bound=absolute_upper_bound, + absolute_lower_bound=absolute_lower_bound, incidence_df=seed_incidence_df, control_totals=low_controls_df.loc[low_id], initial_weights=initial_weights)