Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion experiments/cn_vs_noisybn/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ n_infer: 1000 # Number of inferences to pe

# Other
seed: 42 # Global seed
num_cores: 'multiprocessing.cpu_count() - 1' # Number of threads to use for parallelization
num_cores: 'multiprocessing.cpu_count() - 1' # Number of threads to use for parallelization
7 changes: 5 additions & 2 deletions src/data.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from itertools import product
from pprint import pformat
from numpy.random import randint

import pyagrum as gum
from numpy.random import randint

from src.config import create_clean_dir, get_base_path, set_global_seed
from src.utils import compact_dict
Expand All @@ -26,7 +26,10 @@ def generate_naivebayes(config):

# Set BN (naive Bayes) structure
n_modmax = config["n_modmax"]
bn_str_gen = (f'{config["target_var"]}->X{i}[{randint(2, n_modmax+1)}]' for i in range(config["n_nodes"] - 1))
bn_str_gen = (
f'{config["target_var"]}->X{i}[{randint(2, n_modmax+1)}]'
for i in range(config["n_nodes"] - 1)
)
bn_str = "; ".join(bn_str_gen)

# For each model ...
Expand Down
6 changes: 2 additions & 4 deletions src/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from more_itertools import random_product

from src.config import get_base_path, set_global_seed
from src.utils import (add_counts_to_bn, get_min_max_bns, noisy_bn, safe_assert)
from src.utils import add_counts_to_bn, get_min_max_bns, noisy_bn, safe_assert


def run_inferences(exp, ess, eps, config):
Expand Down Expand Up @@ -128,9 +128,7 @@ def cpt_value(


# Get a naive Bayes log-joint
def nb_log_joint(
bn: gum.BayesNet, target: str, t: float, children: dict
) -> float:
def nb_log_joint(bn: gum.BayesNet, target: str, t: float, children: dict) -> float:
"""
Get log[P(target=t, children)] by exploiting the BN factorization.
The DAG is a naive Bayes with `target` a binary target variable.
Expand Down
97 changes: 55 additions & 42 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def get_llr(x: dict, theta, theta_hat):


# Check BNs sampled from a CN
def are_all_bns_different(bn_vec) -> None:
def are_all_bns_different(bn_vec) -> bool:

signatures = set()
for bn in bn_vec:
Expand All @@ -46,6 +46,8 @@ def are_all_bns_different(bn_vec) -> None:

print(f"({len(signatures)}/{len(bn_vec)} different BNs.)")

return len(signatures) == len(bn_vec)


# Add counts of events to a BN
def add_counts_to_bn(bn, data):
Expand Down Expand Up @@ -133,11 +135,12 @@ def get_min_max_bns(cn, exp: str):


# Sample from a credal set K(x | pi_x), i.e., a constrained polytope.
def sample_from_cset(vec_min, vec_max):
def sample_from_cset(vec_min, vec_max, n_samples) -> list:
"""
A credal set is a polytope in a space of #X parameters, defined by a:
We assume a credal set is a polytope in a space of #X parameters, defined by a:
- Multi-dimensional rectangle, i.e., inequality constraint Ax <= b, and
- Hyperplane (provided all the variables sum up to 1), i.e., equality constraint A_eq x = b_eq.
In the case of local IDM, this is ensured.
"""

# Define the rectangle
Expand All @@ -158,78 +161,88 @@ def sample_from_cset(vec_min, vec_max):
# Sample from the polytope
mc = hopsy.MarkovChain(constrained_rectangle)
rng = hopsy.RandomNumberGenerator(42)
_, constrained_samples = hopsy.sample(mc, rng, n_samples=1, thinning=10)
constrained_samples = constrained_samples.flatten()
_, constrained_samples = hopsy.sample(mc, rng, n_samples, thinning=10)
constrained_samples = constrained_samples[0]

# Debug
safe_assert(np.all(vec_min <= vec_max))
safe_assert(n_par == len(vec_max))
safe_assert(n_par == A.shape[1])
safe_assert(n_par == A_eq.shape[1])
safe_assert(n_par == len(constrained_samples))
safe_assert(len(constrained_samples) == n_samples)
for i in constrained_samples:
safe_assert(len(i) == n_par)

return constrained_samples


# Sample from two esxtreme CPTs
def sample_from_cpts(cpt_min, cpt_max) -> np.array:
# Sample from two extreme CPTs
def sample_from_cpts(cpt_min, cpt_max, n_samples) -> list:

# Transform CPTs into pandas dataframes
cpt_min = np.atleast_2d(cpt_min.topandas())
cpt_max = np.atleast_2d(cpt_max.topandas())

# Sample conditional distributions
cpt_sample = []
# For each row in the CPT ...
credal_dict = {}
for row in range(cpt_min.shape[0]):

vec_min = cpt_min[row, :]
vec_max = cpt_max[row, :]
# ... sample `n_samples` points from the credal set
credal_dict[row] = sample_from_cset(cpt_min[row, :], cpt_max[row, :], n_samples)

# For each sample ...
cpt_samples = []
for i in range(n_samples):

# Sample from polytope
vec_sample = sample_from_cset(vec_min, vec_max)
cpt_sample.append(vec_sample)
# ... build the CPT
cpt = []
for row in range(cpt_min.shape[0]):
cpt.append(credal_dict[row][i])

cpt_sample = np.array(cpt_sample).flatten()
cpt = np.array(cpt).flatten()
cpt_samples.append(cpt)

# Debug
safe_assert(cpt_min.shape == cpt_max.shape)
safe_assert(cpt_min.shape == cpt_max.shape)
safe_assert(prod(cpt_min.shape) == prod(cpt_max.shape))
safe_assert(len(cpt_sample) == prod(cpt_min.shape))
safe_assert(len(credal_dict) == prod(cpt_min.shape))
safe_assert(len(cpt_samples) == n_samples)

return cpt_sample
return cpt_samples


# BNs sampler from a CN
def sample_from_cn(cn, exp: str, n: int):
def sample_from_cn(cn, exp: str, n_samples: int) -> list:

# Get the DAG and extreme BNs
dag = gum.BayesNet(cn.current_bn())
bn_min, bn_max = get_min_max_bns(cn, exp)

# Draw n random BNs
# For each variable ...
cpts_dict = {}
for var in dag.names():

# ... sample `n_samples` CPTs from the CN
cpts_dict[var] = sample_from_cpts(bn_min.cpt(var), bn_max.cpt(var), n_samples)

# For each sample ...
bns = []
for _ in range(n):
for i in range(n_samples):

# Init an empty BN
# ... init an empty BN ...
bn = gum.BayesNet(dag)

# For each variable ...
# ... and fill its CPTs
for var in dag.names():

# ... sample from the CN CPT, ...
cpt_sample = sample_from_cpts(bn_min.cpt(var), bn_max.cpt(var))

# ... and fill the BN's CPT
bn.cpt(var).fillWith(cpt_sample)
bn.cpt(var).fillWith(cpts_dict[var][i])

bns.append(bn)

# Debug
safe_assert(check_consistency(bn, bn_min, bn_max))

# Debug
safe_assert(n == len(bns))
safe_assert(len(cpts_dict) == len(dag.names()))
safe_assert(len(bns) == n_samples)

return bns

Expand All @@ -239,27 +252,27 @@ def check_consistency(bn, bn_min, bn_max) -> bool:

for var in bn.names():
bn_cpt = np.atleast_2d(bn.cpt(var).topandas())
bn_min_cpt = np.array(bn_min.cpt(var).topandas())
bn_max_cpt = np.array(bn_max.cpt(var).topandas())
bn_min_cpt = np.atleast_2d(bn_min.cpt(var).topandas())
bn_max_cpt = np.atleast_2d(bn_max.cpt(var).topandas())

# Check if probabilities sum to 1
sum_vec = np.sum(bn_cpt, axis=1)
probability_integrity = np.all(np.abs(sum_vec - 1) < 1e-5)
probability_consistency = np.all(np.abs(sum_vec - 1) < 1e-5)

# Check if the BN CPT is >= min CPT
min_integrity = np.all(bn_cpt >= bn_min_cpt)
min_consistency = np.all(bn_cpt >= bn_min_cpt)

# Check if the BN CPT is <= max CPT
max_integrity = np.all(bn_cpt <= bn_max_cpt)
max_consistency = np.all(bn_cpt <= bn_max_cpt)

integrity = probability_integrity and min_integrity and max_integrity
consistency = probability_consistency and min_consistency and max_consistency

if integrity:
if consistency:
continue
else:
print("probability_integrity: ", probability_integrity)
print("min_integrity: ", min_integrity)
print("max_integrity: ", max_integrity)
print("probability_consistency: ", probability_consistency)
print("min_consistency: ", min_consistency)
print("max_consistency: ", max_consistency)
return False

return True
Loading