Niccolo-Rocchi · Niccolo-Rocchi · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025 · Nov 7, 2025
diff --git a/experiments/cn_vs_noisybn/config.yaml b/experiments/cn_vs_noisybn/config.yaml
@@ -36,4 +36,4 @@ n_infer: 1000                                       # Number of inferences to pe
 
 # Other
 seed: 42                                            # Global seed
-num_cores: 'multiprocessing.cpu_count() - 1'        # Number of threads to use for parallelization
+num_cores: 'multiprocessing.cpu_count() - 1'        # Number of threads to use for parallelization
diff --git a/src/data.py b/src/data.py
@@ -1,8 +1,8 @@
 from itertools import product
 from pprint import pformat
-from numpy.random import randint
 
 import pyagrum as gum
+from numpy.random import randint
 
 from src.config import create_clean_dir, get_base_path, set_global_seed
 from src.utils import compact_dict
@@ -26,7 +26,10 @@ def generate_naivebayes(config):
 
     # Set BN (naive Bayes) structure
     n_modmax = config["n_modmax"]
-    bn_str_gen = (f'{config["target_var"]}->X{i}[{randint(2, n_modmax+1)}]' for i in range(config["n_nodes"] - 1))
+    bn_str_gen = (
+        f'{config["target_var"]}->X{i}[{randint(2, n_modmax+1)}]'
+        for i in range(config["n_nodes"] - 1)
+    )
     bn_str = "; ".join(bn_str_gen)
 
     # For each model ...

diff --git a/src/inference.py b/src/inference.py
@@ -6,7 +6,7 @@
 from more_itertools import random_product
 
 from src.config import get_base_path, set_global_seed
-from src.utils import (add_counts_to_bn, get_min_max_bns, noisy_bn, safe_assert)
+from src.utils import add_counts_to_bn, get_min_max_bns, noisy_bn, safe_assert
 
 
 def run_inferences(exp, ess, eps, config):
@@ -128,9 +128,7 @@ def cpt_value(
 
 
 # Get a naive Bayes log-joint
-def nb_log_joint(
-    bn: gum.BayesNet, target: str, t: float, children: dict
-) -> float:
+def nb_log_joint(bn: gum.BayesNet, target: str, t: float, children: dict) -> float:
     """
     Get log[P(target=t, children)] by exploiting the BN factorization.
     The DAG is a naive Bayes with `target` a binary target variable.

diff --git a/src/utils.py b/src/utils.py
@@ -32,7 +32,7 @@ def get_llr(x: dict, theta, theta_hat):
 
 
 # Check BNs sampled from a CN
-def are_all_bns_different(bn_vec) -> None:
+def are_all_bns_different(bn_vec) -> bool:
 
     signatures = set()
     for bn in bn_vec:
@@ -46,6 +46,8 @@ def are_all_bns_different(bn_vec) -> None:
 
     print(f"({len(signatures)}/{len(bn_vec)} different BNs.)")
 
+    return len(signatures) == len(bn_vec)
+
 
 # Add counts of events to a BN
 def add_counts_to_bn(bn, data):
@@ -133,11 +135,12 @@ def get_min_max_bns(cn, exp: str):
 
 
 # Sample from a credal set K(x | pi_x), i.e., a constrained polytope.
-def sample_from_cset(vec_min, vec_max):
+def sample_from_cset(vec_min, vec_max, n_samples) -> list:
     """
-    A credal set is a polytope in a space of #X parameters, defined by a:
+    We assume a credal set is a polytope in a space of #X parameters, defined by a:
      - Multi-dimensional rectangle, i.e., inequality constraint Ax <= b, and
      - Hyperplane (provided all the variables sum up to 1), i.e., equality constraint A_eq x = b_eq.
+    In the case of local IDM, this is ensured.
     """
 
     # Define the rectangle
@@ -158,78 +161,88 @@ def sample_from_cset(vec_min, vec_max):
     # Sample from the polytope
     mc = hopsy.MarkovChain(constrained_rectangle)
     rng = hopsy.RandomNumberGenerator(42)
-    _, constrained_samples = hopsy.sample(mc, rng, n_samples=1, thinning=10)
-    constrained_samples = constrained_samples.flatten()
+    _, constrained_samples = hopsy.sample(mc, rng, n_samples, thinning=10)
+    constrained_samples = constrained_samples[0]
 
     # Debug
     safe_assert(np.all(vec_min <= vec_max))
     safe_assert(n_par == len(vec_max))
     safe_assert(n_par == A.shape[1])
     safe_assert(n_par == A_eq.shape[1])
-    safe_assert(n_par == len(constrained_samples))
+    safe_assert(len(constrained_samples) == n_samples)
+    for i in constrained_samples:
+        safe_assert(len(i) == n_par)
 
     return constrained_samples
 
 
-# Sample from two esxtreme CPTs
-def sample_from_cpts(cpt_min, cpt_max) -> np.array:
+# Sample from two extreme CPTs
+def sample_from_cpts(cpt_min, cpt_max, n_samples) -> list:
 
     # Transform CPTs into pandas dataframes
     cpt_min = np.atleast_2d(cpt_min.topandas())
     cpt_max = np.atleast_2d(cpt_max.topandas())
 
-    # Sample conditional distributions
-    cpt_sample = []
+    # For each row in the CPT ...
+    credal_dict = {}
     for row in range(cpt_min.shape[0]):
 
-        vec_min = cpt_min[row, :]
-        vec_max = cpt_max[row, :]
+        # ... sample `n_samples` points from the credal set
+        credal_dict[row] = sample_from_cset(cpt_min[row, :], cpt_max[row, :], n_samples)
+
+    # For each sample ...
+    cpt_samples = []
+    for i in range(n_samples):
 
-        # Sample from polytope
-        vec_sample = sample_from_cset(vec_min, vec_max)
-        cpt_sample.append(vec_sample)
+        # ... build the CPT
+        cpt = []
+        for row in range(cpt_min.shape[0]):
+            cpt.append(credal_dict[row][i])
 
-    cpt_sample = np.array(cpt_sample).flatten()
+        cpt = np.array(cpt).flatten()
+        cpt_samples.append(cpt)
 
     # Debug
     safe_assert(cpt_min.shape == cpt_max.shape)
-    safe_assert(cpt_min.shape == cpt_max.shape)
-    safe_assert(prod(cpt_min.shape) == prod(cpt_max.shape))
-    safe_assert(len(cpt_sample) == prod(cpt_min.shape))
+    safe_assert(len(credal_dict) == prod(cpt_min.shape))
+    safe_assert(len(cpt_samples) == n_samples)
 
-    return cpt_sample
+    return cpt_samples
 
 
 # BNs sampler from a CN
-def sample_from_cn(cn, exp: str, n: int):
+def sample_from_cn(cn, exp: str, n_samples: int) -> list:
 
     # Get the DAG and extreme BNs
     dag = gum.BayesNet(cn.current_bn())
     bn_min, bn_max = get_min_max_bns(cn, exp)
 
-    # Draw n random BNs
+    # For each variable ...
+    cpts_dict = {}
+    for var in dag.names():
+
+        # ... sample `n_samples` CPTs from the CN
+        cpts_dict[var] = sample_from_cpts(bn_min.cpt(var), bn_max.cpt(var), n_samples)
+
+    # For each sample ...
     bns = []
-    for _ in range(n):
+    for i in range(n_samples):
 
-        # Init an empty BN
+        # ... init an empty BN ...
         bn = gum.BayesNet(dag)
 
-        # For each variable ...
+        # ... and fill its CPTs
         for var in dag.names():
-
-            # ... sample from the CN CPT, ...
-            cpt_sample = sample_from_cpts(bn_min.cpt(var), bn_max.cpt(var))
-
-            # ... and fill the BN's CPT
-            bn.cpt(var).fillWith(cpt_sample)
+            bn.cpt(var).fillWith(cpts_dict[var][i])
 
         bns.append(bn)
 
         # Debug
         safe_assert(check_consistency(bn, bn_min, bn_max))
 
     # Debug
-    safe_assert(n == len(bns))
+    safe_assert(len(cpts_dict) == len(dag.names()))
+    safe_assert(len(bns) == n_samples)
 
     return bns
 
@@ -239,27 +252,27 @@ def check_consistency(bn, bn_min, bn_max) -> bool:
 
     for var in bn.names():
         bn_cpt = np.atleast_2d(bn.cpt(var).topandas())
-        bn_min_cpt = np.array(bn_min.cpt(var).topandas())
-        bn_max_cpt = np.array(bn_max.cpt(var).topandas())
+        bn_min_cpt = np.atleast_2d(bn_min.cpt(var).topandas())
+        bn_max_cpt = np.atleast_2d(bn_max.cpt(var).topandas())
 
         # Check if probabilities sum to 1
         sum_vec = np.sum(bn_cpt, axis=1)
-        probability_integrity = np.all(np.abs(sum_vec - 1) < 1e-5)
+        probability_consistency = np.all(np.abs(sum_vec - 1) < 1e-5)
 
         # Check if the BN CPT is >= min CPT
-        min_integrity = np.all(bn_cpt >= bn_min_cpt)
+        min_consistency = np.all(bn_cpt >= bn_min_cpt)
 
         # Check if the BN CPT is <= max CPT
-        max_integrity = np.all(bn_cpt <= bn_max_cpt)
+        max_consistency = np.all(bn_cpt <= bn_max_cpt)
 
-        integrity = probability_integrity and min_integrity and max_integrity
+        consistency = probability_consistency and min_consistency and max_consistency
 
-        if integrity:
+        if consistency:
             continue
         else:
-            print("probability_integrity: ", probability_integrity)
-            print("min_integrity: ", min_integrity)
-            print("max_integrity: ", max_integrity)
+            print("probability_consistency: ", probability_consistency)
+            print("min_consistency: ", min_consistency)
+            print("max_consistency: ", max_consistency)
             return False
 
     return True