GispoCoding · zelioluca · Oct 11, 2023 · Jan 11, 2024 · Jan 11, 2024 · Jan 11, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+.idea/*
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/eis_toolkit.iml b/.idea/eis_toolkit.iml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/docs/bayesian_nn.md b/docs/bayesian_nn.md
@@ -0,0 +1,30 @@
+### This document is taken from the official EIS TOOLKIT Docs
+
+In traditional deep neural networks (DNN), the trained weights of a DNN are a point estimate for each parameter, thereby 
+producing deterministic network outputs for a given input. On the other hand, Bayesian neural networks (BNN) establish 
+a probabilistic distribution over the weight parameters. The process of estimating the posterior weight distribution 
+enables Bayesian neural networks (BNNs) to capture and quantify uncertainty in their predictions effectively 
+(Olivier et al., 2021). Measuring uncertainty can help identify when the predictions for a test input are noisy because 
+the input deviates from the training distribution or because the model cannot account for unknown causes. The point 
+predictions generated by Deep Neural Networks (DNNs) do not provide information regarding the inherent features 
+of the input.
+
+The total uncertainty in the prediction, referred to as the predictive uncertainty, comprises both epistemic 
+and aleatoric uncertainty (Joachims, 2021). Epistemic uncertainty refers to uncertainty within the model parameters. 
+It is reducible uncertainty, which means that it can be reduced by collecting more data or improving the model. It 
+is reflected in the posterior weight distribution: a peaked posterior distribution indicates lower epistemic 
+uncertainty, while a broader posterior distribution indicates higher epistemic uncertainty. Aleatoric uncertainty, 
+by contrast, stems from noise inherent in the data and cannot be reduced by collecting more data. For a given input 
+point and fixed weight parameters, higher aleatoric uncertainty denotes a noisy and uncertain estimate of the 
+model's output. In these cases, the model shows a lack of confidence in its forecasts, which is indicative of 
+significant uncertainty or variation in the evaluated results.
+
+The implementation of a Bayesian Neural Network (BNN) involves the utilization of probabilistic (e.g., DenseVariational) 
+layers to define the posterior and prior distributions over the weights of the model, hence enabling the estimation of 
+uncertainty (Chang et al., 2021). This methodology enables the neural network to effectively include the inherent 
+uncertainty present in both the data and the model, resulting in the generation of a probability distribution 
+covering a range of potential outputs, as opposed to a single deterministic prediction. By utilizing probabilistic 
+layers, the neural network is capable of maintaining and refining a distribution over every weight. This approach 
+allows for a more comprehensive representation that encompasses the uncertainty and variability in both the model 
+parameters and predictions. Consequently, it leads to the development of a more resilient and informative model, 
+particularly in situations where there is uncertainty or limited data availability.
+
diff --git a/eis_toolkit/bayesian_nn/__init__.py b/eis_toolkit/bayesian_nn/__init__.py
diff --git a/eis_toolkit/bayesian_nn/bayesian_nn.py b/eis_toolkit/bayesian_nn/bayesian_nn.py
@@ -0,0 +1,300 @@
+from typing import Any, Callable, Literal, Union
+
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+import tensorflow_probability as tfp
+
+from eis_toolkit.exceptions import InvalidInputDataException
+
+
+def __posterior_mean_field(
+    kernel_size=int or tuple[int, int],
+    bias_size: int = 0,
+    reinterpreted_batch_ndims: int = 1,
+    scale: float = 1e-5,
+    dtype=None,
+) -> tf.keras.Sequential():
+    """
+    Do the posterior mean field.
+
+    Parameters:
+    - kernel_size: integer or a tuple of integer of the filter dimension.
+    - bias_size: the size of the bias default parameters is zero.
+    - dtype: datatype of the layer default is float 32.
+    - scale: scale of the normal distribution.
+    - reinterpreted_batch_ndims: the reinterpreted dimension of the batch.
+
+    Returns:
+    - a sequential model the posteriori distribution.
+    """
+    n = kernel_size + bias_size
+    c = np.log(np.expm1(1.0))
+    return tf.keras.Sequential(
+        [
+            tfp.layers.VariableLayer(2 * n, dtype=dtype),
+            tfp.layers.DistributionLambda(
+                lambda t: tfp.distributions.Independent(
+                    tfp.distributions.Normal(loc=t[..., :n], scale=scale + tf.nn.softplus(c + t[..., n:])),
+                    reinterpreted_batch_ndims=1,
+                )
+            ),
+        ]
+    )
+
+
+def __prior_trainable(
+    kernel_size,
+    bias_size=0,
+    dtype=None,
+    scale: float = 1.0,
+    reinterpreted_batch_ndims: int = 1,
+) -> tf.keras.Sequential:
+    """
+    Do the learns of the optimal parameter for the bayesian NN.
+
+    Parameters:
+    - kernel_size: integer or a tuple of integer of the filter dimension.
+    - bias_size: the size of the bias default parameters is zero.
+    - dtype: datatype of the layer default is float 32.
+    - scale: scale of the normal distribution.
+    - reinterpreted_batch_ndims: the reinterpreted dimension of the batch.
+
+    Returns:
+    - trainable model
+    """
+    n = kernel_size + bias_size
+    return tf.keras.Sequential(
+        [
+            tfp.layers.VariableLayer(n, dtype=dtype),
+            tfp.layers.DistributionLambda(
+                lambda t: tfp.distributions.Independent(
+                    tfp.distributions.Normal(loc=t, scale=scale), reinterpreted_batch_ndims=reinterpreted_batch_ndims
+                )
+            ),
+        ]
+    )
+
+
+def __create_probabilistic_bnn_model(
+    train_size: int,
+    hidden_units: list[int],
+    features_name: list[str or int],
+    last_activation: Literal["softmax", "sigmoid"],
+) -> tf.keras.Model:
+    """
+    Do the bayesian model.
+
+    Parameters:
+    - train_size: the train size.
+    - hidden_units: number of the layers of the network.
+    - features_name: name of the feature to impute to the network.
+    - last_activation: the output activation of the network.
+
+    Returns:
+    - the model before compilation
+    """
+    inputs = {}
+
+    for feature_name in features_name:
+        inputs[feature_name] = tf.keras.layers.Input(name=feature_name, shape=(1,), dtype=tf.float32)
+
+    features = tf.keras.layers.Concatenate(axis=-1)(list(inputs.values()))
+    features = tf.keras.layers.BatchNormalization()(features)
+
+    for units in hidden_units:
+        features = tfp.layers.DenseVariational(
+            units=units,
+            make_prior_fn=__prior_trainable,
+            make_posterior_fn=__posterior_mean_field,
+            kl_weight=1 / train_size,
+            activation=last_activation,
+        )(features)
+
+    distribution_params = tf.keras.layers.Dense(units=2)(features)
+    outputs = tfp.layers.IndependentNormal(1)(distribution_params)
+
+    model = tf.keras.Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+def negative_loglikelihood(targets: tf.Tensor, estimated_distribution: tf.Tensor) -> tf.Tensor:
+    """
+    Do the negative likelihood loss.
+
+    Parameters:
+    - targets: real labels needed to compute this loss.
+    - estimated_distribution: the predicted labels needed to compute this loss.
+
+    Returns:
+    - negative log probabilities of the class
+    """
+    return -estimated_distribution.log_prob(targets)
+
+
+def generate_prediction_using_traditional_arrays(
+    X_train: dict[str, np.ndarray],
+    y_train: np.ndarray,
+    X_test: dict[str, np.ndarray],
+    y_test: np.ndarray,
+    validation_split: float or None,
+    features_name: list[str or int],
+    last_activation: Literal["softmax", "sigmoid"],
+    hidden_units: list[int],
+    batch_size: int,
+    num_epochs: int,
+    optimizer: Union[
+        tf.keras.optimizers.Adam, tf.keras.optimizers.Nadam, tf.keras.optimizers.RMSprop, tf.keras.optimizers.SGD
+    ],
+    loss: Union[tf.keras.losses.BinaryCrossentropy, tf.keras.losses.CategoricalCrossentropy, negative_loglikelihood],
+    metrics: Union[tf.keras.metrics.RootMeanSquaredError, tf.keras.metrics.Accuracy],
+) -> list[dict[str, any]] or tf.keras.Model:
+    """
+    Compute inferences and generate predictions with the bayesian model.
+
+    Parameters:
+    - X_train: the portion of the dataset used for training.
+    - X_test: the portion of the dataset used for testing.
+    - y_test: labels used for training.
+    - y_test: label used to report the observed data.
+    - validation_split: the amount of data to set as validation test (in float).
+    - features_name: a list of features name or number.
+    - last_activation: the output of the model.
+    - hidden_units: the number of the networks layer.
+    - batch_size: the batch size.
+    - num_epochs: the number of epochs.
+    - optimizer: optimizer of the network.
+    - loss: measure the error between the predicted and true values.
+    - metrics: performance of the model.
+    Raise:
+    - InvalidInputDataException: when the input data is None or invalid.
+    Returns:
+    - a list of dict that contains, predicted mean, std, CI lower and upper, the actual value. In the case there is
+      a test set, otherwise return the model for further analysis.
+    """
+
+    if X_train is None or y_train is None:
+        raise InvalidInputDataException
+
+    # here create the probabilistic model
+    prob_bnn_model = __create_probabilistic_bnn_model(
+        train_size=len(X_train),
+        hidden_units=hidden_units,
+        features_name=features_name,
+        last_activation=last_activation,
+    )
+
+    prob_bnn_model.compile(optimizer=optimizer, loss=loss, metrics=[metrics])
+
+    prob_bnn_model.fit(
+        X_train,
+        y_train,
+        epochs=num_epochs,
+        batch_size=batch_size,
+        validation_split=validation_split if validation_split is not None else None,
+        verbose=1,
+    )
+
+    if X_test is not None:
+        results = []
+        prediction_distribution = prob_bnn_model(X_test)
+        prediction_mean = prediction_distribution.mean().numpy().tolist()
+        prediction_stdv = prediction_distribution.stddev().numpy()
+
+        upper = (prediction_mean + (1.96 * prediction_stdv)).tolist()
+        lower = (prediction_mean - (1.96 * prediction_stdv)).tolist()
+        prediction_stdv = prediction_stdv.tolist()
+
+        for idx in range(len(prediction_mean)):
+            results.append(
+                {
+                    "mean": round(prediction_mean[idx][0], 2),
+                    "stddev": round(prediction_stdv[idx][0], 2),
+                    "95% CI lower": round(lower[idx][0], 2),
+                    "95% CI upper": round(upper[idx][0], 2),
+                    "Actual": y_test[idx],
+                }
+            )
+
+        return results
+    else:
+        return prob_bnn_model
+
+
+def generate_predictions_with_tensor_api(
+    train_dataset: Union[tf.data.Dataset, pd.DataFrame],
+    test_dataset: Union[tf.data.Dataset, pd.DataFrame, None],
+    features_name: list[str or int],
+    last_activation: Literal["softmax", "sigmoid"],
+    hidden_units: list[int],
+    batch_size: int,
+    num_epochs: int,
+    optimizer: Union[
+        tf.keras.optimizers.Adam, tf.keras.optimizers.Nadam, tf.keras.optimizers.RMSprop, tf.keras.optimizers.SGD
+    ],
+    loss: Union[tf.keras.losses.BinaryCrossentropy, tf.keras.losses.CategoricalCrossentropy, negative_loglikelihood],
+    metrics: Union[tf.keras.metrics.RootMeanSquaredError, tf.keras.metrics.Accuracy],
+) -> list[dict[str, any]] or tf.keras.Model:
+    """
+    Compute inferences and generate predictions with the bayesian model.
+
+    Parameters:
+    - train_dataset: the portion of the dataset used for training.
+    - test_dataset: the portion of the dataset used for testing.
+    - features_name: a list of features name or number.
+    - last_activation: the output of the model.
+    - hidden_units: the number of the networks layer.
+    - batch_size: the batch size.
+    - num_epochs: the number of epochs.
+    - optimizer: optimizer of the network.
+    - loss: measure the error between the predicted and true values.
+    - metrics: performance of the model.
+    Raise:
+    - InvalidInputDataException: when the input data is None or invalid.
+    Returns:
+    - a list of dict that contains, predicted mean, std, CI lower and upper, the actual value. In the case there is
+      a test set, otherwise return the model for further analysis.
+    """
+
+    if train_dataset is None:
+        raise InvalidInputDataException
+
+    # here create the probabilistic model
+    prob_bnn_model = __create_probabilistic_bnn_model(
+        train_size=len(train_dataset),
+        hidden_units=hidden_units,
+        features_name=features_name,
+        last_activation=last_activation,
+    )
+
+    prob_bnn_model.compile(optimizer=optimizer, loss=loss, metrics=[metrics])
+
+    prob_bnn_model.fit(train_dataset, epochs=num_epochs, verbose=1)
+
+    if test_dataset is not None:
+        results = []
+        sample = test_dataset.cardinality().numpy() * batch_size
+        examples, targets = list(test_dataset.unbatch().shuffle(batch_size * 10).batch(sample))[0]
+        prediction_distribution = prob_bnn_model(examples)
+
+        prediction_mean = prediction_distribution.mean().numpy().tolist()
+        prediction_stdv = prediction_distribution.stddev().numpy()
+
+        upper = (prediction_mean + (1.96 * prediction_stdv)).tolist()
+        lower = (prediction_mean - (1.96 * prediction_stdv)).tolist()
+        prediction_stdv = prediction_stdv.tolist()
+
+        for idx in range(len(prediction_mean)):
+            results.append(
+                {
+                    "mean": round(prediction_mean[idx][0], 2),
+                    "stddev": round(prediction_stdv[idx][0], 2),
+                    "95% CI lower": round(lower[idx][0], 2),
+                    "95% CI upper": round(upper[idx][0], 2),
+                    "Actual": targets[idx].numpy(),
+                }
+            )
+
+        return results
+    else:
+        return prob_bnn_model
diff --git a/eis_toolkit/exceptions.py b/eis_toolkit/exceptions.py
@@ -72,3 +72,11 @@ class NonSquarePixelSizeException(Exception):
 
 class NumericValueSignException(Exception):
     """Exception error class for numeric value sign exception."""
+
+
+class InvalidNumberOfConv2DLayer(Exception):
+    """Exception raised when the number of conv layers is <= 0."""
+
+
+class InvalidInputDataException(Exception):
+    """Exception raised when the input data is None or invalid."""
diff --git a/eis_toolkit/unet/__init__.py b/eis_toolkit/unet/__init__.py