Tweaked plotting code.

ralager · ralager · commit edb43ecb86e0 · 2021-04-08T13:49:19.000-06:00
diff --git a/ml4convection/figures/make_permutation_figure.py b/ml4convection/figures/make_permutation_figure.py
@@ -0,0 +1,266 @@
+"""Makes 4-panel figure to show results of permutation test."""
+
+import os
+import argparse
+import numpy
+import matplotlib
+matplotlib.use('agg')
+import matplotlib.pyplot as pyplot
+from gewittergefahr.gg_utils import file_system_utils
+from gewittergefahr.deep_learning import permutation_utils as gg_permutation
+from gewittergefahr.plotting import permutation_plotting
+from gewittergefahr.plotting import imagemagick_utils
+from ml4convection.machine_learning import permutation
+
+BAR_FACE_COLOUR = numpy.array([27, 158, 119], dtype=float) / 255  # RGB, 0-1
+
+FIGURE_WIDTH_INCHES = 15.  # width and height are used as `figsize` per panel
+FIGURE_HEIGHT_INCHES = 15.
+FIGURE_RESOLUTION_DPI = 300  # passed as `dpi` when each panel is saved
+CONCAT_FIGURE_SIZE_PX = int(1e7)  # passed to `resize_image` for merged figure
+
+PREDICTOR_NAME_TO_VERBOSE = {  # short predictor name -> label with wavelength
+    'Band 8': r'Band 8 (6.25 $\mu$m)',
+    'Band 9': r'Band 9 (6.95 $\mu$m)',
+    'Band 10': r'Band 10 (7.35 $\mu$m)',
+    'Band 11': r'Band 11 (8.60 $\mu$m)',
+    'Band 13': r'Band 13 (10.45 $\mu$m)',
+    'Band 14': r'Band 14 (11.20 $\mu$m)',
+    'Band 16': r'Band 16 (13.30 $\mu$m)'
+}
+
+FORWARD_FILE_ARG_NAME = 'input_forward_file_name'  # names of `--` options
+BACKWARDS_FILE_ARG_NAME = 'input_backwards_file_name'
+NUM_PREDICTORS_ARG_NAME = 'num_predictors_to_plot'
+CONFIDENCE_LEVEL_ARG_NAME = 'confidence_level'
+OUTPUT_DIR_ARG_NAME = 'output_dir_name'
+
+FORWARD_FILE_HELP_STRING = (
+    'Path to file with results of forward test (will be read by '
+    '`permutation.read_file` in the ml4convection library).'
+)
+BACKWARDS_FILE_HELP_STRING = (
+    'Path to file with results of backwards test (will be read by '
+    '`permutation.read_file` in the ml4convection library).'
+)
+NUM_PREDICTORS_HELP_STRING = (
+    'Will plot only the `{0:s}` most important predictors in each panel.  To '
+    'plot all predictors, leave this argument alone.'
+).format(NUM_PREDICTORS_ARG_NAME)
+
+CONFIDENCE_LEVEL_HELP_STRING = (
+    'Confidence level for error bars (in range 0...1).'
+)
+OUTPUT_DIR_HELP_STRING = (
+    'Path to output directory (figures will be saved here).'
+)
+
+INPUT_ARG_PARSER = argparse.ArgumentParser()  # parsed in the __main__ block
+INPUT_ARG_PARSER.add_argument(
+    '--' + FORWARD_FILE_ARG_NAME, type=str, required=True,
+    help=FORWARD_FILE_HELP_STRING
+)
+INPUT_ARG_PARSER.add_argument(
+    '--' + BACKWARDS_FILE_ARG_NAME, type=str, required=True,
+    help=BACKWARDS_FILE_HELP_STRING
+)
+INPUT_ARG_PARSER.add_argument(
+    '--' + NUM_PREDICTORS_ARG_NAME, type=int, required=False, default=-1,
+    help=NUM_PREDICTORS_HELP_STRING
+)  # the default of -1 is replaced by None at the top of `_run`
+INPUT_ARG_PARSER.add_argument(
+    '--' + CONFIDENCE_LEVEL_ARG_NAME, type=float, required=False, default=0.95,
+    help=CONFIDENCE_LEVEL_HELP_STRING
+)
+INPUT_ARG_PARSER.add_argument(
+    '--' + OUTPUT_DIR_ARG_NAME, type=str, required=True,
+    help=OUTPUT_DIR_HELP_STRING
+)
+
+
+def _results_to_gg_format(permutation_dict):
+    """Converts permutation results from ml4convection to GewitterGefahr format.
+
+    :param permutation_dict: Dictionary returned by `read_file` in
+        `ml4convection.machine_learning.permutation`.
+    :return: permutation_dict: Same dictionary (modified in place), with keys
+        added from `gewittergefahr.deep_learning.permutation_utils`.
+    """
+
+    permutation_dict[gg_permutation.ORIGINAL_COST_ARRAY_KEY] = (
+        permutation_dict[permutation.ORIGINAL_COST_KEY]
+    )
+
+    permutation_dict[gg_permutation.BACKWARDS_FLAG] = (
+        permutation_dict[permutation.BACKWARDS_FLAG_KEY]
+    )
+
+    permutation_dict[gg_permutation.BEST_PREDICTORS_KEY] = [
+        PREDICTOR_NAME_TO_VERBOSE[s] for s in  # KeyError if name not in table
+        permutation_dict[permutation.BEST_PREDICTORS_KEY]
+    ]
+
+    permutation_dict[gg_permutation.STEP1_PREDICTORS_KEY] = [
+        PREDICTOR_NAME_TO_VERBOSE[s] for s in  # KeyError if name not in table
+        permutation_dict[permutation.STEP1_PREDICTORS_KEY]
+    ]
+
+    return permutation_dict
+
+
+def _run(forward_file_name, backwards_file_name, num_predictors_to_plot,
+         confidence_level, output_dir_name):
+    """Makes 4-panel figure to show results of permutation test.
+
+    Main method: saves 4 panels, merges them into one JPEG, deletes the panels.
+
+    :param forward_file_name: See documentation at top of file.
+    :param backwards_file_name: Same.
+    :param num_predictors_to_plot: Same.
+    :param confidence_level: Same.
+    :param output_dir_name: Same.
+    """
+
+    if num_predictors_to_plot <= 0:  # includes the command-line default of -1
+        num_predictors_to_plot = None  # presumably "plot all" -- TODO confirm
+
+    file_system_utils.mkdir_recursive_if_necessary(
+        directory_name=output_dir_name
+    )
+
+    print('Reading data from: "{0:s}"...'.format(forward_file_name))
+    forward_permutation_dict = permutation.read_file(forward_file_name)
+    forward_permutation_dict = _results_to_gg_format(forward_permutation_dict)
+
+    print('Reading data from: "{0:s}"...'.format(backwards_file_name))
+    backwards_permutation_dict = permutation.read_file(backwards_file_name)
+    backwards_permutation_dict = _results_to_gg_format(
+        backwards_permutation_dict
+    )
+
+    figure_object, axes_object = pyplot.subplots(  # panel 1 of 4
+        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES)
+    )
+    permutation_plotting.plot_single_pass_test(
+        permutation_dict=forward_permutation_dict, axes_object=axes_object,
+        num_predictors_to_plot=num_predictors_to_plot,
+        plot_percent_increase=False, confidence_level=confidence_level,
+        bar_face_colour=BAR_FACE_COLOUR
+    )
+    axes_object.set_title('Single-pass forward')
+    axes_object.set_xlabel('')  # only the backward panels keep an x-label
+
+    this_file_name = '{0:s}/single_pass_forward.jpg'.format(output_dir_name)
+    panel_file_names = [this_file_name]  # later passed to concatenate_images
+
+    print('Saving figure to: "{0:s}"...'.format(panel_file_names[-1]))
+    figure_object.savefig(
+        panel_file_names[-1], dpi=FIGURE_RESOLUTION_DPI,
+        pad_inches=0, bbox_inches='tight'
+    )
+    pyplot.close(figure_object)
+
+    figure_object, axes_object = pyplot.subplots(  # panel 2 of 4
+        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES)
+    )
+    permutation_plotting.plot_multipass_test(
+        permutation_dict=forward_permutation_dict, axes_object=axes_object,
+        num_predictors_to_plot=num_predictors_to_plot,
+        plot_percent_increase=False, confidence_level=confidence_level,
+        bar_face_colour=BAR_FACE_COLOUR
+    )
+    axes_object.set_title('Multi-pass forward')
+    axes_object.set_xlabel('')
+    axes_object.set_ylabel('')  # both multi-pass panels drop the y-label
+
+    this_file_name = '{0:s}/multi_pass_forward.jpg'.format(output_dir_name)
+    panel_file_names.append(this_file_name)
+
+    print('Saving figure to: "{0:s}"...'.format(panel_file_names[-1]))
+    figure_object.savefig(
+        panel_file_names[-1], dpi=FIGURE_RESOLUTION_DPI,
+        pad_inches=0, bbox_inches='tight'
+    )
+    pyplot.close(figure_object)
+
+    figure_object, axes_object = pyplot.subplots(  # panel 3 of 4
+        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES)
+    )
+    permutation_plotting.plot_single_pass_test(
+        permutation_dict=backwards_permutation_dict, axes_object=axes_object,
+        num_predictors_to_plot=num_predictors_to_plot,
+        plot_percent_increase=False, confidence_level=confidence_level,
+        bar_face_colour=BAR_FACE_COLOUR
+    )
+    axes_object.set_title('Single-pass backward')
+    axes_object.set_xlabel('1 - FSS')
+
+    this_file_name = '{0:s}/single_pass_backward.jpg'.format(output_dir_name)
+    panel_file_names.append(this_file_name)
+
+    print('Saving figure to: "{0:s}"...'.format(panel_file_names[-1]))
+    figure_object.savefig(
+        panel_file_names[-1], dpi=FIGURE_RESOLUTION_DPI,
+        pad_inches=0, bbox_inches='tight'
+    )
+    pyplot.close(figure_object)
+
+    figure_object, axes_object = pyplot.subplots(  # panel 4 of 4
+        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES)
+    )
+    permutation_plotting.plot_multipass_test(
+        permutation_dict=backwards_permutation_dict, axes_object=axes_object,
+        num_predictors_to_plot=num_predictors_to_plot,
+        plot_percent_increase=False, confidence_level=confidence_level,
+        bar_face_colour=BAR_FACE_COLOUR
+    )
+    axes_object.set_title('Multi-pass backward')
+    axes_object.set_xlabel('1 - FSS')
+    axes_object.set_ylabel('')
+
+    this_file_name = '{0:s}/multi_pass_backward.jpg'.format(output_dir_name)
+    panel_file_names.append(this_file_name)
+
+    print('Saving figure to: "{0:s}"...'.format(panel_file_names[-1]))
+    figure_object.savefig(
+        panel_file_names[-1], dpi=FIGURE_RESOLUTION_DPI,
+        pad_inches=0, bbox_inches='tight'
+    )
+    pyplot.close(figure_object)
+
+    concat_figure_file_name = '{0:s}/permutation_test.jpg'.format(
+        output_dir_name
+    )
+    print('Concatenating panels to: "{0:s}"...'.format(concat_figure_file_name))
+
+    imagemagick_utils.concatenate_images(
+        input_file_names=panel_file_names,
+        output_file_name=concat_figure_file_name,
+        num_panel_rows=2, num_panel_columns=2
+    )
+    imagemagick_utils.trim_whitespace(  # same input and output: overwrites
+        input_file_name=concat_figure_file_name,
+        output_file_name=concat_figure_file_name
+    )
+    imagemagick_utils.resize_image(  # same input and output: overwrites
+        input_file_name=concat_figure_file_name,
+        output_file_name=concat_figure_file_name,
+        output_size_pixels=CONCAT_FIGURE_SIZE_PX
+    )
+
+    for this_file_name in panel_file_names:  # keep only the merged figure
+        os.remove(this_file_name)
+
+
+if __name__ == '__main__':  # runs only when executed as a script
+    INPUT_ARG_OBJECT = INPUT_ARG_PARSER.parse_args()
+
+    _run(  # each option is looked up by the same name used to register it
+        forward_file_name=getattr(INPUT_ARG_OBJECT, FORWARD_FILE_ARG_NAME),
+        backwards_file_name=getattr(INPUT_ARG_OBJECT, BACKWARDS_FILE_ARG_NAME),
+        num_predictors_to_plot=getattr(
+            INPUT_ARG_OBJECT, NUM_PREDICTORS_ARG_NAME
+        ),
+        confidence_level=getattr(INPUT_ARG_OBJECT, CONFIDENCE_LEVEL_ARG_NAME),
+        output_dir_name=getattr(INPUT_ARG_OBJECT, OUTPUT_DIR_ARG_NAME)
+    )
diff --git a/ml4convection/machine_learning/neural_net.py b/ml4convection/machine_learning/neural_net.py
@@ -1,12 +1,13 @@
 """Methods for training and applying neural nets."""
 
 import copy
-import random
 import os.path
 import dill
 import numpy
+numpy.random.seed(6695)
 import keras
 import tensorflow
+tensorflow.random.set_seed(6695)
 import tensorflow.keras as tf_keras
 from gewittergefahr.gg_utils import file_system_utils
 from gewittergefahr.gg_utils import error_checking
@@ -1497,7 +1498,9 @@ def generator_full_grid(option_dict):
             ' are available.'
         )
 
-    random.shuffle(valid_date_strings)
+    valid_date_strings = numpy.array(valid_date_strings)
+    numpy.random.shuffle(valid_date_strings)
+    valid_date_strings = valid_date_strings.tolist()
     date_index = 0
 
     while True:
@@ -1743,7 +1746,8 @@ def train_model(
         num_validation_batches_per_epoch, validation_option_dict,
         mask_matrix, full_mask_matrix, loss_function_name, metric_names,
         do_early_stopping=True,
-        plateau_lr_multiplier=DEFAULT_LEARNING_RATE_MULTIPLIER):
+        plateau_lr_multiplier=DEFAULT_LEARNING_RATE_MULTIPLIER,
+        save_every_epoch=True):
     """Trains neural net on either full grid or partial grids.
 
     M = number of rows in full grid
@@ -1785,6 +1789,8 @@ def train_model(
     :param plateau_lr_multiplier: Multiplier for learning rate.  Learning
         rate will be multiplied by this factor upon plateau in validation
         performance.
+    :param save_every_epoch: Boolean flag.  If True, will save new model after
+        every epoch.
     """
 
     file_system_utils.mkdir_recursive_if_necessary(
@@ -1799,6 +1805,7 @@ def train_model(
     error_checking.assert_is_integer(num_validation_batches_per_epoch)
     error_checking.assert_is_geq(num_validation_batches_per_epoch, 2)
     error_checking.assert_is_boolean(do_early_stopping)
+    error_checking.assert_is_boolean(save_every_epoch)
 
     error_checking.assert_is_numpy_array(mask_matrix, num_dimensions=2)
     error_checking.assert_is_numpy_array(full_mask_matrix, num_dimensions=2)
@@ -1840,16 +1847,23 @@ def train_model(
         validation_option_dict[this_key] = training_option_dict[this_key]
 
     validation_option_dict = _check_generator_args(validation_option_dict)
-    model_file_name = '{0:s}/model.h5'.format(output_dir_name)
+
+    if save_every_epoch:
+        model_file_name = (
+            output_dir_name +
+            '/model_epoch={epoch:03d}_val-loss={val_loss:.6f}.h5'
+        )
+    else:
+        model_file_name = '{0:s}/model.h5'.format(output_dir_name)
 
     history_object = keras.callbacks.CSVLogger(
         filename='{0:s}/history.csv'.format(output_dir_name),
         separator=',', append=False
     )
     checkpoint_object = keras.callbacks.ModelCheckpoint(
         filepath=model_file_name, monitor='val_loss', verbose=1,
-        save_best_only=do_early_stopping, save_weights_only=False, mode='min',
-        period=1
+        save_best_only=not save_every_epoch, save_weights_only=False,
+        mode='min', period=1
     )
     list_of_callback_objects = [history_object, checkpoint_object]
 
diff --git a/ml4convection/plotting/evaluation_plotting.py b/ml4convection/plotting/evaluation_plotting.py
@@ -780,7 +780,7 @@ def plot_performance_diagram(
     if num_bootstrap_reps > 1:
         polygon_coord_matrix = confidence_interval_to_polygon(
             x_value_matrix=success_ratio_matrix, y_value_matrix=pod_matrix,
-            confidence_level=confidence_level, same_order=False
+            confidence_level=confidence_level, same_order=True
         )
 
         polygon_colour = matplotlib.colors.to_rgba(line_colour, POLYGON_OPACITY)
diff --git a/ml4convection/scripts/plot_composite_saliency_map.py b/ml4convection/scripts/plot_composite_saliency_map.py
@@ -162,7 +162,8 @@ def _plot_predictors(brightness_temp_matrix_kelvins, band_numbers,
                 cbar_orientation_string = None
 
             colour_bar_object = satellite_plotting.plot_2d_grid_xy(
-                brightness_temp_matrix_kelvins=brightness_temp_matrix_kelvins,
+                brightness_temp_matrix_kelvins=
+                brightness_temp_matrix_kelvins[..., j, k],
                 axes_object=axes_object_matrix[j, k],
                 cbar_orientation_string=cbar_orientation_string,
                 font_size=FONT_SIZE
diff --git a/ml4convection/scripts/plot_evaluation_by_time.py b/ml4convection/scripts/plot_evaluation_by_time.py
@@ -590,7 +590,7 @@ def _run(input_dir_name, probability_threshold, confidence_level,
         # Plot hourly reliability curves.
         figure_object, axes_object = _plot_reliability_curves(
             score_tables_xarray=hourly_score_tables_xarray,
-            confidence_level=None
+            confidence_level=confidence_level
         )
         axes_object.set_title('Reliability curve by hour')
 
@@ -608,7 +608,7 @@ def _run(input_dir_name, probability_threshold, confidence_level,
         # Plot monthly reliability curves.
         figure_object, axes_object = _plot_reliability_curves(
             score_tables_xarray=monthly_score_tables_xarray,
-            confidence_level=None
+            confidence_level=confidence_level
         )
         axes_object.set_title('Reliability curve by month')
 
diff --git a/ml4convection/scripts/plot_permutation_results.py b/ml4convection/scripts/plot_permutation_results.py
diff --git a/ml4convection/scripts/train_neural_net.py b/ml4convection/scripts/train_neural_net.py

Original file line number	Diff line number	Diff line change
`@@ -780,7 +780,7 @@ def plot_performance_diagram(`
`780`	`780`	`if num_bootstrap_reps > 1:`
`781`	`781`	`polygon_coord_matrix = confidence_interval_to_polygon(`
`782`	`782`	`x_value_matrix=success_ratio_matrix, y_value_matrix=pod_matrix,`
`783`		`- confidence_level=confidence_level, same_order=False`
	`783`	`+ confidence_level=confidence_level, same_order=True`
`784`	`784`	`)`
`785`	`785`
`786`	`786`	`polygon_colour = matplotlib.colors.to_rgba(line_colour, POLYGON_OPACITY)`