From 06f1da812f1da44747700171bf734d86329bd270 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 19 Jun 2019 09:08:01 +0200 Subject: [PATCH 1/5] Added meaningful error message when files are not found --- esmvalcore/_data_finder.py | 8 +++--- esmvalcore/_recipe.py | 20 ++++++++------ esmvalcore/_recipe_checks.py | 18 ++++++++++-- tests/integration/data_finder.yml | 40 +++++++++++++++++++++++++++ tests/integration/test_data_finder.py | 15 ++++++++-- 5 files changed, 83 insertions(+), 18 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 258b315c7d..0cb5c99afe 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -235,14 +235,14 @@ def _find_input_files(variable, rootpath, drs, fx_var=None): filenames_glob = _get_filenames_glob(variable, drs, fx_var) files = find_files(input_dirs, filenames_glob) - return files + return (files, input_dirs, filenames_glob) def get_input_filelist(variable, rootpath, drs): """Return the full path to input files.""" - files = _find_input_files(variable, rootpath, drs) + (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) files = select_files(files, variable['start_year'], variable['end_year']) - return files + return (files, dirnames, filenames) def get_input_fx_filelist(variable, rootpath, drs): @@ -256,7 +256,7 @@ def get_input_fx_filelist(variable, rootpath, drs): realm = getattr(table.get(var['short_name']), 'modeling_realm', None) var['modeling_realm'] = realm if realm else table.realm - files = _find_input_files(var, rootpath, drs, fx_var) + (files, _, _) = _find_input_files(var, rootpath, drs, fx_var) fx_files[fx_var] = files[0] if files else None return fx_files diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 3dc6cdf48c..9ae2990e05 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -234,18 +234,18 @@ def _augment(base, update): def _dataset_to_file(variable, config_user): """Find the first file belonging to dataset from variable info.""" - files = get_input_filelist( + (files, dirnames, filenames) = get_input_filelist( variable=variable, rootpath=config_user['rootpath'], drs=config_user['drs']) if not files and variable.get('derive'): first_required = get_required(variable['short_name'])[0] _augment(first_required, variable) - files = get_input_filelist( + (files, dirnames, filenames) = get_input_filelist( variable=first_required, rootpath=config_user['rootpath'], drs=config_user['drs']) - check.data_availability(files, variable) + check.data_availability(files, variable, dirnames, filenames) return files[0] @@ -438,7 +438,7 @@ def _read_attributes(filename): def _get_input_files(variable, config_user): """Get the input files for a single dataset.""" # Find input files locally. - input_files = get_input_filelist( + (input_files, dirnames, filenames) = get_input_filelist( variable=variable, rootpath=config_user['rootpath'], drs=config_user['drs']) @@ -447,13 +447,15 @@ def _get_input_files(variable, config_user): # Do not download if files are already available locally. if config_user['synda_download'] and not input_files: input_files = synda_search(variable) + dirnames = None + filenames = None logger.info("Using input files for variable %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], '\n'.join(input_files)) if (not config_user.get('skip-nonexistent') or variable['dataset'] == variable.get('reference_dataset')): - check.data_availability(input_files, variable) + check.data_availability(input_files, variable, dirnames, filenames) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -721,10 +723,10 @@ def append(group_prefix, var): for variable in variables: group_prefix = variable['variable_group'] + '_derive_input_' - if not variable.get('force_derivation') and get_input_filelist( - variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs']): + (input_files, _, _) = get_input_filelist( + variable=variable, rootpath=config_user['rootpath'], + drs=config_user['drs']) + if not variable.get('force_derivation') and input_files: # No need to derive, just process normally up to derive step var = deepcopy(variable) append(group_prefix, var) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index a6ab607df0..ed716386e5 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -90,10 +90,24 @@ def variable(var, required_keys): missing, var.get('short_name'), var.get('diagnostic'))) -def data_availability(input_files, var): +def data_availability(input_files, var, dirnames, filenames): """Check if the required input data is available.""" if not input_files: - raise RecipeError("No input files found for variable {}".format(var)) + var.pop('filename', None) + logger.error("No input files found for variable %s", var) + if dirnames and filenames: + logger.error("Looked for files matching %s in %s", filenames, + dirnames) + elif dirnames and not filenames: + logger.error( + "Looked for files in %s, but did not find any file pattern " + "to match against", dirnames) + elif filenames and not dirnames: + logger.error( + "Looked for files matching %s, but did not find any existing " + "input directory", filenames) + logger.error("Set 'log_level' to 'debug' to get more information") + raise RecipeError("Missing data") required_years = set(range(var['start_year'], var['end_year'] + 1)) available_years = set() diff --git a/tests/integration/data_finder.yml b/tests/integration/data_finder.yml index cd718fe2b6..0094c49d30 100644 --- a/tests/integration/data_finder.yml +++ b/tests/integration/data_finder.yml @@ -35,6 +35,10 @@ get_input_filelist: - ta_Amon_HadGEM2-ES_historical_r1i1p1_193412-195911.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + dirs: + - '' + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1_*.nc found_files: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc @@ -48,6 +52,11 @@ get_input_filelist: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_198413-200512.nc - ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc + dirs: + - '' + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1_*.nc + - ta_Amon_HadGEM2-ES_rcp85_r1i1p1_*.nc found_files: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_198413-200512.nc @@ -63,10 +72,17 @@ get_input_filelist: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_198413-200512.nc - ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc + dirs: + - '' + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1_*.nc found_files: [] - drs: default variable: *variable + dirs: null + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1_*.nc found_files: [] - drs: BADC @@ -84,6 +100,10 @@ get_input_filelist: available_symlinks: - link_name: MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest target: v20120928 + dirs: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1_*.nc found_files: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc @@ -100,6 +120,10 @@ get_input_filelist: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_193412-195911.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + dirs: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1_*.nc found_files: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc @@ -119,6 +143,14 @@ get_input_filelist: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc - MOHC/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_rcp45_r1i1p1_200601-210012.nc - MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc + dirs: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta + - MOHC/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/v20110330/ta + - MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/v20110330/ta + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1_*.nc + - ta_Amon_HadGEM2-ES_rcp45_r1i1p1_*.nc + - ta_Amon_HadGEM2-ES_rcp85_r1i1p1_*.nc found_files: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc @@ -137,6 +169,10 @@ get_input_filelist: - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_193412-195911.nc - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + dirs: + - historical/Amon/ta/HadGEM2-ES/r1i1p1 + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1_*.nc found_files: - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc @@ -154,6 +190,10 @@ get_input_filelist: - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc - rcp85/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc + dirs: + - historical/Amon/ta/HadGEM2-ES/r1i1p1 + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1_*.nc found_files: - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index 791b54453d..b64e8853d3 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -87,11 +87,20 @@ def test_get_input_filelist(root, cfg): # Find files rootpath = {cfg['variable']['project']: [root]} drs = {cfg['variable']['project']: cfg['drs']} - input_filelist = get_input_filelist(cfg['variable'], rootpath, drs) + (input_filelist, dirnames, + filenames) = get_input_filelist(cfg['variable'], rootpath, drs) # Test result - reference = [os.path.join(root, file) for file in cfg['found_files']] - assert sorted(input_filelist) == sorted(reference) + ref_files = [os.path.join(root, file) for file in cfg['found_files']] + if cfg['dirs'] is None: + ref_dirs = [] + else: + ref_dirs = [os.path.join(root, dir) for dir in cfg['dirs']] + ref_patterns = cfg['file_patterns'] + + assert sorted(input_filelist) == sorted(ref_files) + assert sorted(dirnames) == sorted(ref_dirs) + assert sorted(filenames) == sorted(ref_patterns) @pytest.mark.parametrize('cfg', CONFIG['get_input_fx_filelist']) From a0843f3fa048c47f1b5a73796bdb25640e5b6153 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 19 Jun 2019 11:16:44 +0200 Subject: [PATCH 2/5] Improved error message --- esmvalcore/_recipe_checks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index ed716386e5..d39989c474 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -1,4 +1,5 @@ """Module with functions to check a recipe.""" +import itertools import logging import os import subprocess @@ -96,8 +97,9 @@ def data_availability(input_files, var, dirnames, filenames): var.pop('filename', None) logger.error("No input files found for variable %s", var) if dirnames and filenames: - logger.error("Looked for files matching %s in %s", filenames, - dirnames) + patterns = itertools.product(dirnames, filenames) + patterns = [os.path.join(d, f) for (d, f) in patterns] + logger.error("Looked for files matching\n%s", "\n".join(patterns)) elif dirnames and not filenames: logger.error( "Looked for files in %s, but did not find any file pattern " From 5c92f779efdb33e28e683cd47019070ed83c538f Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 19 Jun 2019 12:23:22 +0200 Subject: [PATCH 3/5] Addressed @ledm's comment --- esmvalcore/_recipe_checks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index d39989c474..178c65258a 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -99,7 +99,11 @@ def data_availability(input_files, var, dirnames, filenames): if dirnames and filenames: patterns = itertools.product(dirnames, filenames) patterns = [os.path.join(d, f) for (d, f) in patterns] - logger.error("Looked for files matching\n%s", "\n".join(patterns)) + if len(patterns) == 1: + msg = f': {patterns[0]}' + else: + msg = '\n{}'.format('\n'.join(patterns)) + logger.error("Looked for files matching%s", msg) elif dirnames and not filenames: logger.error( "Looked for files in %s, but did not find any file pattern " From d2e7004244ef9b0ebf2735df2331a1dfaafad279 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Sat, 22 Jun 2019 01:32:37 +0200 Subject: [PATCH 4/5] Improved error message when derivation fails because of missing data --- esmvalcore/preprocessor/_derive/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_derive/__init__.py b/esmvalcore/preprocessor/_derive/__init__.py index fc6baec582..f42421252a 100644 --- a/esmvalcore/preprocessor/_derive/__init__.py +++ b/esmvalcore/preprocessor/_derive/__init__.py @@ -108,7 +108,15 @@ def derive(cubes, # Derive variable DerivedVariable = ALL_DERIVED_VARIABLES[short_name] # noqa: N806 - cube = DerivedVariable().calculate(cubes) + try: + cube = DerivedVariable().calculate(cubes) + except Exception as exc: + msg = (f"Derivation of variable '{short_name}' failed. If you used " + f"the option '--skip-nonexistent' for running your recipe, " + f"this might be caused by missing input data for derivation " + f"('{short_name}' needs the variables " + f"{DerivedVariable().required}.") + raise ValueError(msg) from exc # Set standard attributes cube.var_name = short_name From 4e33766ac2b931cd62d18b191eb04397d344a1d4 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Sat, 22 Jun 2019 01:35:45 +0200 Subject: [PATCH 5/5] Added missing bracket --- esmvalcore/preprocessor/_derive/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_derive/__init__.py b/esmvalcore/preprocessor/_derive/__init__.py index f42421252a..a40d8dbab4 100644 --- a/esmvalcore/preprocessor/_derive/__init__.py +++ b/esmvalcore/preprocessor/_derive/__init__.py @@ -115,7 +115,7 @@ def derive(cubes, f"the option '--skip-nonexistent' for running your recipe, " f"this might be caused by missing input data for derivation " f"('{short_name}' needs the variables " - f"{DerivedVariable().required}.") + f"{DerivedVariable().required}).") raise ValueError(msg) from exc # Set standard attributes