diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 78a2214903..7b39127995 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -125,7 +125,7 @@ def _replace_tags(path, variable): if tag == 'latestversion': # handled separately later continue - elif tag in variable: + if tag in variable: replacewith = variable[tag] else: raise KeyError("Dataset key {} must be specified for {}, check " @@ -248,7 +248,7 @@ def _find_input_files(variable, rootpath, drs): filenames_glob = _get_filenames_glob(variable, drs) files = find_files(input_dirs, filenames_glob) - return files + return (files, input_dirs, filenames_glob) def get_input_filelist(variable, rootpath, drs): @@ -257,12 +257,12 @@ def get_input_filelist(variable, rootpath, drs): # this is needed and is not a duplicate effort if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx': variable['ensemble'] = 'r0i0p0' - files = _find_input_files(variable, rootpath, drs) + (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) # do time gating only for non-fx variables if variable['frequency'] != 'fx': files = select_files(files, variable['start_year'], variable['end_year']) - return files + return (files, dirnames, filenames) def get_output_file(variable, preproc_dir): diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index c94022b5b5..7f20c5ace1 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -233,18 +233,19 @@ def _augment(base, update): def _dataset_to_file(variable, config_user): """Find the first file belonging to dataset from variable info.""" - files = _get_input_files(variable, config_user) + (files, dirnames, filenames) = _get_input_files(variable, config_user) if not files and variable.get('derive'): required_vars = get_required(variable['short_name'], variable['project']) for required_var in required_vars: _augment(required_var, variable) _add_cmor_info(required_var, override=True) - files = _get_input_files(required_var, config_user) + (files, dirnames, filenames) = _get_input_files(required_var, + config_user) if files: variable = required_var break - check.data_availability(files, variable) + check.data_availability(files, variable, dirnames, filenames) return files[0] @@ -408,7 +409,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user): {'short_name': fx_varname, 'mip': fx_mip}, var) logger.debug("For CMIP6 fx variable '%s', found table '%s'", fx_varname, fx_mip) - fx_files = _get_input_files(fx_var, config_user) + fx_files = _get_input_files(fx_var, config_user)[0] # If files found, return them if fx_files: @@ -514,7 +515,7 @@ def _read_attributes(filename): def _get_input_files(variable, config_user): """Get the input files for a single dataset (locally and via download).""" - input_files = get_input_filelist( + (input_files, dirnames, filenames) = get_input_filelist( variable=variable, rootpath=config_user['rootpath'], drs=config_user['drs']) @@ -523,20 +524,23 @@ def _get_input_files(variable, config_user): # Do not download if files are already available locally. if config_user['synda_download'] and not input_files: input_files = synda_search(variable) + dirnames = None + filenames = None - return input_files + return (input_files, dirnames, filenames) def _get_ancestors(variable, config_user): """Get the input files for a single dataset and setup provenance.""" - input_files = _get_input_files(variable, config_user) + (input_files, dirnames, filenames) = _get_input_files(variable, + config_user) logger.info("Using input files for variable %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], '\n'.join(input_files)) if (not config_user.get('skip-nonexistent') or variable['dataset'] == variable.get('reference_dataset')): - check.data_availability(input_files, variable) + check.data_availability(input_files, variable, dirnames, filenames) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -830,7 +834,7 @@ def append(group_prefix, var): group_prefix = variable['variable_group'] + '_derive_input_' if not variable.get('force_derivation') and _get_input_files( variable, - config_user): + config_user)[0]: # No need to derive, just process normally up to derive step var = deepcopy(variable) append(group_prefix, var) @@ -841,7 +845,7 @@ def append(group_prefix, var): for var in required_vars: _augment(var, variable) _add_cmor_info(var, override=True) - files = _get_input_files(var, config_user) + files = _get_input_files(var, config_user)[0] if var.get('optional') and not files: logger.info( "Skipping: no data found for %s which is marked as " diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index 68a3064875..5c4ecc983f 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -1,4 +1,5 @@ """Module with functions to check a recipe.""" +import itertools import logging import os import subprocess @@ -90,10 +91,29 @@ def variable(var, required_keys): missing, var.get('short_name'), var.get('diagnostic'))) -def data_availability(input_files, var): +def data_availability(input_files, var, dirnames, filenames): """Check if the required input data is available.""" if not input_files: - raise RecipeError("No input files found for variable {}".format(var)) + var.pop('filename', None) + logger.error("No input files found for variable %s", var) + if dirnames and filenames: + patterns = itertools.product(dirnames, filenames) + patterns = [os.path.join(d, f) for (d, f) in patterns] + if len(patterns) == 1: + msg = f': {patterns[0]}' + else: + msg = '\n{}'.format('\n'.join(patterns)) + logger.error("Looked for files matching%s", msg) + elif dirnames and not filenames: + logger.error( + "Looked for files in %s, but did not find any file pattern " + "to match against", dirnames) + elif filenames and not dirnames: + logger.error( + "Looked for files matching %s, but did not find any existing " + "input directory", filenames) + logger.error("Set 'log_level' to 'debug' to get more information") + raise RecipeError("Missing data") # check time avail only for non-fx variables if var['frequency'] == 'fx': diff --git a/esmvalcore/preprocessor/_derive/__init__.py b/esmvalcore/preprocessor/_derive/__init__.py index 3d94129fc7..11e06de1bc 100644 --- a/esmvalcore/preprocessor/_derive/__init__.py +++ b/esmvalcore/preprocessor/_derive/__init__.py @@ -91,7 +91,15 @@ def derive(cubes, short_name, long_name, units, standard_name=None): # Derive variable DerivedVariable = ALL_DERIVED_VARIABLES[short_name.lower()] # noqa: N806 - cube = DerivedVariable().calculate(cubes) + try: + cube = DerivedVariable().calculate(cubes) + except Exception as exc: + msg = (f"Derivation of variable '{short_name}' failed. If you used " + f"the option '--skip-nonexistent' for running your recipe, " + f"this might be caused by missing input data for derivation " + f"('{short_name}' needs the variables " + f"{DerivedVariable().required}).") + raise ValueError(msg) from exc # Set standard attributes cube.var_name = short_name diff --git a/tests/integration/data_finder.yml b/tests/integration/data_finder.yml index cd718fe2b6..3d73cdd17e 100644 --- a/tests/integration/data_finder.yml +++ b/tests/integration/data_finder.yml @@ -35,6 +35,10 @@ get_input_filelist: - ta_Amon_HadGEM2-ES_historical_r1i1p1_193412-195911.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + dirs: + - '' + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc @@ -48,6 +52,11 @@ get_input_filelist: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_198413-200512.nc - ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc + dirs: + - '' + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc + - ta_Amon_HadGEM2-ES_rcp85_r1i1p1*.nc found_files: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_198413-200512.nc @@ -63,10 +72,17 @@ get_input_filelist: - ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - ta_Amon_HadGEM2-ES_historical_r1i1p1_198413-200512.nc - ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc + dirs: + - '' + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: [] - drs: default variable: *variable + dirs: null + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: [] - drs: BADC @@ -84,6 +100,10 @@ get_input_filelist: available_symlinks: - link_name: MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest target: v20120928 + dirs: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc @@ -100,6 +120,10 @@ get_input_filelist: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_193412-195911.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + dirs: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc @@ -119,6 +143,14 @@ get_input_filelist: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc - MOHC/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_rcp45_r1i1p1_200601-210012.nc - MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc + dirs: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta + - MOHC/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/v20110330/ta + - MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/v20110330/ta + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc + - ta_Amon_HadGEM2-ES_rcp45_r1i1p1*.nc + - ta_Amon_HadGEM2-ES_rcp85_r1i1p1*.nc found_files: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc @@ -137,6 +169,10 @@ get_input_filelist: - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_193412-195911.nc - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + dirs: + - historical/Amon/ta/HadGEM2-ES/r1i1p1 + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc @@ -154,6 +190,10 @@ get_input_filelist: - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc - rcp85/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc + dirs: + - historical/Amon/ta/HadGEM2-ES/r1i1p1 + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index d931f5730c..db7ec5b8da 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -86,8 +86,17 @@ def test_get_input_filelist(root, cfg): # Find files rootpath = {cfg['variable']['project']: [root]} drs = {cfg['variable']['project']: cfg['drs']} - input_filelist = get_input_filelist(cfg['variable'], rootpath, drs) + (input_filelist, dirnames, + filenames) = get_input_filelist(cfg['variable'], rootpath, drs) # Test result - reference = [os.path.join(root, file) for file in cfg['found_files']] - assert sorted(input_filelist) == sorted(reference) + ref_files = [os.path.join(root, file) for file in cfg['found_files']] + if cfg['dirs'] is None: + ref_dirs = [] + else: + ref_dirs = [os.path.join(root, dir) for dir in cfg['dirs']] + ref_patterns = cfg['file_patterns'] + + assert sorted(input_filelist) == sorted(ref_files) + assert sorted(dirnames) == sorted(ref_dirs) + assert sorted(filenames) == sorted(ref_patterns)