Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions esmvalcore/_data_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def _replace_tags(path, variable):

if tag == 'latestversion': # handled separately later
continue
elif tag in variable:
if tag in variable:
replacewith = variable[tag]
else:
raise KeyError("Dataset key {} must be specified for {}, check "
Expand Down Expand Up @@ -248,7 +248,7 @@ def _find_input_files(variable, rootpath, drs):
filenames_glob = _get_filenames_glob(variable, drs)
files = find_files(input_dirs, filenames_glob)

return files
return (files, input_dirs, filenames_glob)


def get_input_filelist(variable, rootpath, drs):
Expand All @@ -257,12 +257,12 @@ def get_input_filelist(variable, rootpath, drs):
# this is needed and is not a duplicate effort
if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx':
variable['ensemble'] = 'r0i0p0'
files = _find_input_files(variable, rootpath, drs)
(files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
# do time gating only for non-fx variables
if variable['frequency'] != 'fx':
files = select_files(files, variable['start_year'],
variable['end_year'])
return files
return (files, dirnames, filenames)


def get_output_file(variable, preproc_dir):
Expand Down
24 changes: 14 additions & 10 deletions esmvalcore/_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,18 +233,19 @@ def _augment(base, update):

def _dataset_to_file(variable, config_user):
"""Find the first file belonging to dataset from variable info."""
files = _get_input_files(variable, config_user)
(files, dirnames, filenames) = _get_input_files(variable, config_user)
if not files and variable.get('derive'):
required_vars = get_required(variable['short_name'],
variable['project'])
for required_var in required_vars:
_augment(required_var, variable)
_add_cmor_info(required_var, override=True)
files = _get_input_files(required_var, config_user)
(files, dirnames, filenames) = _get_input_files(required_var,
config_user)
if files:
variable = required_var
break
check.data_availability(files, variable)
check.data_availability(files, variable, dirnames, filenames)
return files[0]


Expand Down Expand Up @@ -408,7 +409,7 @@ def _get_correct_fx_file(variable, fx_varname, config_user):
{'short_name': fx_varname, 'mip': fx_mip}, var)
logger.debug("For CMIP6 fx variable '%s', found table '%s'",
fx_varname, fx_mip)
fx_files = _get_input_files(fx_var, config_user)
fx_files = _get_input_files(fx_var, config_user)[0]

# If files found, return them
if fx_files:
Expand Down Expand Up @@ -514,7 +515,7 @@ def _read_attributes(filename):

def _get_input_files(variable, config_user):
"""Get the input files for a single dataset (locally and via download)."""
input_files = get_input_filelist(
(input_files, dirnames, filenames) = get_input_filelist(
variable=variable,
rootpath=config_user['rootpath'],
drs=config_user['drs'])
Expand All @@ -523,20 +524,23 @@ def _get_input_files(variable, config_user):
# Do not download if files are already available locally.
if config_user['synda_download'] and not input_files:
input_files = synda_search(variable)
dirnames = None
filenames = None

return input_files
return (input_files, dirnames, filenames)


def _get_ancestors(variable, config_user):
"""Get the input files for a single dataset and setup provenance."""
input_files = _get_input_files(variable, config_user)
(input_files, dirnames, filenames) = _get_input_files(variable,
config_user)

logger.info("Using input files for variable %s of dataset %s:\n%s",
variable['short_name'], variable['dataset'],
'\n'.join(input_files))
if (not config_user.get('skip-nonexistent')
or variable['dataset'] == variable.get('reference_dataset')):
check.data_availability(input_files, variable)
check.data_availability(input_files, variable, dirnames, filenames)

# Set up provenance tracking
for i, filename in enumerate(input_files):
Expand Down Expand Up @@ -830,7 +834,7 @@ def append(group_prefix, var):
group_prefix = variable['variable_group'] + '_derive_input_'
if not variable.get('force_derivation') and _get_input_files(
variable,
config_user):
config_user)[0]:
# No need to derive, just process normally up to derive step
var = deepcopy(variable)
append(group_prefix, var)
Expand All @@ -841,7 +845,7 @@ def append(group_prefix, var):
for var in required_vars:
_augment(var, variable)
_add_cmor_info(var, override=True)
files = _get_input_files(var, config_user)
files = _get_input_files(var, config_user)[0]
if var.get('optional') and not files:
logger.info(
"Skipping: no data found for %s which is marked as "
Expand Down
24 changes: 22 additions & 2 deletions esmvalcore/_recipe_checks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module with functions to check a recipe."""
import itertools
import logging
import os
import subprocess
Expand Down Expand Up @@ -90,10 +91,29 @@ def variable(var, required_keys):
missing, var.get('short_name'), var.get('diagnostic')))


def data_availability(input_files, var):
def data_availability(input_files, var, dirnames, filenames):
"""Check if the required input data is available."""
if not input_files:
raise RecipeError("No input files found for variable {}".format(var))
var.pop('filename', None)
logger.error("No input files found for variable %s", var)
if dirnames and filenames:
patterns = itertools.product(dirnames, filenames)
patterns = [os.path.join(d, f) for (d, f) in patterns]
if len(patterns) == 1:
msg = f': {patterns[0]}'
else:
msg = '\n{}'.format('\n'.join(patterns))
logger.error("Looked for files matching%s", msg)
elif dirnames and not filenames:
logger.error(
"Looked for files in %s, but did not find any file pattern "
"to match against", dirnames)
elif filenames and not dirnames:
logger.error(
"Looked for files matching %s, but did not find any existing "
"input directory", filenames)
logger.error("Set 'log_level' to 'debug' to get more information")
Comment thread
mattiarighi marked this conversation as resolved.
raise RecipeError("Missing data")

# check time avail only for non-fx variables
if var['frequency'] == 'fx':
Expand Down
10 changes: 9 additions & 1 deletion esmvalcore/preprocessor/_derive/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,15 @@ def derive(cubes, short_name, long_name, units, standard_name=None):

# Derive variable
DerivedVariable = ALL_DERIVED_VARIABLES[short_name.lower()] # noqa: N806
cube = DerivedVariable().calculate(cubes)
try:
cube = DerivedVariable().calculate(cubes)
except Exception as exc:
msg = (f"Derivation of variable '{short_name}' failed. If you used "
f"the option '--skip-nonexistent' for running your recipe, "
f"this might be caused by missing input data for derivation "
f"('{short_name}' needs the variables "
f"{DerivedVariable().required}).")
raise ValueError(msg) from exc

# Set standard attributes
cube.var_name = short_name
Expand Down
40 changes: 40 additions & 0 deletions tests/integration/data_finder.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ get_input_filelist:
- ta_Amon_HadGEM2-ES_historical_r1i1p1_193412-195911.nc
- ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
dirs:
- ''
file_patterns:
- ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc
found_files:
- ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc

Expand All @@ -48,6 +52,11 @@ get_input_filelist:
- ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- ta_Amon_HadGEM2-ES_historical_r1i1p1_198413-200512.nc
- ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc
dirs:
- ''
file_patterns:
- ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc
- ta_Amon_HadGEM2-ES_rcp85_r1i1p1*.nc
found_files:
- ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- ta_Amon_HadGEM2-ES_historical_r1i1p1_198413-200512.nc
Expand All @@ -63,10 +72,17 @@ get_input_filelist:
- ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- ta_Amon_HadGEM2-ES_historical_r1i1p1_198413-200512.nc
- ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc
dirs:
- ''
file_patterns:
- ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc
found_files: []

- drs: default
variable: *variable
dirs: null
file_patterns:
- ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc
found_files: []

- drs: BADC
Expand All @@ -84,6 +100,10 @@ get_input_filelist:
available_symlinks:
- link_name: MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest
target: v20120928
dirs:
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta
file_patterns:
- ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc
found_files:
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
Expand All @@ -100,6 +120,10 @@ get_input_filelist:
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_193412-195911.nc
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
dirs:
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta
file_patterns:
- ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc
found_files:
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
Expand All @@ -119,6 +143,14 @@ get_input_filelist:
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
- MOHC/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_rcp45_r1i1p1_200601-210012.nc
- MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc
dirs:
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta
- MOHC/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/v20110330/ta
- MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/v20110330/ta
file_patterns:
- ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc
- ta_Amon_HadGEM2-ES_rcp45_r1i1p1*.nc
- ta_Amon_HadGEM2-ES_rcp85_r1i1p1*.nc
found_files:
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
Expand All @@ -137,6 +169,10 @@ get_input_filelist:
- historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_193412-195911.nc
- historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
dirs:
- historical/Amon/ta/HadGEM2-ES/r1i1p1
file_patterns:
- ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc
found_files:
- historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
Expand All @@ -154,6 +190,10 @@ get_input_filelist:
- historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc
- historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
- rcp85/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc
dirs:
- historical/Amon/ta/HadGEM2-ES/r1i1p1
file_patterns:
- ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc
found_files:
- historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc

Expand Down
15 changes: 12 additions & 3 deletions tests/integration/test_data_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,17 @@ def test_get_input_filelist(root, cfg):
# Find files
rootpath = {cfg['variable']['project']: [root]}
drs = {cfg['variable']['project']: cfg['drs']}
input_filelist = get_input_filelist(cfg['variable'], rootpath, drs)
(input_filelist, dirnames,
filenames) = get_input_filelist(cfg['variable'], rootpath, drs)

# Test result
reference = [os.path.join(root, file) for file in cfg['found_files']]
assert sorted(input_filelist) == sorted(reference)
ref_files = [os.path.join(root, file) for file in cfg['found_files']]
if cfg['dirs'] is None:
ref_dirs = []
else:
ref_dirs = [os.path.join(root, dir) for dir in cfg['dirs']]
ref_patterns = cfg['file_patterns']

assert sorted(input_filelist) == sorted(ref_files)
assert sorted(dirnames) == sorted(ref_dirs)
assert sorted(filenames) == sorted(ref_patterns)