diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 8610f9a0a4..13cd322c3c 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -8,6 +8,7 @@ import logging import os import re +import glob from ._config import get_project_config @@ -95,9 +96,7 @@ def select_files(filenames, start_year, end_year): def _replace_tags(path, variable): """Replace tags in the config-developer's file with actual values.""" path = path.strip('/') - - tlist = re.findall(r'\[([^]]*)\]', path) - + tlist = re.findall(r'{([^}]*)}', path) paths = [path] for tag in tlist: original_tag = tag @@ -112,7 +111,6 @@ def _replace_tags(path, variable): "your recipe entry".format(tag, variable)) paths = _replace_tag(paths, original_tag, replacewith) - return paths @@ -125,7 +123,7 @@ def _replace_tag(paths, tag, replacewith): result.extend(_replace_tag(paths, tag, item)) else: text = _apply_caps(str(replacewith), lower, upper) - result.extend(p.replace('[' + tag + ']', text) for p in paths) + result.extend(p.replace('{' + tag + '}', text) for p in paths) return result @@ -151,11 +149,11 @@ def _apply_caps(original, lower, upper): def _resolve_latestversion(dirname_template): """Resolve the 'latestversion' tag.""" - if '[latestversion]' not in dirname_template: + if '{latestversion}' not in dirname_template: return dirname_template # Find latest version - part1, part2 = dirname_template.split('[latestversion]') + part1, part2 = dirname_template.split('{latestversion}') part2 = part2.lstrip(os.sep) if os.path.exists(part1): versions = os.listdir(part1) @@ -205,9 +203,12 @@ def _find_input_dirs(variable, rootpath, drs): for base_path in root: dirname = os.path.join(base_path, dirname_template) dirname = _resolve_latestversion(dirname) - if os.path.exists(dirname): - logger.debug("Found %s", dirname) - dirnames.append(dirname) + matches = glob.glob(dirname) + matches = [match for match in matches if os.path.isdir(match)] + if matches: + for match in matches: + logger.debug("Found %s", match) + dirnames.append(match) else: logger.debug("Skipping non-existent %s", dirname) diff --git a/esmvalcore/config-developer.yml b/esmvalcore/config-developer.yml index 1b3f8c975a..82f0d347b7 100644 --- a/esmvalcore/config-developer.yml +++ b/esmvalcore/config-developer.yml @@ -22,26 +22,26 @@ CMIP6: cmor_strict: true input_dir: default: '/' - BADC: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' - DKRZ: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]' - ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]*.nc' - output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' + BADC: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' + DKRZ: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' + ETHZ: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/{grid}/' + input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc' + output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}_{start_year}-{end_year}' cmor_type: 'CMIP6' CMIP5: cmor_strict: true input_dir: default: '/' - BADC: '[institute]/[dataset]/[exp]/[frequency]/[modeling_realm]/[mip]/[ensemble]/[latestversion]/[short_name]' - CP4CDS: '[institute]/[dataset]/[exp]/[frequency]/[modeling_realm]/[mip]/[ensemble]/[short_name]/latest/' - DKRZ: '[institute]/[dataset]/[exp]/[frequency]/[modeling_realm]/[mip]/[ensemble]/[latestversion]/[short_name]' - ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/' - SMHI: '[dataset]/[ensemble]/[exp]/[frequency]' - RCAST: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/' - BSC: '[type]/[project]/[exp]/[dataset.lower]' - input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]*.nc' - output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]' + BADC: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/latest/{short_name}' + CP4CDS: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{short_name}/latest/' + DKRZ: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{latestversion}/{short_name}' + ETHZ: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/' + SMHI: '{dataset}/{ensemble}/{exp}/{frequency}' + RCAST: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/' + BSC: '{type}/{project}/{exp}/{dataset.lower}' + input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc' + output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}_{start_year}-{end_year}' institutes: 'ACCESS1-0': ['CSIRO-BOM'] 'ACCESS1-3': ['CSIRO-BOM'] @@ -108,38 +108,34 @@ CMIP5: OBS: cmor_strict: false input_dir: - default: 'Tier[tier]/[dataset]' - BSC: '[type]/[institute.lower]/[dataset.lower]/[freq_folder]/[short_name][freq_base]' - RCAST: '[dataset]' + default: 'Tier{tier}/{dataset}' + BSC: '{type}/{institute.lower}/{dataset.lower}/{freq_folder}/{short_name}{freq_base}' + RCAST: '{dataset}' input_file: - default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc' - BSC: '[short_name]_*.nc' - RCAST: '[short_name]_[mip]_[type]_[dataset]_*.nc' - output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]' + default: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc' + BSC: '{short_name}_*.nc' + RCAST: '{short_name}_{mip}_{type}_{dataset}_*.nc' + output_file: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}_{start_year}-{end_year}' cmor_type: 'CMIP5' OBS6: cmor_strict: false input_dir: - default: 'Tier[tier]/[dataset]' - BSC: '[type]/[institute.lower]/[dataset.lower]/[freq_folder]/[short_name][freq_base]' + default: 'Tier{tier}/{dataset}' + BSC: '{type}/{institute.lower}/{dataset.lower}/{freq_folder}/{short_name}{freq_base}' input_file: - default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc' - BSC: '[short_name]_*.nc' - input_fx_dir: - default: 'Tier[tier]/[dataset]' - input_fx_file: - default: '[project]_[dataset]_[type]_[version]_fx_[fx_var].nc' - output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]' + default: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}_*.nc' + BSC: '{short_name}_*.nc' + output_file: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}_{start_year}-{end_year}' cmor_type: 'CMIP6' obs4mips: cmor_strict: true input_dir: - default: 'Tier[tier]/[dataset]' + default: 'Tier{tier}/{dataset}' RCAST: '/' - input_file: '[short_name]_[dataset]_[level]_[version]_*.nc' - output_file: '[project]_[dataset]_[level]_[version]_[short_name]_[start_year]-[end_year]' + input_file: '{short_name}_{dataset}_{level}_{version}_*.nc' + output_file: '{project}_{dataset}_{level}_{version}_{short_name}_{start_year}-{end_year}' cmor_type: 'CMIP6' cmor_path: 'obs4mips' cmor_default_table_prefix: 'obs4MIPs_' @@ -147,16 +143,16 @@ obs4mips: ana4mips: cmor_strict: false input_dir: - default: 'Tier[tier]/[dataset]' + default: 'Tier{tier}/{dataset}' RCAST: '/' - input_file: '[short_name]_[mip]_[type]_[dataset]_*.nc' - output_file: '[project]_[mip]_[type]_[dataset]_[short_name]_[start_year]-[end_year]' + input_file: '{short_name}_{mip}_{type}_{dataset}_*.nc' + output_file: '{project}_{mip}_{type}_{dataset}_{short_name}_{start_year}-{end_year}' cmor_type: 'CMIP5' # TODO: add cmor_path and table and set cmor_strict to true EMAC: input_dir: - default: '[dataset]' + default: '{dataset}' input_file: '' - output_file: '[dataset]_[ensemble]_[short_name]_[start_year]-[end_year]' + output_file: '{dataset}_{ensemble}_{short_name}_{start_year}-{end_year}' cmor_type: 'CMIP5' diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index f3bbeeb6a1..783e1128c9 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -93,11 +93,15 @@ def find_files(_, filenames): # Any occurrence of [something] in filename should have # been replaced before this function is called. for filename in filenames: - assert '[' not in filename + assert '{' not in filename filename = filenames[0] filename = str(tmp_path / 'input' / filename) filenames = [] + if filename.endswith('[_.]*nc'): + # Restore when we support filenames with no dates + # filenames.append(filename.replace('[_.]*nc', '.nc')) + filename = filename.replace('[_.]*nc', '_*.nc') if filename.endswith('*.nc'): filename = filename[:-len('*.nc')] + '_' intervals = [ @@ -110,8 +114,8 @@ def find_files(_, filenames): else: filenames.append(filename) - for file in filenames: - create_test_file(file, next(tracking_id)) + for filename in filenames: + create_test_file(filename, next(tracking_id)) return filenames monkeypatch.setattr(esmvalcore._data_finder, 'find_files', find_files)