Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions esmvalcore/_data_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import logging
import os
import re
import glob

from ._config import get_project_config

Expand Down Expand Up @@ -95,9 +96,7 @@ def select_files(filenames, start_year, end_year):
def _replace_tags(path, variable):
"""Replace tags in the config-developer's file with actual values."""
path = path.strip('/')

tlist = re.findall(r'\[([^]]*)\]', path)

tlist = re.findall(r'{([^}]*)}', path)
paths = [path]
for tag in tlist:
original_tag = tag
Expand All @@ -112,7 +111,6 @@ def _replace_tags(path, variable):
"your recipe entry".format(tag, variable))

paths = _replace_tag(paths, original_tag, replacewith)

return paths


Expand All @@ -125,7 +123,7 @@ def _replace_tag(paths, tag, replacewith):
result.extend(_replace_tag(paths, tag, item))
else:
text = _apply_caps(str(replacewith), lower, upper)
result.extend(p.replace('[' + tag + ']', text) for p in paths)
result.extend(p.replace('{' + tag + '}', text) for p in paths)
return result


Expand All @@ -151,11 +149,11 @@ def _apply_caps(original, lower, upper):

def _resolve_latestversion(dirname_template):
"""Resolve the 'latestversion' tag."""
if '[latestversion]' not in dirname_template:
if '{latestversion}' not in dirname_template:
return dirname_template

# Find latest version
part1, part2 = dirname_template.split('[latestversion]')
part1, part2 = dirname_template.split('{latestversion}')
part2 = part2.lstrip(os.sep)
if os.path.exists(part1):
versions = os.listdir(part1)
Expand Down Expand Up @@ -205,9 +203,12 @@ def _find_input_dirs(variable, rootpath, drs):
for base_path in root:
dirname = os.path.join(base_path, dirname_template)
dirname = _resolve_latestversion(dirname)
if os.path.exists(dirname):
logger.debug("Found %s", dirname)
dirnames.append(dirname)
matches = glob.glob(dirname)
matches = [match for match in matches if os.path.isdir(match)]
if matches:
for match in matches:
logger.debug("Found %s", match)
dirnames.append(match)
else:
logger.debug("Skipping non-existent %s", dirname)

Expand Down
72 changes: 34 additions & 38 deletions esmvalcore/config-developer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,26 @@ CMIP6:
cmor_strict: true
input_dir:
default: '/'
BADC: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
DKRZ: '[activity]/[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]*.nc'
output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]'
BADC: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}'
DKRZ: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}'
ETHZ: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/{grid}/'
input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc'
output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}_{start_year}-{end_year}'
cmor_type: 'CMIP6'

CMIP5:
cmor_strict: true
input_dir:
default: '/'
BADC: '[institute]/[dataset]/[exp]/[frequency]/[modeling_realm]/[mip]/[ensemble]/[latestversion]/[short_name]'
CP4CDS: '[institute]/[dataset]/[exp]/[frequency]/[modeling_realm]/[mip]/[ensemble]/[short_name]/latest/'
DKRZ: '[institute]/[dataset]/[exp]/[frequency]/[modeling_realm]/[mip]/[ensemble]/[latestversion]/[short_name]'
ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/'
SMHI: '[dataset]/[ensemble]/[exp]/[frequency]'
RCAST: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/'
BSC: '[type]/[project]/[exp]/[dataset.lower]'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]*.nc'
output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]'
BADC: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/latest/{short_name}'
CP4CDS: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{short_name}/latest/'
DKRZ: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{latestversion}/{short_name}'
ETHZ: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/'
SMHI: '{dataset}/{ensemble}/{exp}/{frequency}'
RCAST: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/'
BSC: '{type}/{project}/{exp}/{dataset.lower}'
input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc'
output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}_{start_year}-{end_year}'
institutes:
'ACCESS1-0': ['CSIRO-BOM']
'ACCESS1-3': ['CSIRO-BOM']
Expand Down Expand Up @@ -108,55 +108,51 @@ CMIP5:
OBS:
cmor_strict: false
input_dir:
default: 'Tier[tier]/[dataset]'
BSC: '[type]/[institute.lower]/[dataset.lower]/[freq_folder]/[short_name][freq_base]'
RCAST: '[dataset]'
default: 'Tier{tier}/{dataset}'
BSC: '{type}/{institute.lower}/{dataset.lower}/{freq_folder}/{short_name}{freq_base}'
RCAST: '{dataset}'
input_file:
default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc'
BSC: '[short_name]_*.nc'
RCAST: '[short_name]_[mip]_[type]_[dataset]_*.nc'
output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]'
default: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}[_.]*nc'
BSC: '{short_name}_*.nc'
RCAST: '{short_name}_{mip}_{type}_{dataset}_*.nc'
output_file: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}_{start_year}-{end_year}'
cmor_type: 'CMIP5'

OBS6:
cmor_strict: false
input_dir:
default: 'Tier[tier]/[dataset]'
BSC: '[type]/[institute.lower]/[dataset.lower]/[freq_folder]/[short_name][freq_base]'
default: 'Tier{tier}/{dataset}'
BSC: '{type}/{institute.lower}/{dataset.lower}/{freq_folder}/{short_name}{freq_base}'
input_file:
default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc'
BSC: '[short_name]_*.nc'
input_fx_dir:
default: 'Tier[tier]/[dataset]'
input_fx_file:
default: '[project]_[dataset]_[type]_[version]_fx_[fx_var].nc'
output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]'
default: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}_*.nc'
BSC: '{short_name}_*.nc'
output_file: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}_{start_year}-{end_year}'
cmor_type: 'CMIP6'

obs4mips:
cmor_strict: true
input_dir:
default: 'Tier[tier]/[dataset]'
default: 'Tier{tier}/{dataset}'
RCAST: '/'
input_file: '[short_name]_[dataset]_[level]_[version]_*.nc'
output_file: '[project]_[dataset]_[level]_[version]_[short_name]_[start_year]-[end_year]'
input_file: '{short_name}_{dataset}_{level}_{version}_*.nc'
output_file: '{project}_{dataset}_{level}_{version}_{short_name}_{start_year}-{end_year}'
cmor_type: 'CMIP6'
cmor_path: 'obs4mips'
cmor_default_table_prefix: 'obs4MIPs_'

ana4mips:
cmor_strict: false
input_dir:
default: 'Tier[tier]/[dataset]'
default: 'Tier{tier}/{dataset}'
RCAST: '/'
input_file: '[short_name]_[mip]_[type]_[dataset]_*.nc'
output_file: '[project]_[mip]_[type]_[dataset]_[short_name]_[start_year]-[end_year]'
input_file: '{short_name}_{mip}_{type}_{dataset}_*.nc'
output_file: '{project}_{mip}_{type}_{dataset}_{short_name}_{start_year}-{end_year}'
cmor_type: 'CMIP5'
# TODO: add cmor_path and table and set cmor_strict to true

EMAC:
input_dir:
default: '[dataset]'
default: '{dataset}'
input_file: ''
output_file: '[dataset]_[ensemble]_[short_name]_[start_year]-[end_year]'
output_file: '{dataset}_{ensemble}_{short_name}_{start_year}-{end_year}'
cmor_type: 'CMIP5'
10 changes: 7 additions & 3 deletions tests/integration/test_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,15 @@ def find_files(_, filenames):
# Any occurrence of {something} in filename should have
# been replaced before this function is called.
for filename in filenames:
assert '[' not in filename
assert '{' not in filename

filename = filenames[0]
filename = str(tmp_path / 'input' / filename)
filenames = []
if filename.endswith('[_.]*nc'):
# Restore when we support filenames with no dates
# filenames.append(filename.replace('[_.]*nc', '.nc'))
filename = filename.replace('[_.]*nc', '_*.nc')
if filename.endswith('*.nc'):
filename = filename[:-len('*.nc')] + '_'
intervals = [
Expand All @@ -110,8 +114,8 @@ def find_files(_, filenames):
else:
filenames.append(filename)

for file in filenames:
create_test_file(file, next(tracking_id))
for filename in filenames:
create_test_file(filename, next(tracking_id))
return filenames

monkeypatch.setattr(esmvalcore._data_finder, 'find_files', find_files)
Expand Down