Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions esmvalcore/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,18 +187,6 @@ def get_institutes(variable):
return CFG.get(project, {}).get('institutes', {}).get(dataset, [])


def replace_mip_fx(fx_file):
"""Replace MIP so to retrieve correct fx files."""
default_mip = 'Amon'
if fx_file not in CFG['CMIP5']['fx_mip_change']:
logger.warning(
'mip for fx variable %s is not specified in '
'config_developer.yml, using default (%s)', fx_file, default_mip)
new_mip = CFG['CMIP5']['fx_mip_change'].get(fx_file, default_mip)
logger.debug("Switching mip for fx file finding to %s", new_mip)
return new_mip


TAGS_CONFIG_FILE = os.path.join(
DIAGNOSTICS_PATH, 'config-references.yml')

Expand Down
56 changes: 19 additions & 37 deletions esmvalcore/_data_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

import six

from ._config import get_project_config, replace_mip_fx
from .cmor.table import CMOR_TABLES
from ._config import get_project_config

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -95,7 +94,7 @@ def select_files(filenames, start_year, end_year):
return selection


def _replace_tags(path, variable, fx_var=None):
def _replace_tags(path, variable):
"""Replace tags in the config-developer's file with actual values."""
path = path.strip('/')

Expand All @@ -106,9 +105,7 @@ def _replace_tags(path, variable, fx_var=None):
original_tag = tag
tag, _, _ = _get_caps_options(tag)

if tag == 'fx_var':
replacewith = fx_var
elif tag == 'latestversion': # handled separately later
if tag == 'latestversion': # handled separately later
continue
elif tag in variable:
replacewith = variable[tag]
Expand Down Expand Up @@ -198,16 +195,16 @@ def get_rootpath(rootpath, project):
raise KeyError('default rootpath must be specified in config-user file')


def _find_input_dirs(variable, rootpath, drs, fx_var=None):
def _find_input_dirs(variable, rootpath, drs):
"""Return a the full paths to input directories."""
project = variable['project']

root = get_rootpath(rootpath, project)
input_type = 'input_{}dir'.format('fx_' if fx_var else '')
input_type = 'input_dir'
path_template = _select_drs(input_type, drs, project)

dirnames = []
for dirname_template in _replace_tags(path_template, variable, fx_var):
for dirname_template in _replace_tags(path_template, variable):
for base_path in root:
dirname = os.path.join(base_path, dirname_template)
dirname = _resolve_latestversion(dirname)
Expand All @@ -220,50 +217,35 @@ def _find_input_dirs(variable, rootpath, drs, fx_var=None):
return dirnames


def _get_filenames_glob(variable, drs, fx_var=None):
def _get_filenames_glob(variable, drs):
"""Return patterns that can be used to look for input files."""
input_type = 'input_{}file'.format('fx_' if fx_var else '')
input_type = 'input_file'
path_template = _select_drs(input_type, drs, variable['project'])
filenames_glob = _replace_tags(path_template, variable, fx_var)
filenames_glob = _replace_tags(path_template, variable)
return filenames_glob


def _find_input_files(variable, rootpath, drs, fx_var=None):
logger.debug("Looking for input %sfiles for variable %s of dataset %s",
fx_var + ' fx ' if fx_var else '', variable['short_name'],
variable['dataset'])

input_dirs = _find_input_dirs(variable, rootpath, drs, fx_var)
filenames_glob = _get_filenames_glob(variable, drs, fx_var)
def _find_input_files(variable, rootpath, drs):
input_dirs = _find_input_dirs(variable, rootpath, drs)
filenames_glob = _get_filenames_glob(variable, drs)
files = find_files(input_dirs, filenames_glob)

return files


def get_input_filelist(variable, rootpath, drs):
"""Return the full path to input files."""
# change ensemble to fixed r0i0p0 for fx variables
if variable['project'] == 'CMIP5'and variable['frequency'] == 'fx':
variable['ensemble'] = 'r0i0p0'
files = _find_input_files(variable, rootpath, drs)
files = select_files(files, variable['start_year'], variable['end_year'])
# do time gating only for non-fx variables
if variable['frequency'] != 'fx':
files = select_files(files, variable['start_year'],
variable['end_year'])
return files


def get_input_fx_filelist(variable, rootpath, drs):
"""Return a dict with the full path to fx input files."""
fx_files = {}
for fx_var in variable['fx_files']:
var = dict(variable)
var['mip'] = replace_mip_fx(fx_var)
table = CMOR_TABLES[var['cmor_table']].get_table(var['mip'])
var['frequency'] = table.frequency
realm = getattr(table.get(var['short_name']), 'modeling_realm', None)
var['modeling_realm'] = realm if realm else table.realm

files = _find_input_files(var, rootpath, drs, fx_var)
fx_files[fx_var] = files[0] if files else None

return fx_files


def get_output_file(variable, preproc_dir):
"""Return the full path to the output (preprocessed) file."""
cfg = get_project_config(variable['project'])
Expand Down
68 changes: 57 additions & 11 deletions esmvalcore/_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
from copy import deepcopy

import yaml

from netCDF4 import Dataset

from . import __version__
from . import _recipe_checks as check
from . import __version__
from ._config import TAGS, get_institutes, replace_tags
from ._data_finder import (get_input_filelist, get_input_fx_filelist,
get_output_file, get_statistic_output_file)
from ._data_finder import (get_input_filelist, get_output_file,
get_statistic_output_file)
from ._provenance import TrackedFile, get_recipe_provenance
from ._recipe_checks import RecipeError
from ._task import (DiagnosticTask, get_flattened_tasks, get_independent_tasks,
Expand Down Expand Up @@ -361,6 +362,45 @@ def _get_default_settings(variable, config_user, derive=False):
return settings


def get_input_fx_filelist(variable, rootpath, drs):
"""Return a dict with fx vars keys and full file paths values."""
fx_files_dict = {}
for fx_var_dict in variable['fx_files']:
fx_var = _add_fxvar_keys(fx_var_dict, variable)
fx_files = get_input_filelist(
variable=fx_var,
rootpath=rootpath,
drs=drs)
if fx_files:
fx_files_dict[fx_var['short_name']] = fx_files[0]
else:
fx_files_dict[fx_var['short_name']] = None

return fx_files_dict


def _add_fxvar_keys(fx_var_dict, variable):
"""Add keys specific to fx variable to use get_input_filelist."""
fx_variable = dict(variable)

# set variable names
fx_variable['variable_group'] = fx_var_dict['short_name']
fx_variable['short_name'] = fx_var_dict['short_name']

# specificities of project
if fx_variable['project'] == 'CMIP5':
fx_variable['mip'] = 'fx'
fx_variable['ensemble'] = 'r0i0p0'
elif fx_variable['project'] == 'CMIP6':
fx_variable['grid'] = variable['grid']
if 'mip' in fx_var_dict:
fx_variable['mip'] = fx_var_dict['mip']
# add missing cmor info
_add_cmor_info(fx_variable, override=True)

return fx_variable


def _update_fx_settings(settings, variable, config_user):
"""Find and set the FX derive/mask settings."""
# update for derive
Expand All @@ -384,7 +424,7 @@ def _update_fx_settings(settings, variable, config_user):
settings['mask_landsea']['fx_files'] = []

var = dict(variable)
var['fx_files'] = ['sftlf', 'sftof']
var['fx_files'] = [{'short_name': 'sftlf'}, {'short_name': 'sftof'}]
fx_files_dict = get_input_fx_filelist(
variable=var,
rootpath=config_user['rootpath'],
Expand All @@ -402,7 +442,7 @@ def _update_fx_settings(settings, variable, config_user):
settings['mask_landseaice']['fx_files'] = []

var = dict(variable)
var['fx_files'] = ['sftgif']
var['fx_files'] = [{'short_name': 'sftgif'}]
fx_files_dict = get_input_fx_filelist(
variable=var,
rootpath=config_user['rootpath'],
Expand All @@ -415,11 +455,13 @@ def _update_fx_settings(settings, variable, config_user):

for step in ('area_statistics', 'volume_statistics'):
if settings.get(step, {}).get('fx_files'):
settings[step]['fx_files'] = get_input_fx_filelist(
variable=variable,
var = dict(variable)
var['fx_files'] = settings.get(step, {}).get('fx_files')
fx_files_dict = get_input_fx_filelist(
variable=var,
rootpath=config_user['rootpath'],
drs=config_user['drs'],
)
drs=config_user['drs'])
settings[step]['fx_files'] = fx_files_dict


def _read_attributes(filename):
Expand Down Expand Up @@ -772,6 +814,10 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name):
name=derive_name)
derive_tasks.append(task)

# don't do time gating for fx variables
if variables[0]['frequency'] == 'fx':
profile['extract_time'] = False

# Create (final) preprocessor task
task = _get_single_preprocessor_task(
variables,
Expand Down Expand Up @@ -905,8 +951,8 @@ def _initialize_variables(self, raw_variable, raw_datasets):
variable['institute'] = institute
check.variable(variable, required_keys)
if 'fx_files' in variable:
for fx_file in variable['fx_files']:
DATASET_KEYS.add(fx_file)
for fx_file_dict in variable['fx_files']:
DATASET_KEYS.add(fx_file_dict['short_name'])
# Get the fx files
variable['fx_files'] = get_input_fx_filelist(
variable=variable,
Expand Down
21 changes: 12 additions & 9 deletions esmvalcore/_recipe_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,18 @@ def data_availability(input_files, var):

required_years = set(range(var['start_year'], var['end_year'] + 1))
available_years = set()
for filename in input_files:
start, end = get_start_end_year(filename)
available_years.update(range(start, end + 1))

missing_years = required_years - available_years
if missing_years:
raise RecipeError(
"No input data available for years {} in files {}".format(
", ".join(str(year) for year in missing_years), input_files))
# check time avail only for non-fx variables
if var['frequency'] != 'fx':
for filename in input_files:
start, end = get_start_end_year(filename)
available_years.update(range(start, end + 1))

missing_years = required_years - available_years
if missing_years:
raise RecipeError(
"No input data available for years {} in files {}".format(
", ".join(str(year) for year in missing_years),
input_files))


def tasks_valid(tasks):
Expand Down
6 changes: 4 additions & 2 deletions esmvalcore/cmor/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,15 @@ def check_metadata(self, logger=None):
self._check_fill_value()
self._check_dim_names()
self._check_coords()
self._check_time_coord()
if self.frequency != 'fx':
self._check_time_coord()
self._check_rank()

self.report_warnings(logger)
self.report_errors()

self._add_auxiliar_time_coordinates()
if self.frequency != 'fx':
self._add_auxiliar_time_coordinates()
return self._cube

def report_errors(self):
Expand Down
31 changes: 2 additions & 29 deletions esmvalcore/config-developer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ CMIP6:
BADC: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
DKRZ: '[institute]/[dataset]/[exp]/[ensemble]/[mip]/[short_name]/[grid]/[latestversion]'
ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/[grid]/'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]_*.nc'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_[grid]*.nc'
output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]'
cmor_type: 'CMIP6'
institutes:
Expand Down Expand Up @@ -147,26 +147,7 @@ CMIP5:
ETHZ: '[exp]/[mip]/[short_name]/[dataset]/[ensemble]/'
SMHI: '[dataset]/[ensemble]/[exp]/[frequency]'
BSC: '[type]/[project]/[exp]/[dataset.lower]'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]_*.nc'
input_fx_dir:
default: '/'
BADC: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]'
CP4CDS: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[fx_var]/latest/'
DKRZ: '[institute]/[dataset]/[exp]/fx/[modeling_realm]/fx/r0i0p0/[latestversion]/[fx_var]'
ETHZ: '[exp]/fx/[fx_var]/[dataset]/r0i0p0'
input_fx_file: '[fx_var]_fx_[dataset]_[exp]_r0i0p0.nc'
fx_mip_change:
'areacella': 'Amon'
'areacello': 'Omon'
'basin': 'Omon'
'deptho': 'Omon'
'mrsofc': 'Lmon'
'orog': 'Amon'
'rootd': 'Lmon'
'sftgif': 'Lmon'
'sftlf': 'Amon'
'sftof': 'Omon'
'volcello': 'Omon'
input_file: '[short_name]_[mip]_[dataset]_[exp]_[ensemble]*.nc'
output_file: '[project]_[dataset]_[mip]_[exp]_[ensemble]_[short_name]_[start_year]-[end_year]'
institutes:
'ACCESS1-0': ['CSIRO-BOM']
Expand Down Expand Up @@ -239,10 +220,6 @@ OBS:
input_file:
default: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_*.nc'
BSC: '[short_name]_*.nc'
input_fx_dir:
default: 'Tier[tier]/[dataset]'
input_fx_file:
default: '[project]_[dataset]_[type]_[version]_fx_[fx_var].nc'
output_file: '[project]_[dataset]_[type]_[version]_[mip]_[short_name]_[start_year]-[end_year]'
cmor_type: 'CMIP5'

Expand All @@ -251,10 +228,6 @@ obs4mips:
input_dir:
default: 'Tier[tier]/[dataset]'
input_file: '[short_name]_[dataset]_[level]_[version]_*.nc'
input_fx_dir:
default: 'Tier[tier]/[dataset]'
input_fx_file:
default: '[project]_[dataset]_fx_[fx_var].nc'
output_file: '[project]_[dataset]_[level]_[version]_[short_name]_[start_year]-[end_year]'
cmor_type: 'CMIP6'
cmor_path: 'obs4mips'
Expand Down
2 changes: 1 addition & 1 deletion esmvalcore/preprocessor/_derive/nbp_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class DerivedVariable(DerivedVariableBase):
# Required variables
required = [{
'short_name': 'nbp',
'fx_files': ['sftlf'],
'fx_files': [{'short_name': 'sftlf'}],
}]

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/preprocessor/_derive/test_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_get_required_with_fx():

reference = [{
'short_name': 'nbp',
'fx_files': ['sftlf'],
'fx_files': [{'short_name': 'sftlf'}],
}]

assert variables == reference
Expand Down
11 changes: 9 additions & 2 deletions tests/integration/test_data_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,18 @@ def test_get_input_fx_filelist(root, cfg):
# Find files
rootpath = {cfg['variable']['project']: [root]}
drs = {cfg['variable']['project']: cfg['drs']}
fx_files = get_input_fx_filelist(cfg['variable'], rootpath, drs)
cfg['variable']['fx_files'] = [
{'short_name': short_name} for short_name
in cfg['variable']['fx_files']
]
fx_files_dict = get_input_fx_filelist(
variable=cfg['variable'],
rootpath=rootpath,
drs=drs)

# Test result
reference = {
fx_var: os.path.join(root, filename) if filename else None
for fx_var, filename in cfg['found_files'].items()
}
assert fx_files == reference
assert fx_files_dict == reference
Loading