-
Notifications
You must be signed in to change notification settings - Fork 48
Development fx restructured #21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5068594
909e34f
1bbf364
3e63424
33b9066
5614f7c
475cd8f
0ee4a72
6989951
988d09c
775715c
32b6376
d50465c
2ff537d
62922ed
b36f312
1522c89
cf05187
e799ec8
1a4a766
7ab878f
0fc4392
6f51e4b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,8 +9,7 @@ | |
| import os | ||
| import re | ||
|
|
||
| from ._config import get_project_config, replace_mip_fx | ||
| from .cmor.table import CMOR_TABLES | ||
| from ._config import get_project_config | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
@@ -93,7 +92,7 @@ def select_files(filenames, start_year, end_year): | |
| return selection | ||
|
|
||
|
|
||
| def _replace_tags(path, variable, fx_var=None): | ||
| def _replace_tags(path, variable): | ||
| """Replace tags in the config-developer's file with actual values.""" | ||
| path = path.strip('/') | ||
|
|
||
|
|
@@ -104,9 +103,7 @@ def _replace_tags(path, variable, fx_var=None): | |
| original_tag = tag | ||
| tag, _, _ = _get_caps_options(tag) | ||
|
|
||
| if tag == 'fx_var': | ||
| replacewith = fx_var | ||
| elif tag == 'latestversion': # handled separately later | ||
| if tag == 'latestversion': # handled separately later | ||
| continue | ||
| elif tag in variable: | ||
| replacewith = variable[tag] | ||
|
|
@@ -196,16 +193,15 @@ def get_rootpath(rootpath, project): | |
| raise KeyError('default rootpath must be specified in config-user file') | ||
|
|
||
|
|
||
| def _find_input_dirs(variable, rootpath, drs, fx_var=None): | ||
| def _find_input_dirs(variable, rootpath, drs): | ||
| """Return a the full paths to input directories.""" | ||
| project = variable['project'] | ||
|
|
||
| root = get_rootpath(rootpath, project) | ||
| input_type = 'input_{}dir'.format('fx_' if fx_var else '') | ||
| path_template = _select_drs(input_type, drs, project) | ||
| path_template = _select_drs('input_dir', drs, project) | ||
|
|
||
| dirnames = [] | ||
| for dirname_template in _replace_tags(path_template, variable, fx_var): | ||
| for dirname_template in _replace_tags(path_template, variable): | ||
| for base_path in root: | ||
| dirname = os.path.join(base_path, dirname_template) | ||
| dirname = _resolve_latestversion(dirname) | ||
|
|
@@ -218,50 +214,34 @@ def _find_input_dirs(variable, rootpath, drs, fx_var=None): | |
| return dirnames | ||
|
|
||
|
|
||
| def _get_filenames_glob(variable, drs, fx_var=None): | ||
| def _get_filenames_glob(variable, drs): | ||
| """Return patterns that can be used to look for input files.""" | ||
| input_type = 'input_{}file'.format('fx_' if fx_var else '') | ||
| path_template = _select_drs(input_type, drs, variable['project']) | ||
| filenames_glob = _replace_tags(path_template, variable, fx_var) | ||
| path_template = _select_drs('input_file', drs, variable['project']) | ||
| filenames_glob = _replace_tags(path_template, variable) | ||
| return filenames_glob | ||
|
|
||
|
|
||
| def _find_input_files(variable, rootpath, drs, fx_var=None): | ||
| logger.debug("Looking for input %sfiles for variable %s of dataset %s", | ||
| fx_var + ' fx ' if fx_var else '', variable['short_name'], | ||
| variable['dataset']) | ||
|
|
||
| input_dirs = _find_input_dirs(variable, rootpath, drs, fx_var) | ||
| filenames_glob = _get_filenames_glob(variable, drs, fx_var) | ||
| def _find_input_files(variable, rootpath, drs): | ||
| input_dirs = _find_input_dirs(variable, rootpath, drs) | ||
| filenames_glob = _get_filenames_glob(variable, drs) | ||
| files = find_files(input_dirs, filenames_glob) | ||
|
|
||
| return files | ||
|
|
||
|
|
||
| def get_input_filelist(variable, rootpath, drs): | ||
| """Return the full path to input files.""" | ||
| # change ensemble to fixed r0i0p0 for fx variables | ||
| if variable['project'] == 'CMIP5'and variable['frequency'] == 'fx': | ||
| variable['ensemble'] = 'r0i0p0' | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're already setting the ensemble in _recipe.py, so I think there is no need to do it again here.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, I don't think that gets picked up in all cases; let me test.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Have you had a chance to do this yet?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not yet 😢 |
||
| files = _find_input_files(variable, rootpath, drs) | ||
| files = select_files(files, variable['start_year'], variable['end_year']) | ||
| # do time gating only for non-fx variables | ||
| if variable['frequency'] != 'fx': | ||
| files = select_files(files, variable['start_year'], | ||
| variable['end_year']) | ||
| return files | ||
|
|
||
|
|
||
| def get_input_fx_filelist(variable, rootpath, drs): | ||
| """Return a dict with the full path to fx input files.""" | ||
| fx_files = {} | ||
| for fx_var in variable['fx_files']: | ||
| var = dict(variable) | ||
| var['mip'] = replace_mip_fx(fx_var) | ||
| table = CMOR_TABLES[var['cmor_table']].get_table(var['mip']) | ||
| var['frequency'] = table.frequency | ||
| realm = getattr(table.get(var['short_name']), 'modeling_realm', None) | ||
| var['modeling_realm'] = realm if realm else table.realm | ||
|
|
||
| files = _find_input_files(var, rootpath, drs, fx_var) | ||
| fx_files[fx_var] = files[0] if files else None | ||
|
|
||
| return fx_files | ||
|
|
||
|
|
||
| def get_output_file(variable, preproc_dir): | ||
| """Return the full path to the output (preprocessed) file.""" | ||
| cfg = get_project_config(variable['project']) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,13 +6,14 @@ | |
| from copy import deepcopy | ||
|
|
||
| import yaml | ||
|
|
||
| from netCDF4 import Dataset | ||
|
|
||
| from . import __version__ | ||
| from . import _recipe_checks as check | ||
| from . import __version__ | ||
| from ._config import TAGS, get_institutes, replace_tags | ||
| from ._data_finder import (get_input_filelist, get_input_fx_filelist, | ||
| get_output_file, get_statistic_output_file) | ||
| from ._data_finder import (get_input_filelist, get_output_file, | ||
| get_statistic_output_file) | ||
| from ._provenance import TrackedFile, get_recipe_provenance | ||
| from ._recipe_checks import RecipeError | ||
| from ._task import (DiagnosticTask, get_flattened_tasks, get_independent_tasks, | ||
|
|
@@ -361,6 +362,45 @@ def _get_default_settings(variable, config_user, derive=False): | |
| return settings | ||
|
|
||
|
|
||
| def get_input_fx_filelist(variable, rootpath, drs): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this function should be removed altogether; it looks like it migrated here from _data_finder.py.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See my reply below in the main body of the issue. |
||
| """Return a dict with fx vars keys and full file paths values.""" | ||
| fx_files_dict = {} | ||
| for fx_var_dict in variable['fx_files']: | ||
| fx_var = _add_fxvar_keys(fx_var_dict, variable) | ||
| fx_files = get_input_filelist( | ||
| variable=fx_var, | ||
| rootpath=rootpath, | ||
| drs=drs) | ||
| if fx_files: | ||
| fx_files_dict[fx_var['short_name']] = fx_files[0] | ||
| else: | ||
| fx_files_dict[fx_var['short_name']] = None | ||
|
|
||
| return fx_files_dict | ||
|
|
||
|
|
||
| def _add_fxvar_keys(fx_var_dict, variable): | ||
| """Add keys specific to fx variable to use get_input_filelist.""" | ||
| fx_variable = dict(variable) | ||
|
|
||
| # set variable names | ||
| fx_variable['variable_group'] = fx_var_dict['short_name'] | ||
| fx_variable['short_name'] = fx_var_dict['short_name'] | ||
|
|
||
| # specificities of project | ||
| if fx_variable['project'] == 'CMIP5': | ||
| fx_variable['mip'] = 'fx' | ||
| fx_variable['ensemble'] = 'r0i0p0' | ||
| elif fx_variable['project'] == 'CMIP6': | ||
| fx_variable['grid'] = variable['grid'] | ||
| if 'mip' in fx_var_dict: | ||
| fx_variable['mip'] = fx_var_dict['mip'] | ||
| # add missing cmor info | ||
| _add_cmor_info(fx_variable, override=True) | ||
|
|
||
| return fx_variable | ||
|
|
||
|
|
||
| def _update_fx_settings(settings, variable, config_user): | ||
| """Find and set the FX derive/mask settings.""" | ||
| # update for derive | ||
|
|
@@ -384,7 +424,7 @@ def _update_fx_settings(settings, variable, config_user): | |
| settings['mask_landsea']['fx_files'] = [] | ||
|
|
||
| var = dict(variable) | ||
| var['fx_files'] = ['sftlf', 'sftof'] | ||
| var['fx_files'] = [{'short_name': 'sftlf'}, {'short_name': 'sftof'}] | ||
| fx_files_dict = get_input_fx_filelist( | ||
| variable=var, | ||
| rootpath=config_user['rootpath'], | ||
|
|
@@ -402,7 +442,7 @@ def _update_fx_settings(settings, variable, config_user): | |
| settings['mask_landseaice']['fx_files'] = [] | ||
|
|
||
| var = dict(variable) | ||
| var['fx_files'] = ['sftgif'] | ||
| var['fx_files'] = [{'short_name': 'sftgif'}] | ||
| fx_files_dict = get_input_fx_filelist( | ||
| variable=var, | ||
| rootpath=config_user['rootpath'], | ||
|
|
@@ -415,11 +455,13 @@ def _update_fx_settings(settings, variable, config_user): | |
|
|
||
| for step in ('area_statistics', 'volume_statistics'): | ||
| if settings.get(step, {}).get('fx_files'): | ||
| settings[step]['fx_files'] = get_input_fx_filelist( | ||
| variable=variable, | ||
| var = dict(variable) | ||
| var['fx_files'] = settings.get(step, {}).get('fx_files') | ||
| fx_files_dict = get_input_fx_filelist( | ||
| variable=var, | ||
| rootpath=config_user['rootpath'], | ||
| drs=config_user['drs'], | ||
| ) | ||
| drs=config_user['drs']) | ||
| settings[step]['fx_files'] = fx_files_dict | ||
|
|
||
|
|
||
| def _read_attributes(filename): | ||
|
|
@@ -772,6 +814,10 @@ def _get_preprocessor_task(variables, profiles, config_user, task_name): | |
| name=derive_name) | ||
| derive_tasks.append(task) | ||
|
|
||
| # don't do time gating for fx variables | ||
| if variables[0]['frequency'] == 'fx': | ||
| profile['extract_time'] = False | ||
|
|
||
| # Create (final) preprocessor task | ||
| task = _get_single_preprocessor_task( | ||
| variables, | ||
|
|
@@ -905,8 +951,8 @@ def _initialize_variables(self, raw_variable, raw_datasets): | |
| variable['institute'] = institute | ||
| check.variable(variable, required_keys) | ||
| if 'fx_files' in variable: | ||
| for fx_file in variable['fx_files']: | ||
| DATASET_KEYS.add(fx_file) | ||
| for fx_file_dict in variable['fx_files']: | ||
| DATASET_KEYS.add(fx_file_dict['short_name']) | ||
| # Get the fx files | ||
| variable['fx_files'] = get_input_fx_filelist( | ||
| variable=variable, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
whoops