From 73c69294744a62f783fb9836363d8648622bc08e Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Wed, 1 Mar 2023 14:33:05 +0100 Subject: [PATCH 01/13] Used temporary dirs as fixed files dirs --- esmvalcore/_main.py | 26 +++++++++++++++++++++---- esmvalcore/_recipe/recipe.py | 11 ----------- esmvalcore/cmor/_fixes/fix.py | 19 +++++++++++------- esmvalcore/config/_config_object.py | 6 ++++++ esmvalcore/dataset.py | 21 +++++++++++++++++++- tests/integration/recipe/test_recipe.py | 20 +++---------------- tests/unit/main/test_esmvaltool.py | 16 +++++++++++++++ tests/unit/recipe/test_recipe.py | 1 - tests/unit/test_dataset.py | 25 ++++++++++++++++++++++++ 9 files changed, 104 insertions(+), 41 deletions(-) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index fb304deb2b..84bb0cc0b9 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -438,10 +438,28 @@ def _run(self, recipe: Path, session) -> None: def _clean_preproc(session): import shutil - if session["remove_preproc_dir"] and session.preproc_dir.exists(): - logger.info("Removing preproc containing preprocessed data") - logger.info("If this data is further needed, then") - logger.info("set remove_preproc_dir to false in config-user.yml") + if (not session['save_intermediary_cubes'] and + session.fixed_file_dir.exists()): + logger.debug( + "Removing `preproc/fixed_files` directory containing fixed " + "data" + ) + logger.debug( + "If this data is further needed, then set " + "`save_intermediary_cubes` to `true` and `remove_preproc_dir` " + "to `false` in your user configuration file" + ) + shutil.rmtree(session.fixed_file_dir) + + if session['remove_preproc_dir'] and session.preproc_dir.exists(): + logger.info( + "Removing `preproc` directory containing preprocessed data" + ) + logger.info( + "If this data is further needed, then set " + "`remove_preproc_dir` to `false` in your user configuration " + "file" + ) shutil.rmtree(session.preproc_dir) @staticmethod diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index baac585026..686bcef665 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -223,17 +223,6 @@ def _get_default_settings(dataset): 'units': facets['units'], } - # Clean up fixed files - if not session['save_intermediary_cubes']: - fix_dirs = [] - for item in [dataset] + dataset.supplementaries: - output_file = _get_output_file(item.facets, session.preproc_dir) - fix_dir = f"{output_file.with_suffix('')}_fixed" - fix_dirs.append(fix_dir) - settings['cleanup'] = { - 'remove': fix_dirs, - } - # Strip supplementary variables before saving settings['remove_supplementary_variables'] = {} diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index db41dc3ff8..c9a0c89559 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -1,7 +1,8 @@ """Contains the base class for dataset fixes.""" +from __future__ import annotations + import importlib import inspect -import os from pathlib import Path from ..table import CMOR_TABLES @@ -197,14 +198,17 @@ def get_fixes(project, dataset, mip, short_name, extra_facets=None): return fixes @staticmethod - def get_fixed_filepath(output_dir, filepath): + def get_fixed_filepath( + output_dir: str | Path, + filepath: str | Path, + ) -> str: """Get the filepath for the fixed file. Parameters ---------- - output_dir: str + output_dir: str or Path Output directory. - filepath: str + filepath: str or Path Original path. Returns @@ -212,6 +216,7 @@ def get_fixed_filepath(output_dir, filepath): str Path to the fixed file. """ - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - return os.path.join(output_dir, os.path.basename(filepath)) + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + filepath = Path(filepath) + return str(output_dir / filepath.name) diff --git a/esmvalcore/config/_config_object.py b/esmvalcore/config/_config_object.py index 1d2c94e1e9..54c1900f72 100644 --- a/esmvalcore/config/_config_object.py +++ b/esmvalcore/config/_config_object.py @@ -147,6 +147,7 @@ class Session(ValidatedConfig): _validate = _validators relative_preproc_dir = Path('preproc') + relative_fixed_file_dir = Path('preproc', 'fixed_files') relative_work_dir = Path('work') relative_plot_dir = Path('plots') relative_run_dir = Path('run') @@ -177,6 +178,11 @@ def preproc_dir(self): """Return preproc directory.""" return self.session_dir / self.relative_preproc_dir + @property + def fixed_file_dir(self): + """Return fixed file directory.""" + return self.session_dir / self.relative_fixed_file_dir + @property def work_dir(self): """Return work directory.""" diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index ca95241d5e..c2ad0f7546 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -4,6 +4,7 @@ import logging import pprint import re +import tempfile import textwrap import uuid from copy import deepcopy @@ -696,7 +697,7 @@ def _load(self, callback) -> Cube: settings: dict[str, dict[str, Any]] = {} settings['fix_file'] = { - 'output_dir': Path(f"{output_file.with_suffix('')}_fixed"), + 'output_dir': self.get_temporary_fixed_file_dir(), **self.facets, } settings['load'] = {'callback': callback} @@ -842,3 +843,21 @@ def _update_timerange(self): check.valid_time_selection(timerange) self.set_facet('timerange', timerange) + + def get_temporary_fixed_file_dir(self) -> Path: + """Create and return new temporary directory for storing fixed files. + + Returns + ------- + Path + Path to new temporary directory. + + """ + fixed_file_dir = self.session.fixed_file_dir + fixed_file_dir.mkdir(parents=True, exist_ok=True) + facets_for_prefix = ('project', 'dataset', 'mip', 'short_name') + prefix = '_'.join( + [str(self.facets.get(facet, '')) for facet in facets_for_prefix] + + [''] + ) + return Path(tempfile.mkdtemp(prefix=prefix, dir=fixed_file_dir)) diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py index 5739768f90..bdfbd0a6c0 100644 --- a/tests/integration/recipe/test_recipe.py +++ b/tests/integration/recipe/test_recipe.py @@ -83,7 +83,6 @@ DEFAULT_PREPROCESSOR_STEPS = ( 'load', - 'cleanup', 'remove_supplementary_variables', 'save', ) @@ -104,16 +103,13 @@ def create_test_file(filename, tracking_id=None): iris.save(cube, filename) -def _get_default_settings_for_chl(fix_dir, save_filename): +def _get_default_settings_for_chl(save_filename): """Get default preprocessor settings for chl.""" defaults = { 'load': { 'callback': 'default' }, 'remove_supplementary_variables': {}, - 'cleanup': { - 'remove': [fix_dir] - }, 'save': { 'compress': False, 'filename': save_filename, @@ -432,9 +428,7 @@ def test_default_preprocessor(tmp_path, patched_datafinder, session): preproc_dir = os.path.dirname(product.filename) assert preproc_dir.startswith(str(tmp_path)) - fix_dir = os.path.join( - preproc_dir, 'CMIP5_CanESM2_Oyr_historical_r1i1p1_chl_2000-2005_fixed') - defaults = _get_default_settings_for_chl(fix_dir, product.filename) + defaults = _get_default_settings_for_chl(product.filename) assert product.settings == defaults @@ -472,9 +466,7 @@ def test_default_preprocessor_custom_order(tmp_path, patched_datafinder, preproc_dir = os.path.dirname(product.filename) assert preproc_dir.startswith(str(tmp_path)) - fix_dir = os.path.join( - preproc_dir, 'CMIP5_CanESM2_Oyr_historical_r1i1p1_chl_2000-2005_fixed') - defaults = _get_default_settings_for_chl(fix_dir, product.filename) + defaults = _get_default_settings_for_chl(product.filename) assert product.settings == defaults @@ -564,17 +556,11 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, session): preproc_dir = os.path.dirname(product.filename) assert preproc_dir.startswith(str(tmp_path)) - fix_dir = os.path.join(preproc_dir, - 'CMIP5_CanESM2_fx_historical_r0i0p0_sftlf_fixed') - defaults = { 'load': { 'callback': 'default' }, 'remove_supplementary_variables': {}, - 'cleanup': { - 'remove': [fix_dir] - }, 'save': { 'compress': False, 'filename': product.filename, diff --git a/tests/unit/main/test_esmvaltool.py b/tests/unit/main/test_esmvaltool.py index 42aaabc4ec..4c9e296fb8 100644 --- a/tests/unit/main/test_esmvaltool.py +++ b/tests/unit/main/test_esmvaltool.py @@ -30,6 +30,7 @@ def cfg(mocker, tmp_path): session.session_dir = output_dir / 'recipe_test' session.run_dir = session.session_dir / 'run_dir' session.preproc_dir = session.session_dir / 'preproc_dir' + session.fixed_file_dir = session.preproc_dir / 'fixed_files' cfg = mocker.Mock() cfg.start_session.return_value = session @@ -82,6 +83,7 @@ def test_run(mocker, session, offline): session['log_level'] = 'default' session['config_file'] = '/path/to/config-user.yml' session['remove_preproc_dir'] = True + session['save_intermediary_cubes'] = False recipe = Path('/recipe_dir/recipe_test.yml') @@ -156,10 +158,24 @@ def test_run_session_dir_exists_alternative_fails(mocker, session): def test_clean_preproc_dir(session): session.preproc_dir.mkdir(parents=True) + session.fixed_file_dir.mkdir(parents=True) session['remove_preproc_dir'] = True + session['save_intermediary_cubes'] = False program = ESMValTool() program._clean_preproc(session) assert not session.preproc_dir.exists() + assert not session.fixed_file_dir.exists() + + +def test_do_not_clean_preproc_dir(session): + session.preproc_dir.mkdir(parents=True) + session.fixed_file_dir.mkdir(parents=True) + session['remove_preproc_dir'] = False + session['save_intermediary_cubes'] = True + program = ESMValTool() + program._clean_preproc(session) + assert session.preproc_dir.exists() + assert session.fixed_file_dir.exists() @mock.patch('esmvalcore._main.iter_entry_points') diff --git a/tests/unit/recipe/test_recipe.py b/tests/unit/recipe/test_recipe.py index 603ab47ae7..b830351318 100644 --- a/tests/unit/recipe/test_recipe.py +++ b/tests/unit/recipe/test_recipe.py @@ -571,7 +571,6 @@ def test_get_default_settings(mocker): 'load': {'callback': 'default'}, 'remove_supplementary_variables': {}, 'save': {'compress': False, 'alias': 'sic'}, - 'cleanup': {'remove': ['/path/to/file_fixed']}, } diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index fd40cb1946..09c6fadd97 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1560,6 +1560,12 @@ def test_load(mocker, session): create_autospec=True, return_value=output_file, ) + test_get_temporary_fixed_file_dir = mocker.patch.object( + dataset, + 'get_temporary_fixed_file_dir', + create_autospec=True, + return_value=fix_dir, + ) args = {} order = [] @@ -1654,6 +1660,7 @@ def mock_preprocess(items, step, input_files, output_file, debug, assert args == load_args _get_output_file.assert_called_with(dataset.facets, session.preproc_dir) + test_get_temporary_fixed_file_dir.assert_called_once_with() def test_load_fail(session): @@ -1663,3 +1670,21 @@ def test_load_fail(session): dataset.files = [] with pytest.raises(InputFilesNotFound): dataset.load() + + +@pytest.mark.parametrize( + 'dataset,prefix', + [ + (Dataset(project='OBS', dataset='X', mip='fx', short_name='sftlf'), + 'OBS_X_fx_sftlf_'), + (Dataset(dataset='Y'), '_Y___'), + (Dataset(), '____'), + ] +) +def test_get_temporary_fixed_file_dir(session, dataset, prefix): + """Test ``Dataset._get_temporary_fixed_file_dir``.""" + dataset.session = session + temp_dir = dataset.get_temporary_fixed_file_dir() + assert temp_dir.parent == session.session_dir / 'preproc' / 'fixed_files' + assert temp_dir.is_dir() + assert temp_dir.name.startswith(prefix) From 20b496369d6ce0e9292cf905af6dc687cb013052 Mon Sep 17 00:00:00 2001 From: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Date: Thu, 2 Mar 2023 11:29:41 +0100 Subject: [PATCH 02/13] Update esmvalcore/dataset.py Co-authored-by: Bouwe Andela --- esmvalcore/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 6b2b6fb515..65ed221ac4 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -863,7 +863,7 @@ def get_temporary_fixed_file_dir(self) -> Path: fixed_file_dir.mkdir(parents=True, exist_ok=True) facets_for_prefix = ('project', 'dataset', 'mip', 'short_name') prefix = '_'.join( - [str(self.facets.get(facet, '')) for facet in facets_for_prefix] + + ["-".join(str(elem) for elem in value) if isinstance(value, (list, tuple)) else str(value) for facet, value in self.facets.items() if facets in facets_for_prefix] + [''] ) return Path(tempfile.mkdtemp(prefix=prefix, dir=fixed_file_dir)) From acddf753c60ca71079f705cbb5fd5e097a65682d Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 12:02:04 +0100 Subject: [PATCH 03/13] get_fixed_filepath() -> Path --- esmvalcore/cmor/_fixes/fix.py | 7 +- esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py | 2 +- tests/integration/cmor/_fixes/test_fix.py | 184 +++++++++++----------- 3 files changed, 99 insertions(+), 94 deletions(-) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index c9a0c89559..626f7aafff 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -201,7 +201,7 @@ def get_fixes(project, dataset, mip, short_name, extra_facets=None): def get_fixed_filepath( output_dir: str | Path, filepath: str | Path, - ) -> str: + ) -> Path: """Get the filepath for the fixed file. Parameters @@ -213,10 +213,9 @@ def get_fixed_filepath( Returns ------- - str + Path Path to the fixed file. """ output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) - filepath = Path(filepath) - return str(output_dir / filepath.name) + return output_dir / Path(filepath).name diff --git a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py index d590daf7af..9d21dfe4c3 100644 --- a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py +++ b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py @@ -51,7 +51,7 @@ def fix_file(self, filepath, output_dir): outfile = self.get_fixed_filepath(output_dir, alt_filepath) tim1 = time.time() logger.debug("Using CDO for selecting %s in %s", varname, filepath) - command = ["cdo", "-selvar,%s" % varname, str(filepath), outfile] + command = ["cdo", "-selvar,%s" % varname, str(filepath), str(outfile)] subprocess.run(command, check=True) logger.debug("CDO selection done in %.2f seconds", time.time() - tim1) return outfile diff --git a/tests/integration/cmor/_fixes/test_fix.py b/tests/integration/cmor/_fixes/test_fix.py index 16629549fe..23b219ac74 100644 --- a/tests/integration/cmor/_fixes/test_fix.py +++ b/tests/integration/cmor/_fixes/test_fix.py @@ -1,9 +1,7 @@ """Integration tests for fixes.""" import os -import shutil -import tempfile -import unittest +from pathlib import Path import pytest from iris.cube import Cube @@ -13,100 +11,108 @@ from esmvalcore.cmor._fixes.cmip5.cesm1_bgc import Gpp from esmvalcore.cmor._fixes.cmip6.cesm2 import Omon, Tos from esmvalcore.cmor._fixes.cordex.cnrm_cerfacs_cnrm_cm5.cnrm_aladin63 import ( - Tas) + Tas, +) from esmvalcore.cmor._fixes.cordex.cordex_fixes import AllVars from esmvalcore.cmor.fix import Fix -class TestFix(unittest.TestCase): - def setUp(self): - """Set up temp folder.""" - self.temp_folder = tempfile.mkdtemp() - - def tearDown(self): - """Remove temp folder.""" - shutil.rmtree(self.temp_folder) - - def test_get_fix(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgco2'), [FgCo2(None)]) - - def test_get_fix_case_insensitive(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgCo2'), [FgCo2(None)]) - - def test_get_fix_cordex(self): - self.assertListEqual( - Fix.get_fixes( - 'CORDEX', - 'CNRM-ALADIN63', - 'Amon', - 'tas', - extra_facets={'driver': 'CNRM-CERFACS-CNRM-CM5'}), - [Tas(None), AllVars(None)]) - - def test_get_grid_fix_cordex(self): - self.assertListEqual( - Fix.get_fixes( - 'CORDEX', - 'CNRM-ALADIN53', - 'Amon', - 'tas', - extra_facets={'driver': 'CNRM-CERFACS-CNRM-CM5'}), - [AllVars(None)]) - - def test_get_fixes_with_replace(self): - self.assertListEqual(Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'ch4'), - [Ch4(None)]) - - def test_get_fixes_with_generic(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'CESM1-BGC', 'Amon', 'gpp'), [Gpp(None)]) - - def test_get_fix_no_project(self): - with pytest.raises(KeyError): - Fix.get_fixes('BAD_PROJECT', 'BNU-ESM', 'Amon', 'ch4') - - def test_get_fix_no_model(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'BAD_MODEL', 'Amon', 'ch4'), []) - - def test_get_fix_no_var(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'BAD_VAR'), []) - - def test_get_fix_only_mip(self): - self.assertListEqual( - Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'thetao'), [Omon(None)]) - - def test_get_fix_only_mip_case_insensitive(self): - self.assertListEqual( - Fix.get_fixes('CMIP6', 'CESM2', 'omOn', 'thetao'), [Omon(None)]) - - def test_get_fix_mip_and_var(self): - self.assertListEqual( - Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'tos'), +def test_get_fix(): + assert Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgco2') == [FgCo2(None)] + + +def test_get_fix_case_insensitive(): + assert Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgCo2'), [FgCo2(None)] + + +def test_get_fix_cordex(): + fix = Fix.get_fixes( + 'CORDEX', + 'CNRM-ALADIN63', + 'Amon', + 'tas', + extra_facets={'driver': 'CNRM-CERFACS-CNRM-CM5'}, + ) + assert fix == [Tas(None), AllVars(None)] + + +def test_get_grid_fix_cordex(): + fix = Fix.get_fixes( + 'CORDEX', + 'CNRM-ALADIN53', + 'Amon', + 'tas', + extra_facets={'driver': 'CNRM-CERFACS-CNRM-CM5'}, + ) + assert fix == [AllVars(None)] + + +def test_get_fixes_with_replace(): + assert Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'ch4') == [Ch4(None)] + + +def test_get_fixes_with_generic(): + assert Fix.get_fixes('CMIP5', 'CESM1-BGC', 'Amon', 'gpp') == [Gpp(None)] + + +def test_get_fix_no_project(): + with pytest.raises(KeyError): + Fix.get_fixes('BAD_PROJECT', 'BNU-ESM', 'Amon', 'ch4') + + +def test_get_fix_no_model(): + assert Fix.get_fixes('CMIP5', 'BAD_MODEL', 'Amon', 'ch4') == [] + + +def test_get_fix_no_var(): + assert Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'BAD_VAR') == [] + + +def test_get_fix_only_mip(): + assert Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'thetao') == [Omon(None)] + + +def test_get_fix_only_mip_case_insensitive(): + assert Fix.get_fixes('CMIP6', 'CESM2', 'omOn', 'thetao') == [Omon(None)] + + +def test_get_fix_mip_and_var(): + assert (Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'tos') == [Tos(None), Omon(None)]) - def test_fix_metadata(self): - cube = Cube([0]) - reference = Cube([0]) - self.assertEqual(Fix(None).fix_metadata(cube), reference) +def test_fix_metadata(): + cube = Cube([0]) + reference = Cube([0]) + assert Fix(None).fix_metadata(cube) == reference + + +def test_fix_data(): + cube = Cube([0]) + reference = Cube([0]) + assert Fix(None).fix_data(cube) == reference + + +def test_fix_file(): + filepath = 'sample_filepath' + assert Fix(None).fix_file(filepath, 'preproc') == filepath - def test_fix_data(self): - cube = Cube([0]) - reference = Cube([0]) - self.assertEqual(Fix(None).fix_data(cube), reference) +def test_get_fixed_filepath_paths(tmp_path): + output_dir = tmp_path / 'fixed' + filepath = Path('this', 'is', 'a', 'file.nc') + assert not output_dir.is_dir() + fixed_path = Fix(None).get_fixed_filepath(output_dir, filepath) + assert output_dir.is_dir() + assert isinstance(fixed_path, Path) + assert fixed_path == tmp_path / 'fixed' / 'file.nc' - def test_fix_file(self): - filepath = 'sample_filepath' - self.assertEqual(Fix(None).fix_file(filepath, 'preproc'), filepath) - def test_fixed_filenam(self): - filepath = os.path.join(self.temp_folder, 'file.nc') - output_dir = os.path.join(self.temp_folder, 'fixed') - os.makedirs(output_dir) - fixed_filepath = Fix(None).get_fixed_filepath(output_dir, filepath) - self.assertTrue(fixed_filepath, os.path.join(output_dir, 'file.nc')) +def test_get_fixed_filepath_strs(tmp_path): + output_dir = os.path.join(str(tmp_path), 'fixed') + filepath = os.path.join('this', 'is', 'a', 'file.nc') + assert not Path(output_dir).is_dir() + fixed_path = Fix(None).get_fixed_filepath(output_dir, filepath) + assert Path(output_dir).is_dir() + assert isinstance(fixed_path, Path) + assert fixed_path == tmp_path / 'fixed' / 'file.nc' From fa60329f8066c08b12063012dd6381ffff84a3fb Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 12:50:04 +0100 Subject: [PATCH 04/13] Nicer prefix for tmp fix dirs --- esmvalcore/dataset.py | 62 ++++++++++++++++++++++++-------------- tests/unit/test_dataset.py | 35 +++++++++++++++++++-- 2 files changed, 71 insertions(+), 26 deletions(-) diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 65ed221ac4..319d443e39 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -97,6 +97,22 @@ class Dataset: Facets describing the dataset. """ + _SUMMARY_FACETS = ( + 'short_name', + 'mip', + 'project', + 'dataset', + 'rcm_version', + 'driver', + 'domain', + 'activity', + 'exp', + 'ensemble', + 'grid', + 'version', + ) + """Facets used to create a summary of a Dataset instance.""" + def __init__(self, **facets: FacetValue): self.facets: Facets = {} @@ -434,6 +450,24 @@ def facets2str(facets): txt.append(f"session: '{self.session.session_name}'") return "\n".join(txt) + def _get_joined_summary_facets( + self, + separator: str, + join_lists: bool = False, + ) -> str: + """Get string consisting of joined summary facets.""" + summary_facets_vals = [] + for key in self._SUMMARY_FACETS: + if key not in self.facets: + continue + val = self.facets[key] + if join_lists and isinstance(val, (tuple, list)): + val = '-'.join(str(elem) for elem in val) + else: + val = str(val) + summary_facets_vals.append(val) + return separator.join(summary_facets_vals) + def summary(self, shorten: bool = False) -> str: """Summarize the content of dataset. @@ -450,28 +484,12 @@ def summary(self, shorten: bool = False) -> str: if not shorten: return repr(self) - keys = ( - 'short_name', - 'mip', - 'project', - 'dataset', - 'rcm_version', - 'driver', - 'domain', - 'activity', - 'exp', - 'ensemble', - 'grid', - 'version', - ) title = self.__class__.__name__ - txt = ( - f"{title}: " + - ", ".join(str(self.facets[k]) for k in keys if k in self.facets)) + txt = f"{title}: " + self._get_joined_summary_facets(', ') def supplementary_summary(dataset): return ", ".join( - str(dataset.facets[k]) for k in keys + str(dataset.facets[k]) for k in self._SUMMARY_FACETS if k in dataset.facets and dataset[k] != self.facets.get(k)) if self.supplementaries: @@ -861,9 +879,7 @@ def get_temporary_fixed_file_dir(self) -> Path: """ fixed_file_dir = self.session.fixed_file_dir fixed_file_dir.mkdir(parents=True, exist_ok=True) - facets_for_prefix = ('project', 'dataset', 'mip', 'short_name') - prefix = '_'.join( - ["-".join(str(elem) for elem in value) if isinstance(value, (list, tuple)) else str(value) for facet, value in self.facets.items() if facets in facets_for_prefix] + - [''] - ) + prefix = self._get_joined_summary_facets('_', join_lists=True) + if prefix != '': + prefix += '_' return Path(tempfile.mkdtemp(prefix=prefix, dir=fixed_file_dir)) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index cd3c57163f..9bd71fd7b9 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -47,6 +47,33 @@ def test_repr_supplementary(): """).strip() +@pytest.mark.parametrize( + "separator,join_lists,output", + [ + ('_', False, "1_d_dom_a_('e1', 'e2')_['ens2', 'ens1']_g1_v1"), + ('_', True, "1_d_dom_a_e1-e2_ens2-ens1_g1_v1"), + (' ', False, "1 d dom a ('e1', 'e2') ['ens2', 'ens1'] g1 v1"), + (' ', True, "1 d dom a e1-e2 ens2-ens1 g1 v1"), + ] +) +def test_get_joined_summary_facet(separator, join_lists, output): + ds = Dataset( + test='this should not appear', + rcm_version='1', + driver='d', + domain='dom', + activity='a', + exp=('e1', 'e2'), + ensemble=['ens2', 'ens1'], + grid='g1', + version='v1', + ) + joined_str = ds._get_joined_summary_facets( + separator, join_lists=join_lists + ) + assert joined_str == output + + def test_short_summary(): ds = Dataset( project='CMIP6', @@ -1736,9 +1763,11 @@ def test_load_fail(session): 'dataset,prefix', [ (Dataset(project='OBS', dataset='X', mip='fx', short_name='sftlf'), - 'OBS_X_fx_sftlf_'), - (Dataset(dataset='Y'), '_Y___'), - (Dataset(), '____'), + 'sftlf_fx_OBS_X_'), + (Dataset(mip=('fx', 'day'), short_name='sftlf', exp=['exp1', 'exp2']), + 'sftlf_fx-day_exp1-exp2_'), + (Dataset(dataset='Y'), 'Y_'), + (Dataset(), ''), ] ) def test_get_temporary_fixed_file_dir(session, dataset, prefix): From 4ddb6e4ec3c1a9db956534af58c817771b476657 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 12:58:06 +0100 Subject: [PATCH 05/13] Made new functionalities private --- esmvalcore/_main.py | 4 ++-- esmvalcore/config/_config_object.py | 12 ++++++------ esmvalcore/dataset.py | 6 +++--- tests/unit/main/test_esmvaltool.py | 10 +++++----- tests/unit/test_dataset.py | 4 ++-- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index 2e34935385..0a0f563ff5 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -457,7 +457,7 @@ def _clean_preproc(session): import shutil if (not session['save_intermediary_cubes'] and - session.fixed_file_dir.exists()): + session._fixed_file_dir.exists()): logger.debug( "Removing `preproc/fixed_files` directory containing fixed " "data" @@ -467,7 +467,7 @@ def _clean_preproc(session): "`save_intermediary_cubes` to `true` and `remove_preproc_dir` " "to `false` in your user configuration file" ) - shutil.rmtree(session.fixed_file_dir) + shutil.rmtree(session._fixed_file_dir) if session['remove_preproc_dir'] and session.preproc_dir.exists(): logger.info( diff --git a/esmvalcore/config/_config_object.py b/esmvalcore/config/_config_object.py index 3c05b5cf08..425788b81a 100644 --- a/esmvalcore/config/_config_object.py +++ b/esmvalcore/config/_config_object.py @@ -154,12 +154,12 @@ class Session(ValidatedConfig): _deprecated_defaults = _deprecated_options_defaults relative_preproc_dir = Path('preproc') - relative_fixed_file_dir = Path('preproc', 'fixed_files') relative_work_dir = Path('work') relative_plot_dir = Path('plots') relative_run_dir = Path('run') relative_main_log = Path('run', 'main_log.txt') relative_main_log_debug = Path('run', 'main_log_debug.txt') + _relative_fixed_file_dir = Path('preproc', 'fixed_files') def __init__(self, config: dict, name: str = 'session'): super().__init__(config) @@ -185,11 +185,6 @@ def preproc_dir(self): """Return preproc directory.""" return self.session_dir / self.relative_preproc_dir - @property - def fixed_file_dir(self): - """Return fixed file directory.""" - return self.session_dir / self.relative_fixed_file_dir - @property def work_dir(self): """Return work directory.""" @@ -220,6 +215,11 @@ def main_log_debug(self): """Return main log debug file.""" return self.session_dir / self.relative_main_log_debug + @property + def _fixed_file_dir(self): + """Return fixed file directory.""" + return self.session_dir / self._relative_fixed_file_dir + def to_config_user(self) -> dict: """Turn the `Session` object into a recipe-compatible dict. diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 319d443e39..98993cd8a2 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -721,7 +721,7 @@ def _load(self, callback) -> Cube: settings: dict[str, dict[str, Any]] = {} settings['fix_file'] = { - 'output_dir': self.get_temporary_fixed_file_dir(), + 'output_dir': self._get_temporary_fixed_file_dir(), **self.facets, } settings['load'] = {'callback': callback} @@ -868,7 +868,7 @@ def _update_timerange(self): self.set_facet('timerange', timerange) - def get_temporary_fixed_file_dir(self) -> Path: + def _get_temporary_fixed_file_dir(self) -> Path: """Create and return new temporary directory for storing fixed files. Returns @@ -877,7 +877,7 @@ def get_temporary_fixed_file_dir(self) -> Path: Path to new temporary directory. """ - fixed_file_dir = self.session.fixed_file_dir + fixed_file_dir = self.session._fixed_file_dir fixed_file_dir.mkdir(parents=True, exist_ok=True) prefix = self._get_joined_summary_facets('_', join_lists=True) if prefix != '': diff --git a/tests/unit/main/test_esmvaltool.py b/tests/unit/main/test_esmvaltool.py index 9b9f38023a..0bbe1b850d 100644 --- a/tests/unit/main/test_esmvaltool.py +++ b/tests/unit/main/test_esmvaltool.py @@ -30,7 +30,7 @@ def cfg(mocker, tmp_path): session.session_dir = output_dir / 'recipe_test' session.run_dir = session.session_dir / 'run_dir' session.preproc_dir = session.session_dir / 'preproc_dir' - session.fixed_file_dir = session.preproc_dir / 'fixed_files' + session._fixed_file_dir = session.preproc_dir / 'fixed_files' cfg = mocker.Mock() cfg.start_session.return_value = session @@ -158,24 +158,24 @@ def test_run_session_dir_exists_alternative_fails(mocker, session): def test_clean_preproc_dir(session): session.preproc_dir.mkdir(parents=True) - session.fixed_file_dir.mkdir(parents=True) + session._fixed_file_dir.mkdir(parents=True) session['remove_preproc_dir'] = True session['save_intermediary_cubes'] = False program = ESMValTool() program._clean_preproc(session) assert not session.preproc_dir.exists() - assert not session.fixed_file_dir.exists() + assert not session._fixed_file_dir.exists() def test_do_not_clean_preproc_dir(session): session.preproc_dir.mkdir(parents=True) - session.fixed_file_dir.mkdir(parents=True) + session._fixed_file_dir.mkdir(parents=True) session['remove_preproc_dir'] = False session['save_intermediary_cubes'] = True program = ESMValTool() program._clean_preproc(session) assert session.preproc_dir.exists() - assert session.fixed_file_dir.exists() + assert session._fixed_file_dir.exists() @mock.patch('esmvalcore._main.iter_entry_points') diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 9bd71fd7b9..89b5df48cb 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1649,7 +1649,7 @@ def test_load(mocker, session): ) test_get_temporary_fixed_file_dir = mocker.patch.object( dataset, - 'get_temporary_fixed_file_dir', + '_get_temporary_fixed_file_dir', create_autospec=True, return_value=fix_dir, ) @@ -1773,7 +1773,7 @@ def test_load_fail(session): def test_get_temporary_fixed_file_dir(session, dataset, prefix): """Test ``Dataset._get_temporary_fixed_file_dir``.""" dataset.session = session - temp_dir = dataset.get_temporary_fixed_file_dir() + temp_dir = dataset._get_temporary_fixed_file_dir() assert temp_dir.parent == session.session_dir / 'preproc' / 'fixed_files' assert temp_dir.is_dir() assert temp_dir.name.startswith(prefix) From d5c16197608d13868943e88d649991f5b92a485d Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 13:14:29 +0100 Subject: [PATCH 06/13] Deprecated cleanup --- esmvalcore/config/_config_validators.py | 2 ++ esmvalcore/preprocessor/__init__.py | 9 +++--- esmvalcore/preprocessor/_io.py | 29 ++++++++++++++++++- .../preprocessor/_io/test_cleanup.py | 9 ++++++ 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/esmvalcore/config/_config_validators.py b/esmvalcore/config/_config_validators.py index cc65369c0c..736a6ba689 100644 --- a/esmvalcore/config/_config_validators.py +++ b/esmvalcore/config/_config_validators.py @@ -355,6 +355,7 @@ def deprecate_offline( Raw input value for ``offline`` option. validated_value: Any Validated value for ``offline`` option. + """ option = 'offline' deprecated_version = '2.8.0' @@ -386,6 +387,7 @@ def deprecate_use_legacy_supplementaries( Raw input value for ``use_legacy_supplementaries`` option. validated_value: Any Validated value for ``use_legacy_supplementaries`` option. + """ option = 'use_legacy_supplementaries' deprecated_version = '2.8.0' diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 34be8289fb..a57fb50373 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -493,10 +493,11 @@ def save(self): 'save', input_files=self._input_files, **self.settings['save']) - preprocess([], - 'cleanup', - input_files=self._input_files, - **self.settings.get('cleanup', {})) + if 'cleanup' in self.settings: + preprocess([], + 'cleanup', + input_files=self._input_files, + **self.settings['cleanup']) def close(self): """Close the file.""" diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index ba134968f8..8fc16aa2fc 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -347,7 +347,34 @@ def _get_debug_filename(filename, step): def cleanup(files, remove=None): - """Clean up after running the preprocessor.""" + """_summary_Clean up after running the preprocessor. + + Warning + ------- + .. deprecated:: 2.8.0 + This function has been deprecated in ESMValCore version 2.8.0 and is + scheduled for removal in version 2.10.0. + + Parameters + ---------- + files: list of Path + Preprocessor output files (will not be removed if not in `removed`). + remove: list of Path or None, optional (default: None) + Files or directories to remove. + + Returns + ------- + list of Path + Preprocessor output files. + + """ + deprecation_msg = ( + "The preprocessor function `cleanup` has been deprecated in " + "ESMValCore version 2.8.0 and is scheduled for removal in version " + "2.10.0." + ) + warnings.warn(deprecation_msg, ESMValCoreDeprecationWarning) + if remove is None: remove = [] diff --git a/tests/integration/preprocessor/_io/test_cleanup.py b/tests/integration/preprocessor/_io/test_cleanup.py index e8bfdd6156..3ef98b8574 100644 --- a/tests/integration/preprocessor/_io/test_cleanup.py +++ b/tests/integration/preprocessor/_io/test_cleanup.py @@ -4,6 +4,9 @@ import tempfile import unittest +import pytest + +from esmvalcore.exceptions import ESMValCoreDeprecationWarning from esmvalcore.preprocessor import _io @@ -37,3 +40,9 @@ def test_cleanup_when_files_removed(self): _io.cleanup([], self.temp_paths) for path in self.temp_paths: self.assertFalse(os.path.exists(path)) + + def test_deprecation(self): + """Test that deprecation warning is properly raised.""" + msg = "cleanup" + with pytest.warns(ESMValCoreDeprecationWarning, match=msg): + _io.cleanup([], []) From a3daf9c862fbc353f579a6529b83e115fdb62fe0 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 13:22:07 +0100 Subject: [PATCH 07/13] Fixed docstring --- esmvalcore/preprocessor/_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 8fc16aa2fc..6f24010625 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -347,7 +347,7 @@ def _get_debug_filename(filename, step): def cleanup(files, remove=None): - """_summary_Clean up after running the preprocessor. + """Clean up after running the preprocessor. Warning ------- From f87e25e566d681d3eb3d9fcf346b5ef0abbd289f Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 13:45:36 +0100 Subject: [PATCH 08/13] Added missing test for cleanup call --- .../preprocessor/test_preprocessor_file.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/unit/preprocessor/test_preprocessor_file.py b/tests/unit/preprocessor/test_preprocessor_file.py index df731e5655..3ebb3385d6 100644 --- a/tests/unit/preprocessor/test_preprocessor_file.py +++ b/tests/unit/preprocessor/test_preprocessor_file.py @@ -145,3 +145,38 @@ def test_close(): product.save.assert_called_once_with() product.save_provenance.assert_called_once_with() assert product._cubes is None + + +@mock.patch('esmvalcore.preprocessor.preprocess', autospec=True) +def test_save_no_cleanup(mock_preprocess): + """Test ``save``.""" + product = mock.create_autospec(PreprocessorFile, instance=True) + product.settings = {'save': {}} + product._cubes = mock.sentinel.cubes + product._input_files = mock.sentinel.input_files + + PreprocessorFile.save(product) + + assert mock_preprocess.mock_calls == [ + mock.call( + mock.sentinel.cubes, 'save', input_files=mock.sentinel.input_files + ), + ] + + +@mock.patch('esmvalcore.preprocessor.preprocess', autospec=True) +def test_save_cleanup(mock_preprocess): + """Test ``save``.""" + product = mock.create_autospec(PreprocessorFile, instance=True) + product.settings = {'save': {}, 'cleanup': {}} + product._cubes = mock.sentinel.cubes + product._input_files = mock.sentinel.input_files + + PreprocessorFile.save(product) + + assert mock_preprocess.mock_calls == [ + mock.call( + mock.sentinel.cubes, 'save', input_files=mock.sentinel.input_files + ), + mock.call([], 'cleanup', input_files=mock.sentinel.input_files), + ] From 6b6c5930edba103f3b979e64eff465e107fb2b14 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 15:07:50 +0100 Subject: [PATCH 09/13] Suggestions from code review --- esmvalcore/dataset.py | 4 +--- esmvalcore/preprocessor/_io.py | 5 +++-- tests/unit/test_dataset.py | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 98993cd8a2..273f4e2fed 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -879,7 +879,5 @@ def _get_temporary_fixed_file_dir(self) -> Path: """ fixed_file_dir = self.session._fixed_file_dir fixed_file_dir.mkdir(parents=True, exist_ok=True) - prefix = self._get_joined_summary_facets('_', join_lists=True) - if prefix != '': - prefix += '_' + prefix = self._get_joined_summary_facets('_', join_lists=True) + '_' return Path(tempfile.mkdtemp(prefix=prefix, dir=fixed_file_dir)) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 6f24010625..564ec89fe3 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -352,8 +352,9 @@ def cleanup(files, remove=None): Warning ------- .. deprecated:: 2.8.0 - This function has been deprecated in ESMValCore version 2.8.0 and is - scheduled for removal in version 2.10.0. + This function is no longer used and has been deprecated since + ESMValCore version 2.8.0. It is scheduled for removal in version + 2.10.0. Parameters ---------- diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 89b5df48cb..c1446bb0cf 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1767,7 +1767,7 @@ def test_load_fail(session): (Dataset(mip=('fx', 'day'), short_name='sftlf', exp=['exp1', 'exp2']), 'sftlf_fx-day_exp1-exp2_'), (Dataset(dataset='Y'), 'Y_'), - (Dataset(), ''), + (Dataset(), '_'), ] ) def test_get_temporary_fixed_file_dir(session, dataset, prefix): From 343cc27aef400a4d9fe0fb856adaa81655ec934d Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 16:16:47 +0100 Subject: [PATCH 10/13] Create fix dir only if necessary --- esmvalcore/cmor/_fixes/cmip6/cesm2.py | 22 +++++-- esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py | 13 +++- esmvalcore/cmor/_fixes/emac/emac.py | 6 +- esmvalcore/cmor/_fixes/fix.py | 68 +++++++++++++++------ esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py | 6 +- esmvalcore/cmor/fix.py | 16 +++-- esmvalcore/dataset.py | 21 ++++--- tests/unit/test_dataset.py | 17 +++--- 8 files changed, 120 insertions(+), 49 deletions(-) diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2.py index 51bc04685e..99c38a3d72 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2.py @@ -18,9 +18,16 @@ class Cl(Fix): """Fixes for ``cl``.""" - def _fix_formula_terms(self, filepath, output_dir): + def _fix_formula_terms( + self, + filepath, + output_dir, + create_temporary_dir=False, + ): """Fix ``formula_terms`` attribute.""" - new_path = self.get_fixed_filepath(output_dir, filepath) + new_path = self.get_fixed_filepath( + output_dir, filepath, create_temporary_dir=create_temporary_dir + ) copyfile(filepath, new_path) dataset = Dataset(new_path, mode='a') dataset.variables['lev'].formula_terms = 'p0: p0 a: a b: b ps: ps' @@ -29,7 +36,7 @@ def _fix_formula_terms(self, filepath, output_dir): dataset.close() return new_path - def fix_file(self, filepath, output_dir): + def fix_file(self, filepath, output_dir, create_temporary_dir=False): """Fix hybrid pressure coordinate. Adds missing ``formula_terms`` attribute to file. @@ -47,6 +54,11 @@ def fix_file(self, filepath, output_dir): Path to the original file. output_dir : str Path of the directory where the fixed file is saved to. + create_temporary_dir: bool, optional (default: False) + If `True`, create temporary directory using `output_dir` as a + `prefix` for :func:`tempfile.mkdtemp` and store the fixed files in + there. If `False`, use the `output_dir` as directory to store fixed + files. Returns ------- @@ -54,7 +66,9 @@ def fix_file(self, filepath, output_dir): Path to the fixed file. """ - new_path = self._fix_formula_terms(filepath, output_dir) + new_path = self._fix_formula_terms( + filepath, output_dir, create_temporary_dir=create_temporary_dir + ) dataset = Dataset(new_path, mode='a') dataset.variables['a_bnds'][:] = dataset.variables['a_bnds'][::-1, :] dataset.variables['b_bnds'][:] = dataset.variables['b_bnds'][::-1, :] diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py index 200505f12b..3eec596c38 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py @@ -1,17 +1,17 @@ """Fixes for CESM2-WACCM model.""" from netCDF4 import Dataset +from ..common import SiconcFixScalarCoord from .cesm2 import Cl as BaseCl from .cesm2 import Fgco2 as BaseFgco2 from .cesm2 import Omon as BaseOmon from .cesm2 import Tas as BaseTas -from ..common import SiconcFixScalarCoord class Cl(BaseCl): """Fixes for cl.""" - def fix_file(self, filepath, output_dir): + def fix_file(self, filepath, output_dir, create_temporary_dir=False): """Fix hybrid pressure coordinate. Adds missing ``formula_terms`` attribute to file. @@ -29,6 +29,11 @@ def fix_file(self, filepath, output_dir): Path to the original file. output_dir : str Path of the directory where the fixed file is saved to. + create_temporary_dir: bool, optional (default: False) + If `True`, create temporary directory using `output_dir` as a + `prefix` for :func:`tempfile.mkdtemp` and store the fixed files in + there. If `False`, use the `output_dir` as directory to store fixed + files. Returns ------- @@ -36,7 +41,9 @@ def fix_file(self, filepath, output_dir): Path to the fixed file. """ - new_path = self._fix_formula_terms(filepath, output_dir) + new_path = self._fix_formula_terms( + filepath, output_dir, create_temporary_dir=create_temporary_dir + ) dataset = Dataset(new_path, mode='a') dataset.variables['a_bnds'][:] = dataset.variables['a_bnds'][:, ::-1] dataset.variables['b_bnds'][:] = dataset.variables['b_bnds'][:, ::-1] diff --git a/esmvalcore/cmor/_fixes/emac/emac.py b/esmvalcore/cmor/_fixes/emac/emac.py index a65a0a21fa..d3a90e7890 100644 --- a/esmvalcore/cmor/_fixes/emac/emac.py +++ b/esmvalcore/cmor/_fixes/emac/emac.py @@ -37,7 +37,7 @@ class AllVars(EmacFix): 'kg/m**2s': 'kg m-2 s-1', } - def fix_file(self, filepath, output_dir): + def fix_file(self, filepath, output_dir, create_temporary_dir=False): """Fix file. Fixes hybrid pressure level coordinate. @@ -51,7 +51,9 @@ def fix_file(self, filepath, output_dir): """ if 'alevel' not in self.vardef.dimensions: return filepath - new_path = self.get_fixed_filepath(output_dir, filepath) + new_path = self.get_fixed_filepath( + output_dir, filepath, create_temporary_dir=create_temporary_dir + ) copyfile(filepath, new_path) with Dataset(new_path, mode='a') as dataset: if 'formula_terms' in dataset.variables['lev'].ncattrs(): diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 626f7aafff..feb13eed5e 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -3,6 +3,7 @@ import importlib import inspect +import tempfile from pathlib import Path from ..table import CMOR_TABLES @@ -17,36 +18,50 @@ def __init__(self, vardef, extra_facets=None): Parameters ---------- vardef: str - CMOR table entry + CMOR table entry. extra_facets: dict, optional Extra facets are mainly used for data outside of the big projects like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. + """ self.vardef = vardef if extra_facets is None: extra_facets = {} self.extra_facets = extra_facets - def fix_file(self, filepath: Path, output_dir: Path) -> Path: + def fix_file( + self, + filepath: Path, + output_dir: Path, + create_temporary_dir: bool = False, + ) -> Path: """Apply fixes to the files prior to creating the cube. - Should be used only to fix errors that prevent loading or can - not be fixed in the cube (i.e. those related with missing_value - and _FillValue) + Should be used only to fix errors that prevent loading or cannot be + fixed in the cube (e.g., those related to `missing_value` or + `_FillValue`). Parameters ---------- filepath: Path - file to fix + File to fix. output_dir: Path - path to the folder to store the fixed files, if required + Output directory for fixed files or prefix for + :func:`tempfile.mkdtemp` (see `create_temporary_dir`). Make sure + this directory exists if `create_temporary_dir=False` is used. + create_temporary_dir: bool, optional (default: False) + If `True`, create temporary directory using `output_dir` as a + `prefix` for :func:`tempfile.mkdtemp` and store the fixed files in + there. If `False`, use the `output_dir` as directory to store fixed + files. Returns ------- Path Path to the corrected file. It can be different from the original filepath if a fix has been applied, but if not it should be the - original filepath + original filepath. + """ return filepath @@ -60,12 +75,13 @@ def fix_metadata(self, cubes): Parameters ---------- cubes: iris.cube.CubeList - Cubes to fix + Cubes to fix. Returns ------- iris.cube.CubeList Fixed cubes. They can be different instances. + """ return cubes @@ -75,19 +91,21 @@ def get_cube_from_list(self, cubes, short_name=None): Parameters ---------- cubes : iris.cube.CubeList - List of cubes to search - short_name : str - Cube's variable short name. If None, short name is the class name + List of cubes to search. + short_name : str or None + Cube's variable short name. If `None`, `short name` is the class + name. Raises ------ Exception - If no cube is found + If no cube is found. Returns ------- iris.Cube Variable's cube + """ if short_name is None: short_name = self.vardef.short_name @@ -104,12 +122,13 @@ def fix_data(self, cube): Parameters ---------- cube: iris.cube.Cube - Cube to fix + Cube to fix. Returns ------- iris.cube.Cube Fixed cube. It can be a difference instance. + """ return cube @@ -152,8 +171,9 @@ def get_fixes(project, dataset, mip, short_name, extra_facets=None): Returns ------- - list(Fix) + list[Fix] Fixes to apply for the given data. + """ cmor_table = CMOR_TABLES[project] vardef = cmor_table.get_variable(mip, short_name) @@ -201,21 +221,35 @@ def get_fixes(project, dataset, mip, short_name, extra_facets=None): def get_fixed_filepath( output_dir: str | Path, filepath: str | Path, + create_temporary_dir: bool = False, ) -> Path: """Get the filepath for the fixed file. Parameters ---------- output_dir: str or Path - Output directory. + Output directory or prefix for :func:`tempfile.mkdtemp` (see + `create_temporary_dir`). Will be created if it does not exist, yet. filepath: str or Path Original path. + create_temporary_dir: bool, optional (default: False) + If `True`, create temporary directory using `output_dir` as a + `prefix` for :func:`tempfile.mkdtemp` and store the fixed files in + there. If `False`, use the `output_dir` as directory to store fixed + files. Returns ------- Path Path to the fixed file. + """ output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) + if create_temporary_dir: + parent_dir = output_dir.parent + parent_dir.mkdir(parents=True, exist_ok=True) + prefix = output_dir.name + output_dir = Path(tempfile.mkdtemp(prefix=prefix, dir=parent_dir)) + else: + output_dir.mkdir(parents=True, exist_ok=True) return output_dir / Path(filepath).name diff --git a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py index 9d21dfe4c3..f311b2a8d6 100644 --- a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py +++ b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py @@ -16,7 +16,7 @@ class AllVars(Fix): """Fixes for all IPSLCM variables.""" - def fix_file(self, filepath, output_dir): + def fix_file(self, filepath, output_dir, create_temporary_dir=False): """Select IPSLCM variable in filepath. This is done only if input file is a multi-variable one. This @@ -48,7 +48,9 @@ def fix_file(self, filepath, output_dir): # Proceed with CDO selvar varname = self.extra_facets.get(VARNAME_KEY, self.vardef.short_name) alt_filepath = str(filepath).replace(".nc", "_cdo_selected.nc") - outfile = self.get_fixed_filepath(output_dir, alt_filepath) + outfile = self.get_fixed_filepath( + output_dir, alt_filepath, create_temporary_dir=create_temporary_dir + ) tim1 = time.time() logger.debug("Using CDO for selecting %s in %s", varname, filepath) command = ["cdo", "-selvar,%s" % varname, str(filepath), str(outfile)] diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index ba10e28680..e5666aa8cd 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -23,6 +23,7 @@ def fix_file( dataset: str, mip: str, output_dir: Path, + create_temporary_dir: bool = False, **extra_facets, ) -> Path: """Fix files before ESMValTool can load them. @@ -45,7 +46,14 @@ def fix_file( mip: str Variable's MIP. output_dir: Path - Output directory for fixed files. + Output directory for fixed files or prefix for :func:`tempfile.mkdtemp` + (see `create_temporary_dir`). Make sure this directory exists (manually + or in the underlying `Fix.fix_file` function) if + `create_temporary_dir=False` is used. + create_temporary_dir: bool, optional (default: False) + If `True`, create temporary directory using `output_dir` as a `prefix` + for :func:`tempfile.mkdtemp` and store the fixed files in there. If + `False`, use the `output_dir` as directory to store fixed files. **extra_facets: dict, optional Extra facets are mainly used for data outside of the big projects like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. @@ -55,8 +63,6 @@ def fix_file( Path: Path to the fixed file. """ - if not output_dir.exists(): - output_dir.mkdir(parents=True, exist_ok=True) # Update extra_facets with variable information given as regular arguments # to this function extra_facets.update({ @@ -71,7 +77,9 @@ def fix_file( mip=mip, short_name=short_name, extra_facets=extra_facets): - file = fix.fix_file(file, output_dir) + file = fix.fix_file( + file, output_dir, create_temporary_dir=create_temporary_dir + ) return file diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 273f4e2fed..838cb5aef2 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -4,7 +4,6 @@ import logging import pprint import re -import tempfile import textwrap import uuid from copy import deepcopy @@ -721,7 +720,8 @@ def _load(self, callback) -> Cube: settings: dict[str, dict[str, Any]] = {} settings['fix_file'] = { - 'output_dir': self._get_temporary_fixed_file_dir(), + 'output_dir': self._get_fixed_file_dir_prefix(), + 'create_temporary_dir': True, **self.facets, } settings['load'] = {'callback': callback} @@ -868,16 +868,21 @@ def _update_timerange(self): self.set_facet('timerange', timerange) - def _get_temporary_fixed_file_dir(self) -> Path: - """Create and return new temporary directory for storing fixed files. + def _get_fixed_file_dir_prefix(self) -> Path: + """Get directory prefix for storing fixed files. + + This can be used as `prefix` for :func:`tempfile.mkdtemp` to create a + temporary directory to store fixed files. + + Note + ---- + Does not create the directory. Returns ------- Path - Path to new temporary directory. + Directory prefix. """ - fixed_file_dir = self.session._fixed_file_dir - fixed_file_dir.mkdir(parents=True, exist_ok=True) prefix = self._get_joined_summary_facets('_', join_lists=True) + '_' - return Path(tempfile.mkdtemp(prefix=prefix, dir=fixed_file_dir)) + return self.session._fixed_file_dir / prefix diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index c1446bb0cf..8b05f5ad27 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1647,9 +1647,9 @@ def test_load(mocker, session): create_autospec=True, return_value=output_file, ) - test_get_temporary_fixed_file_dir = mocker.patch.object( + test_get_fixed_file_dir_prefix = mocker.patch.object( dataset, - '_get_temporary_fixed_file_dir', + '_get_fixed_file_dir_prefix', create_autospec=True, return_value=fix_dir, ) @@ -1747,7 +1747,7 @@ def mock_preprocess(items, step, input_files, output_file, debug, assert args == load_args _get_output_file.assert_called_with(dataset.facets, session.preproc_dir) - test_get_temporary_fixed_file_dir.assert_called_once_with() + test_get_fixed_file_dir_prefix.assert_called_once_with() def test_load_fail(session): @@ -1770,10 +1770,9 @@ def test_load_fail(session): (Dataset(), '_'), ] ) -def test_get_temporary_fixed_file_dir(session, dataset, prefix): - """Test ``Dataset._get_temporary_fixed_file_dir``.""" +def test_get_fixed_file_dir_prefix(session, dataset, prefix): + """Test ``Dataset._get_fixed_file_dir_prefix``.""" dataset.session = session - temp_dir = dataset._get_temporary_fixed_file_dir() - assert temp_dir.parent == session.session_dir / 'preproc' / 'fixed_files' - assert temp_dir.is_dir() - assert temp_dir.name.startswith(prefix) + temp_dir = dataset._get_fixed_file_dir_prefix() + assert temp_dir == session.session_dir / 'preproc' / 'fixed_files' / prefix + assert not temp_dir.exists() From 7805bfceeef52709d982ecda987fe60d73fa43cd Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 16:36:28 +0100 Subject: [PATCH 11/13] Added tests --- .../cmor/_fixes/cmip6/test_cesm2.py | 4 ++- .../cmor/_fixes/cmip6/test_cesm2_waccm.py | 6 ++-- tests/integration/cmor/_fixes/test_fix.py | 28 +++++++++++++++++++ tests/unit/test_dataset.py | 1 + 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py index 212902dfed..9396c25f31 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py @@ -90,7 +90,9 @@ def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): 'fixed_cesm2_cl.nc') fix = Cl(None) fixed_file = fix.fix_file(nc_path, tmp_path) - mock_get_filepath.assert_called_once_with(tmp_path, nc_path) + mock_get_filepath.assert_called_once_with( + tmp_path, nc_path, create_temporary_dir=False + ) fixed_cubes = iris.load(fixed_file) assert len(fixed_cubes) == 2 var_names = [cube.var_name for cube in fixed_cubes] diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py index 36acb3c5a8..7dfc2e7089 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py @@ -45,8 +45,10 @@ def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): mock_get_filepath.return_value = os.path.join(tmp_path, 'fixed_cesm2_waccm_cl.nc') fix = Cl(None) - fixed_file = fix.fix_file(str(nc_path), tmp_path) - mock_get_filepath.assert_called_once_with(tmp_path, str(nc_path)) + fixed_file = fix.fix_file(nc_path, tmp_path) + mock_get_filepath.assert_called_once_with( + tmp_path, nc_path, create_temporary_dir=False + ) fixed_cube = iris.load_cube(fixed_file) lev_coord = fixed_cube.coord(var_name='lev') a_coord = fixed_cube.coord(var_name='a') diff --git a/tests/integration/cmor/_fixes/test_fix.py b/tests/integration/cmor/_fixes/test_fix.py index 23b219ac74..80b8914791 100644 --- a/tests/integration/cmor/_fixes/test_fix.py +++ b/tests/integration/cmor/_fixes/test_fix.py @@ -108,6 +108,20 @@ def test_get_fixed_filepath_paths(tmp_path): assert fixed_path == tmp_path / 'fixed' / 'file.nc' +def test_get_fixed_filepath_temporary_paths(tmp_path): + output_dir = tmp_path / 'fixed' / 'prefix_1_' + filepath = Path('this', 'is', 'a', 'file.nc') + assert not output_dir.parent.is_dir() + fixed_path = Fix(None).get_fixed_filepath( + output_dir, filepath, create_temporary_dir=True + ) + assert fixed_path.parent.is_dir() + assert isinstance(fixed_path, Path) + assert fixed_path != tmp_path / 'fixed' / 'prefix_1_' / 'file.nc' + assert fixed_path.parent.name.startswith('prefix_1_') + assert fixed_path.name == 'file.nc' + + def test_get_fixed_filepath_strs(tmp_path): output_dir = os.path.join(str(tmp_path), 'fixed') filepath = os.path.join('this', 'is', 'a', 'file.nc') @@ -116,3 +130,17 @@ def test_get_fixed_filepath_strs(tmp_path): assert Path(output_dir).is_dir() assert isinstance(fixed_path, Path) assert fixed_path == tmp_path / 'fixed' / 'file.nc' + + +def test_get_fixed_filepath_temporary_strs(tmp_path): + output_dir = os.path.join(str(tmp_path), 'fixed', 'prefix_1_') + filepath = os.path.join('this', 'is', 'a', 'file.nc') + assert not Path(output_dir).parent.is_dir() + fixed_path = Fix(None).get_fixed_filepath( + output_dir, filepath, create_temporary_dir=True + ) + assert fixed_path.parent.is_dir() + assert isinstance(fixed_path, Path) + assert fixed_path != tmp_path / 'fixed' / 'prefix_1_' / 'file.nc' + assert fixed_path.parent.name.startswith('prefix_1_') + assert fixed_path.name == 'file.nc' diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 8b05f5ad27..2fce85d305 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1689,6 +1689,7 @@ def mock_preprocess(items, step, input_files, output_file, debug, 'callback': 'default' }, 'fix_file': { + 'create_temporary_dir': True, 'dataset': 'CanESM2', 'ensemble': 'r1i1p1', 'exp': 'historical', From 05d522dbc7a74a8a1bf5236df821ec49703fc122 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Thu, 2 Mar 2023 17:09:07 +0100 Subject: [PATCH 12/13] creat_temporary_dir -> add_unique_suffix --- esmvalcore/cmor/_fixes/cmip6/cesm2.py | 19 +++++------- esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py | 15 ++++------ esmvalcore/cmor/_fixes/emac/emac.py | 4 +-- esmvalcore/cmor/_fixes/fix.py | 30 +++++++------------ esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py | 4 +-- esmvalcore/cmor/fix.py | 15 ++++------ esmvalcore/dataset.py | 6 ++-- .../cmor/_fixes/cmip6/test_cesm2.py | 2 +- .../cmor/_fixes/cmip6/test_cesm2_waccm.py | 2 +- tests/integration/cmor/_fixes/test_fix.py | 4 +-- tests/unit/test_dataset.py | 2 +- 11 files changed, 42 insertions(+), 61 deletions(-) diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2.py index 99c38a3d72..134f68b29c 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2.py @@ -22,11 +22,11 @@ def _fix_formula_terms( self, filepath, output_dir, - create_temporary_dir=False, + add_unique_suffix=False, ): """Fix ``formula_terms`` attribute.""" new_path = self.get_fixed_filepath( - output_dir, filepath, create_temporary_dir=create_temporary_dir + output_dir, filepath, add_unique_suffix=add_unique_suffix ) copyfile(filepath, new_path) dataset = Dataset(new_path, mode='a') @@ -36,7 +36,7 @@ def _fix_formula_terms( dataset.close() return new_path - def fix_file(self, filepath, output_dir, create_temporary_dir=False): + def fix_file(self, filepath, output_dir, add_unique_suffix=False): """Fix hybrid pressure coordinate. Adds missing ``formula_terms`` attribute to file. @@ -52,13 +52,10 @@ def fix_file(self, filepath, output_dir, create_temporary_dir=False): ---------- filepath : str Path to the original file. - output_dir : str - Path of the directory where the fixed file is saved to. - create_temporary_dir: bool, optional (default: False) - If `True`, create temporary directory using `output_dir` as a - `prefix` for :func:`tempfile.mkdtemp` and store the fixed files in - there. If `False`, use the `output_dir` as directory to store fixed - files. + output_dir: Path + Output directory for fixed files. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. Returns ------- @@ -67,7 +64,7 @@ def fix_file(self, filepath, output_dir, create_temporary_dir=False): """ new_path = self._fix_formula_terms( - filepath, output_dir, create_temporary_dir=create_temporary_dir + filepath, output_dir, add_unique_suffix=add_unique_suffix ) dataset = Dataset(new_path, mode='a') dataset.variables['a_bnds'][:] = dataset.variables['a_bnds'][::-1, :] diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py index 3eec596c38..f7263a00dd 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py @@ -11,7 +11,7 @@ class Cl(BaseCl): """Fixes for cl.""" - def fix_file(self, filepath, output_dir, create_temporary_dir=False): + def fix_file(self, filepath, output_dir, add_unique_suffix=False): """Fix hybrid pressure coordinate. Adds missing ``formula_terms`` attribute to file. @@ -27,13 +27,10 @@ def fix_file(self, filepath, output_dir, create_temporary_dir=False): ---------- filepath : str Path to the original file. - output_dir : str - Path of the directory where the fixed file is saved to. - create_temporary_dir: bool, optional (default: False) - If `True`, create temporary directory using `output_dir` as a - `prefix` for :func:`tempfile.mkdtemp` and store the fixed files in - there. If `False`, use the `output_dir` as directory to store fixed - files. + output_dir: Path + Output directory for fixed files. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. Returns ------- @@ -42,7 +39,7 @@ def fix_file(self, filepath, output_dir, create_temporary_dir=False): """ new_path = self._fix_formula_terms( - filepath, output_dir, create_temporary_dir=create_temporary_dir + filepath, output_dir, add_unique_suffix=add_unique_suffix ) dataset = Dataset(new_path, mode='a') dataset.variables['a_bnds'][:] = dataset.variables['a_bnds'][:, ::-1] diff --git a/esmvalcore/cmor/_fixes/emac/emac.py b/esmvalcore/cmor/_fixes/emac/emac.py index d3a90e7890..958efa6204 100644 --- a/esmvalcore/cmor/_fixes/emac/emac.py +++ b/esmvalcore/cmor/_fixes/emac/emac.py @@ -37,7 +37,7 @@ class AllVars(EmacFix): 'kg/m**2s': 'kg m-2 s-1', } - def fix_file(self, filepath, output_dir, create_temporary_dir=False): + def fix_file(self, filepath, output_dir, add_unique_suffix=False): """Fix file. Fixes hybrid pressure level coordinate. @@ -52,7 +52,7 @@ def fix_file(self, filepath, output_dir, create_temporary_dir=False): if 'alevel' not in self.vardef.dimensions: return filepath new_path = self.get_fixed_filepath( - output_dir, filepath, create_temporary_dir=create_temporary_dir + output_dir, filepath, add_unique_suffix=add_unique_suffix ) copyfile(filepath, new_path) with Dataset(new_path, mode='a') as dataset: diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index feb13eed5e..96a1c709c1 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -33,7 +33,7 @@ def fix_file( self, filepath: Path, output_dir: Path, - create_temporary_dir: bool = False, + add_unique_suffix: bool = False, ) -> Path: """Apply fixes to the files prior to creating the cube. @@ -46,14 +46,9 @@ def fix_file( filepath: Path File to fix. output_dir: Path - Output directory for fixed files or prefix for - :func:`tempfile.mkdtemp` (see `create_temporary_dir`). Make sure - this directory exists if `create_temporary_dir=False` is used. - create_temporary_dir: bool, optional (default: False) - If `True`, create temporary directory using `output_dir` as a - `prefix` for :func:`tempfile.mkdtemp` and store the fixed files in - there. If `False`, use the `output_dir` as directory to store fixed - files. + Output directory for fixed files. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. Returns ------- @@ -221,22 +216,19 @@ def get_fixes(project, dataset, mip, short_name, extra_facets=None): def get_fixed_filepath( output_dir: str | Path, filepath: str | Path, - create_temporary_dir: bool = False, + add_unique_suffix: bool = False, ) -> Path: """Get the filepath for the fixed file. Parameters ---------- - output_dir: str or Path - Output directory or prefix for :func:`tempfile.mkdtemp` (see - `create_temporary_dir`). Will be created if it does not exist, yet. + output_dir: Path + Output directory for fixed files. Will be created if it does not + exist yet. filepath: str or Path Original path. - create_temporary_dir: bool, optional (default: False) - If `True`, create temporary directory using `output_dir` as a - `prefix` for :func:`tempfile.mkdtemp` and store the fixed files in - there. If `False`, use the `output_dir` as directory to store fixed - files. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. Returns ------- @@ -245,7 +237,7 @@ def get_fixed_filepath( """ output_dir = Path(output_dir) - if create_temporary_dir: + if add_unique_suffix: parent_dir = output_dir.parent parent_dir.mkdir(parents=True, exist_ok=True) prefix = output_dir.name diff --git a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py index f311b2a8d6..362d950f6c 100644 --- a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py +++ b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py @@ -16,7 +16,7 @@ class AllVars(Fix): """Fixes for all IPSLCM variables.""" - def fix_file(self, filepath, output_dir, create_temporary_dir=False): + def fix_file(self, filepath, output_dir, add_unique_suffix=False): """Select IPSLCM variable in filepath. This is done only if input file is a multi-variable one. This @@ -49,7 +49,7 @@ def fix_file(self, filepath, output_dir, create_temporary_dir=False): varname = self.extra_facets.get(VARNAME_KEY, self.vardef.short_name) alt_filepath = str(filepath).replace(".nc", "_cdo_selected.nc") outfile = self.get_fixed_filepath( - output_dir, alt_filepath, create_temporary_dir=create_temporary_dir + output_dir, alt_filepath, add_unique_suffix=add_unique_suffix ) tim1 = time.time() logger.debug("Using CDO for selecting %s in %s", varname, filepath) diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index e5666aa8cd..7afab44938 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -23,7 +23,7 @@ def fix_file( dataset: str, mip: str, output_dir: Path, - create_temporary_dir: bool = False, + add_unique_suffix: bool = False, **extra_facets, ) -> Path: """Fix files before ESMValTool can load them. @@ -46,14 +46,9 @@ def fix_file( mip: str Variable's MIP. output_dir: Path - Output directory for fixed files or prefix for :func:`tempfile.mkdtemp` - (see `create_temporary_dir`). Make sure this directory exists (manually - or in the underlying `Fix.fix_file` function) if - `create_temporary_dir=False` is used. - create_temporary_dir: bool, optional (default: False) - If `True`, create temporary directory using `output_dir` as a `prefix` - for :func:`tempfile.mkdtemp` and store the fixed files in there. If - `False`, use the `output_dir` as directory to store fixed files. + Output directory for fixed files. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. **extra_facets: dict, optional Extra facets are mainly used for data outside of the big projects like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. @@ -78,7 +73,7 @@ def fix_file( short_name=short_name, extra_facets=extra_facets): file = fix.fix_file( - file, output_dir, create_temporary_dir=create_temporary_dir + file, output_dir, add_unique_suffix=add_unique_suffix ) return file diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 838cb5aef2..9742173220 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -721,7 +721,7 @@ def _load(self, callback) -> Cube: settings: dict[str, dict[str, Any]] = {} settings['fix_file'] = { 'output_dir': self._get_fixed_file_dir_prefix(), - 'create_temporary_dir': True, + 'add_unique_suffix': True, **self.facets, } settings['load'] = {'callback': callback} @@ -872,11 +872,11 @@ def _get_fixed_file_dir_prefix(self) -> Path: """Get directory prefix for storing fixed files. This can be used as `prefix` for :func:`tempfile.mkdtemp` to create a - temporary directory to store fixed files. + unique directory to store fixed files. Note ---- - Does not create the directory. + This is thread safe. Does not create the directory. Returns ------- diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py index 9396c25f31..55442a323a 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py @@ -91,7 +91,7 @@ def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): fix = Cl(None) fixed_file = fix.fix_file(nc_path, tmp_path) mock_get_filepath.assert_called_once_with( - tmp_path, nc_path, create_temporary_dir=False + tmp_path, nc_path, add_unique_suffix=False ) fixed_cubes = iris.load(fixed_file) assert len(fixed_cubes) == 2 diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py index 7dfc2e7089..b9075b6716 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py @@ -47,7 +47,7 @@ def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): fix = Cl(None) fixed_file = fix.fix_file(nc_path, tmp_path) mock_get_filepath.assert_called_once_with( - tmp_path, nc_path, create_temporary_dir=False + tmp_path, nc_path, add_unique_suffix=False ) fixed_cube = iris.load_cube(fixed_file) lev_coord = fixed_cube.coord(var_name='lev') diff --git a/tests/integration/cmor/_fixes/test_fix.py b/tests/integration/cmor/_fixes/test_fix.py index 80b8914791..1bf38d1425 100644 --- a/tests/integration/cmor/_fixes/test_fix.py +++ b/tests/integration/cmor/_fixes/test_fix.py @@ -113,7 +113,7 @@ def test_get_fixed_filepath_temporary_paths(tmp_path): filepath = Path('this', 'is', 'a', 'file.nc') assert not output_dir.parent.is_dir() fixed_path = Fix(None).get_fixed_filepath( - output_dir, filepath, create_temporary_dir=True + output_dir, filepath, add_unique_suffix=True ) assert fixed_path.parent.is_dir() assert isinstance(fixed_path, Path) @@ -137,7 +137,7 @@ def test_get_fixed_filepath_temporary_strs(tmp_path): filepath = os.path.join('this', 'is', 'a', 'file.nc') assert not Path(output_dir).parent.is_dir() fixed_path = Fix(None).get_fixed_filepath( - output_dir, filepath, create_temporary_dir=True + output_dir, filepath, add_unique_suffix=True ) assert fixed_path.parent.is_dir() assert isinstance(fixed_path, Path) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 2fce85d305..db6d1c3948 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1689,7 +1689,7 @@ def mock_preprocess(items, step, input_files, output_file, debug, 'callback': 'default' }, 'fix_file': { - 'create_temporary_dir': True, + 'add_unique_suffix': True, 'dataset': 'CanESM2', 'ensemble': 'r1i1p1', 'exp': 'historical', From 50dfc8e3c6613d019585b272c7f4e583745b3cca Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 2 Mar 2023 21:36:51 +0100 Subject: [PATCH 13/13] Simplify code --- esmvalcore/dataset.py | 25 +++++-------------------- tests/unit/test_dataset.py | 34 ++++++---------------------------- 2 files changed, 11 insertions(+), 48 deletions(-) diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 9742173220..9293c42107 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -717,10 +717,14 @@ def _load(self, callback) -> Cube: raise InputFilesNotFound(msg) output_file = _get_output_file(self.facets, self.session.preproc_dir) + fix_dir_prefix = Path( + self.session._fixed_file_dir, + self._get_joined_summary_facets('_', join_lists=True) + '_', + ) settings: dict[str, dict[str, Any]] = {} settings['fix_file'] = { - 'output_dir': self._get_fixed_file_dir_prefix(), + 'output_dir': fix_dir_prefix, 'add_unique_suffix': True, **self.facets, } @@ -867,22 +871,3 @@ def _update_timerange(self): check.valid_time_selection(timerange) self.set_facet('timerange', timerange) - - def _get_fixed_file_dir_prefix(self) -> Path: - """Get directory prefix for storing fixed files. - - This can be used as `prefix` for :func:`tempfile.mkdtemp` to create a - unique directory to store fixed files. - - Note - ---- - This is thread safe. Does not create the directory. - - Returns - ------- - Path - Directory prefix. - - """ - prefix = self._get_joined_summary_facets('_', join_lists=True) + '_' - return self.session._fixed_file_dir / prefix diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index db6d1c3948..b8ebc209c8 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1640,19 +1640,17 @@ def test_load(mocker, session): ) dataset.session = session output_file = Path('/path/to/output.nc') - fix_dir = Path('/path/to/output_fixed') + fix_dir_prefix = Path( + session.preproc_dir, + 'fixed_files', + 'chl_Oyr_CMIP5_CanESM2_historical_r1i1p1_', + ) _get_output_file = mocker.patch.object( esmvalcore.dataset, '_get_output_file', create_autospec=True, return_value=output_file, ) - test_get_fixed_file_dir_prefix = mocker.patch.object( - dataset, - '_get_fixed_file_dir_prefix', - create_autospec=True, - return_value=fix_dir, - ) args = {} order = [] @@ -1695,7 +1693,7 @@ def mock_preprocess(items, step, input_files, output_file, debug, 'exp': 'historical', 'frequency': 'yr', 'mip': 'Oyr', - 'output_dir': fix_dir, + 'output_dir': fix_dir_prefix, 'project': 'CMIP5', 'short_name': 'chl', 'timerange': '2000/2005', @@ -1748,7 +1746,6 @@ def mock_preprocess(items, step, input_files, output_file, debug, assert args == load_args _get_output_file.assert_called_with(dataset.facets, session.preproc_dir) - test_get_fixed_file_dir_prefix.assert_called_once_with() def test_load_fail(session): @@ -1758,22 +1755,3 @@ def test_load_fail(session): dataset.files = [] with pytest.raises(InputFilesNotFound): dataset.load() - - -@pytest.mark.parametrize( - 'dataset,prefix', - [ - (Dataset(project='OBS', dataset='X', mip='fx', short_name='sftlf'), - 'sftlf_fx_OBS_X_'), - (Dataset(mip=('fx', 'day'), short_name='sftlf', exp=['exp1', 'exp2']), - 'sftlf_fx-day_exp1-exp2_'), - (Dataset(dataset='Y'), 'Y_'), - (Dataset(), '_'), - ] -) -def test_get_fixed_file_dir_prefix(session, dataset, prefix): - """Test ``Dataset._get_fixed_file_dir_prefix``.""" - dataset.session = session - temp_dir = dataset._get_fixed_file_dir_prefix() - assert temp_dir == session.session_dir / 'preproc' / 'fixed_files' / prefix - assert not temp_dir.exists()