From 0e7bb54045c26793f90ed40c02c3b89c7432ec60 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 17 Dec 2019 17:36:41 +0100 Subject: [PATCH 001/105] Add a method to save citation information --- esmvalcore/_task.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 825cfa8cd3..2b229a4c96 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -11,6 +11,7 @@ import time from copy import deepcopy from multiprocessing import Pool +from pybtex.database import BibliographyData, Entry import psutil import yaml @@ -574,6 +575,33 @@ def _collect_provenance(self): self.name, time.time() - start) + + def _write_citation_file(self): + """Write citation information provided from the recorded provenance.""" + citation_file = os.path.join(self.settings['run_dir'], + 'diagnostic_citation.bibtex') + + # papers describing the diagnostic and recipe + bib_entry = BibliographyData({ + 'article-minimal': Entry('article', [ + ('author', ''), + ('title', ''), + ('journal', ""), + ('year', ''),]), + }) + + # model data citation information + + # observational data citation information + + # esmvaltool and other scientific software citation + + # scientific compute cluster citation information, if applicable + + # save the file + bib_entry.to_string(citation_file, 'bibtex') + + def __str__(self): """Get human readable description.""" txt = "{}:\nscript: {}\n{}\nsettings:\n{}\n".format( From 21e56b5bc2bb073231fd7a8c7e53650e9a989a78 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 15 Jan 2020 17:33:38 +0100 Subject: [PATCH 002/105] Fixing the function write_citation_file --- esmvalcore/_task.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 2b229a4c96..0851e090d6 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -16,7 +16,7 @@ import psutil import yaml -from ._config import 
DIAGNOSTICS_PATH, TAGS, replace_tags +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -507,6 +507,7 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() + self._write_citation_file() return [self.output_dir] raise DiagnosticError( @@ -577,18 +578,27 @@ def _collect_provenance(self): def _write_citation_file(self): - """Write citation information provided from the recorded provenance.""" - citation_file = os.path.join(self.settings['run_dir'], - 'diagnostic_citation.bibtex') + """Write citation information provided by the recorded provenance.""" + provenance_file = os.path.join(self.settings['run_dir'], + 'diagnostic_provenance.yml') + with open(provenance_file, 'r') as file: + table = yaml.safe_load(file) + section = 'references' + reference_dict = {} + for filename, attributes in table.items(): + for tag in attributes[section]: + reference_dict[tag] = get_tag_value(section, tag) # papers describing the diagnostic and recipe - bib_entry = BibliographyData({ - 'article-minimal': Entry('article', [ - ('author', ''), - ('title', ''), - ('journal', ""), - ('year', ''),]), - }) + bib_entry = {} + bib_fields = ['author', 'journal', 'volume', 'pages', 'doi', 'year'] + for key in reference_dict: + reference = list(reference_dict[key].split(",")) + # "[Last name] et al., [journal abbr.], [volume], [pages], doi:[doi], [year]. 
+ bib_entry.update({ + key: Entry('article', list(zip(bib_fields, reference ))), + }) + bib_data = BibliographyData(bib_entry) # model data citation information @@ -599,7 +609,9 @@ def _write_citation_file(self): # scientific compute cluster citation information, if applicable # save the file - bib_entry.to_string(citation_file, 'bibtex') + citation_file = os.path.join(self.settings['run_dir'], + 'diagnostic_citation.bibtex') + bib_data.to_string(citation_file, 'bibtex') def __str__(self): From db80953b3700f9b1c08f34f79677f3a73976a230 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 21 Jan 2020 17:53:36 +0100 Subject: [PATCH 003/105] Fix the function write_citation_file --- esmvalcore/_config.py | 11 ++++++++++ esmvalcore/_task.py | 48 +++++++++++++------------------------------ 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 6f44f16731..50a0f7cb85 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -26,6 +26,17 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() +def find_references(): + """Try to find bibtex files in references folder.""" + try: + import esmvaltool + except ImportError: + return '' + return os.path.join(os.path.dirname(esmvaltool.__file__), 'references') + + +REFERENCES_PATH = find_references() + def read_config_user_file(config_file, recipe_name): """Read config user file and store settings in a dictionary.""" diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 0851e090d6..3cd66b2d06 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,11 +12,13 @@ from copy import deepcopy from multiprocessing import Pool from pybtex.database import BibliographyData, Entry +import doi2bib.crossref as ref import psutil import yaml +import prov -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value, REFERENCES_PATH from ._provenance import 
TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -507,7 +509,7 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() - self._write_citation_file() + # self._write_citation_file() return [self.output_dir] raise DiagnosticError( @@ -571,47 +573,25 @@ def _collect_provenance(self): product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() + self._write_citation_file(product) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, time.time() - start) - def _write_citation_file(self): + def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - provenance_file = os.path.join(self.settings['run_dir'], - 'diagnostic_provenance.yml') - with open(provenance_file, 'r') as file: - table = yaml.safe_load(file) - section = 'references' - reference_dict = {} - for filename, attributes in table.items(): - for tag in attributes[section]: - reference_dict[tag] = get_tag_value(section, tag) - - # papers describing the diagnostic and recipe - bib_entry = {} - bib_fields = ['author', 'journal', 'volume', 'pages', 'doi', 'year'] - for key in reference_dict: - reference = list(reference_dict[key].split(",")) - # "[Last name] et al., [journal abbr.], [volume], [pages], doi:[doi], [year]. 
- bib_entry.update({ - key: Entry('article', list(zip(bib_fields, reference ))), - }) - bib_data = BibliographyData(bib_entry) - - # model data citation information - - # observational data citation information - - # esmvaltool and other scientific software citation + bib_data = {v:k for k, v in TAGS['references'].items()} - # scientific compute cluster citation information, if applicable + for item in product.provenance.records: + for key, value in item.attributes: + # if key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}: + if key.namespace.prefix == 'attribute' and key.localpart in {'reference'}: + tag = bib_data[value] - # save the file - citation_file = os.path.join(self.settings['run_dir'], - 'diagnostic_citation.bibtex') - bib_data.to_string(citation_file, 'bibtex') + # print(REFERENCES_PATH) + # citation_file = Path(product.filename) + '_citation.bibtex' def __str__(self): From 4b63ef6e42e08b281061fe469b3f5816a62c3e70 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 22 Jan 2020 16:25:29 +0100 Subject: [PATCH 004/105] fix the function _write_citation_file --- esmvalcore/_config.py | 1 + esmvalcore/_task.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 50a0f7cb85..d8f7de0819 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -26,6 +26,7 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() + def find_references(): """Try to find bibtex files in references folder.""" try: diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 3cd66b2d06..eaab26aea9 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -11,14 +11,11 @@ import time from copy import deepcopy from multiprocessing import Pool -from pybtex.database import BibliographyData, Entry -import doi2bib.crossref as ref import psutil import yaml -import prov -from ._config import DIAGNOSTICS_PATH, TAGS, 
replace_tags, get_tag_value, REFERENCES_PATH +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -579,20 +576,35 @@ def _collect_provenance(self): self.name, time.time() - start) - def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - bib_data = {v:k for k, v in TAGS['references'].items()} - + reference_tag = {v: k for k, v in TAGS['references'].items()} + # collect info from provenance + product_entry = [] for item in product.provenance.records: for key, value in item.attributes: - # if key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}: - if key.namespace.prefix == 'attribute' and key.localpart in {'reference'}: - tag = bib_data[value] - - # print(REFERENCES_PATH) - # citation_file = Path(product.filename) + '_citation.bibtex' - + if (key.namespace.prefix == 'attribute' + and key.localpart in {'reference', 'references'}): + product_entry.append(value) + + # map between reference tags and entries + product_tag = [] + for key in reference_tag.keys(): + for entry in product_entry: + if key in entry and reference_tag[key] not in product_tag: + product_tag.append(reference_tag[key]) + + # save all citation info into one bibtex file + bibtex_entry = '' + for tags in product_tag: + bib_file_path = os.path.join(REFERENCES_PATH, tags + '.bibtex') + if os.path.isfile(bib_file_path): + with open(bib_file_path, 'r') as file: + bibtex_entry += '{}\n'.format(file.read()) + citation_file = (os.path.splitext(product.filename)[0] + + '_citation.bibtex') + with open(citation_file, 'w') as file: + file.write(bibtex_entry) def __str__(self): """Get human readable description.""" From 6a3872388ec0e0e33db5a3998e1ae04c6becbc1e Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 22 Jan 2020 17:26:14 +0100 Subject: [PATCH 005/105] style --- esmvalcore/_task.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index eaab26aea9..95459f960c 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -578,7 +578,6 @@ def _collect_provenance(self): def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - reference_tag = {v: k for k, v in TAGS['references'].items()} # collect info from provenance product_entry = [] for item in product.provenance.records: @@ -588,6 +587,7 @@ def _write_citation_file(self, product): product_entry.append(value) # map between reference tags and entries + reference_tag = {v: k for k, v in TAGS['references'].items()} product_tag = [] for key in reference_tag.keys(): for entry in product_entry: From 528eee796057e08b65e2ce2cae9b8cd79f8dcf6f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 09:55:06 +0100 Subject: [PATCH 006/105] refactor and style --- esmvalcore/_task.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 95459f960c..6e4d2c18fd 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -586,7 +586,7 @@ def _write_citation_file(self, product): and key.localpart in {'reference', 'references'}): product_entry.append(value) - # map between reference tags and entries + # map between reference.tags and product.entries reference_tag = {v: k for k, v in TAGS['references'].items()} product_tag = [] for key in reference_tag.keys(): @@ -597,10 +597,14 @@ def _write_citation_file(self, product): # save all citation info into one bibtex file bibtex_entry = '' for tags in product_tag: - bib_file_path = os.path.join(REFERENCES_PATH, tags + '.bibtex') - if os.path.isfile(bib_file_path): - with open(bib_file_path, 'r') as file: + bibtex_file = os.path.join(REFERENCES_PATH, tags + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: bibtex_entry += 
'{}\n'.format(file.read()) + else: + raise DiagnosticError( + "The reference file ({}): does not exist.".format( + bibtex_file)) citation_file = (os.path.splitext(product.filename)[0] + '_citation.bibtex') with open(citation_file, 'w') as file: From a7a6368e72d39f86947bd96ca2ebef2c5566efce Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 16:31:04 +0100 Subject: [PATCH 007/105] Add esmvaltool paper to the provenance, and style --- esmvalcore/_provenance.py | 12 +++++++++--- esmvalcore/_task.py | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index d0c5352e2b..d966fdf390 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -8,12 +8,15 @@ from PIL.PngImagePlugin import PngInfo from prov.dot import prov_to_dot from prov.model import ProvDocument +from ._config import replace_tags from ._version import __version__ logger = logging.getLogger(__name__) ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' +# it is the technical overview and should always be cited +ESMVALTOOL_PAPER_TAG = ['righi19gmd'] def update_without_duplicating(bundle, other): @@ -31,9 +34,12 @@ def create_namespace(provenance, namespace): def get_esmvaltool_provenance(): """Create an esmvaltool run activity.""" provenance = ProvDocument() - namespace = 'software' - create_namespace(provenance, namespace) - attributes = {} # TODO: add dependencies with versions here + for namespace in ('software', 'attribute'): + create_namespace(provenance, namespace) + + # TODO: add dependencies with versions here + attributes_value = replace_tags('references', ESMVALTOOL_PAPER_TAG) + attributes = {'attribute:references': attributes_value} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 6e4d2c18fd..102db2bfee 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -506,7 +506,6 @@ def 
_run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() - # self._write_citation_file() return [self.output_dir] raise DiagnosticError( From 65ee713c42b7947a6731f8d7ecc3a94e724ddd6a Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 17:33:12 +0100 Subject: [PATCH 008/105] update the tag --- esmvalcore/_provenance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index d966fdf390..72ed9ce9ce 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -16,7 +16,7 @@ ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = ['righi19gmd'] +ESMVALTOOL_PAPER_TAG = ['righi19gmdd'] def update_without_duplicating(bundle, other): From 36d7e363dd8a9af5a59ea6cb9426e93cd9bc7c4d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 17 Dec 2019 17:36:41 +0100 Subject: [PATCH 009/105] Add a method to save citation information --- esmvalcore/_task.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 28074b5163..c8c1841e2d 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -11,6 +11,7 @@ import time from copy import deepcopy from multiprocessing import Pool +from pybtex.database import BibliographyData, Entry import psutil import yaml @@ -582,6 +583,33 @@ def _collect_provenance(self): self.name, time.time() - start) + + def _write_citation_file(self): + """Write citation information provided from the recorded provenance.""" + citation_file = os.path.join(self.settings['run_dir'], + 'diagnostic_citation.bibtex') + + # papers describing the diagnostic and recipe + bib_entry = BibliographyData({ + 'article-minimal': Entry('article', [ + ('author', ''), + ('title', ''), + ('journal', ""), + ('year', ''),]), + }) + + # model data citation 
information + + # observational data citation information + + # esmvaltool and other scientific software citation + + # scientific compute cluster citation information, if applicable + + # save the file + bib_entry.to_string(citation_file, 'bibtex') + + def __str__(self): """Get human readable description.""" txt = "{}:\nscript: {}\n{}\nsettings:\n{}\n".format( From 5f70c732ea42eb59ea7b278dbc7d7c1a3731118e Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 15 Jan 2020 17:33:38 +0100 Subject: [PATCH 010/105] Fixing the function write_citation_file --- esmvalcore/_task.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index c8c1841e2d..17fa1c8d90 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -16,7 +16,7 @@ import psutil import yaml -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -515,6 +515,7 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() + self._write_citation_file() return [self.output_dir] raise DiagnosticError( @@ -585,18 +586,27 @@ def _collect_provenance(self): def _write_citation_file(self): - """Write citation information provided from the recorded provenance.""" - citation_file = os.path.join(self.settings['run_dir'], - 'diagnostic_citation.bibtex') + """Write citation information provided by the recorded provenance.""" + provenance_file = os.path.join(self.settings['run_dir'], + 'diagnostic_provenance.yml') + with open(provenance_file, 'r') as file: + table = yaml.safe_load(file) + section = 'references' + reference_dict = {} + for filename, attributes in table.items(): + for tag in attributes[section]: + reference_dict[tag] = get_tag_value(section, tag) # papers 
describing the diagnostic and recipe - bib_entry = BibliographyData({ - 'article-minimal': Entry('article', [ - ('author', ''), - ('title', ''), - ('journal', ""), - ('year', ''),]), - }) + bib_entry = {} + bib_fields = ['author', 'journal', 'volume', 'pages', 'doi', 'year'] + for key in reference_dict: + reference = list(reference_dict[key].split(",")) + # "[Last name] et al., [journal abbr.], [volume], [pages], doi:[doi], [year]. + bib_entry.update({ + key: Entry('article', list(zip(bib_fields, reference ))), + }) + bib_data = BibliographyData(bib_entry) # model data citation information @@ -607,7 +617,9 @@ def _write_citation_file(self): # scientific compute cluster citation information, if applicable # save the file - bib_entry.to_string(citation_file, 'bibtex') + citation_file = os.path.join(self.settings['run_dir'], + 'diagnostic_citation.bibtex') + bib_data.to_string(citation_file, 'bibtex') def __str__(self): From aff593d4b2f767958f1a59687b7122794a749892 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 21 Jan 2020 17:53:36 +0100 Subject: [PATCH 011/105] Fix the function write_citation_file --- esmvalcore/_config.py | 11 ++++++++++ esmvalcore/_task.py | 48 +++++++++++++------------------------------ 2 files changed, 25 insertions(+), 34 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index c8e08381db..06453ac572 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -26,6 +26,17 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() +def find_references(): + """Try to find bibtex files in references folder.""" + try: + import esmvaltool + except ImportError: + return '' + return os.path.join(os.path.dirname(esmvaltool.__file__), 'references') + + +REFERENCES_PATH = find_references() + def read_config_user_file(config_file, recipe_name): """Read config user file and store settings in a dictionary.""" diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 17fa1c8d90..340c4ace4d 100644 --- 
a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,11 +12,13 @@ from copy import deepcopy from multiprocessing import Pool from pybtex.database import BibliographyData, Entry +import doi2bib.crossref as ref import psutil import yaml +import prov -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, get_tag_value, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -515,7 +517,7 @@ def _run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() - self._write_citation_file() + # self._write_citation_file() return [self.output_dir] raise DiagnosticError( @@ -579,47 +581,25 @@ def _collect_provenance(self): product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() + self._write_citation_file(product) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, time.time() - start) - def _write_citation_file(self): + def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - provenance_file = os.path.join(self.settings['run_dir'], - 'diagnostic_provenance.yml') - with open(provenance_file, 'r') as file: - table = yaml.safe_load(file) - section = 'references' - reference_dict = {} - for filename, attributes in table.items(): - for tag in attributes[section]: - reference_dict[tag] = get_tag_value(section, tag) - - # papers describing the diagnostic and recipe - bib_entry = {} - bib_fields = ['author', 'journal', 'volume', 'pages', 'doi', 'year'] - for key in reference_dict: - reference = list(reference_dict[key].split(",")) - # "[Last name] et al., [journal abbr.], [volume], [pages], doi:[doi], [year]. 
- bib_entry.update({ - key: Entry('article', list(zip(bib_fields, reference ))), - }) - bib_data = BibliographyData(bib_entry) - - # model data citation information - - # observational data citation information - - # esmvaltool and other scientific software citation + bib_data = {v:k for k, v in TAGS['references'].items()} - # scientific compute cluster citation information, if applicable + for item in product.provenance.records: + for key, value in item.attributes: + # if key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}: + if key.namespace.prefix == 'attribute' and key.localpart in {'reference'}: + tag = bib_data[value] - # save the file - citation_file = os.path.join(self.settings['run_dir'], - 'diagnostic_citation.bibtex') - bib_data.to_string(citation_file, 'bibtex') + # print(REFERENCES_PATH) + # citation_file = Path(product.filename) + '_citation.bibtex' def __str__(self): From 34062150b6ab3d2094f7782814481a947ae0f911 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 22 Jan 2020 16:25:29 +0100 Subject: [PATCH 012/105] fix the function _write_citation_file --- esmvalcore/_config.py | 1 + esmvalcore/_task.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 06453ac572..4c94b80ef7 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -26,6 +26,7 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() + def find_references(): """Try to find bibtex files in references folder.""" try: diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 340c4ace4d..ce5bdfc29b 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -11,14 +11,11 @@ import time from copy import deepcopy from multiprocessing import Pool -from pybtex.database import BibliographyData, Entry -import doi2bib.crossref as ref import psutil import yaml -import prov -from ._config import DIAGNOSTICS_PATH, TAGS, 
replace_tags, get_tag_value, REFERENCES_PATH +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance logger = logging.getLogger(__name__) @@ -587,20 +584,35 @@ def _collect_provenance(self): self.name, time.time() - start) - def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - bib_data = {v:k for k, v in TAGS['references'].items()} - + reference_tag = {v: k for k, v in TAGS['references'].items()} + # collect info from provenance + product_entry = [] for item in product.provenance.records: for key, value in item.attributes: - # if key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}: - if key.namespace.prefix == 'attribute' and key.localpart in {'reference'}: - tag = bib_data[value] - - # print(REFERENCES_PATH) - # citation_file = Path(product.filename) + '_citation.bibtex' - + if (key.namespace.prefix == 'attribute' + and key.localpart in {'reference', 'references'}): + product_entry.append(value) + + # map between reference tags and entries + product_tag = [] + for key in reference_tag.keys(): + for entry in product_entry: + if key in entry and reference_tag[key] not in product_tag: + product_tag.append(reference_tag[key]) + + # save all citation info into one bibtex file + bibtex_entry = '' + for tags in product_tag: + bib_file_path = os.path.join(REFERENCES_PATH, tags + '.bibtex') + if os.path.isfile(bib_file_path): + with open(bib_file_path, 'r') as file: + bibtex_entry += '{}\n'.format(file.read()) + citation_file = (os.path.splitext(product.filename)[0] + + '_citation.bibtex') + with open(citation_file, 'w') as file: + file.write(bibtex_entry) def __str__(self): """Get human readable description.""" From 05f4ce8760fe0a6244c63318c0034a1b5f07b63f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 22 Jan 2020 17:26:14 +0100 Subject: [PATCH 013/105] style --- esmvalcore/_task.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index ce5bdfc29b..4dbcbf266e 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -586,7 +586,6 @@ def _collect_provenance(self): def _write_citation_file(self, product): """Write citation information provided by the recorded provenance.""" - reference_tag = {v: k for k, v in TAGS['references'].items()} # collect info from provenance product_entry = [] for item in product.provenance.records: @@ -596,6 +595,7 @@ def _write_citation_file(self, product): product_entry.append(value) # map between reference tags and entries + reference_tag = {v: k for k, v in TAGS['references'].items()} product_tag = [] for key in reference_tag.keys(): for entry in product_entry: From 585cc92566946853e6a578b4b19a050524dd7374 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 09:55:06 +0100 Subject: [PATCH 014/105] refactor and style --- esmvalcore/_task.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 4dbcbf266e..0faa70c3a4 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -594,7 +594,7 @@ def _write_citation_file(self, product): and key.localpart in {'reference', 'references'}): product_entry.append(value) - # map between reference tags and entries + # map between reference.tags and product.entries reference_tag = {v: k for k, v in TAGS['references'].items()} product_tag = [] for key in reference_tag.keys(): @@ -605,10 +605,14 @@ def _write_citation_file(self, product): # save all citation info into one bibtex file bibtex_entry = '' for tags in product_tag: - bib_file_path = os.path.join(REFERENCES_PATH, tags + '.bibtex') - if os.path.isfile(bib_file_path): - with open(bib_file_path, 'r') as file: + bibtex_file = os.path.join(REFERENCES_PATH, tags + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: bibtex_entry += 
'{}\n'.format(file.read()) + else: + raise DiagnosticError( + "The reference file ({}): does not exist.".format( + bibtex_file)) citation_file = (os.path.splitext(product.filename)[0] + '_citation.bibtex') with open(citation_file, 'w') as file: From 46f7b458effa885d4d0968b7dcad340b65567b79 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 16:31:04 +0100 Subject: [PATCH 015/105] Add esmvaltool paper to the provenance, and style --- esmvalcore/_provenance.py | 12 +++++++++--- esmvalcore/_task.py | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index d0c5352e2b..d966fdf390 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -8,12 +8,15 @@ from PIL.PngImagePlugin import PngInfo from prov.dot import prov_to_dot from prov.model import ProvDocument +from ._config import replace_tags from ._version import __version__ logger = logging.getLogger(__name__) ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' +# it is the technical overview and should always be cited +ESMVALTOOL_PAPER_TAG = ['righi19gmd'] def update_without_duplicating(bundle, other): @@ -31,9 +34,12 @@ def create_namespace(provenance, namespace): def get_esmvaltool_provenance(): """Create an esmvaltool run activity.""" provenance = ProvDocument() - namespace = 'software' - create_namespace(provenance, namespace) - attributes = {} # TODO: add dependencies with versions here + for namespace in ('software', 'attribute'): + create_namespace(provenance, namespace) + + # TODO: add dependencies with versions here + attributes_value = replace_tags('references', ESMVALTOOL_PAPER_TAG) + attributes = {'attribute:references': attributes_value} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 0faa70c3a4..c50a212c6a 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -514,7 +514,6 @@ def 
_run(self, input_files): if returncode == 0: logger.debug("Script %s completed successfully", self.script) self._collect_provenance() - # self._write_citation_file() return [self.output_dir] raise DiagnosticError( From 2ea1c98e640e8c13a4fcbe68aeadd8a940305ca8 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 23 Jan 2020 17:33:12 +0100 Subject: [PATCH 016/105] update the tag --- esmvalcore/_provenance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index d966fdf390..72ed9ce9ce 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -16,7 +16,7 @@ ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = ['righi19gmd'] +ESMVALTOOL_PAPER_TAG = ['righi19gmdd'] def update_without_duplicating(bundle, other): From 638d08ce97e05c7fb45c1e61dc638c8685ae416d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 27 Jan 2020 15:37:35 +0100 Subject: [PATCH 017/105] change the method to a function --- esmvalcore/_task.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index c50a212c6a..737085dcdd 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -554,6 +554,7 @@ def _collect_provenance(self): attrs = { 'script_file': self.script, } + for key in self.settings: if key not in ignore: attrs[key] = self.settings[key] @@ -583,7 +584,8 @@ def _collect_provenance(self): self.name, time.time() - start) - def _write_citation_file(self, product): + @staticmethod + def _write_citation_file(product): """Write citation information provided by the recorded provenance.""" # collect info from provenance product_entry = [] From f61be822e5e025bdb55d874891512f899ac80556 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 27 Jan 2020 15:40:39 +0100 Subject: [PATCH 018/105] fix the function get_esmvaltool_provenance --- esmvalcore/_provenance.py |
10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index 72ed9ce9ce..e813f9162a 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -8,15 +8,15 @@ from PIL.PngImagePlugin import PngInfo from prov.dot import prov_to_dot from prov.model import ProvDocument -from ._config import replace_tags from ._version import __version__ +from ._config import replace_tags, TAGS logger = logging.getLogger(__name__) ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = ['righi19gmdd'] +ESMVALTOOL_PAPER_TAG = 'righi19gmdd' def update_without_duplicating(bundle, other): @@ -38,7 +38,11 @@ def get_esmvaltool_provenance(): create_namespace(provenance, namespace) # TODO: add dependencies with versions here - attributes_value = replace_tags('references', ESMVALTOOL_PAPER_TAG) + section = 'references' + if section in TAGS and ESMVALTOOL_PAPER_TAG in TAGS[section]: + attributes_value = replace_tags(section, [ESMVALTOOL_PAPER_TAG]) + else: + attributes_value = ESMVALTOOL_PAPER_TAG attributes = {'attribute:references': attributes_value} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) From 21f09d23b80d4ee4961358927b9350c2895b118a Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 28 Jan 2020 17:40:45 +0100 Subject: [PATCH 019/105] fix the if-else condition --- esmvalcore/_task.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 737085dcdd..44349e18f0 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -611,9 +611,8 @@ def _write_citation_file(product): with open(bibtex_file, 'r') as file: bibtex_entry += '{}\n'.format(file.read()) else: - raise DiagnosticError( - "The reference file ({}): does not exist.".format( - bibtex_file)) + logger.info('The reference file %s does not 
exist.', + bibtex_file) citation_file = (os.path.splitext(product.filename)[0] + '_citation.bibtex') with open(citation_file, 'w') as file: From eb50f0b5690b280fa90d16b09331bb2894db22bf Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 30 Jan 2020 17:50:50 +0100 Subject: [PATCH 020/105] Add CMIP citation info, and refactor --- esmvalcore/_task.py | 162 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 137 insertions(+), 25 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 44349e18f0..6a88137c71 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,8 +12,11 @@ from copy import deepcopy from multiprocessing import Pool +import urllib +import json import psutil import yaml +from pybtex.database import BibliographyData, Entry from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance @@ -24,6 +27,9 @@ 'mip', } +CMIP6_CITATION_URL = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch/' \ + 'cmip6?input=CMIP6.CMIP.' 
+ def which(executable): """Find executable in PATH.""" @@ -588,35 +594,48 @@ def _collect_provenance(self): def _write_citation_file(product): """Write citation information provided by the recorded provenance.""" # collect info from provenance - product_entry = [] + citation = { + 'reference': [], + 'info_url': [], + 'tag': [], + 'file': [], + 'entry': '', + 'url': '' + } + citation['file'] = [ + os.path.splitext(product.filename)[0] + '_data_citation_url.txt', + os.path.splitext(product.filename)[0] + '_data_citation.bibtex', + ] for item in product.provenance.records: for key, value in item.attributes: if (key.namespace.prefix == 'attribute' and key.localpart in {'reference', 'references'}): - product_entry.append(value) - - # map between reference.tags and product.entries - reference_tag = {v: k for k, v in TAGS['references'].items()} - product_tag = [] - for key in reference_tag.keys(): - for entry in product_entry: - if key in entry and reference_tag[key] not in product_tag: - product_tag.append(reference_tag[key]) - - # save all citation info into one bibtex file - bibtex_entry = '' - for tags in product_tag: - bibtex_file = os.path.join(REFERENCES_PATH, tags + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - bibtex_entry += '{}\n'.format(file.read()) - else: - logger.info('The reference file %s does not exist.', - bibtex_file) - citation_file = (os.path.splitext(product.filename)[0] - + '_citation.bibtex') - with open(citation_file, 'w') as file: - file.write(bibtex_entry) + citation['reference'].append(value) + if (key.namespace.prefix == 'attribute' + and key.localpart == 'further_info_url'): + citation['info_url'].append('.'.join( + (value.split(".org/")[1]).split(".")[1:4] + )) + + # collect CMIP6 citation, if any + if citation['info_url']: + citation['entry'], citation['url'] = _collect_cmip_citation( + citation['info_url'] + ) + + if citation['url']: + with open(citation['file'][0], 'w') as file: + 
file.write(citation['url']) + + # map between reference.entry and product.entry + citation['tag'] = _replace_entry(TAGS['references'], + citation['reference']) + + # collect all citation info into one bibtex file + citation['entry'] += _collect_bibtex_citation(citation['tag']) + if citation['entry']: + with open(citation['file'][1], 'w') as file: + file.write(citation['entry']) def __str__(self): """Get human readable description.""" @@ -629,6 +648,99 @@ def __str__(self): return txt +def _get_response(url): + """Return information from CMIP6 Data Citation service in json format.""" + json_data = False + try: + open_url = urllib.request.urlopen(url) + if open_url.getcode() == 200: + data = open_url.read() + json_data = json.loads(data) + else: + logger.info('Error in the CMIP citation link %s', + url) + except IOError: + logger.info('Error in receiving the CMIP citation file %s', + url) + return json_data + + +def _json_to_bibtex(data): + """Make a bibtex entry from CMIP6 Data Citation json format.""" + url = ''.join(['https://doi.org/', data['identifier']['id']]) + author_list = [] + for item in data['creators']: + author_list.append(item['creatorName']) + bib_entry = {url: Entry('misc', [ + ('url', url), + ('title', data['titles'][0]), + ('publisher', data['publisher']), + ('year', data['publicationYear']), + ('author', ' and '.join(author_list)), + ('doi', data['identifier']['id']), + ])} + bib_data = BibliographyData(bib_entry).to_string("bibtex") + return bib_data + + +def _cmip_citation(json_url): + """Get citation information from CMIP6 Data Citation Service.""" + entry = False + json_data = _get_response(json_url) + if json_data: + entry = _json_to_bibtex(json_data) + else: + logger.info('Writing the CMIP citation link %s', + json_url) + return entry + + +def _replace_entry(tags_entry, product_entry): + """Map between the entries in provenance and the entries + in config-references.yml and return tags""" + entry_tags = {v: k for k, v in tags_entry.items()} 
+ tags = [] + for key in entry_tags.keys(): + for entry in product_entry: + if key in entry and entry_tags[key] not in tags: + tags.append(entry_tags[key]) + return tags + + +def _collect_bibtex_citation(citation_tags): + """Collect citation informtion from reference folder that + contains bibtex files""" + citation_entry = '' + for tag in citation_tags: + bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: + citation_entry += '{}\n'.format(file.read()) + else: + logger.info('The reference file %s does not exist.', + bibtex_file) + return citation_entry + + +def _collect_cmip_citation(info_url): + split_str = 'cmip6?input=CMIP6.CMIP.' + citation_entry = '' + citation_url = '' + for info in info_url: + json_url = ''.join( + [CMIP6_CITATION_URL.split(split_str)[0], + 'cerarest/export', split_str, info] + ) + entry = _cmip_citation(json_url) + if entry: + citation_entry += '{}\n'.format(entry) + else: + citation_url += '{}\n'.format( + ''.join([CMIP6_CITATION_URL, info]) + ) + return citation_entry, citation_url + + def get_flattened_tasks(tasks): """Return a set of all tasks and their ancestors in `tasks`.""" return set(t for task in tasks for t in task.flatten()) From c319a9efe2adb10d8791279ac6846d0f52c68a8f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 11:09:02 +0100 Subject: [PATCH 021/105] remove pybtex, fix _json_to_bibtex function --- esmvalcore/_task.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 6a88137c71..2e64e2010c 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -16,7 +16,6 @@ import json import psutil import yaml -from pybtex.database import BibliographyData, Entry from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH from ._provenance import TrackedFile, get_task_provenance @@ -666,21 +665,20 @@ def _get_response(url): def 
_json_to_bibtex(data): - """Make a bibtex entry from CMIP6 Data Citation json format.""" + """Make a bibtex entry from CMIP6 Data Citation json data.""" url = ''.join(['https://doi.org/', data['identifier']['id']]) author_list = [] for item in data['creators']: author_list.append(item['creatorName']) - bib_entry = {url: Entry('misc', [ - ('url', url), - ('title', data['titles'][0]), - ('publisher', data['publisher']), - ('year', data['publicationYear']), - ('author', ' and '.join(author_list)), - ('doi', data['identifier']['id']), - ])} - bib_data = BibliographyData(bib_entry).to_string("bibtex") - return bib_data + bibtex_entry = ('@misc{'+ url + ',\n\t'\ + 'url = {' + url + '},\n\t'\ + 'title = {' + data['titles'][0] + '},\n\t'\ + 'publisher = {' + data['publisher'] + '},\n\t'\ + 'year = '+ data['publicationYear'] + ',\n\t'\ + 'author = {' + ' and '.join(author_list) + '},\n\t'\ + 'doi = {' + data['identifier']['id'] + '},\n'\ + '}') + return bibtex_entry def _cmip_citation(json_url): From 227460b8aa807d5f215d591a3c0126f88f4e53a9 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 13:41:20 +0100 Subject: [PATCH 022/105] Refactor and style --- esmvalcore/_task.py | 89 +++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 52 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 2e64e2010c..4a751e879c 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -603,7 +603,7 @@ def _write_citation_file(product): } citation['file'] = [ os.path.splitext(product.filename)[0] + '_data_citation_url.txt', - os.path.splitext(product.filename)[0] + '_data_citation.bibtex', + os.path.splitext(product.filename)[0] + '_citation.bibtex', ] for item in product.provenance.records: for key, value in item.attributes: @@ -619,19 +619,18 @@ def _write_citation_file(product): # collect CMIP6 citation, if any if citation['info_url']: citation['entry'], citation['url'] = _collect_cmip_citation( - citation['info_url'] - ) - + 
citation['info_url']) if citation['url']: with open(citation['file'][0], 'w') as file: file.write(citation['url']) - # map between reference.entry and product.entry - citation['tag'] = _replace_entry(TAGS['references'], - citation['reference']) + # collect recipe citation, if any + if citation['reference']: + citation['tag'] = _replace_entry(citation['reference']) + citation['entry'] += '{}\n'.format( + _collect_bibtex_citation(citation['tag'])) - # collect all citation info into one bibtex file - citation['entry'] += _collect_bibtex_citation(citation['tag']) + # write one bibtex file if citation['entry']: with open(citation['file'][1], 'w') as file: file.write(citation['entry']) @@ -670,33 +669,20 @@ def _json_to_bibtex(data): author_list = [] for item in data['creators']: author_list.append(item['creatorName']) - bibtex_entry = ('@misc{'+ url + ',\n\t'\ - 'url = {' + url + '},\n\t'\ - 'title = {' + data['titles'][0] + '},\n\t'\ - 'publisher = {' + data['publisher'] + '},\n\t'\ - 'year = '+ data['publicationYear'] + ',\n\t'\ - 'author = {' + ' and '.join(author_list) + '},\n\t'\ - 'doi = {' + data['identifier']['id'] + '},\n'\ + bibtex_entry = ('@misc{' + url + ',\n\t' + 'url = {' + url + '},\n\t' + 'title = {' + data['titles'][0] + '},\n\t' + 'publisher = {' + data['publisher'] + '},\n\t' + 'year = ' + data['publicationYear'] + ',\n\t' + 'author = {' + ' and '.join(author_list) + '},\n\t' + 'doi = {' + data['identifier']['id'] + '},\n' '}') return bibtex_entry -def _cmip_citation(json_url): - """Get citation information from CMIP6 Data Citation Service.""" - entry = False - json_data = _get_response(json_url) - if json_data: - entry = _json_to_bibtex(json_data) - else: - logger.info('Writing the CMIP citation link %s', - json_url) - return entry - - -def _replace_entry(tags_entry, product_entry): - """Map between the entries in provenance and the entries - in config-references.yml and return tags""" - entry_tags = {v: k for k, v in tags_entry.items()} +def 
_replace_entry(product_entry): + """Find tags of the references in provenance""" + entry_tags = {v: k for k, v in TAGS['references'].items()} tags = [] for key in entry_tags.keys(): for entry in product_entry: @@ -705,38 +691,37 @@ def _replace_entry(tags_entry, product_entry): return tags -def _collect_bibtex_citation(citation_tags): - """Collect citation informtion from reference folder that - contains bibtex files""" - citation_entry = '' - for tag in citation_tags: +def _collect_bibtex_citation(tags): + """Collect informtion from bibtex files""" + entry = '' + for tag in tags: bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') if os.path.isfile(bibtex_file): with open(bibtex_file, 'r') as file: - citation_entry += '{}\n'.format(file.read()) + entry += '{}\n'.format(file.read()) else: logger.info('The reference file %s does not exist.', bibtex_file) - return citation_entry + return entry def _collect_cmip_citation(info_url): + """Collect information from CMIP6 Data Citation Service.""" split_str = 'cmip6?input=CMIP6.CMIP.' 
- citation_entry = '' - citation_url = '' + url = ''.join([CMIP6_CITATION_URL.split(split_str)[0], + 'cerarest/export', split_str]) + entry = '' + link = '' for info in info_url: - json_url = ''.join( - [CMIP6_CITATION_URL.split(split_str)[0], - 'cerarest/export', split_str, info] - ) - entry = _cmip_citation(json_url) - if entry: - citation_entry += '{}\n'.format(entry) + json_url = ''.join([url, info]) # make the json url + json_data = _get_response(json_url) + if json_data: + entry += '{}\n'.format(_json_to_bibtex(json_data)) else: - citation_url += '{}\n'.format( - ''.join([CMIP6_CITATION_URL, info]) - ) - return citation_entry, citation_url + logger.info('Writing the CMIP citation link %s', + json_url) + link += '{}\n'.format(''.join([CMIP6_CITATION_URL, info])) + return entry, link def get_flattened_tasks(tasks): From 6cccf255bdd8631f458c109f28cdcf2ca49f437d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 14:02:39 +0100 Subject: [PATCH 023/105] Refactor and style --- esmvalcore/_task.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 4a751e879c..9d53ffff74 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -593,18 +593,17 @@ def _collect_provenance(self): def _write_citation_file(product): """Write citation information provided by the recorded provenance.""" # collect info from provenance + file_name = os.path.splitext(product.filename)[0] citation = { 'reference': [], 'info_url': [], 'tag': [], - 'file': [], 'entry': '', - 'url': '' - } - citation['file'] = [ - os.path.splitext(product.filename)[0] + '_data_citation_url.txt', - os.path.splitext(product.filename)[0] + '_citation.bibtex', - ] + 'url': '', + 'file': [ + file_name + '_data_citation_url.txt', + file_name + '_citation.bibtex', + ]} for item in product.provenance.records: for key, value in item.attributes: if (key.namespace.prefix == 'attribute' @@ -612,9 +611,7 @@ def 
_write_citation_file(product): citation['reference'].append(value) if (key.namespace.prefix == 'attribute' and key.localpart == 'further_info_url'): - citation['info_url'].append('.'.join( - (value.split(".org/")[1]).split(".")[1:4] - )) + citation['info_url'].append(value) # collect CMIP6 citation, if any if citation['info_url']: @@ -708,19 +705,20 @@ def _collect_bibtex_citation(tags): def _collect_cmip_citation(info_url): """Collect information from CMIP6 Data Citation Service.""" split_str = 'cmip6?input=CMIP6.CMIP.' - url = ''.join([CMIP6_CITATION_URL.split(split_str)[0], - 'cerarest/export', split_str]) + url_stem = ''.join([CMIP6_CITATION_URL.split(split_str)[0], + 'cerarest/export', split_str]) entry = '' link = '' - for info in info_url: - json_url = ''.join([url, info]) # make the json url + for data_url in info_url: + data_info = '.'.join((data_url.split(".org/")[1]).split(".")[1:4]) + json_url = ''.join([url_stem, data_info]) # make the json url json_data = _get_response(json_url) if json_data: entry += '{}\n'.format(_json_to_bibtex(json_data)) else: logger.info('Writing the CMIP citation link %s', json_url) - link += '{}\n'.format(''.join([CMIP6_CITATION_URL, info])) + link += '{}\n'.format(''.join([CMIP6_CITATION_URL, data_info])) return entry, link From c4827575e5143c6dea07232fef98e163b32c3645 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 14:17:36 +0100 Subject: [PATCH 024/105] fix open_url --- esmvalcore/_task.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 9d53ffff74..5ee6f2a7e8 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -646,17 +646,18 @@ def __str__(self): def _get_response(url): """Return information from CMIP6 Data Citation service in json format.""" json_data = False - try: - open_url = urllib.request.urlopen(url) - if open_url.getcode() == 200: - data = open_url.read() - json_data = json.loads(data) - else: - 
logger.info('Error in the CMIP citation link %s', + if url.lower().startswith('https'): + try: + open_url = urllib.request.urlopen(url) + if open_url.getcode() == 200: + data = open_url.read() + json_data = json.loads(data) + else: + logger.info('Error in the CMIP citation link %s', + url) + except IOError: + logger.info('Error in receiving the CMIP citation file %s', url) - except IOError: - logger.info('Error in receiving the CMIP citation file %s', - url) return json_data From 824f869f211efd998071cf1a34184880e7f9152b Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 15:23:01 +0100 Subject: [PATCH 025/105] fix the _get_response function --- esmvalcore/_task.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 5ee6f2a7e8..9da3fdbc67 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,8 +12,7 @@ from copy import deepcopy from multiprocessing import Pool -import urllib -import json +import requests import psutil import yaml @@ -648,10 +647,9 @@ def _get_response(url): json_data = False if url.lower().startswith('https'): try: - open_url = urllib.request.urlopen(url) - if open_url.getcode() == 200: - data = open_url.read() - json_data = json.loads(data) + response = requests.get(url) + if response.status_code == 200: + json_data = response.json() else: logger.info('Error in the CMIP citation link %s', url) From 8cc7babcaf2cf1263a4f1dcc6f224b18ce0be5c0 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 16:24:31 +0100 Subject: [PATCH 026/105] add documentation --- esmvalcore/_task.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 9da3fdbc67..186f9b5b24 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -590,7 +590,14 @@ def _collect_provenance(self): @staticmethod def _write_citation_file(product): - """Write citation information provided by the recorded 
provenance.""" + """ + Write citation information provided by the recorded provenance. + Recipe and cmip6 data references are saved into one bibtex file. + cmip6 data references are provided by CMIP6 data citation service. + each cmip6 data reference has a json link. In the case of internet + connection, cmip6 data references are saved into a bibtex file. + Otherwise, cmip6 data reference links are saved into a text file. + """ # collect info from provenance file_name = os.path.splitext(product.filename)[0] citation = { From 96a5e850a58f92df6645f7edb85f74c1a9001720 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 31 Jan 2020 17:00:08 +0100 Subject: [PATCH 027/105] Style --- esmvalcore/_task.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 186f9b5b24..1b03ecd720 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -658,11 +658,9 @@ def _get_response(url): if response.status_code == 200: json_data = response.json() else: - logger.info('Error in the CMIP citation link %s', - url) + logger.info('Error in the CMIP json link') except IOError: - logger.info('Error in receiving the CMIP citation file %s', - url) + logger.info('Error in receiving the CMIP json file') return json_data @@ -686,12 +684,12 @@ def _json_to_bibtex(data): def _replace_entry(product_entry): """Find tags of the references in provenance""" entry_tags = {v: k for k, v in TAGS['references'].items()} - tags = [] + tag_list = [] for key in entry_tags.keys(): for entry in product_entry: - if key in entry and entry_tags[key] not in tags: - tags.append(entry_tags[key]) - return tags + if key in entry and entry_tags[key] not in tag_list: + tag_list.append(entry_tags[key]) + return tag_list def _collect_bibtex_citation(tags): @@ -713,19 +711,18 @@ def _collect_cmip_citation(info_url): split_str = 'cmip6?input=CMIP6.CMIP.' 
url_stem = ''.join([CMIP6_CITATION_URL.split(split_str)[0], 'cerarest/export', split_str]) - entry = '' - link = '' + citation_entry = '' + citation_link = '' for data_url in info_url: data_info = '.'.join((data_url.split(".org/")[1]).split(".")[1:4]) - json_url = ''.join([url_stem, data_info]) # make the json url - json_data = _get_response(json_url) + json_data = _get_response(''.join([url_stem, data_info])) if json_data: - entry += '{}\n'.format(_json_to_bibtex(json_data)) + citation_entry += '{}\n'.format(_json_to_bibtex(json_data)) else: - logger.info('Writing the CMIP citation link %s', - json_url) - link += '{}\n'.format(''.join([CMIP6_CITATION_URL, data_info])) - return entry, link + citation_link += '{}\n'.format(''.join( + [CMIP6_CITATION_URL, data_info])) + logger.info('Returning the CMIP citation link for %s', data_info) + return citation_entry, citation_link def get_flattened_tasks(tasks): From c1214a4fcfcafd2ccc17e0a90bdc70e3c2f63d03 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 10 Feb 2020 17:41:26 +0100 Subject: [PATCH 028/105] Refactor and style --- esmvalcore/_task.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 1b03ecd720..b4ce18c988 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -612,12 +612,11 @@ def _write_citation_file(product): ]} for item in product.provenance.records: for key, value in item.attributes: - if (key.namespace.prefix == 'attribute' - and key.localpart in {'reference', 'references'}): - citation['reference'].append(value) - if (key.namespace.prefix == 'attribute' - and key.localpart == 'further_info_url'): - citation['info_url'].append(value) + if key.namespace.prefix == 'attribute': + if key.localpart in {'reference', 'references'}: + citation['reference'].append(value) + elif key.localpart == 'further_info_url': + citation['info_url'].append(value) # collect CMIP6 citation, if any if citation['info_url']: @@ -682,7 
+681,7 @@ def _json_to_bibtex(data): def _replace_entry(product_entry): - """Find tags of the references in provenance""" + """Find tags of the references in provenance.""" entry_tags = {v: k for k, v in TAGS['references'].items()} tag_list = [] for key in entry_tags.keys(): @@ -693,7 +692,7 @@ def _replace_entry(product_entry): def _collect_bibtex_citation(tags): - """Collect informtion from bibtex files""" + """Collect information from bibtex files.""" entry = '' for tag in tags: bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') From b0db9ec849e0a114bab79710bc7c2bac11dcaa62 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 11 Feb 2020 13:36:04 +0100 Subject: [PATCH 029/105] add a test checking if jason data includes bibtex keys --- esmvalcore/_task.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index b4ce18c988..1c316dbd9d 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -662,6 +662,15 @@ def _get_response(url): logger.info('Error in receiving the CMIP json file') return json_data +def _valid_json_data(data): + valid_data = False + keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] + if all(key in data for key in keys): + check_names = all('creatorName' in item for item in data['creators']) + if 'id' in data['identifier'] and check_names: + valid_data = True + return valid_data + def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" @@ -715,7 +724,7 @@ def _collect_cmip_citation(info_url): for data_url in info_url: data_info = '.'.join((data_url.split(".org/")[1]).split(".")[1:4]) json_data = _get_response(''.join([url_stem, data_info])) - if json_data: + if json_data and _valid_json_data(json_data): citation_entry += '{}\n'.format(_json_to_bibtex(json_data)) else: citation_link += '{}\n'.format(''.join( From cf54ae0aeb07282795c33a09549aa283dd95d476 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: 
Tue, 11 Feb 2020 13:50:10 +0100 Subject: [PATCH 030/105] style --- esmvalcore/_task.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 1c316dbd9d..caed46a1c4 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -601,8 +601,8 @@ def _write_citation_file(product): # collect info from provenance file_name = os.path.splitext(product.filename)[0] citation = { - 'reference': [], - 'info_url': [], + 'references': [], + 'info_urls': [], 'tag': [], 'entry': '', 'url': '', @@ -614,21 +614,21 @@ def _write_citation_file(product): for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: - citation['reference'].append(value) - elif key.localpart == 'further_info_url': - citation['info_url'].append(value) + citation['references'].append(value) + elif key.localpart == 'further_info_url': + citation['info_urls'].append(value) # collect CMIP6 citation, if any - if citation['info_url']: + if citation['info_urls']: citation['entry'], citation['url'] = _collect_cmip_citation( - citation['info_url']) + citation['info_urls']) if citation['url']: with open(citation['file'][0], 'w') as file: file.write(citation['url']) # collect recipe citation, if any - if citation['reference']: - citation['tag'] = _replace_entry(citation['reference']) + if citation['references']: + citation['tag'] = _replace_entry(citation['references']) citation['entry'] += '{}\n'.format( _collect_bibtex_citation(citation['tag'])) @@ -662,6 +662,7 @@ def _get_response(url): logger.info('Error in receiving the CMIP json file') return json_data + def _valid_json_data(data): valid_data = False keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] From c5bcdd5b324e258f85f22da550df04f1c304f123 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 19 Feb 2020 10:03:59 +0100 Subject: [PATCH 031/105] add new module and remove functions from 
task --- esmvalcore/_citation.py | 165 ++++++++++++++++++++++++++++++++++++++++ esmvalcore/_task.py | 154 +++---------------------------------- 2 files changed, 175 insertions(+), 144 deletions(-) create mode 100644 esmvalcore/_citation.py diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py new file mode 100644 index 0000000000..a6348ec51b --- /dev/null +++ b/esmvalcore/_citation.py @@ -0,0 +1,165 @@ +"""Citation module.""" +import contextlib +import datetime +import errno +import logging +import numbers +import os +import pprint +import subprocess +import threading +import time +from copy import deepcopy +from multiprocessing import Pool + +import requests +import psutil +import yaml + +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH +from ._provenance import TrackedFile, get_task_provenance + +logger = logging.getLogger(__name__) + +DATASET_KEYS = { + 'mip', +} + +CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' + + +def _write_citation_file(product): + """ + Write citation information provided by the recorded provenance. + Recipe and cmip6 data references are saved into one bibtex file. + cmip6 data references are provided by CMIP6 data citation service. + each cmip6 data reference has a json link. In the case of internet + connection, cmip6 data references are saved into a bibtex file. + Otherwise, cmip6 data reference links are saved into a text file. 
+ """ + # collect info from provenance + product_name = os.path.splitext(product.filename)[0] + product_tags = [] + product_entries = '' + product_urls = '' + citation = { + 'references': [], + 'info_urls': [], + 'tag': [], + 'entry': '', + 'url': '', + } + for item in product.provenance.records: + for key, value in item.attributes: + if key.namespace.prefix == 'attribute': + print(item.attributes[0]) + print('&&&&&&&&&&&&&&&&&&&&&&&&') + if key.localpart in {'reference', 'references'}: + product_entries += '{}\n'.format(_collect_bibtex_citation(product_tags)) + elif key.localpart == 'mip_era' and value == 'CMIP6': + json_url, info_url = _make_url(item.attributes) + cmip_entry = _collect_cmip_citation(json_url, info_url) + if cmip_entry == info_url: + product_urls += '{}\n'.format(cmip_entry) + else: + product_entries += '{}\n'.format(cmip_entry) + + # save CMIP6 url_info, if any + if product_urls: + with open(f'{product_name}_data_citation_url.txt', 'w') as file: + file.write(citation['url']) + + # write one bibtex file + if product_entries: + with open(f'{product_name}_citation.bibtex.txt', 'w') as file: + file.write(product_entries) + + +def _get_response(url): + """Return information from CMIP6 Data Citation service in json format.""" + json_data = False + if url.lower().startswith('https'): + try: + response = requests.get(url) + if response.status_code == 200: + json_data = response.json() + else: + logger.info('Error in the CMIP json link') + except IOError: + logger.info('Error in receiving the CMIP json file') + return json_data + + +def _valid_json_data(data): + valid_data = False + keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] + if all(key in data for key in keys): + check_names = all('creatorName' in item for item in data['creators']) + if 'id' in data['identifier'] and check_names: + valid_data = True + return valid_data + + +def _json_to_bibtex(data): + """Make a bibtex entry from CMIP6 Data Citation json data.""" + url = 
''.join(['https://doi.org/', data['identifier']['id']]) + author_list = [] + for item in data['creators']: + author_list.append(item['creatorName']) + bibtex_entry = ('@misc{' + url + ',\n\t' + 'url = {' + url + '},\n\t' + 'title = {' + data['titles'][0] + '},\n\t' + 'publisher = {' + data['publisher'] + '},\n\t' + 'year = ' + data['publicationYear'] + ',\n\t' + 'author = {' + ' and '.join(author_list) + '},\n\t' + 'doi = {' + data['identifier']['id'] + '},\n' + '}') + return bibtex_entry + + +def _replace_entry(product_entry): + """Find tags of the references in provenance.""" + entry_tags = {v: k for k, v in TAGS['references'].items()} + tag_list = [] + for key in entry_tags.keys(): + for entry in product_entry: + if key in entry and entry_tags[key] not in tag_list: + tag_list.append(entry_tags[key]) + return tag_list + + +def _collect_bibtex_citation(tags): + """Collect information from bibtex files.""" + entry = '' + for tag in tags: + bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: + entry += '{}\n'.format(file.read()) + else: + logger.info('The reference file %s does not exist.', + bibtex_file) + return entry + + +def _collect_cmip_citation(json_url, info_url): + """Collect information from CMIP6 Data Citation Service.""" + bibtex_entry = info_url + json_data = _get_response(json_url) + if json_data and _valid_json_data(json_data): + bibtex_entry = _json_to_bibtex(json_data) + else: + logger.info('Invalid json link %s', json_url) + return bibtex_entry + + +def _make_url(attribute): + mip_era = attribute.get('attribute:mip_era') + activity_id = attribute.get('attribute:activity_id') + institution_id = attribute.get('attribute:institution_id') + source_id = attribute.get('attribute:source_id') + experiment_id = attribute.get('attribute:experiment_id') + url_prefix = f'{mip_era}.{activity_id}.{institution_id}.{source_id}.{experiment_id}' + json_url = 
f'{CMIP6_URL_STEM}/cerarest/exportcmip6?input={url_prefix}' + info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' + return json_url, info_url diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index caed46a1c4..88b82e95ab 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -12,12 +12,12 @@ from copy import deepcopy from multiprocessing import Pool -import requests import psutil import yaml -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH +from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags from ._provenance import TrackedFile, get_task_provenance +from ._citation import _write_citation_file logger = logging.getLogger(__name__) @@ -25,9 +25,6 @@ 'mip', } -CMIP6_CITATION_URL = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch/' \ - 'cmip6?input=CMIP6.CMIP.' - def which(executable): """Find executable in PATH.""" @@ -575,68 +572,23 @@ def _collect_provenance(self): } attributes.update(deepcopy(attrs)) - for key in attributes: - if key in TAGS: - attributes[key] = replace_tags(key, attributes[key]) + section = 'references' + # for key in attributes: + # if key in TAGS: + # if key in section: + # attributes[key] = cite_tags(key, attributes[key]) + # else: + # attributes[key] = replace_tags(key, attributes[key]) product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() - self._write_citation_file(product) + _write_citation_file(product) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, time.time() - start) - @staticmethod - def _write_citation_file(product): - """ - Write citation information provided by the recorded provenance. - Recipe and cmip6 data references are saved into one bibtex file. - cmip6 data references are provided by CMIP6 data citation service. - each cmip6 data reference has a json link. In the case of internet - connection, cmip6 data references are saved into a bibtex file. 
- Otherwise, cmip6 data reference links are saved into a text file. - """ - # collect info from provenance - file_name = os.path.splitext(product.filename)[0] - citation = { - 'references': [], - 'info_urls': [], - 'tag': [], - 'entry': '', - 'url': '', - 'file': [ - file_name + '_data_citation_url.txt', - file_name + '_citation.bibtex', - ]} - for item in product.provenance.records: - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - if key.localpart in {'reference', 'references'}: - citation['references'].append(value) - elif key.localpart == 'further_info_url': - citation['info_urls'].append(value) - - # collect CMIP6 citation, if any - if citation['info_urls']: - citation['entry'], citation['url'] = _collect_cmip_citation( - citation['info_urls']) - if citation['url']: - with open(citation['file'][0], 'w') as file: - file.write(citation['url']) - - # collect recipe citation, if any - if citation['references']: - citation['tag'] = _replace_entry(citation['references']) - citation['entry'] += '{}\n'.format( - _collect_bibtex_citation(citation['tag'])) - - # write one bibtex file - if citation['entry']: - with open(citation['file'][1], 'w') as file: - file.write(citation['entry']) - def __str__(self): """Get human readable description.""" txt = "{}:\nscript: {}\n{}\nsettings:\n{}\n".format( @@ -648,92 +600,6 @@ def __str__(self): return txt -def _get_response(url): - """Return information from CMIP6 Data Citation service in json format.""" - json_data = False - if url.lower().startswith('https'): - try: - response = requests.get(url) - if response.status_code == 200: - json_data = response.json() - else: - logger.info('Error in the CMIP json link') - except IOError: - logger.info('Error in receiving the CMIP json file') - return json_data - - -def _valid_json_data(data): - valid_data = False - keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] - if all(key in data for key in keys): - check_names = 
all('creatorName' in item for item in data['creators']) - if 'id' in data['identifier'] and check_names: - valid_data = True - return valid_data - - -def _json_to_bibtex(data): - """Make a bibtex entry from CMIP6 Data Citation json data.""" - url = ''.join(['https://doi.org/', data['identifier']['id']]) - author_list = [] - for item in data['creators']: - author_list.append(item['creatorName']) - bibtex_entry = ('@misc{' + url + ',\n\t' - 'url = {' + url + '},\n\t' - 'title = {' + data['titles'][0] + '},\n\t' - 'publisher = {' + data['publisher'] + '},\n\t' - 'year = ' + data['publicationYear'] + ',\n\t' - 'author = {' + ' and '.join(author_list) + '},\n\t' - 'doi = {' + data['identifier']['id'] + '},\n' - '}') - return bibtex_entry - - -def _replace_entry(product_entry): - """Find tags of the references in provenance.""" - entry_tags = {v: k for k, v in TAGS['references'].items()} - tag_list = [] - for key in entry_tags.keys(): - for entry in product_entry: - if key in entry and entry_tags[key] not in tag_list: - tag_list.append(entry_tags[key]) - return tag_list - - -def _collect_bibtex_citation(tags): - """Collect information from bibtex files.""" - entry = '' - for tag in tags: - bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - entry += '{}\n'.format(file.read()) - else: - logger.info('The reference file %s does not exist.', - bibtex_file) - return entry - - -def _collect_cmip_citation(info_url): - """Collect information from CMIP6 Data Citation Service.""" - split_str = 'cmip6?input=CMIP6.CMIP.' 
- url_stem = ''.join([CMIP6_CITATION_URL.split(split_str)[0], - 'cerarest/export', split_str]) - citation_entry = '' - citation_link = '' - for data_url in info_url: - data_info = '.'.join((data_url.split(".org/")[1]).split(".")[1:4]) - json_data = _get_response(''.join([url_stem, data_info])) - if json_data and _valid_json_data(json_data): - citation_entry += '{}\n'.format(_json_to_bibtex(json_data)) - else: - citation_link += '{}\n'.format(''.join( - [CMIP6_CITATION_URL, data_info])) - logger.info('Returning the CMIP citation link for %s', data_info) - return citation_entry, citation_link - - def get_flattened_tasks(tasks): """Return a set of all tasks and their ancestors in `tasks`.""" return set(t for task in tasks for t in task.flatten()) From 824c0f9480049cc450a0caacf412ce3184612b5c Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 19 Feb 2020 10:04:31 +0100 Subject: [PATCH 032/105] fix citation parts --- esmvalcore/_config.py | 5 +++++ esmvalcore/_provenance.py | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 4c94b80ef7..64b368df9a 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -245,3 +245,8 @@ def get_tag_value(section, tag): def replace_tags(section, tags): """Replace a list of tags with their values.""" return tuple(get_tag_value(section, tag) for tag in tags) + + +def cite_tags(section, tags): + """Replace a list of tags with their values.""" + return tuple(get_tag_value(section, tag) for tag in tags) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index e813f9162a..680b321afb 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -40,7 +40,8 @@ def get_esmvaltool_provenance(): # TODO: add dependencies with versions here section = 'references' if section in TAGS and ESMVALTOOL_PAPER_TAG in TAGS[section]: - attributes_value = replace_tags(section, [ESMVALTOOL_PAPER_TAG]) + # attributes_value = replace_tags(section, 
[ESMVALTOOL_PAPER_TAG]) + attributes_value = cite_tags(section, [ESMVALTOOL_PAPER_TAG]) else: attributes_value = ESMVALTOOL_PAPER_TAG attributes = {'attribute:references': attributes_value} From a0a05dac50e6e11c45aab4a20a12bc4ce5cdb75a Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 13:04:02 +0100 Subject: [PATCH 033/105] fix the citation functions, fix provenance to not replace the tags for reference --- esmvalcore/_citation.py | 74 +++++++++++++++++++++++------------------ esmvalcore/_task.py | 10 ++---- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a6348ec51b..d0122c8d0e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -39,23 +39,13 @@ def _write_citation_file(product): """ # collect info from provenance product_name = os.path.splitext(product.filename)[0] - product_tags = [] product_entries = '' product_urls = '' - citation = { - 'references': [], - 'info_urls': [], - 'tag': [], - 'entry': '', - 'url': '', - } for item in product.provenance.records: for key, value in item.attributes: if key.namespace.prefix == 'attribute': - print(item.attributes[0]) - print('&&&&&&&&&&&&&&&&&&&&&&&&') if key.localpart in {'reference', 'references'}: - product_entries += '{}\n'.format(_collect_bibtex_citation(product_tags)) + product_entries += '{}\n'.format(_collect_bibtex_citation(value)) elif key.localpart == 'mip_era' and value == 'CMIP6': json_url, info_url = _make_url(item.attributes) cmip_entry = _collect_cmip_citation(json_url, info_url) @@ -67,11 +57,11 @@ def _write_citation_file(product): # save CMIP6 url_info, if any if product_urls: with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(citation['url']) + file.write(product_urls) # write one bibtex file if product_entries: - with open(f'{product_name}_citation.bibtex.txt', 'w') as file: + with open(f'{product_name}_citation.bibtex', 'w') as file: file.write(product_entries) @@ 
-102,18 +92,30 @@ def _valid_json_data(data): def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" - url = ''.join(['https://doi.org/', data['identifier']['id']]) - author_list = [] - for item in data['creators']: - author_list.append(item['creatorName']) - bibtex_entry = ('@misc{' + url + ',\n\t' - 'url = {' + url + '},\n\t' - 'title = {' + data['titles'][0] + '},\n\t' - 'publisher = {' + data['publisher'] + '},\n\t' - 'year = ' + data['publicationYear'] + ',\n\t' - 'author = {' + ' and '.join(author_list) + '},\n\t' - 'doi = {' + data['identifier']['id'] + '},\n' - '}') + author_list = [item['creatorName'] for item in data['creators']] + if len(author_list) > 1: + authors = ' and '.join(author_list) + else: + authors = author_list[0] + title = data['titles'][0] + publisher = data['publisher'] + year = data['publicationYear'] + doi = data['identifier']['id'] + url = f'https://doi.org/{doi}' + + newlinetab = '\n\t' + newline = '\n' + + bibtex_entry = ( + f'{"@misc{"}{url},{newlinetab}' + f'url = {{{url}}},{newlinetab}' + f'title = {{{title}}},{newlinetab}' + f'publisher = {{{publisher}}},{newlinetab}' + f'year = {year},{newlinetab}' + f'author = {{{authors}}},{newlinetab}' + f'doi = {{{doi}}},{newline}' + f'{"}"}' + ) return bibtex_entry @@ -128,8 +130,9 @@ def _replace_entry(product_entry): return tag_list -def _collect_bibtex_citation(tags): +def _collect_bibtex_citation(value): """Collect information from bibtex files.""" + tags = value.split(',') entry = '' for tag in tags: bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') @@ -154,12 +157,19 @@ def _collect_cmip_citation(json_url, info_url): def _make_url(attribute): - mip_era = attribute.get('attribute:mip_era') - activity_id = attribute.get('attribute:activity_id') - institution_id = attribute.get('attribute:institution_id') - source_id = attribute.get('attribute:source_id') - experiment_id = attribute.get('attribute:experiment_id') - url_prefix = 
f'{mip_era}.{activity_id}.{institution_id}.{source_id}.{experiment_id}' + """make json and info urls based on CMIP6 Data Citation Service.""" + # the order of keys is important + localpart = { + 'mip_era': '', + 'activity_id': '', + 'institution_id': '', + 'source_id': '', + 'experiment_id': '', + } + for key, value in attribute: + if key.localpart in localpart: + localpart[key.localpart] = value + url_prefix = '.'.join(localpart.values()) json_url = f'{CMIP6_URL_STEM}/cerarest/exportcmip6?input={url_prefix}' info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' return json_url, info_url diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 88b82e95ab..399d2176f6 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -572,13 +572,9 @@ def _collect_provenance(self): } attributes.update(deepcopy(attrs)) - section = 'references' - # for key in attributes: - # if key in TAGS: - # if key in section: - # attributes[key] = cite_tags(key, attributes[key]) - # else: - # attributes[key] = replace_tags(key, attributes[key]) + for key in attributes: + if key in TAGS and key not in 'references': + attributes[key] = replace_tags(key, attributes[key]) product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) From 3a87afe9bb98709afa6cb76254d669afccf60522 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 15:05:15 +0100 Subject: [PATCH 034/105] keep the references tags and not to replace them --- esmvalcore/_provenance.py | 8 +------- esmvalcore/_recipe.py | 2 +- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index 680b321afb..3996febb4d 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -10,7 +10,6 @@ from prov.model import ProvDocument from ._version import __version__ -from ._config import replace_tags, TAGS logger = logging.getLogger(__name__) @@ -38,12 +37,7 @@ def get_esmvaltool_provenance(): 
create_namespace(provenance, namespace) # TODO: add dependencies with versions here - section = 'references' - if section in TAGS and ESMVALTOOL_PAPER_TAG in TAGS[section]: - # attributes_value = replace_tags(section, [ESMVALTOOL_PAPER_TAG]) - attributes_value = cite_tags(section, [ESMVALTOOL_PAPER_TAG]) - else: - attributes_value = ESMVALTOOL_PAPER_TAG + attributes_value = ESMVALTOOL_PAPER_TAG attributes = {'attribute:references': attributes_value} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 7f20c5ace1..dcb65704f7 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -949,7 +949,7 @@ def _initalize_provenance(self, raw_documentation): """Initialize the recipe provenance.""" doc = deepcopy(raw_documentation) for key in doc: - if key in TAGS: + if key in TAGS and key not in 'references': doc[key] = replace_tags(key, doc[key]) return get_recipe_provenance(doc, self._filename) From b7a6773b771bdc3f8b2c7a861e6c954b5f27a79c Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 15:06:55 +0100 Subject: [PATCH 035/105] remove unnecessary imports and refactor --- esmvalcore/_citation.py | 96 ++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 59 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index d0122c8d0e..b5e9486c14 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -1,30 +1,12 @@ """Citation module.""" -import contextlib -import datetime -import errno -import logging -import numbers import os -import pprint -import subprocess -import threading -import time -from copy import deepcopy -from multiprocessing import Pool - +import logging +import re import requests -import psutil -import yaml - -from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags, REFERENCES_PATH -from ._provenance import TrackedFile, get_task_provenance +from ._config import 
REFERENCES_PATH logger = logging.getLogger(__name__) -DATASET_KEYS = { - 'mip', -} - CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' @@ -39,13 +21,14 @@ def _write_citation_file(product): """ # collect info from provenance product_name = os.path.splitext(product.filename)[0] + products_tags = [] product_entries = '' product_urls = '' for item in product.provenance.records: for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: - product_entries += '{}\n'.format(_collect_bibtex_citation(value)) + products_tags.append(value) elif key.localpart == 'mip_era' and value == 'CMIP6': json_url, info_url = _make_url(item.attributes) cmip_entry = _collect_cmip_citation(json_url, info_url) @@ -59,12 +42,25 @@ def _write_citation_file(product): with open(f'{product_name}_data_citation_url.txt', 'w') as file: file.write(product_urls) + # convert tags to bibtex entries + if products_tags: + # make tags clean and unique + tags = list(set(_clean_tags(products_tags))) + for tag in tags: + product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) + # write one bibtex file if product_entries: with open(f'{product_name}_citation.bibtex', 'w') as file: file.write(product_entries) +def _clean_tags(tags): + """some tages are combined in one string variable in provenance.""" + pattern = re.compile(r'\w+') + return pattern.findall(str(tags)) + + def _get_response(url): """Return information from CMIP6 Data Citation service in json format.""" json_data = False @@ -93,55 +89,37 @@ def _valid_json_data(data): def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" author_list = [item['creatorName'] for item in data['creators']] - if len(author_list) > 1: - authors = ' and '.join(author_list) - else: + if author_list[0] == author_list[-1]: authors = author_list[0] + else: + authors = ' and '.join(author_list) title = data['titles'][0] publisher = data['publisher'] year = 
data['publicationYear'] doi = data['identifier']['id'] url = f'https://doi.org/{doi}' - - newlinetab = '\n\t' - newline = '\n' - bibtex_entry = ( - f'{"@misc{"}{url},{newlinetab}' - f'url = {{{url}}},{newlinetab}' - f'title = {{{title}}},{newlinetab}' - f'publisher = {{{publisher}}},{newlinetab}' - f'year = {year},{newlinetab}' - f'author = {{{authors}}},{newlinetab}' - f'doi = {{{doi}}},{newline}' - f'{"}"}' + f'{"@misc{"}{url},\n\t' + f'url = {{{url}}},\n\t' + f'title = {{{title}}},\n\t' + f'publisher = {{{publisher}}},\n\t' + f'year = {year},\n\t' + f'author = {{{authors}}},\n\t' + f'doi = {{{doi}}},\n' + f'{"}"}\n' ) return bibtex_entry -def _replace_entry(product_entry): - """Find tags of the references in provenance.""" - entry_tags = {v: k for k, v in TAGS['references'].items()} - tag_list = [] - for key in entry_tags.keys(): - for entry in product_entry: - if key in entry and entry_tags[key] not in tag_list: - tag_list.append(entry_tags[key]) - return tag_list - - -def _collect_bibtex_citation(value): +def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" - tags = value.split(',') - entry = '' - for tag in tags: - bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - entry += '{}\n'.format(file.read()) - else: - logger.info('The reference file %s does not exist.', - bibtex_file) + bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: + entry = '{}'.format(file.read()) + else: + logger.info('The reference file %s does not exist.', + bibtex_file) return entry From b88e2dd9860e4c6d9d9d2e92ef654da451b7e9e9 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 16:28:25 +0100 Subject: [PATCH 036/105] update the documentation --- esmvalcore/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py 
index 64b368df9a..408c6e110d 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -28,7 +28,7 @@ def find_diagnostics(): def find_references(): - """Try to find bibtex files in references folder.""" + """Try to find the path for references folder.""" try: import esmvaltool except ImportError: From 438960e276a1b6b0cc34527d544f6b2feaad5e73 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 16:30:41 +0100 Subject: [PATCH 037/105] check if the reference folder does not exist --- esmvalcore/_citation.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index b5e9486c14..d3d65f932d 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -113,13 +113,16 @@ def _json_to_bibtex(data): def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" - bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - entry = '{}'.format(file.read()) + if REFERENCES_PATH: + bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') + if os.path.isfile(bibtex_file): + with open(bibtex_file, 'r') as file: + entry = '{}'.format(file.read()) + else: + raise ValueError('The reference file {} does not exist.'.format(bibtex_file)) else: - logger.info('The reference file %s does not exist.', - bibtex_file) + logger.info('The reference folder does not exist.') + entry = '' return entry From 36677e39a354469c6c09a7f93cd95c6d1e815c50 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 17:10:14 +0100 Subject: [PATCH 038/105] remove validating json data --- esmvalcore/_citation.py | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index d3d65f932d..8f028babed 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -76,28 +76,24 @@ def 
_get_response(url): return json_data -def _valid_json_data(data): - valid_data = False - keys = ['identifier', 'creators', 'titles', 'publisher', 'publicationYear'] - if all(key in data for key in keys): - check_names = all('creatorName' in item for item in data['creators']) - if 'id' in data['identifier'] and check_names: - valid_data = True - return valid_data - - def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" - author_list = [item['creatorName'] for item in data['creators']] - if author_list[0] == author_list[-1]: - authors = author_list[0] - else: - authors = ' and '.join(author_list) - title = data['titles'][0] - publisher = data['publisher'] - year = data['publicationYear'] - doi = data['identifier']['id'] + if data.get('creators', False): + author_list = [item.get('creatorName', '') for item in data['creators']] + if author_list: + if author_list[0] == author_list[-1]: + authors = author_list[0] + else: + authors = ' and '.join(author_list) + + title = data.get('titles', ['title not found'])[0] + publisher = data.get('publisher', 'publisher not found') + year = data.get('publicationYear', 'publicationYear not found') + + if data.get('identifier', False): + doi = data.get('identifier').get('id', 'doi not found') url = f'https://doi.org/{doi}' + bibtex_entry = ( f'{"@misc{"}{url},\n\t' f'url = {{{url}}},\n\t' @@ -130,7 +126,7 @@ def _collect_cmip_citation(json_url, info_url): """Collect information from CMIP6 Data Citation Service.""" bibtex_entry = info_url json_data = _get_response(json_url) - if json_data and _valid_json_data(json_data): + if json_data: bibtex_entry = _json_to_bibtex(json_data) else: logger.info('Invalid json link %s', json_url) From 748987ce123c88475fd171ebdcfdfe67556e5899 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 17:21:40 +0100 Subject: [PATCH 039/105] refactor json to bibtex function --- esmvalcore/_citation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 8f028babed..ca053f9e66 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -83,6 +83,8 @@ def _json_to_bibtex(data): if author_list: if author_list[0] == author_list[-1]: authors = author_list[0] + if not authors: + authors = 'creatorName not found' else: authors = ' and '.join(author_list) From 299bd841551804040f7351b752ad685784384115 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 17:54:54 +0100 Subject: [PATCH 040/105] refactor --- esmvalcore/_citation.py | 66 +++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index ca053f9e66..d0bf2b7d9e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -12,36 +12,44 @@ def _write_citation_file(product): """ - Write citation information provided by the recorded provenance. - Recipe and cmip6 data references are saved into one bibtex file. - cmip6 data references are provided by CMIP6 data citation service. - each cmip6 data reference has a json link. In the case of internet - connection, cmip6 data references are saved into a bibtex file. - Otherwise, cmip6 data reference links are saved into a text file. + Write citation information provided by the recorded provenance. + + Recipe and cmip6 data references are saved into one bibtex file. + cmip6 data references are provided by CMIP6 data citation service. + each cmip6 data reference has a json link. In the case of internet + connection, cmip6 data references are saved into a bibtex file. + Otherwise, cmip6 data reference links are saved into a text file. 
""" # collect info from provenance product_name = os.path.splitext(product.filename)[0] products_tags = [] product_entries = '' product_urls = '' + product_info_urls = [] + product_json_urls = [] + for item in product.provenance.records: for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: products_tags.append(value) elif key.localpart == 'mip_era' and value == 'CMIP6': - json_url, info_url = _make_url(item.attributes) - cmip_entry = _collect_cmip_citation(json_url, info_url) - if cmip_entry == info_url: - product_urls += '{}\n'.format(cmip_entry) - else: - product_entries += '{}\n'.format(cmip_entry) + url_prefix = _make_url_prefix(item.attributes) + product_info_urls.append(_make_info_url(url_prefix)) + product_json_urls.append(_make_json_url(url_prefix)) # save CMIP6 url_info, if any - if product_urls: + if product_info_urls: + for info_url in product_info_urls: + product_urls += '{}\n'.format(info_url) with open(f'{product_name}_data_citation_url.txt', 'w') as file: file.write(product_urls) + # convert json_urls to bibtex entries + if product_json_urls: + for json_url in product_json_urls: + product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) + # convert tags to bibtex entries if products_tags: # make tags clean and unique @@ -56,7 +64,7 @@ def _write_citation_file(product): def _clean_tags(tags): - """some tages are combined in one string variable in provenance.""" + """Clean the tages that are recorded as str by provenance.""" pattern = re.compile(r'\w+') return pattern.findall(str(tags)) @@ -79,7 +87,9 @@ def _get_response(url): def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" if data.get('creators', False): - author_list = [item.get('creatorName', '') for item in data['creators']] + author_list = [ + item.get('creatorName', '') for item in data['creators'] + ] if author_list: if author_list[0] == author_list[-1]: authors = 
author_list[0] @@ -105,7 +115,7 @@ def _json_to_bibtex(data): f'author = {{{authors}}},\n\t' f'doi = {{{doi}}},\n' f'{"}"}\n' - ) + ) return bibtex_entry @@ -117,26 +127,28 @@ def _collect_bibtex_citation(tag): with open(bibtex_file, 'r') as file: entry = '{}'.format(file.read()) else: - raise ValueError('The reference file {} does not exist.'.format(bibtex_file)) + raise ValueError( + 'The reference file {} does not exist.'.format(bibtex_file) + ) else: logger.info('The reference folder does not exist.') entry = '' return entry -def _collect_cmip_citation(json_url, info_url): +def _collect_cmip_citation(json_url): """Collect information from CMIP6 Data Citation Service.""" - bibtex_entry = info_url json_data = _get_response(json_url) if json_data: bibtex_entry = _json_to_bibtex(json_data) else: logger.info('Invalid json link %s', json_url) + bibtex_entry = 'Invalid json link' return bibtex_entry -def _make_url(attribute): - """make json and info urls based on CMIP6 Data Citation Service.""" +def _make_url_prefix(attribute): + """Make url prefix based on CMIP6 Data Citation Service.""" # the order of keys is important localpart = { 'mip_era': '', @@ -149,6 +161,16 @@ def _make_url(attribute): if key.localpart in localpart: localpart[key.localpart] = value url_prefix = '.'.join(localpart.values()) + return url_prefix + + +def _make_json_url(url_prefix): + """Make json url based on CMIP6 Data Citation Service.""" json_url = f'{CMIP6_URL_STEM}/cerarest/exportcmip6?input={url_prefix}' + return json_url + + +def _make_info_url(url_prefix): + """Make info url based on CMIP6 Data Citation Service.""" info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' - return json_url, info_url + return info_url From 6508877752eb0a457ee13485b771d4f1e2403975 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 21 Feb 2020 18:10:11 +0100 Subject: [PATCH 041/105] refactor --- esmvalcore/_citation.py | 45 +++++++++++++++++++++++------------------ 1 file changed, 25 
insertions(+), 20 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index d0bf2b7d9e..7575ac6bff 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -22,32 +22,20 @@ def _write_citation_file(product): """ # collect info from provenance product_name = os.path.splitext(product.filename)[0] - products_tags = [] product_entries = '' - product_urls = '' - product_info_urls = [] - product_json_urls = [] - - for item in product.provenance.records: - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - if key.localpart in {'reference', 'references'}: - products_tags.append(value) - elif key.localpart == 'mip_era' and value == 'CMIP6': - url_prefix = _make_url_prefix(item.attributes) - product_info_urls.append(_make_info_url(url_prefix)) - product_json_urls.append(_make_json_url(url_prefix)) + urls = '' + products_tags, json_urls, info_urls = _get_citation_info(product) # save CMIP6 url_info, if any - if product_info_urls: - for info_url in product_info_urls: - product_urls += '{}\n'.format(info_url) + if info_urls: + for info_url in info_urls: + urls += '{}\n'.format(info_url) with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(product_urls) + file.write(urls) # convert json_urls to bibtex entries - if product_json_urls: - for json_url in product_json_urls: + if json_urls: + for json_url in json_urls: product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) # convert tags to bibtex entries @@ -63,6 +51,23 @@ def _write_citation_file(product): file.write(product_entries) +def _get_citation_info(product): + """Collect tags, and urls.""" + info_urls = [] + json_urls = [] + tags = [] + for item in product.provenance.records: + for key, value in item.attributes: + if key.namespace.prefix == 'attribute': + if key.localpart in {'reference', 'references'}: + tags.append(value) + elif key.localpart == 'mip_era' and value == 'CMIP6': + url_prefix = 
_make_url_prefix(item.attributes) + info_urls.append(_make_info_url(url_prefix)) + json_urls.append(_make_json_url(url_prefix)) + return tags, json_urls, info_urls + + def _clean_tags(tags): """Clean the tages that are recorded as str by provenance.""" pattern = re.compile(r'\w+') From df3b00822afe40b9cc33e8ef2537d51ec94fb1a8 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 09:44:11 +0100 Subject: [PATCH 042/105] remove unused function --- esmvalcore/_config.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 408c6e110d..9f77fdb3e0 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -245,8 +245,3 @@ def get_tag_value(section, tag): def replace_tags(section, tags): """Replace a list of tags with their values.""" return tuple(get_tag_value(section, tag) for tag in tags) - - -def cite_tags(section, tags): - """Replace a list of tags with their values.""" - return tuple(get_tag_value(section, tag) for tag in tags) From 7b1c339b07dd836c00b45e32e74344fd1bf4a927 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 09:54:48 +0100 Subject: [PATCH 043/105] refactor wrtite and save functions --- esmvalcore/_citation.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 7575ac6bff..a2560fbae9 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -22,16 +22,32 @@ def _write_citation_file(product): """ # collect info from provenance product_name = os.path.splitext(product.filename)[0] + info_urls = [] + json_urls = [] + products_tags = [] + for item in product.provenance.records: + for key, value in item.attributes: + if key.namespace.prefix == 'attribute': + if key.localpart in {'reference', 'references'}: + products_tags.append(value) + elif key.localpart == 'mip_era' and value == 'CMIP6': + url_prefix = _make_url_prefix(item.attributes) + 
info_urls.append(_make_info_url(url_prefix)) + json_urls.append(_make_json_url(url_prefix)) + + _save_citation_info(product_name, products_tags, json_urls, info_urls) + + +def _save_citation_info(product_name, products_tags, json_urls, info_urls): product_entries = '' - urls = '' - products_tags, json_urls, info_urls = _get_citation_info(product) + product_urls = '' # save CMIP6 url_info, if any if info_urls: for info_url in info_urls: - urls += '{}\n'.format(info_url) + product_urls += '{}\n'.format(info_url) with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(urls) + file.write(product_urls) # convert json_urls to bibtex entries if json_urls: @@ -51,23 +67,6 @@ def _write_citation_file(product): file.write(product_entries) -def _get_citation_info(product): - """Collect tags, and urls.""" - info_urls = [] - json_urls = [] - tags = [] - for item in product.provenance.records: - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - if key.localpart in {'reference', 'references'}: - tags.append(value) - elif key.localpart == 'mip_era' and value == 'CMIP6': - url_prefix = _make_url_prefix(item.attributes) - info_urls.append(_make_info_url(url_prefix)) - json_urls.append(_make_json_url(url_prefix)) - return tags, json_urls, info_urls - - def _clean_tags(tags): """Clean the tages that are recorded as str by provenance.""" pattern = re.compile(r'\w+') From 622bb86afaf490904a0eb811fcbb37d45b17b8da Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 10:03:08 +0100 Subject: [PATCH 044/105] remove new line --- esmvalcore/_task.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 399d2176f6..ef8c586734 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -555,7 +555,6 @@ def _collect_provenance(self): attrs = { 'script_file': self.script, } - for key in self.settings: if key not in ignore: attrs[key] = self.settings[key] From 
0968425c1cdf0acc416d25ddabbcf3ac9a350461 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 10:59:37 +0100 Subject: [PATCH 045/105] use diagnostics path instead of finding references path --- esmvalcore/_config.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/esmvalcore/_config.py b/esmvalcore/_config.py index 9f77fdb3e0..c8e08381db 100644 --- a/esmvalcore/_config.py +++ b/esmvalcore/_config.py @@ -27,18 +27,6 @@ def find_diagnostics(): DIAGNOSTICS_PATH = find_diagnostics() -def find_references(): - """Try to find the path for references folder.""" - try: - import esmvaltool - except ImportError: - return '' - return os.path.join(os.path.dirname(esmvaltool.__file__), 'references') - - -REFERENCES_PATH = find_references() - - def read_config_user_file(config_file, recipe_name): """Read config user file and store settings in a dictionary.""" with open(config_file, 'r') as file: From e7d6bd70240f96c5a28cc286652af46e698cecb5 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 11:29:30 +0100 Subject: [PATCH 046/105] use pathlib instead of os.path --- esmvalcore/_citation.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a2560fbae9..79322f473c 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -2,8 +2,15 @@ import os import logging import re +from pathlib import Path import requests -from ._config import REFERENCES_PATH + +from ._config import DIAGNOSTICS_PATH + +if DIAGNOSTICS_PATH: + REFERENCES_PATH = Path(DIAGNOSTICS_PATH) / 'references' +else: + REFERENCES_PATH = '' logger = logging.getLogger(__name__) @@ -126,10 +133,9 @@ def _json_to_bibtex(data): def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" if REFERENCES_PATH: - bibtex_file = os.path.join(REFERENCES_PATH, tag + '.bibtex') - if os.path.isfile(bibtex_file): - with open(bibtex_file, 'r') as file: - entry = 
'{}'.format(file.read()) + bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' + if bibtex_file.is_file(): + entry = bibtex_file.read_text() else: raise ValueError( 'The reference file {} does not exist.'.format(bibtex_file) From 776bd72338309c96785c8c3e2c6dc7016a69b10f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 13:09:38 +0100 Subject: [PATCH 047/105] add esmvaltool technical paper as default citation entry --- esmvalcore/_citation.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 79322f473c..adee2d782b 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -16,6 +16,27 @@ CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' +# it is the technical overview and should always be cited +ESMVALTOOL_PAPER = ( + '@article{righi19gmdd,\n\t' + 'doi = {10.5194/gmd-2019-226},\n\t' + 'url = {https://doi.org/10.5194%2Fgmd-2019-226},\n\t' + 'year = 2019,\n\t' + 'month = {sep},\n\t' + 'publisher = {Copernicus {GmbH}},\n\t' + 'author = {Mattia Righi and Bouwe Andela and Veronika Eyring ' + 'and Axel Lauer and Valeriu Predoi and Manuel Schlund ' + 'and Javier Vegas-Regidor and Lisa Bock and Björn Brötz ' + 'and Lee de Mora and Faruk Diblen and Laura Dreyer ' + 'and Niels Drost and Paul Earnshaw and Birgit Hassler ' + 'and Nikolay Koldunov and Bill Little and Saskia Loosveldt Tomas ' + 'and Klaus Zimmermann},\n\t' + 'title = {{ESMValTool} v2.0 ' + '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' + 'Technical overview}\n' + '}\n' +) + def _write_citation_file(product): """ @@ -70,8 +91,12 @@ def _save_citation_info(product_name, products_tags, json_urls, info_urls): # write one bibtex file if product_entries: - with open(f'{product_name}_citation.bibtex', 'w') as file: - file.write(product_entries) + bibtex_content = product_entries + else: + # add the technical overview paper that should always be cited + bibtex_content = 
ESMVALTOOL_PAPER + with open(f'{product_name}_citation.bibtex', 'w') as file: + file.write(bibtex_content) def _clean_tags(tags): From d51a3f038e59cb3c4346d34059ab58f1f76b4e51 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 13:51:57 +0100 Subject: [PATCH 048/105] fix the logger error message --- esmvalcore/_citation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index adee2d782b..14d620d503 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -114,7 +114,7 @@ def _get_response(url): if response.status_code == 200: json_data = response.json() else: - logger.info('Error in the CMIP json link') + logger.info('Error in the CMIP json link: %s', url) except IOError: logger.info('Error in receiving the CMIP json file') return json_data From b835eacd679292bc80f66cd6d0d036bca13b686b Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 15:40:01 +0100 Subject: [PATCH 049/105] style and refactor --- esmvalcore/_citation.py | 18 +++++++++--------- esmvalcore/_task.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 14d620d503..25507fce66 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -38,7 +38,7 @@ ) -def _write_citation_file(product): +def _write_citation_file(filename, provenance): """ Write citation information provided by the recorded provenance. @@ -49,24 +49,24 @@ def _write_citation_file(product): Otherwise, cmip6 data reference links are saved into a text file. 
""" # collect info from provenance - product_name = os.path.splitext(product.filename)[0] + product_name = os.path.splitext(filename)[0] info_urls = [] json_urls = [] - products_tags = [] - for item in product.provenance.records: + product_tags = [] + for item in provenance.records: for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: - products_tags.append(value) + product_tags.append(value) elif key.localpart == 'mip_era' and value == 'CMIP6': url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - _save_citation_info(product_name, products_tags, json_urls, info_urls) + _save_citation_info(product_name, product_tags, json_urls, info_urls) -def _save_citation_info(product_name, products_tags, json_urls, info_urls): +def _save_citation_info(product_name, product_tags, json_urls, info_urls): product_entries = '' product_urls = '' @@ -83,9 +83,9 @@ def _save_citation_info(product_name, products_tags, json_urls, info_urls): product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) # convert tags to bibtex entries - if products_tags: + if product_tags: # make tags clean and unique - tags = list(set(_clean_tags(products_tags))) + tags = list(set(_clean_tags(product_tags))) for tag in tags: product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index ef8c586734..a73436d4a8 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -578,7 +578,7 @@ def _collect_provenance(self): product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() - _write_citation_file(product) + _write_citation_file(product.filename, product.provenance) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, From 800cdb9847c8f2fc2141288728194faddc18e25f Mon 
Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 17:32:08 +0100 Subject: [PATCH 050/105] refactor --- esmvalcore/_citation.py | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 25507fce66..72260c8e81 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -83,20 +83,18 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) # convert tags to bibtex entries - if product_tags: - # make tags clean and unique - tags = list(set(_clean_tags(product_tags))) - for tag in tags: - product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) - - # write one bibtex file - if product_entries: - bibtex_content = product_entries + if REFERENCES_PATH: + if product_tags: + # make tags clean and unique + tags = list(set(_clean_tags(product_tags))) + for tag in tags: + product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) else: # add the technical overview paper that should always be cited - bibtex_content = ESMVALTOOL_PAPER + logger.info('The reference folder does not exist.') + product_entries = ESMVALTOOL_PAPER with open(f'{product_name}_citation.bibtex', 'w') as file: - file.write(bibtex_content) + file.write(product_entries) def _clean_tags(tags): @@ -157,17 +155,13 @@ def _json_to_bibtex(data): def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" - if REFERENCES_PATH: - bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' - if bibtex_file.is_file(): - entry = bibtex_file.read_text() - else: - raise ValueError( - 'The reference file {} does not exist.'.format(bibtex_file) - ) + bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' + if bibtex_file.is_file(): + entry = bibtex_file.read_text() else: - logger.info('The reference folder does not exist.') - entry = '' + raise ValueError( + 'The reference file {} does not 
exist.'.format(bibtex_file) + ) return entry From b96dc6da43582a493883d75b9b25c3b3b02733dc Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 18:05:04 +0100 Subject: [PATCH 051/105] fix broken tests due to removing references and replace tags --- tests/integration/test_recipe.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index cff40fcef8..89271d79f8 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -14,6 +14,7 @@ from esmvalcore._task import DiagnosticTask from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from esmvalcore.preprocessor._io import concatenate_callback +from esmvalcore._citation import REFERENCES_PATH from .test_diagnostic_run import write_config_user_file from .test_provenance import check_provenance @@ -1180,11 +1181,6 @@ def simulate_diagnostic_run(diagnostic_task): 'name': 'Bouwe Andela', }, }, - 'references': { - 'acknow_author': "Please acknowledge the author(s).", - 'contact_authors': "Please contact the author(s) ...", - 'acknow_project': "Please acknowledge the project(s).", - }, 'projects': { 'c3s-magic': 'C3S MAGIC project', }, @@ -1261,7 +1257,7 @@ def test_diagnostic_task_provenance( key).pop() == record[key] # Check that diagnostic script tags have been added - for key in ('statistics', 'domains', 'authors', 'references'): + for key in ('statistics', 'domains', 'authors'): assert product.attributes[key] == tuple(TAGS[key][k] for k in record[key]) @@ -1277,7 +1273,7 @@ def test_diagnostic_task_provenance( for key in ('description', 'references'): value = src['documentation'][key] if key == 'references': - value = ', '.join(TAGS[key][k] for k in value) + value = ', '.join(src['documentation'][key]) assert recipe_record[0].get_attribute('attribute:' + key).pop() == value From 8e403666483e203dec2788c8066bc63dc9a204bb Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: 
Mon, 24 Feb 2020 18:05:39 +0100 Subject: [PATCH 052/105] add a unit test for citation.py --- tests/integration/test_citation.py | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/integration/test_citation.py diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py new file mode 100644 index 0000000000..6838ee3663 --- /dev/null +++ b/tests/integration/test_citation.py @@ -0,0 +1,42 @@ +"""Test _citation.py.""" +from pathlib import Path +from prov.model import ProvDocument + +import esmvalcore +from esmvalcore._citation import _write_citation_file, ESMVALTOOL_PAPER +from esmvalcore._provenance import ESMVALTOOL_URI_PREFIX + +# Two test cases: +# 1: references are replaced with bibtex +# 2: CMIP6 citation info is retrieved from ES-DOC + + +def test_references(tmp_path, monkeypatch): + """Test1: references are replaced with bibtex.""" + # Create fake provenance + provenance = ProvDocument() + provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') + provenance.add_namespace('attribute', + uri=ESMVALTOOL_URI_PREFIX + 'attribute') + + filename = str(tmp_path / 'output.nc') + attributes = {'attribute:references': 'test_tag'} + provenance.entity('file:' + filename, attributes) + + # Create fake bibtex references tag file + references_path = tmp_path / 'references' + references_path.mkdir() + monkeypatch.setattr( + esmvalcore._citation, 'REFERENCES_PATH', references_path + ) + fake_bibtex_file = references_path / 'test_tag.bibtex' + fake_bibtex = "Fake bibtex file content\n" + fake_bibtex_file.write_text(fake_bibtex) + + _write_citation_file(filename, provenance) + citation_file = tmp_path / 'output_citation.bibtex' + citation = citation_file.read_text() + assert citation == '\n'.join([ESMVALTOOL_PAPER, fake_bibtex]) + + +# def test_cmip6_data_citation(tmp_path, monkeypatch): From f9e89c8e66cc7a2232d9767198d77463f6c04b83 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 24 Feb 2020 
18:11:21 +0100 Subject: [PATCH 053/105] safe to remove esmvaltool bibtex file --- esmvalcore/_citation.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 72260c8e81..40d8f448ea 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -5,7 +5,7 @@ from pathlib import Path import requests -from ._config import DIAGNOSTICS_PATH +from ._config import DIAGNOSTICS_PATH, ESMVALTOOL_PAPER_TAG if DIAGNOSTICS_PATH: REFERENCES_PATH = Path(DIAGNOSTICS_PATH) / 'references' @@ -67,20 +67,20 @@ def _write_citation_file(filename, provenance): def _save_citation_info(product_name, product_tags, json_urls, info_urls): - product_entries = '' - product_urls = '' + citation_entries = [ESMVALTOOL_PAPER] + citation_urls = '' # save CMIP6 url_info, if any if info_urls: for info_url in info_urls: - product_urls += '{}\n'.format(info_url) + citation_urls += '{}\n'.format(info_url) with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(product_urls) + file.write(citation_urls) # convert json_urls to bibtex entries if json_urls: for json_url in json_urls: - product_entries += '{}\n'.format(_collect_cmip_citation(json_url)) + citation_entries.append(_collect_cmip_citation(json_url)) # convert tags to bibtex entries if REFERENCES_PATH: @@ -88,13 +88,11 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): # make tags clean and unique tags = list(set(_clean_tags(product_tags))) for tag in tags: - product_entries += '{}\n'.format(_collect_bibtex_citation(tag)) - else: - # add the technical overview paper that should always be cited - logger.info('The reference folder does not exist.') - product_entries = ESMVALTOOL_PAPER + if tag not in ESMVALTOOL_PAPER_TAG: + citation_entries.append(_collect_bibtex_citation(tag)) + with open(f'{product_name}_citation.bibtex', 'w') as file: - file.write(product_entries) + 
file.write('\n'.join(citation_entries)) def _clean_tags(tags): From 66dd95a74d5ba9df298837610e88a1d0080fdc89 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 25 Feb 2020 16:28:47 +0100 Subject: [PATCH 054/105] move the esmvaltool paper tag to citation module --- esmvalcore/_provenance.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index 3996febb4d..9ab3c134c6 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -10,12 +10,11 @@ from prov.model import ProvDocument from ._version import __version__ +from ._citation import ESMVALTOOL_PAPER_TAG logger = logging.getLogger(__name__) ESMVALTOOL_URI_PREFIX = 'https://www.esmvaltool.org/' -# it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = 'righi19gmdd' def update_without_duplicating(bundle, other): From 85f03f2705ee07499d459c9fef2dcca4230ac307 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 25 Feb 2020 16:29:27 +0100 Subject: [PATCH 055/105] add the esmvaltool paper tag --- esmvalcore/_citation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 40d8f448ea..82089ef69e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -5,7 +5,7 @@ from pathlib import Path import requests -from ._config import DIAGNOSTICS_PATH, ESMVALTOOL_PAPER_TAG +from ._config import DIAGNOSTICS_PATH if DIAGNOSTICS_PATH: REFERENCES_PATH = Path(DIAGNOSTICS_PATH) / 'references' @@ -17,6 +17,7 @@ CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' # it is the technical overview and should always be cited +ESMVALTOOL_PAPER_TAG = 'righi19gmdd' ESMVALTOOL_PAPER = ( '@article{righi19gmdd,\n\t' 'doi = {10.5194/gmd-2019-226},\n\t' From a5da5f3c3690765c12b0472e89fc57168f5190b1 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 25 Feb 2020 16:30:03 +0100 Subject: [PATCH 056/105] remove unused import --- 
tests/integration/test_citation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 6838ee3663..f323f61943 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -1,5 +1,4 @@ """Test _citation.py.""" -from pathlib import Path from prov.model import ProvDocument import esmvalcore From dc37c3950feb73c09f5ff308f1b8e2a219e3b912 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 25 Feb 2020 17:15:45 +0100 Subject: [PATCH 057/105] remove unused import --- tests/integration/test_recipe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 89271d79f8..9bda6e29f5 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -14,7 +14,6 @@ from esmvalcore._task import DiagnosticTask from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from esmvalcore.preprocessor._io import concatenate_callback -from esmvalcore._citation import REFERENCES_PATH from .test_diagnostic_run import write_config_user_file from .test_provenance import check_provenance From cdd51817117eef4e871b7ac0918e1b77eba7d253 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 26 Feb 2020 16:11:34 +0100 Subject: [PATCH 058/105] refactor json to bibtex function --- esmvalcore/_citation.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 82089ef69e..90af8dc3e0 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -119,6 +119,14 @@ def _get_response(url): def _json_to_bibtex(data): """Make a bibtex entry from CMIP6 Data Citation json data.""" + url = 'url not found' + title = data.get('titles', ['title not found'])[0] + publisher = data.get('publisher', 'publisher not found') + year = data.get('publicationYear', 'publicationYear not found') + authors = 'creators not found' + doi 
= 'doi not found' + + author_list = [] if data.get('creators', False): author_list = [ item.get('creatorName', '') for item in data['creators'] @@ -131,13 +139,9 @@ def _json_to_bibtex(data): else: authors = ' and '.join(author_list) - title = data.get('titles', ['title not found'])[0] - publisher = data.get('publisher', 'publisher not found') - year = data.get('publicationYear', 'publicationYear not found') - if data.get('identifier', False): doi = data.get('identifier').get('id', 'doi not found') - url = f'https://doi.org/{doi}' + url = f'https://doi.org/{doi}' bibtex_entry = ( f'{"@misc{"}{url},\n\t' From 4e65db148f6ea285e8966fb080431f3c47cc843f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 26 Feb 2020 16:22:22 +0100 Subject: [PATCH 059/105] fix tests for _citation.py --- tests/integration/test_citation.py | 89 +++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index f323f61943..df5ba42cce 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -2,13 +2,10 @@ from prov.model import ProvDocument import esmvalcore -from esmvalcore._citation import _write_citation_file, ESMVALTOOL_PAPER +from esmvalcore._citation import (_write_citation_file, + ESMVALTOOL_PAPER, CMIP6_URL_STEM) from esmvalcore._provenance import ESMVALTOOL_URI_PREFIX -# Two test cases: -# 1: references are replaced with bibtex -# 2: CMIP6 citation info is retrieved from ES-DOC - def test_references(tmp_path, monkeypatch): """Test1: references are replaced with bibtex.""" @@ -17,7 +14,6 @@ def test_references(tmp_path, monkeypatch): provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') provenance.add_namespace('attribute', uri=ESMVALTOOL_URI_PREFIX + 'attribute') - filename = str(tmp_path / 'output.nc') attributes = {'attribute:references': 'test_tag'} provenance.entity('file:' + filename, attributes) @@ -38,4 +34,83 @@ def 
test_references(tmp_path, monkeypatch): assert citation == '\n'.join([ESMVALTOOL_PAPER, fake_bibtex]) -# def test_cmip6_data_citation(tmp_path, monkeypatch): +def mock_get_response(url): + """Mock _get_response() function.""" + json_data = False + if url.lower().startswith('https'): + json_data = {'titles': ['title is found']} + return json_data + + +def test_cmip6_data_citation(tmp_path, monkeypatch): + """Test2: CMIP6 citation info is retrieved from ES-DOC.""" + # Create fake provenance + provenance = ProvDocument() + provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') + provenance.add_namespace('attribute', + uri=ESMVALTOOL_URI_PREFIX + 'attribute') + attributes = { + 'attribute:mip_era': 'CMIP6', + 'attribute:activity_id': 'activity', + 'attribute:institution_id': 'institution', + 'attribute:source_id': 'source', + 'attribute:experiment_id': 'experiment', + } + filename = str(tmp_path / 'output.nc') + provenance.entity('file:' + filename, attributes) + + monkeypatch.setattr( + esmvalcore._citation, '_get_response', mock_get_response + ) + _write_citation_file(filename, provenance) + citation_file = tmp_path / 'output_citation.bibtex' + + # Create fake bibtex entry + url = 'url not found' + title = 'title is found' + publisher = 'publisher not found' + year = 'publicationYear not found' + authors = 'creators not found' + doi = 'doi not found' + fake_bibtex_entry = ( + f'{"@misc{"}{url},\n\t' + f'url = {{{url}}},\n\t' + f'title = {{{title}}},\n\t' + f'publisher = {{{publisher}}},\n\t' + f'year = {year},\n\t' + f'author = {{{authors}}},\n\t' + f'doi = {{{doi}}},\n' + f'{"}"}\n' + ) + assert citation_file.read_text() == '\n'.join( + [ESMVALTOOL_PAPER, fake_bibtex_entry] + ) + + +def test_cmip6_data_citation_url(tmp_path, monkeypatch): + """Test3: CMIP6 info_url is retrieved from ES-DOC.""" + # Create fake provenance + provenance = ProvDocument() + provenance.add_namespace('file', uri=ESMVALTOOL_URI_PREFIX + 'file') + 
provenance.add_namespace('attribute', + uri=ESMVALTOOL_URI_PREFIX + 'attribute') + attributes = { + 'attribute:mip_era': 'CMIP6', + 'attribute:activity_id': 'activity', + 'attribute:institution_id': 'institution', + 'attribute:source_id': 'source', + 'attribute:experiment_id': 'experiment', + } + filename = str(tmp_path / 'output.nc') + provenance.entity('file:' + filename, attributes) + + monkeypatch.setattr( + esmvalcore._citation, '_get_response', mock_get_response + ) + _write_citation_file(filename, provenance) + citation_url = tmp_path / 'output_data_citation_url.txt' + + # Create fake info url + fake_url_prefix = '.'.join(attributes.values()) + fake_info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}' + assert citation_url.read_text() == '{}\n'.format(fake_info_url) From 1eb18b2529483d46cd73dfb8be0118f4205c983a Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 26 Feb 2020 16:53:17 +0100 Subject: [PATCH 060/105] remove unused monkeypatch --- tests/integration/test_citation.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index df5ba42cce..36d11af22a 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -87,7 +87,7 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): ) -def test_cmip6_data_citation_url(tmp_path, monkeypatch): +def test_cmip6_data_citation_url(tmp_path): """Test3: CMIP6 info_url is retrieved from ES-DOC.""" # Create fake provenance provenance = ProvDocument() @@ -103,10 +103,6 @@ def test_cmip6_data_citation_url(tmp_path, monkeypatch): } filename = str(tmp_path / 'output.nc') provenance.entity('file:' + filename, attributes) - - monkeypatch.setattr( - esmvalcore._citation, '_get_response', mock_get_response - ) _write_citation_file(filename, provenance) citation_url = tmp_path / 'output_data_citation_url.txt' From 2f49ecbeebc2977be5c848260a40c16e09411555 Mon Sep 17 00:00:00 2001 From: 
SarahAlidoost Date: Fri, 28 Feb 2020 16:27:15 +0100 Subject: [PATCH 061/105] fix typo --- esmvalcore/_citation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 90af8dc3e0..f488a29fd7 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -58,12 +58,12 @@ def _write_citation_file(filename, provenance): for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart in {'reference', 'references'}: + ## check if value is a tag in recipe or diagnostics product_tags.append(value) elif key.localpart == 'mip_era' and value == 'CMIP6': url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - _save_citation_info(product_name, product_tags, json_urls, info_urls) @@ -97,7 +97,7 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): def _clean_tags(tags): - """Clean the tages that are recorded as str by provenance.""" + """Clean the tags that are recorded as str by provenance.""" pattern = re.compile(r'\w+') return pattern.findall(str(tags)) From 789df2bb2352383e37f41376da31eb06d8b91b11 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 6 Mar 2020 16:46:30 +0100 Subject: [PATCH 062/105] add support for references that are not in diagnostics, refactor --- esmvalcore/_citation.py | 49 ++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index f488a29fd7..425e264b9b 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -49,39 +49,58 @@ def _write_citation_file(filename, provenance): connection, cmip6 data references are saved into a bibtex file. Otherwise, cmip6 data reference links are saved into a text file. 
""" - # collect info from provenance product_name = os.path.splitext(filename)[0] info_urls = [] json_urls = [] product_tags = [] + product_refs = [] + # collect references from provenance for item in provenance.records: + attributes = {} for key, value in item.attributes: if key.namespace.prefix == 'attribute': - if key.localpart in {'reference', 'references'}: - ## check if value is a tag in recipe or diagnostics - product_tags.append(value) - elif key.localpart == 'mip_era' and value == 'CMIP6': + attributes[key.localpart] = value + attributes[item.identifier.namespace.prefix] = item.identifier.namespace.prefix + # check if item is related to a diagnostics + if {'references', 'script_file'} <= set(attributes): + product_tags.append(attributes['references']) + # check if item is related to a recipe + if {'references', 'recipe'} <= set(attributes): + product_tags.append(attributes['references']) + # check if item is not related to a diagnostics or recipe + if not attributes.keys() & {'recipe', 'script_file'} and attributes.keys() & {'references'}: + if attributes['references'] != ESMVALTOOL_PAPER_TAG: + product_refs.append(attributes['references']) + + # collect cmip6 info from provenance + for item in provenance.records: + attributes = {} + for key, value in item.attributes: + if key.namespace.prefix == 'attribute': + if key.localpart == 'mip_era' and value == 'CMIP6': url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - _save_citation_info(product_name, product_tags, json_urls, info_urls) + _save_citation_info(product_name, product_tags, product_refs, json_urls, info_urls) -def _save_citation_info(product_name, product_tags, json_urls, info_urls): + +def _save_citation_info(product_name, product_tags, product_refs, json_urls, info_urls): citation_entries = [ESMVALTOOL_PAPER] - citation_urls = '' # save CMIP6 url_info, if any if info_urls: - for info_url in info_urls: - 
citation_urls += '{}\n'.format(info_url) - with open(f'{product_name}_data_citation_url.txt', 'w') as file: - file.write(citation_urls) + with open(f'{product_name}_data_citation_info.txt', 'w') as file: + file.write('\n'.join(list(set(info_urls)))) + + # save any refrences info that is not related to recipe or diagnostics + if product_refs: + with open(f'{product_name}_data_citation_info.txt', 'w') as file: + file.write('\n'.join(list(set(product_refs)))) # convert json_urls to bibtex entries - if json_urls: - for json_url in json_urls: - citation_entries.append(_collect_cmip_citation(json_url)) + for json_url in json_urls: + citation_entries.append(_collect_cmip_citation(json_url)) # convert tags to bibtex entries if REFERENCES_PATH: From 0b96874f9068d5d95312c52a7f80fe094c6577f3 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 6 Mar 2020 17:24:21 +0100 Subject: [PATCH 063/105] fix test for new codes in citation.py --- tests/integration/test_citation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 36d11af22a..eb6b64296f 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -15,7 +15,10 @@ def test_references(tmp_path, monkeypatch): provenance.add_namespace('attribute', uri=ESMVALTOOL_URI_PREFIX + 'attribute') filename = str(tmp_path / 'output.nc') - attributes = {'attribute:references': 'test_tag'} + attributes = { + 'attribute:references': 'test_tag', + 'attribute:script_file': 'diagnostics.py' + } provenance.entity('file:' + filename, attributes) # Create fake bibtex references tag file @@ -104,9 +107,9 @@ def test_cmip6_data_citation_url(tmp_path): filename = str(tmp_path / 'output.nc') provenance.entity('file:' + filename, attributes) _write_citation_file(filename, provenance) - citation_url = tmp_path / 'output_data_citation_url.txt' + citation_url = tmp_path / 'output_data_citation_info.txt' # Create fake 
info url fake_url_prefix = '.'.join(attributes.values()) fake_info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}' - assert citation_url.read_text() == '{}\n'.format(fake_info_url) + assert citation_url.read_text() == fake_info_url From c452ba6025ccc5f77b68eb983ca60c9f9a932bc1 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 6 Mar 2020 17:25:00 +0100 Subject: [PATCH 064/105] fix newlines in entries --- esmvalcore/_citation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 425e264b9b..fd2392b0a7 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -35,7 +35,7 @@ 'title = {{ESMValTool} v2.0 ' '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' 'Technical overview}\n' - '}\n' + '}' ) @@ -74,7 +74,6 @@ def _write_citation_file(filename, provenance): # collect cmip6 info from provenance for item in provenance.records: - attributes = {} for key, value in item.attributes: if key.namespace.prefix == 'attribute': if key.localpart == 'mip_era' and value == 'CMIP6': @@ -100,7 +99,9 @@ def _save_citation_info(product_name, product_tags, product_refs, json_urls, inf # convert json_urls to bibtex entries for json_url in json_urls: - citation_entries.append(_collect_cmip_citation(json_url)) + cmip_citation = _collect_cmip_citation(json_url) + if cmip_citation: + citation_entries.append(cmip_citation) # convert tags to bibtex entries if REFERENCES_PATH: @@ -194,7 +195,7 @@ def _collect_cmip_citation(json_url): bibtex_entry = _json_to_bibtex(json_data) else: logger.info('Invalid json link %s', json_url) - bibtex_entry = 'Invalid json link' + bibtex_entry = False return bibtex_entry From dfe6e1234781ff27fc610802dafaf256d8851684 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 6 Mar 2020 17:41:42 +0100 Subject: [PATCH 065/105] style --- esmvalcore/_citation.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff 
--git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index fd2392b0a7..bf31a2ed9e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -53,14 +53,18 @@ def _write_citation_file(filename, provenance): info_urls = [] json_urls = [] product_tags = [] - product_refs = [] # collect references from provenance for item in provenance.records: attributes = {} for key, value in item.attributes: if key.namespace.prefix == 'attribute': attributes[key.localpart] = value - attributes[item.identifier.namespace.prefix] = item.identifier.namespace.prefix + identifier = item.identifier.namespace.prefix + attributes[identifier] = identifier + if key.localpart == 'mip_era' and value == 'CMIP6': + url_prefix = _make_url_prefix(item.attributes) + info_urls.append(_make_info_url(url_prefix)) + json_urls.append(_make_json_url(url_prefix)) # check if item is related to a diagnostics if {'references', 'script_file'} <= set(attributes): product_tags.append(attributes['references']) @@ -68,35 +72,23 @@ def _write_citation_file(filename, provenance): if {'references', 'recipe'} <= set(attributes): product_tags.append(attributes['references']) # check if item is not related to a diagnostics or recipe - if not attributes.keys() & {'recipe', 'script_file'} and attributes.keys() & {'references'}: + if (not attributes.keys() & {'recipe', 'script_file'} and + attributes.keys() & {'references'}): if attributes['references'] != ESMVALTOOL_PAPER_TAG: - product_refs.append(attributes['references']) - - # collect cmip6 info from provenance - for item in provenance.records: - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - if key.localpart == 'mip_era' and value == 'CMIP6': - url_prefix = _make_url_prefix(item.attributes) - info_urls.append(_make_info_url(url_prefix)) - json_urls.append(_make_json_url(url_prefix)) + info_urls.append(attributes['references']) - _save_citation_info(product_name, product_tags, product_refs, json_urls, info_urls) + 
_save_citation_info(product_name, product_tags, json_urls, info_urls) -def _save_citation_info(product_name, product_tags, product_refs, json_urls, info_urls): +def _save_citation_info(product_name, product_tags, json_urls, info_urls): citation_entries = [ESMVALTOOL_PAPER] # save CMIP6 url_info, if any + # save any refrences info that is not related to recipe or diagnostics if info_urls: with open(f'{product_name}_data_citation_info.txt', 'w') as file: file.write('\n'.join(list(set(info_urls)))) - # save any refrences info that is not related to recipe or diagnostics - if product_refs: - with open(f'{product_name}_data_citation_info.txt', 'w') as file: - file.write('\n'.join(list(set(product_refs)))) - # convert json_urls to bibtex entries for json_url in json_urls: cmip_citation = _collect_cmip_citation(json_url) From 492c26d0cb8cacd7e8fe2e10d42a2e3e95654c14 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 9 Mar 2020 11:52:34 +0100 Subject: [PATCH 066/105] refactor --- esmvalcore/_citation.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index bf31a2ed9e..5f58347606 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -35,7 +35,7 @@ 'title = {{ESMValTool} v2.0 ' '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' 'Technical overview}\n' - '}' + '}\n' ) @@ -53,6 +53,7 @@ def _write_citation_file(filename, provenance): info_urls = [] json_urls = [] product_tags = [] + section = 'references' # collect references from provenance for item in provenance.records: attributes = {} @@ -65,17 +66,14 @@ def _write_citation_file(filename, provenance): url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - # check if item is related to a diagnostics - if {'references', 'script_file'} <= set(attributes): - product_tags.append(attributes['references']) - # check if item is 
related to a recipe - if {'references', 'recipe'} <= set(attributes): - product_tags.append(attributes['references']) - # check if item is not related to a diagnostics or recipe - if (not attributes.keys() & {'recipe', 'script_file'} and - attributes.keys() & {'references'}): - if attributes['references'] != ESMVALTOOL_PAPER_TAG: - info_urls.append(attributes['references']) + if section in attributes.keys(): + # check if reference is related to a diagnostics or a recipe + if attributes.keys() & {'script_file', 'recipe'}: + product_tags.append(attributes[section]) + # check if reference is not related to a diagnostics or a recipe + if (not attributes.keys() & {'recipe', 'script_file'} and + attributes[section] != ESMVALTOOL_PAPER_TAG): + info_urls.append(attributes[section]) _save_citation_info(product_name, product_tags, json_urls, info_urls) From 433296359fac68c00905948e7c940359df51f610 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 9 Mar 2020 18:01:22 +0100 Subject: [PATCH 067/105] add a function to convert bibtex to reference entry --- esmvalcore/_citation.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 5f58347606..a0a38742ef 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -216,3 +216,29 @@ def _make_info_url(url_prefix): """Make info url based on CMIP6 Data Citation Service.""" info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' return info_url + + +def cite_tag_value(tag): + """Convert a tag to bibtex entry.""" + reference_entry = [] + fields = ['title', 'publisher', 'authors', 'journal', 'doi'] + pattern = r'.*?\{(.*)\}.*' + + if REFERENCES_PATH: + entry = _collect_bibtex_citation(tag).split(',') + for item in entry: + if 'authors' in item: + authors_name = re.search(pattern, item).group(1).split('and') + if authors_name[0] == authors_name[-1]: + reference_entry.append(authors_name) + else: + 
reference_entry.append([f'{authors_name[0]}, et al.']) + if 'year' in item: + year = item.split('year =')[-1] + reference_entry.append(year) + for field in fields: + reference_entry = [ + re.search(pattern, item).group(1) for item in entry if field in item + ] + print(','.join(reference_entry)) + return ','.join(reference_entry) From 38a1a18f10f3e347d35e0c1437ba6ab0fd9aa841 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 11:03:58 +0100 Subject: [PATCH 068/105] fix the function cite_tag_value --- esmvalcore/_citation.py | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a0a38742ef..b6027dfe09 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -218,27 +218,10 @@ def _make_info_url(url_prefix): return info_url -def cite_tag_value(tag): - """Convert a tag to bibtex entry.""" - reference_entry = [] - fields = ['title', 'publisher', 'authors', 'journal', 'doi'] - pattern = r'.*?\{(.*)\}.*' - +def cite_tag_value(tags): + """Convert tags to bibtex entries.""" + reference_entries = '' if REFERENCES_PATH: - entry = _collect_bibtex_citation(tag).split(',') - for item in entry: - if 'authors' in item: - authors_name = re.search(pattern, item).group(1).split('and') - if authors_name[0] == authors_name[-1]: - reference_entry.append(authors_name) - else: - reference_entry.append([f'{authors_name[0]}, et al.']) - if 'year' in item: - year = item.split('year =')[-1] - reference_entry.append(year) - for field in fields: - reference_entry = [ - re.search(pattern, item).group(1) for item in entry if field in item - ] - print(','.join(reference_entry)) - return ','.join(reference_entry) + reference_entries = [_collect_bibtex_citation(tag) for tag in [tags]] + reference_entries = '\n'.join(reference_entries) + return reference_entries From 7a061cd754230a1aaacd27dd4a4b76c5df41d072 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 
11:57:14 +0100 Subject: [PATCH 069/105] remove the unnecessary condition for TAGS --- esmvalcore/_recipe.py | 2 +- esmvalcore/_task.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index dcb65704f7..7f20c5ace1 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -949,7 +949,7 @@ def _initalize_provenance(self, raw_documentation): """Initialize the recipe provenance.""" doc = deepcopy(raw_documentation) for key in doc: - if key in TAGS and key not in 'references': + if key in TAGS: doc[key] = replace_tags(key, doc[key]) return get_recipe_provenance(doc, self._filename) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index a73436d4a8..42bc44c73a 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -572,7 +572,7 @@ def _collect_provenance(self): attributes.update(deepcopy(attrs)) for key in attributes: - if key in TAGS and key not in 'references': + if key in TAGS: attributes[key] = replace_tags(key, attributes[key]) product = TrackedFile(filename, attributes, ancestors) From f30a0d62295d71622f13a6370f93700498a73519 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 14:57:10 +0100 Subject: [PATCH 070/105] add tests to check if references have been added --- tests/integration/test_recipe.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 9bda6e29f5..14507738ad 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -14,6 +14,7 @@ from esmvalcore._task import DiagnosticTask from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from esmvalcore.preprocessor._io import concatenate_callback +from esmvalcore._citation import REFERENCES_PATH, _clean_tags from .test_diagnostic_run import write_config_user_file from .test_provenance import check_provenance @@ -1260,6 +1261,9 @@ def 
test_diagnostic_task_provenance( assert product.attributes[key] == tuple(TAGS[key][k] for k in record[key]) + # Check that diagnostic reference files have been added + _test_bibtex_files(product.attributes['references']) + # Check that recipe diagnostic tags have been added src = yaml.safe_load(DEFAULT_DOCUMENTATION + content) for key in ('realms', 'themes'): @@ -1269,12 +1273,16 @@ def test_diagnostic_task_provenance( # Check that recipe tags have been added recipe_record = product.provenance.get_record('recipe:recipe_test.yml') assert len(recipe_record) == 1 - for key in ('description', 'references'): - value = src['documentation'][key] - if key == 'references': - value = ', '.join(src['documentation'][key]) - assert recipe_record[0].get_attribute('attribute:' + - key).pop() == value + key = 'description' + value = src['documentation'][key] + assert recipe_record[0].get_attribute('attribute:' + + key).pop() == value + + # Check that recipe reference files have been added + key = 'references' + recipe_tags = recipe_record[0].get_attribute('attribute:' + + key).pop() + _test_bibtex_files(recipe_tags) # Test that provenance was saved to netcdf, xml and svg plot cube = iris.load(product.filename)[0] @@ -1284,6 +1292,17 @@ def test_diagnostic_task_provenance( assert os.path.exists(prefix + '.svg') +def _test_bibtex_files(product_tags): + """check bibtex files exit in REFERENCES_PATH.""" + if REFERENCES_PATH: + tags = list(set(_clean_tags(product_tags))) + for tag in tags: + bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' + if not bibtex_file.is_file(): + raise ValueError( + 'The reference file {} does not exist.'.format(bibtex_file) + ) + def test_alias_generation(tmp_path, patched_datafinder, config_user): content = dedent(""" From a3c7e420c6d386f611d20887883090de758101f0 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 15:29:53 +0100 Subject: [PATCH 071/105] refactor --- esmvalcore/_citation.py | 17 ++++++++--------- 1 file changed, 8 
insertions(+), 9 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index b6027dfe09..71640f31b3 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -94,13 +94,11 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): citation_entries.append(cmip_citation) # convert tags to bibtex entries - if REFERENCES_PATH: - if product_tags: - # make tags clean and unique - tags = list(set(_clean_tags(product_tags))) - for tag in tags: - if tag not in ESMVALTOOL_PAPER_TAG: - citation_entries.append(_collect_bibtex_citation(tag)) + if REFERENCES_PATH and product_tags: + # make tags clean and unique + tags = list(set(_clean_tags(product_tags))) + for tag in tags: + citation_entries.append(_collect_bibtex_citation(tag)) with open(f'{product_name}_citation.bibtex', 'w') as file: file.write('\n'.join(citation_entries)) @@ -172,9 +170,10 @@ def _collect_bibtex_citation(tag): if bibtex_file.is_file(): entry = bibtex_file.read_text() else: - raise ValueError( - 'The reference file {} does not exist.'.format(bibtex_file) + logger.info( + 'The reference file %s does not exist.', bibtex_file ) + entry = '' return entry From 1d81db719b0bea8b9f6792d6713191d6bc2b3478 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 10 Mar 2020 17:59:06 +0100 Subject: [PATCH 072/105] refactor --- esmvalcore/_citation.py | 44 ++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 71640f31b3..e83d26065c 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -53,27 +53,31 @@ def _write_citation_file(filename, provenance): info_urls = [] json_urls = [] product_tags = [] - section = 'references' - # collect references from provenance for item in provenance.records: - attributes = {} - for key, value in item.attributes: - if key.namespace.prefix == 'attribute': - attributes[key.localpart] = value - identifier = 
item.identifier.namespace.prefix - attributes[identifier] = identifier - if key.localpart == 'mip_era' and value == 'CMIP6': - url_prefix = _make_url_prefix(item.attributes) - info_urls.append(_make_info_url(url_prefix)) - json_urls.append(_make_json_url(url_prefix)) - if section in attributes.keys(): - # check if reference is related to a diagnostics or a recipe - if attributes.keys() & {'script_file', 'recipe'}: - product_tags.append(attributes[section]) - # check if reference is not related to a diagnostics or a recipe - if (not attributes.keys() & {'recipe', 'script_file'} and - attributes[section] != ESMVALTOOL_PAPER_TAG): - info_urls.append(attributes[section]) + # get cmip6 citation info + value = item.get_attribute('attribute:' + 'mip_era') + if 'CMIP6' in list(value): + url_prefix = _make_url_prefix(item.attributes) + info_urls.append(_make_info_url(url_prefix)) + json_urls.append(_make_json_url(url_prefix)) + # get diagnostics citation tags + if item.get_attribute('attribute:' + 'script_file'): + product_tags.append( + item.get_attribute('attribute:' + 'references').pop() + ) + # get recipe citation tags + if item.get_attribute('attribute:' + 'references'): + if item.identifier.namespace.prefix == 'recipe': + product_tags.append( + item.get_attribute('attribute:' + 'references').pop() + ) + # get other references information recorded by provenance + tags = list(set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG]))) + for item in provenance.records: + if item.get_attribute('attribute:' + 'references'): + value = item.get_attribute('attribute:' + 'references').pop() + if value not in tags: + info_urls.append(value) _save_citation_info(product_name, product_tags, json_urls, info_urls) From 504a17fb0b9e7a75cd4e66fc56e888c71f48fe11 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 11 Mar 2020 10:15:27 +0100 Subject: [PATCH 073/105] fix broken test --- esmvalcore/_citation.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git 
a/esmvalcore/_citation.py b/esmvalcore/_citation.py index e83d26065c..57d13c9d87 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -60,17 +60,18 @@ def _write_citation_file(filename, provenance): url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - # get diagnostics citation tags - if item.get_attribute('attribute:' + 'script_file'): - product_tags.append( - item.get_attribute('attribute:' + 'references').pop() - ) - # get recipe citation tags if item.get_attribute('attribute:' + 'references'): + # get recipe citation tags if item.identifier.namespace.prefix == 'recipe': product_tags.append( item.get_attribute('attribute:' + 'references').pop() ) + # get diagnostics citation tags + if item.get_attribute('attribute:' + 'script_file'): + product_tags.append( + item.get_attribute('attribute:' + 'references').pop() + ) + # get other references information recorded by provenance tags = list(set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG]))) for item in provenance.records: From 857832bd9b8e94616b90a820ce10a89c367384c8 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 11 Mar 2020 15:49:25 +0100 Subject: [PATCH 074/105] fix the test for tags in test_recipe --- tests/integration/test_recipe.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 14507738ad..106dc106d6 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1273,16 +1273,13 @@ def test_diagnostic_task_provenance( # Check that recipe tags have been added recipe_record = product.provenance.get_record('recipe:recipe_test.yml') assert len(recipe_record) == 1 - key = 'description' - value = src['documentation'][key] - assert recipe_record[0].get_attribute('attribute:' + - key).pop() == value - - # Check that recipe reference files have been added - key = 
'references' - recipe_tags = recipe_record[0].get_attribute('attribute:' + - key).pop() - _test_bibtex_files(recipe_tags) + for key in ('description', 'references'): + value = src['documentation'][key] + if key == 'references': + value = ','.join(src['documentation'][key]) + _test_bibtex_files(value) + assert recipe_record[0].get_attribute('attribute:' + + key).pop() == value # Test that provenance was saved to netcdf, xml and svg plot cube = iris.load(product.filename)[0] From 0997805b3960ee989f36e8547593f89070552ad4 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 11 Mar 2020 21:48:21 +0100 Subject: [PATCH 075/105] add a space after , for joining tags --- tests/integration/test_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 106dc106d6..fab899c586 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1276,7 +1276,7 @@ def test_diagnostic_task_provenance( for key in ('description', 'references'): value = src['documentation'][key] if key == 'references': - value = ','.join(src['documentation'][key]) + value = ', '.join(src['documentation'][key]) _test_bibtex_files(value) assert recipe_record[0].get_attribute('attribute:' + key).pop() == value From 16043f12e51abf172848f86d5ccdabba22a90ebc Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 11 Mar 2020 21:49:28 +0100 Subject: [PATCH 076/105] remove pop() and refactor --- esmvalcore/_citation.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 57d13c9d87..38da8f5868 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -54,31 +54,29 @@ def _write_citation_file(filename, provenance): json_urls = [] product_tags = [] for item in provenance.records: + reference_attr = item.get_attribute('attribute:' + 'references') # get cmip6 citation info value = 
item.get_attribute('attribute:' + 'mip_era') if 'CMIP6' in list(value): url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) - if item.get_attribute('attribute:' + 'references'): + if reference_attr: # get recipe citation tags if item.identifier.namespace.prefix == 'recipe': - product_tags.append( - item.get_attribute('attribute:' + 'references').pop() - ) + product_tags += list(reference_attr) # get diagnostics citation tags if item.get_attribute('attribute:' + 'script_file'): - product_tags.append( - item.get_attribute('attribute:' + 'references').pop() - ) + product_tags += list(reference_attr) # get other references information recorded by provenance - tags = list(set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG]))) + tags = set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG])) for item in provenance.records: - if item.get_attribute('attribute:' + 'references'): - value = item.get_attribute('attribute:' + 'references').pop() - if value not in tags: - info_urls.append(value) + reference_attr = item.get_attribute('attribute:' + 'references') + if reference_attr: + value = set(_clean_tags(reference_attr)) + if not value.issubset(tags): + info_urls += list(reference_attr) _save_citation_info(product_name, product_tags, json_urls, info_urls) From 0ee0047f125433df3642cc00c4282390165ad398 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Thu, 12 Mar 2020 10:50:34 +0100 Subject: [PATCH 077/105] fix flake8 error --- tests/integration/test_recipe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index fab899c586..2d3276fe1a 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1300,6 +1300,7 @@ def _test_bibtex_files(product_tags): 'The reference file {} does not exist.'.format(bibtex_file) ) + def test_alias_generation(tmp_path, patched_datafinder, config_user): content = 
dedent(""" From b0a2372bd415b196d0c444c535e1ae6f22e286c6 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 16:29:43 +0100 Subject: [PATCH 078/105] remove cite_tag_value --- esmvalcore/_citation.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 38da8f5868..a3ca74c614 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -218,12 +218,3 @@ def _make_info_url(url_prefix): """Make info url based on CMIP6 Data Citation Service.""" info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' return info_url - - -def cite_tag_value(tags): - """Convert tags to bibtex entries.""" - reference_entries = '' - if REFERENCES_PATH: - reference_entries = [_collect_bibtex_citation(tag) for tag in [tags]] - reference_entries = '\n'.join(reference_entries) - return reference_entries From 1d64c43bb3b1bf8bbadba34d0d78b9703dbfe990 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 16:33:33 +0100 Subject: [PATCH 079/105] move \t to begning of the line, remove + from get attribute --- esmvalcore/_citation.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a3ca74c614..fbe0c4c896 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -19,20 +19,20 @@ # it is the technical overview and should always be cited ESMVALTOOL_PAPER_TAG = 'righi19gmdd' ESMVALTOOL_PAPER = ( - '@article{righi19gmdd,\n\t' - 'doi = {10.5194/gmd-2019-226},\n\t' - 'url = {https://doi.org/10.5194%2Fgmd-2019-226},\n\t' - 'year = 2019,\n\t' - 'month = {sep},\n\t' - 'publisher = {Copernicus {GmbH}},\n\t' - 'author = {Mattia Righi and Bouwe Andela and Veronika Eyring ' + '@article{righi19gmdd,\n' + '\tdoi = {10.5194/gmd-2019-226},\n' + '\turl = {https://doi.org/10.5194%2Fgmd-2019-226},\n' + '\tyear = 2019,\n' + '\tmonth = {sep},\n' + '\tpublisher = {Copernicus {GmbH}},\n' + '\tauthor = {Mattia 
Righi and Bouwe Andela and Veronika Eyring ' 'and Axel Lauer and Valeriu Predoi and Manuel Schlund ' 'and Javier Vegas-Regidor and Lisa Bock and Björn Brötz ' 'and Lee de Mora and Faruk Diblen and Laura Dreyer ' 'and Niels Drost and Paul Earnshaw and Birgit Hassler ' 'and Nikolay Koldunov and Bill Little and Saskia Loosveldt Tomas ' - 'and Klaus Zimmermann},\n\t' - 'title = {{ESMValTool} v2.0 ' + 'and Klaus Zimmermann},\n' + '\ttitle = {{ESMValTool} v2.0 ' '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' 'Technical overview}\n' '}\n' @@ -54,9 +54,9 @@ def _write_citation_file(filename, provenance): json_urls = [] product_tags = [] for item in provenance.records: - reference_attr = item.get_attribute('attribute:' + 'references') + reference_attr = item.get_attribute('attribute:references') # get cmip6 citation info - value = item.get_attribute('attribute:' + 'mip_era') + value = item.get_attribute('attribute:mip_era') if 'CMIP6' in list(value): url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) @@ -66,13 +66,13 @@ def _write_citation_file(filename, provenance): if item.identifier.namespace.prefix == 'recipe': product_tags += list(reference_attr) # get diagnostics citation tags - if item.get_attribute('attribute:' + 'script_file'): + if item.get_attribute('attribute:script_file'): product_tags += list(reference_attr) # get other references information recorded by provenance tags = set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG])) for item in provenance.records: - reference_attr = item.get_attribute('attribute:' + 'references') + reference_attr = item.get_attribute('attribute:references') if reference_attr: value = set(_clean_tags(reference_attr)) if not value.issubset(tags): From 7bb95a8a3247a503ac618564951e7c4635c0acdb Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 17:21:25 +0100 Subject: [PATCH 080/105] refactor write_citation_file function --- esmvalcore/_citation.py | 18 ++++++------------ 
1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index fbe0c4c896..acefb9790e 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -57,25 +57,19 @@ def _write_citation_file(filename, provenance): reference_attr = item.get_attribute('attribute:references') # get cmip6 citation info value = item.get_attribute('attribute:mip_era') - if 'CMIP6' in list(value): + if 'CMIP6' in value: url_prefix = _make_url_prefix(item.attributes) info_urls.append(_make_info_url(url_prefix)) json_urls.append(_make_json_url(url_prefix)) if reference_attr: # get recipe citation tags if item.identifier.namespace.prefix == 'recipe': - product_tags += list(reference_attr) + product_tags.extend(reference_attr) # get diagnostics citation tags - if item.get_attribute('attribute:script_file'): - product_tags += list(reference_attr) - - # get other references information recorded by provenance - tags = set(_clean_tags(product_tags + [ESMVALTOOL_PAPER_TAG])) - for item in provenance.records: - reference_attr = item.get_attribute('attribute:references') - if reference_attr: - value = set(_clean_tags(reference_attr)) - if not value.issubset(tags): + elif item.get_attribute('attribute:script_file'): + print(reference_attr) + product_tags.extend(reference_attr) + elif ESMVALTOOL_PAPER_TAG not in reference_attr: info_urls += list(reference_attr) _save_citation_info(product_name, product_tags, json_urls, info_urls) From d57c984e79faf9d69e3e316cd00dd00a60e3e193 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 17:29:26 +0100 Subject: [PATCH 081/105] refactor clean_tag function, fix the logger --- esmvalcore/_citation.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index acefb9790e..05e1208825 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -67,7 +67,6 @@ def _write_citation_file(filename, provenance): 
product_tags.extend(reference_attr) # get diagnostics citation tags elif item.get_attribute('attribute:script_file'): - print(reference_attr) product_tags.extend(reference_attr) elif ESMVALTOOL_PAPER_TAG not in reference_attr: info_urls += list(reference_attr) @@ -93,7 +92,7 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): # convert tags to bibtex entries if REFERENCES_PATH and product_tags: # make tags clean and unique - tags = list(set(_clean_tags(product_tags))) + tags = _clean_tags(product_tags) for tag in tags: citation_entries.append(_collect_bibtex_citation(tag)) @@ -104,7 +103,7 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): def _clean_tags(tags): """Clean the tags that are recorded as str by provenance.""" pattern = re.compile(r'\w+') - return pattern.findall(str(tags)) + return list(set(pattern.findall(str(tags)))) def _get_response(url): @@ -116,9 +115,9 @@ def _get_response(url): if response.status_code == 200: json_data = response.json() else: - logger.info('Error in the CMIP json link: %s', url) + logger.warning('Error in the CMIP6 citation link: %s', url) except IOError: - logger.info('Error in receiving the CMIP json file') + logger.info('No network connection, unable to retrieve CMIP6 citation information') return json_data From 035a4422750aa022d06d53606b2b1dd3b173e9db Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 17:40:19 +0100 Subject: [PATCH 082/105] fix minor things --- esmvalcore/_citation.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 05e1208825..9d509ac7e4 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -147,15 +147,17 @@ def _json_to_bibtex(data): doi = data.get('identifier').get('id', 'doi not found') url = f'https://doi.org/{doi}' - bibtex_entry = ( - f'{"@misc{"}{url},\n\t' - f'url = {{{url}}},\n\t' - f'title = {{{title}}},\n\t' - 
f'publisher = {{{publisher}}},\n\t' - f'year = {year},\n\t' - f'author = {{{authors}}},\n\t' - f'doi = {{{doi}}},\n' - f'{"}"}\n' + bibtex_entry = textwrap.dedent( + f""" + @misc{{{url} + \turl = {{{url}}}, + \ttitle = {{{title}}}, + \tpublisher = {{{publisher}}}, + \tyear = {year}, + \tauthor = {{{authors}}}, + \tdoi = {{{doi}}}, + }} + """.lstrip() ) return bibtex_entry @@ -166,7 +168,7 @@ def _collect_bibtex_citation(tag): if bibtex_file.is_file(): entry = bibtex_file.read_text() else: - logger.info( + logger.warning( 'The reference file %s does not exist.', bibtex_file ) entry = '' @@ -179,8 +181,7 @@ def _collect_cmip_citation(json_url): if json_data: bibtex_entry = _json_to_bibtex(json_data) else: - logger.info('Invalid json link %s', json_url) - bibtex_entry = False + bibtex_entry = '' return bibtex_entry From f267a02a35df0c17db389511a0a3d575cda671a3 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Fri, 20 Mar 2020 17:40:43 +0100 Subject: [PATCH 083/105] style --- esmvalcore/_provenance.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index 9ab3c134c6..e8f1a6521e 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -36,8 +36,7 @@ def get_esmvaltool_provenance(): create_namespace(provenance, namespace) # TODO: add dependencies with versions here - attributes_value = ESMVALTOOL_PAPER_TAG - attributes = {'attribute:references': attributes_value} + attributes = {'attribute:references': ESMVALTOOL_PAPER_TAG} activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) From 07c04baaa1911246deb435dea955fcc203329904 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 17:39:33 +0100 Subject: [PATCH 084/105] add import, refactor jason_to_bitex func --- esmvalcore/_citation.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py 
index 9d509ac7e4..91c560b8c1 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -3,6 +3,7 @@ import logging import re from pathlib import Path +import textwrap import requests from ._config import DIAGNOSTICS_PATH @@ -127,23 +128,17 @@ def _json_to_bibtex(data): title = data.get('titles', ['title not found'])[0] publisher = data.get('publisher', 'publisher not found') year = data.get('publicationYear', 'publicationYear not found') - authors = 'creators not found' doi = 'doi not found' - author_list = [] - if data.get('creators', False): + if data.get('creators', ''): author_list = [ item.get('creatorName', '') for item in data['creators'] ] - if author_list: - if author_list[0] == author_list[-1]: - authors = author_list[0] - if not authors: - authors = 'creatorName not found' - else: - authors = ' and '.join(author_list) - - if data.get('identifier', False): + authors = ' and '.join(author_list) + if not authors: + authors = 'creators not found' + + if data.get('identifier', ''): doi = data.get('identifier').get('id', 'doi not found') url = f'https://doi.org/{doi}' From 558e109c0d6ad051335bb8d5d35e3f8c30f14b4f Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 17:42:24 +0100 Subject: [PATCH 085/105] move the test to esmvaltool repo --- tests/integration/test_recipe.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 2d3276fe1a..d42c3a9455 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1261,9 +1261,6 @@ def test_diagnostic_task_provenance( assert product.attributes[key] == tuple(TAGS[key][k] for k in record[key]) - # Check that diagnostic reference files have been added - _test_bibtex_files(product.attributes['references']) - # Check that recipe diagnostic tags have been added src = yaml.safe_load(DEFAULT_DOCUMENTATION + content) for key in ('realms', 'themes'): @@ -1277,7 +1274,6 @@ def 
test_diagnostic_task_provenance( value = src['documentation'][key] if key == 'references': value = ', '.join(src['documentation'][key]) - _test_bibtex_files(value) assert recipe_record[0].get_attribute('attribute:' + key).pop() == value @@ -1289,18 +1285,6 @@ def test_diagnostic_task_provenance( assert os.path.exists(prefix + '.svg') -def _test_bibtex_files(product_tags): - """check bibtex files exit in REFERENCES_PATH.""" - if REFERENCES_PATH: - tags = list(set(_clean_tags(product_tags))) - for tag in tags: - bibtex_file = REFERENCES_PATH / f'{tag}.bibtex' - if not bibtex_file.is_file(): - raise ValueError( - 'The reference file {} does not exist.'.format(bibtex_file) - ) - - def test_alias_generation(tmp_path, patched_datafinder, config_user): content = dedent(""" From d60b53155b8d7d464361749b736a515a290d853c Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 17:45:58 +0100 Subject: [PATCH 086/105] undo the changes --- esmvalcore/_provenance.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index e8f1a6521e..dad826e1f0 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -32,11 +32,9 @@ def create_namespace(provenance, namespace): def get_esmvaltool_provenance(): """Create an esmvaltool run activity.""" provenance = ProvDocument() - for namespace in ('software', 'attribute'): - create_namespace(provenance, namespace) - - # TODO: add dependencies with versions here - attributes = {'attribute:references': ESMVALTOOL_PAPER_TAG} + namespace = 'software' + create_namespace(provenance, namespace) + attributes = {} # TODO: add dependencies with versions here activity = provenance.activity( namespace + ':esmvaltool==' + __version__, other_attributes=attributes) From 824143d048b9e36058d237738be229d21b99218d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 18:37:48 +0100 Subject: [PATCH 087/105] refactor --- esmvalcore/_citation.py | 34 
+++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 91c560b8c1..85483f55aa 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -18,9 +18,9 @@ CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = 'righi19gmdd' +ESMVALTOOL_PAPER_TAG = 'righi19gmd' ESMVALTOOL_PAPER = ( - '@article{righi19gmdd,\n' + '@article{righi19gmd,\n' '\tdoi = {10.5194/gmd-2019-226},\n' '\turl = {https://doi.org/10.5194%2Fgmd-2019-226},\n' '\tyear = 2019,\n' @@ -80,9 +80,13 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): # save CMIP6 url_info, if any # save any refrences info that is not related to recipe or diagnostics + title = [ + "Some citation information are found, " + "which are not mentioned in the recipe or diagnostic." + ] if info_urls: with open(f'{product_name}_data_citation_info.txt', 'w') as file: - file.write('\n'.join(list(set(info_urls)))) + file.write('\n'.join(title + list(set(info_urls)))) # convert json_urls to bibtex entries for json_url in json_urls: @@ -118,7 +122,10 @@ def _get_response(url): else: logger.warning('Error in the CMIP6 citation link: %s', url) except IOError: - logger.info('No network connection, unable to retrieve CMIP6 citation information') + logger.info( + 'No network connection,' + 'unable to retrieve CMIP6 citation information' + ) return json_data @@ -128,6 +135,7 @@ def _json_to_bibtex(data): title = data.get('titles', ['title not found'])[0] publisher = data.get('publisher', 'publisher not found') year = data.get('publicationYear', 'publicationYear not found') + authors = 'creators not found' doi = 'doi not found' if data.get('creators', ''): @@ -135,8 +143,8 @@ def _json_to_bibtex(data): item.get('creatorName', '') for item in data['creators'] ] authors = ' and '.join(author_list) - if not authors: - authors = 
'creators not found' + if not authors: + authors = 'creators not found' if data.get('identifier', ''): doi = data.get('identifier').get('id', 'doi not found') @@ -144,13 +152,13 @@ def _json_to_bibtex(data): bibtex_entry = textwrap.dedent( f""" - @misc{{{url} - \turl = {{{url}}}, - \ttitle = {{{title}}}, - \tpublisher = {{{publisher}}}, - \tyear = {year}, - \tauthor = {{{authors}}}, - \tdoi = {{{doi}}}, + @misc{{{url}, + url = {{{url}}}, + title = {{{title}}}, + publisher = {{{publisher}}}, + year = {year}, + author = {{{authors}}}, + doi = {{{doi}}}, }} """.lstrip() ) From d85156c07dfa1d774370255a0d161c7d5f946d71 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 18:38:12 +0100 Subject: [PATCH 088/105] fix the tests --- tests/integration/test_citation.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index eb6b64296f..0623ace8f6 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -1,4 +1,5 @@ """Test _citation.py.""" +import textwrap from prov.model import ProvDocument import esmvalcore @@ -75,15 +76,17 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): year = 'publicationYear not found' authors = 'creators not found' doi = 'doi not found' - fake_bibtex_entry = ( - f'{"@misc{"}{url},\n\t' - f'url = {{{url}}},\n\t' - f'title = {{{title}}},\n\t' - f'publisher = {{{publisher}}},\n\t' - f'year = {year},\n\t' - f'author = {{{authors}}},\n\t' - f'doi = {{{doi}}},\n' - f'{"}"}\n' + fake_bibtex_entry = textwrap.dedent( + f""" + @misc{{{url}, + url = {{{url}}}, + title = {{{title}}}, + publisher = {{{publisher}}}, + year = {year}, + author = {{{authors}}}, + doi = {{{doi}}}, + }} + """.lstrip() ) assert citation_file.read_text() == '\n'.join( [ESMVALTOOL_PAPER, fake_bibtex_entry] @@ -111,5 +114,9 @@ def test_cmip6_data_citation_url(tmp_path): # Create fake info url fake_url_prefix = 
'.'.join(attributes.values()) - fake_info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}' - assert citation_url.read_text() == fake_info_url + fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}'] + title = [ + "Some citation information are found, " + "which are not mentioned in the recipe or diagnostic." + ] + assert citation_url.read_text() == '\n'.join(title + fake_info_url) From 4e9e0141e55801d0b00994e4b3a96643a6c681b6 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 23:29:44 +0100 Subject: [PATCH 089/105] fix get_recipe_provenance function --- esmvalcore/_provenance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index dad826e1f0..fd6f032001 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -80,7 +80,7 @@ def get_recipe_provenance(documentation, filename): entity = provenance.entity( 'recipe:{}'.format(filename), { 'attribute:description': documentation.get('description', ''), - 'attribute:references': ', '.join( + 'attribute:references': str( documentation.get('references', [])), }) From a1bbbff24e5e06be81c50e4e8d40ec5fa1e56404 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 23:30:10 +0100 Subject: [PATCH 090/105] refactor extract_tags function --- esmvalcore/_citation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 85483f55aa..db272c213b 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -96,8 +96,7 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): # convert tags to bibtex entries if REFERENCES_PATH and product_tags: - # make tags clean and unique - tags = _clean_tags(product_tags) + tags = _extract_tags(product_tags) for tag in tags: citation_entries.append(_collect_bibtex_citation(tag)) @@ -105,8 +104,10 @@ def _save_citation_info(product_name, 
product_tags, json_urls, info_urls): file.write('\n'.join(citation_entries)) -def _clean_tags(tags): - """Clean the tags that are recorded as str by provenance.""" +def _extract_tags(tags): + """Extract tags that are recorded by provenance, + as for example, "['acknow_project', 'acknow_author']". + """ pattern = re.compile(r'\w+') return list(set(pattern.findall(str(tags)))) From f106cd0cde8d9852cc907db1c6676059cd0cf1e5 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 23:32:20 +0100 Subject: [PATCH 091/105] remove esmvaltool_paper_tag --- esmvalcore/_citation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index db272c213b..4389123424 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -18,7 +18,6 @@ CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' # it is the technical overview and should always be cited -ESMVALTOOL_PAPER_TAG = 'righi19gmd' ESMVALTOOL_PAPER = ( '@article{righi19gmd,\n' '\tdoi = {10.5194/gmd-2019-226},\n' @@ -69,7 +68,7 @@ def _write_citation_file(filename, provenance): # get diagnostics citation tags elif item.get_attribute('attribute:script_file'): product_tags.extend(reference_attr) - elif ESMVALTOOL_PAPER_TAG not in reference_attr: + else: info_urls += list(reference_attr) _save_citation_info(product_name, product_tags, json_urls, info_urls) From d54c0126e943bf18b6727bc7ea7a40f506b33f17 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Mon, 23 Mar 2020 23:37:51 +0100 Subject: [PATCH 092/105] remove esmvaltool_paper_tag --- esmvalcore/_provenance.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index fd6f032001..bf675fae0b 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -10,7 +10,6 @@ from prov.model import ProvDocument from ._version import __version__ -from ._citation import ESMVALTOOL_PAPER_TAG logger = logging.getLogger(__name__) From 
a9f13234687dba5d85f43bb17ae860760ab38c96 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 10:53:59 +0100 Subject: [PATCH 093/105] refcator bibtex string --- esmvalcore/_citation.py | 12 ++++++------ tests/integration/test_citation.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 4389123424..a426ebbfb5 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -153,12 +153,12 @@ def _json_to_bibtex(data): bibtex_entry = textwrap.dedent( f""" @misc{{{url}, - url = {{{url}}}, - title = {{{title}}}, - publisher = {{{publisher}}}, - year = {year}, - author = {{{authors}}}, - doi = {{{doi}}}, + \turl = {{{url}}}, + \ttitle = {{{title}}}, + \tpublisher = {{{publisher}}}, + \tyear = {year}, + \tauthor = {{{authors}}}, + \tdoi = {{{doi}}}, }} """.lstrip() ) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 0623ace8f6..6d51ad01ab 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -79,12 +79,12 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): fake_bibtex_entry = textwrap.dedent( f""" @misc{{{url}, - url = {{{url}}}, - title = {{{title}}}, - publisher = {{{publisher}}}, - year = {year}, - author = {{{authors}}}, - doi = {{{doi}}}, + \turl = {{{url}}}, + \ttitle = {{{title}}}, + \tpublisher = {{{publisher}}}, + \tyear = {year}, + \tauthor = {{{authors}}}, + \tdoi = {{{doi}}}, }} """.lstrip() ) From ddc5e711bb4357d6b104e60f7d324f4ebac460a3 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 11:42:55 +0100 Subject: [PATCH 094/105] remove import from _citation --- tests/integration/test_recipe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 05bfd7d265..5ace9184d3 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -15,7 +15,6 @@ from 
esmvalcore._task import DiagnosticTask from esmvalcore.preprocessor import DEFAULT_ORDER, PreprocessingTask from esmvalcore.preprocessor._io import concatenate_callback -from esmvalcore._citation import REFERENCES_PATH, _clean_tags from esmvalcore.cmor.check import CheckLevels From aba4457fd1deb8aa2563f5a3e839695d5df80c06 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 11:44:49 +0100 Subject: [PATCH 095/105] remove lstrip() --- esmvalcore/_citation.py | 2 +- tests/integration/test_citation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index a426ebbfb5..42248411ab 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -160,7 +160,7 @@ def _json_to_bibtex(data): \tauthor = {{{authors}}}, \tdoi = {{{doi}}}, }} - """.lstrip() + """ ) return bibtex_entry diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 6d51ad01ab..d8834d8843 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -86,7 +86,7 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): \tauthor = {{{authors}}}, \tdoi = {{{doi}}}, }} - """.lstrip() + """ ) assert citation_file.read_text() == '\n'.join( [ESMVALTOOL_PAPER, fake_bibtex_entry] From 27f4effc59c7e5de3847711b8ab98f55566972d2 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 13:10:06 +0100 Subject: [PATCH 096/105] add str and fix the test --- tests/integration/test_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 5ace9184d3..e6f64b8ad0 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -1282,7 +1282,7 @@ def test_diagnostic_task_provenance( for key in ('description', 'references'): value = src['documentation'][key] if key == 'references': - value = ', '.join(src['documentation'][key]) + value = 
str(src['documentation'][key]) assert recipe_record[0].get_attribute('attribute:' + key).pop() == value From fb2f057108e46c342420a1077b45832be3a9bb39 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Tue, 24 Mar 2020 13:10:23 +0100 Subject: [PATCH 097/105] style --- esmvalcore/_citation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 42248411ab..cb2e935711 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -104,8 +104,9 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): def _extract_tags(tags): - """Extract tags that are recorded by provenance, - as for example, "['acknow_project', 'acknow_author']". + """ + Extract tags that are recorded by provenance as + for example, "['acknow_project', 'acknow_author']". """ pattern = re.compile(r'\w+') return list(set(pattern.findall(str(tags)))) From 60b90800764f611c8a665a0f9ac6b3ef2cea3678 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 12:52:28 +0100 Subject: [PATCH 098/105] refactor write_citation_file function --- esmvalcore/_citation.py | 56 +++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index cb2e935711..9b77bfb6b2 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -50,17 +50,18 @@ def _write_citation_file(filename, provenance): Otherwise, cmip6 data reference links are saved into a text file. 
""" product_name = os.path.splitext(filename)[0] - info_urls = [] - json_urls = [] + further_info = [] + cmip6_info_urls = [] + cmip6_json_urls = [] product_tags = [] for item in provenance.records: - reference_attr = item.get_attribute('attribute:references') # get cmip6 citation info - value = item.get_attribute('attribute:mip_era') - if 'CMIP6' in value: + mip_era = item.get_attribute('attribute:mip_era') + if 'CMIP6' in mip_era: url_prefix = _make_url_prefix(item.attributes) - info_urls.append(_make_info_url(url_prefix)) - json_urls.append(_make_json_url(url_prefix)) + cmip6_info_urls.append(_make_info_url(url_prefix)) + cmip6_json_urls.append(_make_json_url(url_prefix)) + reference_attr = item.get_attribute('attribute:references') if reference_attr: # get recipe citation tags if item.identifier.namespace.prefix == 'recipe': @@ -69,24 +70,14 @@ def _write_citation_file(filename, provenance): elif item.get_attribute('attribute:script_file'): product_tags.extend(reference_attr) else: - info_urls += list(reference_attr) - - _save_citation_info(product_name, product_tags, json_urls, info_urls) + further_info.extend(reference_attr) + _save_citation(product_name, product_tags, cmip6_json_urls) + _save_citation_info(product_name, cmip6_info_urls, further_info) -def _save_citation_info(product_name, product_tags, json_urls, info_urls): +def _save_citation(product_name, product_tags, json_urls): + """Save all bibtex entries in one bibtex file.""" citation_entries = [ESMVALTOOL_PAPER] - - # save CMIP6 url_info, if any - # save any refrences info that is not related to recipe or diagnostics - title = [ - "Some citation information are found, " - "which are not mentioned in the recipe or diagnostic." 
- ] - if info_urls: - with open(f'{product_name}_data_citation_info.txt', 'w') as file: - file.write('\n'.join(title + list(set(info_urls)))) - # convert json_urls to bibtex entries for json_url in json_urls: cmip_citation = _collect_cmip_citation(json_url) @@ -103,6 +94,27 @@ def _save_citation_info(product_name, product_tags, json_urls, info_urls): file.write('\n'.join(citation_entries)) +def _save_citation_info(product_name, info_urls, further_info): + """Save all citation information in one text file.""" + lines = [] + # save CMIP6 url_info, if any + if info_urls: + lines.append( + "Follow the links below to find more information about CMIP6 data." + ) + lines.extend(info_urls) + # save any refrences info that is not related to recipe or diagnostics + if further_info: + lines.append( + "Some data citation information are found, " + "which are not mentioned in the recipe or diagnostic." + ) + lines.extend(further_info) + if lines: + with open(f'{product_name}_data_citation_info.txt', 'w') as file: + file.write('\n'.join(lines)) + + def _extract_tags(tags): """ Extract tags that are recorded by provenance as From fd0ff2e8cf13858db6045e86d3d41e1c89ec7a68 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:04:51 +0100 Subject: [PATCH 099/105] fix multiline docstring --- esmvalcore/_citation.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 9b77bfb6b2..707a69f3e0 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -75,6 +75,7 @@ def _write_citation_file(filename, provenance): _save_citation(product_name, product_tags, cmip6_json_urls) _save_citation_info(product_name, cmip6_info_urls, further_info) + def _save_citation(product_name, product_tags, json_urls): """Save all bibtex entries in one bibtex file.""" citation_entries = [ESMVALTOOL_PAPER] @@ -116,9 +117,10 @@ def _save_citation_info(product_name, info_urls, further_info): def 
_extract_tags(tags): - """ - Extract tags that are recorded by provenance as - for example, "['acknow_project', 'acknow_author']". + """Extract tags. + + Tags are recorded as string of lists by provenance. + For example, "['acknow_project', 'acknow_author']". """ pattern = re.compile(r'\w+') return list(set(pattern.findall(str(tags)))) @@ -136,7 +138,7 @@ def _get_response(url): logger.warning('Error in the CMIP6 citation link: %s', url) except IOError: logger.info( - 'No network connection,' + 'No network connection, ' 'unable to retrieve CMIP6 citation information' ) return json_data From 6bb038ffc04e128e48b437a53e7167c5dda6c5b7 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:05:24 +0100 Subject: [PATCH 100/105] fix title for info_url --- tests/integration/test_citation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index d8834d8843..69867653c5 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -116,7 +116,6 @@ def test_cmip6_data_citation_url(tmp_path): fake_url_prefix = '.'.join(attributes.values()) fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}'] title = [ - "Some citation information are found, " - "which are not mentioned in the recipe or diagnostic." + "Follow the links below to find more information about CMIP6 data." 
] assert citation_url.read_text() == '\n'.join(title + fake_info_url) From c995e663f093f05d28a38ad4a3c7eff56b61d6c0 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:28:38 +0100 Subject: [PATCH 101/105] fix minor things --- esmvalcore/_citation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 707a69f3e0..0ee529901a 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -128,7 +128,7 @@ def _extract_tags(tags): def _get_response(url): """Return information from CMIP6 Data Citation service in json format.""" - json_data = False + json_data = None if url.lower().startswith('https'): try: response = requests.get(url) @@ -153,7 +153,7 @@ def _json_to_bibtex(data): authors = 'creators not found' doi = 'doi not found' - if data.get('creators', ''): + if 'creators' in data: author_list = [ item.get('creatorName', '') for item in data['creators'] ] @@ -161,8 +161,8 @@ def _json_to_bibtex(data): if not authors: authors = 'creators not found' - if data.get('identifier', ''): - doi = data.get('identifier').get('id', 'doi not found') + if 'identifier' in data: + doi = data['identifier'].get('id', 'doi not found') url = f'https://doi.org/{doi}' bibtex_entry = textwrap.dedent( @@ -228,5 +228,5 @@ def _make_json_url(url_prefix): def _make_info_url(url_prefix): """Make info url based on CMIP6 Data Citation Service.""" - info_url = f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{url_prefix}' + info_url = f'{CMIP6_URL_STEM}/cmip6?input={url_prefix}' return info_url From 096e70d999e973d6b578b6c739f4bbfd802db849 Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:30:04 +0100 Subject: [PATCH 102/105] remove duplicated cmip6 --- tests/integration/test_citation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 69867653c5..8dc4b89c8a 100644 --- 
a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -114,7 +114,7 @@ def test_cmip6_data_citation_url(tmp_path): # Create fake info url fake_url_prefix = '.'.join(attributes.values()) - fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input=CMIP6.{fake_url_prefix}'] + fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input={fake_url_prefix}'] title = [ "Follow the links below to find more information about CMIP6 data." ] From f078010c9532dd74543f9bbfcc80195e38e4769d Mon Sep 17 00:00:00 2001 From: SarahAlidoost Date: Wed, 25 Mar 2020 13:33:32 +0100 Subject: [PATCH 103/105] style --- esmvalcore/_citation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 0ee529901a..7365ca48c7 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -45,9 +45,9 @@ def _write_citation_file(filename, provenance): Recipe and cmip6 data references are saved into one bibtex file. cmip6 data references are provided by CMIP6 data citation service. - each cmip6 data reference has a json link. In the case of internet + Each cmip6 data reference has a json link. In the case of internet connection, cmip6 data references are saved into a bibtex file. - Otherwise, cmip6 data reference links are saved into a text file. + Also, cmip6 data reference links are saved into a text file. """ product_name = os.path.splitext(filename)[0] further_info = [] @@ -119,7 +119,7 @@ def _save_citation_info(product_name, info_urls, further_info): def _extract_tags(tags): """Extract tags. - Tags are recorded as string of lists by provenance. + Tags are recorded as string of a list by provenance. For example, "['acknow_project', 'acknow_author']". 
""" pattern = re.compile(r'\w+') From 66fdf11adcc40b036e2913020f0b0d4e4069d880 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Mon, 30 Mar 2020 16:35:20 +0200 Subject: [PATCH 104/105] Improve text and avoid duplicate citation entries --- esmvalcore/_citation.py | 144 ++++++++++++++++------------- esmvalcore/_task.py | 4 +- tests/integration/test_citation.py | 41 ++++---- 3 files changed, 99 insertions(+), 90 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 7365ca48c7..3dfc0f1b9c 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -1,23 +1,21 @@ """Citation module.""" -import os import logging +import os import re -from pathlib import Path import textwrap +from functools import lru_cache + import requests from ._config import DIAGNOSTICS_PATH -if DIAGNOSTICS_PATH: - REFERENCES_PATH = Path(DIAGNOSTICS_PATH) / 'references' -else: - REFERENCES_PATH = '' - logger = logging.getLogger(__name__) +REFERENCES_PATH = DIAGNOSTICS_PATH / 'references' + CMIP6_URL_STEM = 'https://cera-www.dkrz.de/WDCC/ui/cerasearch' -# it is the technical overview and should always be cited +# The technical overview paper should always be cited ESMVALTOOL_PAPER = ( '@article{righi19gmd,\n' '\tdoi = {10.5194/gmd-2019-226},\n' @@ -35,11 +33,10 @@ '\ttitle = {{ESMValTool} v2.0 ' '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' 'Technical overview}\n' - '}\n' -) + '}\n') -def _write_citation_file(filename, provenance): +def _write_citation_files(filename, provenance): """ Write citation information provided by the recorded provenance. @@ -50,80 +47,97 @@ def _write_citation_file(filename, provenance): Also, cmip6 data reference links are saved into a text file. 
""" product_name = os.path.splitext(filename)[0] - further_info = [] - cmip6_info_urls = [] - cmip6_json_urls = [] - product_tags = [] + + tags = set() + cmip6_json_urls = set() + cmip6_info_urls = set() + other_info = set() + for item in provenance.records: - # get cmip6 citation info - mip_era = item.get_attribute('attribute:mip_era') - if 'CMIP6' in mip_era: + # get cmip6 data citation info + cmip6_data = 'CMIP6' in item.get_attribute('attribute:mip_era') + if cmip6_data: url_prefix = _make_url_prefix(item.attributes) - cmip6_info_urls.append(_make_info_url(url_prefix)) - cmip6_json_urls.append(_make_json_url(url_prefix)) - reference_attr = item.get_attribute('attribute:references') - if reference_attr: - # get recipe citation tags + cmip6_info_urls.add(_make_info_url(url_prefix)) + cmip6_json_urls.add(_make_json_url(url_prefix)) + + # get other citation info + references = item.get_attribute('attribute:references') + if not references: + # ESMValTool CMORization scripts use 'reference' (without final s) + references = item.get_attribute('attribute:reference') + if references: if item.identifier.namespace.prefix == 'recipe': - product_tags.extend(reference_attr) - # get diagnostics citation tags + # get recipe citation tags + tags.update(references) elif item.get_attribute('attribute:script_file'): - product_tags.extend(reference_attr) - else: - further_info.extend(reference_attr) + # get diagnostics citation tags + tags.update(references) + elif not cmip6_data: + # get any other data citation tags, e.g. 
CMIP5 + other_info.update(references) - _save_citation(product_name, product_tags, cmip6_json_urls) - _save_citation_info(product_name, cmip6_info_urls, further_info) + _save_citation_bibtex(product_name, tags, cmip6_json_urls) + _save_citation_info_txt(product_name, cmip6_info_urls, other_info) -def _save_citation(product_name, product_tags, json_urls): - """Save all bibtex entries in one bibtex file.""" +def _save_citation_bibtex(product_name, tags, json_urls): + """Save the bibtex entries in a bibtex file.""" citation_entries = [ESMVALTOOL_PAPER] + + # convert tags to bibtex entries + if tags: + entries = set() + for tag in _extract_tags(tags): + entries.add(_collect_bibtex_citation(tag)) + citation_entries.extend(sorted(entries)) + # convert json_urls to bibtex entries + entries = set() for json_url in json_urls: cmip_citation = _collect_cmip_citation(json_url) if cmip_citation: - citation_entries.append(cmip_citation) - - # convert tags to bibtex entries - if REFERENCES_PATH and product_tags: - tags = _extract_tags(product_tags) - for tag in tags: - citation_entries.append(_collect_bibtex_citation(tag)) + entries.add(cmip_citation) + citation_entries.extend(sorted(entries)) with open(f'{product_name}_citation.bibtex', 'w') as file: file.write('\n'.join(citation_entries)) -def _save_citation_info(product_name, info_urls, further_info): - """Save all citation information in one text file.""" +def _save_citation_info_txt(product_name, info_urls, other_info): + """Save all data citation information in one text file.""" lines = [] - # save CMIP6 url_info, if any + # Save CMIP6 url_info if info_urls: lines.append( - "Follow the links below to find more information about CMIP6 data." - ) - lines.extend(info_urls) - # save any refrences info that is not related to recipe or diagnostics - if further_info: - lines.append( - "Some data citation information are found, " - "which are not mentioned in the recipe or diagnostic." 
+ "Follow the links below to find more information about CMIP6 data:" ) - lines.extend(further_info) + lines.extend(f'- {url}' for url in sorted(info_urls)) + + # Save any references from the 'references' and 'reference' NetCDF global + # attributes. + if other_info: + if lines: + lines.append('') + lines.append("Additional data citation information was found, for " + "which no entry is available in the bibtex file:") + lines.extend('- ' + str(t).replace('\n', ' ') + for t in sorted(other_info)) + if lines: with open(f'{product_name}_data_citation_info.txt', 'w') as file: - file.write('\n'.join(lines)) + file.write('\n'.join(lines) + '\n') def _extract_tags(tags): """Extract tags. - Tags are recorded as string of a list by provenance. - For example, "['acknow_project', 'acknow_author']". + Tags are recorded as a list of strings converted to a string in provenance. + For example, a single entry in the list `tags` could be the string + "['acknow_project', 'acknow_author']". """ pattern = re.compile(r'\w+') - return list(set(pattern.findall(str(tags)))) + return set(pattern.findall(str(tags))) def _get_response(url): @@ -137,10 +151,8 @@ def _get_response(url): else: logger.warning('Error in the CMIP6 citation link: %s', url) except IOError: - logger.info( - 'No network connection, ' - 'unable to retrieve CMIP6 citation information' - ) + logger.info('No network connection, ' + 'unable to retrieve CMIP6 citation information') return json_data @@ -165,8 +177,7 @@ def _json_to_bibtex(data): doi = data['identifier'].get('id', 'doi not found') url = f'https://doi.org/{doi}' - bibtex_entry = textwrap.dedent( - f""" + bibtex_entry = textwrap.dedent(f""" @misc{{{url}, \turl = {{{url}}}, \ttitle = {{{title}}}, @@ -175,24 +186,25 @@ def _json_to_bibtex(data): \tauthor = {{{authors}}}, \tdoi = {{{doi}}}, }} - """ - ) + """).lstrip() return bibtex_entry +@lru_cache(maxsize=1024) def _collect_bibtex_citation(tag): """Collect information from bibtex files.""" bibtex_file = 
REFERENCES_PATH / f'{tag}.bibtex' if bibtex_file.is_file(): entry = bibtex_file.read_text() else: - logger.warning( - 'The reference file %s does not exist.', bibtex_file - ) entry = '' + logger.warning( + "The reference file %s does not exist, citation information " + "incomplete.", bibtex_file) return entry +@lru_cache(maxsize=1024) def _collect_cmip_citation(json_url): """Collect information from CMIP6 Data Citation Service.""" json_data = _get_response(json_url) diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index add4e12ecc..d348f80ad3 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -17,9 +17,9 @@ import psutil import yaml +from ._citation import _write_citation_files from ._config import DIAGNOSTICS_PATH, TAGS, replace_tags from ._provenance import TrackedFile, get_task_provenance -from ._citation import _write_citation_file logger = logging.getLogger(__name__) @@ -566,7 +566,7 @@ def _collect_provenance(self): product = TrackedFile(filename, attributes, ancestors) product.initialize_provenance(self.activity) product.save_provenance() - _write_citation_file(product.filename, product.provenance) + _write_citation_files(product.filename, product.provenance) self.products.add(product) logger.debug("Collecting provenance of task %s took %.1f seconds", self.name, diff --git a/tests/integration/test_citation.py b/tests/integration/test_citation.py index 8dc4b89c8a..50fb65ef02 100644 --- a/tests/integration/test_citation.py +++ b/tests/integration/test_citation.py @@ -1,10 +1,11 @@ """Test _citation.py.""" import textwrap + from prov.model import ProvDocument import esmvalcore -from esmvalcore._citation import (_write_citation_file, - ESMVALTOOL_PAPER, CMIP6_URL_STEM) +from esmvalcore._citation import (CMIP6_URL_STEM, ESMVALTOOL_PAPER, + _write_citation_files) from esmvalcore._provenance import ESMVALTOOL_URI_PREFIX @@ -25,14 +26,13 @@ def test_references(tmp_path, monkeypatch): # Create fake bibtex references tag file references_path = 
tmp_path / 'references' references_path.mkdir() - monkeypatch.setattr( - esmvalcore._citation, 'REFERENCES_PATH', references_path - ) + monkeypatch.setattr(esmvalcore._citation, 'REFERENCES_PATH', + references_path) fake_bibtex_file = references_path / 'test_tag.bibtex' fake_bibtex = "Fake bibtex file content\n" fake_bibtex_file.write_text(fake_bibtex) - _write_citation_file(filename, provenance) + _write_citation_files(filename, provenance) citation_file = tmp_path / 'output_citation.bibtex' citation = citation_file.read_text() assert citation == '\n'.join([ESMVALTOOL_PAPER, fake_bibtex]) @@ -63,10 +63,9 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): filename = str(tmp_path / 'output.nc') provenance.entity('file:' + filename, attributes) - monkeypatch.setattr( - esmvalcore._citation, '_get_response', mock_get_response - ) - _write_citation_file(filename, provenance) + monkeypatch.setattr(esmvalcore._citation, '_get_response', + mock_get_response) + _write_citation_files(filename, provenance) citation_file = tmp_path / 'output_citation.bibtex' # Create fake bibtex entry @@ -76,8 +75,7 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): year = 'publicationYear not found' authors = 'creators not found' doi = 'doi not found' - fake_bibtex_entry = textwrap.dedent( - f""" + fake_bibtex_entry = textwrap.dedent(f""" @misc{{{url}, \turl = {{{url}}}, \ttitle = {{{title}}}, @@ -86,11 +84,9 @@ def test_cmip6_data_citation(tmp_path, monkeypatch): \tauthor = {{{authors}}}, \tdoi = {{{doi}}}, }} - """ - ) + """).lstrip() assert citation_file.read_text() == '\n'.join( - [ESMVALTOOL_PAPER, fake_bibtex_entry] - ) + [ESMVALTOOL_PAPER, fake_bibtex_entry]) def test_cmip6_data_citation_url(tmp_path): @@ -109,13 +105,14 @@ def test_cmip6_data_citation_url(tmp_path): } filename = str(tmp_path / 'output.nc') provenance.entity('file:' + filename, attributes) - _write_citation_file(filename, provenance) + _write_citation_files(filename, provenance) citation_url = tmp_path / 
'output_data_citation_info.txt' # Create fake info url fake_url_prefix = '.'.join(attributes.values()) - fake_info_url = [f'{CMIP6_URL_STEM}/cmip6?input={fake_url_prefix}'] - title = [ - "Follow the links below to find more information about CMIP6 data." - ] - assert citation_url.read_text() == '\n'.join(title + fake_info_url) + text = '\n'.join([ + "Follow the links below to find more information about CMIP6 data:", + f"- {CMIP6_URL_STEM}/cmip6?input={fake_url_prefix}", + '', + ]) + assert citation_url.read_text() == text From 7a85ee17cebe2ac0eebc5c5581f5b0530270284a Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Tue, 31 Mar 2020 11:36:50 +0200 Subject: [PATCH 105/105] Update ESMValTool reference --- esmvalcore/_citation.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index 3dfc0f1b9c..b3ead8a600 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -17,23 +17,26 @@ # The technical overview paper should always be cited ESMVALTOOL_PAPER = ( - '@article{righi19gmd,\n' - '\tdoi = {10.5194/gmd-2019-226},\n' - '\turl = {https://doi.org/10.5194%2Fgmd-2019-226},\n' - '\tyear = 2019,\n' - '\tmonth = {sep},\n' - '\tpublisher = {Copernicus {GmbH}},\n' - '\tauthor = {Mattia Righi and Bouwe Andela and Veronika Eyring ' - 'and Axel Lauer and Valeriu Predoi and Manuel Schlund ' - 'and Javier Vegas-Regidor and Lisa Bock and Björn Brötz ' - 'and Lee de Mora and Faruk Diblen and Laura Dreyer ' - 'and Niels Drost and Paul Earnshaw and Birgit Hassler ' - 'and Nikolay Koldunov and Bill Little and Saskia Loosveldt Tomas ' - 'and Klaus Zimmermann},\n' - '\ttitle = {{ESMValTool} v2.0 ' - '{\\&}amp$\\mathsemicolon${\\#}8211$\\mathsemicolon$ ' - 'Technical overview}\n' - '}\n') + "@article{righi20gmd,\n" + "\tdoi = {10.5194/gmd-13-1179-2020},\n" + "\turl = {https://doi.org/10.5194/gmd-13-1179-2020},\n" + "\tyear = {2020},\n" + "\tmonth = mar,\n" + "\tpublisher 
= {Copernicus {GmbH}},\n" + "\tvolume = {13},\n" + "\tnumber = {3},\n" + "\tpages = {1179--1199},\n" + "\tauthor = {Mattia Righi and Bouwe Andela and Veronika Eyring " + "and Axel Lauer and Valeriu Predoi and Manuel Schlund " + "and Javier Vegas-Regidor and Lisa Bock and Bj\"{o}rn Br\"{o}tz " + "and Lee de Mora and Faruk Diblen and Laura Dreyer " + "and Niels Drost and Paul Earnshaw and Birgit Hassler " + "and Nikolay Koldunov and Bill Little and Saskia Loosveldt Tomas " + "and Klaus Zimmermann},\n" + "\ttitle = {Earth System Model Evaluation Tool (ESMValTool) v2.0 " + "-- technical overview},\n" + "\tjournal = {Geoscientific Model Development}\n" + "}\n") def _write_citation_files(filename, provenance):