Skip to content

Commit 7931ee4

Browse files
committed
cleaned up model_catalogs.py and updated docstrings
1 parent 1fa7f87 commit 7931ee4

File tree

1 file changed

+35
-66
lines changed

1 file changed

+35
-66
lines changed

model_catalogs/model_catalogs.py

Lines changed: 35 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -127,20 +127,24 @@ def make_catalog(
127127

128128
def open_catalog(cat_loc, return_cat=True, save_catalog=False, override=False, boundaries=False,
129129
save_boundaries=False):
130-
"""_summary_
130+
"""Open an intake catalog file and set up code to apply processing/transform.
131+
132+
Optionally calculate the boundaries of the model represented in cat_loc.
131133
132134
Parameters
133135
----------
134136
cat_loc : str, Path, Catalog
135-
Can be the name of a subcatalog in `intake.cat` that has been installed or can be the path to a catalog.
136-
save_catalog : bool, optional
137-
Defaults to True, and saves to cat_path.
137+
The catalog to open. cat_loc can be the representation of a path to a catalog file (string or Path) or it can be a Catalog object.
138138
return_cat : bool, optional
139-
Return catalog.
139+
Return catalog from function. Defaults to True.
140+
save_catalog : bool, optional
141+
If True, save the catalog to mc.CACHE_PATH_COMPILED(model). Defaults to False.
140142
override : boolean, optional
141-
Use `override=True` to compile the catalog files together regardless of freshness.
143+
Use `override=True` to calculate boundaries of the model regardless of whether the file already exists.
142144
boundaries : boolean, optional
143-
If True, find or calculate domain boundary of model.
145+
If True, read the previously-saved domain boundary of the model, or calculate it if it is not available.
146+
save_boundaries : bool, optional
147+
If True, save the boundaries to mc.FILE_PATH_BOUNDARIES(model). Defaults to False.
144148
"""
145149

146150
if isinstance(cat_loc, Catalog):
@@ -179,7 +183,7 @@ def open_catalog(cat_loc, return_cat=True, save_catalog=False, override=False, b
179183
full_cat_metadata=cat_orig.metadata,
180184
cat_driver=mc.process.DatasetTransform,
181185
cat_path=mc.CACHE_PATH_COMPILED,
182-
save_catalog=True,
186+
save_catalog=save_catalog,
183187
return_cat=True,
184188
)
185189

@@ -189,35 +193,46 @@ def open_catalog(cat_loc, return_cat=True, save_catalog=False, override=False, b
189193

190194
def setup(locs="mc_", override=False):
191195
"""Setup reference catalogs for models.
192-
193-
Loops over hard-wired "orig" catalogs available in ``mc.CAT_PATH_ORIG``, reads in previously-saved model boundary information, saves temporary catalog files for each model, and links those together into the returned master catalog. For some models, reading in the original catalogs applies a "today" and/or "yesterday" date Intake user parameter that supplies two example model files that can be used for examining the model output for the example times. Those are rerun each time this function is rerun, filling the parameters using the proper dates.
196+
197+
Loops over catalogs that were previously installed as intake data packages and whose names start with the string(s) in locs. The default is to read in the required GOODS model catalogs, which are prefixed with "mc_". Alternatively, one or more local catalog files can be input as strings or Paths.
198+
199+
This function calls ``open_catalog`` which reads in previously-saved model boundary information (or calculates it if not available) and saves temporary catalog files for each model (called "compiled"), then this function links those together into the returned main catalog. For some models, reading in the original catalogs applies a "today" and/or "yesterday" date Intake user parameter that supplies two example model files that can be used for examining the model output for the example times. Those are rerun each time this function is rerun, filling the parameters using the proper dates.
194200
195201
Parameters
196202
----------
197-
locs : str, list
198-
The name of a catalog in the default intake catalog `intake.cat`.
203+
locs : str, Path, list
204+
This can be:
205+
206+
* a string or Path describing where a Catalog file is located
207+
* a string of the prefix for selecting catalogs from the default intake catalog, ``intake.cat``. It is expected to be of the form "PREFIX_CATALOGNAME" with an underscore at the end followed by the catalog name, and there could be many catalogs with that "PREFIX_" set up.
208+
* a list of a combination of the previous options.
209+
199210
override : boolean, optional
200211
Use `override=True` to compile the catalog files together regardless of freshness.
201212
202213
Returns
203214
-------
204215
Intake catalog
205-
Nested Intake catalog with a catalog for each model in ``mc.CAT_PATH_ORIG``. Each model in turn has one or more model_source available (e.g., "coops-forecast-agg", "coops-forecast-noagg").
216+
Nested Intake catalog with a catalog for each input option. Each model in turn has one or more model_source available (e.g., "coops-forecast-agg", "coops-forecast-noagg").
206217
207218
Examples
208219
--------
209220
210-
Set up master catalog:
221+
Set up main catalog:
211222
212-
>>> cat = mc.setup()
223+
>>> main_cat = mc.setup()
213224
214225
Examine list of models available in catalog:
215226
216-
>>> list(cat)
227+
>>> list(main_cat)
217228
218229
Examine the model_sources for a specific model in the catalog:
219230
220-
>>> list(cat['CBOFS'])
231+
>>> list(main_cat['CBOFS'])
232+
233+
Separate from ``model_catalogs`` you can check the default Intake catalog with:
234+
235+
>>> list(intake.cat)
221236
"""
222237

223238
locs = mc.astype(locs, list)
@@ -240,24 +255,6 @@ def setup(locs="mc_", override=False):
240255

241256
# now cats is a list of Catalogs or a list of one Path
242257
initial_cats.extend(cats)
243-
244-
# # LOC IS A STRING CURRENTLY
245-
246-
# # start with one, then UPDATE TO MULTIPLE
247-
# # initial_cats is a list of Catalogs in this case
248-
# initial_cats = [intake.cat[cat_name] for cat_name in list(intake.cat) if loc in cat_name]
249-
250-
# # remove the prefix from the catalog name
251-
# for cat in initial_cats:
252-
# cat.name = cat.name.lstrip(loc)
253-
254-
# # check for if loc is instead a path to a catalog
255-
# if len(initial_cats) == 0:
256-
# # initial_cats is a list of one Path in this case
257-
# initial_cats = [PurePath(loc)]
258-
# else:
259-
# # UPDATE ERROR
260-
# raise KeyError(f"The requested catalog {loc} needs to have been previously installed with intake.")
261258

262259
cat_transform_locs = []
263260
for cat in list(initial_cats):
@@ -266,12 +263,12 @@ def setup(locs="mc_", override=False):
266263
name = cat.stem
267264
elif isinstance(cat, Catalog):
268265
name = cat.name
269-
# import pdb; pdb.set_trace()
270266

271267
# re-compile together catalog file if user wants to override possibly
272268
# existing file or if is not fresh
273-
if override or not mc.is_fresh(mc.FILE_PATH_COMPILED(name)):
274-
open_catalog(cat, return_cat=False, save_catalog=True, boundaries=True, save_boundaries=True)
269+
if override or not mc.is_fresh(mc.FILE_PATH_COMPILED(name)):
270+
# override for open_catalog is about calculating boundaries
271+
open_catalog(cat, return_cat=False, save_catalog=True, boundaries=True, save_boundaries=True, override=False)
275272
cat_transform_locs.append(mc.FILE_PATH_COMPILED(name))
276273

277274
# have to read these from disk in order to make them type
@@ -291,34 +288,6 @@ def setup(locs="mc_", override=False):
291288
save_catalog=False,
292289
)
293290

294-
# cat_transform_locs = []
295-
# # Loop over all hard-wired original catalog files, one per model
296-
# for cat_loc in mc.CAT_PATH_ORIG.glob("*.yaml"):
297-
298-
# # re-compile together catalog file if user wants to override possibly
299-
# # existing file or if is not fresh
300-
# if override or not mc.is_fresh(mc.FILE_PATH_COMPILED(cat_loc.stem)):
301-
302-
# open_catalog(cat_loc, return_cat=False, save_catalog=True, override=override, boundaries=True)
303-
304-
# cat_transform_locs.append(mc.FILE_PATH_COMPILED(cat_loc.stem))
305-
306-
# # have to read these from disk in order to make them type
307-
# # intake.catalog.local.YAMLFileCatalog
308-
# # instead of intake.catalog.base.Catalog
309-
# cats = [intake.open_catalog(loc) for loc in cat_transform_locs]
310-
311-
# # make master nested catalog
312-
# main_cat = mc.make_catalog(
313-
# cats,
314-
# full_cat_name="MAIN-CATALOG",
315-
# full_cat_description="Main catalog for models; a catalog of nested catalogs.",
316-
# full_cat_metadata={"source_catalog_dir": str(mc.CAT_PATH_ORIG)},
317-
# cat_driver=intake.catalog.local.YAMLFileCatalog,
318-
# cat_path=None,
319-
# save_catalog=False,
320-
# )
321-
322291
return main_cat
323292

324293

0 commit comments

Comments
 (0)