diff --git a/.gitignore b/.gitignore index fa228556..5b13247c 100644 --- a/.gitignore +++ b/.gitignore @@ -148,4 +148,6 @@ data_backup autotest/temp/ # uv lockfile -uv.lock \ No newline at end of file +uv.lock + +modflow_devtools/data \ No newline at end of file diff --git a/docs/md/models.md b/docs/md/models.md new file mode 100644 index 00000000..af155aa9 --- /dev/null +++ b/docs/md/models.md @@ -0,0 +1,30 @@ +# Models API + +The `modflow_devtools.models` module provides programmatic access to MODFLOW 6 example models via [Pooch](https://www.fatiando.org/pooch/latest/index.html). + +When the module is imported, it checks for the existence of the registry and models files. If both are found, it loads the registry and dynamically generates a function for each model, attaching it to the module namespace. If either file is missing, the import fails with a `FileNotFoundError`. + +Each function returns a list of files. Example usage: + +```python +import modflow_devtools.models as models + +files = models.some_model() +``` + +## Developers + +The `make_registry.py` script is responsible for generating a registry text file and a mapping between files and models. This script should be run in the CI pipeline at release time before the package is built. The generated registry file and model mapping are used to create a pooch instance for fetching model files, and should be distributed with the package. + +The script can be executed with `python -m modflow_devtools.make_registry` and accepts the following options: + +- `--path` or `-p`: Specifies the directory containing model directories. If not provided, the default path is used. +- `--append` or `-a`: If specified, the script will append to the existing registry file instead of overwriting it. +- `--base-url` or `-b`: Specifies the base URL for the registry file. If not provided, the default base URL is used. 
def _sha256(path: Path) -> str:
    """
    Compute the SHA256 hash of the given file.

    The file is read through a reusable 128 KiB buffer, so its contents
    are never held in memory all at once.

    Reference: https://stackoverflow.com/a/44873382/6514033
    """
    h = hashlib.sha256()
    b = bytearray(128 * 1024)
    mv = memoryview(b)
    with path.open("rb", buffering=0) as f:
        # readinto() returns 0 at end of file, which ends the iteration.
        for n in iter(lambda: f.readinto(mv), 0):
            h.update(mv[:n])
    return h.hexdigest()


def _registry_name(relpath: Path) -> str:
    """Flatten a relative path into a registry name (``a/b-c`` -> ``a_b_c``)."""
    # as_posix() keeps the result identical on Windows, where str(path)
    # would contain backslashes instead of forward slashes.
    return relpath.as_posix().replace("/", "_").replace("-", "_")


def write_registry(
    path: Path, registry_path: Path, base_url: str, append: bool = False
):
    """
    Write a registry file and a ``models.toml`` file for the models in ``path``.

    Each file found beneath a directory returned by ``get_model_paths(path)``
    is written to the registry as one ``<name> <sha256> <url>`` line. A
    mapping from each file's (flattened) parent directory to the list of
    registry names inside it is dumped to ``models.toml`` next to the
    registry file.

    Parameters
    ----------
    path : Path
        Directory containing model directories.
    registry_path : Path
        Registry file to write. Missing parent directories are created.
    base_url : str
        URL prefix prepended to each file's path relative to ``path``.
        A trailing slash is ignored.
    append : bool
        If True, append to existing registry/models files instead of
        overwriting them.

    Raises
    ------
    NotADirectoryError
        If ``path`` is not a directory. Raised before any file is opened,
        so an existing registry is left untouched.
    """
    # Validate first: opening the registry in "w" mode truncates it, so a
    # bad path must be rejected before the output files are touched.
    if not path.is_dir():
        raise NotADirectoryError(f"Path {path} is not a directory.")

    registry_path.parent.mkdir(parents=True, exist_ok=True)

    # Avoid "//" in generated URLs when the base URL ends with a slash.
    base_url = base_url.rstrip("/")

    models = {}
    exclude = [".DS_Store"]
    with registry_path.open("a+" if append else "w", encoding="utf-8") as f:
        for mp in get_model_paths(path):
            # Sorted so that the registry contents are reproducible.
            for p in sorted(mp.rglob("*")):
                if not p.is_file() or any(e in p.name for e in exclude):
                    continue
                relpath = p.relative_to(path)
                posix_relpath = relpath.as_posix()
                # Test the relative path only, so that the location of the
                # checkout itself cannot cause every file to be skipped.
                if "compare" in posix_relpath:
                    continue
                name = _registry_name(relpath)
                digest = _sha256(p)
                url = f"{base_url}/{posix_relpath}"
                f.write(f"{name} {digest} {url}\n")
                models.setdefault(_registry_name(relpath.parent), []).append(name)

    models_path = registry_path.parent / "models.toml"
    # NOTE(review): in append mode a second TOML document is concatenated to
    # the first; a key already present in the file would be duplicated,
    # which TOML does not allow -- confirm appended runs never share keys.
    with models_path.open("ab+" if append else "wb") as mf:
        tomli.dump(models, mf)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Create a Pooch registry and model-to-files map."
    )
    parser.add_argument(
        "--path",
        "-p",
        type=str,
        help="Directory containing model directories.",
    )
    parser.add_argument(
        "--append",
        "-a",
        action="store_true",
        help="Append to the registry file instead of overwriting.",
    )
    parser.add_argument(
        "--base-url",
        "-b",
        type=str,
        help="Base URL for the registry file.",
    )
    args = parser.parse_args()
    path = Path(args.path) if args.path else DATA_PATH
    base_url = args.base_url if args.base_url else BASE_URL

    write_registry(path, REGISTRY_PATH, base_url, args.append)
REPO_OWNER = "MODFLOW-ORG"
REPO_NAME = "modflow-devtools"
REPO_REF = "develop"
PROJ_ROOT = Path(__file__).parents[1]
DATA_RELPATH = "data"
# NOTE(review): REPO_NAME is hyphenated, so this resolves to
# <root>/modflow-devtools/data, while the package directory (and the
# .gitignore entry) is modflow_devtools/data -- confirm which is intended.
# make_registry.py builds the same path, so both must change together.
DATA_PATH = PROJ_ROOT / REPO_NAME / DATA_RELPATH
REGISTRY_PATH = DATA_PATH / "registry.txt"
MODELS_PATH = DATA_PATH / "models.toml"
BASE_URL = f"https://github.com/{REPO_OWNER}/{REPO_NAME}/raw/{REPO_REF}/{DATA_RELPATH}/"
# Strip a ".devN" suffix if present. partition() returns the whole string
# when the separator is absent; rpartition() would return "" in that case,
# leaving release versions (e.g. "1.6.0") with an empty version string.
VERSION = modflow_devtools.__version__.partition(".dev")[0]
FETCHER = pooch.create(
    path=pooch.os_cache(REPO_NAME),
    base_url=BASE_URL,
    version=VERSION,
    registry=None,
)

# Both data files are generated by make_registry.py; the module cannot
# build its model functions without them, so fail at import time.
if not REGISTRY_PATH.exists():
    raise FileNotFoundError(f"Registry file {REGISTRY_PATH} not found.")

if not MODELS_PATH.exists():
    raise FileNotFoundError(f"Models file {MODELS_PATH} not found.")

FETCHER.load_registry(REGISTRY_PATH)


def _generate_function(model_name: str, files: list) -> callable:
    """
    Build a zero-argument function that fetches the files of one model.

    Parameters
    ----------
    model_name : str
        Name given to the generated function.
    files : list
        Registry names of the files belonging to the model.

    Returns
    -------
    callable
        Function that calls ``FETCHER.fetch`` for every entry in ``files``
        and returns the results as a list.
    """

    def model_function() -> list:
        return [FETCHER.fetch(file) for file in files]

    # Make the generated function identify itself by the model name in
    # tracebacks, repr() and help().
    model_function.__name__ = model_name
    model_function.__qualname__ = model_name
    model_function.__doc__ = f"Fetch the files of model '{model_name}'."
    return model_function


def _make_functions(models_path: Path, registry_path: Path):
    """
    Attach one fetch function per model to this module's namespace.

    Parameters
    ----------
    models_path : Path
        TOML file mapping each model name to a list of registry names.
    registry_path : Path
        Currently unused; kept so existing callers keep working.
    """
    with models_path.open("rb") as f:
        models = tomli.load(f)
    for model_name, files in models.items():
        globals()[model_name] = _generate_function(model_name, files)


_make_functions(MODELS_PATH, REGISTRY_PATH)
{include-group = "test"}, {include-group = "docs"}, - {include-group = "dfn"} + {include-group = "dfn"}, + {include-group = "models"}, ] [project.urls]