diff --git a/.github/workflows/prepare-release.yml b/.github/workflows/prepare-release.yml index 8b67cdc..b08f46f 100644 --- a/.github/workflows/prepare-release.yml +++ b/.github/workflows/prepare-release.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8] + python-version: [3.9] env: PYTHON_PACKAGE: data_pipelines_cli steps: diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index d88932d..fad8bf7 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10"] steps: - uses: actions/checkout@v3 @@ -32,7 +32,7 @@ jobs: - name: Check pre-commit status run: | - pip install .[tests] + pip install .[tests,databricks] pip freeze pipdeptree pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2570c97..89fcf4b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: -- repo: https://github.com/pre-commit/mirrors-isort - rev: v5.10.1 +- repo: https://github.com/PyCQA/isort + rev: 5.12.0 hooks: - id: isort diff --git a/README.md b/README.md index 2f9a568..3693d58 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # data-pipelines-cli -[![Python Version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue.svg)](https://github.com/getindata/data-pipelines-cli) +[![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10-blue.svg)](https://github.com/getindata/data-pipelines-cli) [![PyPI Version](https://badge.fury.io/py/data-pipelines-cli.svg)](https://pypi.org/project/data-pipelines-cli/) [![Downloads](https://pepy.tech/badge/data-pipelines-cli)](https://pepy.tech/project/data-pipelines-cli) 
[![Maintainability](https://api.codeclimate.com/v1/badges/e44ed9383a42b59984f6/maintainability)](https://codeclimate.com/github/getindata/data-pipelines-cli/maintainability) diff --git a/data_pipelines_cli/cli_commands/generate/databricks_job.py b/data_pipelines_cli/cli_commands/generate/databricks_job.py new file mode 100644 index 0000000..7d86294 --- /dev/null +++ b/data_pipelines_cli/cli_commands/generate/databricks_job.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import click +from dbt_databricks_factory.cli import create_job_cli +from dbt_databricks_factory.config import GitProvider + + +@click.command("databricks-job", help="Generate a Databricks job") +@click.argument( + "manifest-file", + type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True), +) +@click.option("--job-name", required=True, help="Name of the job to create.") +@click.option("--project-dir", required=True, help="Path to dbt project directory.") +@click.option("--profiles-dir", required=True, help="Path to dbt profiles directory.") +@click.option("--cron-schedule", help="Cron schedule for the job.") +@click.option( + "--job-cluster", multiple=True, type=click.Tuple([str, str]), help="Job cluster config." 
+) +@click.option( + "--task-cluster", + multiple=True, + type=click.Tuple([str, str]), + help="Job cluster name or existing cluster id.", +) +@click.option("--default-task-cluster", help="Default task cluster name or existing cluster id.") +@click.option("--library", multiple=True, type=str, help="Libraries config.") +@click.option("--git-url", required=True, help="Git url.") +@click.option("--git-branch", help="Git branch.") +@click.option("--git-commit", help="Git commit.") +@click.option("--git-tag", help="Git tag.") +@click.option( + "--git-provider", + required=True, + help="Git provider.", + type=click.Choice([provider.value for provider in GitProvider]), +) +@click.option("--pretty", is_flag=True, help="Pretty print the output.") +@click.option( + "--output-file", + help="Output file path.", + type=click.Path(file_okay=True, dir_okay=False, writable=True), +) +def generate_databricks_job_command( + job_name: str, + manifest_file: str, + project_dir: str, + profiles_dir: str, + cron_schedule: str | None, + job_cluster: list[tuple[str, str]], + task_cluster: list[tuple[str, str]], + default_task_cluster: str | None, + library: list[str], + git_url: str, + git_branch: str | None, + git_commit: str | None, + git_tag: str | None, + git_provider: str, + pretty: bool, + output_file: str | None, +) -> None: + """Generate a Databricks job by forwarding the parsed CLI options to ``create_job_cli``.""" + create_job_cli( + job_name, + manifest_file, + project_dir, + profiles_dir, + cron_schedule, + job_cluster, + task_cluster, + default_task_cluster, + library, + git_url, + git_branch, + git_commit, + git_tag, + git_provider, + pretty, + output_file, + ) diff --git a/data_pipelines_cli/cli_commands/generate/generate.py b/data_pipelines_cli/cli_commands/generate/generate.py index fa2f706..4cf1359 100644 --- a/data_pipelines_cli/cli_commands/generate/generate.py +++ b/data_pipelines_cli/cli_commands/generate/generate.py @@ -1,3 +1,5 @@ +import logging + import click from .model_yaml import generate_model_yamls_command @@ -13,3 +15,10 
@@ def generate_group() -> None: generate_group.add_command(generate_model_yamls_command) generate_group.add_command(generate_source_sqls_command) generate_group.add_command(generate_source_yamls_command) + +try: + from .databricks_job import generate_databricks_job_command + + generate_group.add_command(generate_databricks_job_command) +except ImportError: + logging.info("Databricks CLI not installed") diff --git a/docs/index.rst b/docs/index.rst index 920c27f..3f077aa 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,7 @@ ``Data Pipelines CLI``: CLI for data platform ============================================== -.. image:: https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9-blue.svg +.. image:: https://img.shields.io/badge/python-3.9%20%7C%203.10-blue.svg :target: https://github.com/getindata/data-pipelines-cli :alt: Python Version diff --git a/docs/source/data_pipelines_cli.cli_commands.generate.rst b/docs/source/data_pipelines_cli.cli_commands.generate.rst index 9cbf127..9feb682 100644 --- a/docs/source/data_pipelines_cli.cli_commands.generate.rst +++ b/docs/source/data_pipelines_cli.cli_commands.generate.rst @@ -36,6 +36,14 @@ data\_pipelines\_cli.cli\_commands.generate.source\_sql module data\_pipelines\_cli.cli\_commands.generate.source\_yaml module --------------------------------------------------------------- +.. automodule:: data_pipelines_cli.cli_commands.generate.source_yaml + :members: + :undoc-members: + :show-inheritance: + +data\_pipelines\_cli.cli\_commands.generate.databricks\_job module +------------------------------------------------------------------ + -.. 
automodule:: data_pipelines_cli.cli_commands.generate.source_yaml +.. automodule:: data_pipelines_cli.cli_commands.generate.databricks_job :members: :undoc-members: @@ -48,4 +56,3 @@ data\_pipelines\_cli.cli\_commands.generate.utils module :members: :undoc-members: :show-inheritance: - diff --git a/setup.py b/setup.py index 6dfbcd0..0630007 100644 --- a/setup.py +++ b/setup.py @@ -31,11 +31,13 @@ "postgres": ["dbt-postgres==1.5.4"], "snowflake": ["dbt-snowflake==1.5.2"], "redshift": ["dbt-redshift==1.5.9"], + "databricks": ["dbt-databricks-factory>=0.1.1"], "dbt-all": [ "dbt-bigquery==1.5.5", "dbt-postgres==1.5.4", "dbt-snowflake==1.5.2", "dbt-redshift==1.5.9", + "dbt-databricks-factory>=0.1.1", ], # --- "docker": ["docker==6.0.1"], @@ -75,10 +77,9 @@ long_description_content_type="text/markdown", license="Apache Software License (Apache 2.0)", license_files=("LICENSE",), - python_requires=">=3.8", + python_requires=">=3.9", classifiers=[ "Development Status :: 1 - Planning", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ], diff --git a/tests/cli_commands/test_generate.py b/tests/cli_commands/test_generate.py index 6451884..58685fe 100644 --- a/tests/cli_commands/test_generate.py +++ b/tests/cli_commands/test_generate.py @@ -323,3 +323,97 @@ def test_is_ephemeral_model(self): self.assertFalse(_is_ephemeral_model(example_dict, "c")) with self.assertRaises(DataPipelinesError): _is_ephemeral_model(example_dict, "d") + + +def test_generate_databricks_job() -> None: + runner = CliRunner() + result = runner.invoke( + _cli, + [ + "generate", + "databricks-job", + "--project-dir", + "/project/dir", + "--profiles-dir", + "/dbfs/profiles", + "--job-name", + "my-job", + "--library", + "my-library==1.0.0", + "--git-url", + "https://my-git.url.com", + "--git-provider", + "gitHub", + "--git-branch", + "my-branch", + "--default-task-cluster", + "my-cluster", + "--pretty", + str(GOLDENS_DIR_PATH / "target" / "manifest.json"), + ], + ) + expected = { + "name": "my-job", 
"tasks": [ + { + "task_key": "model_my_new_project_my_first_dbt_model-run", + "dbt_task": { + "project_directory": "/project/dir", + "commands": [ + "dbt deps", + "dbt run --profiles-dir /dbfs/profiles --select my_first_dbt_model", + ], + }, + "libraries": [{"pypi": {"package": "my-library==1.0.0"}}], + "existing_cluster_id": "my-cluster", + }, + { + "task_key": "model_my_new_project_my_first_dbt_model-test", + "dbt_task": { + "project_directory": "/project/dir", + "commands": [ + "dbt deps", + "dbt test --profiles-dir /dbfs/profiles --select my_first_dbt_model", + ], + }, + "depends_on": [{"task_key": "model_my_new_project_my_first_dbt_model-run"}], + "libraries": [{"pypi": {"package": "my-library==1.0.0"}}], + "existing_cluster_id": "my-cluster", + }, + { + "task_key": "model_my_new_project_my_second_dbt_model-run", + "dbt_task": { + "project_directory": "/project/dir", + "commands": [ + "dbt deps", + "dbt run --profiles-dir /dbfs/profiles --select my_second_dbt_model", + ], + }, + "depends_on": [{"task_key": "model_my_new_project_my_first_dbt_model-test"}], + "libraries": [{"pypi": {"package": "my-library==1.0.0"}}], + "existing_cluster_id": "my-cluster", + }, + { + "task_key": "model_my_new_project_my_second_dbt_model-test", + "dbt_task": { + "project_directory": "/project/dir", + "commands": [ + "dbt deps", + "dbt test --profiles-dir /dbfs/profiles --select my_second_dbt_model", + ], + }, + "depends_on": [{"task_key": "model_my_new_project_my_second_dbt_model-run"}], + "libraries": [{"pypi": {"package": "my-library==1.0.0"}}], + "existing_cluster_id": "my-cluster", + }, + ], + "job_clusters": [], + "git_source": { + "git_url": "https://my-git.url.com", + "git_provider": "gitHub", + "git_branch": "my-branch", + }, + "format": "MULTI_TASK", + } + assert result.output == json.dumps(expected, indent=2) + "\n" + assert result.exit_code == 0 diff --git a/tox.ini b/tox.ini index 75ba379..f35d5c0 100644 --- a/tox.ini +++ b/tox.ini @@ -1,16 +1,16 @@ [tox] -envlist = 
py38, py39, py310 +envlist = py39, py310 [gh-actions] python = - 3.8: py38 3.9: py39 3.10: py310 [testenv] extras = tests + databricks commands= python -m pytest --cov data_pipelines_cli --cov-report xml --cov-report term-missing --ignore=venv