diff --git a/.dockerignore b/.dockerignore index 7f016672e82c3..c22816e799a13 100644 --- a/.dockerignore +++ b/.dockerignore @@ -30,16 +30,21 @@ !airflow !dags !dev -#!/docs - we do not need docs in the Docker! +!docs !licenses !scripts !tests !.coveragerc !.rat-excludes +!.flake8 +!pylintrc !LICENSE !MANIFEST.in !NOTICE +!CHANGELOG.txt +!.github +!run-tests # Avoid triggering context change on README change (new companies using Airflow) # So please do not uncomment this line ;) @@ -98,3 +103,7 @@ airflow/www/static/docs # Exclude auto-generated Finder files on Mac OS **/.DS_Store **/Thumbs.db + +# Exclude docs generated files +docs/_build/ +docs/_api/ diff --git a/.flake8 b/.flake8 index 5be10da45bafb..b921d33cc9190 100644 --- a/.flake8 +++ b/.flake8 @@ -1,5 +1,5 @@ [flake8] max-line-length = 110 ignore = E731,W504 -exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.tox,.eggs,*.egg,*/_vendor/*,node_modules +exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.eggs,*.egg,*/_vendor/*,node_modules format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s diff --git a/.gitignore b/.gitignore index 0ccc81842735e..4f80809da06c6 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,7 @@ airflow-*.pid .mypy_cache/ .dmypy.json dmypy.json + +# Needed for CI Dockerfile build system +.build +/tmp diff --git a/.travis.yml b/.travis.yml index 858593e8f45fb..529a8364dc4e4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,77 +18,61 @@ # dist: xenial language: python -python: - - "3.6" env: global: - - TRAVIS_CACHE=$HOME/.travis_cache/ - - CASS_DRIVER_BUILD_CONCURRENCY=8 - - CASS_DRIVER_NO_CYTHON=1 + - BUILD_ID=${TRAVIS_BUILD_ID} + - AIRFLOW_CONTAINER_BRANCH_NAME=${TRAVIS_BRANCH} matrix: - - TOX_ENV=py35-backend_mysql-env_docker PYTHON_VERSION=3 - - TOX_ENV=py35-backend_sqlite-env_docker PYTHON_VERSION=3 - - TOX_ENV=py35-backend_postgres-env_docker PYTHON_VERSION=3 - - TOX_ENV=py35-backend_postgres-env_kubernetes 
KUBERNETES_VERSION=v1.13.0 PYTHON_VERSION=3 - + - BACKEND=mysql ENV=docker + - BACKEND=postgres ENV=docker + - BACKEND=sqlite ENV=docker + - BACKEND=postgres ENV=kubernetes KUBERNETES_VERSION=v1.13.0 +python: + - "3.6" + - "3.5" +matrix: + exclude: + - env: BACKEND=postgres ENV=docker + python: "3.5" + - env: BACKEND=mysql ENV=docker + python: "3.5" + - env: BACKEND=postgres ENV=kubernetes KUBERNETES_VERSION=v1.13.0 + python: "3.5" + - env: BACKEND=sqlite ENV=docker + python: "3.6" stages: - pre-test - test - jobs: include: - name: Flake8 stage: pre-test - install: pip install flake8 - script: flake8 + script: ./scripts/ci/ci_flake8.sh + install: skip - name: mypy stage: pre-test - install: pip install mypy - script: mypy airflow tests + script: ./scripts/ci/ci_mypy.sh + install: skip - name: Check license header stage: pre-test + script: ./scripts/ci/ci_check_license.sh install: skip - script: scripts/ci/6-check-license.sh - name: Lint Dockerfile stage: pre-test + script: ./scripts/ci/ci_lint_dockerfile.sh install: skip - script: scripts/ci/ci_lint_dockerfile.sh - name: Check docs - stage: pre-test - install: pip install -e .[doc] - script: docs/build.sh + stage: test + script: ./scripts/ci/ci_docs.sh + install: skip - name: Pylint stage: pre-test - install: pip install pylint~=2.3.1 # Ensure the same version as in setup.py - script: scripts/ci/ci_pylint.sh - cache: false -cache: - directories: - - $HOME/.wheelhouse/ - - $HOME/.cache/pip - - $HOME/.travis_cache/ + script: ./scripts/ci/ci_pylint.sh + install: skip +services: + - docker before_install: - # Required for K8s v1.10.x. See - # https://github.com/kubernetes/kubernetes/issues/61058#issuecomment-372764783 - - if [ ! 
-z "$KUBERNETES_VERSION" ]; then sudo mount --make-shared / && sudo service docker restart; fi + - ./scripts/ci/ci_before_install.sh install: - - pip install --upgrade pip - - docker-compose -f scripts/ci/docker-compose.yml pull --quiet -script: - - if [ -z "$KUBERNETES_VERSION" ]; then - docker-compose --log-level ERROR -f scripts/ci/docker-compose.yml run airflow-testing /app/scripts/ci/run-ci.sh; - fi - - if [ ! -z "$KUBERNETES_VERSION" ]; then - ./scripts/ci/kubernetes/minikube/stop_minikube.sh && - ./scripts/ci/kubernetes/setup_kubernetes.sh && - ./scripts/ci/kubernetes/kube/deploy.sh -d persistent_mode && - MINIKUBE_IP=$(minikube ip) docker-compose --log-level ERROR -f scripts/ci/docker-compose.yml -f scripts/ci/docker-compose-kubernetes.yml run airflow-testing /app/scripts/ci/run-ci.sh; - fi - - if [ ! -z "$KUBERNETES_VERSION" ]; then - ./scripts/ci/kubernetes/minikube/stop_minikube.sh && - ./scripts/ci/kubernetes/setup_kubernetes.sh && - ./scripts/ci/kubernetes/kube/deploy.sh -d git_mode && - MINIKUBE_IP=$(minikube ip) docker-compose --log-level ERROR -f scripts/ci/docker-compose.yml -f scripts/ci/docker-compose-kubernetes.yml run airflow-testing /app/scripts/ci/run-ci.sh; - fi -before_cache: - - sudo chown -R travis:travis $HOME/.cache/pip $HOME/.wheelhouse/ + - ./hooks/build +script: "./scripts/ci/ci_run_airflow_testing.sh" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f3844c09865f9..34c68bcc43b50 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,53 +22,91 @@ under the License. Contributions are welcome and are greatly appreciated! Every little bit helps, and credit will always be given. 
-# Table of Contents - * [TOC](#table-of-contents) - * [Types of Contributions](#types-of-contributions) - - [Report Bugs](#report-bugs) - - [Fix Bugs](#fix-bugs) - - [Implement Features](#implement-features) - - [Improve Documentation](#improve-documentation) - - [Submit Feedback](#submit-feedback) - * [Documentation](#documentation) - * [Development and Testing](#development-and-testing) - - [Setting up a development environment](#setting-up-a-development-environment) - - [Running unit tests](#running-unit-tests) - * [Pull requests guidelines](#pull-request-guidelines) - * [Changing the Metadata Database](#changing-the-metadata-database) - -## Types of Contributions - -### Report Bugs + + +**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* + +- [Types of Contributions](#types-of-contributions) + - [Report Bugs](#report-bugs) + - [Fix Bugs](#fix-bugs) + - [Implement Features](#implement-features) + - [Improve Documentation](#improve-documentation) + - [Submit Feedback](#submit-feedback) +- [Documentation](#documentation) +- [Local virtualenv development environment](#local-virtualenv-development-environment) + - [Installation](#installation) + - [Running individual tests](#running-individual-tests) + - [Running tests directly from the IDE](#running-tests-directly-from-the-ide) +- [Integration test development environment](#integration-test-development-environment) + - [Prerequisites](#prerequisites) + - [Using the Docker Compose environment](#using-the-docker-compose-environment) + - [Entering bash shell in Docker Compose environment](#entering-bash-shell-in-docker-compose-environment) + - [Running individual tests within the container](#running-individual-tests-within-the-container) + - [Running static code analysis](#running-static-code-analysis) + - [Running static code analysis from the host](#running-static-code-analysis-from-the-host) + - [Running static code analysis in the docker compose 
environment](#running-static-code-analysis-in-the-docker-compose-environment) + - [Running static code analysis on selected files/modules](#running-static-code-analysis-on-selected-filesmodules) + - [Automation of image building](#automation-of-image-building) + - [Local Docker Compose scripts](#local-docker-compose-scripts) + - [Running the whole suite of tests](#running-the-whole-suite-of-tests) + - [Stopping the environment](#stopping-the-environment) + - [Fixing file/directory ownership](#fixing-filedirectory-ownership) + - [Building the images](#building-the-images) + - [Force pulling the images](#force-pulling-the-images) + - [Cleaning up cached Docker images/containers](#cleaning-up-cached-docker-imagescontainers) + - [Troubleshooting](#troubleshooting) +- [Pylint checks](#pylint-checks) +- [Git hooks](#git-hooks) +- [Pull Request Guidelines](#pull-request-guidelines) +- [Testing on Travis CI](#testing-on-travis-ci) + - [Travis CI GitHub App (new version)](#travis-ci-github-app-new-version) + - [Travis CI GitHub Services (legacy version)](#travis-ci-github-services-legacy-version) + - [Prefer travis-ci.com over travis-ci.org](#prefer-travis-cicom-over-travis-ciorg) +- [Changing the Metadata Database](#changing-the-metadata-database) +- [Setting up the node / npm javascript environment](#setting-up-the-node--npm-javascript-environment) + - [Node/npm versions](#nodenpm-versions) + - [Using npm to generate bundled files](#using-npm-to-generate-bundled-files) + - [npm](#npm) + - [npm packages](#npm-packages) + - [Upgrading npm packages](#upgrading-npm-packages) + - [Javascript Style Guide](#javascript-style-guide) + + + +# Types of Contributions + +## Report Bugs Report bugs through [Apache Jira](https://issues.apache.org/jira/browse/AIRFLOW) Please report relevant information and preferably code that exhibits the problem. -### Fix Bugs +## Fix Bugs Look through the Jira issues for bugs. Anything is open to whoever wants to implement it. 
-### Implement Features +## Implement Features -Look through the [Apache Jira](https://issues.apache.org/jira/browse/AIRFLOW) for features. Any unassigned "Improvement" issue is open to whoever wants to implement it. +Look through the [Apache Jira](https://issues.apache.org/jira/browse/AIRFLOW) for features. +Any unassigned "Improvement" issue is open to whoever wants to implement it. We've created the operators, hooks, macros and executors we needed, but we made sure that this part of Airflow is extensible. New operators, hooks, macros and executors are very welcomed! -### Improve Documentation +## Improve Documentation Airflow could always use better documentation, whether as part of the official Airflow docs, in docstrings, `docs/*.rst` or even on the web as blog posts or articles. -### Submit Feedback +## Submit Feedback -The best way to send feedback is to open an issue on [Apache Jira](https://issues.apache.org/jira/browse/AIRFLOW) +The best way to send feedback is to open an issue on +[Apache Jira](https://issues.apache.org/jira/browse/AIRFLOW) If you are proposing a feature: @@ -76,7 +114,7 @@ If you are proposing a feature: - Keep the scope as narrow as possible, to make it easier to implement. - Remember that this is a volunteer-driven project, and that contributions are welcome :) -## Documentation +# Documentation The latest API documentation is usually available [here](https://airflow.apache.org/). To generate a local version, @@ -95,123 +133,417 @@ cd docs ./start_doc_server.sh ``` -Only a subset of the API reference documentation builds. Install additional -extras to build the full API reference. +# Local virtualenv development environment -## Development and Testing +When you develop Airflow you can create local virtualenv with all requirements required by Airflow. 
-### Setting up a development environment +Advantage of local installation is that everything works locally, you do not have to enter Docker/container +environment and you can easily debug the code locally. You can also have access to python virtualenv that +contains all the necessary requirements and use it in your local IDE - this aids autocompletion, and +running tests directly from within the IDE. -There are three ways to setup an Apache Airflow development environment. +The disadvantage is that you have to maintain your dependencies and local environment consistent with +other development environments that you have on your local machine. -1. Using tools and libraries installed directly on your system +Another disadvantage is that you cannot run tests that require +external components - mysql, postgres database, hadoop, mongo, cassandra, redis etc. +The tests in Airflow are a mixture of unit and integration tests and some of them +require those components to be set up. Only real unit tests can be run by default in local environment. - Install Python (2.7.x or 3.5.x), MySQL, and libxml by using system-level package - managers like yum, apt-get for Linux, or Homebrew for Mac OS at first. Refer to the [base CI Dockerfile](https://github.com/apache/airflow-ci/blob/master/Dockerfile) for - a comprehensive list of required packages. +If you want to run integration tests, you need to configure and install the dependencies on your own. - Then install python development requirements. It is usually best to work in a virtualenv: +It's also very difficult to make sure that your local environment is consistent with others' environments. +This can often lead to "works for me" syndrome. It's better to use the Docker Compose integration test +environment in case you want reproducible environment consistent with other people. - ```bash - cd $AIRFLOW_HOME - virtualenv env - source env/bin/activate - pip install -e '.[devel]' - ``` +## Installation -2. 
Using a Docker container +Install Python (3.5 or 3.6), MySQL, and libxml by using system-level package +managers like yum, apt-get for Linux, or Homebrew for Mac OS at first. +Refer to the [Dockerfile](Dockerfile) for a comprehensive list of required packages. - Go to your Airflow directory and start a new docker container. You can choose between Python 2 or 3, whatever you prefer. +In order to use your IDE you can use the virtual environment. Ideally +you should setup virtualenv for all python versions that Airflow supports (3.5, 3.6). +An easy way to create the virtualenv is to use +[virtualenvwrapper](https://virtualenvwrapper.readthedocs.io/en/latest/) - it allows +you to easily switch between virtualenvs using `workon` command and manage +your virtual environments more easily. Typically creating the environment can be done by: - ``` - # Start docker in your Airflow directory - docker run -t -i -v `pwd`:/airflow/ -w /airflow/ python:3 bash +``` +mkvirtualenv <ENV_NAME> --python=python<VERSION> +``` + +Then you need to install python PIP requirements. Typically it can be done with: +`pip install -e ".[devel]"`. Then you need to run `airflow initdb` to create sqlite database. + +Once initialization is done, you should select the virtualenv you initialized as the +project's default virtualenv in your IDE and run tests efficiently. + +After setting it up - you can use the usual "Run Test" option of the IDE and have +the autocomplete and documentation support from IDE as well as you can +debug and view the sources of Airflow - which is very helpful during +development. + +## Running individual tests + +Once you activate virtualenv (or enter docker container) as described below you should be able to run +`run-tests` at will (it is in the path in Docker environment but you need to prepend it with `./` in local +virtualenv (`./run-tests`)). + +Note that this script has several flags that can be useful for your testing. 
+ +```text +Usage: run-tests [FLAGS] [TESTS_TO_RUN] -- + +Runs tests specified (or all tests if no tests are specified) + +Flags: + +-h, --help + Shows this help message. + +-i, --with-db-init + Forces database initialization before tests + +-s, --nocapture + Don't capture stdout when running the tests. This is useful if you are + debugging with ipdb and want to drop into console with it + by adding this line to source code: + + import ipdb; ipdb.set_trace() + +-v, --verbose + Verbose output showing coloured output of tests being run and summary + of the tests - in a manner similar to the tests run in the CI environment. +``` + +You can pass extra parameters to nose, by adding nose arguments after `--` + +For example, in order to just execute the "core" unit tests and add ipdb set_trace method, you can +run the following command: + +```bash +./run-tests tests.core:CoreTest --nocapture --verbose +``` + +or a single test method without colors or debug logs: + +```bash +./run-tests tests.core:CoreTest.test_check_operators +``` +Note that `./run-tests` script runs tests but the first time it runs, it performs database initialization. +If you run further tests without leaving the environment, the database will not be initialized, but you +can always force database initialization with `--with-db-init` (`-i`) switch. The scripts will +inform you what you can do when they are run. - # To install all of airflows dependencies to run all tests (this is a lot) - pip install -e . - - # To run only certain tests install the devel requirements and whatever is required - # for your test. See setup.py for the possible requirements. For example: - pip install -e '.[gcp,devel]' +## Running tests directly from the IDE - # Init the database - airflow initdb +Once you configure your tests to use the virtualenv you created, 
running tests +from IDE is as simple as: - nosetests -v tests/hooks/test_druid_hook.py +![Run unittests](images/run_unittests.png) - test_get_first_record (tests.hooks.test_druid_hook.TestDruidDbApiHook) ... ok - test_get_records (tests.hooks.test_druid_hook.TestDruidDbApiHook) ... ok - test_get_uri (tests.hooks.test_druid_hook.TestDruidDbApiHook) ... ok - test_get_conn_url (tests.hooks.test_druid_hook.TestDruidHook) ... ok - test_submit_gone_wrong (tests.hooks.test_druid_hook.TestDruidHook) ... ok - test_submit_ok (tests.hooks.test_druid_hook.TestDruidHook) ... ok - test_submit_timeout (tests.hooks.test_druid_hook.TestDruidHook) ... ok - test_submit_unknown_response (tests.hooks.test_druid_hook.TestDruidHook) ... ok +Note that while most of the tests are typical "unit" tests that do not +require external components, there are a number of tests that are more of +"integration" or even "system" tests (depending on the convention you use). +Those tests interact with external components. For those tests +you need to run the complete Docker Compose-based environment described below. - ---------------------------------------------------------------------- - Ran 8 tests in 3.036s +# Integration test development environment - OK - ``` +This is the environment that is used during CI builds on Travis CI. We have scripts to reproduce the +Travis environment and you can enter the environment and run it locally. - The Airflow code is mounted inside of the Docker container, so if you change something using your favorite IDE, you can directly test it in the container. +The scripts used by Travis CI also run image builds which make the images contain all the sources. You can +see which scripts are used in [.travis.yml](.travis.yml) file. -3. Using [Docker Compose](https://docs.docker.com/compose/) and Airflow's CI scripts +## Prerequisites - Start a docker container through Compose for development to avoid installing the packages directly on your system. 
The following will give you a shell inside a container, run all required service containers (MySQL, PostgresSQL, krb5 and so on) and install all the dependencies: +**Docker** - ```bash - docker-compose -f scripts/ci/docker-compose.yml run airflow-testing bash - # From the container - export TOX_ENV=py35-backend_mysql-env_docker - /app/scripts/ci/run-ci.sh - ``` +You need to have [Docker CE](https://docs.docker.com/get-started/) installed. - If you wish to run individual tests inside of Docker environment you can do as follows: +IMPORTANT!!! : Mac OS Docker default Disk size settings - ```bash - # From the container (with your desired environment) with druid hook - export TOX_ENV=py35-backend_mysql-env_docker - /app/scripts/ci/run-ci.sh -- tests/hooks/test_druid_hook.py - ``` +When you develop on Mac OS you usually have not enough disk space for Docker if you start using it seriously. +You should increase disk space available before starting to work with the environment. Usually you have weird +stops of docker containers when you run out of Disk space. It might not be obvious that space is an issue. +If you get into weird behaviour try [Cleaning Up Docker](#cleaning-up-cached-docker-imagescontainers) +See [Docker for Mac - Space](https://docs.docker.com/docker-for-mac/space/) for details of increasing +disk space available for Docker on Mac. -### Running unit tests +At least 128 GB of Disk space is recommended. You can also get by with smaller space but you should more often +clean the docker disk space periodically. -To run tests locally, once your unit test environment is setup (directly on your -system or through our Docker setup) you should be able to simply run -``./run_unit_tests.sh`` at will. 
+**Getopt and coreutils** -For example, in order to just execute the "core" unit tests, run the following: +If you are on Mac OS: +* Run `brew install gnu-getopt coreutils` (if you use brew, or use equivalent command for ports) +* Then (with brew) link the gnu-getopt to become default as suggested by brew by typing: +```bash +echo 'export PATH="/usr/local/opt/gnu-getopt/bin:$PATH"' >> ~/.bash_profile +. ~/.bash_profile ``` -./run_unit_tests.sh tests.core:CoreTest -s --logging-level=DEBUG +* Log out and log back in afterwards + +If you are on Linux: + +* Run `apt install util-linux coreutils` or equivalent if your system is not Debian-based. + +## Using the Docker Compose environment + +### Entering bash shell in Docker Compose environment + +Default environment settings (python 3.6, sqlite backend, docker environment): +```bash + ./scripts/ci/local_ci_enter_environment.sh ``` -or a single test method: +Overriding default environment settings: +```bash +PYTHON_VERSION=3.5 BACKEND=postgres ENV=docker ./scripts/ci/local_ci_enter_environment.sh ``` -./run_unit_tests.sh tests.core:CoreTest.test_check_operators -s --logging-level=DEBUG + +### Running individual tests within the container + +Once you are inside the environment you can run individual tests as described in +[Running individual tests](#running-individual-tests). + +### Running static code analysis + +We have a number of static code checks that are run in Travis CI but you can run them locally as well. +All the scripts are available in [scripts/ci](scripts/ci) folder. + +All these tests run in python3.6 environment. Note that the first time you run the checks it might take some +time to rebuild the docker images required to run the tests, but all subsequent runs will be much faster - +the build phase will just check if your code has changed and rebuild as needed. 
+ +The checks below are run in a docker environment, which means that if you run them locally, +they should give the same results as the tests run in TravisCI without special environment preparation. + +#### Running static code analysis from the host + +You can trigger the static checks from the host environment, without entering Docker container. You +do that by running appropriate scripts (the same is done in TravisCI): + +* [ci_docs.sh](scripts/ci/ci_docs.sh) - checks that documentation can be built without warnings. +* [ci_flake8.sh](scripts/ci/ci_flake8.sh) - runs flake8 source code style guide enforcement tool +* [ci_mypy.sh](scripts/ci/ci_mypy.sh) - runs mypy type annotation consistency check +* [ci_pylint.sh](scripts/ci/ci_pylint.sh) - runs pylint static code checker +* [ci_lint_dockerfile.sh](scripts/ci/ci_lint_dockerfile.sh) - runs lint checker for the Dockerfile +* [ci_check_license.sh](scripts/ci/ci_check_license.sh) - checks if all licences are present in the sources + +Those scripts are optimised for time of rebuilds of docker image. The image will be automatically +rebuilt when needed (for example when dependencies change). + +You can also force rebuilding of the image by deleting [.build](./.build) +directory which keeps cached information about the images built. + +Documentation, after it is built, is available in [docs/_build/html](docs/_build/html) folder. +This folder is mounted from the host so you can access those files in your host as well. 
+ +#### Running static code analysis in the docker compose environment + +If you are already in the [Docker Compose Environment](#entering-bash-shell-in-docker-compose-environment) +you can also run the same static checks from within container: + +* Mypy: `./scripts/ci/in_container/run_mypy.sh airflow tests` +* Pylint: `./scripts/ci/in_container/run_pylint.sh` +* Flake8: `./scripts/ci/in_container/run_flake8.sh` +* Licence check: `./scripts/ci/in_container/run_check_licence.sh` +* Documentation: `./scripts/ci/in_container/run_docs_build.sh` + +#### Running static code analysis on selected files/modules + +In all static check scripts - both in container and in the host you can also pass module/file path as +parameters of the scripts to only check selected modules or files. For example: + +In container: + +`./scripts/ci/in_container/run_pylint.sh ./airflow/example_dags/` + +or + +`./scripts/ci/in_container/run_pylint.sh ./airflow/example_dags/test_utils.py` + +In host: + +`./scripts/ci/ci_pylint.sh ./airflow/example_dags/` + +or + +`./scripts/ci/ci_pylint.sh ./airflow/example_dags/test_utils.py` + +And similarly for other scripts. + +## Automation of image building + +When you run tests or enter environment (see below) the first time you do it, the local image will be +pulled and build for you automatically. + +Note that building image first time pulls the pre-built version of image from Dockerhub based on master +sources and rebuilds the layers that need to be rebuilt - because they changed in local sources. +This might take a bit of time when you run it for the first time and when you add new dependencies - +but rebuilding the image should be an operation done quite rarely (mostly when you start seeing some +unknown problems and want to refresh the environment). + +See [Troubleshooting section](#troubleshooting) for steps you can make to clean the environment. 
+ +Once you performed the first build, the images are rebuilt locally rather than pulled - unless you +force pull the images. But you can force it using the scripts described below. + +## Local Docker Compose scripts + +For your convenience, there are scripts that can be used in local development - +where local host sources are mounted within the docker container. +Those "local" scripts start with "local_" prefix in [scripts/ci](scripts/ci) folder and +they run Docker-Compose environment with relevant backends (mysql/postgres) +and additional services started. + +### Running the whole suite of tests + +Running all tests with default settings (python 3.6, sqlite backend, docker environment): + +```bash +./scripts/ci/local_ci_run_airflow_testing.sh ``` -or another example: + +Selecting python version, backend, docker environment: + +```bash +PYTHON_VERSION=3.5 BACKEND=postgres ENV=docker ./scripts/ci/local_ci_run_airflow_testing.sh ``` -./run_unit_tests.sh tests.contrib.operators.test_dataproc_operator:DataprocClusterCreateOperatorTest.test_create_cluster_deletes_error_cluster -s --logging-level=DEBUG + +Running kubernetes tests: +```bash +KUBERNETES_VERSION=v1.13.0 BACKEND=postgres ENV=kubernetes ./scripts/ci/local_ci_run_airflow_testing.sh ``` -To run the whole test suite with Docker Compose, do: +* PYTHON_VERSION might be one of 3.5/3.6 +* BACKEND might be one of postgres/sqlite/mysql +* ENV might be one of docker/kubernetes +* KUBERNETES_VERSION - required for Kubernetes tests - currently KUBERNETES_VERSION=v1.13.0. +The kubernetes env might not work locally as easily as other tests because it requires your host +to be set up properly (specifically it installs minikube cluster locally on your host and depending +on your machine setting it might or might not work out of the box). +We are working on making the kubernetes tests more easily reproducible locally in the future. 
+ +### Stopping the environment + +Docker-compose environment starts a number of docker containers and keeps them running. +You can tear them down by running +[scripts/ci/local_ci_stop_environment.sh](scripts/ci/local_ci_stop_environment.sh). + + +### Fixing file/directory ownership + +On Linux there is a problem with propagating ownership of created files (known Docker problem). Basically +files and directories created in container are not owned by the host user (but by the root user in our case). +This might prevent you from switching branches for example if files owned by root user are created within +your sources. In case you are on Linux host and have some files in your sources created by the root user, +you can fix the ownership of those files by running +[scripts/ci/local_ci_fix_ownership.sh](scripts/ci/local_ci_fix_ownership.sh) script. + +### Building the images + +You can manually trigger building of the local CI image using +[scripts/ci/local_ci_build.sh](scripts/ci/local_ci_build.sh). + +### Force pulling the images + +You can later force-pull the images before building them locally so that you are sure that you download +latest images from DockerHub repository before building. This can be done with +[scripts/ci/local_ci_pull_and_build.sh](scripts/ci/local_ci_pull_and_build.sh) script. + +## Cleaning up cached Docker images/containers + +Note that you might need to cleanup your Docker environment occasionally. The images are quite big +(1.5GB for both images needed for static code analysis and CI tests). And if you often rebuild/update +images you might end up with some unused image data. + +Cleanup can be performed with `docker system prune` command. In case you have huge problems with disk space +and want to clean-up all image data you can run `docker system prune --all`. You might need to +[Stop the environment](#stopping-the-environment) in order to clean everything including running containers. 
+ +If you are on Mac OS and you end up with not enough disk space for Docker you should increase disk space +available for Docker. See [Docker for Mac - Space](https://docs.docker.com/docker-for-mac/space/) for details. + +## Troubleshooting + +In case you have problems with the Docker Compose environment - try the following: + +1. [Stop the environment](#stopping-the-environment) +2. Delete [.build](.build) +3. [Force pull the images](#force-pulling-the-images) +4. Re-run the scripts +5. [Clean Up Docker engine](#cleaning-up-cached-docker-imagescontainers) +6. [Fix file/directory ownership](#fixing-filedirectory-ownership) +7. Run `docker system prune --all` to cleanup all images/containers +8. Restart your docker and try again +9. Restart your machine and try again +10. Run `docker system prune --all` after restart (if you previously had errors when running it) +11. Remove and re-install Docker CE, then [force pull the images](#force-pulling-the-images) + +In case the problems are not solved, you can set VERBOSE variable to "true" (`export VERBOSE="true"`) +and rerun failing command, and copy&paste the output from your terminal, describe the problem and +post it in [Airflow Slack](https://apache-airflow-slack.herokuapp.com/) #troubleshooting channel. + +# Pylint checks + +Note that for pylint we are in the process of fixing pylint code checks for the whole Airflow code. This is +a huge task so we implemented an incremental approach for the process. Currently most of the code is +excluded from pylint checks via [pylint_todo.txt](scripts/ci/pylint_todo.txt). We have an open JIRA +issue [AIRFLOW-4364](https://issues.apache.org/jira/browse/AIRFLOW-4364) which has a number of +sub-tasks for each of the modules that should be made compatible. Fixing pylint problems is one of the +straightforward and easy tasks to do (but time-consuming) so if you are a first-time contributor to +Airflow you can choose one of the sub-tasks as your first issue to fix. 
The process to fix the issue looks +as follows: + +1) Remove module/modules from the [pylint_todo.txt](scripts/ci/pylint_todo.txt) +2) Run [ci_pylint.sh](scripts/ci/ci_pylint.sh) +3) Fix all the issues reported by pylint +4) Re-run [ci_pylint.sh](scripts/ci/ci_pylint.sh) +5) If you see "success" - submit PR following [Pull Request guidelines](#pull-request-guidelines) + +There are following guidelines when fixing pylint errors: + +* Ideally fix the errors rather than disable pylint checks - often you can easily refactor the code + (IntelliJ/PyCharm might be helpful when extracting methods in complex code or moving methods around) +* When disabling particular problem - make sure to disable only that error-via the symbolic name + of the error as reported by pylint +* If there is a single line where to disable particular error you can add comment following the line + that causes the problem. For example: +```python +def MakeSummary(pcoll, metric_fn, metric_keys): # pylint: disable=invalid-name ``` -# Install Docker Compose first, then this will run the tests -docker-compose -f scripts/ci/docker-compose.yml run airflow-testing /app/scripts/ci/run-ci.sh +* When there are multiple lines/block of code to disable an error you can surround the block with + comment only pylint:disable/pylint:enable lines. For example: + +```python +# pylint: disable=too-few-public-methods +class LoginForm(Form): + """Form for the user""" + username = StringField('Username', [InputRequired()]) + password = PasswordField('Password', [InputRequired()]) +# pylint: enable=too-few-public-methods ``` -Alternatively, you can also set up [Travis CI](https://travis-ci.org/) on your repo to automate this. -It is free for open source projects. +# Git hooks -Another great way of automating linting and testing is to use [Git Hooks](https://git-scm.com/book/uz/v2/Customizing-Git-Git-Hooks). 
For example you could create a `pre-commit` file based on the Travis CI Pipeline so that before each commit a local pipeline will be triggered and if this pipeline fails (returns an exit code other than `0`) the commit does not come through. -This "in theory" has the advantage that you can not commit any code that fails that again reduces the errors in the Travis CI Pipelines. +Another great way of automating linting and testing is to use + [Git Hooks](https://git-scm.com/book/uz/v2/Customizing-Git-Git-Hooks). For example you could create a +`pre-commit` file based on the Travis CI Pipeline so that before each commit a local pipeline will be +triggered and if this pipeline fails (returns an exit code other than `0`) the commit does not come through. +This "in theory" has the advantage that you can not commit any code that fails that again reduces the +errors in the Travis CI Pipelines. -Since there are a lot of tests the script would last very long so you probably only should test your new feature locally. +Since there are a lot of tests the script would last very long so you probably only should test your + new +feature locally. The following example of a `pre-commit` file allows you.. - to lint your code via flake8 @@ -262,23 +594,37 @@ See also the list of test classes and methods in `tests/core.py`. Feel free to customize based on the extras available in [setup.py](./setup.py) -## Pull Request Guidelines +# Pull Request Guidelines Before you submit a pull request from your forked repo, check that it meets these guidelines: -1. The pull request should include tests, either as doctests, unit tests, or both. The airflow repo uses [Travis CI](https://travis-ci.org/apache/airflow) to run the tests and [codecov](https://codecov.io/gh/apache/airflow) to track coverage. You can set up both for free on your fork (see the "Testing on Travis CI" section below). It will help you making sure you do not break the build with your PR and that you help increase coverage. -1. 
Please [rebase your fork](http://stackoverflow.com/a/7244456/1110993), squash commits, and resolve all conflicts. -1. Every pull request should have an associated [JIRA](https://issues.apache.org/jira/browse/AIRFLOW/?selectedTab=com.atlassian.jira.jira-projects-plugin:summary-panel). The JIRA link should also be contained in the PR description. -1. Preface your commit's subject & PR's title with **[AIRFLOW-XXX]** where *XXX* is the JIRA number. We compose release notes (i.e. for Airflow releases) from all commit titles in a release. By placing the JIRA number in the commit title and hence in the release notes, Airflow users can look into JIRA and GitHub PRs for more details about a particular change. +1. The pull request should include tests, either as doctests, unit tests, or both. The airflow repo uses +[Travis CI](https://travis-ci.org/apache/airflow) to run the tests and +[codecov](https://codecov.io/gh/apache/airflow) to track coverage. +You can set up both for free on your fork (see the "Testing on Travis CI" section below). +It will help you making sure you do not break the build with your PR and that you help increase coverage. +1. Please [rebase your fork](http://stackoverflow.com/a/7244456/1110993), squash commits, and +resolve all conflicts. +1. Every pull request should have an associated +[JIRA](https://issues.apache.org/jira/browse/AIRFLOW/?selectedTab=com.atlassian.jira.jira-projects-plugin:summary-panel). +The JIRA link should also be contained in the PR description. +1. Preface your commit's subject & PR's title with **[AIRFLOW-XXX]** where *XXX* is the JIRA number. +We compose release notes (i.e. for Airflow releases) from all commit titles in a release. +By placing the JIRA number in the commit title and hence in the release notes, Airflow users can look into +JIRA and GitHub PRs for more details about a particular change. 1. Add an [Apache License](http://www.apache.org/legal/src-headers.html) header to all new files -1. 
If the pull request adds functionality, the docs should be updated as part of the same PR. Doc string are often sufficient. Make sure to follow the Sphinx compatible standards. -1. The pull request should work for Python 2.7 and 3.5. If you need help writing code that works in both Python 2 and 3, see the documentation at the [Python-Future project](http://python-future.org) (the future package is an Airflow requirement and should be used where possible). -1. As Airflow grows as a project, we try to enforce a more consistent style and try to follow the Python community guidelines. We currently enforce most [PEP8](https://www.python.org/dev/peps/pep-0008/) and a few other linting rules. It is usually a good idea to lint locally as well using [flake8](https://flake8.readthedocs.org/en/latest/) using `flake8 airflow tests`. `git diff upstream/master -u -- "*.py" | flake8 --diff` will return any changed files in your branch that require linting. -1. We also apply [Pylint](https://www.pylint.org) for linting (static code analysis). Run locally with `./scripts/ci/ci_pylint.sh`. -1. Please read this excellent [article](http://chris.beams.io/posts/git-commit/) on commit messages and adhere to them. It makes the lives of those who come after you a lot easier. - -### Testing on Travis CI +1. If the pull request adds functionality, the docs should be updated as part of the same PR. Doc string +are often sufficient. Make sure to follow the Sphinx compatible standards. +1. The pull request should work for Python 3.5 and 3.6. +1. As Airflow grows as a project, we try to enforce a more consistent style and try to follow the Python +community guidelines. We currently enforce most [PEP8](https://www.python.org/dev/peps/pep-0008/) and a +few other linting rules - described in [Running linting and tests](#running-linting-and-tests). It's a good +idea to run tests locally before opening PR. +1. 
Please read this excellent [article](http://chris.beams.io/posts/git-commit/) on commit messages and +adhere to them. It makes the lives of those who come after you a lot easier. + +# Testing on Travis CI We currently rely heavily on Travis CI for running the full Airflow test suite as running all of the tests locally requires significant setup. You can setup @@ -291,7 +637,7 @@ setup as separate components on GitHub: 1. **Travis CI GitHub App** (new version) 1. **Travis CI GitHub Services** (legacy version) -#### Travis CI GitHub App (new version) +## Travis CI GitHub App (new version) 1. Once installed, you can configure the Travis CI GitHub App at https://github.com/settings/installations -> Configure Travis CI. @@ -303,7 +649,7 @@ repositories" for convenience, or "Only select repositories" and choose 1. You can access Travis CI for your fork at `https://travis-ci.com//airflow`. -#### Travis CI GitHub Services (legacy version) +## Travis CI GitHub Services (legacy version) The Travis CI GitHub Services versions uses an Authorized OAuth App. Note that `apache/airflow` is currently still using the legacy version. @@ -320,7 +666,7 @@ forked `/airflow` repo even though it is public. 1. You can access Travis CI for your fork at `https://travis-ci.org//airflow`. -#### Prefer travis-ci.com over travis-ci.org +## Prefer travis-ci.com over travis-ci.org The travis-ci.org site for open source projects is now legacy and new projects should instead be created on travis-ci.com for both private repos and open @@ -343,8 +689,7 @@ More information: [travis-ci-open-source]: https://docs.travis-ci.com/user/open-source-on-travis-ci-com/ [travis-ci-org-vs-com]: https://devops.stackexchange.com/a/4305/8830 - -### Changing the Metadata Database +# Changing the Metadata Database When developing features the need may arise to persist information to the the metadata database. 
Airflow has [Alembic](https://bitbucket.org/zzzeek/alembic) @@ -362,23 +707,24 @@ $ alembic revision -m "add new field to db" ~/airflow/airflow/migrations/versions/12341123_add_new_field_to_db.py ``` -## Setting up the node / npm javascript environment +# Setting up the node / npm javascript environment `airflow/www/` contains all npm-managed, front end assets. Flask-Appbuilder itself comes bundled with jQuery and bootstrap. While these may be phased out over time, these packages are currently not managed with npm. -### Node/npm versions +## Node/npm versions -Make sure you are using recent versions of node and npm. No problems have been found with node>=8.11.3 and npm>=6.1.3 +Make sure you are using recent versions of node and npm. No problems have been found with node>=8.11.3 and +npm>=6.1.3 -### Using npm to generate bundled files +## Using npm to generate bundled files -#### npm +### npm -First, npm must be available in your environment. If it is not you can run the following commands -(taken from [this source](https://gist.github.com/DanHerbert/9520689)) +First, npm must be available in your environment. If you are on Mac and it is not installed, +you can run the following commands (taken from [this source](https://gist.github.com/DanHerbert/9520689)): ``` brew install node --without-npm @@ -393,7 +739,10 @@ Add something like this to your `.bashrc` file, then `source ~/.bashrc` to refle export PATH="$HOME/.npm-packages/bin:$PATH" ``` -#### npm packages +You can also follow +[the general npm installation instructions](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm). 
+ +### npm packages To install third party libraries defined in `package.json`, run the following within the `airflow/www/` directory which will install them in a @@ -418,18 +767,18 @@ npm run prod npm run dev ``` -#### Upgrading npm packages +### Upgrading npm packages Should you add or upgrade a npm package, which involves changing `package.json`, you'll need to re-run `npm install` and push the newly generated `package-lock.json` file so we get the reproducible build. -#### Javascript Style Guide +### Javascript Style Guide We try to enforce a more consistent style and try to follow the JS community guidelines. Once you add or modify any javascript code in the project, please make sure it follows the guidelines defined in [Airbnb JavaScript Style Guide](https://github.com/airbnb/javascript). -Apache Airflow uses [ESLint](https://eslint.org/) as a tool for identifying and reporting on patterns in JavaScript, -which can be used by running any of the following commands. +Apache Airflow uses [ESLint](https://eslint.org/) as a tool for identifying and reporting on patterns +in JavaScript, which can be used by running any of the following commands. ```bash # Check JS code in .js and .html files, and report any errors/warnings diff --git a/Dockerfile b/Dockerfile index 1002689fafe7d..8e4355a7813db 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,20 +17,18 @@ # WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT. # # Base image for the whole Docker file -ARG APT_DEPS_IMAGE="airflow-apt-deps" -ARG PYTHON_BASE_IMAGE="python:3.6-slim" +ARG APT_DEPS_IMAGE="airflow-apt-deps-ci-slim" +ARG PYTHON_BASE_IMAGE="python:3.6-slim-stretch" ############################################################################################################ -# This is the base image with APT dependencies needed by Airflow. It is based on a python slim image +# This is the slim image with APT dependencies needed by Airflow. 
It is based on a python slim image # Parameters: -# PYTHON_BASE_IMAGE - base python image (python:x.y-slim) +# PYTHON_BASE_IMAGE - base python image (python:x.y-slim-stretch) ############################################################################################################ -FROM ${PYTHON_BASE_IMAGE} as airflow-apt-deps +FROM ${PYTHON_BASE_IMAGE} as airflow-apt-deps-ci-slim SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] -# Need to repeat the empty argument here otherwise it will not be set for this stage -# But the default value carries from the one set before FROM -ARG PYTHON_BASE_IMAGE +ARG PYTHON_BASE_IMAGE="python:3.6-slim-stretch" ENV PYTHON_BASE_IMAGE=${PYTHON_BASE_IMAGE} ARG AIRFLOW_VERSION="2.0.0.dev0" @@ -121,20 +119,20 @@ RUN adduser airflow \ # Parameters: # airflow-apt-deps - this is the base image for CI deps image. ############################################################################################################ -FROM airflow-apt-deps as airflow-ci-apt-deps +FROM airflow-apt-deps-ci-slim as airflow-apt-deps-ci SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ -ARG APT_DEPS_IMAGE +ARG APT_DEPS_IMAGE="airflow-apt-deps-ci-slim" ENV APT_DEPS_IMAGE=${APT_DEPS_IMAGE} RUN echo "${APT_DEPS_IMAGE}" # Note the ifs below might be removed if Buildkit will become usable. It should skip building this # image automatically if it is not used. 
For now we still go through all layers below but they are empty -RUN if [[ "${APT_DEPS_IMAGE}" == "airflow-ci-apt-deps" ]]; then \ +RUN if [[ "${APT_DEPS_IMAGE}" == "airflow-apt-deps-ci" ]]; then \ # Note missing man directories on debian-stretch # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199 mkdir -pv /usr/share/man/man1 \ @@ -171,7 +169,7 @@ ENV HADOOP_URL=https://archive.cloudera.com/${HADOOP_DISTRO}${HADOOP_MAJOR}/${HA ENV HADOOP_HOME=/tmp/hadoop-cdh HIVE_HOME=/tmp/hive RUN \ -if [[ "${APT_DEPS_IMAGE}" == "airflow-ci-apt-deps" ]]; then \ +if [[ "${APT_DEPS_IMAGE}" == "airflow-apt-deps-ci" ]]; then \ mkdir -pv ${HADOOP_HOME} \ && mkdir -pv ${HIVE_HOME} \ && mkdir /tmp/minicluster \ @@ -183,7 +181,7 @@ fi # Install Hadoop # --absolute-names is a work around to avoid this issue https://github.com/docker/hub-feedback/issues/727 RUN \ -if [[ "${APT_DEPS_IMAGE}" == "airflow-ci-apt-deps" ]]; then \ +if [[ "${APT_DEPS_IMAGE}" == "airflow-apt-deps-ci" ]]; then \ HADOOP_URL=${HADOOP_URL}hadoop-${HADOOP_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz \ && HADOOP_TMP_FILE=/tmp/hadoop.tar.gz \ && curl -sL ${HADOOP_URL} > ${HADOOP_TMP_FILE} \ @@ -194,7 +192,7 @@ fi # Install Hive RUN \ -if [[ "${APT_DEPS_IMAGE}" == "airflow-ci-apt-deps" ]]; then \ +if [[ "${APT_DEPS_IMAGE}" == "airflow-apt-deps-ci" ]]; then \ HIVE_URL=${HADOOP_URL}hive-${HIVE_VERSION}-${HADOOP_DISTRO}${HADOOP_DISTRO_VERSION}.tar.gz \ && HIVE_TMP_FILE=/tmp/hive.tar.gz \ && curl -sL ${HIVE_URL} > ${HIVE_TMP_FILE} \ @@ -207,7 +205,7 @@ ENV MINICLUSTER_URL=https://github.com/bolkedebruin/minicluster/releases/downloa ENV MINICLUSTER_VER=1.1 # Install MiniCluster TODO: install it differently. 
Installing to /tmp is probably a bad idea RUN \ -if [[ "${APT_DEPS_IMAGE}" == "airflow-ci-apt-deps" ]]; then \ +if [[ "${APT_DEPS_IMAGE}" == "airflow-apt-deps-ci" ]]; then \ MINICLUSTER_URL=${MINICLUSTER_URL}${MINICLUSTER_VER}/minicluster-${MINICLUSTER_VER}-SNAPSHOT-bin.zip \ && MINICLUSTER_TMP_FILE=/tmp/minicluster.zip \ && curl -sL ${MINICLUSTER_URL} > ${MINICLUSTER_TMP_FILE} \ @@ -218,6 +216,19 @@ fi ENV PATH "${PATH}:/tmp/hive/bin" +ARG RAT_VERSION="0.12" + +ENV RAT_VERSION="${RAT_VERSION}" \ + RAT_JAR="/tmp/apache-rat-${RAT_VERSION}.jar" \ + RAT_URL="http://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar" + +RUN \ +if [[ "${APT_DEPS_IMAGE}" == "airflow-apt-deps-ci" ]]; then \ + echo "Downloading RAT from ${RAT_URL} to ${RAT_JAR}" \ + && curl -sL ${RAT_URL} > ${RAT_JAR} \ + ;\ +fi + ############################################################################################################ # This is the target image - it installs PIP and NPM dependencies including efficient caching # mechanisms - it might be used to build the bare airflow build or CI build @@ -233,9 +244,6 @@ WORKDIR /opt/airflow RUN echo "Airflow version: ${AIRFLOW_VERSION}" -ARG APT_DEPS_IMAGE -ENV APT_DEPS_IMAGE=${APT_DEPS_IMAGE} - ARG AIRFLOW_USER=airflow ENV AIRFLOW_USER=${AIRFLOW_USER} @@ -278,13 +286,46 @@ RUN echo "Pip version: ${PIP_VERSION}" RUN pip install --upgrade pip==${PIP_VERSION} -# We are copying everything with airflow:airflow user:group even if we use root to run the scripts +ARG AIRFLOW_REPO=apache/airflow +ENV AIRFLOW_REPO=${AIRFLOW_REPO} + +ARG AIRFLOW_BRANCH=master +ENV AIRFLOW_BRANCH=${AIRFLOW_BRANCH} + +ENV AIRFLOW_GITHUB_DOWNLOAD=https://raw.githubusercontent.com/${AIRFLOW_REPO}/${AIRFLOW_BRANCH} + +# Airflow Extras installed +ARG AIRFLOW_EXTRAS="all" +ENV AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS} + +RUN echo "Installing with extras: ${AIRFLOW_EXTRAS}." 
+ +ARG AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD="false" +ENV AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD=${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD} + +# By changing the CI build epoch we can force reinstalling Airflow from the current master - +# in case of CI optimized builds (next step). Our build scripts will change the EPOCH every month normally +# But it can also be overwritten manually by setting the AIRFLOW_CI_BUILD_EPOCH environment variable. +ARG AIRFLOW_CI_BUILD_EPOCH="" +ENV AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH} + +# In case of CI-optimised builds we want to pre-install master version of airflow dependencies so that +# We do not have to always reinstall it from scratch. +# This can be reinstalled from latest master by increasing PIP_DEPENDENCIES_EPOCH_NUMBER. +# And is automatically reinstalled from scratch every month +RUN \ + if [[ "${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD}" == "true" ]]; then \ + pip install --no-use-pep517 \ + "https://github.com/apache/airflow/archive/master.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \ + && pip uninstall --yes apache-airflow; \ + fi + +# Note! We are copying everything with airflow:airflow user:group even if we use root to run the scripts # This is fine as root user will be able to use those dirs anyway. # Airflow sources change frequently but dependency configuration won't change that often # We copy setup.py and other files needed to perform setup of dependencies -# This way cache here will only be invalidated if any of the -# version/setup configuration change but not when airflow sources change +# So in case setup.py changes we can install latest dependencies required. 
COPY --chown=airflow:airflow setup.py ${AIRFLOW_SOURCES}/setup.py COPY --chown=airflow:airflow setup.cfg ${AIRFLOW_SOURCES}/setup.cfg @@ -292,14 +333,9 @@ COPY --chown=airflow:airflow airflow/version.py ${AIRFLOW_SOURCES}/airflow/versi COPY --chown=airflow:airflow airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/__init__.py COPY --chown=airflow:airflow airflow/bin/airflow ${AIRFLOW_SOURCES}/airflow/bin/airflow -# Airflow Extras installed -ARG AIRFLOW_EXTRAS="all" -ENV AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS} -RUN echo "Installing with extras: ${AIRFLOW_EXTRAS}." - -# First install only dependencies but no Apache Airflow itself -# This way regular changes in sources of Airflow will not trigger reinstallation of all dependencies -# And this Docker layer will be reused between builds. +# The goal of this line is to install the dependencies from the most current setup.py from sources +# This will be usually incremental small set of packages in CI optimized build, so it will be very fast +# In non-CI optimized build this will install all dependencies before installing sources. 
RUN pip install --no-use-pep517 -e ".[${AIRFLOW_EXTRAS}]" COPY --chown=airflow:airflow airflow/www/package.json ${AIRFLOW_SOURCES}/airflow/www/package.json @@ -307,13 +343,22 @@ COPY --chown=airflow:airflow airflow/www/package-lock.json ${AIRFLOW_SOURCES}/ai WORKDIR ${AIRFLOW_SOURCES}/airflow/www +ARG BUILD_NPM=true +ENV BUILD_NPM=${BUILD_NPM} + # Install necessary NPM dependencies (triggered by changes in package-lock.json) -RUN gosu ${AIRFLOW_USER} npm ci +RUN \ + if [[ "${BUILD_NPM}" == "true" ]]; then \ + gosu ${AIRFLOW_USER} npm ci; \ + fi COPY --chown=airflow:airflow airflow/www/ ${AIRFLOW_SOURCES}/airflow/www/ # Package NPM for production -RUN gosu ${AIRFLOW_USER} npm run prod +RUN \ + if [[ "${BUILD_NPM}" == "true" ]]; then \ + gosu ${AIRFLOW_USER} npm run prod; \ + fi # Always apt-get update/upgrade here to get latest dependencies before # we redo pip install @@ -328,6 +373,9 @@ COPY --chown=airflow:airflow . ${AIRFLOW_SOURCES}/ WORKDIR ${AIRFLOW_SOURCES} +# Finally install the requirements from the latest sources +RUN pip install --no-use-pep517 -e ".[${AIRFLOW_EXTRAS}]" + # Always add-get update/upgrade here to get latest dependencies before # we redo pip install RUN apt-get update \ diff --git a/airflow/plugins_manager.py b/airflow/plugins_manager.py index 48525c831eea1..acf3a7494e574 100644 --- a/airflow/plugins_manager.py +++ b/airflow/plugins_manager.py @@ -120,6 +120,9 @@ def is_valid_plugin(plugin_obj, existing_plugins): norm_pattern = re.compile(r'[/|.]') +if settings.PLUGINS_FOLDER is None: + raise AirflowPluginException("Plugins folder is not set") + # Crawl through the plugins folder to find AirflowPlugin derivatives for root, dirs, files in os.walk(settings.PLUGINS_FOLDER, followlinks=True): for f in files: diff --git a/docs/build.sh b/docs/build.sh index b6df10cfd74d4..22c6dec297bdd 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -18,38 +18,96 @@ # specific language governing permissions and limitations # under the License. 
-set -e +set -euo pipefail -FWDIR="$(cd "`dirname "$0"`"; pwd)" -cd "$FWDIR" +MY_DIR="$(cd "$(dirname "$0")" && pwd)" +pushd "${MY_DIR}" &>/dev/null || exit 1 -NUM_INCORRECT_USE_LITERALINCLUDE_DIRECTIVE=$(grep -inR --include \*.rst 'literalinclude::.\+example_dags' .\ - tee /dev/tty |\ +echo +echo "Working in ${MY_DIR} folder" +echo + + +if [[ -f /.dockerenv ]]; then + # This script can be run both - in container and outside of it. + # Here we are inside the container which means that we should (when the host is Linux) + # fix permissions of the _build and _api folders via sudo. + # Those files are mounted from the host via docs folder and we might not have permissions to + # write to those directories (and remove the _api folder). + # We know we have sudo capabilities inside the container. + echo "Creating the _build and _api folders in case they do not exist" + sudo mkdir -pv _build + sudo mkdir -pv _api + echo "Created the _build and _api folders in case they do not exist" + echo "Changing ownership of _build and _api folders to ${AIRFLOW_USER}:${AIRFLOW_USER}" + sudo chown -R "${AIRFLOW_USER}:${AIRFLOW_USER}" . + echo "Changed ownership of the whole doc folder to ${AIRFLOW_USER}:${AIRFLOW_USER}" +else + # We are outside the container so we simply make sure that the directories exist + echo "Creating the _build and _api folders in case they do not exist" + mkdir -pv _build + mkdir -pv _api + echo "Creating the _build and _api folders in case they do not exist" +fi + +echo "Removing content of the _build and _api folders" +rm -rf _build/* +rm -rf _api/* +echo "Removed content of the _build and _api folders" + + +set +e +# shellcheck disable=SC2063 +NUM_INCORRECT_USE_LITERALINCLUDE=$(grep -inR --include \*.rst 'literalinclude::.\+example_dags' . 
| \ + tee /dev/tty | wc -l |\ tr -d '[:space:]') +set -e + +echo +echo "Checking for presence of literalinclude in example DAGs" +echo -if [[ "${NUM_INCORRECT_USE_LITERALINCLUDE_DIRECTIVE}" -ne "0" ]]; then +if [[ "${NUM_INCORRECT_USE_LITERALINCLUDE}" -ne "0" ]]; then + echo echo "Unexpected problems found in the documentation. " echo "You should use a exampleinclude directive to include example DAGs." - echo "Currently, ${NUM_INCORRECT_USE_LITERALINCLUDE_DIRECTIVE} problem found." + echo "Currently, ${NUM_INCORRECT_USE_LITERALINCLUDE} problem found." + echo exit 1 +else + echo + echo "No literalincludes in example DAGs found" + echo fi -[[ -d "_build" ]] && rm -r _build -[[ -d "_api" ]] && rm -r _api - SUCCEED_LINE=$(make html |\ tee /dev/tty |\ grep 'build succeeded' |\ head -1) -NUM_CURRENT_WARNINGS=$(echo $SUCCEED_LINE |\ +NUM_CURRENT_WARNINGS=$(echo ${SUCCEED_LINE} |\ sed -E 's/build succeeded, ([0-9]+) warnings?\./\1/g') -if echo $SUCCEED_LINE | grep -q "warning"; then +if [[ -f /.dockerenv ]]; then + # We are inside the container which means that we should fix back the permissions of the + # _build and _api folder files, so that they can be accessed by the host user + # The _api folder should be deleted by then but just in case we should change the ownership + echo "Changing ownership of docs/_build folder back to ${HOST_USER_ID}:${HOST_GROUP_ID}" + sudo chown ${HOST_USER_ID}:${HOST_GROUP_ID} _build + if [[ -d _api ]]; then + sudo chown ${HOST_USER_ID}:${HOST_GROUP_ID} _api + fi + echo "Changed ownership of docs/_build folder back to ${HOST_USER_ID}:${HOST_GROUP_ID}" +fi + + +if echo ${SUCCEED_LINE} | grep -q "warning"; then echo echo "Unexpected problems found in the documentation. " echo "Currently, ${NUM_CURRENT_WARNINGS} warnings found. 
" echo exit 1 fi + +popd &>/dev/null || exit 1 diff --git a/hooks/build b/hooks/build index 93405a51044f9..e67d9aca9caa8 100755 --- a/hooks/build +++ b/hooks/build @@ -35,13 +35,18 @@ echo echo "Airflow root directory: ${AIRFLOW_ROOT}" echo +BUILD_CACHE_DIR="${AIRFLOW_ROOT}/.build" +mkdir -pv "${BUILD_CACHE_DIR}" + date BUILD_START_TIME=$(date +%s) LAST_STEP_START_TIME=${BUILD_START_TIME} LAST_STEP_NAME="" STEP_STARTED="false" -PYTHON_VERSION_FOR_LATEST_IMAGE=3.5 +PYTHON_VERSION_FOR_DEFAULT_IMAGE=3.6 +AIRFLOW_CI_VERBOSE=${AIRFLOW_CI_VERBOSE:="false"} +AIRFLOW_CI_BUILD_EPOCH=${AIRFLOW_CI_BUILD_EPOCH:=$(date +"%Y%m")} function end_step { if [[ "${STEP_STARTED}" != "true" ]]; then @@ -83,11 +88,17 @@ function add_image_to_push { echo } +function save_to_file { + # shellcheck disable=SC2005 + echo "$(eval echo "\$$1")" > "${BUILD_CACHE_DIR}/.$1" +} + + function build_image { NAME="${1}" MY_IMAGE_TAG="${2}" TARGET_IMAGE="${3}" - APT_DEPS_IMAGE="${4:-airflow-apt-deps}" + APT_DEPS_IMAGE="${4:-airflow-apt-deps-ci-slim}" AIRFLOW_EXTRAS="${5:-all}" AIRFLOW_USER="${6:-airflow}" HOME="${7:-/home/airflow}" @@ -95,6 +106,17 @@ function build_image { echo "Build ${NAME} image: ${MY_IMAGE_TAG}" echo "Base image: ${PYTHON_BASE_IMAGE}" + set +u + if [[ "${MY_IMAGE_TAG}" == "${AIRFLOW_CI_IMAGE}" ]]; then + DOCKER_CACHE_DIRECTIVE=("${DOCKER_CACHE_DIRECTIVE_CI[@]}") + elif [[ "${MY_IMAGE_TAG}" == "${AIRFLOW_SLIM_CI_IMAGE}" ]]; then + DOCKER_CACHE_DIRECTIVE=("${DOCKER_CACHE_DIRECTIVE_CI_SLIM[@]}") + else + echo + echo "Don't know how to set cache directive for ${MY_IMAGE_TAG}. 
Exiting" + echo + exit 1 + fi set -x docker build \ --build-arg PYTHON_BASE_IMAGE="${PYTHON_BASE_IMAGE}" \ @@ -102,6 +124,9 @@ function build_image { --build-arg APT_DEPS_IMAGE="${APT_DEPS_IMAGE}" \ --build-arg AIRFLOW_EXTRAS="${AIRFLOW_EXTRAS}" \ --build-arg AIRFLOW_USER="${AIRFLOW_USER}" \ + --build-arg BUILD_NPM="${AIRFLOW_CONTAINER_BUILD_NPM}" \ + --build-arg AIRFLOW_CI_BUILD_EPOCH="${AIRFLOW_CI_BUILD_EPOCH}" \ + --build-arg AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD="${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD}" \ --build-arg HOME="${HOME}" \ "${DOCKER_CACHE_DIRECTIVE[@]}" \ -t "${MY_IMAGE_TAG}" \ @@ -109,8 +134,8 @@ function build_image { . add_image_to_push "${MY_IMAGE_TAG}" - set +x + set -u } start_step "Setting variables" @@ -129,27 +154,59 @@ export DOCKERHUB_REPO=${DOCKERHUB_REPO:="airflow"} # This default Python version is later overridden in case IMAGE_NAME is passed export PYTHON_VERSION=\ ${PYTHON_VERSION:=$(python -c 'import sys;print("%s.%s" % (sys.version_info.major, sys.version_info.minor))')} -export IMAGE_NAME=${IMAGE_NAME:=${DOCKERHUB_USER}/${DOCKERHUB_REPO}:latest-${PYTHON_VERSION}} + +# IMAGE_NAME might come from DockerHub and we decide which PYTHON_VERSION to use based on it (see below) +# In case IMAGE_NAME is not set we determine PYTHON_VERSION based on the default python on the path +# So in virtualenvs it will use the virtualenv's PYTHON_VERSION. 
+export BASE_IMAGE_NAME=${IMAGE_NAME:=${DOCKERHUB_USER}/${DOCKERHUB_REPO}:master-python${PYTHON_VERSION}} echo -echo "IMAGE_NAME=${IMAGE_NAME:=}" +echo "BASE_IMAGE_NAME=${BASE_IMAGE_NAME:=} (final image will have -ci, -ci-slim suffixes)" echo # Remove index.docker.io/ prefix as it is added by default by DockerHub -export LOCAL_IMAGE=${IMAGE_NAME#index.docker.io/} +export LOCAL_BASE_IMAGE_NAME=${BASE_IMAGE_NAME#index.docker.io/} echo -echo "LOCAL_IMAGE=${LOCAL_IMAGE}" +echo "LOCAL_BASE_IMAGE_NAME=${LOCAL_BASE_IMAGE_NAME}" echo # Extract python version from image name we want to build in case it was passed # Via IMAGE_NAME -# This nice bash construct below extracts last field after '-' delimiter -# So for example 'latest-3.6' will produce PYTHON_VERSION=3.6 -PYTHON_VERSION=$(echo "${LOCAL_IMAGE}" | rev | cut -d '-' -f1 | rev ) -export PYTHON_VERSION +# This nice bash construct below extracts last field after '-python' delimiter +# So for example 'airflow:master-python3.6' will produce LONG_PYTHON_VERSION=python3.6 +LONG_PYTHON_VERSION="${LOCAL_BASE_IMAGE_NAME##*-}" +export LONG_PYTHON_VERSION + +echo +echo "LONG_PYTHON_VERSION=${LONG_PYTHON_VERSION}" +echo + +if [[ ! ${LONG_PYTHON_VERSION} =~ python[2-3]\.[0-9]+ ]]; then + echo + echo >&2 "ERROR! 
Python version extracted from IMAGE_NAME does not match the pythonX.Y format" + echo "The IMAGE_NAME format should be '-pythonX.Y'" + echo + exit 1 +fi + +PYTHON_VERSION=${LONG_PYTHON_VERSION#python} + +echo +echo "PYTHON_VERSION=${PYTHON_VERSION}" +echo + +# Extract IMAGE_TAG from LOCAL_BASE_IMAGE_NAME (apache/airflow:master-python3.6 -> master-python3.6) +IMAGE_TAG=${LOCAL_BASE_IMAGE_NAME##${DOCKERHUB_USER}/${DOCKERHUB_REPO}:} +echo +echo "Image tag: ${IMAGE_TAG}" +echo -IMAGE_PREFIX=$(echo "${LOCAL_IMAGE}" | rev | cut -d '-' -f2 | cut -d ':' -f1 | rev ) +# Extract TAG_PREFIX from IMAGE_TAG (master-python3.6 -> master) +TAG_PREFIX=${IMAGE_TAG%-*} +echo +echo "Image tag prefix: ${TAG_PREFIX}" +echo # In case of CRON jobs on Travis we run builds without cache if [[ "${TRAVIS_EVENT_TYPE:=}" == "cron" ]]; then @@ -159,7 +216,7 @@ fi # You can set AIRFLOW_CONTAINER_USE_DOCKER_CACHE to false if you do not want to use pulled images # as cache during build -# This way you can test building everything from the scratch +# This way you can test building from the scratch AIRFLOW_CONTAINER_USE_PULLED_IMAGES_CACHE=${AIRFLOW_CONTAINER_USE_PULLED_IMAGES_CACHE:="true"} # You can set AIRFLOW_CONTAINER_USE_NO_CACHE to true if you want to use standard Docker cache during build @@ -175,9 +232,7 @@ EOF export AIRFLOW_VERSION # Check if we are running in the CI environment -# In case of CI build we are not building CI cache. 
We use it and -# We also use CI deps as base of the main image -CI=${CI:=false} +CI=${CI:="false"} if [[ "${CI}" == "true" ]]; then NON_CI="false" @@ -185,12 +240,15 @@ else NON_CI="true" fi -# Extras used to to build main airflow image -AIRFLOW_MAIN_EXTRAS=${AIRFLOW_MAIN_EXTRAS:="all"} +# Extras used to build slim airflow image +AIRFLOW_SLIM_EXTRAS=${AIRFLOW_SLIM_EXTRAS:="all"} # Extras used to build cache and CI image AIRFLOW_CI_EXTRAS=${AIRFLOW_CI_EXTRAS:="devel_ci"} +# Whether this is a release build +AIRFLOW_RELEASE_BUILD=${AIRFLOW_RELEASE_BUILD:="false"} + echo echo "Airflow ${AIRFLOW_VERSION} Python: ${PYTHON_VERSION}." echo @@ -200,29 +258,54 @@ echo export AIRFLOW_CONTAINER_PUSH_IMAGES=${AIRFLOW_CONTAINER_PUSH_IMAGES:=${NON_CI}} # Whether to force pull images to populate cache -export AIRFLOW_CONTAINER_FORCE_PULL_IMAGES=${AIRFLOW_CONTAINER_FORCE_PULL_IMAGES:="true"} +# This is set to true by default in CI test environment (we always pull latest images first) +export AIRFLOW_CONTAINER_FORCE_PULL_IMAGES=${AIRFLOW_CONTAINER_FORCE_PULL_IMAGES:=${CI}} -# Skips downloading and building the main, trimmed down image of airflow -# This is set to true by default in CI environment (we only need CI image then) -export AIRFLOW_CONTAINER_SKIP_MAIN_IMAGE=${AIRFLOW_CONTAINER_SKIP_MAIN_IMAGE:=${CI}} -echo "Skip main image: ${AIRFLOW_CONTAINER_SKIP_MAIN_IMAGE}" +# Skips downloading and building the slim image of airflow +# This is set to true by default in CI test environment (we only need full CI image then) +export AIRFLOW_CONTAINER_SKIP_SLIM_CI_IMAGE=${AIRFLOW_CONTAINER_SKIP_SLIM_CI_IMAGE:=${CI}} +echo "Skip slim image: ${AIRFLOW_CONTAINER_SKIP_SLIM_CI_IMAGE}" + +# Skips downloading and building the CI image, full CI-enabled image of airflow +# This is set to true for pre-tests which do not need the whole CI image just the slim one +export AIRFLOW_CONTAINER_SKIP_CI_IMAGE=${AIRFLOW_CONTAINER_SKIP_CI_IMAGE:="false"} +echo "Skip CI image: 
${AIRFLOW_CONTAINER_SKIP_CI_IMAGE}" # Skips pulling the airflow images - this will use cache but will build it all from scratch export AIRFLOW_CONTAINER_SKIP_PULLING_AIRFLOW_IMAGES=${AIRFLOW_CONTAINER_SKIP_PULLING_AIRFLOW_IMAGES:="false"} echo "Skip pulling Airflow images: ${AIRFLOW_CONTAINER_SKIP_PULLING_AIRFLOW_IMAGES}" +# Fixes permissions for git-checked out files. This is needed to have consistent build cache across +# Dockerhub, TravisCI and locally checked out code export AIRFLOW_FIX_PERMISSIONS=${AIRFLOW_FIX_PERMISSIONS:="all"} echo "Fixing permissions: ${AIRFLOW_FIX_PERMISSIONS}" -# Base python image for the build -PYTHON_BASE_IMAGE=python:${PYTHON_VERSION}-slim +export AIRFLOW_CONTAINER_BUILD_NPM=${AIRFLOW_CONTAINER_BUILD_NPM:="true"} +echo "Building NPM: ${AIRFLOW_CONTAINER_BUILD_NPM}" -# Image of the main airflow - this is a "reference" image of Airflow with minimum requirements needed -AIRFLOW_IMAGE="${LOCAL_IMAGE}-v${AIRFLOW_VERSION}" +export AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD=${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD:="true"} +echo "The build optimised for CI: ${AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD}" -# Image of the Airflow CI - this is the image used to run tests in CI environment -AIRFLOW_CI_IMAGE="${LOCAL_IMAGE}-ci-v${AIRFLOW_VERSION}" +# Base python image for the build +export PYTHON_BASE_IMAGE=python:${PYTHON_VERSION}-slim-stretch + +if [[ "${AIRFLOW_RELEASE_BUILD}" == "true" ]]; then + export AIRFLOW_SLIM_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_VERSION}-python${PYTHON_VERSION}-ci-slim" + export AIRFLOW_SLIM_CI_IMAGE_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_VERSION}-ci-slim" + export AIRFLOW_SLIM_CI_IMAGE_LATEST="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:latest-python${PYTHON_VERSION}-${TAG_PREFIX}-ci-slim" + export AIRFLOW_SLIM_CI_IMAGE_LATEST_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:latest-${TAG_PREFIX}-ci-slim" + + export 
AIRFLOW_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_VERSION}-python${PYTHON_VERSION}-ci" + export AIRFLOW_CI_IMAGE_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_VERSION}-ci" + export AIRFLOW_CI_IMAGE_LATEST="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:latest-python${PYTHON_VERSION}-${TAG_PREFIX}-ci" + export AIRFLOW_CI_IMAGE_LATEST_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:latest-${TAG_PREFIX}-ci" +else + export AIRFLOW_SLIM_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${TAG_PREFIX}-python${PYTHON_VERSION}-ci-slim" + export AIRFLOW_SLIM_CI_IMAGE_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${TAG_PREFIX}-ci-slim" + export AIRFLOW_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${TAG_PREFIX}-python${PYTHON_VERSION}-ci" + export AIRFLOW_CI_IMAGE_DEFAULT="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${TAG_PREFIX}-ci" +fi # In the future we can enable buildkit. # It's experimental now and cache does not work out of the box with buildkit in Docker 18.09.2, buildkit 0.3.3 # It is fixed in upcoming buildkit 0.4.0. @@ -242,7 +325,7 @@ if [[ "${AIRFLOW_CONTAINER_CLEANUP_IMAGES}" == "true" ]]; then echo start_step "Removing images" docker rmi "${PYTHON_BASE_IMAGE}" || true - docker rmi "${AIRFLOW_IMAGE}" || true + docker rmi "${AIRFLOW_SLIM_CI_IMAGE}" || true docker rmi "${AIRFLOW_CI_IMAGE}" || true echo echo "###################################################################" @@ -263,21 +346,27 @@ if [[ "${AIRFLOW_CONTAINER_USE_PULLED_IMAGES_CACHE}" == "true" ]]; then echo echo if [[ "${AIRFLOW_CONTAINER_FORCE_PULL_IMAGES}" == "true" ]]; then - echo "Force-pull base python image. This forces to get the latest version." + echo "Force-pull base python image. This forces to get the most recent versions from the repository." 
echo set -x docker pull "${PYTHON_BASE_IMAGE}" set +x echo fi - IMAGES_TO_PULL="${AIRFLOW_CI_IMAGE}" - if [[ "${AIRFLOW_CONTAINER_SKIP_MAIN_IMAGE}" == "true" ]]; then - echo "Skip downloading the main Airflow image" + IMAGES_TO_PULL="" + if [[ "${AIRFLOW_CONTAINER_SKIP_CI_IMAGE}" == "true" ]]; then + echo "Skip downloading the CI Airflow image" + else + IMAGES_TO_PULL="${IMAGES_TO_PULL} ${AIRFLOW_CI_IMAGE}" + fi + if [[ "${AIRFLOW_CONTAINER_SKIP_SLIM_CI_IMAGE}" == "true" ]]; then + echo "Skip downloading the slim Airflow image" else - IMAGES_TO_PULL="${IMAGES_TO_PULL} ${AIRFLOW_IMAGE}" + IMAGES_TO_PULL="${IMAGES_TO_PULL} ${AIRFLOW_SLIM_CI_IMAGE}" fi - DOCKER_CACHE_DIRECTIVE=() + DOCKER_CACHE_DIRECTIVE_CI=() + DOCKER_CACHE_DIRECTIVE_CI_SLIM=() for IMAGE in ${IMAGES_TO_PULL} do echo @@ -316,26 +405,45 @@ if [[ "${AIRFLOW_CONTAINER_USE_PULLED_IMAGES_CACHE}" == "true" ]]; then echo fi fi - DOCKER_CACHE_DIRECTIVE+=("--cache-from" "${IMAGE}") + if [[ "${IMAGE}" == "${AIRFLOW_CI_IMAGE}" ]]; then + DOCKER_CACHE_DIRECTIVE_CI+=("--cache-from" "${IMAGE}") + elif [[ "${IMAGE}" == "${AIRFLOW_SLIM_CI_IMAGE}" ]]; then + DOCKER_CACHE_DIRECTIVE_CI_SLIM+=("--cache-from" "${IMAGE}") + else + echo + echo "Don't know how to set cache directive for ${IMAGE}. Exiting" + echo + exit 1 + fi done echo echo "This build uses Docker cache" - echo "Cache directive used: ${DOCKER_CACHE_DIRECTIVE[*]}" + echo "Cache directives used: " + set +u + echo "CI build: ${DOCKER_CACHE_DIRECTIVE_CI[*]}" + echo "CI slim build: ${DOCKER_CACHE_DIRECTIVE_CI_SLIM[*]}" + set -u echo elif [[ "${AIRFLOW_CONTAINER_USE_NO_CACHE}" == "true" ]]; then - DOCKER_CACHE_DIRECTIVE+=("--no-cache") + DOCKER_CACHE_DIRECTIVE_CI+=("--no-cache") + DOCKER_CACHE_DIRECTIVE_CI_SLIM+=("--no-cache") echo echo "Skip cache for builds. Everything will be rebuilt from scratch." 
echo - echo "Cache directive used: ${DOCKER_CACHE_DIRECTIVE[*]}" + echo "Cache directives used: " + echo "CI build: ${DOCKER_CACHE_DIRECTIVE_CI[*]}" + echo "CI slim build: ${DOCKER_CACHE_DIRECTIVE_CI_SLIM[*]}" echo else - DOCKER_CACHE_DIRECTIVE+=("--cache-from" "${AIRFLOW_CI_IMAGE}" "--cache-from" "${AIRFLOW_IMAGE}") echo echo "Use default cache from locally built images." echo - echo "Cache directive used: ${DOCKER_CACHE_DIRECTIVE[*]}" + echo "Cache directives used: " + set +u + echo "CI build: ${DOCKER_CACHE_DIRECTIVE_CI[*]}" + echo "CI slim build: ${DOCKER_CACHE_DIRECTIVE_CI_SLIM[*]}" + set -u echo fi @@ -345,8 +453,12 @@ if [[ "${OSTYPE}" == "darwin"* ]]; then fi # Build id identifying the build uniquely BUILD_ID=${BUILD_ID:="local"} -# Branch name for triggered builds + + +# Default branch name for triggered builds is master (for dockerhub) BRANCH_NAME=${BRANCH_NAME:="master"} +# Set Airflow container to branch name +AIRFLOW_CONTAINER_BRANCH_NAME=${AIRFLOW_CONTAINER_BRANCH_NAME:=${BRANCH_NAME}} # directory where "deployment" artifacts should be placed DEPLOY_DIR=${AIRFLOW_ROOT}/dist/${BRANCH_NAME}/$(date +%Y-%m-%d)/${BUILD_ID}/${PYTHON_VERSION} @@ -371,10 +483,8 @@ if [[ "${AIRFLOW_FIX_PERMISSIONS}" == "all" || "${AIRFLOW_FIX_PERMISSIONS}" == if [[ "${AIRFLOW_FIX_PERMISSIONS}" == "all" ]]; then # Get all files in the context - by building a small alpine based image # then COPY all files (.) from the context and listing the files via find method - if [[ "$(docker images -q airflow-context:latest 2> /dev/null)" == "" ]]; then - docker build -t airflow-context:latest . -f Dockerfile-context - fi - ALL_FILES_TO_FIX=$(docker run airflow-context:latest /bin/sh -c "(cd /context && find .)") + docker build -t airflow-context . 
-f Dockerfile-context + ALL_FILES_TO_FIX=$(docker run airflow-context /bin/sh -c "(cd /context && find .)") elif [[ "${AIRFLOW_FIX_PERMISSIONS}" == "setup" ]]; then ALL_FILES_TO_FIX="\ ${AIRFLOW_ROOT}/setup.py \ @@ -392,9 +502,13 @@ if [[ "${AIRFLOW_FIX_PERMISSIONS}" == "all" || "${AIRFLOW_FIX_PERMISSIONS}" == ACCESS_RIGHTS=$("${STAT_BIN}" -c "%A" "${FILE}" || echo "--------") # check if the file is group/other writeable if [[ "${ACCESS_RIGHTS:5:1}" != "-" || "${ACCESS_RIGHTS:8:1}" != "-" ]]; then - "${STAT_BIN}" --printf "%a %A %F \t%s \t-> " "${FILE}" + if [[ "${AIRFLOW_CI_VERBOSE}" == "true" ]]; then + "${STAT_BIN}" --printf "%a %A %F \t%s \t-> " "${FILE}" + fi chmod og-w "${FILE}" - "${STAT_BIN}" --printf "%a %A %F \t%s \t%n\n" "${FILE}" + if [[ "${AIRFLOW_CI_VERBOSE}" == "true" ]]; then + "${STAT_BIN}" --printf "%a %A %F \t%s \t%n\n" "${FILE}" + fi fi done @@ -405,54 +519,55 @@ else echo "Skipping fixing permissions for CI builds" fi -start_step "Build Airflow image" +start_step "Build Airflow CI slim image" -if [[ "${AIRFLOW_CONTAINER_SKIP_MAIN_IMAGE}" == "true" ]]; then - echo "Skip building the main Airflow image" +if [[ "${AIRFLOW_CONTAINER_SKIP_SLIM_CI_IMAGE}" == "true" ]]; then + echo "Skip building the Airflow CI slim image" else build_image "Airflow" \ - "${AIRFLOW_IMAGE}" \ + "${AIRFLOW_SLIM_CI_IMAGE}" \ "main" \ - "airflow-apt-deps" - echo - echo "Tagging additionally the Airflow image ${AIRFLOW_IMAGE} with ${LOCAL_IMAGE} tag" - echo - docker tag "${AIRFLOW_IMAGE}" "${LOCAL_IMAGE}" - add_image_to_push "${LOCAL_IMAGE}" - - if [[ "${PYTHON_VERSION_FOR_LATEST_IMAGE}" == "${PYTHON_VERSION}" && "${BRANCH_NAME}" == "master" ]]; then - echo - echo "Tagging additionally the Airflow image ${AIRFLOW_IMAGE} with ${IMAGE_PREFIX} tag" - echo "For python version ${PYTHON_VERSION} and branch ${BRANCH_NAME}" - echo - docker tag "${AIRFLOW_IMAGE}" "${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${IMAGE_PREFIX}" - add_image_to_push 
"${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${IMAGE_PREFIX}" + "airflow-apt-deps-ci-slim" + save_to_file AIRFLOW_SLIM_CI_IMAGE + if [[ "${PYTHON_VERSION_FOR_DEFAULT_IMAGE}" == "${PYTHON_VERSION}" ]]; then + docker tag "${AIRFLOW_SLIM_CI_IMAGE}" "${AIRFLOW_SLIM_CI_IMAGE_DEFAULT}" + add_image_to_push "${AIRFLOW_SLIM_CI_IMAGE_DEFAULT}" + fi + if [[ "${AIRFLOW_RELEASE_BUILD}" == "true" ]]; then + docker tag "${AIRFLOW_SLIM_CI_IMAGE}" "${AIRFLOW_SLIM_CI_IMAGE_LATEST}" + add_image_to_push "${AIRFLOW_SLIM_CI_IMAGE_LATEST}" + if [[ "${PYTHON_VERSION_FOR_DEFAULT_IMAGE}" == "${PYTHON_VERSION}" ]]; then + docker tag "${AIRFLOW_SLIM_CI_IMAGE}" "${AIRFLOW_SLIM_CI_IMAGE_LATEST_DEFAULT}" + add_image_to_push "${AIRFLOW_SLIM_CI_IMAGE_LATEST_DEFAULT}" + fi fi fi -start_step "Build Airflow CI image" -build_image "Airflow CI" \ - "${AIRFLOW_CI_IMAGE}" \ - "main" \ - "airflow-ci-apt-deps" \ - "devel_ci" \ - "root" \ - "/root" - -echo -echo "Tagging additionally the CI Airflow image ${AIRFLOW_CI_IMAGE} with ${LOCAL_IMAGE}-ci tag" -echo - -docker tag "${AIRFLOW_CI_IMAGE}" "${LOCAL_IMAGE}-ci" -add_image_to_push "${LOCAL_IMAGE}-ci" - -if [[ "${PYTHON_VERSION_FOR_LATEST_IMAGE}" == "${PYTHON_VERSION}" && "${BRANCH_NAME}" == "master" ]]; then - echo - echo "Tagging additionally the CI Airflow image ${AIRFLOW_CI_IMAGE} with ${IMAGE_PREFIX}-ci tag" - echo "For python version ${PYTHON_VERSION} and branch ${BRANCH_NAME}" - echo - docker tag "${AIRFLOW_CI_IMAGE}" "${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${IMAGE_PREFIX}-ci" - add_image_to_push "${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${IMAGE_PREFIX}-ci" +if [[ "${AIRFLOW_CONTAINER_SKIP_CI_IMAGE}" == "true" ]]; then + echo "Skip building the CI full Airflow image" +else + start_step "Build Airflow CI full image" + build_image "Airflow CI" \ + "${AIRFLOW_CI_IMAGE}" \ + "main" \ + "airflow-apt-deps-ci" \ + "devel_ci" \ + "root" \ + "/root" + save_to_file AIRFLOW_CI_IMAGE + + if [[ "${PYTHON_VERSION_FOR_DEFAULT_IMAGE}" == "${PYTHON_VERSION}" ]]; then + docker tag 
"${AIRFLOW_CI_IMAGE}" "${AIRFLOW_CI_IMAGE_DEFAULT}" + add_image_to_push "${AIRFLOW_CI_IMAGE_DEFAULT}" + fi + if [[ "${AIRFLOW_RELEASE_BUILD}" == "true" ]]; then + docker tag "${AIRFLOW_CI_IMAGE}" "${AIRFLOW_CI_IMAGE_LATEST}" + add_image_to_push "${AIRFLOW_CI_IMAGE_LATEST}" + if [[ "${PYTHON_VERSION_FOR_DEFAULT_IMAGE}" == "${PYTHON_VERSION}" ]]; then + docker tag "${AIRFLOW_CI_IMAGE}" "${AIRFLOW_CI_IMAGE_LATEST_DEFAULT}" + add_image_to_push "${AIRFLOW_CI_IMAGE_LATEST_DEFAULT}" + fi + fi fi start_step "Pushing images" diff --git a/hooks/push b/hooks/push new file mode 100755 index 0000000000000..eae33aca2988d --- /dev/null +++ b/hooks/push @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This is hook build used by DockerHub. 
We are also using it +# on Travis CI to potentially rebuild (and refresh layers that +# are not cached) Docker images that are used to run CI jobs + +# We do not push in the push step because we are building multiple images in the build step +# and it is difficult to pass list of the built images from the build to push phase +set -euo pipefail + +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +echo "My dir: ${MY_DIR}" + + +echo +echo "Skip pushing the image. All images were built and pushed in the build hook already!" +echo diff --git a/images/run_unittests.png b/images/run_unittests.png new file mode 100644 index 0000000000000..b7a56b30d81bf Binary files /dev/null and b/images/run_unittests.png differ diff --git a/run-tests b/run-tests new file mode 100755 index 0000000000000..7ffc32a75dcd1 --- /dev/null +++ b/run-tests @@ -0,0 +1,181 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# Bash sanity settings (error on exit, complain for undefined vars, error when pipe fails) +set -euo pipefail + +CMDNAME="$(basename -- "$0")" + +AIRFLOW_ROOT="$(cd "${MY_DIR}" && pwd)" +export AIRFLOW__CORE__DAGS_FOLDER="${AIRFLOW_ROOT}/tests/dags" + +# environment +export AIRFLOW_HOME=${AIRFLOW_HOME:=${HOME}} + +echo "Airflow home: ${AIRFLOW_HOME}" + +export AIRFLOW__CORE__UNIT_TEST_MODE=True + +# add test/test_utils to PYTHONPATH TODO: Do we need that ??? Looks fishy. +export PYTHONPATH=${PYTHONPATH:=}:${AIRFLOW_ROOT}/tests/test_utils + +usage() { + echo """ + +Usage: ${CMDNAME} [FLAGS] [TESTS_TO_RUN] -- + +Runs tests specified (or all tests if no tests are specified) + +Flags: + +-h, --help + Shows this help message. + +-i, --with-db-init + Forces database initialization before tests + +-s, --nocapture + Don't capture stdout when running the tests. This is useful if you are + debugging with ipdb and want to drop into console with it + by adding this line to source code: + + import ipdb; ipdb.set_trace() + +-v, --verbose + Verbose output showing coloured output of tests being run and summary + of the tests - in a manner similar to the tests run in the CI environment. + +""" +} + +echo + +#################### Parsing options/arguments +if ! PARAMS=$(getopt \ + -o "h i s v" \ + -l "help with-db-init nocapture verbose" \ + --name "${CMDNAME}" -- "$@") +then + usage + exit 1 +fi + +eval set -- "${PARAMS}" +unset PARAMS + +WITH_DB_INIT="false" +NOCAPTURE="false" +VERBOSE="false" + +# Parse Flags.
+# Please update short and long options in the run-tests-complete script +# This way autocomplete will work out-of-the-box +while true +do + case "${1}" in + -h|--help) + usage; + exit 0 ;; + -i|--with-db-init) + WITH_DB_INIT="true" + shift ;; + -s|--nocapture) + NOCAPTURE="true" + shift ;; + -v|--verbose) + VERBOSE="true" + shift;; + --) + shift ; + break ;; + *) + usage + echo + echo "ERROR: Unknown argument ${1}" + echo + exit 1 + ;; + esac +done + + +# any argument received after -- is overriding the default nose execution arguments: +NOSE_ARGS=("$@") + +AIRFLOW_DB_INITIALISED_FILE=${HOME}/.airflow_db_initialised + +if [[ "${WITH_DB_INIT}" == "true" || ! -f ${AIRFLOW_DB_INITIALISED_FILE} ]]; then + echo + if [[ "${WITH_DB_INIT}" == "true" ]]; then + echo "Initializing the DB - forced with --with-db-init switch" + else + echo "Initializing the DB - first time after entering the container" + echo "You can force re-initialization the database by adding --with-db-init switch to run-tests." 
+ fi + echo + yes | airflow initdb || true + airflow resetdb -y + touch "${AIRFLOW_DB_INITIALISED_FILE}" +else + echo + echo "Skipping initializing of the DB as it was initialized already" + echo + echo "You can re-initialize the database by adding --with-db-init flag when running tests" + echo +fi + +if [[ "${KRB5_KTNAME:=}" == "" ]]; then + echo "KRB5_KTNAME variable is empty - no kerberos intialisation" +else + kinit -kt "${KRB5_KTNAME}" airflow +fi + +if [[ "${#NOSE_ARGS[@]}" == "0" ]]; then + NOSE_ARGS=("--with-coverage" + "--cover-erase" + "--cover-html" + "--cover-package=airflow" + "--cover-html-dir=airflow/www/static/coverage" + "--with-ignore-docstrings" + "--rednose" + "--with-timer" + "-v" + "--logging-level=DEBUG") +fi + +if [[ "${NOCAPTURE}" == "true" ]]; then + echo + echo "Stop capturing stdout" + echo + NOSE_ARGS+=("--nocapture") +fi + +if [[ "${VERBOSE}" == "true" ]]; then + echo + echo "Verbose output" + echo + NOSE_ARGS+=("--rednose" "--with-timer" "-v" "--logging-level=DEBUG") +fi + +echo +echo "Starting the tests with arguments: ${NOSE_ARGS[*]}" +echo +nosetests "${NOSE_ARGS[@]}" +set -u diff --git a/run_unit_tests.sh b/run_unit_tests.sh deleted file mode 100755 index 74b3c9d1e3cec..0000000000000 --- a/run_unit_tests.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -x - -# environment -export AIRFLOW_HOME=${AIRFLOW_HOME:=~} -export AIRFLOW__CORE__UNIT_TEST_MODE=True - -# add test/contrib to PYTHONPATH -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -export PYTHONPATH=$PYTHONPATH:${DIR}/tests/test_utils - -# any argument received is overriding the default nose execution arguments: -nose_args=$@ - -# Generate the `airflow` executable if needed -which airflow > /dev/null || python setup.py develop - -echo "Initializing the DB" -yes | airflow initdb -yes | airflow resetdb - -if [ -z "$nose_args" ]; then - nose_args="--with-coverage \ - --cover-erase \ - --cover-html \ - --cover-package=airflow \ - --cover-html-dir=airflow/www/static/coverage \ - --with-ignore-docstrings \ - --rednose \ - --with-timer \ - -v \ - --logging-level=DEBUG " -fi - -# For impersonation tests running on SQLite on Travis, make the database world readable so other -# users can update it -AIRFLOW_DB="$HOME/airflow.db" - -if [ -f "${AIRFLOW_DB}" ]; then - chmod a+rw "${AIRFLOW_DB}" - chmod g+rwx "${AIRFLOW_HOME}" -fi - -# For impersonation tests on Travis, make airflow accessible to other users via the global PATH -# (which contains /usr/local/bin). 
Some test environments, like the docker instructions, won't have sudo and -# are probably running as root anyway -if command -v sudo > /dev/null; then - sudo ln -sf "${VIRTUAL_ENV}/bin/airflow" /usr/local/bin/ -else - ln -sf "${VIRTUAL_ENV}/bin/airflow" /usr/local/bin/ -fi - -echo "Starting the unit tests with the following nose arguments: "$nose_args -nosetests $nose_args - -# To run individual tests: -# nosetests tests.core:CoreTest.test_scheduler_job diff --git a/scripts/ci/5-run-tests.sh b/scripts/ci/5-run-tests.sh deleted file mode 100755 index 8aee2d277cf79..0000000000000 --- a/scripts/ci/5-run-tests.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -o verbose -set -e - -if [ -z "$HADOOP_HOME" ]; then - echo "HADOOP_HOME not set - abort" >&2 - exit 1 -fi - -echo "Using ${HADOOP_DISTRO} distribution of Hadoop from ${HADOOP_HOME}" - -pwd - -echo "Using travis airflow.cfg" -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cp -f ${DIR}/airflow_travis.cfg ~/unittests.cfg - -ROOTDIR="$(dirname $(dirname $DIR))" -export AIRFLOW__CORE__DAGS_FOLDER="$ROOTDIR/tests/dags" - -# add test/contrib to PYTHONPATH -export PYTHONPATH=${PYTHONPATH:-$ROOTDIR/tests/test_utils} - -echo Backend: $AIRFLOW__CORE__SQL_ALCHEMY_CONN - -# environment -export AIRFLOW_HOME=${AIRFLOW_HOME:=~} -export AIRFLOW__CORE__UNIT_TEST_MODE=True - -# any argument received is overriding the default nose execution arguments: -nose_args=$@ - -# Generate the `airflow` executable if needed -which airflow > /dev/null || python setup.py develop - -# For impersonation tests on Travis, make airflow accessible to other users via the global PATH -# (which contains /usr/local/bin) -sudo ln -sf "${VIRTUAL_ENV}/bin/airflow" /usr/local/bin/ - -# Fix codecov build path -if [ ! 
-h /home/travis/build/apache/airflow ]; then - sudo mkdir -p /home/travis/build/apache - sudo ln -s ${ROOTDIR} /home/travis/build/apache/airflow -fi - -if [ -z "$KUBERNETES_VERSION" ]; then - echo "Initializing the DB" - yes | airflow initdb - yes | airflow resetdb -fi - -if [ -z "$nose_args" ]; then - nose_args="--with-coverage \ - --cover-erase \ - --cover-html \ - --cover-package=airflow \ - --cover-html-dir=airflow/www/static/coverage \ - --with-ignore-docstrings \ - --rednose \ - --with-timer \ - -v \ - --logging-level=INFO" -fi - -if [ -z "$KUBERNETES_VERSION" ]; then - # kdc init happens in setup_kdc.sh - kinit -kt ${KRB5_KTNAME} airflow -fi - -# For impersonation tests running on SQLite on Travis, make the database world readable so other -# users can update it -AIRFLOW_DB="$HOME/airflow.db" - -if [ -f "${AIRFLOW_DB}" ]; then - chmod a+rw "${AIRFLOW_DB}" - chmod g+rwx "${AIRFLOW_HOME}" -fi - -echo "Starting the unit tests with the following nose arguments: "$nose_args -nosetests $nose_args - -# To run individual tests: -# nosetests tests.core:CoreTest.test_scheduler_job diff --git a/scripts/ci/6-check-license.sh b/scripts/ci/6-check-license.sh deleted file mode 100755 index 8f3999043b14d..0000000000000 --- a/scripts/ci/6-check-license.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - -acquire_rat_jar () { - - URL="http://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar" - - JAR="$rat_jar" - - # Download rat launch jar if it hasn't been downloaded yet - if [ ! -f "$JAR" ]; then - # Download - printf "Attempting to fetch rat\n" - JAR_DL="${JAR}.part" - if [ $(command -v curl) ]; then - curl -L --silent "${URL}" > "$JAR_DL" && mv "$JAR_DL" "$JAR" - elif [ $(command -v wget) ]; then - wget --quiet ${URL} -O "$JAR_DL" && mv "$JAR_DL" "$JAR" - else - printf "You do not have curl or wget installed, please install rat manually.\n" - exit -1 - fi - fi - - unzip -tq "$JAR" &> /dev/null - if [ $? -ne 0 ]; then - # We failed to download - rm "$JAR" - printf "Our attempt to download rat locally to ${JAR} failed. Please install rat manually.\n" - exit -1 - fi - printf "Done downloading.\n" -} - -# Go to the Airflow project root directory -FWDIR="$(cd "`dirname "$0"`"/../..; pwd)" -cd "$FWDIR" - -TMP_DIR=/tmp - -if test -x "$JAVA_HOME/bin/java"; then - declare java_cmd="$JAVA_HOME/bin/java" -else - declare java_cmd=java -fi - -export RAT_VERSION=0.12 -export rat_jar="${TMP_DIR}"/lib/apache-rat-${RAT_VERSION}.jar -mkdir -p ${TMP_DIR}/lib - - -[[ -f "$rat_jar" ]] || acquire_rat_jar || { - echo "Download failed. Obtain the rat jar manually and place it at $rat_jar" - exit 1 -} - -# This is the target of a symlink in airflow/www/static/docs - and rat exclude doesn't cope with the symlink target doesn't exist -mkdir -p docs/_build/html/ - -echo "Running license checks. This can take a while." 
-$java_cmd -jar "$rat_jar" -E "$FWDIR"/.rat-excludes -d "$FWDIR" > rat-results.txt - -if [ $? -ne 0 ]; then - echo "RAT exited abnormally" - exit 1 -fi - -ERRORS="$(cat rat-results.txt | grep -e "??")" - -if test ! -z "$ERRORS"; then - echo >&2 "Could not find Apache license headers in the following files:" - echo >&2 "$ERRORS" - exit 1 -else - echo -e "RAT checks passed." -fi diff --git a/scripts/ci/_utils.sh b/scripts/ci/_utils.sh new file mode 100644 index 0000000000000..61396116e3b99 --- /dev/null +++ b/scripts/ci/_utils.sh @@ -0,0 +1,453 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +AIRFLOW_SOURCES=$(pwd) +export AIRFLOW_SOURCES + +BUILD_CACHE_DIR="${AIRFLOW_SOURCES}/.build" +export BUILD_CACHE_DIR + +FILES_FOR_REBUILD_CHECK="\ +setup.py \ +setup.cfg \ +Dockerfile \ +airflow/version.py +" + +mkdir -p ${AIRFLOW_SOURCES}/.mypy_cache +mkdir -p ${AIRFLOW_SOURCES}/logs +mkdir -p ${AIRFLOW_SOURCES}/tmp + +# Disable writing .pyc files - slightly slower imports but not messing around when switching +# Python version and avoids problems with root-owned .pyc files in host +export PYTHONDONTWRITEBYTECODE="true" + +# +# Sets mounting of host volumes to container for static checks +# unless AIRFLOW_MOUNT_HOST_VOLUMES_FOR_STATIC_CHECKS is not true +# +# Note that this cannot be a function because we need the AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS array variable +# +AIRFLOW_MOUNT_HOST_VOLUMES_FOR_STATIC_CHECKS=${AIRFLOW_MOUNT_HOST_VOLUMES_FOR_STATIC_CHECKS:="true"} + +declare -a AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS +if [[ ${AIRFLOW_MOUNT_HOST_VOLUMES_FOR_STATIC_CHECKS} == "true" ]]; then + echo + echo "Mounting host volumes to Docker" + echo + AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS=( \ + "-v" "${AIRFLOW_SOURCES}/airflow:/opt/airflow/airflow:cached" \ + "-v" "${AIRFLOW_SOURCES}/.mypy_cache:/opt/airflow/.mypy_cache:cached" \ + "-v" "${AIRFLOW_SOURCES}/dev:/opt/airflow/dev:cached" \ + "-v" "${AIRFLOW_SOURCES}/docs:/opt/airflow/docs:cached" \ + "-v" "${AIRFLOW_SOURCES}/scripts:/opt/airflow/scripts:cached" \ + "-v" "${AIRFLOW_SOURCES}/tmp:/opt/airflow/tmp:cached" \ + "-v" "${AIRFLOW_SOURCES}/tests:/opt/airflow/tests:cached" \ + "-v" "${AIRFLOW_SOURCES}/.flake8:/opt/airflow/.flake8:cached" \ + "-v" "${AIRFLOW_SOURCES}/setup.cfg:/opt/airflow/setup.cfg:cached" \ + "-v" "${AIRFLOW_SOURCES}/setup.py:/opt/airflow/setup.py:cached" \ + "-v" "${AIRFLOW_SOURCES}/.rat-excludes:/opt/airflow/.rat-excludes:cached" \ + "-v" "${AIRFLOW_SOURCES}/logs:/opt/airflow/logs:cached" \ + "-v" "${AIRFLOW_SOURCES}/logs:/root/logs:cached" \ + "-v"
"${AIRFLOW_SOURCES}/tmp:/opt/airflow/tmp:cached" \ + "-e" "PYTHONDONTWRITEBYTECODE=true" \ + ) +else + echo + echo "Skip mounting host volumes to Docker" + echo + AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS=( \ + "-e" "PYTHONDONTWRITEBYTECODE=true" \ + ) +fi + +export AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS + +# +# Creates cache directory where we will keep temporary files needed for the build +# +# This directory will be automatically deleted when the script is killed or exits (via trap) +# Unless SKIP_CACHE_DELETION variable is set. You can set this variable and then see +# the output/files generated by the scripts in this directory. +# +# Most useful is out.log file in this directory storing verbose output of the scripts. +# +function create_cache_directory() { + mkdir -p "${BUILD_CACHE_DIR}/cache/" + + CACHE_TMP_FILE_DIR=$(mktemp -d "${BUILD_CACHE_DIR}/cache/XXXXXXXXXX") + export CACHE_TMP_FILE_DIR + + if [[ ${SKIP_CACHE_DELETION:=} != "true" ]]; then + trap 'rm -rf -- "${CACHE_TMP_FILE_DIR}"' INT TERM HUP EXIT + fi + + OUTPUT_LOG="${CACHE_TMP_FILE_DIR}/out.log" + export OUTPUT_LOG +} + +# +# Verifies if stored md5sum of the file changed since the last time it was checked +# The md5sum files are stored in .build directory - you can delete this directory +# If you want to rebuild everything from scratch +# +function check_file_md5sum { + local FILE="${1}" + local MD5SUM + mkdir -pv "${BUILD_CACHE_DIR}/${THE_IMAGE}" + MD5SUM=$(md5sum "${FILE}") + local MD5SUM_FILE + MD5SUM_FILE=${BUILD_CACHE_DIR}/${THE_IMAGE}/$(basename "${FILE}").md5sum + local MD5SUM_FILE_NEW + MD5SUM_FILE_NEW=${CACHE_TMP_FILE_DIR}/$(basename "${FILE}").md5sum.new + echo "${MD5SUM}" > "${MD5SUM_FILE_NEW}" + local RET_CODE=0 + if [[ ! -f "${MD5SUM_FILE}" ]]; then + echo "Missing md5sum for ${FILE}" + RET_CODE=1 + else + diff "${MD5SUM_FILE_NEW}" "${MD5SUM_FILE}" >/dev/null + RES=$?
+ if [[ "${RES}" != "0" ]]; then + echo "The md5sum changed for ${FILE}" + RET_CODE=1 + fi + fi + return ${RET_CODE} +} + +# +# Moves md5sum file from its temporary location in CACHE_TMP_FILE_DIR to +# BUILD_CACHE_DIR - thus updating stored MD5 sum of the file +# +function move_file_md5sum { + local FILE="${1}" + local MD5SUM_FILE + mkdir -pv "${BUILD_CACHE_DIR}/${THE_IMAGE}" + MD5SUM_FILE=${BUILD_CACHE_DIR}/${THE_IMAGE}/$(basename "${FILE}").md5sum + local MD5SUM_FILE_NEW + MD5SUM_FILE_NEW=${CACHE_TMP_FILE_DIR}/$(basename "${FILE}").md5sum.new + if [[ -f "${MD5SUM_FILE_NEW}" ]]; then + mv "${MD5SUM_FILE_NEW}" "${MD5SUM_FILE}" + echo "Updated md5sum file ${MD5SUM_FILE} for ${FILE}." + fi +} + +# +# Stores md5sum files for all important files and +# records that we built the images locally so that next time we use +# it from the local docker cache rather than pull (unless forced) +# +function update_all_md5_files() { + echo + echo "Updating md5sum files" + echo + for FILE in ${FILES_FOR_REBUILD_CHECK} + do + move_file_md5sum "${AIRFLOW_SOURCES}/${FILE}" + done + touch "${BUILD_CACHE_DIR}/.built_${THE_IMAGE}_${PYTHON_VERSION}" +} + +# +# Checks md5sum of all important files in order to optimise speed of running various operations +# that mount sources of Airflow to container and require docker image built with latest dependencies. +# The Docker image will be marked for rebuilding only in case any of the important files change: +# * setup.py +# * setup.cfg +# * Dockerfile +# * airflow/version.py +# +# This is needed because we want to skip rebuilding of the image when only airflow sources change but +# trigger rebuild in case we need to change dependencies (setup.py, setup.cfg, change version of Airflow +# or the Dockerfile itself changes). +# +# Another reason to skip rebuilding Docker is that currently it takes a bit longer time than simple Docker
There are the following, problems with the current Dockerfiles that need longer build times: +# 1) We need to fix group permissions of files in Docker because different linux build services have +# different default umask and Docker uses group permissions in checking for cache invalidation. +# 2) we use multi-stage build and in case of slim image we needlessly build a full CI image because +# support for this only comes with the upcoming buildkit: https://github.com/docker/cli/issues/1134 +# +# As result of this check - most of the static checks will start pretty much immediately. +# +function check_if_docker_build_is_needed() { + set +e + + for FILE in ${FILES_FOR_REBUILD_CHECK} + do + if ! check_file_md5sum "${AIRFLOW_SOURCES}/${FILE}"; then + export AIRFLOW_CONTAINER_DOCKER_BUILD_NEEDED="true" + fi + done + set -e +} + +# +# Checks if core utils required in the host system are installed and explain what needs to be done if not +# +function check_if_coreutils_installed() { + set +e + getopt -T >/dev/null + GETOPT_RETVAL=$? + + if [[ $(uname -s) == 'Darwin' ]] ; then + command -v gstat >/dev/null + STAT_PRESENT=$? + else + command -v stat >/dev/null + STAT_PRESENT=$? + fi + + command -v md5sum >/dev/null + MD5SUM_PRESENT=$? + + set -e + + #################### Parsing options/arguments + if [[ ${GETOPT_RETVAL} != 4 || "${STAT_PRESENT}" != "0" || "${MD5SUM_PRESENT}" != "0" ]]; then + echo + if [[ $(uname -s) == 'Darwin' ]] ; then + echo >&2 "You are running ${CMDNAME} in OSX environment" + echo >&2 "And you need to install gnu commands" + echo + echo >&2 "Run 'brew install gnu-getopt coreutils'" + echo + echo >&2 "Then link the gnu-getopt to become default as suggested by brew by typing:" + echo >&2 "echo 'export PATH=\"/usr/local/opt/gnu-getopt/bin:\$PATH\"' >> ~/.bash_profile" + echo >&2 ". ~/.bash_profile" + echo + echo >&2 "Login and logout afterwards" + echo + else + echo >&2 "You do not have necessary tools in your path (getopt, stat, md5sum)." 
+ echo >&2 "Please install latest/GNU version of getopt and coreutils." + echo >&2 "This can usually be done with 'apt install util-linux coreutils'" + fi + echo + exit 1 + fi +} + +# +# Asserts that we are not inside of the container +# +function assert_not_in_container() { + if [[ -f /.dockerenv ]]; then + echo >&2 + echo >&2 "You are inside the Airflow docker container!" + echo >&2 "You should only run this script from the host." + echo >&2 "Learn more about how we develop and test airflow in:" + echo >&2 "https://github.com/apache/airflow/blob/master/CONTRIBUTING.md" + echo >&2 + exit 1 + fi +} + +# +# Forces Python version to 3.6 (for static checks) +# +function force_python_3_6() { + export PYTHON_BINARY=python3.6 + + # And fail in case it is not available + if [[ ! -x "$(command -v "${PYTHON_BINARY}")" ]]; then + echo >&2 + echo >&2 "${PYTHON_BINARY} is missing in your \$PATH" + echo >&2 + echo >&2 "Please install Python 3.6 and make it available in your path" + echo >&2 + exit 1 + fi + + # Set python version variable to force it in the scripts as well + PYTHON_VERSION=3.6 + export PYTHON_VERSION +} + +# +# Rebuilds the slim image for static checks if needed. In order to speed it up, it's built without NPM +# +function rebuild_image_if_needed_for_static_checks() { + export AIRFLOW_CONTAINER_SKIP_SLIM_CI_IMAGE="false" + export AIRFLOW_CONTAINER_SKIP_CI_IMAGE="true" + export AIRFLOW_CONTAINER_PUSH_IMAGES="false" + export AIRFLOW_CONTAINER_BUILD_NPM="false" # Skip NPM builds to make them faster ! 
+ + export PYTHON_VERSION=3.6 # Always use python version 3.6 for static checks + AIRFLOW_VERSION=$(cat airflow/version.py - << EOF | python +print(version.replace("+","")) +EOF + ) + export AIRFLOW_VERSION + + export THE_IMAGE="SLIM_CI" + if [[ -f "${BUILD_CACHE_DIR}/.built_${THE_IMAGE}_${PYTHON_VERSION}" ]]; then + echo + echo "Image built locally - skip force-pulling them" + echo + else + echo + echo "Image not built locally - force pulling them first" + echo + export AIRFLOW_CONTAINER_FORCE_PULL_IMAGES="true" + export AIRFLOW_CONTAINER_DOCKER_BUILD_NEEDED="true" + fi + + AIRFLOW_CONTAINER_DOCKER_BUILD_NEEDED=${AIRFLOW_CONTAINER_DOCKER_BUILD_NEEDED:="false"} + check_if_docker_build_is_needed + + if [[ "${AIRFLOW_CONTAINER_DOCKER_BUILD_NEEDED}" == "true" ]]; then + echo + echo "Rebuilding image" + echo + # shellcheck source=../../hooks/build + ./hooks/build | tee -a "${OUTPUT_LOG}" + update_all_md5_files + echo + echo "Image rebuilt" + echo + else + echo + echo "No need to rebuild the image as none of the sensitive files changed: ${FILES_FOR_REBUILD_CHECK}" + echo + fi + + AIRFLOW_SLIM_CI_IMAGE=$(cat "${BUILD_CACHE_DIR}/.AIRFLOW_SLIM_CI_IMAGE") + export AIRFLOW_SLIM_CI_IMAGE +} + +function rebuild_image_if_needed_for_tests() { + export AIRFLOW_CONTAINER_SKIP_SLIM_CI_IMAGE="true" + export AIRFLOW_CONTAINER_SKIP_CI_IMAGE="false" + PYTHON_VERSION=${PYTHON_VERSION:=$(python -c \ + 'import sys; print("%s.%s" % (sys.version_info.major, sys.version_info.minor))')} + export PYTHON_VERSION + if [[ ${PYTHON_VERSION} == 2.* ]]; then + echo >&2 + echo >&2 " Warning! 
You have python 2.7 on your path" + echo >&2 " Switching to python 3.6" + echo >&2 + export PYTHON_VERSION=3.6 + fi + AIRFLOW_VERSION=$(cat airflow/version.py - << EOF | python +print(version.replace("+","")) +EOF + ) + export AIRFLOW_VERSION + + export THE_IMAGE="CI" + if [[ -f "${BUILD_CACHE_DIR}/.built_${THE_IMAGE}_${PYTHON_VERSION}" ]]; then + echo + echo "Image built locally - skip force-pulling them" + echo + else + echo + echo "Image not built locally - force pulling them first" + echo + export AIRFLOW_CONTAINER_FORCE_PULL_IMAGES="true" + export AIRFLOW_CONTAINER_DOCKER_BUILD_NEEDED="true" + fi + + export DOCKERHUB_USER=${DOCKERHUB_USER:="apache"} + export DOCKERHUB_REPO=${DOCKERHUB_REPO:="airflow"} + export AIRFLOW_CONTAINER_PUSH_IMAGES="false" + export AIRFLOW_CONTAINER_CI_OPTIMISED_BUILD="true" + + AIRFLOW_CONTAINER_DOCKER_BUILD_NEEDED=${AIRFLOW_CONTAINER_DOCKER_BUILD_NEEDED:="false"} + check_if_docker_build_is_needed + + if [[ "${AIRFLOW_CONTAINER_DOCKER_BUILD_NEEDED}" == "true" ]]; then + echo + echo "Rebuilding image" + echo + # shellcheck source=../../hooks/build + ./hooks/build | tee -a "${OUTPUT_LOG}" + update_all_md5_files + echo + echo "Image rebuilt" + echo + else + echo + echo "No need to rebuild the image as none of the sensitive files changed: ${FILES_FOR_REBUILD_CHECK}" + echo + fi + + AIRFLOW_CI_IMAGE=$(cat "${BUILD_CACHE_DIR}/.AIRFLOW_CI_IMAGE") + export AIRFLOW_CI_IMAGE +} + +# +# Starts the script. If VERBOSE variable is set to true, it enables verbose output of commands executed +# Also prints some useful diagnostics information at start of the script +# +function script_start { + echo + echo "Running $(basename $0)" + echo + echo "Log is redirected to ${OUTPUT_LOG}" + echo + if [[ ${VERBOSE:=} == "true" ]]; then + echo + echo "Variable VERBOSE Set to \"true\"" + echo "You will see a lot of output" + echo + set -x + else + echo "You can increase verbosity by running 'export VERBOSE=\"true\"" + if [[ ${SKIP_CACHE_DELETION:=} != "true"
]]; then + echo "And skip deleting the output file with 'export SKIP_CACHE_DELETION=\"true\"" + fi + echo + fi + START_SCRIPT_TIME=$(date +%s) +} + +# +# Disables verbosity in the script +# +function script_end { + if [[ ${VERBOSE:=} == "true" ]]; then + set +x + fi + END_SCRIPT_TIME=$(date +%s) + RUN_SCRIPT_TIME=$((END_SCRIPT_TIME-START_SCRIPT_TIME)) + echo + echo "Finished the script $(basename $0)" + echo "It took ${RUN_SCRIPT_TIME} seconds" + echo +} + +function go_to_airflow_sources { + echo + pushd "${MY_DIR}/../../" &>/dev/null || exit 1 + echo + echo "Running in host in $(pwd)" + echo +} + +# +# Performs basic sanity checks common for most of the scripts in this directory +# +function basic_sanity_checks() { + assert_not_in_container + go_to_airflow_sources + force_python_3_6 + check_if_coreutils_installed + create_cache_directory +} diff --git a/scripts/ci/ci_before_install.sh b/scripts/ci/ci_before_install.sh new file mode 100755 index 0000000000000..100fc173fe49e --- /dev/null +++ b/scripts/ci/ci_before_install.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -xeuo pipefail + +KUBERNETES_VERSION=${KUBERNETES_VERSION:=""} +# Required for K8s v1.10.x. 
See +# https://github.com/kubernetes/kubernetes/issues/61058#issuecomment-372764783 +if [[ "${KUBERNETES_VERSION}" != "" ]]; then + sudo mount --make-shared / + sudo service docker restart +fi +# Cleanup docker installation. It should be empty in CI but let's not risk +docker system prune --all --force diff --git a/scripts/ci/2-setup-kdc.sh b/scripts/ci/ci_build.sh old mode 100755 new mode 100644 similarity index 60% rename from scripts/ci/2-setup-kdc.sh rename to scripts/ci/ci_build.sh index c5cd381c2f626..ac4abe322d31f --- a/scripts/ci/2-setup-kdc.sh +++ b/scripts/ci/ci_build.sh @@ -1,6 +1,4 @@ #!/usr/bin/env bash - -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -18,21 +16,17 @@ # specific language governing permissions and limitations # under the License. -set -exuo pipefail +set -euo pipefail + +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -DIRNAME=$(cd "$(dirname "$0")"; pwd) +# shellcheck source=./_utils.sh +.
"${MY_DIR}/_utils.sh" -FQDN=`hostname` -ADMIN="admin" -PASS="airflow" -KRB5_KTNAME=/etc/airflow.keytab +basic_sanity_checks -cat /etc/hosts -echo "hostname: ${FQDN}" +script_start -sudo cp $DIRNAME/krb5/krb5.conf /etc/krb5.conf +rebuild_image_if_needed_for_static_checks -echo -e "${PASS}\n${PASS}" | sudo kadmin -p ${ADMIN}/admin -w ${PASS} -q "addprinc -randkey airflow/${FQDN}" -sudo kadmin -p ${ADMIN}/admin -w ${PASS} -q "ktadd -k ${KRB5_KTNAME} airflow" -sudo kadmin -p ${ADMIN}/admin -w ${PASS} -q "ktadd -k ${KRB5_KTNAME} airflow/${FQDN}" -sudo chmod 0644 ${KRB5_KTNAME} +script_end diff --git a/scripts/ci/ci_check_license.sh b/scripts/ci/ci_check_license.sh new file mode 100755 index 0000000000000..c110c1c735d40 --- /dev/null +++ b/scripts/ci/ci_check_license.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +set -uo pipefail + +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# shellcheck source=./_utils.sh +. 
"${MY_DIR}/_utils.sh" + +basic_sanity_checks + +script_start + +rebuild_image_if_needed_for_tests + +docker run "${AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS[@]}" -t \ + --entrypoint /opt/airflow/scripts/ci/in_container/run_check_licence.sh \ + --env PYTHONDONTWRITEBYTECODE="true" \ + --env AIRFLOW_CI_VERBOSE=${VERBOSE} \ + --env AIRFLOW_CI_VERBOSE=${VERBOSE} \ + --env HOST_USER_ID="$(id -ur)" \ + --env HOST_GROUP_ID="$(id -gr)" \ + "${AIRFLOW_CI_IMAGE}" \ + +script_end diff --git a/scripts/ci/1-setup-env.sh b/scripts/ci/ci_docs.sh similarity index 51% rename from scripts/ci/1-setup-env.sh rename to scripts/ci/ci_docs.sh index b297de56a7c9a..2557084bc4535 100755 --- a/scripts/ci/1-setup-env.sh +++ b/scripts/ci/ci_docs.sh @@ -1,6 +1,4 @@ #!/usr/bin/env bash - -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -18,24 +16,25 @@ # specific language governing permissions and limitations # under the License. -set -exuo pipefail +set -euo pipefail + +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# shellcheck source=./_utils.sh +. "${MY_DIR}/_utils.sh" + +basic_sanity_checks -if [[ ${INSIDE_DOCKER_CONTAINER:-} != true ]]; then - echo "You are not inside a docker container!" - echo "You should only run this script in the docker container as it may override your files." 
- echo "Learn more about how we develop and test airflow in:" - echo "https://github.com/apache/airflow/blob/master/CONTRIBUTING.md#development-and-testing" - exit 1 -fi +script_start -# Start MiniCluster -java -cp "/tmp/minicluster-1.1-SNAPSHOT/*" com.ing.minicluster.MiniCluster > /dev/null & +rebuild_image_if_needed_for_static_checks -# Set up ssh keys -echo 'yes' | ssh-keygen -t rsa -C your_email@youremail.com -P '' -f ~/.ssh/id_rsa -cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys -ln -s -f ~/.ssh/authorized_keys ~/.ssh/authorized_keys2 -chmod 600 ~/.ssh/* +docker run "${AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS[@]}" -t \ + --entrypoint /opt/airflow/docs/build.sh \ + --env PYTHONDONTWRITEBYTECODE="true" \ + --env AIRFLOW_CI_VERBOSE=${VERBOSE} \ + --env HOST_USER_ID="$(id -ur)" \ + --env HOST_GROUP_ID="$(id -gr)" \ + "${AIRFLOW_SLIM_CI_IMAGE}" \ -# SSH Service -sudo service ssh restart +script_end diff --git a/scripts/ci/ci_flake8.sh b/scripts/ci/ci_flake8.sh new file mode 100755 index 0000000000000..35f26e07d03b7 --- /dev/null +++ b/scripts/ci/ci_flake8.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -euo pipefail + +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# shellcheck source=./_utils.sh +. "${MY_DIR}/_utils.sh" + +basic_sanity_checks + +script_start + +rebuild_image_if_needed_for_static_checks + +FILES=("$@") + +if [[ "${#FILES[@]}" == "0" ]]; then + docker run "${AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS[@]}" \ + --entrypoint /opt/airflow/scripts/ci/in_container/run_flake8.sh \ + --env PYTHONDONTWRITEBYTECODE="true" \ + --env AIRFLOW_CI_VERBOSE=${VERBOSE} \ + --env HOST_USER_ID="$(id -ur)" \ + --env HOST_GROUP_ID="$(id -gr)" \ + "${AIRFLOW_SLIM_CI_IMAGE}" | tee -a "${OUTPUT_LOG}" +else + docker run "${AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS[@]}" \ + --entrypoint /opt/airflow/scripts/ci/in_container/run_flake8.sh \ + --env PYTHONDONTWRITEBYTECODE="true" \ + --env AIRFLOW_CI_VERBOSE=${VERBOSE} \ + --env HOST_USER_ID="$(id -ur)" \ + --env HOST_GROUP_ID="$(id -gr)" \ + "${AIRFLOW_SLIM_CI_IMAGE}" \ + "${FILES[@]}" | tee -a "${OUTPUT_LOG}" +fi + +script_end diff --git a/scripts/ci/ci_lint_dockerfile.sh b/scripts/ci/ci_lint_dockerfile.sh index 6385f298323f6..29472ca9e4180 100755 --- a/scripts/ci/ci_lint_dockerfile.sh +++ b/scripts/ci/ci_lint_dockerfile.sh @@ -1,6 +1,4 @@ #!/usr/bin/env bash - -# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -16,12 +14,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # -set -xeuo pipefail +set -euo pipefail MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd ${MY_DIR}/../../ +# shellcheck source=./_utils.sh +. 
"${MY_DIR}/_utils.sh" +basic_sanity_checks -docker run -v $(pwd)/Dockerfile:/root/Dockerfile -v $(pwd)/.hadolint.yaml:/root/.hadolint.yaml \ +script_start + +docker run -v "$(pwd)/Dockerfile:/root/Dockerfile" -v "$(pwd)/.hadolint.yaml:/root/.hadolint.yaml" \ -w /root hadolint/hadolint /bin/hadolint Dockerfile + +script_end diff --git a/scripts/ci/ci_mypy.sh b/scripts/ci/ci_mypy.sh new file mode 100755 index 0000000000000..e7f8f9a0be090 --- /dev/null +++ b/scripts/ci/ci_mypy.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euo pipefail + +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# shellcheck source=./_utils.sh +. 
"${MY_DIR}/_utils.sh" + +basic_sanity_checks + +script_start + +rebuild_image_if_needed_for_static_checks + +FILES=("$@") +if [[ "${#FILES[@]}" == "0" ]]; then + docker run "${AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS[@]}" \ + --entrypoint /opt/airflow/scripts/ci/in_container/run_mypy.sh \ + --env PYTHONDONTWRITEBYTECODE="true" \ + --env AIRFLOW_CI_VERBOSE=${VERBOSE} \ + --env HOST_USER_ID="$(id -ur)" \ + --env HOST_GROUP_ID="$(id -gr)" \ + "${AIRFLOW_SLIM_CI_IMAGE}" \ + "airflow" "tests" | tee -a "${OUTPUT_LOG}" +else + docker run "${AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS[@]}" \ + --entrypoint /opt/airflow/scripts/ci/in_container/run_mypy.sh \ + --env PYTHONDONTWRITEBYTECODE="true" \ + --env AIRFLOW_CI_VERBOSE=${VERBOSE} \ + --env HOST_USER_ID="$(id -ur)" \ + --env HOST_GROUP_ID="$(id -gr)" \ + "${AIRFLOW_SLIM_CI_IMAGE}" \ + "${FILES[@]}" | tee -a "${OUTPUT_LOG}" +fi + +script_end diff --git a/scripts/ci/ci_pylint.sh b/scripts/ci/ci_pylint.sh index 7755244870d5f..ac70f64cdcfff 100755 --- a/scripts/ci/ci_pylint.sh +++ b/scripts/ci/ci_pylint.sh @@ -16,60 +16,37 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -# Script to run Pylint on all code. Can be started from any working directory -# ./scripts/ci/ci_pylint.sh - -set -uo pipefail - -# Uncomment to see the commands executed -# set -x +set -euo pipefail MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -pushd ${MY_DIR}/../../ || exit 1 - -echo -echo "Running in $(pwd)" -echo +# shellcheck source=./_utils.sh +. "${MY_DIR}/_utils.sh" -echo -echo "Running pylint for source code without tests" -echo +basic_sanity_checks -find . -name "*.py" \ --not -path "./.eggs/*" \ --not -path "./airflow/www/node_modules/*" \ --not -path "./airflow/_vendor/*" \ --not -path "./build/*" \ --not -path "./tests/*" \ --not -name 'webserver_config.py' | grep -vFf scripts/ci/pylint_todo.txt | xargs pylint --output-format=colorized -RES_MAIN=$? 
+script_start -echo -echo "Running pylint for tests" -echo +rebuild_image_if_needed_for_static_checks -find . -name "*.py" -path './tests/*' | \ -grep -vFf scripts/ci/pylint_todo.txt | \ -xargs pylint --disable=" - missing-docstring, - no-self-use, - too-many-public-methods, - protected-access - " \ - --output-format=colorized -RES_TESTS=$? - -popd || exit 1 - -if [[ "${RES_TESTS}" != 0 || "${RES_MAIN}" != 0 ]]; then - echo - echo "There were some pylint errors. Exiting" - echo - exit 1 +FILES=("$@") +if [[ "${#FILES[@]}" == "0" ]]; then + docker run "${AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS[@]}" \ + --entrypoint /opt/airflow/scripts/ci/in_container/run_pylint.sh \ + --env PYTHONDONTWRITEBYTECODE="true" \ + --env AIRFLOW_CI_VERBOSE=${VERBOSE} \ + --env HOST_USER_ID="$(id -ur)" \ + --env HOST_GROUP_ID="$(id -gr)" \ + "${AIRFLOW_SLIM_CI_IMAGE}" | tee -a "${OUTPUT_LOG}" else - echo - echo "Pylint check succeeded" - echo + docker run "${AIRFLOW_CONTAINER_EXTRA_DOCKER_FLAGS[@]}" \ + --entrypoint /opt/airflow/scripts/ci/in_container/run_pylint.sh \ + --env PYTHONDONTWRITEBYTECODE="true" \ + --env AIRFLOW_CI_VERBOSE=${VERBOSE} \ + --env HOST_USER_ID="$(id -ur)" \ + --env HOST_GROUP_ID="$(id -gr)" \ + "${AIRFLOW_SLIM_CI_IMAGE}" \ + "${FILES[@]}" | tee -a "${OUTPUT_LOG}" fi + +script_end diff --git a/scripts/ci/ci_run_airflow_testing.sh b/scripts/ci/ci_run_airflow_testing.sh new file mode 100755 index 0000000000000..a648f3aed35df --- /dev/null +++ b/scripts/ci/ci_run_airflow_testing.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euo pipefail +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# shellcheck source=./_utils.sh +. "${MY_DIR}/_utils.sh" + +export VERBOSE=${VERBOSE:="true"} + +basic_sanity_checks + +script_start + +rebuild_image_if_needed_for_tests + +export BACKEND=${BACKEND:="sqlite"} +export ENV=${ENV:="docker"} +export MOUNT_LOCAL_SOURCES=${MOUNT_LOCAL_SOURCES:="false"} +export WEBSERVER_HOST_PORT=${WEBSERVER_HOST_PORT:="8080"} +export AIRFLOW_CI_VERBOSE=${VERBOSE} +export PYTHONDONTWRITEBYTECODE="true" + +if [[ ${MOUNT_LOCAL_SOURCES} == "true" ]]; then + DOCKER_COMPOSE_LOCAL=("-f" "${MY_DIR}/docker-compose-local.yml") +else + DOCKER_COMPOSE_LOCAL=() +fi + +# Default branch name for triggered builds is master +export AIRFLOW_CONTAINER_BRANCH_NAME=${AIRFLOW_CONTAINER_BRANCH_NAME:="master"} + +export AIRFLOW_CONTAINER_DOCKER_IMAGE=\ +${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_CONTAINER_BRANCH_NAME}-python${PYTHON_VERSION}-ci + +echo +echo "Using docker image: ${AIRFLOW_CONTAINER_DOCKER_IMAGE} for docker compose runs" +echo + +HOST_USER_ID="$(id -ur)" +export HOST_USER_ID + +HOST_GROUP_ID="$(id -gr)" +export HOST_GROUP_ID + +set +u +if [[ "${ENV}" == "docker" ]]; then + docker-compose --log-level INFO \ + -f "${MY_DIR}/docker-compose.yml" \ + -f "${MY_DIR}/docker-compose-${BACKEND}.yml" \ + "${DOCKER_COMPOSE_LOCAL[@]}" \ + run airflow-testing /opt/airflow/scripts/ci/in_container/entrypoint_ci.sh; +else + "${MY_DIR}/kubernetes/minikube/stop_minikube.sh" && "${MY_DIR}/kubernetes/setup_kubernetes.sh" && \ + "${MY_DIR}/kubernetes/kube/deploy.sh" -d 
persistent_mode + MINIKUBE_IP=$(minikube ip) + export MINIKUBE_IP + docker-compose --log-level ERROR \ + -f "${MY_DIR}/docker-compose.yml" \ + -f "${MY_DIR}/docker-compose-${BACKEND}.yml" \ + -f "${MY_DIR}/docker-compose-kubernetes.yml" \ + "${DOCKER_COMPOSE_LOCAL[@]}" \ + run --no-deps airflow-testing /opt/airflow/scripts/ci/in_container/entrypoint_ci.sh; + set +x + "${MY_DIR}/kubernetes/minikube/stop_minikube.sh" + + "${MY_DIR}/kubernetes/minikube/stop_minikube.sh" && "${MY_DIR}/kubernetes/setup_kubernetes.sh" && \ + "${MY_DIR}/kubernetes/kube/deploy.sh" -d git_mode + MINIKUBE_IP=$(minikube ip) + export MINIKUBE_IP + docker-compose --log-level ERROR \ + -f "${MY_DIR}/docker-compose.yml" \ + -f "${MY_DIR}/docker-compose-${BACKEND}.yml" \ + -f "${MY_DIR}/docker-compose-kubernetes.yml" \ + "${DOCKER_COMPOSE_LOCAL[@]}" \ + run --no-deps airflow-testing /opt/airflow/scripts/ci/in_container/entrypoint_ci.sh; + "${MY_DIR}/kubernetes/minikube/stop_minikube.sh" +fi +set -u + +script_end diff --git a/scripts/ci/docker-compose-kubernetes.yml b/scripts/ci/docker-compose-kubernetes.yml index 0cdb2df199b7b..eb48b6acd0b80 100644 --- a/scripts/ci/docker-compose-kubernetes.yml +++ b/scripts/ci/docker-compose-kubernetes.yml @@ -24,5 +24,5 @@ services: volumes: - /usr/local/bin/kubectl:/usr/local/bin/kubectl - /usr/local/bin/minikube:/usr/local/bin/minikube - - ~/.kube:/home/airflow/.kube - - ~/.minikube:/home/airflow/.minikube + - ~/.kube:/root/.kube + - ~/.minikube:/root/.minikube diff --git a/scripts/ci/docker-compose-local.yml b/scripts/ci/docker-compose-local.yml new file mode 100644 index 0000000000000..2719dd134e5d2 --- /dev/null +++ b/scripts/ci/docker-compose-local.yml @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: "2.2" +services: + airflow-testing: + volumes: + - ../../airflow:/opt/airflow/airflow:cached + - ../../setup.cfg:/opt/airflow/setup.cfg:cached + - ../../setup.py:/opt/airflow/setup.py:cached + - ../../dags:/opt/airflow/dags:cached + - ../../dev:/opt/airflow/dev:cached + - ../../docs:/opt/airflow/docs:cached + - ../../hooks:/opt/airflow/hooks:cached + - ../../scripts:/opt/airflow/scripts:cached + - ../../tests:/opt/airflow/tests:cached + - ../../.coveragerc:/opt/airflow/.coveragerc:cached + - ../../LICENSE:/opt/airflow/LICENSE:cached + - ../../MANIFEST.in:/opt/airflow/MANIFEST.in:cached + - ../../NOTICE:/opt/airflow/NOTICE:cached + - ../../CHANGELOG.txt:/opt/airflow/CHANGELOG:cached + - ../../.github:/opt/airflow/.github:cached + - ../../.flake8:/opt/airflow/.flake8:cached + - ../../pylintrc:/opt/airflow/pylintrc:cached + - ../../.rat-excludes:/opt/airflow/.rat-excludes:cached + - ../../run-tests:/opt/airflow/run-tests:cached + - ../../logs:/root/airflow/logs:cached + - ../../logs:/opt/airflow/logs:cached + - ../../tmp:/opt/airflow/tmp:cached + environment: + - HOST_USER_ID + - HOST_GROUP_ID + - PYTHONDONTWRITEBYTECODE + ports: + - "${WEBSERVER_HOST_PORT}:8080" diff --git a/scripts/ci/docker-compose-mysql.yml b/scripts/ci/docker-compose-mysql.yml new file mode 100644 index 0000000000000..365b75cec8213 --- /dev/null +++ b/scripts/ci/docker-compose-mysql.yml @@ -0,0 +1,22 @@ +# +# Licensed 
to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: "2.2" +services: + airflow-testing: + environment: + - AIRFLOW__CORE__SQL_ALCHEMY_CONN=mysql://root@mysql/airflow + - AIRFLOW__CELERY__RESULT_BACKEND=db+mysql://root@mysql/airflow diff --git a/scripts/ci/docker-compose-postgres.yml b/scripts/ci/docker-compose-postgres.yml new file mode 100644 index 0000000000000..68647554fee6e --- /dev/null +++ b/scripts/ci/docker-compose-postgres.yml @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: "2.2" +services: + airflow-testing: + environment: + - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://postgres:airflow@postgres/airflow + - AIRFLOW__CELERY__RESULT_BACKEND=db+postgresql://postgres:airflow@postgres/airflow diff --git a/scripts/ci/test-environment.sh b/scripts/ci/docker-compose-sqlite.yml similarity index 80% rename from scripts/ci/test-environment.sh rename to scripts/ci/docker-compose-sqlite.yml index 5c402d46df287..011689573efe0 100644 --- a/scripts/ci/test-environment.sh +++ b/scripts/ci/docker-compose-sqlite.yml @@ -1,4 +1,3 @@ -#!/bin/bash -e # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -14,6 +13,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# -docker-compose -f scripts/ci/docker-compose.yml run --rm airflow-testing "${@-bash}" +version: "2.2" +services: + airflow-testing: + environment: + - AIRFLOW__CORE__SQL_ALCHEMY_CONN=sqlite:////root/airflow/airflow.db + - AIRFLOW__CORE__EXECUTOR=SequentialExecutor diff --git a/scripts/ci/docker-compose.yml b/scripts/ci/docker-compose.yml index aa549f4fbefe3..5020cc13e359a 100644 --- a/scripts/ci/docker-compose.yml +++ b/scripts/ci/docker-compose.yml @@ -16,65 +16,67 @@ version: "2.2" services: - mysql: - image: mysql:5.6 - restart: always - environment: - - MYSQL_ALLOW_EMPTY_PASSWORD=true - - MYSQL_ROOT_HOST=% - - MYSQL_DATABASE=airflow - volumes: - - ./mysql/conf.d:/etc/mysql/conf.d postgres: image: postgres:9.6 - restart: always environment: - POSTGRES_USER=postgres - POSTGRES_PASSWORD=airflow - POSTGRES_DB=airflow + mysql: + image: mysql:5.6 + environment: + - MYSQL_ALLOW_EMPTY_PASSWORD=true + - MYSQL_ROOT_HOST=% + - MYSQL_DATABASE=airflow + volumes: + - ./mysql/conf.d:/etc/mysql/conf.d:ro mongo: image: mongo:3 - restart: always cassandra: image: 
cassandra:3.0 - restart: always rabbitmq: image: rabbitmq:3.7 - restart: always redis: image: redis:5.0.1 - restart: always openldap: image: osixia/openldap:1.2.0 - restart: always command: --copy-service environment: - LDAP_DOMAIN=example.com - LDAP_ADMIN_PASSWORD=insecure - LDAP_CONFIG_PASSWORD=insecure volumes: - - ./openldap/ldif:/container/service/slapd/assets/config/bootstrap/ldif/custom - + - ./openldap/ldif:/container/service/slapd/assets/config/bootstrap/ldif/custom:ro krb5-kdc-server: image: godatadriven/krb5-kdc-server hostname: krb5-kdc-server domainname: example.com airflow-testing: - image: airflowci/incubator-airflow-ci:latest + image: ${AIRFLOW_CONTAINER_DOCKER_IMAGE} init: true + entrypoint: [ "/bin/bash", "-c" ] environment: - - USER=airflow - - INSIDE_DOCKER_CONTAINER=true + - USER=root - ADDITIONAL_PATH=~/.local/bin - - TOX_ENV + - HADOOP_DISTRO=cdh + - HADOOP_HOME=/tmp/hadoop-cdh + - HADOOP_OPTS=-D/tmp/krb5.conf + - HIVE_HOME=/tmp/hive + - MINICLUSTER_HOME=/tmp/minicluster-1.1-SNAPSHOT + - KRB5_CONFIG=/etc/krb5.conf + - KRB5_KTNAME=/etc/airflow.keytab + - CELERY_BROKER_URLS=amqp://guest:guest@rabbitmq:5672,redis://redis:6379/0 + - ENV + - BACKEND - CI + - SOURCE_BRANCH - TRAVIS - TRAVIS_BRANCH - TRAVIS_BUILD_DIR @@ -85,6 +87,8 @@ services: - TRAVIS_REPO_SLUG - TRAVIS_OS_NAME - TRAVIS_TAG + - RUN_TESTS + - AIRFLOW_CI_VERBOSE depends_on: - postgres - mysql @@ -94,8 +98,3 @@ services: - redis - openldap - krb5-kdc-server - volumes: - - ../../:/app - - ~/.cache/pip:/home/airflow/.cache/pip - - ~/.wheelhouse/:/home/airflow/.wheelhouse/ - working_dir: /app diff --git a/scripts/ci/in_container/_in_container_utils.sh b/scripts/ci/in_container/_in_container_utils.sh new file mode 100644 index 0000000000000..83ad1914944b2 --- /dev/null +++ b/scripts/ci/in_container/_in_container_utils.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# Asserts that you are actually in container +# +function assert_in_container() { + if [[ ! -f /.dockerenv ]]; then + echo >&2 + echo >&2 "You are not inside the Airflow docker container!" + echo >&2 "You should only run this script in the Airflow docker container as it may override your files." + echo >&2 "Learn more about how we develop and test airflow in:" + echo >&2 "https://github.com/apache/airflow/blob/master/CONTRIBUTING.md" + echo >&2 + exit 1 + fi +} + +function in_container_script_start() { + if [[ ${AIRFLOW_CI_VERBOSE:="false"} == "true" ]]; then + set -x + fi +} + +function in_container_script_end() { + if [[ ${AIRFLOW_CI_VERBOSE:="false"} == "true" ]]; then + set +x + fi +} + +# +# Cleans up PYC files (in case they come in mounted folders) +# +function in_container_cleanup_pyc() { + echo + echo "Cleaning up .pyc files" + echo + set +o pipefail + sudo find . 
\ + -path "./airflow/www/node_modules" -prune -o \ + -path "./.eggs" -prune -o \ + -path "./docs/_build" -prune -o \ + -path "./build" -prune -o \ + -name "*.pyc" | grep ".pyc$" | sudo xargs rm -vf | wc -l | \ + xargs -n 1 echo "Number of deleted .pyc files:" + set -o pipefail + echo + echo +} + +# +# Cleans up __pycache__ directories (in case they come in mounted folders) +# +function in_container_cleanup_pycache() { + echo + echo "Cleaning up __pycache__ directories" + echo + set +o pipefail + sudo find . \ + -path "./airflow/www/node_modules" -prune -o \ + -path "./.eggs" -prune -o \ + -path "./docs/_build" -prune -o \ + -path "./build" -prune -o \ + -name "__pycache__" | grep "__pycache__" | sudo xargs rm -rvf | wc -l | \ + xargs -n 1 echo "Number of deleted __pycache__ dirs (and files):" + set -o pipefail + echo + echo +} + +# +# Fixes ownership of files generated in container - if they are owned by root, they will be owned by +# The host user. +# +function in_container_fix_ownership() { + echo + echo "Changing ownership of root-owned files to ${HOST_USER_ID}.${HOST_GROUP_ID}" + echo + set +o pipefail + sudo find . 
-user root -print0 | sudo xargs -0 -r chown -v "${HOST_USER_ID}.${HOST_GROUP_ID}" | wc -l | \ + xargs -n 1 echo "Number of files with changed ownership:" + set -o pipefail + echo + echo +} + +function in_container_go_to_airflow_sources() { + pushd "${AIRFLOW_SOURCES}" &>/dev/null || exit 1 + echo + echo "Running in $(pwd)" + echo +} + +function in_container_basic_sanity_check() { + assert_in_container + in_container_go_to_airflow_sources + in_container_cleanup_pyc + in_container_cleanup_pycache +} diff --git a/scripts/ci/airflow_travis.cfg b/scripts/ci/in_container/airflow_ci.cfg similarity index 94% rename from scripts/ci/airflow_travis.cfg rename to scripts/ci/in_container/airflow_ci.cfg index f630655e3fe54..d9dcb5e53f828 100644 --- a/scripts/ci/airflow_travis.cfg +++ b/scripts/ci/in_container/airflow_ci.cfg @@ -20,7 +20,7 @@ dags_folder = ~/airflow/dags base_log_folder = ~/airflow/logs executor = LocalExecutor -sql_alchemy_conn = # overridden by tox.ini +sql_alchemy_conn = # overridden by the startup scripts unit_test_mode = True load_examples = True donot_pickle = False @@ -54,7 +54,7 @@ celery_app_name = airflow.executors.celery_executor worker_concurrency = 16 worker_log_server_port = 8793 broker_url = amqp://guest:guest@rabbitmq:5672/ -result_backend = # overridden by tox.ini +result_backend = # overridden by startup scripts flower_port = 5555 default_queue = default sync_parallelism = 0 diff --git a/scripts/ci/in_container/entrypoint_ci.sh b/scripts/ci/in_container/entrypoint_ci.sh new file mode 100755 index 0000000000000..c2bfa80ce3397 --- /dev/null +++ b/scripts/ci/in_container/entrypoint_ci.sh @@ -0,0 +1,224 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Bash sanity settings (error on exit, complain for undefined vars, error when pipe fails) +set -euo pipefail +MY_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) + +if [[ ${AIRFLOW_CI_VERBOSE:="false"} == "true" ]]; then + set -x +fi + +# shellcheck source=./_in_container_utils.sh +. "${MY_DIR}/_in_container_utils.sh" + +in_container_basic_sanity_check + +in_container_script_start + +AIRFLOW_ROOT="${MY_DIR}/../../.." + +PYTHON_VERSION=${PYTHON_VERSION:=3.6} +ENV=${ENV:=docker} +BACKEND=${BACKEND:=sqlite} +KUBERNETES_VERSION=${KUBERNETES_VERSION:=""} + +export AIRFLOW_HOME=${AIRFLOW_HOME:=${HOME}} + +if [[ -z "${AIRFLOW_SOURCES:=}" ]]; then + echo >&2 + echo >&2 AIRFLOW_SOURCES not set !!!! + echo >&2 + exit 1 +fi + +echo +echo "Airflow home: ${AIRFLOW_HOME}" +echo "Airflow sources: ${AIRFLOW_SOURCES}" +echo "Airflow core SQL connection: ${AIRFLOW__CORE__SQL_ALCHEMY_CONN:=}" +echo + +ARGS=( "$@" ) + +RUN_TESTS=${RUN_TESTS:="true"} + +if [[ ! -d "${AIRFLOW_SOURCES}/airflow/www/node_modules" ]]; then + echo + echo "Installing NPM modules as they are not yet installed (Sources mounted from Host)" + echo + pushd "${AIRFLOW_SOURCES}/airflow/www/" &>/dev/null || exit 1 + npm ci + echo + popd &>/dev/null || exit 1 +fi +if [[ ! 
-d "${AIRFLOW_SOURCES}/airflow/www/static/dist" ]]; then + pushd "${AIRFLOW_SOURCES}/airflow/www/" &>/dev/null || exit 1 + echo + echo "Building production version of javascript files (Sources mounted from Host)" + echo + echo + npm run prod + echo + echo + popd &>/dev/null || exit 1 +fi + +if [[ ${AIRFLOW_CI_VERBOSE} == "true" ]]; then + echo + echo "Using ${HADOOP_DISTRO:=} distribution of Hadoop from ${HADOOP_HOME:=}" + echo +fi + +export AIRFLOW__CORE__DAGS_FOLDER="${AIRFLOW_SOURCES}/tests/dags" + +# add test/test_utils to PYTHONPATH (TODO: Do we need it?) +export PYTHONPATH=${PYTHONPATH:-${AIRFLOW_SOURCES}/tests/test_utils} + +# Added to have run-tests on path +export PATH=${PATH}:${AIRFLOW_SOURCES} + +export AIRFLOW__CORE__UNIT_TEST_MODE=True +export HADOOP_DISTRO + +# Fix codecov build path +# TODO: Check this - this should be made travis-independent +if [[ ! -h /home/travis/build/apache/airflow ]]; then + sudo mkdir -p /home/travis/build/apache + sudo ln -s "${AIRFLOW_ROOT}" /home/travis/build/apache/airflow +fi + +# Fix file permissions +if [[ -d "${HOME}/.minikube" ]]; then + sudo chown -R "${AIRFLOW_USER}.${AIRFLOW_USER}" "${HOME}/.kube" "${HOME}/.minikube" +fi + +# Cleanup the logs, tmp when entering the environment +sudo rm -rf "${AIRFLOW_SOURCES}"/logs/* +sudo rm -rf "${AIRFLOW_SOURCES}"/tmp/* +mkdir -p "${AIRFLOW_SOURCES}"/logs/ +mkdir -p "${AIRFLOW_SOURCES}"/tmp/ + +if [[ "${ENV}" == "docker" ]]; then + # Start MiniCluster + java -cp "/tmp/minicluster-1.1-SNAPSHOT/*" com.ing.minicluster.MiniCluster \ + >"${AIRFLOW_HOME}"/logs/minicluster.log 2>&1 & + + # Set up ssh keys + echo 'yes' | ssh-keygen -t rsa -C your_email@youremail.com -P '' -f ~/.ssh/id_rsa \ + >"${AIRFLOW_HOME}"/logs/ssh-keygen.log 2>&1 + + cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys + ln -s -f ~/.ssh/authorized_keys ~/.ssh/authorized_keys2 + chmod 600 ~/.ssh/* + + # SSH Service + sudo service ssh restart >/dev/null 2>&1 + + # Setting up kerberos + + FQDN=$(hostname) + ADMIN="admin" 
+ PASS="airflow" + KRB5_KTNAME=/etc/airflow.keytab + + if [[ ${AIRFLOW_CI_VERBOSE} == "true" ]]; then + echo + echo "Hosts:" + echo + cat /etc/hosts + echo + echo "Hostname: ${FQDN}" + echo + fi + + sudo cp "${MY_DIR}/krb5/krb5.conf" /etc/krb5.conf + + echo -e "${PASS}\n${PASS}" | \ + sudo kadmin -p "${ADMIN}/admin" -w "${PASS}" -q "addprinc -randkey airflow/${FQDN}" >/dev/null 2>&1 + sudo kadmin -p "${ADMIN}/admin" -w "${PASS}" -q "ktadd -k ${KRB5_KTNAME} airflow" >/dev/null 2>&1 + sudo kadmin -p "${ADMIN}/admin" -w "${PASS}" -q "ktadd -k ${KRB5_KTNAME} airflow/${FQDN}" >/dev/null 2>&1 + sudo chmod 0644 "${KRB5_KTNAME}" +fi + +# Exporting XUNIT_FILE so that we can see summary of failed tests +# at the end of the log +export XUNIT_FILE="${AIRFLOW_HOME}/logs/all_tests.xml" +mkdir -pv "${AIRFLOW_HOME}/logs/" + +cp -f "${MY_DIR}/airflow_ci.cfg" "${AIRFLOW_HOME}/unittests.cfg" + +set +u +# If we do not want to run tests, we simply drop into bash +if [[ "${RUN_TESTS}" == "false" ]]; then + if [[ ${#ARGS[@]} == 0 ]]; then + exec /bin/bash + else + exec /bin/bash -c "$(printf "%q " "${ARGS[@]}")" + fi +fi + +if [[ ${#ARGS[@]} == 0 ]]; then + ARGS=("--with-coverage" + "--cover-erase" + "--cover-html" + "--cover-package=airflow" + "--cover-html-dir=airflow/www/static/coverage" + "--with-ignore-docstrings" + "--rednose" + "--with-xunit" + "--xunit-file=${XUNIT_FILE}" + "--with-timer" + "-v" + "--logging-level=INFO") + echo + echo "Running ALL Tests" + echo +else + echo + echo "Running tests with ${ARGS[*]}" + echo +fi +set -u + +KUBERNETES_VERSION=${KUBERNETES_VERSION:=""} + +if [[ -z "${KUBERNETES_VERSION}" ]]; then + echo + echo "Running CI tests with ${ARGS[*]}" + echo + "${MY_DIR}/run_ci_tests.sh" "${ARGS[@]}" + codecov -e "py${PYTHON_VERSION}-backend_${BACKEND}-env_${ENV}" +else + export KUBERNETES_VERSION + export MINIKUBE_IP + # This script runs inside a container, the path of the kubernetes certificate + # is /home/travis/.minikube/client.crt but the user in the container
is `root` + # TODO: Check this. This should be made travis-independent :D + if [[ ! -d /home/travis ]]; then + sudo mkdir -p /home/travis + fi + sudo ln -s /root/.minikube /home/travis/.minikube + echo + echo "Running CI tests with ${ARGS[*]}" + echo + "${MY_DIR}/run_ci_tests.sh" tests.minikube "${ARGS[@]}" + codecov -e "py${PYTHON_VERSION}-backend_${BACKEND}-env_${ENV}" +fi + +in_container_script_end diff --git a/scripts/ci/krb5/krb5.conf b/scripts/ci/in_container/krb5/krb5.conf similarity index 100% rename from scripts/ci/krb5/krb5.conf rename to scripts/ci/in_container/krb5/krb5.conf diff --git a/scripts/ci/in_container/run_check_licence.sh b/scripts/ci/in_container/run_check_licence.sh new file mode 100755 index 0000000000000..b71a00c33c0dd --- /dev/null +++ b/scripts/ci/in_container/run_check_licence.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Script to run the Apache RAT licence check. Can be started from any working directory +# ./scripts/ci/in_container/run_check_licence.sh + +set -uo pipefail + +MY_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) + +# shellcheck source=./_in_container_utils.sh +.
"${MY_DIR}/_in_container_utils.sh" + +in_container_basic_sanity_check + +in_container_script_start + +echo +echo "Running Licence check" +echo + +sudo chown -R "${AIRFLOW_USER}.${AIRFLOW_USER}" "${AIRFLOW_SOURCES}/logs" + +# This is the target of a symlink in airflow/www/static/docs - +# and rat exclude doesn't cope when the symlink target doesn't exist +sudo mkdir -p docs/_build/html/ + +echo "Running license checks. This can take a while." + +if ! java -jar "${RAT_JAR}" -E "${AIRFLOW_SOURCES}"/.rat-excludes \ + -d "${AIRFLOW_SOURCES}" > "${AIRFLOW_SOURCES}/logs/rat-results.txt"; then + echo >&2 "RAT exited abnormally" + exit 1 +fi + +ERRORS=$(grep -e "??" "${AIRFLOW_SOURCES}/logs/rat-results.txt") + +in_container_script_end + +in_container_fix_ownership + +if [[ -n "${ERRORS}" ]]; then + echo >&2 + echo >&2 "Could not find Apache license headers in the following files:" + echo >&2 "${ERRORS}" + echo >&2 + exit 1 +else + echo "RAT checks passed." + echo +fi diff --git a/scripts/ci/in_container/run_ci_tests.sh b/scripts/ci/in_container/run_ci_tests.sh new file mode 100755 index 0000000000000..855aea1d19fad --- /dev/null +++ b/scripts/ci/in_container/run_ci_tests.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +# Bash sanity settings (error on exit, complain for undefined vars, error when pipe fails) +set -euo pipefail + +MY_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) + +# shellcheck source=./_in_container_utils.sh +. "${MY_DIR}/_in_container_utils.sh" + +in_container_basic_sanity_check + +in_container_script_start + +# any argument received is overriding the default nose execution arguments: +NOSE_ARGS=( "$@" ) + +KUBERNETES_VERSION=${KUBERNETES_VERSION:=""} + +if [[ "${KUBERNETES_VERSION}" == "" ]]; then + echo "Initializing the DB" + yes | airflow initdb || true + airflow resetdb -y + + kinit -kt "${KRB5_KTNAME}" airflow +fi + +echo +echo "Starting the tests with those nose arguments: ${NOSE_ARGS[*]}" +echo +set +e +nosetests "${NOSE_ARGS[@]}" +RES=$? + +set +x +if [[ "${RES}" != "0" ]]; then + if [[ -f "${XUNIT_FILE:=}" ]]; then + SEPARATOR_WIDTH=$(tput cols) + echo + printf '=%.0s' $(seq "${SEPARATOR_WIDTH}") + echo + echo " Summary of failed tests" + echo + python "${AIRFLOW_SOURCES:=}/tests/test_utils/print_tests.py" \ + --xunit-file "${XUNIT_FILE}" --only-failed + echo + printf '=%.0s' $(seq "${SEPARATOR_WIDTH}") + else + echo + echo " Not printing summary of failed tests. Missing file: ${XUNIT_FILE}" + echo + fi +else + echo "All tests successful" +fi + +in_container_script_end + +exit "${RES}" diff --git a/scripts/ci/in_container/run_docs_build.sh b/scripts/ci/in_container/run_docs_build.sh new file mode 100755 index 0000000000000..5e403e25d19dd --- /dev/null +++ b/scripts/ci/in_container/run_docs_build.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Script to build the documentation. Can be started from any working directory +# ./scripts/ci/in_container/run_docs_build.sh + +set -uo pipefail + +MY_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) + +# shellcheck source=./_in_container_utils.sh +. "${MY_DIR}/_in_container_utils.sh" + +in_container_basic_sanity_check + +in_container_script_start + +sudo rm -rf "$(pwd)"/docs/_build/* +sudo "$(pwd)/docs/build.sh" + +in_container_fix_ownership + +in_container_script_end diff --git a/scripts/ci/in_container/run_fix_ownership.sh b/scripts/ci/in_container/run_fix_ownership.sh new file mode 100755 index 0000000000000..52ffd97db3bd8 --- /dev/null +++ b/scripts/ci/in_container/run_fix_ownership.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +# Bash sanity settings (error on exit, complain for undefined vars, error when pipe fails) +set -euo pipefail +MY_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) + +# shellcheck source=./_in_container_utils.sh +. "${MY_DIR}/_in_container_utils.sh" + +in_container_basic_sanity_check + +in_container_script_start + +in_container_fix_ownership + +in_container_script_end diff --git a/scripts/ci/in_container/run_flake8.sh b/scripts/ci/in_container/run_flake8.sh new file mode 100755 index 0000000000000..d5b4a1cd749d5 --- /dev/null +++ b/scripts/ci/in_container/run_flake8.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Script to run flake8 (optionally with parameters). Can be started from any working directory +# ./scripts/ci/in_container/run_flake8.sh + +set -uo pipefail + +MY_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) + +# shellcheck source=./_in_container_utils.sh +.
"${MY_DIR}/_in_container_utils.sh" + +in_container_basic_sanity_check + +in_container_script_start + +if [[ ${#@} == "0" ]]; then + echo + echo "Running flake8 with no parameters" + echo +else + echo + echo "Running flake8 with parameters: $*" + echo +fi + +flake8 "$@" + +RES="$?" + +in_container_script_end + +if [[ "${RES}" != 0 ]]; then + echo >&2 + echo >&2 "There were some flake8 errors. Exiting" + echo >&2 + exit 1 +else + echo + echo "Flake8 check succeeded" + echo +fi diff --git a/scripts/ci/in_container/run_mypy.sh b/scripts/ci/in_container/run_mypy.sh new file mode 100755 index 0000000000000..3390bfa37d023 --- /dev/null +++ b/scripts/ci/in_container/run_mypy.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Script to run mypy with the given parameters. Can be started from any working directory +# ./scripts/ci/in_container/run_mypy.sh + +set -uo pipefail + +MY_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) + +# shellcheck source=./_in_container_utils.sh +. "${MY_DIR}/_in_container_utils.sh" + +in_container_basic_sanity_check + +in_container_script_start + +echo +echo "Running mypy with parameters: $*" +echo + +mypy "$@" + +RES="$?"
+ +in_container_script_end + +if [[ "${RES}" != 0 ]]; then + echo >&2 + echo >&2 "There were some mypy errors. Exiting" + echo >&2 + exit 1 +else + echo + echo "Mypy check succeeded" + echo +fi diff --git a/scripts/ci/in_container/run_pylint.sh b/scripts/ci/in_container/run_pylint.sh new file mode 100755 index 0000000000000..201fab1301b7e --- /dev/null +++ b/scripts/ci/in_container/run_pylint.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Script to run Pylint on all code. Can be started from any working directory +# ./scripts/ci/run_pylint.sh + +set -uo pipefail + +MY_DIR=$(cd "$(dirname "$0")" || exit 1; pwd) + +# shellcheck source=./_in_container_utils.sh +. 
"${MY_DIR}/_in_container_utils.sh" + +in_container_basic_sanity_check + +in_container_script_start + +if [[ ${#@} == "0" ]]; then + echo + echo "Running Pylint with no parameters" + echo + + echo + echo "Running pylint for all sources except 'tests' folder" + echo + + # Using path -prune is much better in the local environment on OSX because we have host + # Files mounted and node_modules is a huge directory which takes many seconds to even scan + # -prune works better than -not path because it skips traversing the whole directory. -not path traverses + # the directory and only excludes it after all of it is scanned + find . \ + -path "./airflow/www/node_modules" -prune -o \ + -path "./airflow/_vendor" -prune -o \ + -path "./.eggs" -prune -o \ + -path "./docs/_build" -prune -o \ + -path "./build" -prune -o \ + -path "./tests" -prune -o \ + -name "*.py" \ + -not -name 'webserver_config.py' | \ + grep ".*.py$" | \ + grep -vFf scripts/ci/pylint_todo.txt | xargs pylint --output-format=colorized + RES_MAIN=$? + + echo + echo "Running pylint for 'tests' folder" + echo + find "./tests" -name "*.py" | \ + grep -vFf scripts/ci/pylint_todo.txt | \ + xargs pylint --disable=" + missing-docstring, + no-self-use, + too-many-public-methods, + protected-access + " \ + --output-format=colorized + RES_TESTS=$? +else + echo "Running Pylint with parameters: $*" + echo + pylint --output-format=colorized "$@" + RES_MAIN=$? + RES_TESTS="0" +fi + +in_container_script_end + +if [[ "${RES_TESTS}" != 0 || "${RES_MAIN}" != 0 ]]; then + echo >&2 + echo >&2 "There were some pylint errors. Exiting" + echo >&2 + exit 1 +else + echo + echo "Pylint check succeeded" + echo +fi diff --git a/scripts/ci/kubernetes/README.md b/scripts/ci/kubernetes/README.md index b297d24fafc8f..10c5592895622 100644 --- a/scripts/ci/kubernetes/README.md +++ b/scripts/ci/kubernetes/README.md @@ -19,9 +19,15 @@ under the License. 
# Airflow on Kubernetes -If you don't have minikube installed, please run `./minikube/start_minikube.sh` to start a k8s-instance on your local machine. Make sure that your `kubectl` is pointing to the local k8s instance. +If you don't have minikube installed, please run `./minikube/start_minikube.sh` +to start a k8s-instance on your local machine. Make sure that your `kubectl` is + pointing to the local k8s instance. -First build the docker images by running `./docker/build.sh`. This will build the image and push it to the local registry. Secondly, deploy Apache Airflow using `./kube/deploy.sh -d persistent_mode`. Finally, open the Airflow webserver page by browsing to `http://192.168.99.100:30809/admin/` (on OSX). +First build the docker images by running `./docker/build.sh`. This will build +the image and push it to the local registry. +Secondly, deploy Apache Airflow using `./kube/deploy.sh -d persistent_mode`. +Finally, open the Airflow webserver +page by browsing to `http://192.168.99.100:30809/admin/` (on OSX). When kicking of a new job, you should be able to see new pods being kicked off: diff --git a/scripts/ci/kubernetes/docker/bootstrap.sh b/scripts/ci/kubernetes/docker/bootstrap.sh index 8dabd792defb2..fc87f1b03fef0 100644 --- a/scripts/ci/kubernetes/docker/bootstrap.sh +++ b/scripts/ci/kubernetes/docker/bootstrap.sh @@ -19,12 +19,12 @@ # launch the appropriate process -if [ "$1" = "webserver" ] +if [[ "$1" = "webserver" ]] then exec airflow webserver fi -if [ "$1" = "scheduler" ] +if [[ "$1" = "scheduler" ]] then exec airflow scheduler fi diff --git a/scripts/ci/kubernetes/docker/build.sh b/scripts/ci/kubernetes/docker/build.sh index 1bde8480aa356..ac75e280fca5a 100755 --- a/scripts/ci/kubernetes/docker/build.sh +++ b/scripts/ci/kubernetes/docker/build.sh @@ -19,35 +19,35 @@ IMAGE=${IMAGE:-airflow} TAG=${TAG:-latest} -DIRNAME=$(cd "$(dirname "$0")"; pwd) -AIRFLOW_ROOT="$DIRNAME/../../../.." 
+DIRNAME=$(cd "$(dirname "$0")" && pwd) +AIRFLOW_ROOT="${DIRNAME}/../../../.." PYTHON_DOCKER_IMAGE=python:3.6-slim set -e # Don't rebuild the image more than once on travis -if [[ -n "$TRAVIS" || -z "$AIRFLOW_CI_REUSE_K8S_IMAGE" ]] && docker image inspect "$IMAGE:$TAG" > /dev/null 2>/dev/null; then +if [[ -n "${TRAVIS}" || -z "${AIRFLOW_CI_REUSE_K8S_IMAGE}" ]] && \ + docker image inspect "${IMAGE}:${TAG}" > /dev/null 2>/dev/null; then echo "Re-using existing image" exit 0 fi -if [ "${VM_DRIVER:-none}" != "none" ]; then - ENVCONFIG=$(minikube docker-env) - if [ $? -eq 0 ]; then - eval $ENVCONFIG +if [[ "${VM_DRIVER:-none}" != "none" ]]; then + if ENVCONFIG=$(minikube docker-env); then + eval "${ENVCONFIG}" fi fi -echo "Airflow directory $AIRFLOW_ROOT" -echo "Airflow Docker directory $DIRNAME" +echo "Airflow directory ${AIRFLOW_ROOT}" +echo "Airflow Docker directory ${DIRNAME}" -cd $AIRFLOW_ROOT -docker run -ti --rm -v ${AIRFLOW_ROOT}:/airflow \ - -w /airflow ${PYTHON_DOCKER_IMAGE} ./scripts/ci/kubernetes/docker/compile.sh +cd "${AIRFLOW_ROOT}" +docker run -ti --rm -v "${AIRFLOW_ROOT}:/airflow" \ + -w /airflow "${PYTHON_DOCKER_IMAGE}" ./scripts/ci/kubernetes/docker/compile.sh -sudo rm -rf ${AIRFLOW_ROOT}/airflow/www/node_modules +sudo rm -rf "${AIRFLOW_ROOT}/airflow/www/node_modules" -echo "Copy distro $AIRFLOW_ROOT/dist/*.tar.gz ${DIRNAME}/airflow.tar.gz" -cp $AIRFLOW_ROOT/dist/*.tar.gz ${DIRNAME}/airflow.tar.gz -cd $DIRNAME && docker build --pull $DIRNAME --tag=${IMAGE}:${TAG} -rm $DIRNAME/airflow.tar.gz +echo "Copy distro ${AIRFLOW_ROOT}/dist/*.tar.gz ${DIRNAME}/airflow.tar.gz" +cp ${AIRFLOW_ROOT}/dist/*.tar.gz "${DIRNAME}/airflow.tar.gz" +cd "${DIRNAME}" && docker build --pull "${DIRNAME}" --tag="${IMAGE}:${TAG}" +rm "${DIRNAME}/airflow.tar.gz" diff --git a/scripts/ci/kubernetes/kube/deploy.sh b/scripts/ci/kubernetes/kube/deploy.sh index 3d8562b437888..b4604574f8b3b 100755 --- a/scripts/ci/kubernetes/kube/deploy.sh +++ b/scripts/ci/kubernetes/kube/deploy.sh @@ 
-22,8 +22,8 @@ set -x AIRFLOW_IMAGE=${IMAGE:-airflow} AIRFLOW_TAG=${TAG:-latest} DIRNAME=$(cd "$(dirname "$0")"; pwd) -TEMPLATE_DIRNAME=${DIRNAME}/templates -BUILD_DIRNAME=${DIRNAME}/build +TEMPLATE_DIRNAME="${DIRNAME}/templates" +BUILD_DIRNAME="${DIRNAME}/build" usage() { cat << EOF @@ -51,10 +51,10 @@ done case ${DAGS_VOLUME} in "persistent_mode") - GIT_SYNC=0 + GIT_SYNC="0" ;; "git_mode") - GIT_SYNC=1 + GIT_SYNC="1" ;; *) echo "Value \"$DAGS_VOLUME\" for dags_folder is not valid." >&2 @@ -62,13 +62,13 @@ case ${DAGS_VOLUME} in ;; esac -if [ ! -d "$BUILD_DIRNAME" ]; then - mkdir -p ${BUILD_DIRNAME} +if [[ ! -d "${BUILD_DIRNAME}" ]]; then + mkdir -p "${BUILD_DIRNAME}" fi -rm -f ${BUILD_DIRNAME}/* +rm -f "${BUILD_DIRNAME}"/* -if [ "${GIT_SYNC}" = 0 ]; then +if [[ "${GIT_SYNC}" == "0" ]]; then INIT_DAGS_VOLUME_NAME=airflow-dags POD_AIRFLOW_DAGS_VOLUME_NAME=airflow-dags CONFIGMAP_DAGS_FOLDER=/root/airflow/dags @@ -93,14 +93,14 @@ case "${_UNAME_OUT}" in esac echo "Local OS is ${_MY_OS}" -case $_MY_OS in +case ${_MY_OS} in linux) - SED_COMMAND=sed + SED_COMMAND="sed" ;; darwin) - SED_COMMAND=gsed - if ! $(type "$SED_COMMAND" &> /dev/null) ; then - echo "Could not find \"$SED_COMMAND\" binary, please install it. On OSX brew install gnu-sed" >&2 + SED_COMMAND="gsed" + if ! type "${SED_COMMAND}" &> /dev/null ; then + echo "Could not find \"${SED_COMMAND}\" binary, please install it. 
On OSX brew install gnu-sed" >&2 exit 1 fi ;; @@ -110,115 +110,120 @@ case $_MY_OS in ;; esac -if [ "${GIT_SYNC}" = 0 ]; then +if [[ "${GIT_SYNC}" == "0" ]]; then ${SED_COMMAND} -e "s/{{INIT_GIT_SYNC}}//g" \ - ${TEMPLATE_DIRNAME}/airflow.template.yaml > ${BUILD_DIRNAME}/airflow.yaml + "${TEMPLATE_DIRNAME}/airflow.template.yaml" > "${BUILD_DIRNAME}/airflow.yaml" else - ${SED_COMMAND} -e "/{{INIT_GIT_SYNC}}/{r $TEMPLATE_DIRNAME/init_git_sync.template.yaml" -e 'd}' \ - ${TEMPLATE_DIRNAME}/airflow.template.yaml > ${BUILD_DIRNAME}/airflow.yaml + ${SED_COMMAND} -e "/{{INIT_GIT_SYNC}}/{r ${TEMPLATE_DIRNAME}/init_git_sync.template.yaml" -e 'd}' \ + "${TEMPLATE_DIRNAME}/airflow.template.yaml" > "${BUILD_DIRNAME}/airflow.yaml" fi -${SED_COMMAND} -i "s|{{AIRFLOW_IMAGE}}|$AIRFLOW_IMAGE|g" ${BUILD_DIRNAME}/airflow.yaml -${SED_COMMAND} -i "s|{{AIRFLOW_TAG}}|$AIRFLOW_TAG|g" ${BUILD_DIRNAME}/airflow.yaml +${SED_COMMAND} -i "s|{{AIRFLOW_IMAGE}}|${AIRFLOW_IMAGE}|g" "${BUILD_DIRNAME}/airflow.yaml" +${SED_COMMAND} -i "s|{{AIRFLOW_TAG}}|${AIRFLOW_TAG}|g" "${BUILD_DIRNAME}/airflow.yaml" -${SED_COMMAND} -i "s|{{CONFIGMAP_GIT_REPO}}|$CONFIGMAP_GIT_REPO|g" ${BUILD_DIRNAME}/airflow.yaml -${SED_COMMAND} -i "s|{{CONFIGMAP_BRANCH}}|$CONFIGMAP_BRANCH|g" ${BUILD_DIRNAME}/airflow.yaml -${SED_COMMAND} -i "s|{{INIT_DAGS_VOLUME_NAME}}|$INIT_DAGS_VOLUME_NAME|g" ${BUILD_DIRNAME}/airflow.yaml -${SED_COMMAND} -i "s|{{POD_AIRFLOW_DAGS_VOLUME_NAME}}|$POD_AIRFLOW_DAGS_VOLUME_NAME|g" ${BUILD_DIRNAME}/airflow.yaml +${SED_COMMAND} -i "s|{{CONFIGMAP_GIT_REPO}}|${CONFIGMAP_GIT_REPO}|g" "${BUILD_DIRNAME}/airflow.yaml" +${SED_COMMAND} -i "s|{{CONFIGMAP_BRANCH}}|${CONFIGMAP_BRANCH}|g" "${BUILD_DIRNAME}/airflow.yaml" +${SED_COMMAND} -i "s|{{INIT_DAGS_VOLUME_NAME}}|${INIT_DAGS_VOLUME_NAME}|g" "${BUILD_DIRNAME}/airflow.yaml" +${SED_COMMAND} -i "s|{{POD_AIRFLOW_DAGS_VOLUME_NAME}}|${POD_AIRFLOW_DAGS_VOLUME_NAME}|g" \ + "${BUILD_DIRNAME}/airflow.yaml" -${SED_COMMAND} 
"s|{{CONFIGMAP_DAGS_FOLDER}}|$CONFIGMAP_DAGS_FOLDER|g" \ - ${TEMPLATE_DIRNAME}/configmaps.template.yaml > ${BUILD_DIRNAME}/configmaps.yaml -${SED_COMMAND} -i "s|{{CONFIGMAP_GIT_REPO}}|$CONFIGMAP_GIT_REPO|g" ${BUILD_DIRNAME}/configmaps.yaml -${SED_COMMAND} -i "s|{{CONFIGMAP_BRANCH}}|$CONFIGMAP_BRANCH|g" ${BUILD_DIRNAME}/configmaps.yaml -${SED_COMMAND} -i "s|{{CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}}|$CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT|g" ${BUILD_DIRNAME}/configmaps.yaml -${SED_COMMAND} -i "s|{{CONFIGMAP_DAGS_VOLUME_CLAIM}}|$CONFIGMAP_DAGS_VOLUME_CLAIM|g" ${BUILD_DIRNAME}/configmaps.yaml +${SED_COMMAND} "s|{{CONFIGMAP_DAGS_FOLDER}}|${CONFIGMAP_DAGS_FOLDER}|g" \ + "${TEMPLATE_DIRNAME}/configmaps.template.yaml" > "${BUILD_DIRNAME}/configmaps.yaml" +${SED_COMMAND} -i "s|{{CONFIGMAP_GIT_REPO}}|${CONFIGMAP_GIT_REPO}|g" "${BUILD_DIRNAME}/configmaps.yaml" +${SED_COMMAND} -i "s|{{CONFIGMAP_BRANCH}}|${CONFIGMAP_BRANCH}|g" "${BUILD_DIRNAME}/configmaps.yaml" +${SED_COMMAND} -i "s|{{CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}}|${CONFIGMAP_GIT_DAGS_FOLDER_MOUNT_POINT}|g" \ + "${BUILD_DIRNAME}/configmaps.yaml" +${SED_COMMAND} -i "s|{{CONFIGMAP_DAGS_VOLUME_CLAIM}}|${CONFIGMAP_DAGS_VOLUME_CLAIM}|g" \ + "${BUILD_DIRNAME}/configmaps.yaml" -cat ${BUILD_DIRNAME}/airflow.yaml -cat ${BUILD_DIRNAME}/configmaps.yaml +cat "${BUILD_DIRNAME}/airflow.yaml" +cat "${BUILD_DIRNAME}/configmaps.yaml" # Fix file permissions +# TODO: Check this - this should be TRAVIS-independent if [[ "${TRAVIS}" == true ]]; then - sudo chown -R travis.travis $HOME/.kube $HOME/.minikube + sudo chown -R travis.travis "$HOME/.kube" "$HOME/.minikube" fi -kubectl delete -f $DIRNAME/postgres.yaml -kubectl delete -f $BUILD_DIRNAME/airflow.yaml -kubectl delete -f $DIRNAME/secrets.yaml +kubectl delete -f "${DIRNAME}/postgres.yaml" +kubectl delete -f "${BUILD_DIRNAME}/airflow.yaml" +kubectl delete -f "${DIRNAME}/secrets.yaml" set -e -kubectl apply -f $DIRNAME/secrets.yaml -kubectl apply -f $BUILD_DIRNAME/configmaps.yaml -kubectl 
apply -f $DIRNAME/postgres.yaml -kubectl apply -f $DIRNAME/volumes.yaml -kubectl apply -f $BUILD_DIRNAME/airflow.yaml +kubectl apply -f "${DIRNAME}/secrets.yaml" +kubectl apply -f "${BUILD_DIRNAME}/configmaps.yaml" +kubectl apply -f "${DIRNAME}/postgres.yaml" +kubectl apply -f "${DIRNAME}/volumes.yaml" +kubectl apply -f "${BUILD_DIRNAME}/airflow.yaml" dump_logs() { echo "------- pod description -------" - kubectl describe pod $POD + kubectl describe pod "${POD}" echo "------- webserver init container logs - init -------" - kubectl logs $POD -c init || true - if [ "${GIT_SYNC}" = 1 ]; then + kubectl logs "${POD}" -c init || true + if [[ "${GIT_SYNC}" == "1" ]]; then echo "------- webserver init container logs - git-sync-clone -------" - kubectl logs $POD -c git-sync-clone || true + kubectl logs "${POD}" -c git-sync-clone || true fi echo "------- webserver logs -------" - kubectl logs $POD -c webserver || true + kubectl logs "${POD}" -c webserver || true echo "------- scheduler logs -------" - kubectl logs $POD -c scheduler || true + kubectl logs "${POD}" -c scheduler || true echo "--------------" } set +x # wait for up to 10 minutes for everything to be deployed -PODS_ARE_READY=0 +PODS_ARE_READY="0" for i in {1..150} do - echo "------- Running kubectl get pods -------" + echo "------- Running kubectl get pods: $i -------" PODS=$(kubectl get pods | awk 'NR>1 {print $0}') echo "$PODS" - NUM_AIRFLOW_READY=$(echo $PODS | grep airflow | awk '{print $2}' | grep -E '([0-9])\/(\1)' | wc -l | xargs) - NUM_POSTGRES_READY=$(echo $PODS | grep postgres | awk '{print $2}' | grep -E '([0-9])\/(\1)' | wc -l | xargs) - if [ "$NUM_AIRFLOW_READY" == "1" ] && [ "$NUM_POSTGRES_READY" == "1" ]; then - PODS_ARE_READY=1 + NUM_AIRFLOW_READY=$(echo "${PODS}" | grep airflow | awk '{print $2}' | grep -cE '([0-9])\/(\1)' | xargs) + NUM_POSTGRES_READY=$(echo "${PODS}" | grep postgres | awk '{print $2}' | grep -cE '([0-9])\/(\1)' | xargs) + if [[ "${NUM_AIRFLOW_READY}" == "1" && 
"${NUM_POSTGRES_READY}" == "1" ]]; then + PODS_ARE_READY="1" break fi sleep 4 done POD=$(kubectl get pods -o go-template --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | grep airflow | head -1) -if [[ "$PODS_ARE_READY" == 1 ]]; then +if [[ "${PODS_ARE_READY}" == "1" ]]; then echo "PODS are ready." else - echo "PODS are not ready after waiting for a long time. Exiting..." + echo >&2 "PODS are not ready after waiting for a long time. Exiting..." dump_logs exit 1 fi # Wait until Airflow webserver is up MINIKUBE_IP=$(minikube ip) -AIRFLOW_WEBSERVER_IS_READY=0 +AIRFLOW_WEBSERVER_IS_READY="0" CONSECUTIVE_SUCCESS_CALLS=0 for i in {1..30} do - HTTP_CODE=$(curl -LI http://${MINIKUBE_IP}:30809/health -o /dev/null -w '%{http_code}\n' -sS) || true - if [[ "$HTTP_CODE" == 200 ]]; then - let "CONSECUTIVE_SUCCESS_CALLS+=1" + echo "------- Wait until webserver is up: $i -------" + HTTP_CODE=$(curl -LI "http://${MINIKUBE_IP}:30809/health" -o /dev/null -w '%{http_code}\n' -sS) || true + if [[ "${HTTP_CODE}" == 200 ]]; then + (( CONSECUTIVE_SUCCESS_CALLS+=1 )) else - CONSECUTIVE_SUCCESS_CALLS=0 + CONSECUTIVE_SUCCESS_CALLS="0" fi - if [[ "$CONSECUTIVE_SUCCESS_CALLS" == 3 ]]; then - AIRFLOW_WEBSERVER_IS_READY=1 + if [[ "${CONSECUTIVE_SUCCESS_CALLS}" == 3 ]]; then + AIRFLOW_WEBSERVER_IS_READY="1" break fi sleep 10 done -if [[ "$AIRFLOW_WEBSERVER_IS_READY" == 1 ]]; then +if [[ "${AIRFLOW_WEBSERVER_IS_READY}" == "1" ]]; then echo "Airflow webserver is ready." else - echo "Airflow webserver is not ready after waiting for a long time. Exiting..." + echo >&2 "Airflow webserver is not ready after waiting for a long time. Exiting..." 
dump_logs exit 1 fi diff --git a/scripts/ci/kubernetes/minikube/_k8s.sh b/scripts/ci/kubernetes/minikube/_k8s.sh index 8debeb866651f..d4fdfae9c68fc 100644 --- a/scripts/ci/kubernetes/minikube/_k8s.sh +++ b/scripts/ci/kubernetes/minikube/_k8s.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -21,22 +22,22 @@ # Wait for Kubernetes resources to be up and ready. function _wait_for_ready () { - local count="$1" + local COUNT="$1" shift - local evidence="$1" + local EVIDENCE="$1" shift - local attempts=40 - echo "Waiting till ready (count: $count): $@" - while [[ "$count" < $("$@" 2>&1 | tail -n +2 | awk '{print $2}' | grep -c $evidence) ]]; + local ATTEMPTS=40 + echo "Waiting till ready (count: ${COUNT}): $*" + while [[ "${COUNT}" < $("$@" 2>&1 | tail -n +2 | awk '{print $2}' | grep -c "${EVIDENCE}") ]]; do - if [[ "$attempts" = "1" ]]; then - echo "Last run: $@" + if [[ "${ATTEMPTS}" = "1" ]]; then + echo "Last run: $*" "$@" || true - local command="$@" + local command="$*" command="${command/get/describe}" - $command || true + ${command} || true fi - ((attempts--)) || return 1 + (( ATTEMPTS-- )) || return 1 sleep 5 done "$@" || true @@ -54,12 +55,12 @@ function k8s_single_node_ready () { k8s_all_nodes_ready 1 } -# Wait for at leat expected number of pods to be ready. +# Wait for at least expected number of pods to be ready. 
function k8s_at_least_n_pods_ready () { - local count="$1" + local COUNT="$1" shift - local evidence="-E '([0-9])\/(\1)'" - _wait_for_ready "$count" "$evidence" kubectl get pods "$@" + local EVIDENCE="-E '([0-9])\/(\1)'" + _wait_for_ready "${COUNT}" "${EVIDENCE}" kubectl get pods "$@" } function k8s_single_pod_ready () { diff --git a/scripts/ci/kubernetes/minikube/start_minikube.sh b/scripts/ci/kubernetes/minikube/start_minikube.sh index e0c621712efa4..292fb37e5f8e4 100755 --- a/scripts/ci/kubernetes/minikube/start_minikube.sh +++ b/scripts/ci/kubernetes/minikube/start_minikube.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,15 +17,12 @@ # under the License. # This script was based on one made by @kimoonkim for kubernetes-hdfs - -#!/usr/bin/env bash - set -ex _MY_SCRIPT="${BASH_SOURCE[0]}" _MY_DIR=$(cd "$(dirname "$_MY_SCRIPT")" && pwd) # Avoids 1.7.x because of https://github.com/kubernetes/minikube/issues/2240 -_KUBERNETES_VERSION="${KUBERNETES_VERSION}" +_KUBERNETES_VERSION=${KUBERNETES_VERSION:=""} _MINIKUBE_VERSION="${MINIKUBE_VERSION:-v0.34.1}" echo "setting up kubernetes ${_KUBERNETES_VERSION}, using minikube ${_MINIKUBE_VERSION}" @@ -41,11 +39,10 @@ case "${_UNAME_OUT}" in esac echo "Local OS is ${_MY_OS}" -export MINIKUBE_WANTUPDATENOTIFICATION=false export MINIKUBE_WANTREPORTERRORPROMPT=false export CHANGE_MINIKUBE_NONE_USER=true -cd $_MY_DIR +cd "${_MY_DIR}" source _k8s.sh @@ -66,7 +63,7 @@ fi if [[ ! -x /usr/local/bin/minikube ]]; then echo Downloading minikube.
curl -Lo bin/minikube \ - https://storage.googleapis.com/minikube/releases/${_MINIKUBE_VERSION}/minikube-${_MY_OS}-amd64 + "https://storage.googleapis.com/minikube/releases/${_MINIKUBE_VERSION}/minikube-${_MY_OS}-amd64" chmod +x bin/minikube sudo mv bin/minikube /usr/local/bin/minikube fi @@ -91,7 +88,7 @@ cat <<-EOF | docker run -i --rm -v "$(pwd):/build" ubuntu:14.04 >& nsenter.build make nsenter cp -pfv nsenter /build EOF - if [ ! -f ./nsenter ]; then + if [[ ! -f ./nsenter ]]; then echo "ERROR: nsenter build failed, log:" cat nsenter.build.log exit 1 @@ -105,18 +102,19 @@ echo "your path is ${PATH}" _MINIKUBE="sudo -E PATH=$PATH minikube" -$_MINIKUBE start --kubernetes-version=${_KUBERNETES_VERSION} --vm-driver=${_VM_DRIVER} -$_MINIKUBE update-context - +${_MINIKUBE} config set WantUpdateNotification false +${_MINIKUBE} start --kubernetes-version=${_KUBERNETES_VERSION} --vm-driver=${_VM_DRIVER} +${_MINIKUBE} update-context +# TODO: Check This - it should be travis-independent if [[ "${TRAVIS}" == true ]]; then - sudo chown -R travis.travis $HOME/.kube $HOME/.minikube + sudo chown -R travis.travis "${HOME}/.kube" "${HOME}/.minikube" fi # Wait for Kubernetes to be up and ready. 
k8s_single_node_ready echo Minikube addons: -$_MINIKUBE addons list +${_MINIKUBE} addons list kubectl get storageclass echo Showing kube-system pods kubectl get -n kube-system pods @@ -125,9 +123,9 @@ kubectl get -n kube-system pods (_ADDON=$(kubectl get pod -n kube-system -l component=kube-addon-manager \ --no-headers -o name| cut -d/ -f2); echo Addon-manager describe:; - kubectl describe pod -n kube-system $_ADDON; + kubectl describe pod -n kube-system "${_ADDON}"; echo Addon-manager log:; - kubectl logs -n kube-system $_ADDON; + kubectl logs -n kube-system "${_ADDON}"; exit 1) k8s_single_pod_ready -n kube-system -l k8s-app=kube-dns k8s_single_pod_ready -n kube-system storage-provisioner diff --git a/scripts/ci/kubernetes/minikube/stop_minikube.sh b/scripts/ci/kubernetes/minikube/stop_minikube.sh index 78150be81d43f..5c81f8a121237 100755 --- a/scripts/ci/kubernetes/minikube/stop_minikube.sh +++ b/scripts/ci/kubernetes/minikube/stop_minikube.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,8 +18,6 @@ # This script was based on one made by @kimoonkim for kubernetes-hdfs -#!/usr/bin/env bash - set -ex if [[ ! -x /usr/local/bin/minikube ]]; then @@ -26,17 +25,19 @@ if [[ ! -x /usr/local/bin/minikube ]]; then fi # Fix file permissions +# TODO: Change this - this should be Travis independent if [[ "${TRAVIS}" == true ]]; then - sudo chown -R travis.travis $HOME/.kube $HOME/.minikube + sudo chown -R travis.travis "${HOME}/.kube" "${HOME}/.minikube" 2>/dev/null || true fi +set +e -sudo minikube status -if [[ $? = 0 ]]; then +if sudo minikube status; then sudo minikube delete - sudo rm -rf HOME/.kube $HOME/.minikube + sudo rm -rf "${HOME}/.kube" "${HOME}/.minikube" if [[ "${TRAVIS}" == true ]]; then sudo rm -rf /etc/kubernetes/*.conf fi fi +set -e -sudo chown -R travis.travis . 
+sudo chown -R travis.travis . || true diff --git a/scripts/ci/kubernetes/setup_kubernetes.sh b/scripts/ci/kubernetes/setup_kubernetes.sh index d56cfad12ab2d..121bdc7dd7fa7 100755 --- a/scripts/ci/kubernetes/setup_kubernetes.sh +++ b/scripts/ci/kubernetes/setup_kubernetes.sh @@ -1,3 +1,4 @@ +#!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one * # or more contributor license agreements. See the NOTICE file * # distributed with this work for additional information * @@ -14,24 +15,24 @@ # KIND, either express or implied. See the License for the * # specific language governing permissions and limitations * # under the License. * - set -o xtrace set -e -echo "This script downloads minikube, starts a driver=None minikube cluster, builds the airflow source and docker image, and then deploys airflow onto kubernetes" -echo "For development, start minikube yourself (ie: minikube start) then run this script as you probably do not want a driver=None minikube cluster" +echo "This script downloads minikube, starts a driver=None minikube cluster, builds the airflow source\ + and docker image, and then deploys airflow onto kubernetes" +echo "For development, start minikube yourself (ie: minikube start) then run this script as you probably\ + do not want a driver=None minikube cluster" -DIRNAME=$(cd "$(dirname "$0")"; pwd) +DIRNAME=$(cd "$(dirname "$0")" && pwd) # Fix file permissions +# TODO: change this - it should be Travis independent if [[ "${TRAVIS}" == true ]]; then sudo chown -R travis.travis . fi -$DIRNAME/minikube/start_minikube.sh -$DIRNAME/docker/build.sh - - +"${DIRNAME}/minikube/start_minikube.sh" +"${DIRNAME}/docker/build.sh" echo "Airflow environment on kubernetes is good to go!" 
diff --git a/scripts/ci/local_ci_build.sh b/scripts/ci/local_ci_build.sh new file mode 100755 index 0000000000000..11699902f4ef1 --- /dev/null +++ b/scripts/ci/local_ci_build.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# Builds full CI docker image - the image that can be used for running full tests of Airflow +# +set -euo pipefail +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# shellcheck source=./_utils.sh +. "${MY_DIR}/_utils.sh" + +basic_sanity_checks + +script_start + +rebuild_image_if_needed_for_tests + +script_end diff --git a/scripts/ci/local_ci_enter_environment.sh b/scripts/ci/local_ci_enter_environment.sh new file mode 100755 index 0000000000000..4ae4b14bb8d28 --- /dev/null +++ b/scripts/ci/local_ci_enter_environment.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# Enters bash shell in the Docker container for full CI docker image in order to run tests in the container. +# + +set -euo pipefail +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +export RUN_TESTS="false" +export MOUNT_LOCAL_SOURCES="true" +export PYTHON_VERSION=${PYTHON_VERSION:="3.6"} +export VERBOSE=${VERBOSE:="false"} + +# shellcheck source=./ci_run_airflow_testing.sh +exec "${MY_DIR}/ci_run_airflow_testing.sh" diff --git a/scripts/ci/local_ci_fix_ownership.sh b/scripts/ci/local_ci_fix_ownership.sh new file mode 100755 index 0000000000000..7cc49debeb6aa --- /dev/null +++ b/scripts/ci/local_ci_fix_ownership.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# +# Fixes ownership for files created inside container (files owned by root will be owned by host user) +# + +set -euo pipefail +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# shellcheck source=./_utils.sh +. "${MY_DIR}/_utils.sh" + +basic_sanity_checks + +script_start + +export PYTHON_VERSION=${PYTHON_VERSION:="3.6"} +export DOCKERHUB_USER=${DOCKERHUB_USER:="apache"} +export DOCKERHUB_REPO=${DOCKERHUB_REPO:="airflow"} +export WEBSERVER_HOST_PORT=${WEBSERVER_HOST_PORT:="8080"} +export PYTHONDONTWRITEBYTECODE="true" + +# Default branch name for triggered builds is master +export AIRFLOW_CONTAINER_BRANCH_NAME=${AIRFLOW_CONTAINER_BRANCH_NAME:="master"} + +export AIRFLOW_CONTAINER_DOCKER_IMAGE=\ +${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_CONTAINER_BRANCH_NAME}-python${PYTHON_VERSION}-ci + +HOST_USER_ID="$(id -ur)" +export HOST_USER_ID + +HOST_GROUP_ID="$(id -gr)" +export HOST_GROUP_ID + +docker-compose \ + -f "${MY_DIR}/docker-compose.yml" \ + -f "${MY_DIR}/docker-compose-local.yml" \ + run --no-deps airflow-testing /opt/airflow/scripts/ci/in_container/run_fix_ownership.sh + +script_end diff --git a/scripts/ci/local_ci_pull_and_build.sh b/scripts/ci/local_ci_pull_and_build.sh new file mode 100755 index 0000000000000..de74ba74c6638 --- /dev/null +++ b/scripts/ci/local_ci_pull_and_build.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# Pulls and rebuilds the full CI image used for testing +# +set -euo pipefail +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# shellcheck source=./_utils.sh +. "${MY_DIR}/_utils.sh" + +basic_sanity_checks + +script_start + +export AIRFLOW_CONTAINER_FORCE_PULL_IMAGES="true" + +rebuild_image_if_needed_for_tests + +script_end diff --git a/scripts/ci/local_ci_run_airflow_testing.sh b/scripts/ci/local_ci_run_airflow_testing.sh new file mode 100755 index 0000000000000..6c613c269f85b --- /dev/null +++ b/scripts/ci/local_ci_run_airflow_testing.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# +# Executes tests locally +# +set -euo pipefail +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +export RUN_TESTS="true" +export MOUNT_LOCAL_SOURCES="true" +export PYTHON_VERSION=${PYTHON_VERSION:="3.6"} +export VERBOSE=${VERBOSE:="false"} + +# shellcheck source=./ci_run_airflow_testing.sh +exec "${MY_DIR}/ci_run_airflow_testing.sh" diff --git a/scripts/ci/local_ci_stop_environment.sh b/scripts/ci/local_ci_stop_environment.sh new file mode 100755 index 0000000000000..870f0ae1dd3b6 --- /dev/null +++ b/scripts/ci/local_ci_stop_environment.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# Stops the Docker Compose environment +# +set -euo pipefail +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# shellcheck source=./_utils.sh +. 
"${MY_DIR}/_utils.sh" + +basic_sanity_checks + +script_start + +export PYTHON_VERSION=${PYTHON_VERSION:="3.6"} +export DOCKERHUB_USER=${DOCKERHUB_USER:="apache"} +export DOCKERHUB_REPO=${DOCKERHUB_REPO:="airflow"} +export WEBSERVER_HOST_PORT=${WEBSERVER_HOST_PORT:="8080"} + +# Default branch name for triggered builds is master +export AIRFLOW_CONTAINER_BRANCH_NAME=${AIRFLOW_CONTAINER_BRANCH_NAME:="master"} + +export AIRFLOW_CONTAINER_DOCKER_IMAGE=\ +${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_CONTAINER_BRANCH_NAME}-python${PYTHON_VERSION}-ci + +HOST_USER_ID="$(id -ur)" +export HOST_USER_ID + +HOST_GROUP_ID="$(id -gr)" +export HOST_GROUP_ID + +docker-compose \ + -f "${MY_DIR}/docker-compose.yml" \ + -f "${MY_DIR}/docker-compose-kubernetes.yml" \ + -f "${MY_DIR}/docker-compose-local.yml" \ + -f "${MY_DIR}/docker-compose-mysql.yml" \ + -f "${MY_DIR}/docker-compose-postgres.yml" \ + -f "${MY_DIR}/docker-compose-sqlite.yml" down + +script_end diff --git a/scripts/ci/run-ci.sh b/scripts/ci/run-ci.sh deleted file mode 100755 index a2f73289d2f52..0000000000000 --- a/scripts/ci/run-ci.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -x - -DIRNAME=$(cd "$(dirname "$0")"; pwd) -AIRFLOW_ROOT="$DIRNAME/../.." - -nose_args=$@ -# Fix file permissions -sudo chown -R airflow.airflow . $HOME/.cache $HOME/.wheelhouse/ $HOME/.cache/pip -if [ -d $HOME/.minikube ]; then - sudo chown -R airflow.airflow $HOME/.kube $HOME/.minikube -fi - -sudo -H pip3 install --upgrade pip -sudo -H pip3 install tox - -cd $AIRFLOW_ROOT && pip3 --version && tox --version - -if [ -z "$KUBERNETES_VERSION" ]; -then - tox -e $TOX_ENV $nose_args -else - # This script runs inside a container, the path of the kubernetes certificate - # is /home/travis/.minikube/client.crt but the user in the container is `airflow` - if [ ! -d /home/travis ]; then - sudo mkdir -p /home/travis - fi - sudo ln -s /home/airflow/.minikube /home/travis/.minikube - - tox -e $TOX_ENV -- tests.minikube \ - --with-coverage \ - --cover-erase \ - --cover-html \ - --cover-package=airflow \ - --cover-html-dir=airflow/www/static/coverage \ - --with-ignore-docstrings \ - --rednose \ - --with-timer \ - -v \ - --logging-level=DEBUG -fi diff --git a/scripts/docker/entrypoint.sh b/scripts/docker/entrypoint.sh index c5fa6fb0f9aea..30e18596d7354 100755 --- a/scripts/docker/entrypoint.sh +++ b/scripts/docker/entrypoint.sh @@ -21,6 +21,6 @@ set -e echo Starting Apache Airflow with command: -echo airflow $@ +echo airflow "$@" -exec airflow $@ +exec airflow "$@" diff --git a/setup.py b/setup.py index c04cfe04141d3..34b801fe02afa 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- """Setup for the Airflow library.""" import importlib @@ -25,14 +24,16 @@ import os import subprocess import sys +import unittest from setuptools import setup, find_packages, Command -from setuptools.command.test import test as TestCommand logger = logging.getLogger(__name__) # Kept manually in sync with airflow.__version__ +# noinspection PyUnresolvedReferences spec = importlib.util.spec_from_file_location("airflow.version", os.path.join('airflow', 'version.py')) +# noinspection PyUnresolvedReferences mod = importlib.util.module_from_spec(spec) spec.loader.exec_module(mod) version = mod.version @@ -47,31 +48,11 @@ long_description = '' -class Tox(TestCommand): - """ - Command class to run Tox via setup.py. - Registered as cmdclass in setup() so it can be called with ``python setup.py test``. - """ - - user_options = [('tox-args=', None, "Arguments to pass to tox")] - - def __init__(self, dist, **kw): - super().__init__(dist, **kw) - self.test_suite = True - self.test_args = [] - self.tox_args = '' - - def initialize_options(self): - TestCommand.initialize_options(self) - - def finalize_options(self): - TestCommand.finalize_options(self) - - def run_tests(self): - # import here, cause outside the eggs aren't loaded - import tox - errno = tox.cmdline(args=self.tox_args.split()) - sys.exit(errno) +def airflow_test_suite(): + """Test suite for Airflow tests""" + test_loader = unittest.TestLoader() + test_suite = test_loader.discover('tests', pattern='test_*.py') + return test_suite class CleanCommand(Command): @@ -89,6 +70,7 @@ def initialize_options(self): def finalize_options(self): """Set final values for options.""" + # noinspection PyMethodMayBeStatic def run(self): """Run command to remove temporary files and directories.""" os.system('rm -vrf ./build ./dist ./*.pyc ./*.tgz ./*.egg-info') @@ -109,6 +91,7 @@ def initialize_options(self): def finalize_options(self): """Set final values for options.""" + # noinspection PyMethodMayBeStatic def run(self): """Run a 
command to compile and build assets.""" subprocess.call('./airflow/www/compile_assets.sh') @@ -129,13 +112,14 @@ def git_version(version_: str) -> str: """ try: import git - repo = git.Repo('.git') + try: + repo = git.Repo('.git') + except git.NoSuchPathError: + logger.warning('.git directory not found: Cannot compute the git version') + return '' except ImportError: logger.warning('gitpython not found: Cannot compute the git version.') return '' - except git.exc.NoSuchPathError: - logger.warning('.git directory not found: Cannot compute the git version') - return '' if repo: sha = repo.head.commit.hexsha if repo.is_dirty(): @@ -273,15 +257,16 @@ def write_version(filename: str = os.path.join(*["airflow", "git_version"])): winrm = ['pywinrm==0.2.2'] zendesk = ['zdesk'] -all_dbs = postgres + mysql + hive + mssql + hdfs + vertica + cloudant + druid + pinot \ - + cassandra + mongo +all_dbs = postgres + mysql + hive + mssql + hdfs + vertica + cloudant + druid + pinot + cassandra + mongo devel = [ 'beautifulsoup4~=4.7.1', 'click==6.7', + 'codecov', 'flake8>=3.6.0', 'flake8-colors', 'freezegun', + 'ipdb', 'jira', 'mongomock', 'moto==1.3.5', @@ -290,12 +275,12 @@ def write_version(filename: str = os.path.join(*["airflow", "git_version"])): 'nose-timer', 'parameterized', 'paramiko', - 'pylint~=2.3.1', # Ensure the same version as in .travis.yml + 'pylint~=2.3.1', 'pysftp', 'pywinrm', 'qds-sdk>=1.9.6', 'rednose', - 'requests_mock' + 'requests_mock', ] if PY3: @@ -430,7 +415,6 @@ def do_setup(): 'ssh': ssh, 'statsd': statsd, 'vertica': vertica, - 'virtualenv': virtualenv, 'webhdfs': webhdfs, 'winrm': winrm, }, @@ -452,10 +436,10 @@ def do_setup(): download_url=( 'https://dist.apache.org/repos/dist/release/airflow/' + version), cmdclass={ - 'test': Tox, 'extra_clean': CleanCommand, 'compile_assets': CompileAssets }, + test_suite='setup.airflow_test_suite', python_requires='~=3.5', ) diff --git a/tests/contrib/hooks/test_jdbc_hook.py b/tests/contrib/hooks/test_jdbc_hook.py 
index 15878c08c3023..80d39ecd32ed9 100644 --- a/tests/contrib/hooks/test_jdbc_hook.py +++ b/tests/contrib/hooks/test_jdbc_hook.py @@ -47,7 +47,7 @@ def test_jdbc_conn_connection(self, jdbc_mock): jdbc_conn = jdbc_hook.get_conn() self.assertTrue(jdbc_mock.called) self.assertIsInstance(jdbc_conn, Mock) - self.assertEqual(jdbc_conn.name, jdbc_mock.return_value.name) + self.assertEqual(jdbc_conn.name, jdbc_mock.return_value.name) # pylint: disable=no-member if __name__ == '__main__': diff --git a/tests/contrib/hooks/test_sftp_hook.py b/tests/contrib/hooks/test_sftp_hook.py index 6cd996f9c7ec4..9a2c380cc0505 100644 --- a/tests/contrib/hooks/test_sftp_hook.py +++ b/tests/contrib/hooks/test_sftp_hook.py @@ -25,14 +25,29 @@ from airflow.contrib.hooks.sftp_hook import SFTPHook from airflow.models import Connection +from airflow.utils.db import provide_session TMP_PATH = '/tmp' TMP_DIR_FOR_TESTS = 'tests_sftp_hook_dir' TMP_FILE_FOR_TESTS = 'test_file.txt' +SFTP_CONNECTION_USER = "root" + class SFTPHookTest(unittest.TestCase): + + @provide_session + def update_connection(self, login, session=None): + connection = (session.query(Connection). 
+ filter(Connection.conn_id == "sftp_default") + .first()) + old_login = connection.login + connection.login = login + session.commit() + return old_login + def setUp(self): + self.old_login = self.update_connection(SFTP_CONNECTION_USER) self.hook = SFTPHook() os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)) with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as file: @@ -163,6 +178,7 @@ def test_no_host_key_check_no_ignore(self, get_connection): def tearDown(self): shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS)) os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)) + self.update_connection(self.old_login) if __name__ == '__main__': diff --git a/tests/contrib/utils/base_gcp_system_test_case.py b/tests/contrib/utils/base_gcp_system_test_case.py index b63dd1c45d355..3ca078509c34d 100644 --- a/tests/contrib/utils/base_gcp_system_test_case.py +++ b/tests/contrib/utils/base_gcp_system_test_case.py @@ -40,7 +40,7 @@ # Retrieve environment variables from parent directory retriever - it should be -# in the path ${AIRFLOW_SOURCE_DIR}/../../get_system_test_environment_variables.py +# in the path ${AIRFLOW_ROOT}/../../get_system_test_environment_variables.py # and it should print all the variables in form of key=value to the stdout class RetrieveVariables: @staticmethod diff --git a/tests/core.py b/tests/core.py index 69757768c23fa..bf185f011253b 100644 --- a/tests/core.py +++ b/tests/core.py @@ -17,7 +17,6 @@ # specific language governing permissions and limitations # under the License. 
-import doctest import json import multiprocessing import os @@ -43,15 +42,17 @@ from pendulum import utcnow from airflow import configuration, models -from airflow import jobs, DAG, utils, macros, settings, exceptions +from airflow import jobs, DAG, utils, settings, exceptions from airflow.bin import cli from airflow.configuration import AirflowConfigException, run_command from airflow.exceptions import AirflowException from airflow.executors import SequentialExecutor from airflow.hooks.base_hook import BaseHook from airflow.hooks.sqlite_hook import SqliteHook -from airflow.models import BaseOperator, Connection, TaskFail from airflow.models import ( + BaseOperator, + Connection, + TaskFail, DagBag, DagRun, Pool, @@ -713,13 +714,6 @@ def test_raw_job(self): ti.dag = self.dag_bash ti.run(ignore_ti_state=True) - def test_doctests(self): - modules = [utils, macros] - for mod in modules: - failed, _ = doctest.testmod(mod) - if failed: - raise Exception("Failed a doctest") - def test_variable_set_get_round_trip(self): Variable.set("tested_var_set_id", "Monday morning breakfast") self.assertEqual("Monday morning breakfast", Variable.get("tested_var_set_id")) diff --git a/tests/jobs/test_scheduler_job.py b/tests/jobs/test_scheduler_job.py index 642d64c5e95a1..64b83b460f772 100644 --- a/tests/jobs/test_scheduler_job.py +++ b/tests/jobs/test_scheduler_job.py @@ -1297,8 +1297,7 @@ def test_scheduler_start_date(self): def test_scheduler_task_start_date(self): """ - Test that the scheduler respects task start dates that are different - from DAG start dates + Test that the scheduler respects task start dates that are different from DAG start dates """ dag_id = 'test_task_start_date_scheduling' diff --git a/tests/operators/test_bash_operator.py b/tests/operators/test_bash_operator.py index f4eafcbb3c74f..80cbe90a71033 100644 --- a/tests/operators/test_bash_operator.py +++ b/tests/operators/test_bash_operator.py @@ -81,7 +81,7 @@ def test_echo_env_variables(self): with 
open(tmp_file.name, 'r') as file: output = ''.join(file.readlines()) self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output) - # exported in run_unit_tests.sh as part of PYTHONPATH + # exported in run-tests as part of PYTHONPATH self.assertIn('tests/test_utils', output) self.assertIn('bash_op_test', output) self.assertIn('echo_env_vars', output) diff --git a/tests/test_impersonation.py b/tests/test_impersonation.py index b06c5df035eef..8aa4a636f778e 100644 --- a/tests/test_impersonation.py +++ b/tests/test_impersonation.py @@ -34,18 +34,32 @@ DEFAULT_DATE = datetime(2015, 1, 1) TEST_USER = 'airflow_test_user' -logger = logging.getLogger(__name__) -# TODO(aoen): Adding/remove a user as part of a test is very bad (especially if the user -# already existed to begin with on the OS), this logic should be moved into a test -# that is wrapped in a container like docker so that the user can be safely added/removed. -# When this is done we can also modify the sudoers file to ensure that useradd will work -# without any manual modification of the sudoers file by the agent that is running these -# tests. +logger = logging.getLogger(__name__) class ImpersonationTest(unittest.TestCase): + + @staticmethod + def grant_permissions(): + airflow_home = os.environ['AIRFLOW_HOME'] + subprocess.check_call( + 'find "%s" -exec sudo chmod og+w {} +; sudo chmod og+rx /root' % airflow_home, shell=True) + + @staticmethod + def revoke_permissions(): + airflow_home = os.environ['AIRFLOW_HOME'] + subprocess.check_call( + 'find "%s" -exec sudo chmod og-w {} +; sudo chmod og-rx /root' % airflow_home, shell=True) + def setUp(self): + if not os.path.isfile('/.dockerenv') or os.environ.get('APT_DEPS_IMAGE') is None: + raise unittest.SkipTest("""Adding/removing a user as part of a test is very bad for host os +(especially if the user already existed to begin with on the OS), therefore we check if we run inside a +the official docker container and only allow to run the test there. 
This is done by checking /.dockerenv +file (always present inside container) and checking for APT_DEPS_IMAGE variable. +""") + self.grant_permissions() add_default_pool_if_not_exists() self.dagbag = models.DagBag( dag_folder=TEST_DAG_FOLDER, @@ -73,6 +87,7 @@ def setUp(self): def tearDown(self): subprocess.check_output(['sudo', 'userdel', '-r', TEST_USER]) + self.revoke_permissions() def run_backfill(self, dag_id, task_id): dag = self.dagbag.get_dag(dag_id) diff --git a/tests/test_utils/print_tests.py b/tests/test_utils/print_tests.py new file mode 100644 index 0000000000000..76f33a8d10d20 --- /dev/null +++ b/tests/test_utils/print_tests.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Prints summary of test cases from xunit file. 
+""" +import argparse +from xml.etree import ElementTree + + +def print_cases(xmlunit_test_file, only_failed=False): + """Prints tests cases.""" + with open(xmlunit_test_file, "r") as file: + text = file.read() + + root = ElementTree.fromstring(text) + + test_cases = root.findall('.//testcase') + for test_case in test_cases: + errors = test_case.findall('error') + failures = test_case.findall('failure') + error_string = "".join([" Error:" + error.get('type') for error in errors]) + failure_string = "".join([" Failure:" + failure.get('type') for failure in failures]) + if only_failed and error_string == "" and failure_string == "": + continue + print(test_case.get('classname') + "." + test_case.get('name') + error_string + failure_string) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Get test cases from xml file ') + parser.add_argument('--xunit-file', dest='xunit_file', action='store', + help="XML Unit file where results of tests are stored") + parser.add_argument('--only-failed', dest='only_failed', action='store_true', + help="Only display tests which have errors and failures") + arguments = parser.parse_args() + print_cases(arguments.xunit_file, only_failed=arguments.only_failed) diff --git a/tests/utils/log/test_file_processor_handler.py b/tests/utils/log/test_file_processor_handler.py index b501063dc3bfb..1a0949c7f48b9 100644 --- a/tests/utils/log/test_file_processor_handler.py +++ b/tests/utils/log/test_file_processor_handler.py @@ -20,11 +20,11 @@ import shutil import os import unittest +from freezegun import freeze_time from airflow.utils.log.file_processor_handler import FileProcessorHandler from airflow.utils import timezone from datetime import timedelta -from freezegun import freeze_time class TestFileProcessorHandler(unittest.TestCase): diff --git a/tests/utils/test_json.py b/tests/utils/test_json.py index c01a94e0bc8f5..b314b51149ff9 100644 --- a/tests/utils/test_json.py +++ b/tests/utils/test_json.py @@ -61,7 +61,7 
@@ def test_encode_numpy_float(self): def test_encode_raises(self): self.assertRaisesRegex(TypeError, - "^%s is not JSON serializable$" % Exception, + "^.*is not JSON serializable$", json.dumps, Exception, cls=utils_json.AirflowJsonEncoder) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index a64b639f1926b..0000000000000 --- a/tox.ini +++ /dev/null @@ -1,69 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[tox] -envlist = flake8,py35-backend_{mysql,sqlite,postgres}-env_{docker,kubernetes} -skipsdist = True - -[global] -wheel_dir = {homedir}/.wheelhouse -find_links = - {homedir}/.wheelhouse - {homedir}/.pip-cache - -[testenv] -deps = - wheel - codecov - -basepython = - py35: python3.5 - -setenv = - HADOOP_DISTRO=cdh - HADOOP_HOME=/tmp/hadoop-cdh - HADOOP_OPTS=-D/tmp/krb5.conf - HIVE_HOME=/tmp/hive - MINICLUSTER_HOME=/tmp/minicluster-1.1-SNAPSHOT - KRB5_CONFIG=/etc/krb5.conf - KRB5_KTNAME=/etc/airflow.keytab - CELERY_BROKER_URLS=amqp://guest:guest@rabbitmq:5672,redis://redis:6379/0 - backend_mysql: AIRFLOW__CORE__SQL_ALCHEMY_CONN=mysql://root@mysql/airflow - backend_mysql: AIRFLOW__CELERY__RESULT_BACKEND=db+mysql://root@mysql/airflow - backend_postgres: AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://postgres:airflow@postgres/airflow - backend_postgres: AIRFLOW__CELERY__RESULT_BACKEND=db+postgresql://postgres:airflow@postgres/airflow - backend_sqlite: AIRFLOW__CORE__SQL_ALCHEMY_CONN=sqlite:///{homedir}/airflow.db - backend_sqlite: AIRFLOW__CORE__EXECUTOR=SequentialExecutor - -passenv = * - -commands = - pip wheel --no-use-pep517 --progress-bar off -w {homedir}/.wheelhouse -f {homedir}/.wheelhouse -e .[devel_ci] - pip install --no-use-pep517 --progress-bar off --find-links={homedir}/.wheelhouse --no-index -e .[devel_ci] - env_docker: {toxinidir}/scripts/ci/1-setup-env.sh - env_docker: {toxinidir}/scripts/ci/2-setup-kdc.sh - {toxinidir}/scripts/ci/5-run-tests.sh [] - codecov -e TOXENV - -[testenv:flake8] -basepython = python3 - -deps = - flake8>=3.6.0 - -commands = flake8