diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 00000000..b14a97c6
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,47 @@
+language: python
+dist: trusty
+sudo: required
+
+notifications:
+  email: false
+
+services:
+  - docker
+matrix:
+  include:
+    - python: "2.7"
+      env:
+        - OS=ubuntu-14.04
+        - JOBQUEUE=sge
+    - python: "3.6"
+      env:
+        - OS=ubuntu-14.04
+        - JOBQUEUE=sge
+env:
+  global:
+    - DOCKER_COMPOSE_VERSION=1.6.0
+
+before_install:
+  - pwd
+  - docker version
+  - docker-compose version
+  # Install miniconda
+  - ./ci/conda_setup.sh
+  - export PATH="$HOME/miniconda/bin:$PATH"
+  - conda install --yes -c conda-forge python=$TRAVIS_PYTHON_VERSION dask distributed flake8
+  # Start jobqueue (ci/${JOBQUEUE}.sh defines the jobqueue_* shell functions)
+  - source ci/${JOBQUEUE}.sh
+  - jobqueue_before_install
+install:
+  - which python
+  - pip install --no-deps -e .
+  - jobqueue_install
+script:
+  - flake8 -j auto dask_jobqueue
+  - jobqueue_script
+after_success:
+  - jobqueue_after_success
+
+  # TODO
+  # - pip install --no-cache-dir coveralls
+  # - coveralls
diff --git a/ci/conda_setup.sh b/ci/conda_setup.sh
new file mode 100755
index 00000000..58131b6f
--- /dev/null
+++ b/ci/conda_setup.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+set -e
+set -x
+
+# Install miniconda into $HOME/miniconda (installer fetched over HTTPS)
+wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
+bash ~/miniconda.sh -b -p "$HOME/miniconda"
+export PATH="$HOME/miniconda/bin:$PATH"
+conda update conda --yes
+conda clean -tipy
+conda config --set always_yes yes --set changeps1 no
+conda --version
diff --git a/ci/sge.sh b/ci/sge.sh
new file mode 100644
index 00000000..fa9d824b
--- /dev/null
+++ b/ci/sge.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+set -x
+
+function jobqueue_before_install {
+    docker version
+    docker-compose version
+
+    # start sge cluster
+    cd ./ci/sge
+    ./start-sge.sh
+    cd -
+
+    docker ps -a
+    docker images
+}
+
+function jobqueue_install {
+    docker exec -it sge_master /bin/bash -c "cd /dask-jobqueue; python setup.py install"
+}
+
+function
jobqueue_script {
+    docker exec -it sge_master /bin/bash -c "cd /dask-jobqueue; py.test dask_jobqueue --verbose -E sge"
+}
+
+function jobqueue_after_success {
+    docker exec -it sge_master bash -c 'cat /tmp/sge*'
+    docker exec -it slave_one bash -c 'cat /tmp/exec*'
+    docker exec -it slave_two bash -c 'cat /tmp/exec*'
+}
diff --git a/ci/sge/Dockerfile-master b/ci/sge/Dockerfile-master
new file mode 100644
index 00000000..745eddcb
--- /dev/null
+++ b/ci/sge/Dockerfile-master
@@ -0,0 +1,33 @@
+FROM ubuntu:14.04
+
+ENV LANG=C.UTF-8
+
+# Keep the apt package lists in the image: setup-master.sh runs
+# `apt-get install` later without its own `apt-get update`.
+RUN apt-get update && apt-get install curl bzip2 git gcc -y --fix-missing
+
+# Install Miniconda3 into /opt/anaconda and put it on PATH
+RUN curl -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    bash miniconda.sh -f -b -p /opt/anaconda && \
+    /opt/anaconda/bin/conda clean -tipy && \
+    rm -f miniconda.sh
+ENV PATH=/opt/anaconda/bin:$PATH
+RUN conda install -n root conda=4.4.11 && conda clean -tipy
+RUN conda install -c conda-forge dask distributed blas pytest mock ipython pip psutil && conda clean -tipy
+RUN pip install --no-cache-dir drmaa
+RUN pip install --no-cache-dir git+https://github.com/dask/distributed.git --upgrade
+
+# Copy the helper scripts and SGE config files to /, then run the master setup
+COPY ./*.sh /
+COPY ./*.txt /
+RUN bash ./setup-master.sh
+
+# expose ports (8888 is the port run-master.sh serves with http.server)
+EXPOSE 8888
+EXPOSE 6444
+EXPOSE 6445
+EXPOSE 6446
+
+ENV DRMAA_LIBRARY_PATH=/usr/lib/gridengine-drmaa/lib/libdrmaa.so
+ENV SGE_ROOT=/var/lib/gridengine/
+ENV SGE_CELL=default
diff --git a/ci/sge/Dockerfile-slave b/ci/sge/Dockerfile-slave
new file mode 100644
index 00000000..509c38cc
--- /dev/null
+++ b/ci/sge/Dockerfile-slave
@@ -0,0 +1,24 @@
+FROM ubuntu:14.04
+
+ENV LANG=C.UTF-8
+
+# Keep the apt package lists in the image: setup-slave.sh runs
+# `apt-get install` later without its own `apt-get update`.
+RUN apt-get update && apt-get install curl bzip2 git gcc -y --fix-missing
+
+# Install Miniconda3 into /opt/anaconda and put it on PATH
+RUN curl -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
+    bash miniconda.sh -f -b -p /opt/anaconda && \
+    /opt/anaconda/bin/conda clean -tipy && \
+    rm -f miniconda.sh
+ENV PATH=/opt/anaconda/bin:$PATH
+RUN conda install -n root conda=4.4.11 && conda clean
-tipy
+RUN conda install -c conda-forge dask distributed blas pytest mock ipython pip psutil && conda clean -tipy
+RUN pip install --no-cache-dir drmaa
+RUN pip install --no-cache-dir git+https://github.com/dask/distributed.git --upgrade
+
+# The *.sh glob below already includes setup-slave.sh and run-slave.sh
+COPY ./*.sh /
+RUN bash ./setup-slave.sh
+
+CMD ["python", "-m", "http.server"]
diff --git a/ci/sge/add_worker.sh b/ci/sge/add_worker.sh
new file mode 100644
index 00000000..d48c6203
--- /dev/null
+++ b/ci/sge/add_worker.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Usage: add_worker.sh QUEUE HOSTNAME [SLOTS]
+
+QUEUE=$1
+HOSTNAME=$2
+SLOTS=$3
+
+# add to the execution host list
+TMPFILE=/tmp/sge.hostname-$HOSTNAME
+echo -e "hostname $HOSTNAME\nload_scaling NONE\ncomplex_values NONE\nuser_lists NONE\nxuser_lists NONE\nprojects NONE\nxprojects NONE\nusage_scaling NONE\nreport_variables NONE" > "$TMPFILE"
+qconf -Ae "$TMPFILE"
+rm "$TMPFILE"
+
+# add to the all hosts list
+qconf -aattr hostgroup hostlist "$HOSTNAME" @allhosts
+
+# enable the host for the queue, in case it was disabled and not removed
+qmod -e "$QUEUE@$HOSTNAME"
+
+# Add memory resource
+qconf -mattr exechost complex_values h_vmem=100G "$HOSTNAME"
+
+if [ "$SLOTS" ]; then
+    qconf -aattr queue slots "[$HOSTNAME=$SLOTS]" "$QUEUE"
+fi
diff --git a/ci/sge/docker-compose.yml b/ci/sge/docker-compose.yml
new file mode 100644
index 00000000..9b674032
--- /dev/null
+++ b/ci/sge/docker-compose.yml
@@ -0,0 +1,44 @@
+version: "2"
+
+services:
+
+  master:
+    build:
+      context: .
+      dockerfile: Dockerfile-master
+    container_name: sge_master
+    hostname: sge_master
+    #network_mode: host
+    volumes:
+      - ../..:/dask-jobqueue
+    command: bash /run-master.sh
+
+  slave-one:
+    build:
+      context: .
+      dockerfile: Dockerfile-slave
+    container_name: slave_one
+    hostname: slave_one
+    #network_mode: host
+    volumes:
+      - ../..:/dask-jobqueue
+    command: bash /run-slave.sh
+    links:
+      - "master:sge_master"
+    depends_on:
+      - master
+
+  slave-two:
+    build:
+      context: .
+      dockerfile: Dockerfile-slave
+    container_name: slave_two
+    hostname: slave_two
+    #network_mode: host
+    volumes:
+      - ../..:/dask-jobqueue
+    command: bash /run-slave.sh
+    links:
+      - "master:sge_master"
+    depends_on:
+      - master
diff --git a/ci/sge/hosts.txt b/ci/sge/hosts.txt
new file mode 100644
index 00000000..5aee646d
--- /dev/null
+++ b/ci/sge/hosts.txt
@@ -0,0 +1,2 @@
+group_name @allhosts
+hostlist   NONE
diff --git a/ci/sge/queue.txt b/ci/sge/queue.txt
new file mode 100644
index 00000000..91ee22bd
--- /dev/null
+++ b/ci/sge/queue.txt
@@ -0,0 +1,50 @@
+qname                 dask.q
+hostlist              @allhosts
+seq_no                0
+load_thresholds       NONE
+suspend_thresholds    NONE
+nsuspend              1
+suspend_interval      00:00:01
+priority              0
+min_cpu_interval      00:00:01
+processors            UNDEFINED
+qtype                 BATCH INTERACTIVE
+ckpt_list             NONE
+pe_list               make
+rerun                 FALSE
+slots                 2
+tmpdir                /tmp
+shell                 /bin/csh
+prolog                NONE
+epilog                NONE
+shell_start_mode      posix_compliant
+starter_method        NONE
+suspend_method        NONE
+resume_method         NONE
+terminate_method      NONE
+notify                00:00:01
+owner_list            NONE
+user_lists            NONE
+xuser_lists           NONE
+subordinate_list      NONE
+complex_values        NONE
+projects              NONE
+xprojects             NONE
+calendar              NONE
+initial_state         default
+s_rt                  INFINITY
+h_rt                  INFINITY
+s_cpu                 INFINITY
+h_cpu                 INFINITY
+s_fsize               INFINITY
+h_fsize               INFINITY
+s_data                INFINITY
+h_data                INFINITY
+s_stack               INFINITY
+h_stack               INFINITY
+s_core                INFINITY
+h_core                INFINITY
+s_rss                 INFINITY
+h_rss                 INFINITY
+s_vmem                INFINITY
+h_vmem                INFINITY
diff --git a/ci/sge/run-master.sh b/ci/sge/run-master.sh
new file mode 100755
index 00000000..2282ca50
--- /dev/null
+++ b/ci/sge/run-master.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+
+# start sge
+sudo service gridengine-master restart
+
+while !
ping -c1 slave_one &>/dev/null; do sleep 1; done
+
+qconf -Msconf /scheduler.txt
+qconf -Ahgrp /hosts.txt
+qconf -Aq /queue.txt
+
+# docker-compose.yml only defines slave_one and slave_two
+qconf -ah slave_one
+qconf -ah slave_two
+
+qconf -as "$HOSTNAME"
+bash /add_worker.sh dask.q slave_one 4
+bash /add_worker.sh dask.q slave_two 4
+
+sudo service gridengine-master restart
+
+python -m http.server 8888
diff --git a/ci/sge/run-slave.sh b/ci/sge/run-slave.sh
new file mode 100755
index 00000000..f27121e3
--- /dev/null
+++ b/ci/sge/run-slave.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# start sge
+sudo service gridengine-exec restart
+
+sleep 4
+
+sudo service gridengine-exec restart
+
+python -m http.server 8888
diff --git a/ci/sge/run_master.sh b/ci/sge/run_master.sh
new file mode 100755
index 00000000..c6024621
--- /dev/null
+++ b/ci/sge/run_master.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# NOTE(review): docker-compose.yml runs /run-master.sh; this underscore-named
+# variant (same steps plus set -x / set -e) looks unused -- confirm.
+
+set -x
+set -e
+
+# start sge
+sudo service gridengine-master restart
+
+while ! ping -c1 slave_one &>/dev/null; do sleep 1; done
+
+qconf -Msconf /scheduler.txt
+qconf -Ahgrp /hosts.txt
+qconf -Aq /queue.txt
+
+# docker-compose.yml only defines slave_one and slave_two
+qconf -ah slave_one
+qconf -ah slave_two
+
+qconf -as "$HOSTNAME"
+bash /add_worker.sh dask.q slave_one 4
+bash /add_worker.sh dask.q slave_two 4
+
+sudo service gridengine-master restart
+
+python -m http.server 8888
diff --git a/ci/sge/run_slave.sh b/ci/sge/run_slave.sh
new file mode 100755
index 00000000..f27121e3
--- /dev/null
+++ b/ci/sge/run_slave.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# NOTE(review): same steps as run-slave.sh, which is the script that
+# docker-compose.yml runs; this underscore-named copy looks unused -- confirm.
+
+# start sge
+sudo service gridengine-exec restart
+
+sleep 4
+
+sudo service gridengine-exec restart
+
+python -m http.server 8888
diff --git a/ci/sge/scheduler.txt b/ci/sge/scheduler.txt
new file mode 100644
index 00000000..5718eba3
--- /dev/null
+++ b/ci/sge/scheduler.txt
@@ -0,0 +1,35 @@
+algorithm                         default
+schedule_interval                 0:0:1
+maxujobs                          0
+queue_sort_method                 load
+job_load_adjustments              np_load_avg=0.50
+load_adjustment_decay_time        0:7:30
+load_formula                      np_load_avg
+schedd_job_info                   true
+flush_submit_sec                  0
+flush_finish_sec                  0
+params                            none
+reprioritize_interval             0:0:0
+halftime                          168
+usage_weight_list                 cpu=1.000000,mem=0.000000,io=0.000000
+compensation_factor               5.000000
+weight_user                       0.250000
+weight_project                    0.250000
+weight_department                 0.250000
+weight_job                        0.250000
+weight_tickets_functional         0
+weight_tickets_share              0
+share_override_tickets            TRUE
+share_functional_shares           TRUE
+max_functional_jobs_to_schedule   200
+report_pjob_tickets               TRUE
+max_pending_tasks_per_job         50
+halflife_decay_list               none
+policy_hierarchy                  OFS
+weight_ticket                     0.500000
+weight_waiting_time               0.278000
+weight_deadline                   3600000.000000
+weight_urgency                    0.500000
+weight_priority                   0.000000
+max_reservation                   0
+default_duration                  INFINITY
diff --git a/ci/sge/setup-master.sh b/ci/sge/setup-master.sh
new file mode 100755
index 00000000..1ea78523
--- /dev/null
+++ b/ci/sge/setup-master.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Preseed the same three debconf answers for each Grid Engine package
+for pkg in gridengine-master gridengine-common gridengine-client; do
+    echo "$pkg shared/gridenginemaster string $HOSTNAME" | sudo debconf-set-selections
+    echo "$pkg shared/gridenginecell string default" | sudo debconf-set-selections
+    echo "$pkg shared/gridengineconfig boolean false" | sudo debconf-set-selections
+done
+# Postfix is pulled in as a dependency; preseed it with "No configuration"
+echo "postfix postfix/main_mailer_type select No configuration" | sudo debconf-set-selections
+
+# Install Grid Engine
+sudo DEBIAN_FRONTEND=noninteractive apt-get install -y
gridengine-master gridengine-client gridengine-drmaa-dev -qq
+
+# Set up Grid Engine
+sudo -u sgeadmin /usr/share/gridengine/scripts/init_cluster /var/lib/gridengine default /var/spool/gridengine/spooldb sgeadmin
+sudo service gridengine-master restart
+
+# Disable Postfix
+sudo service postfix stop
+sudo update-rc.d postfix disable
diff --git a/ci/sge/setup-slave.sh b/ci/sge/setup-slave.sh
new file mode 100755
index 00000000..58d9d873
--- /dev/null
+++ b/ci/sge/setup-slave.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+export MASTER_HOSTNAME=sge_master
+echo "gridengine-common shared/gridenginemaster string $MASTER_HOSTNAME" | sudo debconf-set-selections
+echo "gridengine-common shared/gridenginecell string default" | sudo debconf-set-selections
+echo "gridengine-common shared/gridengineconfig boolean false" | sudo debconf-set-selections
+echo "gridengine-client shared/gridenginemaster string $MASTER_HOSTNAME" | sudo debconf-set-selections
+echo "gridengine-client shared/gridenginecell string default" | sudo debconf-set-selections
+echo "gridengine-client shared/gridengineconfig boolean false" | sudo debconf-set-selections
+echo "postfix postfix/main_mailer_type select No configuration" | sudo debconf-set-selections
+
+sudo DEBIAN_FRONTEND=noninteractive apt-get install -y gridengine-exec gridengine-client gridengine-drmaa-dev -qq
+
+sudo service postfix stop
+sudo update-rc.d postfix disable
+echo "$MASTER_HOSTNAME" | sudo tee /var/lib/gridengine/default/common/act_qmaster
diff --git a/ci/sge/start-sge.sh b/ci/sge/start-sge.sh
new file mode 100755
index 00000000..c5559828
--- /dev/null
+++ b/ci/sge/start-sge.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+docker-compose up -d
+
+# Poll qhost on the master until it lists exactly two lx26-amd64 hosts. Give up
+# after roughly SGE_START_TIMEOUT seconds (default 300) instead of looping forever.
+timeout=${SGE_START_TIMEOUT:-300}
+waited=0
+while [ "$(docker exec -it sge_master qhost | grep -c lx26-amd64)" -ne 2 ]
+do
+    if [ "$waited" -ge "$timeout" ]; then
+        echo "Timed out after ${timeout}s waiting for SGE" >&2
+        exit 1
+    fi
+    echo "Waiting for SGE slots to become available"
+    sleep 1
+    waited=$((waited + 1))
+done
+echo "SGE properly configured"
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 00000000..3a35d9e0
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,22 @@
+#
content of conftest.py: adds the -E NAME option and the env(name) marker
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption("-E", action="store", metavar="NAME",
+                     help="only run tests matching the environment NAME.")
+
+
+def pytest_configure(config):
+    # register an additional marker
+    config.addinivalue_line(
+        "markers", "env(name): mark test to run only on named environment")
+
+
+def pytest_runtest_setup(item):
+    # get_marker() was removed in pytest 4; prefer get_closest_marker()
+    get_marker = getattr(item, "get_closest_marker", None) or item.get_marker
+    envmarker = get_marker("env")
+    if envmarker is not None and (
+            envmarker.args[0] != item.config.getoption("-E")):
+        pytest.skip("test requires env %r" % envmarker.args[0])
diff --git a/dask_jobqueue/tests/test_jobqueue_core.py b/dask_jobqueue/tests/test_jobqueue_core.py
new file mode 100644
index 00000000..2f0d9a67
--- /dev/null
+++ b/dask_jobqueue/tests/test_jobqueue_core.py
@@ -0,0 +1,4 @@
+
+def test_jq_core_placeholder():
+    # to test that CI is working
+    pass
diff --git a/dask_jobqueue/tests/test_pbs.py b/dask_jobqueue/tests/test_pbs.py
index cde12568..a72ccad5 100644
--- a/dask_jobqueue/tests/test_pbs.py
+++ b/dask_jobqueue/tests/test_pbs.py
@@ -7,9 +7,11 @@ from distributed.utils_test import loop  # noqa: F401
 
 from dask_jobqueue import PBSCluster
 
+pytestmark = pytest.mark.env("pbs")
 
-def test_basic(loop):
-    with PBSCluster(walltime='00:02:00', threads=2, memory='7GB',
+
+def test_basic(loop):  # noqa: F811
+    with PBSCluster(walltime='00:02:00', threads_per_worker=2, memory='7GB',
                     interface='ib0', loop=loop) as cluster:
         with Client(cluster) as client:
             workers = cluster.start_workers(2)
@@ -32,7 +34,7 @@ def test_basic(loop):
             assert not cluster.jobs
 
 
-def test_adaptive(loop):
+def test_adaptive(loop):  # noqa: F811
     with PBSCluster(walltime='00:02:00', loop=loop) as cluster:
         cluster.adapt()
         with Client(cluster) as client:
@@ -59,7 +61,7 @@ def test_adaptive(loop):
                 assert time() < start + 10
 
 
-@pytest.mark.skipif('PBS_ACCOUNT' in os.environ, reason='PBS_ACCOUNT defined')
+@pytest.mark.skipif('PBS_ACCOUNT' in os.environ, reason='PBS_ACCOUNT defined')  # noqa: F811
 def
test_errors(loop):
     with pytest.raises(ValueError) as info:
         PBSCluster()
diff --git a/dask_jobqueue/tests/test_sge.py b/dask_jobqueue/tests/test_sge.py
new file mode 100644
index 00000000..15e984c8
--- /dev/null
+++ b/dask_jobqueue/tests/test_sge.py
@@ -0,0 +1,9 @@
+
+import pytest
+
+pytestmark = pytest.mark.env("sge")
+
+
+def test_sge_placeholder():
+    # to test that CI is working
+    pass
diff --git a/dask_jobqueue/tests/test_slurm.py b/dask_jobqueue/tests/test_slurm.py
index 629da7b9..ba107b77 100644
--- a/dask_jobqueue/tests/test_slurm.py
+++ b/dask_jobqueue/tests/test_slurm.py
@@ -7,9 +7,11 @@ from distributed.utils_test import loop  # noqa: F401
 
 from dask_jobqueue import SLURMCluster
 
+pytestmark = pytest.mark.env("slurm")
 
-def test_basic(loop):
-    with SLURMCluster(walltime='00:02:00', threads=2, memory='7GB',
+
+def test_basic(loop):  # noqa: F811
+    with SLURMCluster(walltime='00:02:00', threads_per_worker=2, memory='7GB',
                       loop=loop) as cluster:
         with Client(cluster) as client:
             workers = cluster.start_workers(2)
@@ -32,7 +34,7 @@ def test_basic(loop):
             assert not cluster.jobs
 
 
-def test_adaptive(loop):
+def test_adaptive(loop):  # noqa: F811
     with SLURMCluster(walltime='00:02:00', loop=loop) as cluster:
         cluster.adapt()
         with Client(cluster) as client:
@@ -42,7 +44,8 @@ def test_adaptive(loop):
             assert cluster.jobs
 
             start = time()
-            while len(client.scheduler_info()['workers']) != cluster.config['processes']:
+            while (len(client.scheduler_info()['workers']) !=
+                   cluster.config['processes']):
                 sleep(0.1)
                 assert time() < start + 10
 
@@ -59,7 +62,7 @@ def test_adaptive(loop):
                 assert time() < start + 10
 
 
-@pytest.mark.skipif('PBS_ACCOUNT' in os.environ, reason='PBS_ACCOUNT defined')
+@pytest.mark.skipif('PBS_ACCOUNT' in os.environ, reason='PBS_ACCOUNT defined')  # noqa: F811
 def test_errors(loop):
     with pytest.raises(ValueError) as info:
         SLURMCluster()