From ae13677be8f077499a6ea58ef05f48f7b308c475 Mon Sep 17 00:00:00 2001 From: Guillaume EB Date: Mon, 12 Sep 2022 13:33:23 +0400 Subject: [PATCH 1/5] Clean up inconsistencies in the directory where the tests are run from --- ci/sge.sh | 2 +- ci/slurm.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/sge.sh b/ci/sge.sh index 1cef35ff..5c70a3cb 100644 --- a/ci/sge.sh +++ b/ci/sge.sh @@ -20,7 +20,7 @@ function jobqueue_install { } function jobqueue_script { - docker exec sge_master /bin/bash -c "cd /dask-jobqueue; pytest dask_jobqueue --verbose -s -E sge" + docker exec sge_master /bin/bash -c "cd; pytest /dask-jobqueue/dask_jobqueue --verbose -s -E sge" } function jobqueue_after_script { diff --git a/ci/slurm.sh b/ci/slurm.sh index 76320394..c409cfb8 100644 --- a/ci/slurm.sh +++ b/ci/slurm.sh @@ -29,7 +29,7 @@ function jobqueue_install { } function jobqueue_script { - docker exec slurmctld /bin/bash -c "pytest /dask-jobqueue/dask_jobqueue --verbose -E slurm -s" + docker exec slurmctld /bin/bash -c "cd; pytest /dask-jobqueue/dask_jobqueue --verbose -E slurm -s" } function jobqueue_after_script { From 9875ba5540b61c53bc95abcc213947801befd001 Mon Sep 17 00:00:00 2001 From: Guillaume EB Date: Mon, 12 Sep 2022 15:13:48 +0400 Subject: [PATCH 2/5] Add a shared directory for all CI clusters --- ci/htcondor.sh | 3 +++ ci/htcondor/docker-compose.yml | 4 ++++ ci/pbs.sh | 4 ++++ ci/pbs/docker-compose.yml | 12 ++++++++++-- ci/sge.sh | 3 +++ ci/sge/docker-compose.yml | 12 ++++++++++-- ci/slurm.sh | 3 +++ ci/slurm/docker-compose.yml | 4 ++++ dask_jobqueue/tests/test_jobqueue_core.py | 13 ++++--------- 9 files changed, 45 insertions(+), 13 deletions(-) diff --git a/ci/htcondor.sh b/ci/htcondor.sh index b807140f..ec2c8417 100755 --- a/ci/htcondor.sh +++ b/ci/htcondor.sh @@ -12,6 +12,9 @@ function jobqueue_before_install { docker-compose exec -T submit /bin/bash -c "condor_q" cd - + #Set shared space permissions + docker-compose exec -T submit /bin/bash -c 
"chmod -R 777 /shared_space" + docker ps -a docker images } diff --git a/ci/htcondor/docker-compose.yml b/ci/htcondor/docker-compose.yml index 4fbad966..496f30bf 100644 --- a/ci/htcondor/docker-compose.yml +++ b/ci/htcondor/docker-compose.yml @@ -26,6 +26,7 @@ services: - secrets:/root/secrets - ../..:/dask-jobqueue - ./condor_config.local:/etc/condor/condor_config.local + - shared_space:/shared_space execute1: image: daskdev/dask-jobqueue:htcondor-execute @@ -41,6 +42,7 @@ services: volumes: - secrets:/root/secrets - ./condor_config.local:/etc/condor/condor_config.local + - shared_space:/shared_space execute2: image: daskdev/dask-jobqueue:htcondor-execute @@ -56,6 +58,8 @@ services: volumes: - secrets:/root/secrets - ./condor_config.local:/etc/condor/condor_config.local + - shared_space:/shared_space volumes: secrets: + shared_space: diff --git a/ci/pbs.sh b/ci/pbs.sh index 0e2dd7c8..b4af1127 100644 --- a/ci/pbs.sh +++ b/ci/pbs.sh @@ -10,6 +10,10 @@ function jobqueue_before_install { ./start-pbs.sh cd - + #Set shared space permissions + docker exec pbs_master /bin/bash -c "chmod -R 777 /shared_space" + docker exec pbs_master /bin/bash -c "chown -R pbsuser:pbsuser /home/pbsuser" + docker exec -u pbsuser pbs_master pbsnodes -a docker ps -a docker images diff --git a/ci/pbs/docker-compose.yml b/ci/pbs/docker-compose.yml index cbb8cada..803210d7 100644 --- a/ci/pbs/docker-compose.yml +++ b/ci/pbs/docker-compose.yml @@ -9,6 +9,8 @@ services: hostname: pbs_master volumes: - ../..:/dask-jobqueue + - userhome:/home/pbsuser + - shared_space:/shared_space command: bash /run-master.sh slave_one: @@ -17,7 +19,8 @@ services: container_name: pbs_slave_1 hostname: pbs_slave_1 volumes: - - ../..:/dask-jobqueue + - userhome:/home/pbsuser + - shared_space:/shared_space entrypoint: "bash /slave-entrypoint.sh" command: bash /run-slave.sh links: @@ -33,7 +36,8 @@ services: container_name: pbs_slave_2 hostname: pbs_slave_2 volumes: - - ../..:/dask-jobqueue + - userhome:/home/pbsuser + 
- shared_space:/shared_space entrypoint: "bash /slave-entrypoint.sh" command: bash /run-slave.sh links: @@ -42,3 +46,7 @@ services: - PBS_MASTER=pbs_master depends_on: - master + +volumes: + userhome: + shared_space: \ No newline at end of file diff --git a/ci/sge.sh b/ci/sge.sh index 5c70a3cb..227e8db2 100644 --- a/ci/sge.sh +++ b/ci/sge.sh @@ -10,6 +10,9 @@ function jobqueue_before_install { ./start-sge.sh cd - + #Set shared space permissions + docker exec sge_master /bin/bash -c "chmod -R 777 /shared_space" + docker ps -a docker images docker exec sge_master qconf -sq dask.q diff --git a/ci/sge/docker-compose.yml b/ci/sge/docker-compose.yml index d1b5921d..f440093c 100644 --- a/ci/sge/docker-compose.yml +++ b/ci/sge/docker-compose.yml @@ -12,6 +12,8 @@ services: #network_mode: host volumes: - ../..:/dask-jobqueue + - userhome:/root + - shared_space:/shared_space command: bash /dask-jobqueue/ci/sge/run-master.sh slave-one: @@ -23,7 +25,8 @@ services: hostname: slave_one #network_mode: host volumes: - - ../..:/dask-jobqueue + - userhome:/root + - shared_space:/shared_space command: bash /dask-jobqueue/ci/sge/run-slave.sh links: - "master:sge_master" @@ -41,9 +44,14 @@ services: hostname: slave_two #network_mode: host volumes: - - ../..:/dask-jobqueue + - userhome:/root + - shared_space:/shared_space command: bash /dask-jobqueue/ci/sge/run-slave.sh links: - "master:sge_master" depends_on: - master + +volumes: + userhome: + shared_space: \ No newline at end of file diff --git a/ci/slurm.sh b/ci/slurm.sh index c409cfb8..9e1fa2ba 100644 --- a/ci/slurm.sh +++ b/ci/slurm.sh @@ -10,6 +10,9 @@ function jobqueue_before_install { ./start-slurm.sh cd - + #Set shared space permissions + docker exec slurmctld /bin/bash -c "chmod -R 777 /shared_space" + docker ps -a docker images show_network_interfaces diff --git a/ci/slurm/docker-compose.yml b/ci/slurm/docker-compose.yml index de807554..adbac1a6 100644 --- a/ci/slurm/docker-compose.yml +++ b/ci/slurm/docker-compose.yml @@ 
-44,6 +44,7 @@ services: - slurm_jobdir:/data - var_log_slurm:/var/log/slurm - ../..:/dask-jobqueue + - shared_space:/shared_space expose: - "6817" depends_on: @@ -65,6 +66,7 @@ services: - etc_slurm:/etc/slurm - slurm_jobdir:/data - var_log_slurm:/var/log/slurm + - shared_space:/shared_space expose: - "6818" depends_on: @@ -86,6 +88,7 @@ services: - etc_slurm:/etc/slurm - slurm_jobdir:/data - var_log_slurm:/var/log/slurm + - shared_space:/shared_space expose: - "6818" depends_on: @@ -102,6 +105,7 @@ volumes: slurm_jobdir: var_lib_mysql: var_log_slurm: + shared_space: networks: common-network: diff --git a/dask_jobqueue/tests/test_jobqueue_core.py b/dask_jobqueue/tests/test_jobqueue_core.py index 0afb20c7..8436c86c 100644 --- a/dask_jobqueue/tests/test_jobqueue_core.py +++ b/dask_jobqueue/tests/test_jobqueue_core.py @@ -348,11 +348,9 @@ def test_wrong_parameter_error(Cluster): Cluster(cores=1, memory="1GB", wrong_parameter="wrong_parameter_value") -@pytest.mark.xfail_env({"htcondor": "#535 no shared filesystem in htcondor ci"}) -@pytest.mark.xfail_env({"slurm": "#535 no shared filesystem in slurm ci"}) @pytest.mark.filterwarnings("error:Using a temporary security object:UserWarning") def test_security(EnvSpecificCluster, loop): - dirname = os.path.dirname(__file__) + dirname = "/shared_space" #Shared space configured in all docker compose CIs key = os.path.join(dirname, "key.pem") cert = os.path.join(dirname, "ca.pem") security = Security( @@ -396,10 +394,8 @@ def test_security(EnvSpecificCluster, loop): assert "tls://" in job_script -@pytest.mark.xfail_env({"htcondor": "#535 no shared filesystem in htcondor ci"}) -@pytest.mark.xfail_env({"slurm": "#535 no shared filesystem in slurm ci"}) def test_security_temporary(EnvSpecificCluster, loop): - dirname = os.path.dirname(__file__) + dirname = "/shared_space" #Shared space configured in all docker compose CIs with EnvSpecificCluster( cores=1, memory="500MiB", @@ -440,9 +436,8 @@ def 
test_security_temporary(EnvSpecificCluster, loop): # TODO assert not any([os.path.exists(f) for f in [keyfile, certfile, cafile]]) -@pytest.mark.xfail_env({"htcondor": "#535 no shared filesystem in htcondor ci"}) -@pytest.mark.xfail_env({"slurm": "#535 no shared filesystem in slurm ci"}) -@pytest.mark.xfail_env({"pbs": "current directory (pbsuser home) not shared"}) +@pytest.mark.xfail_env({"htcondor": "Submitting user do not have a shared home directory in CI"}) +@pytest.mark.xfail_env({"slurm": "Submitting user do not have a shared home directory in CI"}) def test_security_temporary_defaults(EnvSpecificCluster, loop): # test automatic behaviour if security is true and shared_temp_directory not set with pytest.warns(UserWarning, match="shared_temp_directory"), EnvSpecificCluster( From 623042db24a2ce1b034538ccf0a016d43471ae08 Mon Sep 17 00:00:00 2001 From: Guillaume EB Date: Mon, 12 Sep 2022 20:25:55 +0400 Subject: [PATCH 3/5] fix test failures --- ci/sge/docker-compose.yml | 2 ++ dask_jobqueue/tests/test_jobqueue_core.py | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/ci/sge/docker-compose.yml b/ci/sge/docker-compose.yml index f440093c..8656414c 100644 --- a/ci/sge/docker-compose.yml +++ b/ci/sge/docker-compose.yml @@ -25,6 +25,7 @@ services: hostname: slave_one #network_mode: host volumes: + - ../..:/dask-jobqueue - userhome:/root - shared_space:/shared_space command: bash /dask-jobqueue/ci/sge/run-slave.sh @@ -44,6 +45,7 @@ services: hostname: slave_two #network_mode: host volumes: + - ../..:/dask-jobqueue - userhome:/root - shared_space:/shared_space command: bash /dask-jobqueue/ci/sge/run-slave.sh diff --git a/dask_jobqueue/tests/test_jobqueue_core.py b/dask_jobqueue/tests/test_jobqueue_core.py index 8436c86c..29776f26 100644 --- a/dask_jobqueue/tests/test_jobqueue_core.py +++ b/dask_jobqueue/tests/test_jobqueue_core.py @@ -4,6 +4,7 @@ import sys import re import psutil +import shutil import pytest @@ -350,7 +351,11 @@ def 
test_wrong_parameter_error(Cluster): @pytest.mark.filterwarnings("error:Using a temporary security object:UserWarning") def test_security(EnvSpecificCluster, loop): - dirname = "/shared_space" #Shared space configured in all docker compose CIs + dirname = "/shared_space" # Shared space configured in all docker compose CIs + # Copy security files into the shared folder + test_dir = os.path.dirname(__file__) + shutil.copy2(os.path.join(test_dir, "key.pem"), dirname) + shutil.copy2(os.path.join(test_dir, "ca.pem"), dirname) key = os.path.join(dirname, "key.pem") cert = os.path.join(dirname, "ca.pem") security = Security( @@ -395,7 +400,7 @@ def test_security(EnvSpecificCluster, loop): def test_security_temporary(EnvSpecificCluster, loop): - dirname = "/shared_space" #Shared space configured in all docker compose CIs + dirname = "/shared_space" # Shared space configured in all docker compose CIs with EnvSpecificCluster( cores=1, memory="500MiB", @@ -436,8 +441,12 @@ def test_security_temporary(EnvSpecificCluster, loop): # TODO assert not any([os.path.exists(f) for f in [keyfile, certfile, cafile]]) -@pytest.mark.xfail_env({"htcondor": "Submitting user do not have a shared home directory in CI"}) -@pytest.mark.xfail_env({"slurm": "Submitting user do not have a shared home directory in CI"}) +@pytest.mark.xfail_env( + {"htcondor": "Submitting user do not have a shared home directory in CI"} +) +@pytest.mark.xfail_env( + {"slurm": "Submitting user do not have a shared home directory in CI"} +) def test_security_temporary_defaults(EnvSpecificCluster, loop): # test automatic behaviour if security is true and shared_temp_directory not set with pytest.warns(UserWarning, match="shared_temp_directory"), EnvSpecificCluster( From 0b0256ec89e01d21e53a25ee3dd007a0575880e9 Mon Sep 17 00:00:00 2001 From: Guillaume EB Date: Mon, 12 Sep 2022 20:29:08 +0400 Subject: [PATCH 4/5] Flake and newlines --- ci/pbs/docker-compose.yml | 2 +- ci/sge/docker-compose.yml | 2 +- 
dask_jobqueue/tests/test_jobqueue_core.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ci/pbs/docker-compose.yml b/ci/pbs/docker-compose.yml index 803210d7..4a5a5741 100644 --- a/ci/pbs/docker-compose.yml +++ b/ci/pbs/docker-compose.yml @@ -49,4 +49,4 @@ services: volumes: userhome: - shared_space: \ No newline at end of file + shared_space: diff --git a/ci/sge/docker-compose.yml b/ci/sge/docker-compose.yml index 8656414c..22a6bc15 100644 --- a/ci/sge/docker-compose.yml +++ b/ci/sge/docker-compose.yml @@ -56,4 +56,4 @@ services: volumes: userhome: - shared_space: \ No newline at end of file + shared_space: diff --git a/dask_jobqueue/tests/test_jobqueue_core.py b/dask_jobqueue/tests/test_jobqueue_core.py index 29776f26..47b208ce 100644 --- a/dask_jobqueue/tests/test_jobqueue_core.py +++ b/dask_jobqueue/tests/test_jobqueue_core.py @@ -4,7 +4,6 @@ import sys import re import psutil -import shutil import pytest From 0439a6f7e2596554db4c7d9be05253f69a5a40e1 Mon Sep 17 00:00:00 2001 From: Guillaume EB Date: Mon, 12 Sep 2022 21:07:30 +0400 Subject: [PATCH 5/5] Handle LocalCluster test problem --- .gitignore | 2 ++ ci/htcondor/docker-compose.yml | 1 + ci/pbs/docker-compose.yml | 2 ++ ci/sge/docker-compose.yml | 2 ++ ci/slurm/docker-compose.yml | 2 ++ dask_jobqueue/tests/test_jobqueue_core.py | 6 ++++-- 6 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 23a75ada..6f19bdab 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ ci/pbs/environment.yml ci/sge/environment.yml ci/htcondor/environment.yml .vscode/ +ca.pem +key.pem diff --git a/ci/htcondor/docker-compose.yml b/ci/htcondor/docker-compose.yml index 496f30bf..358aafba 100644 --- a/ci/htcondor/docker-compose.yml +++ b/ci/htcondor/docker-compose.yml @@ -20,6 +20,7 @@ services: environment: - CONDOR_HOST=cm - USE_POOL_PASSWORD=yes + - CI_SHARED_SPACE=/shared_space depends_on: - cm volumes: diff --git a/ci/pbs/docker-compose.yml 
b/ci/pbs/docker-compose.yml index 4a5a5741..550c7ae9 100644 --- a/ci/pbs/docker-compose.yml +++ b/ci/pbs/docker-compose.yml @@ -7,6 +7,8 @@ services: build: . container_name: pbs_master hostname: pbs_master + environment: + - CI_SHARED_SPACE=/shared_space volumes: - ../..:/dask-jobqueue - userhome:/home/pbsuser diff --git a/ci/sge/docker-compose.yml b/ci/sge/docker-compose.yml index 22a6bc15..ed0de0ea 100644 --- a/ci/sge/docker-compose.yml +++ b/ci/sge/docker-compose.yml @@ -10,6 +10,8 @@ services: container_name: sge_master hostname: sge_master #network_mode: host + environment: + - CI_SHARED_SPACE=/shared_space volumes: - ../..:/dask-jobqueue - userhome:/root diff --git a/ci/slurm/docker-compose.yml b/ci/slurm/docker-compose.yml index adbac1a6..cdb9475d 100644 --- a/ci/slurm/docker-compose.yml +++ b/ci/slurm/docker-compose.yml @@ -38,6 +38,8 @@ services: command: ["slurmctld"] container_name: slurmctld hostname: slurmctld + environment: + - CI_SHARED_SPACE=/shared_space volumes: - etc_munge:/etc/munge - etc_slurm:/etc/slurm diff --git a/dask_jobqueue/tests/test_jobqueue_core.py b/dask_jobqueue/tests/test_jobqueue_core.py index 47b208ce..8f743f7e 100644 --- a/dask_jobqueue/tests/test_jobqueue_core.py +++ b/dask_jobqueue/tests/test_jobqueue_core.py @@ -350,7 +350,8 @@ def test_wrong_parameter_error(Cluster): @pytest.mark.filterwarnings("error:Using a temporary security object:UserWarning") def test_security(EnvSpecificCluster, loop): - dirname = "/shared_space" # Shared space configured in all docker compose CIs + # Shared space configured in all docker compose CIs, fallback to current dir if does not exist (LocalCluster) + dirname = os.environ.get("CI_SHARED_SPACE", os.getcwd()) # Copy security files into the shared folder test_dir = os.path.dirname(__file__) shutil.copy2(os.path.join(test_dir, "key.pem"), dirname) @@ -399,7 +400,8 @@ def test_security(EnvSpecificCluster, loop): def test_security_temporary(EnvSpecificCluster, loop): - dirname = "/shared_space" # 
Shared space configured in all docker compose CIs + # Shared space configured in all docker compose CIs; use cwd if CI_SHARED_SPACE is unset (LocalCluster) + dirname = os.environ.get("CI_SHARED_SPACE", os.getcwd()) with EnvSpecificCluster( cores=1, memory="500MiB",