Read default scheduler options from the jobqueue configuration.

When ``scheduler_options`` is not passed to a cluster constructor, it now
falls back to the ``jobqueue.<config_name>.scheduler-options`` config section
(default ``{}``) instead of always starting from an empty dict.  The bundled
``jobqueue.yaml`` gains that key for every cluster type and lists ``extra``
above the resource-manager options.  A test covers both the config fallback
and the explicit override (it is skipped on hosts with fewer than two network
interfaces, which the override check needs), and the changelog is updated.

NOTE(review): line breaks and indentation (YAML comment alignment, wrapped
changelog context) were reconstructed from a whitespace-collapsed copy of this
patch -- confirm against the target tree before applying.  The test hunk
differs from the originally recorded one, so the post-image blob id on its
``index`` line is stale.

diff --git a/dask_jobqueue/core.py b/dask_jobqueue/core.py
index 994bc886..97eb0e4f 100644
--- a/dask_jobqueue/core.py
+++ b/dask_jobqueue/core.py
@@ -477,7 +477,9 @@ def __init__(
         if interface is None:
             interface = dask.config.get("jobqueue.%s.interface" % config_name)
         if scheduler_options is None:
-            scheduler_options = {}
+            scheduler_options = dask.config.get(
+                "jobqueue.%s.scheduler-options" % config_name, {}
+            )
 
         default_scheduler_options = {
             "protocol": protocol,
diff --git a/dask_jobqueue/jobqueue.yaml b/dask_jobqueue/jobqueue.yaml
index 14fdb67e..ec8045f8 100644
--- a/dask_jobqueue/jobqueue.yaml
+++ b/dask_jobqueue/jobqueue.yaml
@@ -10,17 +10,20 @@ jobqueue:
     interface: null             # Network interface to use like eth0 or ib0
     death-timeout: 60           # Number of seconds to wait if a worker can not find a scheduler
     local-directory: null       # Location of fast local storage like /scratch or $TMPDIR
+    extra: []
 
     # OAR resource manager options
     shebang: "#!/usr/bin/env bash"
     queue: null
     project: null
     walltime: '00:30:00'
-    extra: []
     env-extra: []
     resource-spec: null
     job-extra: []
     log-directory: null
+
+    # Scheduler options
+    scheduler-options: {}
 
   pbs:
     name: dask-worker
@@ -33,17 +36,20 @@ jobqueue:
     interface: null             # Network interface to use like eth0 or ib0
     death-timeout: 60           # Number of seconds to wait if a worker can not find a scheduler
     local-directory: null       # Location of fast local storage like /scratch or $TMPDIR
+    extra: []
 
     # PBS resource manager options
     shebang: "#!/usr/bin/env bash"
     queue: null
     project: null
     walltime: '00:30:00'
-    extra: []
     env-extra: []
     resource-spec: null
     job-extra: []
     log-directory: null
+
+    # Scheduler options
+    scheduler-options: {}
 
   sge:
     name: dask-worker
@@ -56,18 +62,20 @@ jobqueue:
     interface: null             # Network interface to use like eth0 or ib0
     death-timeout: 60           # Number of seconds to wait if a worker can not find a scheduler
     local-directory: null       # Location of fast local storage like /scratch or $TMPDIR
+    extra: []
 
     # SGE resource manager options
     shebang: "#!/usr/bin/env bash"
     queue: null
     project: null
     walltime: '00:30:00'
-    extra: []
     env-extra: []
     job-extra: []
     log-directory: null
-
     resource-spec: null
+
+    # Scheduler options
+    scheduler-options: {}
 
   slurm:
     name: dask-worker
@@ -80,18 +88,21 @@ jobqueue:
     interface: null             # Network interface to use like eth0 or ib0
     death-timeout: 60           # Number of seconds to wait if a worker can not find a scheduler
     local-directory: null       # Location of fast local storage like /scratch or $TMPDIR
+    extra: []
 
     # SLURM resource manager options
     shebang: "#!/usr/bin/env bash"
     queue: null
     project: null
     walltime: '00:30:00'
-    extra: []
     env-extra: []
     job-cpu: null
     job-mem: null
     job-extra: []
     log-directory: null
+
+    # Scheduler options
+    scheduler-options: {}
 
   moab:
     name: dask-worker
@@ -104,17 +115,20 @@ jobqueue:
     interface: null             # Network interface to use like eth0 or ib0
     death-timeout: 60           # Number of seconds to wait if a worker can not find a scheduler
     local-directory: null       # Location of fast local storage like /scratch or $TMPDIR
+    extra: []
 
     # PBS resource manager options
     shebang: "#!/usr/bin/env bash"
     queue: null
     project: null
     walltime: '00:30:00'
-    extra: []
     env-extra: []
     resource-spec: null
     job-extra: []
     log-directory: null
+
+    # Scheduler options
+    scheduler-options: {}
 
   lsf:
     name: dask-worker
@@ -127,13 +141,13 @@ jobqueue:
     interface: null             # Network interface to use like eth0 or ib0
     death-timeout: 60           # Number of seconds to wait if a worker can not find a scheduler
     local-directory: null       # Location of fast local storage like /scratch or $TMPDIR
+    extra: []
 
     # LSF resource manager options
     shebang: "#!/usr/bin/env bash"
     queue: null
     project: null
     walltime: '00:30'
-    extra: []
     env-extra: []
     ncpus: null
     mem: null
@@ -141,6 +155,9 @@ jobqueue:
     log-directory: null
     lsf-units: null
     use-stdin: True             # (bool) How jobs are launched, i.e. 'bsub jobscript.sh' or 'bsub < jobscript.sh'
+
+    # Scheduler options
+    scheduler-options: {}
 
   htcondor:
     name: dask-worker
@@ -153,14 +170,17 @@ jobqueue:
     interface: null             # Network interface to use like eth0 or ib0
     death-timeout: 60           # Number of seconds to wait if a worker can not find a scheduler
     local-directory: null       # Location of fast local storage like /scratch or $TMPDIR
+    extra: []
 
     # HTCondor Resource Manager options
     disk: null                  # Total amount of disk per job
-    extra: []
     env-extra: []
     job-extra: {}               # Extra submit attributes
     log-directory: null
     shebang: "#!/usr/bin/env condor_submit"
+
+    # Scheduler options
+    scheduler-options: {}
 
   local:
     name: dask-worker
@@ -172,8 +192,11 @@ jobqueue:
     interface: null             # Network interface to use like eth0 or ib0
     death-timeout: 60           # Number of seconds to wait if a worker can not find a scheduler
     local-directory: null       # Location of fast local storage like /scratch or $TMPDIR
-
     extra: []
+
     env-extra: []
     job-extra: []
     log-directory: null
+
+    # Scheduler options
+    scheduler-options: {}
diff --git a/dask_jobqueue/tests/test_jobqueue_core.py b/dask_jobqueue/tests/test_jobqueue_core.py
index bc9c94e0..db44002b 100644
--- a/dask_jobqueue/tests/test_jobqueue_core.py
+++ b/dask_jobqueue/tests/test_jobqueue_core.py
@@ -343,3 +343,41 @@ def test_cluster_error_scheduler_arguments_should_use_scheduler_options(Cluster)
     with pytest.raises(ValueError, match=message):
         with Cluster(cores=1, memory="1GB", dashboard_address=":8787"):
             pass
+
+
+@pytest.mark.parametrize(
+    "Cluster",
+    [PBSCluster, MoabCluster, SLURMCluster, SGECluster, LSFCluster, OARCluster],
+)
+def test_import_scheduler_options_from_config(Cluster):
+    """Scheduler options come from the config unless passed explicitly."""
+    interfaces = list(psutil.net_if_addrs())
+    if len(interfaces) < 2:
+        # The override check needs an interface distinct from the configured one.
+        pytest.skip("test requires at least two network interfaces")
+
+    config_scheduler_interface, pass_scheduler_interface = interfaces[:2]
+    config_scheduler_port = 8804
+    config_scheduler_options = {
+        "interface": config_scheduler_interface,
+        "port": config_scheduler_port,
+    }
+    config_key = "jobqueue.%s.scheduler-options" % Cluster.job_cls.config_name
+
+    with dask.config.set({config_key: config_scheduler_options}):
+        # No explicit scheduler_options: the values are read from the config.
+        with Cluster(cores=2, memory="2GB") as cluster:
+            scheduler_options = cluster.scheduler_spec["options"]
+            assert scheduler_options.get("interface") == config_scheduler_interface
+            assert scheduler_options.get("port") == config_scheduler_port
+
+        # Explicit scheduler_options bypass the config lookup entirely, so the
+        # configured port must not leak into the resulting options.
+        with Cluster(
+            cores=2,
+            memory="2GB",
+            scheduler_options={"interface": pass_scheduler_interface},
+        ) as cluster:
+            scheduler_options = cluster.scheduler_spec["options"]
+            assert scheduler_options.get("interface") == pass_scheduler_interface
+            assert scheduler_options.get("port") is None
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index a918dae2..c017867b 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -8,7 +8,8 @@ Development version
   the Dask scheduler. For example ``scheduler_options={'interface': 'eth0',
   dashboard_addresses=':12435')`` (:pr:`384`). Breaking change: using ``port`` or
   ``dashboard_addresses`` arguments raises an error. They have to be passed
-  through ``scheduler_options``.
+  through ``scheduler_options``. ``scheduler_options`` can be set through the
+  config file in the ``scheduler-options`` section (:pr:`405`).
 - all cluster classes: ``processes`` parameter default has changed. By default,
   ``processes ~= sqrt(cores)`` so that the number of processes and the number of
   threads per process is roughly the same. Old default was to use one