diff --git a/distributed/distributed-schema.yaml b/distributed/distributed-schema.yaml index 06e973a81b7..79e8986b4d9 100644 --- a/distributed/distributed-schema.yaml +++ b/distributed/distributed-schema.yaml @@ -959,59 +959,6 @@ properties: Alternatively, the key can be appended to the cert file above, and this field left blank - ucx: - type: object - description: | - UCX provides access to other transport methods including NVLink and InfiniBand. - properties: - cuda-copy: - type: [boolean, 'null'] - description: | - Set environment variables to enable CUDA support over UCX. This may be used even if - InfiniBand and NVLink are not supported or disabled, then transferring data over TCP. - tcp: - type: [boolean, 'null'] - description: | - Set environment variables to enable TCP over UCX, even if InfiniBand and NVLink - are not supported or disabled. - nvlink: - type: [boolean, 'null'] - description: | - Set environment variables to enable UCX over NVLink, implies ``distributed.comm.ucx.tcp=True``. - infiniband: - type: [boolean, 'null'] - description: | - Set environment variables to enable UCX over InfiniBand, implies ``distributed.comm.ucx.tcp=True``. - rdmacm: - type: [boolean, 'null'] - description: | - Set environment variables to enable UCX RDMA connection manager support, - requires ``distributed.comm.ucx.infiniband=True``. - create-cuda-context: - type: [boolean, 'null'] - description: | - Creates a CUDA context before UCX is initialized. This is necessary to enable UCX to - properly identify connectivity of GPUs with specialized networking hardware, such as - InfiniBand. This permits UCX to choose transports automatically, without specifying - additional variables for each transport, while ensuring optimal connectivity. When - ``True``, a CUDA context will be created on the first device listed in - ``CUDA_VISIBLE_DEVICES``. - environment: - type: object - description: | - Mapping for setting arbitrary UCX environment variables. - Names here are translated via the following rules to - map to the relevant UCX environment variable: - - hyphens are replaced with underscores - - words are uppercased - - UCX_ is prepended - So, for example, setting ``some-option=value`` is - equivalent to setting ``UCX_SOME_OPTION=value`` in - the calling environment. - - For a full list of supported UCX environment - variables, run ``ucx_info -f``. - websockets: type: object properties: diff --git a/distributed/distributed.yaml b/distributed/distributed.yaml index 5ab049c2a2b..21ad5275f25 100644 --- a/distributed/distributed.yaml +++ b/distributed/distributed.yaml @@ -230,16 +230,6 @@ distributed: offload: 10MiB # Size after which we choose to offload serialization to another thread default-scheme: tcp socket-backlog: 2048 - ucx: - cuda-copy: null # enable cuda-copy - tcp: null # enable tcp - nvlink: null # enable cuda_ipc - infiniband: null # enable Infiniband - rdmacm: null # enable RDMACM - create-cuda-context: null # create CUDA context before UCX initialization - environment: {} # Any other environment settings to - # be transferred to UCX. Name - # munging: key-name => UCX_KEY_NAME zstd: level: 3 # Compression level, between 1 and 22. threads: 0 # Threads to use. 0 for single-threaded, -1 to infer from cpu count. diff --git a/distributed/worker.py b/distributed/worker.py index 7e3c01ba5c5..4c2c1ca6c06 100644 --- a/distributed/worker.py +++ b/distributed/worker.py @@ -1626,8 +1626,10 @@ async def close( # type: ignore # Give some time for a UCX scheduler to complete closing endpoints # before closing self.batched_stream, otherwise the local endpoint # may be closed too early and errors be raised on the scheduler when - # trying to send closing message. - if self._protocol == "ucx": # pragma: no cover + # trying to send closing message. Using startswith supports variations + # of the protocols, e.g., `ucx` and `ucxx` which are both valid in + # distributed-ucxx. + if self._protocol.startswith("ucx"): # pragma: no cover await asyncio.sleep(0.2) self.batched_send({"op": "close-stream"})