Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Update CI matrix to use NVKS nodes. (#3572)
* Update CI matrix to use NVKS nodes.
* Update windows CI scripts to accept -arch.
* Move all non-Catch2 device algo tests to lid0/lid1.
This makes sure that they run in the correct CI config on appropriate hardware.
* Switch to all rtx queues:
CUB -> RTXA6000 (48GiB)
Thrust -> RTX4090 (24GiB)
Others -> RTX2080 (8GiB)
  • Loading branch information
alliepiper authored and miscco committed Jan 30, 2025
commit 0ce2d0bc5a9658eb4c1afbccd0ceb22f83acac97
63 changes: 26 additions & 37 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,28 @@ workflows:
- {jobs: ['build'], std: 'max', cxx: ['intel', 'msvc2019']}
- {jobs: ['build'], std: [17, 20], cxx: ['gcc', 'clang', 'msvc']}
# Current CTK testing:
- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['gcc', 'clang']}
- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx4090'}
- {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}
# Disabled until we figure out the issue with the TBB dll
#- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['msvc']}
#- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['msvc'], gpu: 'rtx4090'}
- {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['msvc'], gpu: 'rtx2080'}
# Split up cub tests:
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc']}
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc']}
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc']}
- {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' }
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'}
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'}
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 'rtxa6000'}
- {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' }
# Modded builds:
- {jobs: ['build'], std: [17, 20], ctk: '12.5', cxx: 'nvhpc'}
- {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['build'], std: 'max', cxx: ['gcc'], sm: '90a'}
# Test Thrust 32-bit-only dispatch here, since it's most likely to break. 64-bit-only is tested in nightly.
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
# default_projects: clang-cuda
- {jobs: ['build'], std: [17, 20], cudacxx: 'clang', cxx: 'clang'}
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'}
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90a'}
# nvrtc:
- {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all'}
- {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'}
# verify-codegen:
- {jobs: ['verify_codegen'], project: 'libcudacxx'}
# cudax has different CTK reqs:
Expand All @@ -52,19 +54,19 @@ workflows:
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 17, cxx: ['gcc'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: [17, 20], cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc']}
- {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc'], gpu: 'rtx2080'}
# Python and c/parallel jobs:
- {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6'}
- {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6', gpu: 'rtx2080'}
# cccl-infra:
- {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc6', 'clang9']}
- {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14']}
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang']}
- {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc6', 'clang9'], gpu: 'rtx2080'}
- {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080'}
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}

nightly:
# Edge-case jobs
- {jobs: ['limited'], project: 'cub', std: 17}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit'}
- {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'}
# Old CTK
- {jobs: ['build'], std: 'all', ctk: '11.1', cxx: ['gcc6', 'gcc7', 'gcc8', 'gcc9', 'clang9', 'msvc2017']}
- {jobs: ['build'], std: 'all', ctk: '11.8', cxx: ['gcc11'], sm: '60;70;80;90'}
Expand All @@ -73,7 +75,11 @@ workflows:
- {jobs: ['build'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13', 'clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], std: 'all', cxx: ['intel', 'msvc2019']}
# Test current CTK
- {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022']}
- {jobs: ['test'], project: 'cub', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'}
- {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'v100'}
- {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'h100', sm: 'gpu' }
- {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'}
- {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
# Modded builds:
- {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'}
- {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'}
Expand All @@ -92,26 +98,9 @@ workflows:
- {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['gcc12'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13', 'clang16'], cpu: 'arm64'}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14']}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18']}

# # These are waiting on the NVKS nodes:
# - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc6', std: [11]}
# - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang9', std: [17]}
# - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all'}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang9', std: [11]}
# # H100 runners are currently flakey, only build since those use CPU-only runners:
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20]}
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang18', std: [17]}
#
# # nvrtc:
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 't4', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all', project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc13', std: [11, 20], project: ['libcudacxx']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12'] , gpu: 'rtx2080'}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14'], gpu: 'rtx2080'}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18'], gpu: 'rtx2080'}

# Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows.
exclude:
Expand Down
15 changes: 13 additions & 2 deletions ci/windows/build_common.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -20,6 +24,12 @@ if ($script:CL_VERSION_STRING -match "Version (\d+\.\d+)\.\d+") {
Write-Host "Detected cl.exe version: $CL_VERSION"
}

$script:GLOBAL_CMAKE_OPTIONS = ""
if ($CUDA_ARCH -ne 0) {
$script:GLOBAL_CMAKE_OPTIONS += "-DCMAKE_CUDA_ARCHITECTURES=$CUDA_ARCH"
}


if (-not $env:CCCL_BUILD_INFIX) {
$env:CCCL_BUILD_INFIX = ""
}
Expand Down Expand Up @@ -56,6 +66,7 @@ Write-Host "NVCC_VERSION=$NVCC_VERSION"
Write-Host "CMAKE_BUILD_PARALLEL_LEVEL=$env:CMAKE_BUILD_PARALLEL_LEVEL"
Write-Host "CTEST_PARALLEL_LEVEL=$env:CTEST_PARALLEL_LEVEL"
Write-Host "CCCL_BUILD_INFIX=$env:CCCL_BUILD_INFIX"
Write-Host "GLOBAL_CMAKE_OPTIONS=$script:GLOBAL_CMAKE_OPTIONS"
Write-Host "Current commit is:"
Write-Host "$(git log -1 --format=short)"
Write-Host "========================================"
Expand All @@ -82,7 +93,7 @@ function configure_preset {
pushd ".."

# Echo and execute command to stdout:
$configure_command = "cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE"
$configure_command = "cmake --preset $PRESET $script:GLOBAL_CMAKE_OPTIONS $CMAKE_OPTIONS --log-level VERBOSE"
Write-Host $configure_command
Invoke-Expression $configure_command
$test_result = $LastExitCode
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_cub.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
pushd "$PSScriptRoot/.."
}

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "cub-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_cudax.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(20)]
[int]$CXX_STANDARD = 20
[int]$CXX_STANDARD = 20,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$CURRENT_PATH = Split-Path $pwd -leaf
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
}

Remove-Module -Name build_common
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "cudax-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_libcudacxx.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
pushd "$PSScriptRoot/.."
}

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $GPU_ARCHS
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "libcudacxx-cpp${CXX_STANDARD}"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_thrust.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
pushd "$PSScriptRoot/.."
}

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "thrust-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/test_thrust.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ Param(
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0,
[Parameter(Mandatory = $false)]
[Alias("cpu-only")]
[switch]$CPU_ONLY = $false
)
Expand All @@ -24,11 +28,11 @@ If($CURRENT_PATH -ne "ci") {
}

# Execute the build script:
$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD"
$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD -arch $CUDA_ARCH"
Write-Host "Executing: $build_command"
Invoke-Expression $build_command

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module -Name "$PSScriptRoot/build_common.psm1" -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "thrust-cpu-cpp$CXX_STANDARD"

Expand Down
9 changes: 9 additions & 0 deletions cub/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,15 @@ foreach (test_src IN LISTS test_srcs)
set(launcher 0)
endif()

# FIXME: There are a few remaining device algorithm tests that have not been ported to
# use Catch2 and lid variants. Mark these as `lid_0/1` so they'll run in the appropriate
# CI configs:
string(REGEX MATCH "^device_" is_device_test "${test_name}")
_cub_is_fail_test(is_fail_test "%{test_name}")
if (is_device_test AND NOT is_fail_test)
string(APPEND test_name ".lid_${launcher}")
endif()

# Only one version of this test.
cub_add_test(test_target ${test_name} "${test_src}" ${cub_target} ${launcher})
cub_configure_cuda_target(${test_target} RDC ${CUB_FORCE_RDC})
Expand Down
Loading