@@ -14,8 +14,7 @@ echo "🚀 ===== CUDA Installation Script Started ====="
1414CUDA_INSTALL_PREFIX=${CUDA_INSTALL_PREFIX:- $HOME / opt}
1515CUDA_INSTALL_PREFIX=${CUDA_INSTALL_PREFIX%/ }
1616CUDA_VERSION=${CUDA_VERSION:- 12.8}
17- SKIP_PRUNE=${SKIP_PRUNE:- 1}
18- NVSHMEM_VERSION=${NVSHMEM_VERSION:- 3.3.9}
17+ NVSHMEM_VERSION=${NVSHMEM_VERSION:- 3.3.20}
1918INSTALL_NCCL=${INSTALL_NCCL:- 1}
2019
2120echo " CUDA_INSTALL_PREFIX=${CUDA_INSTALL_PREFIX} "
@@ -195,6 +194,8 @@ function install_nccl {
195194 echo " Getting NCCL version information..."
196195 if [[ ${CUDA_VERSION: 0: 2} == " 12" ]]; then
197196 NCCL_VERSION=$( curl -sL https://github.com/pytorch/pytorch/raw/refs/heads/main/.ci/docker/ci_commit_pins/nccl-cu12.txt)
197+ elif [[ ${CUDA_VERSION: 0: 2} == " 13" ]]; then
198+ NCCL_VERSION=$( curl -sL https://github.com/pytorch/pytorch/raw/refs/heads/main/.ci/docker/ci_commit_pins/nccl-cu13.txt)
198199 else
199200 error_exit " Unsupported CUDA version: ${CUDA_VERSION} "
200201 fi
@@ -249,8 +250,13 @@ function install_cusparselt {
249250 local cusparselt_version
250251 local arch_path=${ARCH_PATH}
251252
252- if [[ ${CUDA_VERSION: 0: 4} =~ ^12\. [5-9]$ ]]; then
253+ local CUSPARSELT_NAME
254+ if [[ ${CUDA_VERSION: 0: 2} == " 13" ]]; then
255+ cusparselt_version=" 0.8.0.4"
256+ CUSPARSELT_NAME=" libcusparse_lt-linux-${arch_path} -${cusparselt_version} _cuda13-archive"
257+ elif [[ ${CUDA_VERSION: 0: 4} =~ ^12\. [5-9]$ ]]; then
253258 cusparselt_version=" 0.7.1.0"
259+ CUSPARSELT_NAME=" libcusparse_lt-linux-${arch_path} -${cusparselt_version} -archive"
254260 else
255261 popd
256262 rm -rf tmp_cusparselt
@@ -259,7 +265,6 @@ function install_cusparselt {
259265
260266 echo " ${cusparselt_version} " > " ${USER_TMPDIR} /cusparselt_version.txt"
261267
262- local CUSPARSELT_NAME=" libcusparse_lt-linux-${arch_path} -${cusparselt_version} -archive"
263268 echo " Downloading cuSparseLt: ${CUSPARSELT_NAME} .tar.xz"
264269
265270 if ! curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path} /${CUSPARSELT_NAME} .tar.xz; then
@@ -310,26 +315,27 @@ function install_nvshmem {
310315 mkdir -p " ${tmpdir} " && cd " ${tmpdir} "
311316
312317 # nvSHMEM license: https://docs.nvidia.com/nvshmem/api/sla.html
313- local filename=" libnvshmem_cuda${cuda_major_version} -linux-${arch_path} -${nvshmem_version} "
314- local url=" https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version} /builds/cuda${cuda_major_version} /txz/agnostic/${dl_arch} /${filename} .tar.gz"
318+ local filename=" libnvshmem-linux-${arch_path} -${nvshmem_version} _cuda${cuda_major_version} -archive"
319+ local suffix=" .tar.xz"
320+ local url=" https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version} /builds/cuda${cuda_major_version} /txz/agnostic/${dl_arch} /${filename}${suffix} "
315321
316- echo " Downloading nvSHMEM: ${filename} .tar.gz "
322+ echo " Downloading nvSHMEM: ${filename}${suffix} "
317323 if ! wget -q " ${url} " ; then
318324 cd ..
319325 rm -rf " ${tmpdir} "
320- error_exit " nvSHMEM download failed: ${filename} .tar.gz "
326+ error_exit " nvSHMEM download failed: ${filename}${suffix} "
321327 fi
322328
323329 echo " Extracting nvSHMEM..."
324- if ! tar xf " ${filename} .tar.gz " ; then
330+ if ! tar xf " ${filename}${suffix} " ; then
325331 cd ..
326332 rm -rf " ${tmpdir} "
327- error_exit " nvSHMEM extraction failed: ${filename} .tar.gz "
333+ error_exit " nvSHMEM extraction failed: ${filename}${suffix} "
328334 fi
329335
330336 echo " Installing nvSHMEM to CUDA directory..."
331- cp -a " libnvshmem /include/" * ${CUDA_INSTALL_PREFIX} /cuda/include/
332- cp -a " libnvshmem /lib/" * ${CUDA_INSTALL_PREFIX} /cuda/lib64/
337+ cp -a " ${filename} /include/" * ${CUDA_INSTALL_PREFIX} /cuda/include/
338+ cp -a " ${filename} /lib/" * ${CUDA_INSTALL_PREFIX} /cuda/lib64/
333339
334340 cd ..
335341 rm -rf " ${tmpdir} "
@@ -371,7 +377,7 @@ function install_126 {
371377
372378# CUDA 12.8 installation function
373379function install_128 {
374- local CUDNN_VERSION=9.10.2.21
380+ local CUDNN_VERSION=9.8.0.87
375381 echo " Starting installation for CUDA 12.8..."
376382
377383 echo " 📦 STEP 1: Installing CUDA toolkit..."
@@ -425,31 +431,39 @@ function install_129 {
425431 return 0
426432}
427433
428- # Simplified pruning function - enable as needed
429- function prune_cuda {
430- local cuda_version=$1
431- local major_minor=$2
434+ # CUDA 13.0 installation function
435+ function install_130 {
436+ local CUDNN_VERSION=9.12.0.46
437+ echo " Starting installation for CUDA 13.0..."
438+
439+ echo " 📦 STEP 1: Installing CUDA toolkit..."
440+ install_cuda " 13.0.0" " cuda_13.0.0_580.65.06_linux" || error_exit " CUDA 13.0.0 toolkit installation failed"
441+
442+ echo " 🧠 STEP 2: Installing cuDNN..."
443+ install_cudnn " 13" " ${CUDNN_VERSION} " || error_exit " cuDNN installation failed"
444+
445+ echo " 🔗 STEP 3: Installing NCCL..."
446+ install_nccl || error_exit " NCCL installation failed"
432447
433- echo " Pruning CUDA ${major_minor} ..."
448+ echo " ⚡ STEP 4: Installing cuSparseLt..."
449+ install_cusparselt || error_exit " cuSparseLt installation failed"
434450
435- # CUDA pruning logic can be added back as needed
436- # Kept empty for now for easier troubleshooting
451+ echo " 💾 STEP 5: Installing nvSHMEM... "
452+ install_nvshmem " 13 " || error_exit " nvSHMEM installation failed "
437453
438- # Pruning complete marker
439- touch " ${USER_TMPDIR} /cuda_${major_minor} _pruned"
454+ if [ " $( id -u) " -eq 0 ]; then
455+ ldconfig
456+ fi
440457
441- echo " CUDA ${major_minor} pruning completed"
458+ echo " ✅ CUDA 13.0 installation completed"
442459 return 0
443460}
444461
445- # Version-specific pruning functions
446- function prune_126 {
447- prune_cuda " 126" " 12.6"
448- }
462+
449463
450464# Main execution logic
451465echo " 🔧 ===== Parsing command line arguments ====="
452- VALID_VERSIONS=(" 12.6" " 12.8" " 12.9" )
466+ VALID_VERSIONS=(" 12.6" " 12.8" " 12.9" " 13.0 " )
453467
454468# Parse command line arguments
455469while test $# -gt 0; do
@@ -487,11 +501,7 @@ if [ $INSTALL_RESULT -ne 0 ]; then
487501 error_exit " Installation failed, exit code: $INSTALL_RESULT "
488502fi
489503
490- # Perform pruning if requested
491- if [ " $SKIP_PRUNE " -eq 0 ]; then
492- echo " Performing CUDA pruning operations..."
493- eval " prune_${version_no_dot} " || error_exit " Pruning failed"
494- fi
504+
495505
496506# Final cleanup
497507cleanup_temp_dirs
0 commit comments