Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 4 additions & 44 deletions builder/manywheel/scripts/install_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -ex

function install_118 {
echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15"
echo "Installing CUDA 11.8 and NCCL 2.15"
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
# install CUDA 11.8.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
Expand All @@ -12,16 +12,6 @@ function install_118 {
rm -f cuda_11.8.0_520.61.05_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.8 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
ldconfig

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
mkdir tmp_nccl && cd tmp_nccl
wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz
Expand All @@ -34,7 +24,7 @@ function install_118 {
}

function install_121 {
echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.18.1"
echo "Installing CUDA 12.1 and NCCL 2.18.1"
rm -rf /usr/local/cuda-12.1 /usr/local/cuda
# install CUDA 12.1.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run
Expand All @@ -43,16 +33,6 @@ function install_121 {
rm -f cuda_12.1.0_530.30.02_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
ldconfig

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
mkdir tmp_nccl && cd tmp_nccl
wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.18.1/nccl_2.18.1-1+cuda12.1_x86_64.txz
Expand All @@ -65,7 +45,7 @@ function install_121 {
}

function install_124 {
echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.25.1"
echo "Installing CUDA 12.4 and NCCL 2.25.1"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.1 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run
Expand All @@ -74,16 +54,6 @@ function install_124 {
rm -f cuda_12.4.1_550.54.15_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
ldconfig

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
mkdir tmp_nccl && cd tmp_nccl
wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.25.1/nccl_2.25.1-1+cuda12.4_x86_64.txz
Expand All @@ -96,7 +66,7 @@ function install_124 {
}

function install_128 {
echo "Installing CUDA 12.8 and cuDNN 8.9 and NCCL 2.25.1"
echo "Installing CUDA 12.8 and NCCL 2.25.1"
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
# install CUDA 12.8.1 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda_12.8.1_570.124.06_linux.run
Expand All @@ -105,16 +75,6 @@ function install_128 {
rm -f cuda_12.8.1_570.124.06_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
ldconfig

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
mkdir tmp_nccl && cd tmp_nccl
wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.25.1/nccl_2.25.1-1+cuda12.8_x86_64.txz
Expand Down
51 changes: 0 additions & 51 deletions cmake/Modules/FindCUDNN.cmake

This file was deleted.

10 changes: 0 additions & 10 deletions src/turbomind/utils/cuda_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,6 @@ void syncAndCheck(const char* const file, int const line);
throw std::runtime_error(msg.c_str()); \
}

// Evaluates `expression` exactly once as a cudnnStatus_t. When the result is
// not CUDNN_STATUS_SUCCESS, writes the file name, line number and the
// cudnnGetErrorString() text to std::cerr and terminates the process via
// std::exit(EXIT_FAILURE).
//
// The macro expands to a braced block, so a trailing semicolon at the call
// site is simply an empty statement.
//
// The temporary uses a macro-specific name: with a plain `status` local, a call
// such as checkCUDNN(status) would expand to `cudnnStatus_t status = (status);`
// and read the new, uninitialized variable instead of the caller's one.
#define checkCUDNN(expression)                                                                 \
    {                                                                                          \
        const cudnnStatus_t checkCUDNN_status_ = (expression);                                 \
        if (checkCUDNN_status_ != CUDNN_STATUS_SUCCESS) {                                      \
            std::cerr << "Error on file " << __FILE__ << " line " << __LINE__ << ": "          \
                      << cudnnGetErrorString(checkCUDNN_status_) << std::endl;                 \
            std::exit(EXIT_FAILURE);                                                           \
        }                                                                                      \
    }

// Declaration of the printMatrix function template; its definition is not
// visible in this excerpt.
// NOTE(review): the parameter names suggest `ptr` points at an m x k matrix
// with row pitch `stride`, and that `is_device_ptr` marks `ptr` as device
// memory -- confirm against the definition before relying on this.
template<typename T>
void printMatrix(T* ptr, int m, int k, int stride, bool is_device_ptr);

Expand Down
Loading