Skip to content

Commit e40497f

Browse files
committed
feat(install_cuda): add CI mode with automatic disk cleanup for GitHub Actions
Add a CI_MODE environment variable to enable CI-specific optimizations.

## New Features

- CI_MODE=1 enables automatic disk cleanup before installation
- Removes common pre-installed software on GitHub Actions runners:
  - .NET SDK, Android SDK, GHC, CodeQL, Boost, Swift, GraalVM, etc.
- Cleans the apt cache and Docker images when available
- Displays disk space statistics before and after cleanup:
  - Total disk size, used space, available space, usage percentage
- Calculates and shows freed disk space

## New Environment Variables

- CI_MODE: Set to 1 to enable CI optimizations (default: 0)
- REQUIRED_DISK_SPACE_GB: Minimum required disk space in GB (default: 15)

## Usage

```bash
# GitHub Actions / CI environment
CI_MODE=1 CUDA_INSTALL_PREFIX=/tmp/cuda ./install_cuda.sh 12.8

# Local installation (unchanged behavior)
CUDA_INSTALL_PREFIX=~/opt ./install_cuda.sh 12.8
```

This should resolve 'No space left on device' errors on GitHub Actions runners by freeing approximately 30-50GB of disk space before CUDA installation.
1 parent aea190a commit e40497f

File tree

1 file changed

+109
-1
lines changed

1 file changed

+109
-1
lines changed

install_cuda.sh

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#!/bin/bash
22
# Usage: CUDA_INSTALL_PREFIX=/home/yhao/opt ./install_cuda.sh 12.8
3+
# CI Usage: CI_MODE=1 CUDA_INSTALL_PREFIX=/tmp/cuda ./install_cuda.sh 12.8
34
# Notice: Part of this script is synced with https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_cuda.sh
45
set -ex
56

@@ -23,9 +24,16 @@ CUDA_VERSION=${CUDA_VERSION:-12.8}
2324
NVSHMEM_VERSION=${NVSHMEM_VERSION:-3.4.5}
2425
INSTALL_NCCL=${INSTALL_NCCL:-1}
2526

27+
# CI mode settings
28+
# Set CI_MODE=1 to enable CI-specific optimizations (disk cleanup, etc.)
29+
CI_MODE=${CI_MODE:-0}
30+
# Minimum required disk space in GB (can be overridden)
31+
REQUIRED_DISK_SPACE_GB=${REQUIRED_DISK_SPACE_GB:-15}
32+
2633
echo "CUDA_INSTALL_PREFIX=${CUDA_INSTALL_PREFIX}"
2734
echo "CUDA_VERSION=${CUDA_VERSION}"
2835
echo "INSTALL_NCCL=${INSTALL_NCCL}"
36+
echo "CI_MODE=${CI_MODE}"
2937

3038
# Version configuration using associative arrays
3139
declare -A CUDA_FULL_VERSION=(
@@ -179,6 +187,103 @@ function check_network {
179187
fi
180188
}
181189

190+
# Get disk space information for a given path.
# Arguments: $1 - path whose filesystem is inspected (default: /)
# Outputs:   one line on stdout: "total_gb used_gb available_gb use_percent"
#            with the "G" and "%" suffixes stripped
function get_disk_info {
    local target_path="${1:-/}"
    # -P selects the POSIX layout (exactly one row per filesystem), so a long
    # device name can never wrap and shift the figures away from row 2.
    # -BG reports sizes in whole gigabytes; "--" protects paths starting with "-".
    df -P -BG -- "${target_path}" | awk 'NR==2 {
        gsub(/G/, "", $2); gsub(/G/, "", $3); gsub(/G/, "", $4); gsub(/%/, "", $5);
        print $2, $3, $4, $5
    }'
}
199+
200+
# Print a two-line disk usage report.
# Arguments: $1 - heading printed on the first line
#            $2 - path passed to get_disk_info (default: /)
# Outputs:   the heading, then total/used/available (GB) and usage percent
function print_disk_summary {
    local label="$1"
    local target_path="${2:-/}"

    local stats
    stats=$(get_disk_info "${target_path}")

    # get_disk_info emits four whitespace-separated fields; split them in one
    # pass. The trailing "_" absorbs anything beyond the fourth field.
    local total_gb used_gb available_gb use_percent _
    read -r total_gb used_gb available_gb use_percent _ <<< "${stats}"

    printf '%s\n' "💾 ${label}:"
    printf '%s\n' " Total: ${total_gb}GB | Used: ${used_gb}GB | Available: ${available_gb}GB | Usage: ${use_percent}%"
}
215+
216+
# CI mode: free up disk space by removing unnecessary pre-installed software.
# This is useful for GitHub Actions runners, which have limited disk space.
# Globals:   CI_MODE (read) - cleanup runs only when it equals "1"
# Outputs:   progress messages and before/after disk statistics for "/"
# Returns:   0 when cleanup is skipped or finishes; each removal/cleanup
#            command is best-effort and its failure is ignored
function ci_free_disk_space {
    if [ "${CI_MODE:-0}" != "1" ]; then
        echo "CI_MODE not enabled, skipping disk cleanup"
        return 0
    fi

    echo "🧹 ===== CI Mode: Freeing up disk space ====="

    # Print disk space before cleanup
    print_disk_summary "Disk space BEFORE cleanup" "/"
    local before_available
    before_available=$(get_disk_info "/" | awk '{print $3}')

    echo ""
    echo "Removing unnecessary software packages..."

    # List of directories to remove (common on GitHub Actions runners)
    local dirs_to_remove=(
        "/usr/share/dotnet"
        "/usr/local/lib/android"
        "/opt/ghc"
        "/opt/hostedtoolcache/CodeQL"
        "/usr/local/share/boost"
        "/usr/share/swift"
        "/usr/local/graalvm"
        "/usr/local/.ghcup"
        "/opt/hostedtoolcache/Python"
        "/opt/hostedtoolcache/Ruby"
        "/opt/hostedtoolcache/go"
        "/opt/hostedtoolcache/node"
    )

    local removed_count=0
    # Keep the loop variable local so a caller's global "dir" is not clobbered.
    local dir
    for dir in "${dirs_to_remove[@]}"; do
        [ -d "${dir}" ] || continue
        echo " Removing ${dir}..."
        # Best-effort: try with sudo first, then as the current user.
        # "${dir:?}" aborts instead of expanding to an empty path.
        sudo rm -rf -- "${dir:?}" 2>/dev/null || rm -rf -- "${dir:?}" 2>/dev/null || true
        # Count a directory only when it is really gone.
        if [ ! -e "${dir}" ]; then
            removed_count=$((removed_count + 1))
        else
            echo " Warning: could not remove ${dir}" >&2
        fi
    done

    echo " Removed ${removed_count} directories"

    # Clean apt cache if apt is available
    if command_exists apt-get; then
        echo " Cleaning apt cache..."
        sudo apt-get clean 2>/dev/null || true
        sudo apt-get autoremove -y 2>/dev/null || true
    fi

    # Clean docker if available
    if command_exists docker; then
        echo " Cleaning Docker images..."
        docker system prune -af 2>/dev/null || true
    fi

    echo ""
    # Print disk space after cleanup
    print_disk_summary "Disk space AFTER cleanup" "/"
    local after_available
    after_available=$(get_disk_info "/" | awk '{print $3}')

    # Calculate freed space. Only do arithmetic on plain integers so that an
    # unexpected df field (e.g. "-") cannot be evaluated as an expression.
    local freed_gb=0
    if [[ "${before_available}" =~ ^[0-9]+$ && "${after_available}" =~ ^[0-9]+$ ]]; then
        freed_gb=$((after_available - before_available))
    fi
    echo ""
    echo "✅ Freed approximately ${freed_gb}GB of disk space"
    echo ""
}
286+
182287
# Check if file or directory exists
183288
function check_exists {
184289
if [ ! -e "$1" ]; then
@@ -192,8 +297,11 @@ check_exists "${CUDA_INSTALL_PREFIX}"
192297
# Check dependencies
193298
check_dependencies
194299

300+
# CI mode: Free up disk space before checking disk space requirements
301+
ci_free_disk_space
302+
195303
# Check disk space
196-
check_disk_space 15
304+
check_disk_space "${REQUIRED_DISK_SPACE_GB}"
197305

198306
# Check network connectivity
199307
check_network

0 commit comments

Comments
 (0)