FindHao
diff --git a/.github/workflows/test_install_cuda.yml
Lines changed: 30 additions & 0 deletions b/.github/workflows/test_install_cuda.yml
Lines changed: 30 additions & 0 deletions
diff --git a/.shellcheckrc
Lines changed: 20 additions & 0 deletions b/.shellcheckrc
Lines changed: 20 additions & 0 deletions
diff --git a/backup_torchbench/generate_profiling.sh
Lines changed: 11 additions & 12 deletions b/backup_torchbench/generate_profiling.sh
Lines changed: 11 additions & 12 deletions
diff --git a/backup_torchbench/gpu_mem_monitor.sh
Lines changed: 2 additions & 3 deletions b/backup_torchbench/gpu_mem_monitor.sh
Lines changed: 2 additions & 3 deletions
diff --git a/backup_torchbench/print_all_task.sh
Lines changed: 9 additions & 10 deletions b/backup_torchbench/print_all_task.sh
Lines changed: 9 additions & 10 deletions
diff --git a/backup_torchbench/run_all.sh
Lines changed: 12 additions & 15 deletions b/backup_torchbench/run_all.sh
Lines changed: 12 additions & 15 deletions
diff --git a/backup_torchbench/run_all_eg.sh
Lines changed: 11 additions & 17 deletions b/backup_torchbench/run_all_eg.sh
Lines changed: 11 additions & 17 deletions
diff --git a/backup_torchbench/run_all_for_overhead.sh
Lines changed: 23 additions & 25 deletions b/backup_torchbench/run_all_for_overhead.sh
Lines changed: 23 additions & 25 deletions
diff --git a/backup_torchbench/run_all_inductor.sh
Lines changed: 15 additions & 15 deletions b/backup_torchbench/run_all_inductor.sh
Lines changed: 15 additions & 15 deletions
@@ -5,15 +5,45 @@ on:
     paths:
       - 'install_cuda.sh'
       - '.github/workflows/test_install_cuda.yml'
+      - '*.sh'
   pull_request:
     paths:
       - 'install_cuda.sh'
       - '.github/workflows/test_install_cuda.yml'
+      - '*.sh'
   workflow_dispatch:
 
 jobs:
+  shellcheck:
+    runs-on: ubuntu-latest
+    name: ShellCheck and Format Check
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install ShellCheck
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y shellcheck
+
+      - name: Run format and check script
+        run: |
+          chmod +x format_scripts.sh
+          ./format_scripts.sh
+
+      - name: Check if any files were modified by formatting
+        run: |
+          if [ -n "$(git status --porcelain)" ]; then
+            echo "❌ Files need formatting! Please run ./format_scripts.sh locally and commit the changes."
+            git diff
+            exit 1
+          else
+            echo "✅ All files are properly formatted!"
+          fi
+
   test-install-cuda:
     runs-on: ubuntu-latest
+    needs: shellcheck
     timeout-minutes: 30
 
     steps:
 
@@ -0,0 +1,20 @@
+# ShellCheck configuration file
+# See https://github.com/koalaman/shellcheck/wiki/Ignore
+
+# Disable warnings for unused variables (common in script templates)
+disable=SC2034
+
+# Disable warnings for cd without error checking (we use pushd/popd pattern)
+disable=SC2164
+
+# Disable warnings for declaring and assigning a variable in one statement (which masks the command's return value)
+disable=SC2155
+
+# Enable additional checks
+enable=all
+
+# Set shell dialect (bash by default)
+shell=bash
+
+# External sources (if any)
+# external-sources=true 
@@ -6,22 +6,21 @@ output=/home/yhao/d/testing/runlog_for_profiling_jul12.txt
 
 work_dir=/home/yhao/d/benchmark
 
-func(){
-    echo "" > $output
+func() {
+  echo "" >$output
 
-    # for model in fambench_dlrm fambench_xlmr detectron2_maskrcnn vision_maskrcnn timm_efficientnet timm_vision_transformer hf_Bert hf_GPT2 hf_T5 
-    # for model in fambench_dlrm fambench_xlmr detectron2_maskrcnn vision_maskrcnn timm_efficientnet timm_vision_transformer hf_Bert hf_GPT2 hf_T5  resnet50 timm_resnest resnext50_32x4d hf_BigBird hf_Bart soft_actor_critic alexnet timm_vovnet mobilenet_v3_large vgg16 shufflenet_v2_x1_0 pytorch_unet dlrm mnasnet1_0 resnet50_quantized_qat tts_angular hf_Reformer nvidia_deeprecommender mobilenet_v2_quantized_qat mobilenet_v2 LearningToPaint hf_Longformer opacus_cifar10 resnet18 timm_regnet dcgan maml BERT_pytorch Super_SloMo pytorch_struct pplbench_beanmachine drq pyhpc_isoneutral_mixing hf_Albert attention_is_all_you_need_pytorch moco Background_Matting pyhpc_turbulent_kinetic_energy maml_omniglot pyhpc_equation_of_state timm_nfnet demucs densenet121 pytorch_CycleGAN_and_pix2pix tacotron2 squeezenet1_1 fastNLP_Bert pytorch_stargan hf_DistilBert speech_transformer yolov3 timm_efficientdet
-    for model in dlrm nvidia_deeprecommender
-    do 
+  # for model in fambench_dlrm fambench_xlmr detectron2_maskrcnn vision_maskrcnn timm_efficientnet timm_vision_transformer hf_Bert hf_GPT2 hf_T5
+  # for model in fambench_dlrm fambench_xlmr detectron2_maskrcnn vision_maskrcnn timm_efficientnet timm_vision_transformer hf_Bert hf_GPT2 hf_T5  resnet50 timm_resnest resnext50_32x4d hf_BigBird hf_Bart soft_actor_critic alexnet timm_vovnet mobilenet_v3_large vgg16 shufflenet_v2_x1_0 pytorch_unet dlrm mnasnet1_0 resnet50_quantized_qat tts_angular hf_Reformer nvidia_deeprecommender mobilenet_v2_quantized_qat mobilenet_v2 LearningToPaint hf_Longformer opacus_cifar10 resnet18 timm_regnet dcgan maml BERT_pytorch Super_SloMo pytorch_struct pplbench_beanmachine drq pyhpc_isoneutral_mixing hf_Albert attention_is_all_you_need_pytorch moco Background_Matting pyhpc_turbulent_kinetic_energy maml_omniglot pyhpc_equation_of_state timm_nfnet demucs densenet121 pytorch_CycleGAN_and_pix2pix tacotron2 squeezenet1_1 fastNLP_Bert pytorch_stargan hf_DistilBert speech_transformer yolov3 timm_efficientdet
+  for model in dlrm nvidia_deeprecommender; do
 
-    echo "@Yueming Hao: start model tests"  >> $output
-    echo "@Yueming Hao: Run $model" >> $output
+    echo "@Yueming Hao: start model tests" >>$output
+    echo "@Yueming Hao: Run $model" >>$output
     mkdir ./logs/$model
-    python run.py -d cuda --profile --profile-detailed --profile-devices cpu,cuda --profile-folder ./logs/$model/ -t train $model --precision fp32 >> $output 2>&1
-    echo "@Yueming Hao: end model tests" >> $output
+    python run.py -d cuda --profile --profile-detailed --profile-devices cpu,cuda --profile-folder ./logs/$model/ -t train $model --precision fp32 >>$output 2>&1
+    echo "@Yueming Hao: end model tests" >>$output
 
-    done
+  done
 
 }
 
-func 
+func
@@ -9,7 +9,7 @@ while true; do
 
     # Run the 350m single-GPU training script with bash
     bash training_scripts/single_gpu/run_350m.sh
-    
+
     # Check the return code of the script
     if [[ $? -eq 0 ]]; then
       # If the script returns successfully, send a "successfully" notification
@@ -18,11 +18,10 @@ while true; do
       # If the script fails, send a "fail" notification
       notify "Task failed."
     fi
-    
+
     exit 0
   fi
 
   # Sleep for 60 seconds before the next check
   sleep 60
 done
-
@@ -1,15 +1,14 @@
 #!/bin/bash
 # This script appends each model's task (Model.task) for every model in torchbenchmark/models to /tmp/model_class.txt
 benchmark_path=${benchmark_path:-"/home/yhao24/p/p8/benchmark"}
-all_models=`ls $benchmark_path/torchbenchmark/models`
+all_models=$(ls $benchmark_path/torchbenchmark/models)
 cd $benchmark_path/
 output="/tmp/model_class.txt"
-for model in $all_models
-do
-    # if model ends with .md, skip it
-    if [[ $model == *.md ]]; then
-        continue
-    fi
-    echo -n "$model, " >> $output
-    python3 -c "from torchbenchmark.models.$model import Model; print(Model.task)" >> $output
-done
+for model in $all_models; do
+  # if model ends with .md, skip it
+  if [[ $model == *.md ]]; then
+    continue
+  fi
+  echo -n "$model, " >>$output
+  python3 -c "from torchbenchmark.models.$model import Model; print(Model.task)" >>$output
+done
@@ -1,31 +1,28 @@
 #!/bin/bash
 
 SHELL_FOLDER=$(
-    cd "$(dirname "$0")"
-    pwd
+  cd "$(dirname "$0")"
+  pwd
 )
 source ${SHELL_FOLDER}/run_base.sh
 
 cd $tb_path
 
-
 func() {
-    for ((i = 1; i <= $max_iter; i++)); do
-        python run.py -d cuda ${tflops} -t $mode $model >>$output 2>&1
-        if [ $? -ne 0 ]; then
-            break
-        fi
-    done
+  for ((i = 1; i <= $max_iter; i++)); do
+    python run.py -d cuda ${tflops} -t $mode $model >>$output 2>&1
+    if [ $? -ne 0 ]; then
+      break
+    fi
+  done
 }
 
-
-
 echo $(date) >>$output
 for model in $all_models; do
-# for model in resnet50 hf_Bart hf_Bart; do
-    conda activate $env1
-    echo "@Yueming Hao origin $model" >>$output
-    func
+  # for model in resnet50 hf_Bart hf_Bart; do
+  conda activate $env1
+  echo "@Yueming Hao origin $model" >>$output
+  func
 done
 
 echo $(date) >>$output
 
@@ -1,29 +1,23 @@
 #!/bin/bash
 
-
-
-
 # model=detectron2_maskrcnn_r_50_c4
 output=/home/yhao/d/tmp/run_all_egs.log
-echo "" > $output
+echo "" >$output
 cd /home/yhao/d/benchmark_11.6
 
 max_iter=1
-func(){
-    for (( i = 1 ; i <= $max_iter; i++ ))
-    do
-        # python run.py -d cuda -m jit -t train $model --precision fp32 --torchdynamo nvfuser  >> $output 2>&1
-        python run.py -d cuda -t train --profile-eg --profile --profile-detailed --profile-devices cpu,cuda --profile-folder  ./logs/$model $model  --precision fp32  >> $output 2>&1
-    done
+func() {
+  for ((i = 1; i <= $max_iter; i++)); do
+    # python run.py -d cuda -m jit -t train $model --precision fp32 --torchdynamo nvfuser  >> $output 2>&1
+    python run.py -d cuda -t train --profile-eg --profile --profile-detailed --profile-devices cpu,cuda --profile-folder ./logs/$model $model --precision fp32 >>$output 2>&1
+  done
 }
 
-for model in detectron2_maskrcnn_r_101_fpn mnasnet1_0 shufflenet_v2_x1_0 BERT_pytorch detectron2_maskrcnn_r_50_c4 mobilenet_v2 soft_actor_critic Background_Matting detectron2_maskrcnn_r_50_fpn mobilenet_v2_quantized_qat speech_transformer LearningToPaint dlrm mobilenet_v3_large squeezenet1_1 Super_SloMo drq moco tacotron2 alexnet fambench_dlrm nvidia_deeprecommender timm_efficientdet attention_is_all_you_need_pytorch fambench_xlmr opacus_cifar10 timm_efficientnet dcgan fastNLP_Bert pplbench_beanmachine timm_nfnet demucs hf_Albert pyhpc_equation_of_state timm_regnet densenet121 hf_Bart pyhpc_isoneutral_mixing timm_resnest detectron2_fasterrcnn_r_101_c4 hf_Bert pyhpc_turbulent_kinetic_energy timm_vision_transformer detectron2_fasterrcnn_r_101_dc5 hf_BigBird  timm_vovnet detectron2_fasterrcnn_r_101_fpn hf_DistilBert pytorch_stargan tts_angular detectron2_fasterrcnn_r_50_c4 hf_GPT2 pytorch_struct vgg16 detectron2_fasterrcnn_r_50_dc5 hf_Longformer pytorch_unet vision_maskrcnn detectron2_fasterrcnn_r_50_fpn hf_Reformer resnet18 yolov3 detectron2_fcos_r_50_fpn hf_T5 resnet50 detectron2_maskrcnn maml resnet50_quantized_qat detectron2_maskrcnn_r_101_c4 maml_omniglot resnext50_32x4d
-do 
+for model in detectron2_maskrcnn_r_101_fpn mnasnet1_0 shufflenet_v2_x1_0 BERT_pytorch detectron2_maskrcnn_r_50_c4 mobilenet_v2 soft_actor_critic Background_Matting detectron2_maskrcnn_r_50_fpn mobilenet_v2_quantized_qat speech_transformer LearningToPaint dlrm mobilenet_v3_large squeezenet1_1 Super_SloMo drq moco tacotron2 alexnet fambench_dlrm nvidia_deeprecommender timm_efficientdet attention_is_all_you_need_pytorch fambench_xlmr opacus_cifar10 timm_efficientnet dcgan fastNLP_Bert pplbench_beanmachine timm_nfnet demucs hf_Albert pyhpc_equation_of_state timm_regnet densenet121 hf_Bart pyhpc_isoneutral_mixing timm_resnest detectron2_fasterrcnn_r_101_c4 hf_Bert pyhpc_turbulent_kinetic_energy timm_vision_transformer detectron2_fasterrcnn_r_101_dc5 hf_BigBird timm_vovnet detectron2_fasterrcnn_r_101_fpn hf_DistilBert pytorch_stargan tts_angular detectron2_fasterrcnn_r_50_c4 hf_GPT2 pytorch_struct vgg16 detectron2_fasterrcnn_r_50_dc5 hf_Longformer pytorch_unet vision_maskrcnn detectron2_fasterrcnn_r_50_fpn hf_Reformer resnet18 yolov3 detectron2_fcos_r_50_fpn hf_T5 resnet50 detectron2_maskrcnn maml resnet50_quantized_qat detectron2_maskrcnn_r_101_c4 maml_omniglot resnext50_32x4d; do
 
-    source /home/yhao/d/conda/bin/activate
-    conda activate pt_aug1
-    echo "@Yueming Hao origin $model" >>$output
-    func
+  source /home/yhao/d/conda/bin/activate
+  conda activate pt_aug1
+  echo "@Yueming Hao origin $model" >>$output
+  func
 
 done
-
@@ -4,14 +4,13 @@ cd /home/yhao/d/benchmark
 
 output=/home/yhao/d/testing/runlog_100ms_train_all.txt
 
-func(){
-    echo "" > $output
+func() {
+  echo "" >$output
 
-    # for model in fambench_dlrm fambench_xlmr detectron2_maskrcnn vision_maskrcnn timm_efficientnet timm_vision_transformer hf_Bert hf_GPT2 hf_T5 
-    for model in fambench_dlrm fambench_xlmr detectron2_maskrcnn vision_maskrcnn timm_efficientnet timm_vision_transformer hf_Bert hf_GPT2 hf_T5  resnet50 timm_resnest resnext50_32x4d hf_BigBird hf_Bart soft_actor_critic alexnet timm_vovnet mobilenet_v3_large vgg16 shufflenet_v2_x1_0 pytorch_unet dlrm mnasnet1_0 resnet50_quantized_qat tts_angular hf_Reformer nvidia_deeprecommender mobilenet_v2_quantized_qat mobilenet_v2 LearningToPaint hf_Longformer opacus_cifar10 resnet18 timm_regnet dcgan maml BERT_pytorch Super_SloMo pytorch_struct pplbench_beanmachine drq pyhpc_isoneutral_mixing hf_Albert attention_is_all_you_need_pytorch moco Background_Matting pyhpc_turbulent_kinetic_energy maml_omniglot pyhpc_equation_of_state timm_nfnet demucs densenet121 pytorch_CycleGAN_and_pix2pix tacotron2 squeezenet1_1 fastNLP_Bert pytorch_stargan hf_DistilBert speech_transformer yolov3 timm_efficientdet
-    do 
+  # for model in fambench_dlrm fambench_xlmr detectron2_maskrcnn vision_maskrcnn timm_efficientnet timm_vision_transformer hf_Bert hf_GPT2 hf_T5
+  for model in fambench_dlrm fambench_xlmr detectron2_maskrcnn vision_maskrcnn timm_efficientnet timm_vision_transformer hf_Bert hf_GPT2 hf_T5 resnet50 timm_resnest resnext50_32x4d hf_BigBird hf_Bart soft_actor_critic alexnet timm_vovnet mobilenet_v3_large vgg16 shufflenet_v2_x1_0 pytorch_unet dlrm mnasnet1_0 resnet50_quantized_qat tts_angular hf_Reformer nvidia_deeprecommender mobilenet_v2_quantized_qat mobilenet_v2 LearningToPaint hf_Longformer opacus_cifar10 resnet18 timm_regnet dcgan maml BERT_pytorch Super_SloMo pytorch_struct pplbench_beanmachine drq pyhpc_isoneutral_mixing hf_Albert attention_is_all_you_need_pytorch moco Background_Matting pyhpc_turbulent_kinetic_energy maml_omniglot pyhpc_equation_of_state timm_nfnet demucs densenet121 pytorch_CycleGAN_and_pix2pix tacotron2 squeezenet1_1 fastNLP_Bert pytorch_stargan hf_DistilBert speech_transformer yolov3 timm_efficientdet; do
 
-# this part is for total overhead
+    # this part is for total overhead
     # echo "@Yueming Hao: start model tests"  >> $output
     # echo "@Yueming Hao: Run $model" >> $output
     # { time python run.py -d cuda -t train  $model  >> $output 2>&1 ; } 2>> $output
@@ -27,28 +26,27 @@ func(){
     # { time python run.py -d cuda -t train --flops dcgm $model  >> $output 2>&1 ; } 2>> $output
     # echo "@Yueming Hao: end model tests" >> $output
 
-
-# this part is for computation overhead 
-    echo "@Yueming Hao: start model tests"  >> $output
-    echo "@Yueming Hao: Run $model" >> $output
-    python run.py -d cuda -t train  $model  >> $output 2>&1 
-    python run.py -d cuda -t train $model  >> $output 2>&1 
-    python run.py -d cuda -t train $model  >> $output 2>&1 
-    python run.py -d cuda -t train $model  >> $output 2>&1 
-    python run.py -d cuda -t train $model  >> $output 2>&1 
-    echo "@Yueming Hao: Run $model with dcgm flops" >> $output 2>&1
-    python run.py -d cuda -t train --flops dcgm $model  >> $output 2>&1 
-    python run.py -d cuda -t train --flops dcgm $model  >> $output 2>&1 
-    python run.py -d cuda -t train --flops dcgm $model  >> $output 2>&1 
-    python run.py -d cuda -t train --flops dcgm $model  >> $output 2>&1 
-    python run.py -d cuda -t train --flops dcgm $model  >> $output 2>&1 
-    echo "@Yueming Hao: end model tests" >> $output
-    done
+    # this part is for computation overhead
+    echo "@Yueming Hao: start model tests" >>$output
+    echo "@Yueming Hao: Run $model" >>$output
+    python run.py -d cuda -t train $model >>$output 2>&1
+    python run.py -d cuda -t train $model >>$output 2>&1
+    python run.py -d cuda -t train $model >>$output 2>&1
+    python run.py -d cuda -t train $model >>$output 2>&1
+    python run.py -d cuda -t train $model >>$output 2>&1
+    echo "@Yueming Hao: Run $model with dcgm flops" >>$output 2>&1
+    python run.py -d cuda -t train --flops dcgm $model >>$output 2>&1
+    python run.py -d cuda -t train --flops dcgm $model >>$output 2>&1
+    python run.py -d cuda -t train --flops dcgm $model >>$output 2>&1
+    python run.py -d cuda -t train --flops dcgm $model >>$output 2>&1
+    python run.py -d cuda -t train --flops dcgm $model >>$output 2>&1
+    echo "@Yueming Hao: end model tests" >>$output
+  done
 
 }
 
-func 
+func
 
 # output=runlog_100ms_all.txt
 
-# func
+# func
@@ -1,37 +1,37 @@
 #!/bin/bash
 # This script is used to print out all guard check logs with TorchInductor
 SHELL_FOLDER=$(
-    cd "$(dirname "$0")"
-    pwd
+  cd "$(dirname "$0")"
+  pwd
 )
 source ${SHELL_FOLDER}/run_base.sh
 
 cd $tb_path
 
 profile_suffix=logs_profile_${mode}
 if [ $env1 ] && [ $env1 != "pt_sep14" ]; then
-    profile_suffix=logs_profile_${mode}_$env1_$(date +'%Y%m%d%H%M')
+  profile_suffix=logs_profile_${mode}_$env1_$(date +'%Y%m%d%H%M')
 fi
 
 enable_profile=${enable_profile:-0}
 
 enable_amp=${enable_amp:-0}
 if [ $enable_amp -eq 1 ]; then
-    amp_placeholder="--amp"
+  amp_placeholder="--amp"
 else
-    amp_placeholder=""
+  amp_placeholder=""
 fi
 
 max_iter=1
 func_torchinductor() {
-    if [ $enable_profile -eq 1 ]; then
-        profile_placeholder="--profile --profile-detailed --profile-devices cpu,cuda --profile-folder ${work_path}/${profile_suffix}/${model}/"
-    else
-        profile_placeholder=""
-    fi
-    for ((i = 1; i <= $max_iter; i++)); do
-        python run.py -d cuda $profile_placeholder $amp_placeholder -t $mode --metrics none $model --torchdynamo inductor >>$output 2>&1
-    done
+  if [ $enable_profile -eq 1 ]; then
+    profile_placeholder="--profile --profile-detailed --profile-devices cpu,cuda --profile-folder ${work_path}/${profile_suffix}/${model}/"
+  else
+    profile_placeholder=""
+  fi
+  for ((i = 1; i <= $max_iter; i++)); do
+    python run.py -d cuda $profile_placeholder $amp_placeholder -t $mode --metrics none $model --torchdynamo inductor >>$output 2>&1
+  done
 }
 
 source $cuda_env1
@@ -40,8 +40,8 @@ echo $(date) >>$output
 conda activate $env1
 # for model in $all_models
 for model in resnet18 resnet50; do
-    echo "@Yueming Hao origin $model" >>$output
-    func_torchinductor
+  echo "@Yueming Hao origin $model" >>$output
+  func_torchinductor
 done
 echo $(date) >>$output
 notify