diff --git a/.github/workflows/nv-torch110-p40.yml b/.github/workflows/nv-torch110-p40.yml deleted file mode 100644 index 31d7805db7bb..000000000000 --- a/.github/workflows/nv-torch110-p40.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: nv-torch110-p40 - -on: - workflow_dispatch: - schedule: - - cron: "0 0 * * *" - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - issues: write - -jobs: - unit-tests: - runs-on: [self-hosted, nvidia, cu111, p40] - - env: {ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true} # Allow using Node16 actions - - steps: - - uses: actions/checkout@v4 - - - id: setup-venv - uses: ./.github/workflows/setup-venv - - - name: Install pytorch - run: | - pip install -U --cache-dir $TORCH_CACHE torch==1.10.0+cu111 torchvision==0.11.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html - python -c "import torch; print('torch:', torch.__version__, torch)" - python -c "import torch; print('CUDA available:', torch.cuda.is_available())" - - - name: Install transformers - run: | - git clone https://github.com/huggingface/transformers - cd transformers - # if needed switch to the last known good SHA until transformers@master is fixed - # git checkout 1cc453d33 - git rev-parse --short HEAD - pip install . - - - name: Install deepspeed - run: | - pip install .[dev,1bit,autotuning] --no-build-isolation - ds_report - - - name: Python environment - run: | - pip list - - - name: Unit tests - run: | - unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch - cd tests - DS_ALLOW_DEPRECATED_FP16=1 pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="1.10" --cuda_ver="11.1" - - - name: Open GitHub issue if nightly CI fails - if: ${{ failure() && (github.event_name == 'schedule') }} - uses: JasonEtco/create-an-issue@v2 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - filename: .github/ISSUE_TEMPLATE/ci_failure_report.md - update_existing: true diff --git a/.github/workflows/nv-torch110-v100.yml b/.github/workflows/nv-torch110-v100.yml deleted file mode 100644 index bb1bc987379c..000000000000 --- a/.github/workflows/nv-torch110-v100.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: nv-torch110-v100 - -on: - workflow_dispatch: - schedule: - - cron: "0 0 * * *" - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - issues: write - -jobs: - unit-tests: - runs-on: [self-hosted, nvidia, cu111, v100] - - env: {ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true} # Allow using Node16 actions - - steps: - - uses: actions/checkout@v4 - - - id: setup-venv - uses: ./.github/workflows/setup-venv - - - name: Install pytorch - run: | - pip install -U --cache-dir $TORCH_CACHE torch==1.10.0+cu111 torchvision==0.11.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html - python -c "import torch; print('torch:', torch.__version__, torch)" - python -c "import torch; print('CUDA available:', torch.cuda.is_available())" - - - name: Install transformers - run: | - git clone https://github.com/huggingface/transformers - cd transformers - # if needed switch to the last known good SHA until transformers@master is fixed - # git checkout 1cc453d33 - git rev-parse --short HEAD - pip install . - - - name: Install deepspeed - run: | - pip install .[dev,1bit,autotuning] --no-build-isolation - ds_report - - - name: Python environment - run: | - pip list - - - name: Unit tests - run: | - unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch - cd tests - pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="1.10" --cuda_ver="11" - pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="1.10" --cuda_ver="11" - - - name: Open GitHub issue if nightly CI fails - if: ${{ failure() && (github.event_name == 'schedule') }} - uses: JasonEtco/create-an-issue@v2 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - filename: .github/ISSUE_TEMPLATE/ci_failure_report.md - update_existing: true