# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This workflow verifies that the basic install works across all supported platforms.
# For basic install, all imports need to either be successful or appropriately guarded.
#
# Job graph:
#   pre-flight  ->  pip-test-pytorch  \
#               ->  uv-test-pytorch   -+->  install-test-summary
#
# Both install jobs are skipped for docs-only changes, merge-group runs and
# deployment workflows (as reported by the pre-flight job). The summary job
# always runs (unless the run is cancelled) and reports a single pass/fail
# result for the whole workflow.

name: Installation Test

on:
  push:
    branches:
      - dev
      - main
      - "pull-request/[0-9]+"
      - "deploy-release/*"
  merge_group:
    types: [checks_requested]

jobs:
  # Reusable workflow whose outputs (docs_only, is_merge_group,
  # is_deployment_workflow) gate the jobs below.
  pre-flight:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5

  # Installs the project through docker/common/install.sh (without --use-uv)
  # inside the NGC PyTorch container, then runs the check-imports action.
  pip-test-pytorch:
    needs: [pre-flight]
    # The block scalar keeps the leading "!" from being read as a YAML tag.
    if: |
      !(needs.pre-flight.outputs.docs_only == 'true'
      || needs.pre-flight.outputs.is_merge_group == 'true'
      || needs.pre-flight.outputs.is_deployment_workflow == 'true')
    runs-on: linux-amd64-cpu16
    name: Pip - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch
    container:
      image: nvcr.io/nvidia/pytorch:25.05-py3
    environment: nemo-ci
    strategy:
      fail-fast: false
      matrix:
        # Quoted so the version is never parsed as a float.
        python-version: ["3.12"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Values appended to $GITHUB_ENV become visible only in LATER steps, so
      # nothing written here may be read back within this same script.
      - name: Set PATH
        run: |
          echo "UV_PROJECT_ENVIRONMENT=/opt/venv" | tee -a "$GITHUB_ENV"
          echo "UV_LINK_MODE=copy" | tee -a "$GITHUB_ENV"
          echo "CUDA_HOME=/usr/local/cuda" | tee -a "$GITHUB_ENV"
          # Append the previous value only when it is non-empty; a trailing ":"
          # would add the current directory to the library search path.
          echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" | tee -a "$GITHUB_ENV"
          # The CUDA bin directory is spelled out literally: the CUDA_HOME
          # value exported above is not yet available in this step.
          echo "PATH=$HOME/.local/bin:$PATH:/usr/local/cuda/bin" | tee -a "$GITHUB_ENV"
          echo "TORCH_CUDA_ARCH_LIST=6.0;6.1;7.0;7.5;8.0;8.6;9.0" | tee -a "$GITHUB_ENV"

      - name: Install megatron-core
        shell: bash -x -e -u -o pipefail {0}
        run: >-
          bash docker/common/install.sh
          --environment dev
          --base-image pytorch
          --python-version ${{ matrix.python-version }}

      # NOTE(review): this pins FW-CI-templates v0.63.2 while the pre-flight
      # job above uses v0.65.5 - confirm the two pins are meant to differ.
      - name: Checkout check-imports
        uses: actions/checkout@v4
        with:
          repository: NVIDIA-NeMo/FW-CI-templates
          ref: v0.63.2
          path: FW-CI-templates

      - name: Check imports for megatron-core
        uses: ./FW-CI-templates/.github/actions/check-imports
        with:
          package-name: megatron.core
          python-binary: ${{ env.UV_PROJECT_ENVIRONMENT }}/bin/python

  # Same as pip-test-pytorch, but installs with --use-uv. The import check is
  # currently disabled for this job (see the comment above the disabled steps).
  uv-test-pytorch:
    needs: [pre-flight]
    # The block scalar keeps the leading "!" from being read as a YAML tag.
    if: |
      !(needs.pre-flight.outputs.docs_only == 'true'
      || needs.pre-flight.outputs.is_merge_group == 'true'
      || needs.pre-flight.outputs.is_deployment_workflow == 'true')
    runs-on: linux-amd64-cpu16
    name: UV - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch
    container:
      image: nvcr.io/nvidia/pytorch:25.05-py3
    environment: nemo-ci
    strategy:
      fail-fast: false
      matrix:
        # Quoted so the version is never parsed as a float.
        python-version: ["3.12"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Values appended to $GITHUB_ENV become visible only in LATER steps, so
      # nothing written here may be read back within this same script.
      - name: Set PATH
        run: |
          echo "UV_PROJECT_ENVIRONMENT=/opt/venv" | tee -a "$GITHUB_ENV"
          echo "VIRTUAL_ENV=/opt/venv" | tee -a "$GITHUB_ENV"
          echo "UV_LINK_MODE=copy" | tee -a "$GITHUB_ENV"
          echo "CUDA_HOME=/usr/local/cuda" | tee -a "$GITHUB_ENV"
          # Append the previous value only when it is non-empty; a trailing ":"
          # would add the current directory to the library search path.
          echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" | tee -a "$GITHUB_ENV"
          # The CUDA bin directory is spelled out literally: the CUDA_HOME
          # value exported above is not yet available in this step.
          echo "PATH=$HOME/.local/bin:$PATH:/usr/local/cuda/bin" | tee -a "$GITHUB_ENV"
          echo "CUDACXX=/usr/local/cuda/bin/nvcc" | tee -a "$GITHUB_ENV"
          echo "TORCH_CUDA_ARCH_LIST=6.0;6.1;7.0;7.5;8.0;8.6;9.0" | tee -a "$GITHUB_ENV"

      - name: Install project
        shell: bash
        run: bash docker/common/install.sh --environment dev --base-image pytorch --use-uv

      # NGC PyTorch 25.05 has a version of triton that is broken on CPU only machines.
      # - name: Checkout check-imports
      #   uses: actions/checkout@v4
      #   with:
      #     repository: NVIDIA-NeMo/FW-CI-templates
      #     ref: v0.63.2
      #     path: FW-CI-templates

      # - name: Check imports for megatron-core
      #   uses: ./FW-CI-templates/.github/actions/check-imports
      #   with:
      #     package-name: megatron.core
      #     python-binary: ${{ env.UV_PROJECT_ENVIRONMENT }}/bin/python

  # Single pass/fail gate for the workflow. It inspects the conclusions of all
  # completed jobs in this run and fails if any of them is not "success",
  # unless skipping is expected for this kind of run.
  install-test-summary:
    needs: [pre-flight, pip-test-pytorch, uv-test-pytorch]
    runs-on: ubuntu-latest
    name: Install test summary
    # always() makes the job run even when a needed job failed or was skipped;
    # !cancelled() still lets a cancelled run stop early.
    if: |
      (
        needs.pre-flight.outputs.docs_only == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
        || needs.pre-flight.outputs.is_deployment_workflow == 'true'
        || always()
      )
      && !cancelled()
    steps:
      - name: Get workflow result
        id: result
        shell: bash -x -e -u -o pipefail {0}
        env:
          GH_TOKEN: ${{ github.token }}
          RUN_ID: ${{ github.run_id }}
          # Must list every condition under which the install jobs above are
          # skipped on purpose: a skipped job has conclusion "skipped", which
          # the query below counts as "not success".
          SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' }}
        run: |
          # Count the completed jobs of this run whose conclusion is not
          # "success". This job itself is still running and is not counted.
          # Best effort: if the query fails, fall back to 0 and warn instead of
          # failing the gate.
          if ! FAILED_JOBS=$(gh run view "$RUN_ID" --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length'); then
            echo "::warning::Could not query job results; assuming no failed jobs"
            FAILED_JOBS=0
          fi

          if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
            echo "✅ All previous jobs completed successfully"
            exit 0
          else
            echo "❌ Found $FAILED_JOBS failed job(s)"
            # Show which jobs failed
            gh run view "$RUN_ID" --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
            exit 1
          fi