Commit 2d2fca6c authored by jerrrrry's avatar jerrrrry
Browse files

Initial commit

parents
Pipeline #3401 failed with stages
in 0 seconds
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Composite-action metadata: the input contract for the NeMo test template.
name: "Test Template"
description: "Template for running NeMo tests in a containerized environment"

inputs:
  # NOTE(review): input names mix kebab-case and snake_case; kept as-is
  # because callers reference these exact names.
  container-image:
    description: "Container image to use for test"
    required: true
  timeout:
    description: "Max runtime of test in minutes"
    required: false
    default: "30"
  script:
    # NOTE(review): declared but not referenced by any step below — confirm
    # whether this input is still consumed anywhere before removing it.
    description: "Test script to execute"
    required: true
  is-optional:
    description: "Pass this job on failure."
    required: false
    default: "false"
  is_unit_test:
    description: "Upload coverage as unit test"
    required: false
    default: "false"
  tag:
    description: "Latest or legacy test suite"
    required: true
  test_case:
    description: "Test case to launch"
    required: true
  model:
    description: "Model to launch"
    required: false
  PAT:
    description: "GitHub Personal Access Token"
    required: true
# Steps of the composite test action: build a job script, run it in the
# pulled container image, then publish coverage/log artifacts.
runs:
  using: "composite"
  steps:
    - name: Checkout repository
      # Bumped v2 -> v4 for consistency with the calling workflows
      # (checkout@v2 runs on a deprecated Node runtime).
      uses: actions/checkout@v4

    - name: Change ownership of /home/runner/
      shell: bash
      run: sudo chown -R $(whoami) /home/runner/

    - name: Setup python
      uses: actions/setup-python@v5
      with:
        # Quoted so YAML does not parse the version as a float.
        python-version: "3.12"

    - name: Install uuidgen
      shell: bash -x -e -u -o pipefail {0}
      run: |
        apt-get update
        apt-get install -y uuid-runtime

    - name: Create run-script (unit test)
      shell: bash -x -e -u -o pipefail {0}
      if: inputs.is_unit_test == 'true'
      run: |
        echo "::group::Create run-script"
        # Quoted heredoc delimiter: bash expands nothing inside it; the
        # ${{ }} tokens are substituted by GitHub before the step runs.
        cmd=$(cat <<'RUN_TEST_EOF'
        #!/bin/bash
        export PYTHONPATH=$(pwd)
        export NEMORUN_HOME=$(pwd)
        pip install --no-cache-dir uv
        uv sync --only-group test
        uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \
        --scope unit-tests \
        --model unit-tests \
        --test-case "${{ inputs.test_case }}" \
        --environment dev \
        --platform dgx_h100 \
        --tag ${{ inputs.tag }} \
        --container-image ${{ inputs.container-image }}
        RUN_TEST_EOF
        )
        echo "$cmd" | tee "job.sh"
        echo "::endgroup::"

    - name: Get PR info
      id: get-pr-info
      if: startsWith(github.ref, 'refs/heads/pull-request/')
      uses: nv-gha-runners/get-pr-info@main

    - name: Install GH CLI
      shell: bash -x -e -u -o pipefail {0}
      run: |
        apt-get update
        apt-get install -y gh

    - name: Has Run tests label
      shell: bash -x -e -u -o pipefail {0}
      id: has-run-tests-label
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
        # BUGFIX: assign the fallback on failure; the old `|| echo "false"`
        # only printed to stdout and left the variable empty.
        HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') || HAS_RUN_TESTS_LABEL="false"
        echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT

    - name: Create run-script (e2e test)
      shell: bash -x -e -u -o pipefail {0}
      if: inputs.is_unit_test == 'false'
      env:
        # NOTE(review): MODEL is exported but the script below interpolates
        # ${{ inputs.model }} directly — confirm whether MODEL is still needed.
        MODEL: ${{ inputs.model }}
      run: |
        echo "::group::Create run-script"
        cmd=$(cat <<'RUN_TEST_EOF'
        #!/bin/bash
        set -euxo pipefail
        export PYTHONPATH=$(pwd)
        export NEMORUN_HOME=$(pwd)
        pip install --no-cache-dir uv
        uv sync --only-group test
        uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \
        --scope mr \
        --model ${{ inputs.model }} \
        --test-case ${{ inputs.test_case }} \
        --environment dev \
        --platform dgx_h100 \
        --container-image ${{ inputs.container-image }} \
        --data-dir /mnt/datadrive/TestData/megatron-lm/artifacts \
        --enable-lightweight-mode
        RUN_TEST_EOF
        )
        echo "$cmd" | tee "job.sh"
        echo "::endgroup::"

    - name: Set timeout
      shell: bash -x -e -u -o pipefail {0}
      id: timeout_in_seconds
      run: |
        echo "::group::Set timeout"
        echo "main=$(( ${{ inputs.timeout }} * 60 ))" | tee -a "$GITHUB_OUTPUT"
        echo "::endgroup::"

    - name: Pull container
      shell: bash -x -e -u -o pipefail {0}
      run: |
        echo "::group::Pull container"
        docker pull ${{ inputs.container-image }}
        echo "::endgroup::"

    - name: Run main script
      shell: bash -x -e -u -o pipefail {0}
      id: run-main-script
      run: |
        echo "::group::Run main script"
        EXIT_CODE=0
        /bin/bash job.sh || EXIT_CODE=$?
        echo "exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT"
        # BUGFIX: close the log group before exiting; the old order left the
        # ::endgroup:: after `exit`, unreachable.
        echo "::endgroup::"
        exit $EXIT_CODE

    - name: Check result
      id: check
      shell: bash -x -e -u -o pipefail {0}
      if: always()
      env:
        IS_UNIT_TEST: ${{ inputs.is_unit_test == 'true' }}
      run: |
        echo "::group::Check result"
        # Artifact names must not contain '/' or '*'.
        logs_report=logs-${{ inputs.test_case }}-${{ github.run_id }}-$(uuidgen)
        echo "logs_report=$logs_report" | sed 's/\//-/g' | sed 's/\*/-/g' | tee -a "$GITHUB_OUTPUT"
        if [[ "$IS_UNIT_TEST" == "true" ]]; then
          coverage_report=coverage-${{ inputs.is_unit_test == 'true' && 'unit-test' || 'e2e' }}-${{ github.run_id }}-$(uuidgen)
        else
          coverage_report=none
        fi
        echo "coverage_report=$coverage_report" | tee -a "$GITHUB_OUTPUT"
        EXIT_CODE=${{ steps.run-main-script.outputs.exit_code }}
        IS_SUCCESS=$([[ "$EXIT_CODE" -eq 0 ]] && echo "true" || echo "false")
        if [[ "$IS_SUCCESS" == "false" && "${{ inputs.is-optional }}" == "true" ]]; then
          echo "::warning:: Test failed, but displayed as successful because it is marked as optional."
          IS_SUCCESS=true
        fi
        if [[ "$IS_SUCCESS" == "false" ]]; then
          echo Test did not finish successfully.
          echo "::endgroup::"
          exit 1
        fi
        if [[ "$coverage_report" != "none" ]]; then
          uv run coverage report -i
        fi
        echo "::endgroup::"
        # BUGFIX: was `exit $EXIT_CODE`, which re-failed the step for
        # optional tests even after declaring them successful above.
        exit 0

    - name: Upload coverage
      uses: actions/upload-artifact@v4
      if: ${{ always() && steps.check.outputs.coverage_report != 'none' }}
      with:
        name: ${{ steps.check.outputs.coverage_report }}
        path: |
          coverage.xml
          .coverage
        include-hidden-files: true

    - name: Upload logs
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: ${{ steps.check.outputs.logs_report }}
        path: ${{ inputs.is_unit_test == 'true' && 'logs' || 'assets_dir' }}
        include-hidden-files: true
# NOTE(review): orphaned fragment — these keys do not belong to the composite
# action above; presumably a separate sync-bot configuration file was
# concatenated here during extraction. Verify against the repository layout.
enabled: true
auto_sync_draft: false
auto_sync_ready: true
# What does this PR do ?
<!-- Add a one line overview of what this PR aims to accomplish. -->
:warning: For major changes (either in lines of code or in their impact), please make sure to first share and discuss a design doc with the team.
## Contribution process
```mermaid
flowchart LR
A[Pre-checks] --> B[PR Tests]
subgraph Code Review/Approval
C1[Expert Review] --> C2[Final Review]
end
B --> C1
C2 --> D[Merge]
```
### Pre-checks
- [ ] I want this PR in a versioned release and have added the appropriate Milestone (e.g., `Core 0.8`)
- [ ] I have added relevant unit tests
- [ ] I have added relevant functional tests
- [ ] I have added proper typing to my code [Typing guidelines](https://docs.python.org/3/library/typing.html)
- [ ] I have added relevant documentation
- [ ] I have run the [autoformatter.sh](https://github.com/NVIDIA/Megatron-LM/blob/main/tools/autoformat.sh) on my PR
### Code review
The following process is enforced via the CODEOWNERS file for changes into `megatron/core`. For changes outside of `megatron/core`, it is up to the PR author whether or not to tag the Final Reviewer team.
<details>
<summary>For MRs into `main` branch</summary>
#### (Step 1): Add PR label `Expert Review`
#### (Step 2): Collect the expert reviewers reviews
1. Attach the `Expert Review` label when your PR is ready for review.
2. GitHub auto-assigns expert reviewers based on your changes. They will get notified and pick up your PR soon.
:warning: Only proceed to the next step once all reviewers have approved, merge conflicts are resolved, and the CI is passing.
Final Review might get declined if these requirements are not fulfilled.
#### (Step 3): Final Review
1. Add `Final Review` label
2. GitHub auto-assigns final reviewers based on your changes. They will get notified and pick up your PR soon.
#### (Optional Step 4): Cherry-pick into release branch
If this PR also needs to be merged into `core_r*` release branches, after this PR has been merged, select `Cherry-pick` to open a new PR into the release branch.
</details>
<details>
<summary>For MRs into `dev` branch</summary>
The proposed review process for `dev` branch is under active discussion.
MRs are mergeable after one approval by either `eharper@nvidia.com` or `zijiey@nvidia.com`.
</details>
### Merging your PR
Any member of [`core-adlr`](https://github.com/orgs/NVIDIA/teams/core-adlr) and [`core-nemo`](https://github.com/orgs/NVIDIA/teams/core-nemo) will be able to merge your PR.
# Reusable workflow: bump the uv lock file on a target branch and open a PR.
# Quoted because a leading `~` would otherwise be easy to misread as YAML null.
name: "~Update dependencies template"

on:
  workflow_call:
    inputs:
      target-branch:
        required: true
        type: string
        description: "The target branch to bump"
    secrets:
      PAT:
        required: true
      AZURE_CLIENT_ID:
        required: true
      AZURE_TENANT_ID:
        required: true
      AZURE_SUBSCRIPTION_ID:
        required: true
      SSH_KEY:
        required: true
      SSH_PWD:
        required: true
jobs:
  # Computes the dated bump-branch name shared by the jobs below.
  pre-flight:
    runs-on: ubuntu-latest
    outputs:
      bump-branch: bump-ci-container-${{ steps.ref.outputs.date }}-${{ inputs.target-branch }}
      date: ${{ steps.ref.outputs.date }}
    steps:
      - name: Get date
        id: ref
        run: echo "date=$(date +%F)" | tee -a "$GITHUB_OUTPUT"

  # Builds the CI container, regenerates uv.lock inside it, uploads the result.
  update-lockfile:
    environment: nemo-ci
    runs-on: linux-amd64-cpu16
    needs: [pre-flight]
    env:
      SOURCE_BRANCH: ${{ needs.pre-flight.outputs.bump-branch }}
      TARGET_BRANCH: ${{ inputs.target-branch }}
    steps:
      - name: Install Azure CLI
        run: curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
      - name: Azure Login
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
      - name: Azure ACR Login
        run: az acr login --name nemoci
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          ref: ${{ env.TARGET_BRANCH }}
      - name: Build container
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          docker build -f docker/Dockerfile.ci.dev --build-arg FROM_IMAGE_NAME="nvcr.io/nvidia/pytorch:25.06-py3" --target=main -t megatron-core .
      - name: Create bump branch if not exists
        run: |
          if ! git ls-remote --exit-code origin $SOURCE_BRANCH; then
            git checkout -b $SOURCE_BRANCH $TARGET_BRANCH
            git push origin $SOURCE_BRANCH
          fi
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          ref: ${{ env.SOURCE_BRANCH }}
      - name: Upgrade lock file
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          # SECURITY: `-e GH_TOKEN` inherits the value from the step env
          # instead of interpolating the secret into docker's argv (where it
          # would be visible in the process list).
          docker run \
            --rm \
            -v $(pwd):/workspace \
            -w /workspace \
            -e GH_TOKEN \
            megatron-core \
            bash -c 'uv lock --upgrade'
      - name: Upload lock file
        uses: actions/upload-artifact@v4
        with:
          name: lock-file-${{ env.SOURCE_BRANCH }}
          path: uv.lock
create-pr:
needs: [update-lockfile, pre-flight]
runs-on: ubuntu-latest
environment: main
env:
SOURCE_BRANCH: ${{ needs.pre-flight.outputs.bump-branch }}
TARGET_BRANCH: ${{ inputs.target-branch }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
token: ${{ secrets.PAT }}
ref: ${{ env.TARGET_BRANCH }}
- name: Install GPG
run: sudo apt-get install -y gnupg2
- name: Import GPG key (for signing)
uses: crazy-max/ghaction-import-gpg@e89d40939c28e39f97cf32126055eeae86ba74ec
id: gpg-action
with:
gpg_private_key: ${{ secrets.SSH_KEY }}
passphrase: ${{ secrets.SSH_PWD }}
git_user_signingkey: true
git_commit_gpgsign: true
- name: Rebase against ${{ env.SOURCE_BRANCH }}
run: |
if git ls-remote --exit-code origin ${{ env.SOURCE_BRANCH }}; then
git fetch origin ${{ env.SOURCE_BRANCH }}
git rebase -S origin/${{ env.SOURCE_BRANCH }}
fi
- name: Download lock file
uses: actions/download-artifact@v4
with:
name: lock-file-${{ env.SOURCE_BRANCH }}
- name: Create Bump PR
uses: peter-evans/create-pull-request@v6
id: create-pull-request
env:
title: "chore(beep boop 🤖): Bump `uv.lock` (${{ inputs.target-branch}}) (${{ needs.pre-flight.outputs.date }})"
with:
branch: ${{ env.SOURCE_BRANCH }}
base: ${{ env.TARGET_BRANCH }}
title: ${{ env.title }}
token: ${{ secrets.PAT }}
body: |
🚀 PR to bump `uv.lock` in `${{ inputs.target-branch }}`.
📝 Please remember the following to-do's before merge:
- [ ] Verify the presubmit CI
🙏 Please merge this PR only if the CI workflow completed successfully.
commit-message: ${{ env.title }}
signoff: true
committer: "${{ steps.gpg-action.outputs.name }} <${{ steps.gpg-action.outputs.email }}>"
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Build, test, and publish a PyPi wheel (to testpypi).

on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"
      - "deploy-release/*"
  merge_group:
    types: [checks_requested]

defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

permissions:
  id-token: write
  contents: read

jobs:
  pre-flight:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5

  build-test-publish-wheel:
    needs: [pre-flight]
    if: |
      !(needs.pre-flight.outputs.docs_only == 'true'
      || needs.pre-flight.outputs.is_merge_group == 'true'
      || needs.pre-flight.outputs.is_deployment_workflow == 'true')
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.63.1
    with:
      dry-run: true
      python-package: megatron.core
      python-version: "3.10"
      packaging: uv
      no-publish: ${{ !(github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) }}
      custom-container: nvcr.io/nvidia/pytorch:25.05-py3
      no-build-isolation: true
      runner: linux-amd64-cpu16
    secrets:
      TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
      TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
      SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
      SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
      GH_TOKEN: ${{ secrets.PAT }}

  # Status-summary gate: green when every completed job succeeded, or when
  # this run falls into an allowed-to-skip category.
  build-test-publish-wheel-summary:
    needs: [pre-flight, build-test-publish-wheel]
    # NOTE(review): `always()` makes the first three clauses redundant;
    # kept for documentation of the skippable cases.
    if: |
      (
        needs.pre-flight.outputs.docs_only == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
        || needs.pre-flight.outputs.is_deployment_workflow == 'true'
        || always()
      )
      && !cancelled()
    runs-on: ubuntu-latest
    env:
      # gh needs a token and an explicit repo (this job performs no checkout).
      GH_TOKEN: ${{ github.token }}
      # BUGFIX: SKIPPING_IS_ALLOWED was referenced but never defined, which
      # aborted the step under the workflow-wide `bash -u` shell. Allow
      # skipping in the same cases the job's `if` marks as skippable.
      SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' }}
    steps:
      - name: Result
        run: |
          # BUGFIX: assign the fallback on failure; `|| echo 0` only printed
          # to stdout and left FAILED_JOBS empty.
          FAILED_JOBS=$(gh run view $GITHUB_RUN_ID -R "$GITHUB_REPOSITORY" --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || FAILED_JOBS=0
          if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "${SKIPPING_IS_ALLOWED:-false}" == "true" ]; then
            echo "✅ All previous jobs completed successfully"
            exit 0
          else
            echo "❌ Found $FAILED_JOBS failed job(s)"
            # Show which jobs failed
            gh run view $GITHUB_RUN_ID -R "$GITHUB_REPOSITORY" --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
            exit 1
          fi
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE(review): the name says "to main ... from release", but the trigger is
# a push to main and the pattern matches release branches — presumably this
# cherry-picks main commits into the release branches. Verify against the
# reusable workflow's semantics.
name: Create PR to main with cherry-pick from release

on:
  push:
    branches:
      - main

jobs:
  cherry-pick:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.65.9
    with:
      target-branches-pattern: 'core_(*dev_)?r[0-9]+\.[0-9]+\.[0-9]+'
    secrets:
      PAT: ${{ secrets.PAT }}
      SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
      SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Queue manager: every 5 minutes, approve waiting "CICD Megatron-LM" runs
# (oldest first) per target branch, up to a concurrency budget.
name: Approve Test Queue

on:
  schedule:
    - cron: "*/5 * * * *" # Runs every 5 minutes
  workflow_dispatch: # Allows manual triggering

jobs:
  approve-queue:
    runs-on: ubuntu-latest
    environment: main
    strategy:
      matrix:
        branch: [main, dev, others]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests
      - name: Approve waiting deployments
        env:
          GITHUB_TOKEN: ${{ secrets.PAT }}
          MAX_CONCURRENCY: ${{ vars.MAX_CONCURRENCY || 1 }}
          PYTHONUNBUFFERED: 1
        shell: python
        run: |
          import os
          import re

          import requests

          # GitHub API configuration.
          GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
          REPO = os.environ["GITHUB_REPOSITORY"]
          # NOTE(review): budget is halved — presumably to split capacity
          # between suites; confirm the intent of the // 2.
          MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY"]) // 2
          # BUGFIX: use the repository this workflow runs in; REPO was read
          # but unused while owner/name were hard-coded.
          API_BASE = f"https://api.github.com/repos/{REPO}"

          # Headers for GitHub API.
          headers = {
              "Authorization": f"token {GITHUB_TOKEN}",
              "Accept": "application/vnd.github.v3+json",
              "X-GitHub-Api-Version": "2022-11-28",
          }

          def make_request(endpoint, method="GET", data=None):
              """Make a request to the GitHub API; return parsed JSON or None on error."""
              url = f"{API_BASE}/{endpoint}"
              try:
                  if method == "GET":
                      response = requests.get(url, headers=headers)
                  else:
                      response = requests.post(url, headers=headers, json=data)
                  response.raise_for_status()
                  return response.json()
              except requests.exceptions.RequestException as e:
                  print(f"Error making request to {endpoint}: {str(e)}")
                  # e.response is None for connection-level failures.
                  if e.response is not None and hasattr(e.response, 'text'):
                      print(f"Response: {e.response.text}")
                  return None

          def is_pr_targeting_branch(workflow_run, target_branch):
              """
              Check if a workflow run belongs to a PR targeting the given branch.
              Extract PR number from head branch like 'pull-request/1913' and verify base branch.
              """
              print(workflow_run.get("head_branch", ""))
              head_branch = workflow_run.get("head_branch", "")
              match = re.match(r"pull-request/(\d+)", head_branch)
              if not match:
                  return False  # Not a PR branch pattern
              pr_number = int(match.group(1))
              # Fetch PR info from GitHub API.
              pr_info = make_request(f"pulls/{pr_number}")
              if not pr_info:
                  print(f"Failed to fetch PR #{pr_number}")
                  return False
              base_branch = pr_info.get("base", {}).get("ref")
              # "others" bucket collects PRs that target neither main nor dev.
              if (
                  (base_branch == target_branch) or
                  (base_branch != "main" and base_branch != "dev" and target_branch == "others")
              ):
                  print(f"PR #{pr_number} targets {target_branch}")
                  return True
              return False

          # Get current running and queued workflows.
          # BUGFIX: make_request returns None on API errors; `or {}` keeps a
          # transient failure from crashing with AttributeError.
          print("Fetching workflow runs...")
          queued_workflow_runs = (make_request("actions/runs?status=queued") or {}).get("workflow_runs", [])
          in_progress_workflow_runs = (make_request("actions/runs?status=in_progress") or {}).get("workflow_runs", [])

          # Filter for workflows belonging to PRs targeting ${{ matrix.branch }}.
          queued_workflow_runs = [run for run in queued_workflow_runs
                                  if run["name"] == "CICD Megatron-LM" and is_pr_targeting_branch(run, "${{ matrix.branch }}")]
          in_progress_workflow_runs = [run for run in in_progress_workflow_runs
                                       if run["name"] == "CICD Megatron-LM" and is_pr_targeting_branch(run, "${{ matrix.branch }}")]

          # Count running and queued workflows.
          queued_workflows = len(queued_workflow_runs)
          in_progress_workflows = len(in_progress_workflow_runs)
          total_workflows = queued_workflows + in_progress_workflows
          print(f"Current queued workflows (PRs targeting ${{ matrix.branch }}): {queued_workflows}")
          print(f"Current running workflows (PRs targeting ${{ matrix.branch }}): {in_progress_workflows}")
          print(f"Total workflows: {total_workflows}")
          print(f"Max concurrency: {MAX_CONCURRENCY}")
          if total_workflows >= MAX_CONCURRENCY:
              print("Maximum concurrency reached, no new approvals will be made")
              exit(0)

          # Get waiting CI workflows for test environment.
          print("Fetching deployments...")
          pending_workflows = (make_request("actions/runs?status=waiting") or {}).get("workflow_runs", [])
          print("Pending workflows:", len(pending_workflows))
          pending_workflows = [run for run in pending_workflows
                               if run["name"] == "CICD Megatron-LM" and is_pr_targeting_branch(run, "${{ matrix.branch }}")]

          # Approve oldest first until the budget is exhausted.
          print("Sorting workflows...")
          pending_workflows = sorted(pending_workflows, key=lambda x: x["created_at"])
          print(f"Processing {len(pending_workflows)} pending workflows...")
          for workflow in pending_workflows:
              if total_workflows >= MAX_CONCURRENCY:
                  print("Maximum concurrency reached, stopping approvals")
                  break
              workflow_id = workflow["id"]
              workflow_name = workflow["display_title"]
              print(f"Approving workflow {workflow_name} with Run Id: {workflow_id}")
              deployment_url = f"actions/runs/{workflow_id}/pending_deployments"
              # BUGFIX: guard against an error response or an empty list
              # before indexing [0].
              pending_deployments = make_request(deployment_url) or []
              if not pending_deployments:
                  print(f"No pending deployments for run {workflow_id}, skipping")
                  continue
              deployment = pending_deployments[0]
              environment_id = deployment["environment"]["id"]
              # Approve the deployment.
              status_data = {
                  "environment_ids": [environment_id],
                  "state": "approved",
                  "comment": "Automatically approved by queue manager"
              }
              result = make_request(deployment_url, method="POST", data=status_data)
              if result:
                  total_workflows += 1
              else:
                  print(f"Failed to approve deployment {deployment['id']}")
                  exit(1)
notify:
if: failure()
runs-on: ubuntu-latest
needs: [approve-queue]
steps:
- name: Notify
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACK_WEBHOOK_ADMIN: <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_REPOSITORY: ${{ github.repository }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data "{\"text\":\":robot_joy: <https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}|Test-queue-approval-bot workflow> failed. Please review manually.\n\ncc ${SLACK_WEBHOOK_ADMIN}\"}" \
$SLACK_WEBHOOK
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Main CI pipeline: nightly schedule, pushes to dev/main/PR/deploy branches,
# merge queue, and manual dispatch.
name: CICD Megatron-LM

on:
  schedule:
    # Quoted: an unquoted cron with '*' is fragile in plain YAML scalars.
    - cron: "0 0 * * *"
  push:
    branches:
      - dev
      - main
      - "pull-request/[0-9]+"
      - "deploy-release/*"
  merge_group:
    types: [checks_requested]
  workflow_dispatch:

# One run per ref/event; newer pushes cancel in-flight runs.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read

env:
  container-registry: 766267172432.dkr.ecr.us-east-1.amazonaws.com
jobs:
  # Gate: fails (blocking downstream jobs) unless the PR author is a repo or
  # org collaborator, or the run is main/merge-group/scheduled.
  is-not-external-contributor:
    runs-on: ubuntu-latest
    environment: nemo-ci
    outputs:
      # NOTE(review): 'User' is the account type of every personal account,
      # so this flag alone does not identify external contributors — the real
      # gate is the membership check below. Confirm consumers of this output.
      is_external_contributor: ${{ github.event.pull_request.user.type == 'User' }}
    permissions:
      issues: write
      pull-requests: write
    env:
      GITHUB_TOKEN: ${{ secrets.PAT }}
      REPO: ${{ github.repository }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ env.GITHUB_TOKEN }}
      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/')
        uses: nv-gha-runners/get-pr-info@main
      - name: Check membership
        id: check-membership
        env:
          IS_MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }}
          IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }}
          SCHEDULED_JOB: ${{ github.event_name == 'schedule' }}
        run: |
          PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }}
          if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then
            echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT
            exit 0
          fi
          echo "Checking if $PR_AUTHOR is a repo collaborator..."
          API_URL="https://api.github.com/repos/$REPO/collaborators/$PR_AUTHOR"
          REPO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer $GITHUB_TOKEN" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            $API_URL)
          echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA-NeMo..."
          API_URL="https://api.github.com/orgs/NVIDIA-NeMo/members/$PR_AUTHOR"
          ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer $GITHUB_TOKEN" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            $API_URL)
          echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA..."
          API_URL="https://api.github.com/orgs/NVIDIA/members/$PR_AUTHOR"
          ORG_NVIDIA_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer $GITHUB_TOKEN" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            $API_URL)
          # 204 means membership confirmed for each endpoint.
          if [ "$REPO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_MEMBERSHIP_RESPONSE" -eq 204 ]; then
            echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT
          else
            echo "is_maintainer=false" | tee -a $GITHUB_OUTPUT
          fi
      - name: Find Comment
        uses: peter-evans/find-comment@v4
        if: startsWith(github.ref, 'refs/heads/pull-request/')
        id: fc
        with:
          issue-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
          repository: ${{ github.repository }}
          body-includes: "<!--external-contributor-comment-->"
      - name: Delete comment
        uses: actions/github-script@v7
        if: startsWith(github.ref, 'refs/heads/pull-request/') && steps.fc.outputs.comment-id != ''
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.deleteComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: ${{ steps.fc.outputs.comment-id }}
            })
      - name: Write pull request comment
        if: startsWith(github.ref, 'refs/heads/pull-request/') && steps.check-membership.outputs.is_maintainer == 'false'
        uses: peter-evans/create-or-update-comment@v5
        with:
          issue-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
          repository: ${{ github.repository }}
          body: |
            <!--external-contributor-comment-->
            Thank you for your contribution!
            NVIDIA Megatron-LM is currently transitioning to development on Github. We will aim to review your PR after we complete our transition and stabilize our Github development process.
            Thank you for your understanding.
      - name: exit
        run: |
          if [ "${{ steps.check-membership.outputs.is_maintainer }}" == "true" ]; then
            exit 0
          else
            exit 1
          fi
pre-flight:
needs: [is-not-external-contributor]
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.10
linting:
runs-on: ubuntu-latest
needs: [pre-flight]
if: |
(
needs.pre-flight.outputs.is_deployment_workflow == 'false'
&& needs.pre-flight.outputs.is_ci_workload == 'true'
) || (
needs.pre-flight.outputs.is_deployment_workflow == 'false'
&& needs.pre-flight.outputs.is_ci_workload == 'false'
&& needs.pre-flight.outputs.docs_only == 'false'
)
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install uv
uses: astral-sh/setup-uv@v1
with:
version: 0.7.2
- name: Install linting tools
run: |
uv sync --locked --only-group linting
- name: Get PR info
id: get-pr-info
if: startsWith(github.ref, 'refs/heads/pull-request/')
uses: nv-gha-runners/get-pr-info@main
- name: Run linting
if: startsWith(github.ref, 'refs/heads/pull-request/')
run: |
export PATH=".venv/bin:$PATH"
export GITLAB_ENDPOINT=github.com
export CI_PROJECT_NAMESPACE=NVIDIA
export BASE_REF="${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }}"
export CHECK_ONLY=true
export SKIP_DOCS=false
bash tools/autoformat.sh
cicd-wait-in-queue:
runs-on: ubuntu-latest
needs: [pre-flight, linting]
environment: ${{ needs.pre-flight.outputs.is_merge_group == 'true' && 'merge-gate' || 'test' }}
if: |
!(needs.pre-flight.outputs.is_ci_workload == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true'
|| needs.pre-flight.outputs.docs_only == 'true')
steps:
- name: Running CI tests
run: |
echo "Running CI tests"
echo "is_merge_group: ${{ needs.pre-flight.outputs.is_merge_group }}"
cicd-container-build:
needs: [pre-flight, cicd-wait-in-queue]
runs-on: nvidia-ci-aws-gpu-x8
environment: nemo-ci
if: |
(
success()
|| needs.pre-flight.outputs.is_ci_workload == 'true'
|| needs.pre-flight.outputs.force_run_all == 'true'
)
&& needs.pre-flight.outputs.is_merge_group == 'false'
&& !cancelled()
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup python
uses: actions/setup-python@v5
with:
python-version: 3.12
- name: Get PR info
id: get-pr-info
if: startsWith(github.ref, 'refs/heads/pull-request/')
uses: nv-gha-runners/get-pr-info@main
- name: Download test data
shell: bash
env:
GH_TOKEN: ${{ secrets.PAT }}
run: |
echo "::group::Download test data"
pip install --no-cache-dir pygithub click
python tests/test_utils/python_scripts/download_unit_tests_dataset.py --assets-dir ./assets
echo "::endgroup::"
- name: Install GH CLI
shell: bash
run: |
apt-get update
apt-get install -y gh
- name: Pull cache
run: |
docker pull ${{ env.container-registry }}/megatron-lm:main || true
docker pull ${{ env.container-registry }}/megatron-lm:${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} || true
- name: Get last merged PR
id: cache_from
env:
GH_TOKEN: ${{ github.token }}
run: |
LAST_PRS=$(gh api graphql -f query='
query {
repository(owner: "NVIDIA", name: "Megatron-LM") {
pullRequests(states: MERGED, first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) {
nodes {
number
}
}
}
}' | jq -r '.data.repository.pullRequests.nodes[].number' | while read -r number; do
echo "${{ env.container-registry }}/megatron-lm:$number"
done)
echo "LAST_PRS<<EOF" | tee -a $GITHUB_OUTPUT
echo "$LAST_PRS" | tee -a $GITHUB_OUTPUT
echo "EOF" | tee -a $GITHUB_OUTPUT
- name: Build and push
uses: docker/build-push-action@v5
with:
file: ./docker/Dockerfile.ci.dev
push: true
context: .
target: main
build-args: |
FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:25.09-py3
cache-from: |
${{ env.container-registry }}/megatron-lm:${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number || 0 }}
${{ env.container-registry }}/megatron-lm:main
${{ steps.cache_from.outputs.LAST_PRS }}
no-cache: false
tags: |
${{ env.container-registry }}/megatron-lm:${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number || 0 }}
${{ env.container-registry }}/megatron-lm:${{ github.sha }}
secrets: |
GH_TOKEN=${{ secrets.PAT }}
cicd-parse-unit-tests:
runs-on: ubuntu-latest
outputs:
unit-tests: ${{ steps.parse-unit-tests.outputs.unit-tests }}
needs:
- pre-flight
- cicd-wait-in-queue
- cicd-container-build
if: |
(
success()
|| needs.pre-flight.outputs.is_ci_workload == 'true'
|| needs.pre-flight.outputs.force_run_all == 'true'
)
&& needs.pre-flight.outputs.is_merge_group == 'false'
&& !cancelled()
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Parse unit tests
id: parse-unit-tests
run: |
cat tests/test_utils/recipes/unit-tests.yaml | yq -o json '[.products[].test_case[] | { "bucket": .}] | sort_by(.model, .test_case)' | jq -c > unit-tests.json
echo "unit-tests=$(cat unit-tests.json)" | tee -a $GITHUB_OUTPUT
cicd-unit-tests-latest:
strategy:
fail-fast: false
matrix:
include: ${{ fromJson(needs.cicd-parse-unit-tests.outputs.unit-tests) }}
needs:
- pre-flight
- cicd-wait-in-queue
- cicd-container-build
- cicd-parse-unit-tests
runs-on: nvidia-ci-aws-gpu-x8
name: "${{ matrix.bucket }} - latest"
environment: nemo-ci
if: |
(
success()
|| needs.pre-flight.outputs.is_ci_workload == 'true'
|| needs.pre-flight.outputs.force_run_all == 'true'
)
&& needs.pre-flight.outputs.is_merge_group == 'false'
&& !cancelled()
env:
PIP_DISABLE_PIP_VERSION_CHECK: 1
PIP_NO_PYTHON_VERSION_WARNING: 1
PIP_ROOT_USER_ACTION: ignore
steps:
- name: Checkout
uses: actions/checkout@v4
- name: main
uses: ./.github/actions
with:
test_case: ${{ matrix.bucket }}
tag: latest
timeout: ${{ matrix.timeout || 30 }}
is_unit_test: "true"
PAT: ${{ secrets.PAT }}
container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }}
cicd-parse-integration-tests:
runs-on: ubuntu-latest
needs:
- pre-flight
- cicd-wait-in-queue
- cicd-container-build
- cicd-unit-tests-latest
if: |
(
success()
|| needs.pre-flight.outputs.is_ci_workload == 'true'
|| needs.pre-flight.outputs.force_run_all == 'true'
)
&& needs.pre-flight.outputs.is_merge_group == 'false'
&& !cancelled()
outputs:
integration-tests: ${{ steps.main.outputs.integration-tests }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Get PR info
id: get-pr-info
if: startsWith(github.ref, 'refs/heads/pull-request/')
uses: nv-gha-runners/get-pr-info@main
- name: Has Run tests label
id: has-run-tests-label
env:
GH_TOKEN: ${{ secrets.PAT }}
run: |
PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') || echo "false"
echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT
- name: Parse functional tests
id: main
env:
HAS_RUN_TESTS_LABEL: ${{ steps.has-run-tests-label.outputs.HAS_RUN_TESTS_LABEL }}
run: |
export PYTHONPATH=$(pwd)
if [ "$HAS_RUN_TESTS_LABEL" == "true" ]; then
ARGS=(
--scope mr
--enable-lightweight-mode
)
else
ARGS=(
--scope mr-slim
)
fi
python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
--n-repeat 5 \
--time-limit 2700 \
--test-cases all \
--container-image mcore_ci_dev \
--container-tag latest \
--dependent-job functional:configure \
--record-checkpoints false \
--slurm-account gh \
--no-enable-warmup \
--environment dev \
--platform dgx_h100 \
--cluster ghci \
${ARGS[@]} \
--output-path integration-tests.yaml
cat integration-tests.yaml | \
yq -o json 'del(.default, .stages, .workflow) | to_entries | map({"model": .value.stage, "test_case": .key}) | sort_by(.model, .test_case)' | jq -c > integration-tests.json
echo "integration-tests=$(cat integration-tests.json)" | tee -a "$GITHUB_OUTPUT"
cicd-integration-tests-latest:
strategy:
fail-fast: false
matrix:
include: ${{ fromJson(needs.cicd-parse-integration-tests.outputs.integration-tests) }}
needs:
- pre-flight
- cicd-wait-in-queue
- cicd-parse-integration-tests
- cicd-unit-tests-latest
runs-on: nvidia-ci-aws-gpu-x8
name: "${{ matrix.model }}/${{ matrix.test_case }} - latest"
environment: nemo-ci
env:
PIP_DISABLE_PIP_VERSION_CHECK: 1
PIP_NO_PYTHON_VERSION_WARNING: 1
PIP_ROOT_USER_ACTION: ignore
if: |
(
success()
|| needs.pre-flight.outputs.is_ci_workload == 'true'
|| needs.pre-flight.outputs.force_run_all == 'true'
)
&& needs.pre-flight.outputs.is_merge_group == 'false'
&& !cancelled()
steps:
- name: Checkout
uses: actions/checkout@v4
- name: main
uses: ./.github/actions
with:
test_case: ${{ matrix.test_case }}
model: ${{ matrix.model }}
tag: latest
timeout: ${{ matrix.timeout || 30 }}
is_unit_test: "false"
PAT: ${{ secrets.PAT }}
container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }}
Nemo_CICD_Test:
needs:
- pre-flight
- cicd-unit-tests-latest
- cicd-integration-tests-latest
if: |
(
needs.pre-flight.outputs.docs_only == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true'
|| needs.pre-flight.outputs.is_ci_workload == 'true'
|| needs.pre-flight.outputs.is_merge_group == 'true'
|| always()
)
&& !cancelled()
runs-on: ubuntu-latest
permissions: write-all
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Get workflow result
id: result
shell: bash -x -e -u -o pipefail {0}
env:
GH_TOKEN: ${{ github.token }}
RUN_ID: ${{ github.run_id }}
SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }}
run: |
FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "failure")] | length' || echo 0)
SKIPPED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion == "skipped")] | length' || echo 0)
if [ "${FAILED_JOBS:-0}" -eq 0 ] && ([ "${SKIPPED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]); then
echo "✅ All previous jobs completed successfully"
exit 0
else
echo "❌ Found $FAILED_JOBS failed job(s)"
# Show which jobs failed
gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion == "failure") | .name'
exit 1
fi
Coverage_Fake:
runs-on: ubuntu-latest
needs: [Nemo_CICD_Test, pre-flight]
if: |
(
needs.pre-flight.outputs.docs_only == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true'
|| github.event_name == 'merge_group'
)
&& needs.pre-flight.outputs.is_ci_workload == 'false'
&& !cancelled()
environment: nemo-ci
steps:
- name: Generate fake coverage report
uses: actions/github-script@v6
with:
github-token: ${{ secrets.PAT }}
script: |
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: context.sha,
state: 'success',
description: 'No code changes - coverage check skipped',
context: 'codecov/patch'
});
Coverage:
runs-on: ubuntu-latest
needs: [Nemo_CICD_Test, pre-flight]
if: |
(
(needs.pre-flight.outputs.is_ci_workload == 'true' && !failure())
|| success()
)
&& !cancelled()
strategy:
matrix:
flag: [unit-test]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Download coverage reports of current branch
uses: actions/download-artifact@v4
with:
pattern: coverage-${{ matrix.flag }}-*
- name: List coverage files
run: find . -type f -name "*.xml" -o -name "*.lcov"
- name: Get total coverage of current branch
shell: bash -x -e -u -o pipefail {0}
if: always()
run: |
pip install coverage
ls -al .
ls -al coverage-*/
coverage combine --keep $(ls coverage-*/.coverage)
coverage report -i
rm -rf coverage-*
ls -al
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true
flags: ${{ matrix.flag }}
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
name: coverage-${{ matrix.flag }}-aggregated
path: |
.coverage
include-hidden-files: true
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Stale-Close-Inactive-Issues-PRs
on:
schedule:
- cron: "30 1 * * *"
jobs:
close-issues:
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_close_inactive_issue_pr.yml@v0.44.0
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Community Bot
on:
issues:
types: [opened, edited, reopened, closed, deleted]
issue_comment:
types: [created, edited, deleted]
jobs:
community-bot:
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_community_bot.yml@v0.65.10
secrets:
GH_TOKEN: ${{ secrets.PAT }}
environment: main
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Copyright check
on:
push:
branches:
- dev
- main
- "pull-request/[0-9]+"
- "deploy-release/*"
jobs:
pre-flight:
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.64.2
copyright-check:
needs: [pre-flight]
if: |
!(needs.pre-flight.outputs.docs_only == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true')
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.65.11
copyright-check-summary:
needs: [pre-flight, copyright-check]
if: |
(
needs.pre-flight.outputs.docs_only == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true'
|| always()
)
&& !cancelled()
runs-on: ubuntu-latest
steps:
- name: Result
env:
GH_TOKEN: ${{ github.token }}
SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' }}
run: |
FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length' || echo 0)
if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
echo "✅ All previous jobs completed successfully"
exit 0
else
echo "❌ Found $FAILED_JOBS failed job(s)"
# Show which jobs failed
gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
exit 1
fi
name: Dependabot
on:
schedule:
- cron: "0 8 * * 1"
workflow_dispatch: # Allow manual triggering
permissions:
id-token: write
contents: write
jobs:
get-release-branch-names:
runs-on: ubuntu-latest
environment: nemo-ci
outputs:
mcore: ${{ steps.get-branch.outputs.mcore_release_branch }}
steps:
- name: Get release branch names
id: get-branch
env:
PAT: ${{ secrets.PAT }}
run: |
latest_branch=$(git ls-remote --heads https://token:${PAT}@github.com/NVIDIA-NeMo/Eval.git 'refs/heads/r*' |
grep -o 'core_r[0-9]\+\.[0-9]\+\.[0-9]\+' |
sort -V |
tail -n1)
echo "mcore_release_branch=$latest_branch" >> $GITHUB_OUTPUT
bump-tags:
needs: [get-release-branch-names]
strategy:
fail-fast: false
matrix:
include:
- target-branch: ${{ needs.get-release-branch-names.outputs.mcore }}
- target-branch: main
uses: ./.github/workflows/_update_dependencies.yml
with:
target-branch: ${{ matrix.target-branch }}
secrets:
PAT: ${{ secrets.PAT }}
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
SSH_KEY: ${{ secrets.SSH_KEY }}
SSH_PWD: ${{ secrets.SSH_PWD }}
notify:
if: failure()
runs-on: ubuntu-latest
needs: [bump-tags]
steps:
- name: Notify
env:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACK_WEBHOOK_ADMIN: <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>
GITHUB_RUN_ID: ${{ github.run_id }}
GITHUB_REPOSITORY: ${{ github.repository }}
run: |
curl -X POST \
-H 'Content-type: application/json' \
--data "{\"text\":\":robot_joy: <https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}|Dependabot workflow> failed. Please fix manually.\n\ncc ${SLACK_WEBHOOK_ADMIN}\"}" \
$SLACK_WEBHOOK
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This workflow verifies that the basic install works across all supported platforms.
# For basic install, all imports need to either be successful or appropriately guarded.
name: Installation Test
on:
push:
branches:
- dev
- main
- "pull-request/[0-9]+"
- "deploy-release/*"
merge_group:
types: [checks_requested]
jobs:
pre-flight:
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.65.5
pip-test-pytorch:
needs: [pre-flight]
if: |
!(needs.pre-flight.outputs.docs_only == 'true'
|| needs.pre-flight.outputs.is_merge_group == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true')
runs-on: linux-amd64-cpu16
name: Pip - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch
container:
image: nvcr.io/nvidia/pytorch:25.05-py3
environment: nemo-ci
strategy:
fail-fast: false
matrix:
python-version: ["3.12"]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set PATH
run: |
echo "UV_PROJECT_ENVIRONMENT=/opt/venv" | tee -a "$GITHUB_ENV"
echo "UV_LINK_MODE=copy" | tee -a "$GITHUB_ENV"
echo "CUDA_HOME=/usr/local/cuda" | tee -a "$GITHUB_ENV"
echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" | tee -a "$GITHUB_ENV"
echo "PATH=$HOME/.local/bin:$PATH:$CUDA_HOME/bin" | tee -a "$GITHUB_ENV"
echo "TORCH_CUDA_ARCH_LIST=6.0;6.1;7.0;7.5;8.0;8.6;9.0" | tee -a "$GITHUB_ENV"
- name: Install megatron-core
shell: bash -x -e -u -o pipefail {0}
run: bash docker/common/install.sh --environment dev --base-image pytorch --python-version ${{ matrix.python-version }}
- name: Checkout check-imports
uses: actions/checkout@v4
with:
repository: NVIDIA-NeMo/FW-CI-templates
ref: v0.63.2
path: FW-CI-templates
- name: Check imports for megatron-core
uses: ./FW-CI-templates/.github/actions/check-imports
with:
package-name: megatron.core
python-binary: ${{ env.UV_PROJECT_ENVIRONMENT }}/bin/python
uv-test-pytorch:
needs: [pre-flight]
if: |
!(needs.pre-flight.outputs.docs_only == 'true'
|| needs.pre-flight.outputs.is_merge_group == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true')
runs-on: linux-amd64-cpu16
name: UV - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch
container:
image: nvcr.io/nvidia/pytorch:25.05-py3
environment: nemo-ci
strategy:
fail-fast: false
matrix:
python-version: ["3.12"]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set PATH
run: |
echo "UV_PROJECT_ENVIRONMENT=/opt/venv" | tee -a "$GITHUB_ENV"
echo "VIRTUAL_ENV=/opt/venv" | tee -a "$GITHUB_ENV"
echo "UV_LINK_MODE=copy" | tee -a "$GITHUB_ENV"
echo "CUDA_HOME=/usr/local/cuda" | tee -a "$GITHUB_ENV"
echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" | tee -a "$GITHUB_ENV"
echo "PATH=$HOME/.local/bin:$PATH:$CUDA_HOME/bin" | tee -a "$GITHUB_ENV"
echo "CUDACXX=/usr/local/cuda/bin/nvcc" | tee -a "$GITHUB_ENV"
echo "TORCH_CUDA_ARCH_LIST=6.0;6.1;7.0;7.5;8.0;8.6;9.0" | tee -a "$GITHUB_ENV"
- name: Install project
shell: bash
run: bash docker/common/install.sh --environment dev --base-image pytorch --use-uv
# NGC PyTorch 25.05 has a version of triton that is broken on CPU only machines.
# - name: Checkout check-imports
# uses: actions/checkout@v4
# with:
# repository: NVIDIA-NeMo/FW-CI-templates
# ref: v0.63.2
# path: FW-CI-templates
# - name: Check imports for megatron-core
# uses: ./FW-CI-templates/.github/actions/check-imports
# with:
# package-name: megatron.core
# python-binary: ${{ env.UV_PROJECT_ENVIRONMENT }}/bin/python
install-test-summary:
needs: [pre-flight, pip-test-pytorch, uv-test-pytorch]
runs-on: ubuntu-latest
name: Install test summary
if: |
(
needs.pre-flight.outputs.docs_only == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true'
|| always()
)
&& !cancelled()
steps:
- name: Get workflow result
id: result
shell: bash -x -e -u -o pipefail {0}
env:
GH_TOKEN: ${{ github.token }}
RUN_ID: ${{ github.run_id }}
SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' }}
run: |
FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length' || echo 0)
if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
echo "✅ All previous jobs completed successfully"
exit 0
else
echo "❌ Found $FAILED_JOBS failed job(s)"
# Show which jobs failed
gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
exit 1
fi
__pycache__
*.so
build
.coverage_*
*.egg-info
*~
slurm*
logs
.vscode
local/
.gitmodules
wandb/
onelogger.log
onelogger.err
.venv
runs/
/test_cases/
**/dist/
\ No newline at end of file
.merge_train_rule: &merge_train_rule
UNIT_TEST: "yes"
UNIT_TEST_REPEAT: 1
UNIT_TEST_TIMEOUT: 30
INTEGRATION_TEST: "no"
INTEGRATION_TEST_SCOPE: mr
FUNCTIONAL_TEST: "yes"
FUNCTIONAL_TEST_SCOPE: mr-slim
FUNCTIONAL_TEST_REPEAT: 1
FUNCTIONAL_TEST_TIME_LIMIT: 2700
CLUSTER_A100: ""
CLUSTER_H100: ""
PUBLISH: "no"
workflow:
rules:
# Do not trigger for forks
- if: $CI_PROJECT_NAMESPACE != "ADLR" || ($CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_PROJECT_PATH != "ADLR/megatron-lm")
when: never
# ci-branches only for schedule
- if: $CI_COMMIT_BRANCH =~ /ci-/ && $CI_PIPELINE_SOURCE != "schedule"
when: never
# For schedules pipelines
- if: $CI_PIPELINE_SOURCE == "schedule"
auto_cancel:
on_new_commit: none
# For manual pipelines
- if: $CI_PIPELINE_SOURCE == "web"
# For push to main
- if: $CI_PIPELINE_SOURCE == 'push' && ($CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "dev" || $CI_COMMIT_BRANCH =~ /^core_/)
variables:
UNIT_TEST: "no"
INTEGRATION_TEST: "no"
FUNCTIONAL_TEST: "yes"
FUNCTIONAL_TEST_SCOPE: mr
FUNCTIONAL_TEST_REPEAT: 5
FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no"
FUNCTIONAL_TEST_TIME_LIMIT: 3600
CLUSTER_A100: ""
CLUSTER_H100: ""
PUBLISH: "no"
auto_cancel:
on_new_commit: interruptible
# For merge-trains that need to be fast-tracked
- if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train' && $CI_MERGE_REQUEST_LABELS =~ /fast-track/
variables:
UNIT_TEST: "yes"
UNIT_TEST_REPEAT: 1
UNIT_TEST_TIMEOUT: 30
INTEGRATION_TEST: "no"
FUNCTIONAL_TEST: "no"
CLUSTER_A100: ""
CLUSTER_H100: ""
PUBLISH: "no"
# For normal merge-trains
- if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train'
variables: *merge_train_rule
# For MRs with integration suite
- if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run tests/
variables:
UNIT_TEST: "yes"
UNIT_TEST_REPEAT: 1
UNIT_TEST_TIMEOUT: 30
INTEGRATION_TEST: "yes"
INTEGRATION_TEST_SCOPE: mr
FUNCTIONAL_TEST: "no"
FUNCTIONAL_TEST_SCOPE: mr-slim
FUNCTIONAL_TEST_REPEAT: 1
FUNCTIONAL_TEST_TIME_LIMIT: 2700
CLUSTER_A100: ""
CLUSTER_H100: ""
PUBLISH: "no"
# For MRs with nightly
- if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run nightly/
variables:
UNIT_TEST: "yes"
UNIT_TEST_REPEAT: 1
UNIT_TEST_TIMEOUT: 30
INTEGRATION_TEST: "no"
FUNCTIONAL_TEST: "yes"
FUNCTIONAL_TEST_SCOPE: nightly
FUNCTIONAL_TEST_REPEAT: 5
FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no"
FUNCTIONAL_TEST_TIME_LIMIT: 2700
CLUSTER_A100: ""
CLUSTER_H100: ""
PUBLISH: "no"
# For MRs with weekly
- if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run weekly/
variables:
UNIT_TEST: "yes"
UNIT_TEST_REPEAT: 1
UNIT_TEST_TIMEOUT: 30
INTEGRATION_TEST: "no"
FUNCTIONAL_TEST: "yes"
FUNCTIONAL_TEST_SCOPE: weekly
FUNCTIONAL_TEST_REPEAT: 1
FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no"
FUNCTIONAL_TEST_TIME_LIMIT: 9000
CLUSTER_A100: ""
CLUSTER_H100: ""
PUBLISH: "no"
# For MRs with heavy suite
- if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run functional tests/
variables:
UNIT_TEST: "yes"
UNIT_TEST_REPEAT: 1
UNIT_TEST_TIMEOUT: 30
INTEGRATION_TEST: "no"
FUNCTIONAL_TEST: "yes"
FUNCTIONAL_TEST_SCOPE: mr
FUNCTIONAL_TEST_REPEAT: 1
FUNCTIONAL_TEST_TIME_LIMIT: 2700
CLUSTER_A100: ""
CLUSTER_H100: ""
PUBLISH: "no"
# Default MRs
- if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result'
variables:
UNIT_TEST: "yes"
UNIT_TEST_REPEAT: 1
UNIT_TEST_TIMEOUT: 30
INTEGRATION_TEST: "no"
FUNCTIONAL_TEST: "no"
PUBLISH: "no"
- when: never
auto_cancel:
on_new_commit: interruptible
stages:
- build
- test
- integration_tests
- functional_tests
- publish
default:
interruptible: true
retry:
max: 2
when: runner_system_failure
variables:
UNIT_TEST:
value: "yes"
options:
- "yes"
- "no"
description: To run the unit test suite
UNIT_TEST_REPEAT:
value: "1"
description: "Number of repetitions"
UNIT_TEST_TIMEOUT:
value: "30"
description: Timeout (minutes) for Unit tests (all repeats)
INTEGRATION_TEST:
value: "yes"
options:
- "yes"
- "no"
description: To run the integration test suite
INTEGRATION_TEST_SCOPE:
value: "mr"
options:
- "mr"
- "nightly"
- "weekly"
- "pre-release"
- "release"
description: "Testsuite to run (only for INTEGRATION_TEST=yes)"
INTEGRATION_TEST_TIME_LIMIT:
value: "900"
description: "Timeout in seconds per test"
INTEGRATION_TEST_CASES:
value: "all"
description: "Comma-separated list of test_cases to run. Use 'all' to run the full suite."
FUNCTIONAL_TEST:
value: "yes"
options:
- "yes"
- "no"
description: To run the functional test suite
FUNCTIONAL_TEST_SCOPE:
value: "mr"
options:
- "mr"
- "nightly"
- "weekly"
- "pre-release"
- "release"
description: "Testsuite to run (only for FUNCTIONAL_TEST=yes)"
FUNCTIONAL_TEST_REPEAT:
value: "5"
description: "Number of repetitions per test"
FUNCTIONAL_TEST_TIME_LIMIT:
value: "2700"
description: "Timeout in seconds per test"
FUNCTIONAL_TEST_CASES:
value: "all"
description: "Comma-separated list of test_cases to run. Use 'all' to run the full suite."
FUNCTIONAL_TEST_NAME:
description: "Name of functional test run (only for pre-release and release)"
value: "$$CI_COMMIT_SHA"
FUNCTIONAL_TEST_RECORD_CHECKPOINTS:
value: "no"
description: "Record golden checkpoints"
options:
- "yes"
- "no"
CLUSTER_A100:
value: "dgxa100_dracooci"
options:
- "dgxa100_dracooci"
- "dgxa100_dracooci-ord"
description: "Cluster for A100 workloads"
CLUSTER_H100:
value: "dgxh100_coreweave"
options:
- "dgxh100_coreweave"
- "dgxh100_eos"
description: "Cluster for H100 workloads"
PUBLISH:
value: "no"
options:
- "yes"
- "no"
description: Build and publish a wheel to PyPi
PUBLISH_COMMIT:
value: "$$CI_COMMIT_SHA"
description: Which commit to publish
PUBLISH_VERSION_BUMP_BRANCH:
value: "$$CI_COMMIT_BRANCH"
description: Which branch to target for version bump
PUBLISH_SCOPE:
value: "code-freeze"
options:
- "code-freeze"
- "release"
- "review-reminder"
- "upgrade-dependencies"
description: Type of publish (freeze or final release)
# CI wide variables
CI_MCORE_LTS_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_ci_lts
CI_MCORE_DEV_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_ci_dev
CI_NEMO_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/nemo_ci
UTILITY_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_utility
TE_GIT_REF: ""
include:
- .gitlab/stages/00.pre.yml
- .gitlab/stages/01.build.yml
- .gitlab/stages/02.test.yml
- .gitlab/stages/03.integration-tests.yml
- .gitlab/stages/04.functional-tests.yml
- .gitlab/stages/05.publish.yml
CI:
- .gitlab-ci.yml
- Dockerfile.ci.lts
- Dockerfile.ci.dev
- .github/**
- .gitlab/**
Datasets:
- megatron/core/datasets/**
BERT:
- megatron/core/models/bert/**
GPT:
- megatron/core/models/gpt/**
RETRO:
- megatron/core/models/retro/**
Dist-Ckpt:
- megatron/core/dist_checkpointing
Dist-Opt:
- megatron/core/optimizer/distrib_optimizer
Inference:
- megatron/core/inference
MoE:
- megatron/core/transformer/moe
Tests:
- tests/**
ParallelState:
- megatron/core/parallel_state.py
#! /bin/bash
# CI helper: fetch unit-test assets via a throwaway container, then build and
# push the CI Docker image with registry-backed build caching.
#
# Required environment: IMAGE (name of a variable that itself holds the image
# ref), GH_TOKEN, STAGE, FILE, BASE_IMAGE, CI_PIPELINE_ID, ARTIFACTORY_USER,
# ARTIFACTORY_TOKEN. Optional: CI_COMMIT_BRANCH, CI_MERGE_REQUEST_IID,
# TE_GIT_REF.
set -x
env
# IMAGE holds the *name* of another variable (e.g. "CI_MCORE_DEV_IMAGE");
# dereference it so IMAGE becomes the actual image reference.
eval "IMAGE=\$$IMAGE"
# Start a named container in detached mode
docker run -d --name download_test_data -w /workdir/ python:3.12-slim bash -c 'sleep infinity'
# Copy the test sources in, then download the unit-test dataset inside the
# container (needs GH_TOKEN for the GitHub API).
docker cp tests/. download_test_data:/workdir/tests
docker exec -e GH_TOKEN=$GH_TOKEN download_test_data bash -c '
ls -al /workdir/
pip install --no-cache-dir pygithub click
python tests/test_utils/python_scripts/download_unit_tests_dataset.py --assets-dir ./assets
'
# Extract the downloaded assets to the host and remove the helper container.
docker cp download_test_data:/workdir/assets ./
docker rm -f download_test_data
# Buildx builder backed by the docker-container driver (required for
# --cache-to type=registry below).
docker context create tls-environment
docker buildx create --name container --driver=docker-container --use tls-environment
# Extra build flags depending on branch / merge-request context.
# NOTE: each element deliberately contains a space ("flag value") and the
# array is expanded UNQUOTED below so word splitting turns it into separate
# CLI arguments. Do not quote the expansion.
ADDITIONAL_PARAMS=()
if [[ "$CI_COMMIT_BRANCH" == "ci-rebuild-mcore-nemo-image" || "$CI_COMMIT_BRANCH" == "main" || "$CI_COMMIT_BRANCH" == "dev" ]]; then
ADDITIONAL_PARAMS+=("--pull")
ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:main,mode=max")
ADDITIONAL_PARAMS+=("-t ${IMAGE}:${CI_COMMIT_BRANCH}")
elif [[ -n "$CI_MERGE_REQUEST_IID" ]]; then
ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:${CI_MERGE_REQUEST_IID},mode=max")
ADDITIONAL_PARAMS+=("-t ${IMAGE}:${CI_MERGE_REQUEST_IID}")
fi
if [[ "$CI_COMMIT_BRANCH" == "ci-nightly" ]]; then
ADDITIONAL_PARAMS+=("-t ${IMAGE}:nightly")
fi
if [[ -n "$TE_GIT_REF" ]]; then
ADDITIONAL_PARAMS+=("--build-arg TE_COMMIT=${TE_GIT_REF}")
fi
# Log the commit being built.
echo $(git rev-parse HEAD)
# Resolve the newest jet-api version published on the internal PyPI index by
# scraping the simple-index HTML and version-sorting the hrefs.
JET_API_VERSION=$(curl -s -u "$ARTIFACTORY_USER:$ARTIFACTORY_TOKEN" "https://sc-hw-artf.nvidia.com/artifactory/api/pypi/hw-joc-pypi/simple/jet-api/" | grep -o 'href="../../jet-api/[0-9.]*/' | sed 's|href="../../jet-api/||;s|/||' | sort -V -r | head -n1)
# Build and push the image. Caches are pulled from both the MR-specific and
# the main buildcache tags; secrets are forwarded to the build stage.
DOCKER_BUILDKIT=1 docker build \
--secret id=JET_INDEX_URLS \
--secret id=LOGGER_INDEX_URL \
--target $STAGE \
-f docker/$FILE \
-t ${IMAGE}:${CI_PIPELINE_ID} \
--builder=container \
--build-arg JET_API_VERSION=$JET_API_VERSION \
--cache-from type=registry,ref=${IMAGE}-buildcache:${CI_MERGE_REQUEST_IID} \
--cache-from type=registry,ref=${IMAGE}-buildcache:main \
--build-arg FROM_IMAGE_NAME=$BASE_IMAGE \
--push \
--progress plain \
${ADDITIONAL_PARAMS[@]} .
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/usr/bin/env python3
"""
Import checker script for megatron.hub package.
This script recursively discovers all Python modules in the specified package
and attempts to import them, reporting any import errors.
"""
import importlib
import os
import sys
import traceback
from typing import Dict, List, Tuple
import click
class ImportChecker:
    """Recursively import every module of a package and report the results.

    Modules are discovered by walking the package's filesystem tree. Each
    module is imported fresh; failures whose traceback mentions
    "UnavailableError" are counted as gracefully-guarded optional
    dependencies rather than hard failures.
    """

    def __init__(self, package_name: str = "megatron.core", verbose: bool = False):
        # Package whose modules will be discovered and imported.
        # NOTE(review): `verbose` is accepted but currently unused; kept for
        # interface compatibility.
        self.package_name = package_name
        self.success_count = 0
        self.failure_count = 0
        self.graceful_count = 0
        # NOTE(review): skipped_count/skipped are never populated (skipping
        # happens during discovery); kept for interface compatibility.
        self.skipped_count = 0
        self.failures: Dict[str, str] = {}
        self.successes: List[str] = []
        self.graceful_failures: Dict[str, str] = {}
        self.skipped: List[str] = []
        # Substrings identifying modules that should not be imported
        # (caches, VCS metadata, test modules).
        self.skip_patterns = {
            "__pycache__",
            ".pytest_cache",
            ".git",
            "test_",
            "_test",
        }
        # Make sure the current working directory is importable.
        current_dir = os.getcwd()
        if current_dir not in sys.path:
            sys.path.insert(0, current_dir)

    def should_skip_module(self, module_name: str) -> bool:
        """Return True if the module name matches any skip pattern."""
        return any(pattern in module_name for pattern in self.skip_patterns)

    def discover_modules(self, package_path: str) -> List[str]:
        """Discover all Python modules under the given importable package.

        Args:
            package_path: Dotted name of an importable package.

        Returns:
            Sorted, de-duplicated list of fully-qualified module names
            (prefixed with ``self.package_name``).
        """
        modules: List[str] = []
        package = importlib.import_module(package_path)
        # Filesystem root of the package (first entry of __path__).
        root_dir = package.__path__[0]
        for root, dirs, files in os.walk(root_dir):
            # Skip hidden directories and __pycache__ in-place so os.walk
            # does not descend into them.
            dirs[:] = [d for d in dirs if not d.startswith(".") and d != "__pycache__"]
            for file in files:
                if file.endswith(".py") and not file.startswith("."):
                    rel_path = os.path.relpath(os.path.join(root, file), root_dir)
                    # Strip the ".py" suffix by slicing instead of
                    # str.replace(".py", ""), which would also corrupt any
                    # path component that merely contains ".py".
                    module_parts = rel_path[:-3].replace(os.sep, ".")
                    # Map package __init__ files onto the package itself.
                    if module_parts.endswith(".__init__"):
                        module_parts = module_parts[: -len(".__init__")]
                    elif module_parts == "__init__":
                        module_parts = ""
                    full_module_name = (
                        f"{self.package_name}.{module_parts}"
                        if module_parts
                        else self.package_name
                    )
                    if not self.should_skip_module(full_module_name):
                        modules.append(full_module_name)
        # De-duplicate and sort for stable output.
        return sorted(set(modules))

    def import_module(self, module_name: str) -> Tuple[str, str]:
        """
        Try to import a module and return success status and error message.

        Returns:
            Tuple of (status: str, error_message: str)
            status can be: "success", "graceful", or "failed"
        """
        try:
            # Drop any cached module so the import is exercised from scratch.
            if module_name in sys.modules:
                del sys.modules[module_name]
            importlib.import_module(module_name)
            return "success", ""
        except Exception:
            tb = traceback.format_exc()
            # "UnavailableError" in the traceback marks an appropriately
            # guarded optional dependency, not a genuine import bug.
            if "UnavailableError" in tb:
                return "graceful", "UnavailableError detected during import"
            return "failed", tb

    def check_all_imports(self):
        """Import every discovered module, print a summary, and return
        True when no hard failures occurred.

        Returns None (falsy) when no modules are discovered at all, which
        callers treat as a failure.
        """
        print(f"Discovering modules in package '{self.package_name}'...")
        modules = self.discover_modules(self.package_name)
        if not modules:
            print("No modules found!")
            return
        print(f"Found {len(modules)} modules to check")
        print("=" * 60)
        for module_name in modules:
            status, error_msg = self.import_module(module_name)
            if status == "success":
                self.success_count += 1
                self.successes.append(module_name)
            elif status == "graceful":
                self.graceful_count += 1
                self.graceful_failures[module_name] = error_msg
            else:  # failed
                self.failure_count += 1
                self.failures[module_name] = error_msg
        self._print_summary()
        return self.failure_count == 0

    def _print_summary(self) -> None:
        """Print aggregate counts plus details for every failed import."""
        total = (
            self.success_count
            + self.failure_count
            + self.graceful_count
            + self.skipped_count
        )
        print("\n" + "=" * 60)
        print("IMPORT CHECK SUMMARY")
        print("=" * 60)
        print(f"Total modules checked: {total}")
        print(
            f"Successful imports: {self.success_count} ({self.success_count / total * 100:.1f}%)"
        )
        print(
            f"Gracefully handled: {self.graceful_count} ({self.graceful_count / total * 100:.1f}%)"
        )
        print(
            f"Failed imports: {self.failure_count} ({self.failure_count / total * 100:.1f}%)"
        )
        if self.skipped_count > 0:
            print(
                f"Skipped modules: {self.skipped_count} ({self.skipped_count / total * 100:.1f}%)"
            )
        if self.graceful_failures:
            print(f"\n🟡 GRACEFULLY HANDLED ({len(self.graceful_failures)}):")
            print("-" * 40)
        if self.failures:
            print(f"\n❌ FAILED IMPORTS ({len(self.failures)}):")
            print("-" * 40)
            for module_name, error_msg in self.failures.items():
                print(f"\n{module_name}")
                # Show only non-empty traceback lines to keep output readable.
                for line in error_msg.split("\n"):
                    if line.strip():
                        print(f"  {line}")
@click.command()
@click.option(
    "--package-name",
    required=True,
    help="Package name to check imports for",
)
def main(package_name: str):
    """CLI entry point: run the import check for one package.

    Exits with status 0 when every module imports cleanly (or fails
    gracefully), and 1 otherwise.
    """
    import_checker = ImportChecker(package_name=package_name)
    all_ok = import_checker.check_all_imports()
    exit(0 if all_ok else 1)


if __name__ == "__main__":
    main()
#!/bin/bash
set -euxo pipefail

# Set up a second checkout of megatron-lm ("megatron-lm-legacy") at an older
# reference, then overlay the current megatron/ package and test harness on
# top of it for backwards-compatibility testing.

# Default values
MCORE_REPO="https://github.com/nvidia/megatron-lm.git"
MCORE_MR_COMMIT="main" # NOTE(review): unused in this script — confirm before removing
MCORE_BACKWARDS_COMMIT=""

# Print usage information and exit with a non-zero status.
usage() {
    cat <<EOF
Usage: $0 [OPTIONS]

Clone and setup megatron-lm repositories for testing.

Options:
    --repo URL                Git repository URL (default: $MCORE_REPO)
    --backwards-commit COMMIT Commit hash or reference for the backwards compatibility test
    --help                    Show this help message

Example:
    $0 --repo $MCORE_REPO \\
       --backwards-commit core_r0.12.0
EOF
    exit 1
}

# Parse command line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
    --repo)
        MCORE_REPO="$2"
        shift 2
        ;;
    --backwards-commit)
        MCORE_BACKWARDS_COMMIT="$2"
        shift 2
        ;;
    --help)
        usage
        ;;
    *)
        echo "Unknown option: $1"
        usage
        ;;
    esac
done

# Validate required arguments
if [[ -z "${MCORE_BACKWARDS_COMMIT:-}" ]]; then
    echo "Error: --backwards-commit is required"
    usage
fi

# Check out the backwards reference into a fresh megatron-lm-legacy/ tree.
rm -rf megatron-lm-legacy
mkdir megatron-lm-legacy
pushd megatron-lm-legacy
git init
# Quote expansions so URLs/refs containing shell-special characters survive
# word splitting and glob expansion intact.
git remote add origin "$MCORE_REPO"
git fetch origin "$MCORE_BACKWARDS_COMMIT"
git checkout "$MCORE_BACKWARDS_COMMIT"
git rev-parse HEAD

# Replace the legacy megatron/ package with the current one.
rm -rf megatron
cp -a ../megatron-lm/megatron ./
popd

# Copy unit test script and project config from the current checkout.
cp megatron-lm/tests/unit_tests/run_ci_test.sh megatron-lm-legacy/tests/unit_tests/run_ci_test.sh
cp megatron-lm/pyproject.toml megatron-lm-legacy/pyproject.toml
\ No newline at end of file
# Pull in GitLab's maintained secret-detection CI template.
include:
  - template: Security/Secret-Detection.gitlab-ci.yml
# Shared rule set for `.pre`-stage jobs: run only on merged-result MR
# pipelines; tolerate failure when the target branch is not protected.
.pre_rules:
  rules:
    # Unprotected target branch: still run, but don't block on failure.
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true"
      allow_failure: true
      when: always
    # Protected target branch: run and enforce the result.
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result'
      when: always
    # Any other pipeline source: never run.
    - when: never
  stage: .pre
# Shared Docker-in-Docker setup: prune stale images, then log in to nvcr.io
# and the GitLab registry before the extending job's own script runs.
.dind_rules:
  image: docker:26.1.4-dind
  variables:
    DOCKER_HOST: unix:///var/run/docker.sock
  before_script:
    # Best-effort cleanup of resources older than 36h; never fail the job over it.
    - docker system prune -a --filter "until=36h" -f || true
    # NGC expects the literal user name `$oauthtoken` — single quotes are intentional.
    - echo "$NGC_API_KEY" | docker login nvcr.io -u '$oauthtoken' --password-stdin
    - echo "$CI_REGISTRY_PASSWORD" | docker login $CI_REGISTRY -u $CI_REGISTRY_USER --password-stdin
# Mirror pushes on `main`/`dev` to the public GitHub repository.
pre:mirror_to_github:
  rules:
    - if: '($CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "dev") && $CI_PIPELINE_SOURCE == "push"'
      allow_failure: true
    - when: never
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  stage: .pre
  image: python:3.10
  variables:
    # Full clone so the branch history can be pushed to the mirror.
    GIT_STRATEGY: "clone"
  script:
    - git checkout $CI_COMMIT_BRANCH
    # `|| true`: the remote may already exist when the job is retried.
    - git remote add github https://ko3n1g:$GH_TOKEN@github.com/NVIDIA/Megatron-LM.git || true
    - git push -u github $CI_COMMIT_BRANCH
  retry:
    max: 2
# On every push to `main`, force-recreate the helper CI branches (one
# parallel job per branch) that scheduled pipelines run from.
pre:create_ci_branches:
  rules:
    - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"'
      allow_failure: true
    - when: never
  parallel:
    matrix:
      # Each entry spawns one job with $branch set accordingly.
      - branch: ci-unit-test-extended
      - branch: ci-rebuild-mcore-nemo-image
      - branch: ci-mr
      - branch: ci-nightly
      - branch: ci-weekly
      - branch: ci-pre-release
      - branch: ci-review-reminder
      - branch: ci-upgrade-dependencies
      - branch: ci-approve-main
      - branch: ci-approve-dev
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  stage: .pre
  image: python:3.10
  variables:
    GIT_STRATEGY: "clone"
  script:
    # Re-authenticate origin with a project access token so the job may push.
    - git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/adlr/megatron-lm.git"
    - git switch --force-create $branch
    - git push --force -u origin $branch
  retry:
    max: 2
# Same as pre:create_ci_branches, but for pushes to `dev` and the
# dev-specific helper branches.
pre:create_ci_branches_dev:
  rules:
    - if: '$CI_COMMIT_BRANCH == "dev" && $CI_PIPELINE_SOURCE == "push"'
      allow_failure: true
    - when: never
  parallel:
    matrix:
      # Each entry spawns one job with $branch set accordingly.
      - branch: ci-dev-unit-test-extended
      - branch: ci-dev-rebuild-mcore-nemo-image
      - branch: ci-dev-mr
      - branch: ci-dev-nightly
      - branch: ci-dev-upgrade-dependencies
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  stage: .pre
  image: python:3.10
  variables:
    GIT_STRATEGY: "clone"
  script:
    # Re-authenticate origin with a project access token so the job may push.
    - git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/adlr/megatron-lm.git"
    - git switch --force-create $branch
    - git push --force -u origin $branch
  retry:
    max: 2
# Auto-label the merge request from changed paths (.gitlab/labeler-config.yml)
# and maintain the "ParallelState" label from the diff contents.
pre:label_merge_request:
  extends: [.pre_rules]
  image: golang:1.22
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  before_script:
    # Build the labeler tool and the gojq JSON processor from source.
    - git clone -b nv https://${GITLAB_ENDPOINT}/okoenig/gitlab-mr-labeler.git
    - cd gitlab-mr-labeler
    - go install .
    - cd ..
    - go install github.com/itchyny/gojq/cmd/gojq@latest
  script:
    - set -x
    # Fetch current MR metadata (labels live in the .labels array).
    - |
      LABELS=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}")
    # Drop "ParallelState" first; re-add it below only if the diff touches it.
    - LABELS=$(echo "$LABELS" | gojq '.labels -= ["ParallelState"]')
    - |
      if git --no-pager diff --merge-base origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME} -- 'megatron/core/' | grep -q 'parallel_state'; then
        LABELS=$(echo "$LABELS" | gojq '.labels += ["ParallelState"]')
        echo "$LABELS"
      fi
    # Persist the computed label list for after_script (shell vars don't carry over).
    - echo LABELS=$(echo "$LABELS" | gojq '.labels | join(",")') > labels
    - gitlab-mr-labeler -f .gitlab/labeler-config.yml -t ${PROJECT_ACCESS_TOKEN_MCORE} --debug true
    - cat labels
  after_script:
    # Apply the preserved labels on top of whatever the labeler set.
    - |
      source labels
      curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}" --data-urlencode "add_labels=$LABELS" -X PUT
# When an MR labelled `mirror-to-main` merges into `dev`, cherry-pick its
# commit range onto a fresh `cp/<MR>-into-main` branch and open a follow-up
# MR into `main`, with the original author as reviewer.
pre:maybe_cherry_pick_to_main:
  rules:
    - if: "$CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'dev' && $CI_MERGE_REQUEST_LABELS =~ /mirror-to-main/"
    - when: never
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  stage: .pre
  image: nentangso/alpine-git-curl-jq
  variables:
    GIT_STRATEGY: "clone"
  script:
    - |
      set -x
      MR_ID=$CI_MERGE_REQUEST_IID
      TARGET_BRANCH="cp/$MR_ID-into-main"

      # Idempotency guard: if the cherry-pick branch already exists remotely,
      # a previous run already handled this MR.
      TARGET_BRANCH_EXISTS_OK=$([[ "$(git ls-remote --heads origin refs/heads/$TARGET_BRANCH)" != "" ]] && echo true || echo false)
      if [[ "$TARGET_BRANCH_EXISTS_OK" == "true" ]]; then
        echo Target branch already exists, will not cherry-pick again.
        exit 0
      fi

      # Fetch MR metadata once. "$MR" is quoted so the JSON payload is not
      # subject to word splitting / glob expansion before jq parses it.
      MR=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${MR_ID}")
      LABELS=$(echo -E "$MR" | jq '.labels | join(",")' | tr -d '"')
      AUTHOR_ID=$(echo -E "$MR" | jq '.author.id' | tr -d '"')
      AUTHOR_NAME=$(echo -E "$MR" | jq '.author.username' | tr -d '"')
      TITLE=$(echo -E "$MR" | jq '.title' | tr -d '"')
      MILESTONE_ID=$(echo -E "$MR" | jq '.milestone.id' | tr -d '"')

      git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_PROJECT_PATH.git"
      git remote add mr-origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_MERGE_REQUEST_SOURCE_PROJECT_PATH.git"
      git config --global user.email "mcore-bot@nvidia.com"
      git config --global user.name "Mcore Bot"
      git fetch origin dev
      git fetch mr-origin $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
      git checkout $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME

      # Commit range of the MR: merge-base with dev up to the source HEAD.
      START_COMMIT=$(git merge-base origin/dev mr-origin/$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME)
      END_COMMIT=$(git rev-parse HEAD)

      git fetch origin main
      git checkout main
      git checkout -b $TARGET_BRANCH
      git cherry-pick $START_COMMIT..$END_COMMIT
      git push -u origin $TARGET_BRANCH

      # Open the follow-up MR into main.
      curl \
        --header "PRIVATE-TOKEN: $PAT" \
        --url https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests \
        -d "source_branch=$TARGET_BRANCH" \
        -d "target_branch=main" \
        -d "title=cp MR !$MR_ID from dev: \`$TITLE\`" \
        -d "labels=cherry-picked-from-dev" \
        -d "reviewer_ids=$AUTHOR_ID" \
        -d "milestone_id=$MILESTONE_ID" \
        -d "description=[🤖]: Hi @$AUTHOR_NAME 👋,<br><br>we've cherry picked \`$TITLE (!$MR_ID)\` into \`main\` for you! 🚀<br><br>Please review and approve this cherry pick by your convenience\!"
# On pushes to `main`, inspect the newest non-merge commit's MR: for every
# `core_*` release label, cherry-pick the commit onto that release branch and
# open an MR — or notify Slack if the cherry-pick fails.
pre:maybe_cherry_pick_commit:
  rules:
    - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"'
    - when: never
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  stage: .pre
  image: nentangso/alpine-git-curl-jq
  variables:
    GIT_STRATEGY: "clone"
  script:
    - set -x
    # Best-effort: keep going on errors so one bad branch doesn't stop the rest.
    - set +e
    - SHA=$(git rev-list --no-merges -n 1 HEAD)
    - MESSAGE=$(git log -n 1 --pretty=format:%s $SHA)
    # MR id is parsed from the "!<id>" reference in the commit subject.
    - MR_ID=$(echo $MESSAGE | awk -F'!' '{print $2}' | awk '{print $1}' )
    - git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_PROJECT_NAMESPACE/megatron-lm.git"
    - git config --global user.email "mcore-bot@nvidia.com"
    - git config --global user.name "Mcore Bot"
    - |
      # "$MR" is quoted so the JSON payload is not subject to word splitting
      # or glob expansion before jq parses it.
      MR=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${MR_ID}")
      LABELS=$(echo -E "$MR" | jq '.labels | join(",")' | tr -d '"')
      AUTHOR_ID=$(echo -E "$MR" | jq '.author.id' | tr -d '"')
      AUTHOR_NAME=$(echo -E "$MR" | jq '.author.username' | tr -d '"')
      TITLE=$(echo -E "$MR" | jq '.title' | tr -d '"')
      MILESTONE_ID=$(echo -E "$MR" | jq '.milestone.id' | tr -d '"')
      TARGET_BRANCHES=$(echo "$LABELS" | grep -o 'core_[^,]*')

      if [[ $TARGET_BRANCHES == "" ]]; then
        echo Nothing to cherry pick
        exit 0
      fi

      # BUGFIX: "$TARGET_BRANCHES" must be quoted — grep -o emits one branch
      # per line, and an unquoted expansion collapses the list onto a single
      # line, so `read` would see all branches joined as one value.
      echo "$TARGET_BRANCHES" | while read -r RELEASE_BRANCH ; do
        TARGET_BRANCH_EXISTS_OK=$([[ "$(git ls-remote --heads origin refs/heads/$RELEASE_BRANCH)" != "" ]] && echo true || echo false)
        if [[ "$TARGET_BRANCH_EXISTS_OK" == "false" ]]; then
          echo Release branch does not yet exist, will not cherry-pick
          continue
        fi

        # Subshell: a failed cherry-pick doesn't abort the loop; its exit
        # status selects MR creation vs Slack alert below.
        (
          git fetch origin $RELEASE_BRANCH:$RELEASE_BRANCH
          git switch --force-create cherry-pick-$MR_ID-$RELEASE_BRANCH $RELEASE_BRANCH
          git cherry-pick $SHA
          git push -u origin --force cherry-pick-$MR_ID-$RELEASE_BRANCH
          git checkout main
        )
        CHERRYPICK_SUCCESSFUL=$?

        if [[ $CHERRYPICK_SUCCESSFUL -eq 0 ]]; then
          curl \
            --header "PRIVATE-TOKEN: $PAT" \
            --url https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests \
            -d "source_branch=cherry-pick-$MR_ID-$RELEASE_BRANCH" \
            -d "target_branch=$RELEASE_BRANCH" \
            -d "title=Cherry pick \`$TITLE ($MR_ID)\` into \`$RELEASE_BRANCH\`" \
            -d "labels=cherry-pick" \
            -d "reviewer_ids=$AUTHOR_ID" \
            -d "milestone_id=$MILESTONE_ID" \
            -d "description=[🤖]: Hi @$AUTHOR_NAME 👋,<br><br>we've cherry picked \`$TITLE ($MR_ID)\` into \`$RELEASE_BRANCH\` for you! 🚀<br><br>Please review and approve this cherry pick by your convenience\!"
        else
          URL=https://${GITLAB_ENDPOINT}/ADLR/megatron-lm/-/merge_requests/$MR_ID
          MESSAGE='{
            "blocks": [
              {
                "type": "section",
                "text": {
                  "type": "mrkdwn",
                  "text": "beep boop 🤖: Cherry-pick of <'$URL'|!'$MR_ID'> failed\ncc '$SLACK_ADMIN'"
                }
              }
            ]
          }'
          curl -X POST -H "Content-type: application/json" --data "$MESSAGE" ${MCORE_NOTIFICATION_HOOK}
        fi
      done
  interruptible: false
# Ensure every MR has a milestone: if none is set, apply the most recent
# active milestone (ordered by due date, descending).
pre:check_milestone:
  extends: [.pre_rules]
  image: badouralix/curl-jq
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - env
    # Read the MR's current milestone (JSON `null` when unset).
    - |
      MILESTONE=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}" | jq '.milestone')
    - |
      if [[ "$MILESTONE" == "null" ]]; then
        LATEST_MILESTONE=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/milestones?state=active&order_by=due_date&sort=desc" | jq '.[0].id')
        curl --request PUT --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}" --data "milestone_id=${LATEST_MILESTONE}"
        echo "Applied latest milestone (ID: ${LATEST_MILESTONE}) to this MR"
      fi
# Merge-train gate: blocks the train until the target branch is healthy.
# MRs labelled `fast-track` bypass the check entirely.
pre:check_status_of_main:
  extends: [.pre_rules]
  image: python:3.10
  # Long timeout: the checker script may poll for an extended period.
  timeout: 7 days
  variables:
    KUBERNETES_SERVICE_MEMORY_REQUEST: 32Gi
    KUBERNETES_SERVICE_MEMORY_LIMIT: 32Gi
    KUBERNETES_SERVICE_CPU_REQUEST: 8
    KUBERNETES_SERVICE_CPU_LIMIT: 12
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - env
    - pip install --no-cache-dir python-gitlab click
    - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
    - export GITLAB_ENDPOINT
    - python tests/test_utils/python_scripts/check_status_of_main.py --target-branch "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME"
  rules:
    # These rules replace the ones inherited from .pre_rules.
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train' && $CI_MERGE_REQUEST_LABELS =~ /fast-track/
      when: never
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train'
      when: always
    - when: never
# Scheduled job (on the ci-approve-* branches): check the health of the
# target branch and approve or reject every GitHub Actions run waiting on
# the "merge-gate" environment accordingly.
pre:approve_merge_gate:
  extends: [.pre_rules]
  # NOTE(review): the script below calls `python` — confirm the maniator/gh
  # image ships a Python interpreter and the checker's dependencies.
  image: maniator/gh
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - |
      set -eoux pipefail
      # Health probe: non-zero exit means the branch is unhealthy.
      EXIT_CODE=0
      python tests/test_utils/python_scripts/check_status_of_main.py --target-branch "$CI_COMMIT_BRANCH" --once || EXIT_CODE=$?
      export GH_TOKEN=$GH_TOKEN
      export REPO=NVIDIA/Megatron-LM
      export TARGET_BRANCH="$CI_COMMIT_BRANCH"
      if [[ $EXIT_CODE -eq 0 ]]; then
        STATUS="approved"
        COMMENT="Main is healthy. Submitting PR."
      else
        STATUS="rejected"
        COMMENT="Main is not healthy. An automation engineer is investigating. No need to take any action."
      fi
      # For every workflow run waiting on a deployment approval, resolve the
      # PR it belongs to and approve/reject the "merge-gate" environment when
      # the PR targets our branch.
      gh api "repos/$REPO/actions/runs?status=waiting" --jq '.workflow_runs[].id' \
        | while read run_id; do
          HEAD_BRANCH=$(gh api "repos/$REPO/actions/runs/$run_id" --jq '.head_branch')
          # PR number is the last path segment of the head branch name.
          PR_NUMBER="${HEAD_BRANCH##*/}"
          if [ -n "$PR_NUMBER" ]; then
            PR_BASE=$(gh api "repos/$REPO/pulls/$PR_NUMBER" --jq '.base.ref')
            if [ "$PR_BASE" = "$TARGET_BRANCH" ]; then
              gh api \
                --method POST "repos/$REPO/actions/runs/$run_id/pending_deployments" \
                -F "environment_ids[]=$(gh api "repos/$REPO/environments" --jq '.environments[] | select(.name=="merge-gate") | .id')" \
                -f state="$STATUS" \
                -f comment="$COMMENT";
            fi
          fi
        done
  retry:
    max: 2
  rules:
    # These rules replace the ones inherited from .pre_rules.
    - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == 'ci-approve-dev' || $CI_COMMIT_BRANCH == 'ci-approve-main')
      when: always
    - when: never
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment