ci: add a preliminary compliance scan to ci (#7289)

Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>

ci: add a preliminary compliance scan to ci (#7289)
Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
f842a8e4 · Harrison Saturley-Hall · GitHub · 87ac404e · f842a8e4 · f842a8e4
Unverified Commit f842a8e4 authored Mar 12, 2026 by Harrison Saturley-Hall Committed by GitHub Mar 12, 2026
12 changed files
--- a/.dockerignore
+++ b/.dockerignore
 # SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 **/*.onnx
 **/*.plan
@@ -45,6 +33,7 @@ container/Dockerfile*
 container/**/*.Dockerfile
 container/render.py
 container/context.yaml
+container/compliance/
 .venv
 .venv-docs

--- a/.github/actions/compliance-scan/action.yml
+++ b/.github/actions/compliance-scan/action.yml
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# Composite action: pulls a container image, runs
+# container/compliance/generate_attributions.py against it, and uploads the
+# resulting CSV file(s) as a workflow artifact.
+name: 'Compliance Scan'
+description: 'Generate attribution CSVs (dpkg + Python) for a container image and upload as workflow artifacts'
+inputs:
+  image:
+    description: 'Full container image URI to scan (must be pullable)'
+    required: true
+  artifact_name:
+    description: 'Name for the uploaded artifact (e.g., compliance-vllm-cuda12-amd64)'
+    required: true
+  framework:
+    description: 'Framework name for base image resolution (vllm, sglang, trtllm, dynamo)'
+    required: false
+    default: ''
+  target:
+    description: 'Build target for base image resolution (runtime or frontend)'
+    required: false
+    default: 'runtime'
+  cuda_version:
+    description: 'CUDA version for base image resolution (e.g., 12.9, 13.0, 13.1)'
+    required: false
+    default: ''
+  base_image:
+    description: 'Explicit base image for diff (overrides framework/cuda-version auto-resolve)'
+    required: false
+    default: ''
+  retention_days:
+    description: 'Artifact retention in days'
+    required: false
+    default: '90'
+runs:
+  using: "composite"
+  steps:
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 #v3.11.1
+      with:
+        driver: docker-container
+        # Enable BuildKit for enhanced metadata
+        buildkitd-flags: --debug
+        version: v0.14.1
+    # Runs before the pull: removes unused Docker data left on the runner.
+    - name: Cleanup
+      if: always()
+      shell: bash
+      run: |
+        docker system prune -af
+    - name: Set up Python
+      uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+      with:
+        python-version: '3.12'
+        pip-install: pyyaml
+    # Inputs are handed to the shell through env rather than interpolated into
+    # the script text, so their values are always treated as data.
+    - name: Pull container image
+      shell: bash
+      env:
+        IMAGE: ${{ inputs.image }}
+      run: |
+        source ./.github/scripts/retry_docker.sh
+        retry_pull "${IMAGE}"
+    - name: Generate attribution CSVs
+      shell: bash
+      env:
+        IMAGE: ${{ inputs.image }}
+        ARTIFACT_NAME: ${{ inputs.artifact_name }}
+        FRAMEWORK: ${{ inputs.framework }}
+        TARGET: ${{ inputs.target }}
+        CUDA_VERSION: ${{ inputs.cuda_version }}
+        BASE_IMAGE: ${{ inputs.base_image }}
+      run: |
+        # Optional flags are collected in an array so each value stays a
+        # single argument regardless of its content.
+        args=()
+        if [ -n "${BASE_IMAGE}" ]; then
+          # An explicit base image takes precedence over auto-resolution.
+          args+=(--base-image "${BASE_IMAGE}")
+        elif [ -n "${FRAMEWORK}" ]; then
+          args+=(--framework "${FRAMEWORK}" --target "${TARGET}")
+          if [ -n "${CUDA_VERSION}" ]; then
+            args+=(--cuda-version "${CUDA_VERSION}")
+          fi
+        fi
+        python container/compliance/generate_attributions.py \
+          "${IMAGE}" \
+          --output "${ARTIFACT_NAME}.csv" \
+          --verbose \
+          "${args[@]}"
+    # The glob also picks up any additional "<artifact_name>*.csv" files the
+    # script writes next to the main CSV.
+    - name: Upload attribution artifacts
+      if: always()
+      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4.6.2
+      with:
+        name: ${{ inputs.artifact_name }}
+        path: ${{ inputs.artifact_name }}*.csv
+        retention-days: ${{ inputs.retention_days }}
--- a/.github/filters.yaml
+++ b/.github/filters.yaml
@@ -83,6 +83,7 @@ core:
  - 'container/templates/wheel_builder.Dockerfile'
  - '.dockerignore'
  - 'container/deps/*'
+  - 'container/compliance/**'
  - '.cargo/config.toml'
  - 'lib/**'
  - 'tests/**'
@@ -154,6 +155,7 @@ frontend:
  - '*.toml'
  - '*.lock'
  - 'container/deps/*'
+  - 'container/compliance/**'
  - 'components/src/dynamo/router/**'
  - 'components/src/dynamo/mocker/**'
  - 'components/src/dynamo/frontend/**'

--- a/.github/workflows/build-frontend-image.yaml
+++ b/.github/workflows/build-frontend-image.yaml
@@ -220,6 +220,44 @@ jobs:
          echo "| \`${{ steps.calculate-target-tag.outputs.default_target_image_uri }}\` |" >> $GITHUB_STEP_SUMMARY
          echo "| \`${{ steps.calculate-target-tag.outputs.azure_target_image_uri }}\` |" >> $GITHUB_STEP_SUMMARY
+  # ============================================================================
+  # COMPLIANCE — Generate attribution CSVs for dpkg and Python packages
+  # ============================================================================
+  # Scans the frontend image for each architecture and uploads one artifact
+  # per architecture (compliance-frontend-<arch>).
+  compliance:
+    # NOTE(review): no output of changed-files is read in this job — confirm
+    # the dependency is only needed for ordering.
+    needs: [build-frontend-image, changed-files]
+    # Skip the scan unless the image build actually succeeded.
+    if: needs.build-frontend-image.result == 'success'
+    strategy:
+      # One architecture failing must not cancel the other scan.
+      fail-fast: false
+      matrix:
+        arch: [amd64, arm64]
+    name: Compliance frontend-${{ matrix.arch }}
+    # Run on a runner pool selected by the image architecture.
+    runs-on: ${{ matrix.arch == 'amd64' && 'prod-builder-amd-v1' || 'prod-tester-arm-v1' }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+      - name: Docker Login
+        uses: ./.github/actions/docker-login
+        with:
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
+      # Builds the ECR URI of the image to scan; the tag is
+      # "<commit sha>-frontend-<arch>".
+      - name: Calculate image URI
+        id: images
+        shell: bash
+        run: |
+          TARGET_TAG="${{ github.sha }}-frontend-${{ matrix.arch }}"
+          FRONTEND_IMAGE="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${TARGET_TAG}"
+          echo "frontend_image=${FRONTEND_IMAGE}" >> $GITHUB_OUTPUT
+      # framework/target are forwarded to the compliance-scan action for its
+      # base image resolution.
+      - name: Compliance scan
+        uses: ./.github/actions/compliance-scan
+        with:
+          image: ${{ steps.images.outputs.frontend_image }}
+          artifact_name: compliance-frontend-${{ matrix.arch }}
+          framework: dynamo
+          target: frontend
  frontend-status-check:
    runs-on: ubuntu-latest
    needs: [changed-files, build-frontend-image, build-epp-image]

--- a/.github/workflows/build-test-distribute-flavor.yml
+++ b/.github/workflows/build-test-distribute-flavor.yml
@@ -504,6 +504,43 @@ jobs:
          dind_as_sidecar: 'true'
+  # ============================================================================
+  # COMPLIANCE — Generate attribution CSVs for dpkg and Python packages
+  # ============================================================================
+  # Scans the runtime image produced by the build job and uploads the CSVs as
+  # compliance-<framework>-cuda<major>-<platform>.
+  compliance:
+    # Only runs when this workflow invocation both builds and pushes the image.
+    if: inputs.build_image && inputs.push_image
+    needs: [build]
+    name: Compliance ${{ inputs.framework }}-cuda${{ inputs.cuda_version }}-${{ inputs.platform }}
+    # Run on a runner pool selected by the image platform.
+    runs-on: ${{ inputs.platform == 'amd64' && 'prod-builder-amd-v1' || 'prod-tester-arm-v1' }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
+      - name: Docker Login
+        uses: ./.github/actions/docker-login
+        with:
+          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
+          aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
+          azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
+          azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
+          azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
+      # Exposes two outputs: cuda_major (e.g. 12) and runtime_image (ECR URI).
+      - name: Calculate image URI
+        id: images
+        shell: bash
+        run: |
+          CUDA_VERSION_RAW=${{ inputs.cuda_version }}
+          # Drop everything from the first dot onwards: 12.9 -> 12
+          CUDA_VERSION=${CUDA_VERSION_RAW%%.*}
+          echo "cuda_major=${CUDA_VERSION}" >> $GITHUB_OUTPUT
+          RUNTIME_IMAGE=${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${{ needs.build.outputs.target_tag_plain }}-cuda${CUDA_VERSION}-${{ inputs.platform }}
+          echo "runtime_image=${RUNTIME_IMAGE}" >> $GITHUB_OUTPUT
+      # The artifact name uses the CUDA major version, while the action
+      # receives the full cuda_version for base image resolution.
+      - name: Compliance scan
+        uses: ./.github/actions/compliance-scan
+        with:
+          image: ${{ steps.images.outputs.runtime_image }}
+          artifact_name: compliance-${{ inputs.framework }}-cuda${{ steps.images.outputs.cuda_major }}-${{ inputs.platform }}
+          framework: ${{ inputs.framework }}
+          cuda_version: ${{ inputs.cuda_version }}
  # ============================================================================
  # COPY TO ACR
  # ============================================================================

--- a/container/compliance/README.md
+++ b/container/compliance/README.md
+# Container Compliance Tooling
+Scripts for generating attribution CSVs from built container images, listing all installed dpkg and Python packages with their SPDX license identifiers where known.
+## Output format
+Each run produces up to two CSV files (the full listing and, when a base image is given, a `_diff.csv`), each with the following columns:
+| Column | Description |
+|--------|-------------|
+| `package_name` | Package name as reported by dpkg or pip |
+| `version` | Installed version |
+| `type` | `dpkg` or `python` |
+| `spdx_license` | SPDX identifier (e.g. `MIT`, `Apache-2.0`) or `UNKNOWN` |
+Files are sorted by `(type, package_name)` for stable diffs.
+When a base image is provided, a second `_diff.csv` file is written containing only packages that are new or version-changed relative to the base — i.e. what Dynamo's build layers added on top of the upstream image.
+## Usage
+```bash
+# Full scan, output to stdout
+python container/compliance/generate_attributions.py <image:tag>
+# Write to file
+python container/compliance/generate_attributions.py <image:tag> -o attribution.csv
+# With base image diff — auto-resolved from context.yaml
+python container/compliance/generate_attributions.py <image:tag> \
+    --framework vllm \
+    --cuda-version 12.9 \
+    -o attribution-vllm-cuda12-amd64.csv
+# Produces: attribution-vllm-cuda12-amd64.csv  (full)
+#           attribution-vllm-cuda12-amd64_diff.csv  (delta from base)
+# With explicit base image override
+python container/compliance/generate_attributions.py <image:tag> \
+    --base-image nvcr.io/nvidia/cuda:12.9.1-runtime-ubuntu24.04 \
+    -o attribution.csv
+# Frontend image
+python container/compliance/generate_attributions.py <image:tag> \
+    --framework dynamo \
+    --target frontend \
+    -o attribution-frontend-amd64.csv
+# dpkg only
+python container/compliance/generate_attributions.py <image:tag> \
+    --types dpkg \
+    -o attribution-dpkg.csv
+```
+### All flags
+| Flag | Default | Description |
+|------|---------|-------------|
+| `image` | *(required)* | Container image to scan |
+| `--output`, `-o` | stdout | Output CSV path |
+| `--framework` | — | Auto-resolve base image from `context.yaml` (`vllm`, `sglang`, `trtllm`, `dynamo`) |
+| `--target` | `runtime` | Build target for base resolution (`runtime` or `frontend`) |
+| `--cuda-version` | — | CUDA version for base resolution (e.g. `12.9`, `13.0`, `13.1`) |
+| `--base-image` | — | Explicit base image URI (overrides `--framework` auto-resolve) |
+| `--context-yaml` | `container/context.yaml` | Path to context.yaml |
+| `--types` | `dpkg,python` | Comma-separated list of types to extract |
+| `--docker-cmd` | `docker` | Docker binary to use |
+| `--verbose`, `-v` | — | Enable verbose logging to stderr |
+## Base image reference
+| Framework | CUDA | Base image |
+|-----------|------|------------|
+| `vllm` | 12.9 | `nvcr.io/nvidia/cuda:12.9.1-runtime-ubuntu24.04` |
+| `vllm` | 13.0 | `nvcr.io/nvidia/cuda:13.0.2-runtime-ubuntu24.04` |
+| `sglang` | 12.9 | `lmsysorg/sglang:v0.5.9-runtime` |
+| `sglang` | 13.0 | `lmsysorg/sglang:v0.5.9-cu130-runtime` |
+| `trtllm` | 13.1 | `nvcr.io/nvidia/cuda-dl-base:25.12-cuda13.1-runtime-ubuntu24.04` |
+| `dynamo` frontend | — | `nvcr.io/nvidia/base/ubuntu:noble-20250619` |
+These values are sourced from `container/context.yaml` at runtime; the table above reflects the current defaults.
+## How it works
+The script runs two lightweight helper scripts **inside the container** via `docker run --rm -v`:
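+- **dpkg extractor** — runs `dpkg-query` to list packages, then reads `/usr/share/doc/<pkg>/copyright` files for license info. Only DEP-5 machine-readable copyright files are parsed. `License:` values that map to a known SPDX identifier are collected: a single distinct identifier is reported as-is, several distinct identifiers are joined with ` AND `, and packages with no recognised value (or a missing or non-DEP-5 copyright file) return `UNKNOWN`.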
+- **Python extractor** — uses `importlib.metadata.distributions()` to iterate installed packages. License is read from `License-Expression` (PEP 639), then `License` metadata, then trove classifiers. Ambiguous cases return `UNKNOWN`.
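+Both helpers are self-contained and have no external dependencies — they run with whatever Python is in the container. If the image has no `python3`, dpkg extraction falls back to a plain `dpkg-query` listing (every license reported as `UNKNOWN`) and Python extraction is skipped.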
+## License detection
+Detection is intentionally conservative: only unambiguous matches are assigned SPDX identifiers. The `UNKNOWN` entries are expected; they can be resolved with additional analysis against the raw copyright files.
+## CI integration
+Attribution CSVs are generated automatically as part of CI after a successful image build (for framework runtime images, only when the image is both built and pushed). Artifacts are available in the GitHub Actions workflow run under:
+- `compliance-{framework}-cuda{major}-{platform}` — runtime images
+- `compliance-frontend-{arch}` — frontend image
+The scan runs as a separate job (on `prod-builder-amd-v1` for amd64 and `prod-tester-arm-v1` for arm64) that depends only on the image build, so it can run in parallel with tests and does not extend pipeline wall time.
+## Requirements
+- Python 3.11+
+- `docker` (or compatible CLI) with access to the target registry
+- `pyyaml` — only required on the host when using `--framework`/`--cuda-version` base image auto-resolution (`pip install pyyaml`)
--- a/container/compliance/extractors/__init__.py
+++ b/container/compliance/extractors/__init__.py
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Attribution extractors for container dependency scanning."""
--- a/container/compliance/extractors/dpkg.py
+++ b/container/compliance/extractors/dpkg.py
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Extract dpkg package information from a container image."""
+import logging
+import subprocess
+from pathlib import Path
+log = logging.getLogger(__name__)
+_HELPER_SCRIPT_PATH = Path(__file__).resolve().parent / "helpers" / "dpkg_helper.py"
+def extract_dpkg(
+    image: str,
+    docker_cmd: str = "docker",
+    verbose: bool = False,
+) -> list[dict[str, str]]:
+    """Extract dpkg package attributions from a container image.
+    Returns a list of dicts with keys: package_name, version, type, spdx_license
+    """
+    cmd = [
+        docker_cmd,
+        "run",
+        "--rm",
+        "--entrypoint",
+        "python3",
+        "-v",
+        f"{_HELPER_SCRIPT_PATH}:/tmp/dpkg_helper.py:ro",
+        image,
+        "/tmp/dpkg_helper.py",
+    ]
+    if verbose:
+        log.info("Running: %s", " ".join(cmd))
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
+    if result.returncode != 0:
+        # Exit 127 means python3 not found — fall back to shell-based dpkg-query
+        if result.returncode == 127:
+            log.warning(
+                "python3 not found in %s, falling back to shell-based dpkg extraction (no license info)",
+                image,
+            )
+            return _extract_dpkg_shell(image, docker_cmd, verbose)
+        log.error(
+            "dpkg extraction failed (exit %d): %s", result.returncode, result.stderr
+        )
+        raise RuntimeError(f"dpkg extraction failed: {result.stderr}")
+    packages = []
+    for line in result.stdout.strip().splitlines():
+        parts = line.split("\t", 2)
+        if len(parts) != 3:
+            if verbose:
+                log.warning("Skipping malformed line: %r", line)
+            continue
+        pkg_name, version, spdx_license = parts
+        packages.append(
+            {
+                "package_name": pkg_name,
+                "version": version,
+                "type": "dpkg",
+                "spdx_license": spdx_license,
+            }
+        )
+    if verbose:
+        log.info("Extracted %d dpkg packages", len(packages))
+    return packages
+def _extract_dpkg_shell(
+    image: str,
+    docker_cmd: str = "docker",
+    verbose: bool = False,
+) -> list[dict[str, str]]:
+    """Fallback: extract dpkg packages via shell when python3 is unavailable.
+    License info will be UNKNOWN for all packages.
+    """
+    cmd = [
+        docker_cmd,
+        "run",
+        "--rm",
+        "--entrypoint",
+        "sh",
+        image,
+        "-c",
+        "dpkg-query -W -f='${Package}\\t${Version}\\n'",
+    ]
+    if verbose:
+        log.info("Running (shell fallback): %s", " ".join(cmd))
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
+    if result.returncode != 0:
+        log.error(
+            "dpkg shell extraction failed (exit %d): %s",
+            result.returncode,
+            result.stderr,
+        )
+        raise RuntimeError(f"dpkg shell extraction failed: {result.stderr}")
+    packages = []
+    for line in result.stdout.strip().splitlines():
+        parts = line.split("\t", 1)
+        if len(parts) != 2:
+            continue
+        pkg_name, version = parts
+        packages.append(
+            {
+                "package_name": pkg_name,
+                "version": version,
+                "type": "dpkg",
+                "spdx_license": "UNKNOWN",
+            }
+        )
+    if verbose:
+        log.info("Extracted %d dpkg packages (shell fallback)", len(packages))
+    return packages
--- a/container/compliance/extractors/helpers/dpkg_helper.py
+++ b/container/compliance/extractors/helpers/dpkg_helper.py
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# This script runs INSIDE the container. It must be fully self-contained
+# with zero external dependencies (only Python stdlib).
+import os
+import subprocess
+import sys
+# Conservative DEP-5 license field -> SPDX mapping
+# Keys are the values expected after "License:" in a copyright file; only
+# values listed here are ever translated, anything else is ignored by
+# extract_dep5_license().
+# NOTE(review): unversioned Debian names ("GPL-2", "LGPL-3", ...) are mapped to
+# the SPDX "-only" forms, and "public-domain" is approximated as CC0-1.0 —
+# confirm these choices are acceptable for compliance reporting.
+_DEP5_MAP = {
+    "Apache-2.0": "Apache-2.0",
+    "Apache-2": "Apache-2.0",
+    "Artistic-2.0": "Artistic-2.0",
+    "BSD-2-clause": "BSD-2-Clause",
+    "BSD-3-clause": "BSD-3-Clause",
+    "BSL-1.0": "BSL-1.0",
+    "CC0-1.0": "CC0-1.0",
+    "Expat": "MIT",
+    "GPL-2": "GPL-2.0-only",
+    "GPL-2+": "GPL-2.0-or-later",
+    "GPL-2.0": "GPL-2.0-only",
+    "GPL-2.0+": "GPL-2.0-or-later",
+    "GPL-3": "GPL-3.0-only",
+    "GPL-3+": "GPL-3.0-or-later",
+    "GPL-3.0": "GPL-3.0-only",
+    "GPL-3.0+": "GPL-3.0-or-later",
+    "ISC": "ISC",
+    "LGPL-2": "LGPL-2.0-only",
+    "LGPL-2+": "LGPL-2.0-or-later",
+    "LGPL-2.0": "LGPL-2.0-only",
+    "LGPL-2.0+": "LGPL-2.0-or-later",
+    "LGPL-2.1": "LGPL-2.1-only",
+    "LGPL-2.1+": "LGPL-2.1-or-later",
+    "LGPL-3": "LGPL-3.0-only",
+    "LGPL-3+": "LGPL-3.0-or-later",
+    "LGPL-3.0": "LGPL-3.0-only",
+    "LGPL-3.0+": "LGPL-3.0-or-later",
+    "MIT": "MIT",
+    "MPL-2.0": "MPL-2.0",
+    "PSF-2": "PSF-2.0",
+    "public-domain": "CC0-1.0",
+    "Zlib": "Zlib",
+    "OpenSSL": "OpenSSL",
+    "WTFPL": "WTFPL",
+}
+# Lower-cased copy of the table, used as a case-insensitive fallback lookup.
+_DEP5_MAP_LOWER = {k.lower(): v for k, v in _DEP5_MAP.items()}
+def is_dep5(content):
+    for line in content.splitlines():
+        s = line.strip()
+        if not s or s.startswith("#"):
+            continue
+        return s.startswith("Format:")
+    return False
+def extract_dep5_license(content):
+    """Extract the primary license from a DEP-5 copyright file."""
+    licenses = set()
+    for line in content.splitlines():
+        s = line.strip()
+        if s.startswith("License:"):
+            val = s[len("License:") :].strip()
+            if val:
+                mapped = _DEP5_MAP.get(val) or _DEP5_MAP_LOWER.get(val.lower())
+                if mapped:
+                    licenses.add(mapped)
+    if len(licenses) == 1:
+        return licenses.pop()
+    elif len(licenses) > 1:
+        return " AND ".join(sorted(licenses))
+    return "UNKNOWN"
+def get_license_for_package(pkg_name):
+    """Read /usr/share/doc/<pkg>/copyright and extract license info."""
+    copyright_path = f"/usr/share/doc/{pkg_name}/copyright"
+    if not os.path.isfile(copyright_path):
+        return "UNKNOWN"
+    try:
+        with open(copyright_path, "r", errors="replace") as f:
+            content = f.read()
+    except (OSError, IOError):
+        return "UNKNOWN"
+    if not content.strip():
+        return "UNKNOWN"
+    if is_dep5(content):
+        return extract_dep5_license(content)
+    return "UNKNOWN"
+def main():
+    result = subprocess.run(
+        ["dpkg-query", "-W", "-f=${Package}\t${Version}\n"],
+        capture_output=True,
+        text=True,
+    )
+    if result.returncode != 0:
+        print(f"ERROR: dpkg-query failed: {result.stderr}", file=sys.stderr)
+        sys.exit(1)
+    for line in result.stdout.strip().splitlines():
+        parts = line.split("\t", 1)
+        if len(parts) != 2:
+            continue
+        pkg, version = parts
+        license_id = get_license_for_package(pkg)
+        print(f"{pkg}\t{version}\t{license_id}")
+if __name__ == "__main__":
+    main()
--- a/container/compliance/extractors/helpers/python_helper.py
+++ b/container/compliance/extractors/helpers/python_helper.py
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# This script runs INSIDE the container. It must be fully self-contained
+# with zero external dependencies (only Python stdlib).
+import importlib.metadata
+# Conservative classifier -> SPDX mapping
+# Keys are full trove classifier strings, matched exactly by get_license().
+# NOTE(review): some classifiers do not name a specific license version or
+# variant ("BSD License", "Artistic License", "Python Software Foundation
+# License", "Public Domain"); the SPDX identifiers chosen for them are
+# approximations — confirm they are acceptable for compliance reporting.
+_CLASSIFIER_MAP = {
+    "License :: OSI Approved :: MIT License": "MIT",
+    "License :: OSI Approved :: Apache Software License": "Apache-2.0",
+    "License :: OSI Approved :: BSD License": "BSD-3-Clause",
+    "License :: OSI Approved :: ISC License (ISCL)": "ISC",
+    "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)": "MPL-2.0",
+    "License :: OSI Approved :: GNU General Public License v2 (GPLv2)": "GPL-2.0-only",
+    "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)": "GPL-2.0-or-later",
+    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)": "GPL-3.0-only",
+    "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)": "GPL-3.0-or-later",
+    "License :: OSI Approved :: GNU Lesser General Public License v2 (LGPLv2)": "LGPL-2.0-only",
+    "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)": "LGPL-2.0-or-later",
+    "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)": "LGPL-3.0-only",
+    "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)": "LGPL-3.0-or-later",
+    "License :: OSI Approved :: Python Software Foundation License": "PSF-2.0",
+    "License :: OSI Approved :: Boost Software License 1.0 (BSL-1.0)": "BSL-1.0",
+    "License :: OSI Approved :: The Unlicense (Unlicense)": "Unlicense",
+    "License :: OSI Approved :: Artistic License": "Artistic-2.0",
+    "License :: OSI Approved :: zlib/libpng License": "Zlib",
+    "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication": "CC0-1.0",
+    "License :: Public Domain": "CC0-1.0",
+}
+# Conservative free-text license -> SPDX mapping
+# Keys are complete values of the "License" metadata field; get_license()
+# matches the whole stripped field, so longer free-form text never matches.
+# NOTE(review): bare "BSD" / "BSD License" are reported as BSD-3-Clause and
+# "Public Domain" as CC0-1.0 — these are assumptions, confirm before relying
+# on them.
+_LICENSE_MAP = {
+    "MIT": "MIT",
+    "MIT License": "MIT",
+    "The MIT License": "MIT",
+    "The MIT License (MIT)": "MIT",
+    "Apache License 2.0": "Apache-2.0",
+    "Apache License, Version 2.0": "Apache-2.0",
+    "Apache Software License": "Apache-2.0",
+    "Apache 2.0": "Apache-2.0",
+    "Apache-2.0": "Apache-2.0",
+    "BSD License": "BSD-3-Clause",
+    "BSD": "BSD-3-Clause",
+    "BSD-2-Clause": "BSD-2-Clause",
+    "BSD-3-Clause": "BSD-3-Clause",
+    "3-Clause BSD License": "BSD-3-Clause",
+    "2-Clause BSD License": "BSD-2-Clause",
+    "Simplified BSD License": "BSD-2-Clause",
+    "New BSD License": "BSD-3-Clause",
+    "ISC": "ISC",
+    "ISC License": "ISC",
+    "ISC License (ISCL)": "ISC",
+    "MPL-2.0": "MPL-2.0",
+    "Mozilla Public License 2.0": "MPL-2.0",
+    "Mozilla Public License 2.0 (MPL 2.0)": "MPL-2.0",
+    "PSF-2.0": "PSF-2.0",
+    "Python Software Foundation License": "PSF-2.0",
+    "Unlicense": "Unlicense",
+    "The Unlicense": "Unlicense",
+    "CC0-1.0": "CC0-1.0",
+    "Public Domain": "CC0-1.0",
+    "WTFPL": "WTFPL",
+    "Zlib": "Zlib",
+}
+# Lower-cased copy of the table, used as a case-insensitive fallback lookup.
+_LICENSE_MAP_LOWER = {k.lower(): v for k, v in _LICENSE_MAP.items()}
+def get_license(dist):
+    """Extract SPDX license for a distribution, conservative approach."""
+    meta = dist.metadata
+    # 1. PEP 639 License-Expression (already SPDX)
+    license_expr = meta.get("License-Expression")
+    if license_expr and license_expr.strip():
+        return license_expr.strip()
+    # 2. Free-text License field
+    license_field = meta.get("License")
+    if license_field and license_field.strip():
+        val = license_field.strip()
+        mapped = _LICENSE_MAP.get(val) or _LICENSE_MAP_LOWER.get(val.lower())
+        if mapped:
+            return mapped
+    # 3. Trove classifiers
+    classifiers = meta.get_all("Classifier") or []
+    license_classifiers = [c for c in classifiers if c.startswith("License ::")]
+    for clf in license_classifiers:
+        if clf in _CLASSIFIER_MAP:
+            return _CLASSIFIER_MAP[clf]
+    return "UNKNOWN"
+def main():
+    seen = set()
+    for dist in importlib.metadata.distributions():
+        name = dist.metadata["Name"]
+        if not name:
+            continue
+        # Deduplicate (importlib.metadata can return duplicates)
+        key = name.lower()
+        if key in seen:
+            continue
+        seen.add(key)
+        version = dist.metadata["Version"] or "UNKNOWN"
+        spdx = get_license(dist)
+        print(f"{name}\t{version}\t{spdx}")
+if __name__ == "__main__":
+    main()
--- a/container/compliance/extractors/python_pkgs.py
+++ b/container/compliance/extractors/python_pkgs.py
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Extract Python package information from a container image."""
+import logging
+import subprocess
+from pathlib import Path
+log = logging.getLogger(__name__)
+_HELPER_SCRIPT_PATH = Path(__file__).resolve().parent / "helpers" / "python_helper.py"
+def extract_python(
+    image: str,
+    docker_cmd: str = "docker",
+    verbose: bool = False,
+) -> list[dict[str, str]]:
+    """Extract Python package attributions from a container image.
+    Returns a list of dicts with keys: package_name, version, type, spdx_license
+    """
+    cmd = [
+        docker_cmd,
+        "run",
+        "--rm",
+        "--entrypoint",
+        "python3",
+        "-v",
+        f"{_HELPER_SCRIPT_PATH}:/tmp/python_helper.py:ro",
+        image,
+        "/tmp/python_helper.py",
+    ]
+    if verbose:
+        log.info("Running: %s", " ".join(cmd))
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
+    if result.returncode != 0:
+        # Exit 127 means python3 not found — no Python packages in this image
+        if result.returncode == 127:
+            log.warning(
+                "python3 not found in %s, skipping Python package extraction", image
+            )
+            return []
+        log.error(
+            "Python extraction failed (exit %d): %s",
+            result.returncode,
+            result.stderr,
+        )
+        raise RuntimeError(f"Python extraction failed: {result.stderr}")
+    packages = []
+    for line in result.stdout.strip().splitlines():
+        parts = line.split("\t", 2)
+        if len(parts) != 3:
+            if verbose:
+                log.warning("Skipping malformed line: %r", line)
+            continue
+        pkg_name, version, spdx_license = parts
+        packages.append(
+            {
+                "package_name": pkg_name,
+                "version": version,
+                "type": "python",
+                "spdx_license": spdx_license,
+            }
+        )
+    if verbose:
+        log.info("Extracted %d Python packages", len(packages))
+    return packages
--- a/container/compliance/generate_attributions.py
+++ b/container/compliance/generate_attributions.py
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Generate attribution CSV files for container images.
+Extracts dpkg and Python package information from a container image by
+running helper scripts inside the container via `docker run`. Optionally
+computes a diff against a base image to show only added/changed packages.
+Usage:
+    python generate_attributions.py <image:tag> [--output out.csv] [--base-image base:tag]
+    python generate_attributions.py <image:tag> --framework vllm --cuda-version 12.9
+"""
+import argparse
+import csv
+import logging
+import sys
+from pathlib import Path
+# Allow running as a script from any directory
+_SCRIPT_DIR = Path(__file__).resolve().parent
+_REPO_ROOT = _SCRIPT_DIR.parent.parent
+sys.path.insert(0, str(_SCRIPT_DIR))
+from extractors.dpkg import extract_dpkg  # noqa: E402
+from extractors.python_pkgs import extract_python  # noqa: E402
+log = logging.getLogger(__name__)
+VALID_TYPES = {"dpkg", "python"}
+def resolve_base_image(
+    framework: str,
+    target: str,
+    cuda_version: str,
+    context_yaml_path: Path,
+) -> str:
+    """Resolve the base image reference from context.yaml.
+    For target "frontend" the value of dynamo.frontend_image is returned as is.
+    For any other target the result is "<runtime_image>:<runtime_image_tag>",
+    read from <framework>.cuda<cuda_version>.
+    Args:
+        framework: Top-level key in context.yaml (not consulted for "frontend").
+        target: Build target; only "frontend" is special-cased.
+        cuda_version: Version string appended to "cuda" to form the lookup key.
+        context_yaml_path: Location of context.yaml.
+    Returns:
+        The resolved base image reference.
+    Exits the process with status 1, after logging an error, when PyYAML is not
+    installed, the file is missing or is not a YAML mapping, or the required
+    keys are absent or empty.
+    """
+    try:
+        import yaml
+    except ImportError:
+        log.error(
+            "PyYAML is required for --framework/--cuda-version base image resolution. "
+            "Install it with: pip install pyyaml"
+        )
+        sys.exit(1)
+    if not context_yaml_path.is_file():
+        log.error("context.yaml not found at %s", context_yaml_path)
+        sys.exit(1)
+    # Decode explicitly as UTF-8 so parsing does not depend on the host locale.
+    with open(context_yaml_path, "r", encoding="utf-8") as f:
+        context = yaml.safe_load(f)
+    # safe_load returns None for an empty document and may also yield a list or
+    # scalar; report that clearly instead of failing with AttributeError below.
+    if not isinstance(context, dict):
+        log.error(
+            "context.yaml at %s does not contain a top-level mapping",
+            context_yaml_path,
+        )
+        sys.exit(1)
+    if target == "frontend":
+        frontend_image = _as_mapping(context.get("dynamo")).get("frontend_image")
+        if not frontend_image:
+            log.error("frontend_image not found in context.yaml dynamo section")
+            sys.exit(1)
+        return frontend_image
+    # Runtime target: look up runtime_image and runtime_image_tag
+    fw_config = _as_mapping(context.get(framework))
+    cuda_key = f"cuda{cuda_version}"
+    # A key that is present but null (or not a mapping) is treated like a missing key.
+    cuda_config = _as_mapping(fw_config.get(cuda_key))
+    runtime_image = cuda_config.get("runtime_image")
+    runtime_image_tag = cuda_config.get("runtime_image_tag")
+    if not runtime_image or not runtime_image_tag:
+        log.error(
+            "Could not resolve base image for framework=%s cuda=%s target=%s. "
+            "Keys runtime_image/runtime_image_tag not found under %s.%s in context.yaml",
+            framework,
+            cuda_version,
+            target,
+            framework,
+            cuda_key,
+        )
+        sys.exit(1)
+    return f"{runtime_image}:{runtime_image_tag}"
+def _as_mapping(value: object) -> dict:
+    """Return value unchanged when it is a dict, otherwise an empty dict."""
+    return value if isinstance(value, dict) else {}
+def compute_diff(
+    target_packages: list[dict[str, str]],
+    base_packages: list[dict[str, str]],
+) -> list[dict[str, str]]:
+    """Return the target packages that are new or changed relative to base.
+    Packages are identified by (package_name, type). A target entry is kept
+    when base has no entry with that identity, or none at the same version.
+    Every version seen in base is remembered, so a package that base lists at
+    several versions is not reported when the target matches any one of them.
+    Args:
+        target_packages: Entries with "package_name", "type" and "version" keys.
+        base_packages: Entries with the same keys, taken from the base image.
+    Returns:
+        The matching entries of target_packages, in their original order.
+    """
+    base_versions: dict[tuple[str, str], set[str]] = {}
+    for pkg in base_packages:
+        key = (pkg["package_name"], pkg["type"])
+        base_versions.setdefault(key, set()).add(pkg["version"])
+    return [
+        pkg
+        for pkg in target_packages
+        if pkg["version"]
+        not in base_versions.get((pkg["package_name"], pkg["type"]), ())
+    ]
+def write_csv(packages: list[dict[str, str]], output_path: str | None) -> None:
+    """Write packages as CSV, sorted by (type, package_name).
+    Columns are package_name, version, type, spdx_license, preceded by a header row.
+    Args:
+        packages: Entries to write; each must have "type" and "package_name" keys.
+        output_path: Destination file. When None or empty, the CSV goes to stdout.
+    """
+    sorted_packages = sorted(packages, key=lambda p: (p["type"], p["package_name"]))
+    fieldnames = ["package_name", "version", "type", "spdx_license"]
+    def _emit(stream) -> None:
+        # Single code path for both destinations keeps their output identical.
+        writer = csv.DictWriter(stream, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(sorted_packages)
+    if not output_path:
+        _emit(sys.stdout)
+        return
+    # Fixed UTF-8 so non-ASCII names or licenses do not depend on the host
+    # locale; newline="" lets the csv module control line endings itself.
+    with open(output_path, "w", newline="", encoding="utf-8") as f:
+        _emit(f)
+    log.info("Wrote %d entries to %s", len(sorted_packages), output_path)
+def extract_all(
+    image: str,
+    types: set[str],
+    docker_cmd: str,
+    verbose: bool,
+) -> list[dict[str, str]]:
+    """Collect package entries from an image using each requested extractor.
+    The dpkg extractor runs first, then the Python one; a kind that is not in
+    types is skipped. Results are concatenated in that order.
+    """
+    collected: list[dict[str, str]] = []
+    if "dpkg" in types:
+        log.info("Extracting dpkg packages from %s ...", image)
+        dpkg_entries = extract_dpkg(image, docker_cmd=docker_cmd, verbose=verbose)
+        collected += dpkg_entries
+    if "python" in types:
+        log.info("Extracting Python packages from %s ...", image)
+        python_entries = extract_python(image, docker_cmd=docker_cmd, verbose=verbose)
+        collected += python_entries
+    return collected
+def parse_args() -> argparse.Namespace:
+    """Build the command-line parser and parse sys.argv.
+    One positional argument (the image to scan) plus options that control the
+    output path, base-image selection, extraction types, the docker command
+    and verbosity.
+    """
+    usage_examples = """
+Examples:
+  %(prog)s my-registry/dynamo:vllm-runtime -o vllm.csv
+  %(prog)s my-registry/dynamo:vllm-runtime --framework vllm --cuda-version 12.9 -o vllm.csv
+  %(prog)s my-registry/dynamo:vllm-runtime --base-image nvcr.io/nvidia/cuda:12.9.1-runtime-ubuntu24.04 -o vllm.csv
+  %(prog)s my-registry/dynamo:frontend --framework dynamo --target frontend -o frontend.csv
+        """
+    default_context = str(_REPO_ROOT / "container" / "context.yaml")
+    cli = argparse.ArgumentParser(
+        description="Generate attribution CSV files for container images",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=usage_examples,
+    )
+    add = cli.add_argument
+    # What to scan and where the result goes.
+    add("image", help="Container image to scan (e.g., my-registry/dynamo:latest)")
+    add("--output", "-o", help="Output CSV file path (default: stdout)")
+    # Base image selection: explicit, or resolved from context.yaml.
+    add(
+        "--base-image",
+        help="Base image for diff calculation (explicit, overrides --framework auto-resolve)",
+    )
+    add(
+        "--framework",
+        choices=["vllm", "sglang", "trtllm", "dynamo"],
+        help="Framework name for auto-resolving base image from context.yaml",
+    )
+    add(
+        "--target",
+        default="runtime",
+        choices=["runtime", "frontend"],
+        help="Build target for base image resolution (default: runtime)",
+    )
+    add(
+        "--cuda-version",
+        choices=["12.9", "13.0", "13.1"],
+        help="CUDA version for base image resolution",
+    )
+    add(
+        "--context-yaml",
+        default=default_context,
+        help="Path to context.yaml (default: container/context.yaml in repo root)",
+    )
+    # Extraction behaviour.
+    add(
+        "--types",
+        default="dpkg,python",
+        help="Comma-separated extraction types (default: dpkg,python)",
+    )
+    add("--docker-cmd", default="docker", help="Docker command to use (default: docker)")
+    add("--verbose", "-v", action="store_true", help="Enable verbose logging")
+    return cli.parse_args()
+def main() -> None:
+    """CLI entry point: scan an image and write its package CSV, plus an optional diff.
+    Steps:
+      1. Parse arguments and configure logging on stderr.
+      2. Validate --types against VALID_TYPES.
+      3. Pick the base image: --base-image if given, otherwise resolved from
+         context.yaml when --framework is given.
+      4. Extract packages from the target image and write the full CSV.
+      5. If a base image is known, extract it too and write the diff CSV, either
+         to "<stem>_diff<suffix>" next to --output or to stdout.
+    Exits with status 1 when --types is empty or contains an unknown kind, or
+    when --framework is used for a non-frontend target without --cuda-version.
+    """
+    args = parse_args()
+    logging.basicConfig(
+        level=logging.DEBUG if args.verbose else logging.INFO,
+        format="%(levelname)s: %(message)s",
+        stream=sys.stderr,
+    )
+    # Tolerate spaces and stray commas, e.g. --types "dpkg, python" or "dpkg,".
+    types = {name.strip() for name in args.types.split(",")} - {""}
+    invalid = types - VALID_TYPES
+    if invalid or not types:
+        # Sorted so the message is deterministic; show the raw option value
+        # when nothing usable was given at all.
+        log.error(
+            "Invalid types: %s (valid: %s)",
+            ", ".join(sorted(invalid)) or repr(args.types),
+            ", ".join(sorted(VALID_TYPES)),
+        )
+        sys.exit(1)
+    # Resolve base image if needed
+    base_image = args.base_image
+    if not base_image and args.framework:
+        if args.target != "frontend" and not args.cuda_version:
+            log.error(
+                "--cuda-version is required when using --framework for runtime targets"
+            )
+            sys.exit(1)
+        base_image = resolve_base_image(
+            framework=args.framework,
+            target=args.target,
+            cuda_version=args.cuda_version or "",
+            context_yaml_path=Path(args.context_yaml),
+        )
+        log.info("Auto-resolved base image: %s", base_image)
+    # Extract from target image
+    target_packages = extract_all(args.image, types, args.docker_cmd, args.verbose)
+    log.info("Total packages extracted from target: %d", len(target_packages))
+    # Write full CSV
+    write_csv(target_packages, args.output)
+    # Compute and write diff if base image is available
+    if base_image:
+        log.info("Extracting packages from base image for diff: %s", base_image)
+        base_packages = extract_all(base_image, types, args.docker_cmd, args.verbose)
+        log.info("Total packages extracted from base: %d", len(base_packages))
+        diff_packages = compute_diff(target_packages, base_packages)
+        log.info("Diff: %d new/changed packages", len(diff_packages))
+        if args.output:
+            # Insert _diff before the file extension
+            output_path = Path(args.output)
+            diff_path = str(output_path.with_stem(output_path.stem + "_diff"))
+            write_csv(diff_packages, diff_path)
+        else:
+            # The diff CSV follows the full CSV on stdout. The separator line
+            # goes to stderr, so it never ends up inside the CSV data. Flush
+            # stdout first so the separator lands between the two tables when
+            # both streams are captured together.
+            sys.stdout.flush()
+            print("\n# --- DIFF (new/changed packages vs base) ---", file=sys.stderr)
+            write_csv(diff_packages, None)
+if __name__ == "__main__":
+    main()