python_pkgs.py 2.12 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Extract Python package information from a container image."""

import logging
import subprocess
from pathlib import Path

log = logging.getLogger(__name__)

_HELPER_SCRIPT_PATH = Path(__file__).resolve().parent / "helpers" / "python_helper.py"


def extract_python(
    image: str,
    docker_cmd: str = "docker",
    verbose: bool = False,
) -> list[dict[str, str]]:
    """Extract Python package attributions from a container image.

    Returns a list of dicts with keys: package_name, version, type, spdx_license
    """
    cmd = [
        docker_cmd,
        "run",
        "--rm",
        "--entrypoint",
        "python3",
        "-v",
        f"{_HELPER_SCRIPT_PATH}:/tmp/python_helper.py:ro",
        image,
        "/tmp/python_helper.py",
    ]
    if verbose:
        log.info("Running: %s", " ".join(cmd))

    result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)

    if result.returncode != 0:
        # Exit 127 means python3 not found — no Python packages in this image
        if result.returncode == 127:
            log.warning(
                "python3 not found in %s, skipping Python package extraction", image
            )
            return []
        log.error(
            "Python extraction failed (exit %d): %s",
            result.returncode,
            result.stderr,
        )
        raise RuntimeError(f"Python extraction failed: {result.stderr}")

    packages = []
    for line in result.stdout.strip().splitlines():
        parts = line.split("\t", 2)
        if len(parts) != 3:
            if verbose:
                log.warning("Skipping malformed line: %r", line)
            continue
        pkg_name, version, spdx_license = parts
        packages.append(
            {
                "package_name": pkg_name,
                "version": version,
                "type": "python",
                "spdx_license": spdx_license,
            }
        )

    if verbose:
        log.info("Extracted %d Python packages", len(packages))

    return packages