dpkg_helper.py 5.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# This script runs INSIDE the container (local mode) or against a mounted
# filesystem root (--root /target mode for BuildKit extraction).
# It must be fully self-contained with zero external dependencies (only Python stdlib).

import argparse
import os
import subprocess
import sys

# Conservative DEP-5 license field -> SPDX mapping
# Keys are the short names looked up from "License:" fields of a copyright
# file; values are the SPDX identifiers reported in their place.
# A trailing "+" on a key maps to the "-or-later" SPDX variant, while the bare
# GPL/LGPL names map to the "-only" variant.
_DEP5_MAP = {
    "Apache-2.0": "Apache-2.0",
    "Apache-2": "Apache-2.0",
    "Artistic-2.0": "Artistic-2.0",
    "BSD-2-clause": "BSD-2-Clause",
    "BSD-3-clause": "BSD-3-Clause",
    "BSL-1.0": "BSL-1.0",
    "CC0-1.0": "CC0-1.0",
    "Expat": "MIT",
    "GPL-2": "GPL-2.0-only",
    "GPL-2+": "GPL-2.0-or-later",
    "GPL-2.0": "GPL-2.0-only",
    "GPL-2.0+": "GPL-2.0-or-later",
    "GPL-3": "GPL-3.0-only",
    "GPL-3+": "GPL-3.0-or-later",
    "GPL-3.0": "GPL-3.0-only",
    "GPL-3.0+": "GPL-3.0-or-later",
    "ISC": "ISC",
    "LGPL-2": "LGPL-2.0-only",
    "LGPL-2+": "LGPL-2.0-or-later",
    "LGPL-2.0": "LGPL-2.0-only",
    "LGPL-2.0+": "LGPL-2.0-or-later",
    "LGPL-2.1": "LGPL-2.1-only",
    "LGPL-2.1+": "LGPL-2.1-or-later",
    "LGPL-3": "LGPL-3.0-only",
    "LGPL-3+": "LGPL-3.0-or-later",
    "LGPL-3.0": "LGPL-3.0-only",
    "LGPL-3.0+": "LGPL-3.0-or-later",
    "MIT": "MIT",
    "MPL-2.0": "MPL-2.0",
    "PSF-2": "PSF-2.0",
    # NOTE(review): "public-domain" has no SPDX identifier of its own; CC0-1.0
    # is used here as a stand-in — confirm this is acceptable to consumers.
    "public-domain": "CC0-1.0",
    "Zlib": "Zlib",
    "OpenSSL": "OpenSSL",
    "WTFPL": "WTFPL",
}

# Same mapping keyed by lower-cased short name, used as a case-insensitive
# fallback when the exact-case lookup in _DEP5_MAP misses.
_DEP5_MAP_LOWER = {k.lower(): v for k, v in _DEP5_MAP.items()}


def is_dep5(content):
    """Return True when *content* looks like a DEP-5 copyright file.

    The decision is made on the first line that, once stripped of surrounding
    whitespace, is neither empty nor a ``#`` comment: the file is treated as
    DEP-5 only if that line begins with ``Format:``. Content with no such
    line yields False.
    """
    stripped_lines = (raw.strip() for raw in content.splitlines())
    first_meaningful = next(
        (text for text in stripped_lines if text and not text.startswith("#")),
        None,
    )
    return first_meaningful is not None and first_meaningful.startswith("Format:")


def extract_dep5_license(content):
    """Derive an SPDX expression from the ``License:`` fields of a DEP-5 file.

    Every line that (after stripping whitespace) starts with ``License:`` is
    inspected. Its value is looked up in ``_DEP5_MAP`` first and, failing
    that, case-insensitively in ``_DEP5_MAP_LOWER``. Values with no mapping
    are ignored.

    Returns the single mapped identifier, or the distinct identifiers sorted
    and joined with ``" AND "`` when there are several, or ``"UNKNOWN"`` when
    nothing could be mapped.
    """
    field_prefix = "License:"
    spdx_ids = set()

    for raw_line in content.splitlines():
        field = raw_line.strip()
        if not field.startswith(field_prefix):
            continue
        short_name = field[len(field_prefix):].strip()
        if not short_name:
            continue
        # Exact-case match wins; fall back to the lower-cased index.
        spdx = _DEP5_MAP.get(short_name) or _DEP5_MAP_LOWER.get(short_name.lower())
        if spdx:
            spdx_ids.add(spdx)

    if not spdx_ids:
        return "UNKNOWN"
    # Joining a single element yields that element unchanged.
    return " AND ".join(sorted(spdx_ids))


def get_license_for_package(pkg_name, root="/"):
    """Return the license string for *pkg_name* found under *root*.

    Reads ``<root>/usr/share/doc/<pkg_name>/copyright`` (trailing slashes on
    *root* are dropped first). If the file is DEP-5 formatted, the result of
    ``extract_dep5_license`` is returned. In every other case — the path is
    not a regular file, it cannot be read, it is blank, or it is not DEP-5 —
    the result is ``"UNKNOWN"``.
    """
    doc_file = "{}/usr/share/doc/{}/copyright".format(root.rstrip("/"), pkg_name)

    text = ""
    if os.path.isfile(doc_file):
        try:
            # Undecodable bytes are replaced rather than raising.
            with open(doc_file, "r", errors="replace") as handle:
                text = handle.read()
        except OSError:
            text = ""

    if text.strip() and is_dep5(text):
        return extract_dep5_license(text)
    return "UNKNOWN"


def _stanza_is_installed(stanza):
    """Return True when a parsed status stanza names a fully installed package."""
    if not stanza.get("Package"):
        return False
    # Status has the form "<want> <flag> <state>". Only the final state word
    # matters, and it must match exactly: a substring test would also accept
    # "not-installed" and "half-installed".
    words = stanza.get("Status", "").split()
    return bool(words) and words[-1] == "installed"


def parse_dpkg_status(status_path):
    """Parse a dpkg status file and return {pkg: version} for installed packages.

    Stanzas are separated by blank lines. Only single-line ``Key: value``
    fields are kept; continuation lines (starting with a space or tab) are
    ignored. A package is included only when the last word of its ``Status``
    field is exactly ``installed``. A package without a ``Version`` field is
    recorded with the version ``"UNKNOWN"``.

    Args:
        status_path: Path to the dpkg ``status`` file.

    Returns:
        Dict mapping package name to version string.

    Raises:
        SystemExit: with code 1 (after printing an error to stderr) when the
            file cannot be opened or read.
    """
    packages = {}
    current = {}

    def record(stanza):
        if _stanza_is_installed(stanza):
            packages[stanza["Package"]] = stanza.get("Version", "UNKNOWN")

    try:
        # Decode explicitly as UTF-8 so parsing does not depend on the locale;
        # undecodable bytes are replaced rather than raising.
        with open(status_path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.rstrip("\n")
                if not line:
                    # End of stanza — record if installed
                    record(current)
                    current = {}
                elif line.startswith((" ", "\t")):
                    # Continuation line — ignore
                    continue
                elif ":" in line:
                    key, _, val = line.partition(":")
                    current[key.strip()] = val.strip()
    except OSError:
        print(f"ERROR: Cannot read dpkg status file: {status_path}", file=sys.stderr)
        sys.exit(1)
    # Handle last stanza if file has no trailing blank line
    record(current)
    return packages


def _query_local_packages():
    """Return [(package, version), ...] for installed packages via dpkg-query.

    Exits with status 1 (after printing an error to stderr) when dpkg-query
    cannot be started or returns a non-zero exit code.
    """
    try:
        result = subprocess.run(
            # ${Status} is requested so that packages which are known to dpkg
            # but not installed (e.g. removed with config files left behind)
            # can be filtered out, matching the --root code path.
            ["dpkg-query", "-W", "-f=${Package}\t${Version}\t${Status}\n"],
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        # Raised e.g. when the dpkg-query binary is absent from the image.
        print(f"ERROR: dpkg-query failed: {exc}", file=sys.stderr)
        sys.exit(1)
    if result.returncode != 0:
        print(f"ERROR: dpkg-query failed: {result.stderr}", file=sys.stderr)
        sys.exit(1)

    packages = []
    for line in result.stdout.splitlines():
        parts = line.split("\t")
        if len(parts) != 3:
            continue
        pkg, version, status = parts
        # Status is "<want> <flag> <state>"; keep fully installed packages only.
        state_words = status.split()
        if not state_words or state_words[-1] != "installed":
            continue
        packages.append((pkg, version))
    return packages


def main():
    """Print one ``package<TAB>version<TAB>license`` line per installed package.

    With ``--root`` set to anything other than ``/`` the package list is read
    from ``<root>/var/lib/dpkg/status``; otherwise ``dpkg-query`` is run
    against the running system. Package lines go to stdout and a one-line
    summary with the package count goes to stderr.
    """
    parser = argparse.ArgumentParser(
        description="Extract dpkg package info (stdlib only)"
    )
    parser.add_argument(
        "--root",
        default="/",
        help="Filesystem root to inspect (default: /, i.e. running system)",
    )
    args = parser.parse_args()
    root = args.root.rstrip("/") or "/"

    if root != "/":
        # BuildKit mode: parse dpkg status file from mounted target filesystem
        status_path = f"{root}/var/lib/dpkg/status"
        records = parse_dpkg_status(status_path).items()
    else:
        # Local mode: run dpkg-query inside the container
        records = _query_local_packages()

    count = 0
    for pkg, version in records:
        license_id = get_license_for_package(pkg, root)
        print(f"{pkg}\t{version}\t{license_id}")
        count += 1

    icon = "✅" if count > 0 else "⚠️"
    print(f"{icon} [dpkg] extracted {count} package(s)", file=sys.stderr)


if __name__ == "__main__":
    main()