#!/usr/bin/env python3 # SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 """Generate a dependency CSV (and optional attribution markdown) for Rust crates. Extracts external Rust dependencies from one or more Cargo workspaces using ``cargo metadata`` and writes a CSV with dependency type, package name, version, SPDX license identifier, and repository URL. Optionally generates an attribution markdown file with full license texts. Usage: python generate_rust_deps.py -o rust_deps.csv python generate_rust_deps.py --attributions ATTRIBUTIONS-Rust.md python generate_rust_deps.py -o rust_deps.csv --attributions attr.md -v """ import argparse import csv import json import logging import re import subprocess import sys from pathlib import Path _SCRIPT_DIR = Path(__file__).resolve().parent _REPO_ROOT = _SCRIPT_DIR.parent.parent log = logging.getLogger(__name__) FIELDNAMES = ["dependency_type", "package_name", "version", "spdx_license", "repo_url"] DEFAULT_MANIFEST_PATHS = [ "Cargo.toml", "lib/bindings/python/Cargo.toml", "lib/bindings/kvbm/Cargo.toml", ] _LICENSE_NAMES = [ "LICENSE", "LICENSE.md", "LICENSE.txt", "LICENSE-MIT", "LICENSE-APACHE", "LICENCE", "LICENCE.md", "LICENCE.txt", "COPYING", "COPYING.md", ] _ATTRIBUTION_PREAMBLE = """\ # Third-Party Software Attributions This project uses the following third-party libraries. Each library is \ open-source and licensed under the terms indicated below. This file is automatically generated. Please do not edit it directly. """ def get_cargo_metadata(manifest_path: Path, cargo_cmd: str) -> dict: """Run ``cargo metadata`` and return the parsed JSON.""" cmd = [ cargo_cmd, "metadata", "--format-version", "1", "--locked", "--manifest-path", str(manifest_path), ] log.debug("Running: %s", " ".join(cmd)) result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) if result.returncode != 0: log.error( "cargo metadata failed (exit %d) for %s: %s", result.returncode, manifest_path, result.stderr, ) raise RuntimeError( f"cargo metadata failed for {manifest_path}: {result.stderr}" ) return json.loads(result.stdout) def extract_external_deps(metadata: dict) -> list[dict[str, str]]: """Extract external (non-workspace, non-local) dependencies from cargo metadata.""" workspace_member_ids = set(metadata.get("workspace_members", [])) packages = [] for pkg in metadata.get("packages", []): # Skip workspace members (local crates) if pkg["id"] in workspace_member_ids: continue # Skip path-only dependencies (local crates outside the workspace) if pkg.get("source") is None: continue repo_url = pkg.get("repository") or "" if not repo_url: repo_url = f"https://crates.io/crates/{pkg['name']}" packages.append( { "dependency_type": "rust", "package_name": pkg["name"], "version": pkg["version"], "spdx_license": pkg.get("license") or "UNKNOWN", "repo_url": repo_url, "manifest_path": pkg.get("manifest_path", ""), } ) return packages def deduplicate(entries: list[dict[str, str]]) -> list[dict[str, str]]: """Deduplicate entries by (package_name, version).""" seen: dict[tuple[str, str], dict[str, str]] = {} for entry in entries: key = (entry["package_name"], entry["version"]) if key not in seen: seen[key] = entry return list(seen.values()) def _find_license_text(directory: Path) -> str: """Find and read a license file in *directory*.""" for name in _LICENSE_NAMES: path = directory / name if path.is_file(): return path.read_text(errors="replace").rstrip() return "" def _raw_license_url(repo_url: str, license_filename: str = "LICENSE") -> str: """Convert a GitHub/GitLab repo URL to a raw license URL for display.""" m = re.match(r"https://github\.com/([^/]+/[^/]+)", repo_url) if m: return f"https://raw.githubusercontent.com/{m.group(1)}/HEAD/{license_filename}" return repo_url def write_attributions_md(packages: list[dict[str, str]], output_path: str) -> None: """Write a markdown attribution file following the ATTRIBUTIONS-Rust.md style.""" sorted_packages = sorted( packages, key=lambda p: (p["package_name"].lower(), p["version"]) ) lines = [_ATTRIBUTION_PREAMBLE] found = 0 for pkg in sorted_packages: manifest = pkg.get("manifest_path", "") license_text = "" license_filename = "LICENSE" if manifest: pkg_dir = Path(manifest).parent for name in _LICENSE_NAMES: path = pkg_dir / name if path.is_file(): license_text = path.read_text(errors="replace").rstrip() license_filename = name break raw_url = _raw_license_url(pkg["repo_url"], license_filename) lines.append(f"\n## {pkg['package_name']} - {pkg['version']}") lines.append(f"**Repository URL**: {pkg['repo_url']}") lines.append(f"**License Type(s)**: {pkg['spdx_license']}") lines.append(f"### License: {raw_url}") if license_text: lines.append(f"```\n{license_text}\n```") found += 1 else: lines.append("```\nLicense text not available locally.\n```") with open(output_path, "w") as f: f.write("\n".join(lines) + "\n") log.info( "Wrote attribution markdown to %s (%d/%d with license text)", output_path, found, len(sorted_packages), ) def write_csv(packages: list[dict[str, str]], output_path: str | None) -> None: """Write packages to CSV, sorted by package_name.""" sorted_packages = sorted(packages, key=lambda p: p["package_name"]) if output_path: with open(output_path, "w", newline="") as f: writer = csv.DictWriter(f, fieldnames=FIELDNAMES, extrasaction="ignore") writer.writeheader() writer.writerows(sorted_packages) log.info("Wrote %d entries to %s", len(sorted_packages), output_path) else: writer = csv.DictWriter( sys.stdout, fieldnames=FIELDNAMES, extrasaction="ignore" ) writer.writeheader() writer.writerows(sorted_packages) def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Generate a dependency CSV for Rust crates in the workspace", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: %(prog)s -o rust_deps.csv %(prog)s --manifest-paths Cargo.toml -o rust_deps.csv %(prog)s -v """, ) parser.add_argument( "--output", "-o", help="Output CSV file path (default: stdout)", ) parser.add_argument( "--attributions", help="Output attribution markdown file path (e.g. ATTRIBUTIONS-Rust.md)", ) parser.add_argument( "--manifest-paths", default=",".join(DEFAULT_MANIFEST_PATHS), help=( "Comma-separated Cargo.toml paths relative to repo root " f"(default: {','.join(DEFAULT_MANIFEST_PATHS)})" ), ) parser.add_argument( "--cargo-cmd", default="cargo", help="Path to cargo binary (default: cargo)", ) parser.add_argument( "--verbose", "-v", action="store_true", help="Enable verbose logging", ) return parser.parse_args() def main() -> None: args = parse_args() logging.basicConfig( level=logging.DEBUG if args.verbose else logging.INFO, format="%(levelname)s: %(message)s", stream=sys.stderr, ) manifest_paths = [p.strip() for p in args.manifest_paths.split(",") if p.strip()] all_entries: list[dict[str, str]] = [] for rel_path in manifest_paths: abs_path = _REPO_ROOT / rel_path if not abs_path.is_file(): log.warning("Manifest not found, skipping: %s", abs_path) continue log.info("Scanning %s ...", rel_path) try: metadata = get_cargo_metadata(abs_path, args.cargo_cmd) except (RuntimeError, FileNotFoundError) as exc: log.error("Failed to get metadata for %s: %s", rel_path, exc) sys.exit(1) entries = extract_external_deps(metadata) log.info("Found %d external deps in %s", len(entries), rel_path) all_entries.extend(entries) deduplicated = deduplicate(all_entries) log.info("Total unique Rust dependencies: %d", len(deduplicated)) if args.output or not args.attributions: write_csv(deduplicated, args.output) if args.attributions: write_attributions_md(deduplicated, args.attributions) if __name__ == "__main__": main()