# context.yaml

# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# This file represents the default ARG values of Dockerfiles generated
# by render.py. These are the recommended default values for users and
# are the source of truth for the values used in our delivered images.
#
# Some ARGs have multiple valid values and can be changed for local testing.
# You can either edit them locally in this file or pass --build-arg to
# `docker build` when building.

# Defaults for the `dynamo` section.
# Keys directly under `dynamo` are not tied to a CUDA version; each `cudaX.Y`
# sub-mapping holds the values that differ per CUDA version.
dynamo:
  base_image: nvcr.io/nvidia/cuda-dl-base
  # Per-CUDA-version tags (presumably combined with `base_image` above as
  # <image>:<tag> by render.py -- TODO confirm).
  cuda12.9:
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
  cuda13.0:
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
  epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
  frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
  # Quoted so the value stays the string "3.12" rather than a YAML float
  # (an unquoted 3.10 would be read as 3.1).
  python_version: "3.12"

  # Pinned NATS and etcd versions.
  nats_version: v2.10.28
  etcd_version: v3.5.21

  # Pinned refs for NIXL and the `nixl_*`-prefixed components.
  nixl_ref: 0.9.0
  nixl_ucx_ref: v1.20.0
  nixl_gdrcopy_ref: v2.5.1
  # Full commit SHA rather than a tag. Quoted (like `modelexpress_ref` in the
  # vllm section) so that no future SHA can be resolved as a number, e.g. an
  # all-digit value or one shaped like `123e456`.
  nixl_ucx_efa_ref: "9d2b88a1f67faf9876f267658bd077b379b8bb76"
  nixl_libfabric_ref: v2.3.0
  # Feature toggles. Kept as quoted strings so they are the text "true"/"false"
  # rather than YAML booleans.
  enable_kvbm: "false"
  enable_media_ffmpeg: "false"
  enable_gpu_memory_service: "false"
  # Quoted for the same reason as python_version: "7.1" must stay a string.
  ffmpeg_version: "7.1"
  efa_version: 1.45.1

# Defaults for the `vllm` section.
# Keys directly under `vllm` are not tied to a CUDA version; each `cudaX.Y`
# sub-mapping holds the values that differ per CUDA version.
vllm:
  base_image: nvcr.io/nvidia/cuda-dl-base
  runtime_image: nvcr.io/nvidia/cuda
  # Per-CUDA-version tags (presumably `base_image_tag` pairs with `base_image`
  # and `runtime_image_tag` with `runtime_image` -- TODO confirm in render.py).
  cuda12.9:
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
    runtime_image_tag: 12.9.1-runtime-ubuntu24.04
  cuda13.0:
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
    runtime_image_tag: 13.0.2-runtime-ubuntu24.04
  # Pinned refs for vLLM and related components.
  vllm_ref: v0.15.1
  # NOTE(review): the key is spelled `flashinf_ref`, not `flashinfer_ref`;
  # verify what name the consuming templates expect before renaming it.
  flashinf_ref: v0.6.1
  lmcache_ref: 0.3.13
  # Quoted so the value is the string "10" rather than a YAML integer.
  max_jobs: "10"
  # Feature toggles. Kept as quoted strings so they are the text "true"/"false"
  # rather than YAML booleans.
  enable_media_ffmpeg: "true"
  enable_gpu_memory_service: "true"
  enable_kvbm: "true"
  enable_modelexpress_p2p: "false"
  # Full commit SHA, quoted so it is always read as a string.
  # Presumably only relevant when enable_modelexpress_p2p is "true" -- confirm.
  modelexpress_ref: "3d73992ce6c10e52ddc54f7f12af35d27e173f15"

# Defaults for the `sglang` section.
# Keys directly under `sglang` are not tied to a CUDA version; each `cudaX.Y`
# sub-mapping holds the values that differ per CUDA version.
sglang:
  base_image: nvcr.io/nvidia/cuda-dl-base
  runtime_image: lmsysorg/sglang
  # Per-CUDA-version tags (presumably `base_image_tag` pairs with `base_image`
  # and `runtime_image_tag` with `runtime_image` -- TODO confirm in render.py).
  # NOTE(review): the two variants pin different sglang releases
  # (v0.5.7 for cuda12.9, v0.5.8 for cuda13.0) -- confirm this is intentional.
  cuda12.9:
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
    runtime_image_tag: v0.5.7-runtime
  cuda13.0:
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
    runtime_image_tag: v0.5.8-cu130-runtime
  # Feature toggles. Kept as quoted strings so they are the text "true"/"false"
  # rather than YAML booleans.
  enable_media_ffmpeg: "true"
  enable_gpu_memory_service: "true"
  enable_kvbm: "false"

# Defaults for the `trtllm` section.
# Keys directly under `trtllm` are not tied to a CUDA version; the `cuda13.1`
# sub-mapping holds the CUDA-version-specific image tags.
trtllm:
  base_image: nvcr.io/nvidia/pytorch
  runtime_image: nvcr.io/nvidia/cuda-dl-base
  # Only one CUDA variant is defined for this section.
  cuda13.1:
    base_image_tag: 25.12-py3
    runtime_image_tag: 25.12-cuda13.1-runtime-ubuntu24.04
  # Feature toggles. Kept as quoted strings so they are the text "true"/"false"
  # rather than YAML booleans.
  enable_media_ffmpeg: "true"
  enable_gpu_memory_service: "false"
  enable_kvbm: "true"
  # Quoted so the value stays the string "3.12" rather than a YAML float.
  python_version: "3.12"
  index_url: https://pypi.nvidia.com/
  pip_wheel_dir: /tmp/trtllm_wheel/
  pip_wheel: tensorrt-llm==1.3.0rc1
  # The tag is a shell-style parameter expansion: `${VAR#*==}` drops everything
  # up to and including the first `==`. Presumably TENSORRTLLM_PIP_WHEEL carries
  # the `pip_wheel` value above, giving the tag `1.3.0rc1` -- TODO confirm.
  # Single-quoted so `#`, `{`, `*` and `$` are always literal text and the
  # value can never be cut short as a comment.
  trtllm_wheel_image: 'nvcr.io/nvidia/tensorrt-llm/release:${TENSORRTLLM_PIP_WHEEL#*==}'

  # NOTE(review): 1.2.0rc6 here differs from the 1.3.0rc1 pinned in `pip_wheel`
  # -- confirm the mismatch is intentional.
  github_trtllm_commit: 1.2.0rc6
  # Pinned Python package versions. Several carry a `25.12` suffix that matches
  # `base_image_tag` above; presumably they must be bumped together -- confirm.
  torch_version: 2.10.0a0+b4e4ee81d3.nv25.12
  torch_tensorrt_version: 2.10.0a0
  torchvision_version: 0.25.0a0+ca221243
  torchao_ver: 0.15.0+git01374eb5
  torchdata_ver: 0.11.0
  torchtitan_ver: 0.2.0
  jinja2_version: 3.1.6
  sympy_version: 1.14.0
  pytorch_triton_ver: 3.5.1+gitbfeb0668.nv25.12
  flash_attn_version: 2.7.4.post1+25.12
  flashinfer_python_ver: 0.6.1
  # Quoted so the value is the string "0" rather than a YAML integer.
  has_trtllm_context: "0"