---
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# This file defines the default ARG values for the Dockerfiles generated
# by render.py. These are the recommended defaults for users, and this file
# is the source of truth for the values used in our delivered images.
#
# Some ARGs have multiple valid values and can be changed for local testing:
# either edit this file locally, or pass --build-arg to `docker build`.

# Default ARG values for the dynamo section.
dynamo:
  # Base image and tag, keyed by CUDA variant.
  cuda12.9:
    base_image: nvcr.io/nvidia/cuda-dl-base
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
  cuda13.0:
    base_image: nvcr.io/nvidia/cuda-dl-base
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
  epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
  frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
  # Quoted so the value stays the string "3.12" instead of a float.
  python_version: "3.12"

  nats_version: v2.10.28
  etcd_version: v3.5.21

  # Pinned refs for NIXL and the libraries listed alongside it.
  nixl_ref: 0.10.1
  nixl_ucx_ref: v1.20.0
  nixl_gdrcopy_ref: v2.5.1
  # Commit SHA; quoted so it is always loaded as a string.
  nixl_ucx_efa_ref: "9d2b88a1f67faf9876f267658bd077b379b8bb76"
  nixl_libfabric_ref: v2.3.0
  # Toggles are quoted strings ("true"/"false"), not YAML booleans.
  enable_kvbm: "true"
  enable_media_ffmpeg: "false"
  enable_gpu_memory_service: "false"
  ffmpeg_version: "7.1"
  efa_version: 1.45.1

# Default ARG values for the vllm section.
vllm:
  # Base/runtime images, their tags, and the vLLM ref, keyed by target variant.
  cuda12.9:
    base_image: nvcr.io/nvidia/cuda-dl-base
    runtime_image: nvcr.io/nvidia/cuda
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
    runtime_image_tag: 12.9.1-runtime-ubuntu24.04
    vllm_ref: v0.16.0
  cuda13.0:
    base_image: nvcr.io/nvidia/cuda-dl-base
    runtime_image: nvcr.io/nvidia/cuda
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
    runtime_image_tag: 13.0.2-runtime-ubuntu24.04
    vllm_ref: v0.16.0
  xpu:
    base_image: intel/deep-learning-essentials
    runtime_image: intel/deep-learning-essentials
    base_image_tag: 2025.3.2-0-devel-ubuntu24.04
    # NOTE(review): the runtime tag is the same -devel tag as the base image,
    # unlike the CUDA variants above - confirm this is intentional.
    runtime_image_tag: 2025.3.2-0-devel-ubuntu24.04
    vllm_ref: v0.14.0
  flashinf_ref: v0.6.3
  lmcache_ref: 0.3.14
  vllm_omni_ref: "v0.16.0rc1"
  # Quoted so the value is the string "10", not an integer.
  max_jobs: "10"
  # Toggles are quoted strings ("true"/"false"), not YAML booleans.
  enable_media_ffmpeg: "false"
  enable_gpu_memory_service: "true"
  enable_kvbm: "true"
  enable_modelexpress_p2p: "false"
  modelexpress_ref: "3d73992ce6c10e52ddc54f7f12af35d27e173f15"

# Default ARG values for the sglang section.
sglang:
  # Base/runtime images and their tags, keyed by CUDA variant.
  cuda12.9:
    base_image: nvcr.io/nvidia/cuda-dl-base
    runtime_image: lmsysorg/sglang
    base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
    runtime_image_tag: v0.5.9-runtime
  cuda13.0:
    base_image: nvcr.io/nvidia/cuda-dl-base
    runtime_image: lmsysorg/sglang
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
    runtime_image_tag: v0.5.9-cu130-runtime
  # Toggles are quoted strings ("true"/"false"), not YAML booleans.
  enable_media_ffmpeg: "false"
  enable_gpu_memory_service: "true"
  enable_kvbm: "false"

# Default ARG values for the trtllm section.
trtllm:
  # Base/runtime images and their tags for the single CUDA variant listed.
  cuda13.1:
    base_image: nvcr.io/nvidia/pytorch
    runtime_image: nvcr.io/nvidia/cuda-dl-base
    base_image_tag: 25.12-py3
    runtime_image_tag: 25.12-cuda13.1-runtime-ubuntu24.04
  # Toggles are quoted strings ("true"/"false"), not YAML booleans.
  enable_media_ffmpeg: "false"
  enable_gpu_memory_service: "false"
  enable_kvbm: "true"
  # Quoted so the value stays the string "3.12" instead of a float.
  python_version: "3.12"
  index_url: https://pypi.nvidia.com/
  pip_wheel_dir: /tmp/trtllm_wheel/
  pip_wheel: tensorrt-llm==1.3.0rc5.post1
  # The ${VAR#*==} part is shell prefix-removal syntax and is stored verbatim;
  # presumably it is expanded at build time to the version after "==" - confirm.
  # Single-quoted so '#', '{' and '}' are never read as YAML syntax.
  trtllm_wheel_image: 'nvcr.io/nvidia/tensorrt-llm/release:${TENSORRTLLM_PIP_WHEEL#*==}'

  github_trtllm_commit: v1.3.0rc5.post1
  # Pinned package versions.
  torch_version: 2.10.0a0+b4e4ee81d3.nv25.12
  torch_tensorrt_version: 2.10.0a0
  torchvision_version: 0.25.0a0+ca221243
  torchao_ver: 0.15.0+git01374eb5
  torchdata_ver: 0.11.0
  torchtitan_ver: 0.2.0
  jinja2_version: 3.1.6
  sympy_version: 1.14.0
  pytorch_triton_ver: 3.5.1+gitbfeb0668.nv25.12
  flash_attn_version: 2.7.4.post1+25.12
  flashinfer_python_ver: 0.6.1
  # Quoted so the value is the string "0", not an integer.
  has_trtllm_context: "0"