"vllm/vscode:/vscode.git/clone" did not exist on "089cd4f002484599aeed366c31629dccf491ce81"
args.Dockerfile 5.29 KB
Newer Older
1
2
3
4
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
5
# === BEGIN templates/args.Dockerfile ===
6
7
8
##########################
#### Build Arguments #####
##########################
9
10
11
# TARGETARCH is set automatically by Docker BuildKit for every --platform build.
# It must NOT be declared in the global scope (before any FROM) — doing so shadows
# the automatic per-platform value that BuildKit injects.
12
#
13
# In each stage that needs it, re-declare with:  ARG TARGETARCH
14
#
15
16
# ARCH_ALT (x86_64 / aarch64) is computed inline in RUN steps:
#   ARCH_ALT=$([ "${TARGETARCH}" = "amd64" ] && echo "x86_64" || echo "aarch64")
17
18
19
20
21
22
ARG DEVICE={{ device }}
{% if device == "cuda" -%}
{% set device_key = device + cuda_version -%}
{% else -%}
{% set device_key = device -%}
{% endif %}
23
24
25

# Python/CUDA configuration
ARG PYTHON_VERSION={{ context.dynamo.python_version }}
26
{% if device == "cuda" -%}
27
28
ARG CUDA_VERSION={{ cuda_version }}
ARG CUDA_MAJOR=${CUDA_VERSION%%.*}
29
{% endif %}
30

31
# Base and runtime images configuration
32
33
ARG BASE_IMAGE={{ context[framework][device_key].base_image }}
ARG BASE_IMAGE_TAG={{ context[framework][device_key].base_image_tag }}
34
{% if framework in ["sglang", "trtllm", "vllm"] -%}
35
36
ARG RUNTIME_IMAGE={{ context[framework][device_key].runtime_image }}
ARG RUNTIME_IMAGE_TAG={{ context[framework][device_key].runtime_image_tag }}
37
38
{%- endif %}

39
# wheel builder image selection
40
{% if device == "xpu" or device == "cpu" %}
41
ARG WHEEL_BUILDER_IMAGE=${BASE_IMAGE}:${BASE_IMAGE_TAG}
42
43
44
{% elif platform == "multi" %}
{# Multi-arch: manylinux selection is handled via --platform-pinned stage aliases   #}
{# in wheel_builder.Dockerfile using TARGETARCH. No static ARG needed here.         #}
45
{% else %}
46
ARG WHEEL_BUILDER_IMAGE=quay.io/pypa/manylinux_2_28_{{ "x86_64" if platform == "amd64" else "aarch64" }}
47
48
{% endif %}

49
50
51
52
53
54
55
56
57
# Build configuration
ARG ENABLE_KVBM={{ context[framework].enable_kvbm }}
ARG CARGO_BUILD_JOBS

ARG NATS_VERSION={{ context.dynamo.nats_version }}
ARG ETCD_VERSION={{ context.dynamo.etcd_version }}

ARG ENABLE_MEDIA_FFMPEG={{ context[framework].enable_media_ffmpeg }}
ARG FFMPEG_VERSION={{ context.dynamo.ffmpeg_version }}
58
{% if device == "cuda" -%}
59
ARG ENABLE_GPU_MEMORY_SERVICE={{ context[framework].enable_gpu_memory_service }}
60
{% endif %}
61
62
63
64
65
66
67
68

# SCCACHE configuration
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""

# NIXL configuration
ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
69
ARG NIXL_REF={{ context[framework].nixl_ref }}
70
{% if device == "cuda" %}
71
72
ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
73
{% endif %}
74
75
76
77
78
79
80
81
82
83

{% if target == "dev" or target == "local-dev" %}
ARG FRAMEWORK={{ framework }}
{% endif %}

{% if target == "frontend" %}
ARG EPP_IMAGE={{ context.dynamo.epp_image }}
ARG FRONTEND_IMAGE={{ context.dynamo.frontend_image }}
{% endif %}

84
85
86
87
88
89
90
{% if target == "planner" %}
ARG PLANNER_BUILD_IMAGE={{ context.dynamo.planner_build_image }}
ARG PLANNER_BUILD_IMAGE_TAG={{ context.dynamo.planner_build_image_tag }}
ARG PLANNER_RUNTIME_IMAGE={{ context.dynamo.planner_runtime_image }}
ARG PLANNER_RUNTIME_IMAGE_TAG={{ context.dynamo.planner_runtime_image_tag }}
{% endif %}

91
92
{% if framework == "vllm" -%}
# Make sure to update the dependency version in pyproject.toml when updating this
93
ARG VLLM_REF={{ context[framework][device_key].vllm_ref }}
94
95
ARG MAX_JOBS={{ context.vllm.max_jobs }}
# FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds
96
{% if device == "cuda" -%}
97
ARG FLASHINF_REF={{ context.vllm.flashinf_ref }}
98
{% endif %}
99
ARG LMCACHE_REF={{ context.vllm.lmcache_ref }}
100
ARG VLLM_OMNI_REF={{ context.vllm.vllm_omni_ref }}
101

102
{% if device == "cuda" -%}
103
104
# If left blank, then we will fallback to vLLM defaults
ARG DEEPGEMM_REF=""
105
106
107
108

# ModelExpress for P2P weight transfer (optional)
ARG ENABLE_MODELEXPRESS_P2P={{ context.vllm.enable_modelexpress_p2p }}
ARG MODELEXPRESS_REF={{ context.vllm.modelexpress_ref }}
109
{% endif %}
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
{%- endif -%}

{% if framework == "trtllm" %}
# TensorRT-LLM specific configuration
ARG HAS_TRTLLM_CONTEXT={{ context.trtllm.has_trtllm_context }}
ARG TENSORRTLLM_PIP_WHEEL={{ context.trtllm.pip_wheel }}
ARG TENSORRTLLM_INDEX_URL={{ context.trtllm.index_url }}
ARG GITHUB_TRTLLM_COMMIT={{ context.trtllm.github_trtllm_commit }}
ARG TRTLLM_WHEEL_IMAGE={{ context.trtllm.trtllm_wheel_image }}

# Copy pytorch installation from NGC PyTorch
ARG FLASHINFER_PYTHON_VER={{ context.trtllm.flashinfer_python_ver }}
ARG PYTORCH_TRITON_VER={{ context.trtllm.pytorch_triton_ver }}
ARG TORCHAO_VER={{ context.trtllm.torchao_ver }}
ARG TORCHDATA_VER={{ context.trtllm.torchdata_ver }}
ARG TORCHTITAN_VER={{ context.trtllm.torchtitan_ver }}
ARG TORCH_VER={{ context.trtllm.torch_version }}
ARG TORCH_TENSORRT_VER={{ context.trtllm.torch_tensorrt_version }}
ARG TORCHVISION_VER={{ context.trtllm.torchvision_version }}
ARG JINJA2_VER={{ context.trtllm.jinja2_version }}
ARG SYMPY_VER={{ context.trtllm.sympy_version }}
ARG FLASH_ATTN_VER={{ context.trtllm.flash_attn_version }}

# Python configuration
ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }}
{%- endif -%}

{% if make_efa == true %}
ARG EFA_VERSION={{ context.dynamo.efa_version }}
ARG EFA_BASE_IMAGE={{ "runtime" if target=="runtime" else "dev" }}
140
{%- endif -%}