# syntax=docker/dockerfile:1.10.0
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# PURPOSE: AWS EFA support layer
#
# This Dockerfile adds AWS EFA (Elastic Fabric Adapter) support on top of
# the runtime or dev stages from framework Dockerfiles (vllm, sglang, trtllm).
#
# Usage (via build.sh with --make-efa flag):
#   ./build.sh --framework vllm --target runtime --make-efa
#   ./build.sh --framework vllm --target local-dev --make-efa

# Global build arguments (declared before the first FROM).
# Neither has a default value in this file, so both are expected to be supplied
# at build time (presumably by build.sh via --build-arg; verify against caller).
#
# BASE_IMAGE: image that every stage in this file starts FROM.
ARG BASE_IMAGE
# EFA_VERSION: version string of the AWS EFA installer tarball. It is
# re-declared inside each stage, where it is actually consumed.
ARG EFA_VERSION

###########################################################
########## Runtime with AWS EFA ##########################
###########################################################
#
# This stage extends the runtime stage with AWS EFA installer
# which includes: libfabric and aws-ofi-nccl plugin
#
# Use this stage when deploying on AWS infrastructure with EFA support

FROM ${BASE_IMAGE} AS runtime-aws

# Re-declare inside the stage so the value is visible to RUN/ENV below.
ARG EFA_VERSION

# The installer and apt-get need root; the stage switches to `dynamo` at the end.
USER root

# Download and run the AWS EFA installer, cleaning up in the same layer so the
# tarball, extracted installer and apt package lists do not persist in the image.
#
# Installer flags:
#   -y:                Assume "yes" for package installs
#   --skip-kmod:       Skip kernel module installation (handled by host)
#   --skip-limit-conf: Skip ulimit configuration (handled by container runtime)
#   --no-verify:       Skip the installer's post-install EFA device verification
#                      and test (an image build typically has no EFA device to
#                      test against). This is NOT a signature check: the
#                      downloaded tarball is not integrity-verified here.
#
# After the installer finishes, /opt/amazon/aws-ofi-nccl is deleted, so the
# installer's aws-ofi-nccl plugin is not kept in the resulting image.
# NOTE(review): presumably an NCCL plugin is provided elsewhere - confirm.
#
# The leading `:` line fails the build early with a clear message when the
# EFA_VERSION build arg was not supplied, instead of an opaque curl HTTP error.
RUN : "${EFA_VERSION:?EFA_VERSION build arg is required}" && \
    mkdir -p /tmp/efa && \
    cd /tmp/efa && \
    curl --retry 3 --retry-delay 2 -fsSL -o "aws-efa-installer-${EFA_VERSION}.tar.gz" \
        "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_VERSION}.tar.gz" && \
    tar -xf "aws-efa-installer-${EFA_VERSION}.tar.gz" && \
    cd aws-efa-installer && \
    apt-get update && \
    ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify && \
    rm -rf /tmp/efa && \
    rm -rf /opt/amazon/aws-ofi-nccl && \
    rm -rf /var/lib/apt/lists/* && \
    ldconfig

# Record the installed EFA installer version in the image environment.
ENV EFA_VERSION="${EFA_VERSION}"

# Drop root again for the final image.
# The `dynamo` user is not created in this file; it is expected to exist in BASE_IMAGE.
USER dynamo

# The entrypoint script is not added in this file; it is expected to exist in BASE_IMAGE.
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []

########################################################################
########## Development with AWS EFA (run.sh, runs as root user) ########
########################################################################
#
# PURPOSE: Development environment with AWS EFA support
#
# This stage extends dev stages with development tools for building and
# debugging on EFA-enabled AWS instances.

FROM ${BASE_IMAGE} AS dev-aws

# Re-declare inside the stage so the value is visible to RUN/ENV below.
ARG EFA_VERSION

# No USER switch is performed in this stage: the install below runs as whatever
# user BASE_IMAGE ends with, which is expected to be root for dev images.
#
# Download and run the AWS EFA installer, cleaning up in the same layer so the
# tarball, extracted installer and apt package lists do not persist in the image.
#
# Installer flags:
#   -y:                Assume "yes" for package installs
#   --skip-kmod:       Skip kernel module installation (handled by host)
#   --skip-limit-conf: Skip ulimit configuration (handled by container runtime)
#   --no-verify:       Skip the installer's post-install EFA device verification
#                      and test (an image build typically has no EFA device to
#                      test against). This is NOT a signature check: the
#                      downloaded tarball is not integrity-verified here.
#
# After the installer finishes, /opt/amazon/aws-ofi-nccl is deleted, so the
# installer's aws-ofi-nccl plugin is not kept in the resulting image.
# NOTE(review): presumably an NCCL plugin is provided elsewhere - confirm.
#
# The leading `:` line fails the build early with a clear message when the
# EFA_VERSION build arg was not supplied, instead of an opaque curl HTTP error.
RUN : "${EFA_VERSION:?EFA_VERSION build arg is required}" && \
    mkdir -p /tmp/efa && \
    cd /tmp/efa && \
    curl --retry 3 --retry-delay 2 -fsSL -o "aws-efa-installer-${EFA_VERSION}.tar.gz" \
        "https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_VERSION}.tar.gz" && \
    tar -xf "aws-efa-installer-${EFA_VERSION}.tar.gz" && \
    cd aws-efa-installer && \
    apt-get update && \
    ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify && \
    rm -rf /tmp/efa && \
    rm -rf /opt/amazon/aws-ofi-nccl && \
    rm -rf /var/lib/apt/lists/* && \
    ldconfig

# Record the installed EFA installer version in the image environment.
ENV EFA_VERSION="${EFA_VERSION}"

# The entrypoint script is not added in this file; it is expected to exist in BASE_IMAGE.
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []