# syntax=docker/dockerfile:1.3-labs

ARG FROM_IMAGE_NAME
FROM ${FROM_IMAGE_NAME} as mcore_image
ENV PIP_CONSTRAINT=""
RUN pip3 install -U pip

FROM mcore_image as build_te
ARG TE_COMMIT=bee4649c15a79ffcb9689ca7c0c963f5febaa28a
WORKDIR /opt
COPY patches/nemo_2.3.0_te.patch .
RUN \
    git clone https://github.com/NVIDIA/TransformerEngine.git && \
    cd TransformerEngine && \
    git fetch origin ${TE_COMMIT} && \
    git checkout ${TE_COMMIT} && \
    patch -p1 < /opt/nemo_2.3.0_te.patch && \
    git submodule init && git submodule update && \
    rm /opt/nemo_2.3.0_te.patch && \
    pip3 wheel --no-cache-dir -v .

FROM mcore_image as build_causal_conv1d
WORKDIR /opt
RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip3 wheel --no-cache-dir -v git+https://github.com/Dao-AILab/causal-conv1d.git@v1.2.2.post1

FROM mcore_image as build_grouped_gemm
WORKDIR /opt
RUN pip3 wheel --no-cache-dir -v git+https://github.com/fanshiqing/grouped_gemm@v1.1.2

FROM mcore_image as build_experimental_flash_attention
WORKDIR /opt
ARG EXPERIMENTAL_FLASH_ATTN_VERSION=c0f04c0b6c747914d95205867d86dd19c027d01d
RUN --mount=type=secret,id=EXPERIMENTAL_FLASH_ATTN \
    EXPERIMENTAL_FLASH_ATTN=$(cat /run/secrets/EXPERIMENTAL_FLASH_ATTN) && \
    pip uninstall -y ninja && \
    pip install --no-cache-dir ninja && \
    MAX_JOBS=4 pip wheel --no-cache-dir -v $EXPERIMENTAL_FLASH_ATTN@${EXPERIMENTAL_FLASH_ATTN_VERSION} && \
    ls -al

FROM mcore_image as build_mamba_ssm
WORKDIR /opt
RUN git clone https://github.com/state-spaces/mamba.git && \
    cd mamba && \
    git checkout v2.2.0 && \
    sed -i "/triton/d" setup.py && \
    MAMBA_FORCE_BUILD=TRUE pip3 wheel --no-cache-dir -v .

FROM mcore_image as main
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
    apt-get install -y --no-install-recommends gettext python3-venv && \
    apt-get clean && \
    python -m venv /opt/jet && \
    wget https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 -O /usr/local/bin/yq && \
    chmod a+x /usr/local/bin/yq

COPY --from=build_causal_conv1d /opt/causal_conv1d-*.whl ./
COPY --from=build_grouped_gemm /opt/grouped_gemm-*.whl ./
COPY --from=build_mamba_ssm /opt/mamba/mamba_ssm-*.whl ./
COPY --from=build_te /opt/TransformerEngine/transformer_engine-*.whl ./

RUN \
    --mount=type=bind,source=requirements,target=requirements \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=setup.py,target=setup.py \
    --mount=type=bind,source=megatron/core/package_info.py,target=megatron/core/package_info.py \
    --mount=type=bind,source=megatron/core/README.md,target=megatron/core/README.md \
    --mount=type=bind,source=megatron/core/requirements.txt,target=megatron/core/requirements.txt \
    --mount=type=bind,source=requirements_mlm.txt,target=requirements_mlm.txt \
    --mount=type=bind,source=requirements_ci.txt,target=requirements_ci.txt \
    --mount=type=bind,source=megatron/core/__init__.py,target=megatron/core/__init__.py <<"EOF" bash -ex
pip install -U pip
pip install --no-cache-dir causal_conv1d-*.whl mamba_ssm-*.whl grouped_gemm-*.whl transformer_engine*.whl
PY_ENV=pytorch_25.03 pip install --no-cache-dir . -r requirements_mlm.txt -r requirements_ci.txt
EOF

# Since megatron does not have any dependencies (and isn't a dependency of any other package),
# we can install it separately to make everything a bit quicker.
ARG MCORE_REPO
ARG MCORE_REF
ARG MCORE_BACKWARDS_REF
RUN <<"EOF" bash -exu
# Checkout latest
cd /opt
rm -rf /opt/megatron-lm; mkdir megatron-lm; cd megatron-lm
git init
git remote add origin ${MCORE_REPO}
git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*'
git fetch origin $MCORE_REF
git checkout $MCORE_REF

# Checkout backwards-ref
cd /opt
rm -rf /opt/megatron-lm-legacy; mkdir megatron-lm-legacy; cd megatron-lm-legacy
git init
git remote add origin ${MCORE_REPO}
git fetch origin $MCORE_BACKWARDS_REF
git checkout $MCORE_BACKWARDS_REF
rm -rf megatron; cp -a /opt/megatron-lm/megatron ./
EOF

RUN <<"EOF" bash -ex
pip install -U pip
PY_ENV=pytorch_25.03 pip install --no-cache-dir -e /opt/megatron-lm
EOF

ENV PYTHONPATH="/opt/megatron-lm:$PYTHONPATH"

##### For NVIDIANS only #####
FROM main as jet
ARG CACHEBUST=0
# COPY --from=build_experimental_flash_attention /opt/*.whl ./experimental_flash_attention/
ARG EXPERIMENTAL_FLASH_ATTN_VERSION=c0f04c0b6c747914d95205867d86dd19c027d01d
COPY --from=build_experimental_flash_attention /opt/*.whl ./experimental_flash_attention/
RUN --mount=type=secret,id=JET_INDEX_URLS \
    --mount=type=secret,id=LOGGER_INDEX_URL \
    --mount=type=secret,id=EXPERIMENTAL_FLASH_ATTN \
    LOGGER_INDEX_URL=$(cat /run/secrets/LOGGER_INDEX_URL) && \
    JET_INDEX_URLS=$(cat /run/secrets/JET_INDEX_URLS) && \
    EXPERIMENTAL_FLASH_ATTN=$(cat /run/secrets/EXPERIMENTAL_FLASH_ATTN) && \
    pip install --no-cache-dir "jet-client~=2.0" jet-api --upgrade $JET_INDEX_URLS && \
    pip install --no-cache-dir "one-logger" --upgrade $LOGGER_INDEX_URL && \
    pip install --no-cache-dir --no-build-isolation ./experimental_flash_attention/*flash_attn*.whl
ENV PATH="$PATH:/opt/jet/bin"
###
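
# The snippet below is an illustrative sketch of how this image might be built, not the exact
# CI invocation: the base image, repo URL, refs, and tag are placeholders; only the --build-arg
# names, --secret ids, and the "jet" stage name come from the stages defined above.
#
#   docker buildx build \
#     --target jet \
#     --build-arg FROM_IMAGE_NAME=<pytorch-base-image> \
#     --build-arg MCORE_REPO=<megatron-lm-repo-url> \
#     --build-arg MCORE_REF=<ref-under-test> \
#     --build-arg MCORE_BACKWARDS_REF=<previous-release-ref> \
#     --secret id=JET_INDEX_URLS,env=JET_INDEX_URLS \
#     --secret id=LOGGER_INDEX_URL,env=LOGGER_INDEX_URL \
#     --secret id=EXPERIMENTAL_FLASH_ATTN,env=EXPERIMENTAL_FLASH_ATTN \
#     -t <image-tag> .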