Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
06ec4867
Unverified
Commit
06ec4867
authored
Mar 14, 2024
by
Thomas Parnell
Committed by
GitHub
Mar 14, 2024
Browse files
Install `flash_attn` in Docker image (#3396)
parent
8fe83865
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
24 additions
and
0 deletions
+24
-0
Dockerfile
Dockerfile
+24
-0
No files found.
Dockerfile
View file @
06ec4867
...
...
@@ -57,6 +57,22 @@ ENV VLLM_INSTALL_PUNICA_KERNELS=1
# Compile the extension modules in place, next to the Python sources.
RUN python3 setup.py build_ext --inplace
#################### EXTENSION Build IMAGE ####################
#################### FLASH_ATTENTION Build IMAGE ####################
# Stage that writes a flash-attn wheel into /usr/src/flash-attention-v2.
# Other stages bind-mount that directory from this stage and pip-install
# the wheel found there.
FROM dev AS flash-attn-builder

# max jobs used for build
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}

# flash attention version
ARG flash_attn_version=v2.5.6
ENV FLASH_ATTN_VERSION=${flash_attn_version}

# `pip wheel` writes its output into the current directory, so the working
# directory doubles as the wheel output directory.
WORKDIR /usr/src/flash-attention-v2

# Download the wheel or build it if a pre-compiled release doesn't exist.
# The requirement specifier is quoted so it reaches pip as a single argument.
RUN pip --verbose wheel "flash-attn==${FLASH_ATTN_VERSION}" \
    --no-build-isolation \
    --no-deps \
    --no-cache-dir
#################### FLASH_ATTENTION Build IMAGE ####################
#################### TEST IMAGE ####################
# image to run unit testing suite
...
...
@@ -68,6 +84,9 @@ WORKDIR /vllm-workspace
# Bring the whole build context into the workspace.
# ADD is used to preserve directory structure.
ADD . /vllm-workspace/
# Pull the compiled shared objects (*.so) out of the `build` stage.
COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
# Install flash attention from the wheel built in the flash-attn-builder stage.
# The wheel directory is bind-mounted only for the duration of this RUN.
RUN --mount=type=bind,from=flash-attn-builder,source=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
    pip install --no-cache-dir /usr/src/flash-attention-v2/*.whl
# Skip installation of build dependencies because we are using pre-compiled
# extensions: drop pyproject.toml first, then install with
# VLLM_USE_PRECOMPILED=1.
RUN rm pyproject.toml
RUN --mount=type=cache,target=/root/.cache/pip \
    VLLM_USE_PRECOMPILED=1 pip install --verbose .
...
...
@@ -88,6 +107,11 @@ WORKDIR /workspace
# Copy only the requirements manifest, then install from it using a pip
# cache mount.
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install --requirement requirements.txt
# Install flash attention from the wheel built in the flash-attn-builder stage.
# The wheel directory is bind-mounted only for the duration of this RUN.
RUN --mount=type=bind,from=flash-attn-builder,source=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
    pip install --no-cache-dir /usr/src/flash-attention-v2/*.whl
#################### RUNTIME BASE IMAGE ####################
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment