OpenDAS / text-generation-inference · Commits · 1883d8ec

Unverified commit 1883d8ec, authored Apr 09, 2023 by OlivierDehaene, committed by GitHub on Apr 09, 2023

feat(docker): improve flash_attention caching (#160)

parent 3f2542bb
Showing 4 changed files with 26 additions and 21 deletions (+26 / -21):

  Dockerfile                     +4   -2
  server/Makefile                +2  -19
  server/Makefile-flash-att     +10   -0
  server/Makefile-transformers  +10   -0
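In short, the flash-attention and transformers install recipes move out of server/Makefile into dedicated server/Makefile-flash-att and server/Makefile-transformers files, and the Dockerfile now copies each file individually just before the build step that uses it. Edits to unrelated Makefile targets therefore no longer invalidate Docker's cached layers for the expensive flash-attention and transformers builds.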
Dockerfile

...
@@ -56,14 +56,16 @@ WORKDIR /usr/src
 # Install torch
 RUN pip install torch --extra-index-url https://download.pytorch.org/whl/cu118 --no-cache-dir
 
-COPY server/Makefile server/Makefile
-
 # Install specific version of flash attention
+COPY server/Makefile-flash-att server/Makefile
 RUN cd server && make install-flash-attention
 
 # Install specific version of transformers
+COPY server/Makefile-transformers server/Makefile
 RUN cd server && BUILD_EXTENSIONS="True" make install-transformers
 
+COPY server/Makefile server/Makefile
+
 # Install server
 COPY proto proto
 COPY server server
...
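The reordering above is about Docker's layer cache: a COPY instruction invalidates its own layer and every later layer whenever the copied file's content changes. A minimal sketch of the pattern, using only the files that appear in this diff:

# Copying one catch-all Makefile up front ties every later layer to it:
# editing any target in server/Makefile forces Docker to rebuild the
# flash-attention and transformers layers as well.
#   COPY server/Makefile server/Makefile
#   RUN cd server && make install-flash-attention

# Copying only the file a step actually needs keeps that step's cached
# layer valid until this specific file changes.
COPY server/Makefile-flash-att server/Makefile
RUN cd server && make install-flash-attention

COPY server/Makefile-transformers server/Makefile
RUN cd server && BUILD_EXTENSIONS="True" make install-transformers

# The full Makefile is copied last, together with the rest of the server code.
COPY server/Makefile server/Makefile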
server/Makefile

-transformers_commit := 2b57aa18da658e7d2f42ef6bd5b56751af582fef
-flash_att_commit := 4d87e4d875077ad9efd25030efa4ab0ba92c19e1
+include Makefile-transformers
+include Makefile-flash-att
 
 gen-server:
 	# Compile protos

...

@@ -10,23 +10,6 @@ gen-server:
 	find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
 	touch text_generation_server/pb/__init__.py
 
-install-transformers:
-	# Install specific version of transformers with custom cuda kernels
-	pip uninstall transformers -y || true
-	rm -rf transformers || true
-	git clone https://github.com/OlivierDehaene/transformers.git
-	cd transformers && git checkout $(transformers_commit)
-	cd transformers && python setup.py install
-
-install-flash-attention:
-	# Install specific version of flash attention
-	pip install packaging
-	pip uninstall flash_attn rotary_emb dropout_layer_norm -y || true
-	rm -rf flash-attention || true
-	git clone https://github.com/HazyResearch/flash-attention.git
-	cd flash-attention && git checkout $(flash_att_commit)
-	cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
-
 install-torch:
 	# Install specific version of torch
 	pip install torch --extra-index-url https://download.pytorch.org/whl/cu118 --no-cache-dir

...
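Because server/Makefile now pulls the two new files in with Make's include directive, the install-flash-attention and install-transformers targets (and their *_commit variables) remain available when running make from the server directory; only the Docker build copies the files separately.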
server/Makefile-flash-att  0 → 100644
flash_att_commit := d478eeec8f16c7939c54e4617dbd36f59b8eeed7
install-flash-attention:
# Install specific version of flash attention
pip install packaging
pip uninstall flash_attn rotary_emb dropout_layer_norm -y || true
rm -rf flash-attention || true
git clone https://github.com/HazyResearch/flash-attention.git
cd flash-attention && git checkout $(flash_att_commit)
cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
\ No newline at end of file
server/Makefile-transformers  0 → 100644
transformers_commit := b8d969ff47c6a9d40538a6ea33df021953363afc
install-transformers:
# Install specific version of transformers with custom cuda kernels
pip install --upgrade setuptools
pip uninstall transformers -y || true
rm -rf transformers || true
git clone https://github.com/OlivierDehaene/transformers.git
cd transformers && git checkout $(transformers_commit)
cd transformers && python setup.py install
\ No newline at end of file
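Note that the pinned revisions change in the move as well: flash_att_commit goes from 4d87e4d875077ad9efd25030efa4ab0ba92c19e1 to d478eeec8f16c7939c54e4617dbd36f59b8eeed7, and transformers_commit from 2b57aa18da658e7d2f42ef6bd5b56751af582fef to b8d969ff47c6a9d40538a6ea33df021953363afc, so both dependencies are bumped alongside the caching change.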