Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
text-generation-inference
Commits
35509ff5
Unverified
Commit
35509ff5
authored
Nov 23, 2023
by
OlivierDehaene
Committed by
GitHub
Nov 23, 2023
Browse files
chore: update to torch 2.1.0 (#1182)
Close #1142
parent
e12c34bd
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1133 additions
and
933 deletions
+1133
-933
Dockerfile
Dockerfile
+19
-20
integration-tests/conftest.py
integration-tests/conftest.py
+1
-0
server/Makefile
server/Makefile
+3
-7
server/poetry.lock
server/poetry.lock
+1084
-854
server/pyproject.toml
server/pyproject.toml
+5
-3
server/requirements.txt
server/requirements.txt
+21
-49
No files found.
Dockerfile
View file @
35509ff5
...
...
@@ -37,13 +37,13 @@ RUN cargo build --release
# Python builder
# Adapted from: https://github.com/pytorch/pytorch/blob/master/Dockerfile
FROM
debian:bullseye-slim
as pytorch-install
FROM
nvidia/cuda:12.1.0-devel-ubuntu20.04
as pytorch-install
ARG
PYTORCH_VERSION=2.
0
.1
ARG
PYTHON_VERSION=3.
9
ARG
PYTORCH_VERSION=2.
1
.1
ARG
PYTHON_VERSION=3.
10
# Keep in sync with `server/pyproject.toml`
ARG
CUDA_VERSION=1
1.8
ARG
MAMBA_VERSION=23.
1.0
-1
ARG
CUDA_VERSION=1
2.1
ARG
MAMBA_VERSION=23.
3.1
-1
ARG
CUDA_CHANNEL=nvidia
ARG
INSTALL_CHANNEL=pytorch
# Automatically set by buildx
...
...
@@ -75,20 +75,19 @@ RUN chmod +x ~/mambaforge.sh && \
RUN case
${
TARGETPLATFORM
}
in
\
"linux/arm64"
)
exit
1
;;
\
*
)
/opt/conda/bin/conda update
-y
conda
&&
\
/opt/conda/bin/conda
install
-c
"
${
INSTALL_CHANNEL
}
"
-c
"
${
CUDA_CHANNEL
}
"
-y
"python=
${
PYTHON_VERSION
}
"
pytorch
=
=
$PYTORCH_VERSION
"pytorch-cuda=
$(
echo
$CUDA_VERSION
|
cut
-d
'.'
-f
1-2
)
"
;;
\
/opt/conda/bin/conda
install
-c
"
${
INSTALL_CHANNEL
}
"
-c
"
${
CUDA_CHANNEL
}
"
-y
"python=
${
PYTHON_VERSION
}
"
"
pytorch=
$PYTORCH_VERSION
"
"pytorch-cuda=
$(
echo
$CUDA_VERSION
|
cut
-d
'.'
-f
1-2
)
"
;;
\
esac
&&
\
/opt/conda/bin/conda clean
-ya
# CUDA kernels builder image
FROM
pytorch-install as kernel-builder
ARG
MAX_JOBS=8
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--no-install-recommends
\
ninja-build
\
&&
rm
-rf
/var/lib/apt/lists/
*
RUN
/opt/conda/bin/conda
install
-c
"nvidia/label/cuda-11.8.0"
cuda
==
11.8
&&
\
/opt/conda/bin/conda clean
-ya
# Build Flash Attention CUDA kernels
FROM
kernel-builder as flash-att-builder
...
...
@@ -148,7 +147,7 @@ COPY server/Makefile-vllm Makefile
RUN
make build-vllm
# Text Generation Inference base image
FROM
nvidia/cuda:1
1.8
.0-base-ubuntu20.04 as base
FROM
nvidia/cuda:1
2.1
.0-base-ubuntu20.04 as base
# Conda env
ENV
PATH=/opt/conda/bin:$PATH \
...
...
@@ -172,24 +171,24 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
COPY
--from=pytorch-install /opt/conda /opt/conda
# Copy build artifacts from flash attention builder
COPY
--from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-3
9
/opt/conda/lib/python3.
9
/site-packages
COPY
--from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-3
9
/opt/conda/lib/python3.
9
/site-packages
COPY
--from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-3
9
/opt/conda/lib/python3.
9
/site-packages
COPY
--from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-3
10
/opt/conda/lib/python3.
10
/site-packages
COPY
--from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-3
10
/opt/conda/lib/python3.
10
/site-packages
COPY
--from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-3
10
/opt/conda/lib/python3.
10
/site-packages
# Copy build artifacts from flash attention v2 builder
COPY
--from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-3
9
/opt/conda/lib/python3.
9
/site-packages
COPY
--from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-3
10
/opt/conda/lib/python3.
10
/site-packages
# Copy build artifacts from custom kernels builder
COPY
--from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-3
9
/opt/conda/lib/python3.
9
/site-packages
COPY
--from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-3
10
/opt/conda/lib/python3.
10
/site-packages
# Copy build artifacts from exllama kernels builder
COPY
--from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-3
9
/opt/conda/lib/python3.
9
/site-packages
COPY
--from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-3
10
/opt/conda/lib/python3.
10
/site-packages
# Copy build artifacts from awq kernels builder
COPY
--from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-3
9
/opt/conda/lib/python3.
9
/site-packages
COPY
--from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-3
10
/opt/conda/lib/python3.
10
/site-packages
# Copy build artifacts from eetq kernels builder
COPY
--from=eetq-kernels-builder /usr/src/eetq/build/lib.linux-x86_64-cpython-3
9
/opt/conda/lib/python3.
9
/site-packages
COPY
--from=eetq-kernels-builder /usr/src/eetq/build/lib.linux-x86_64-cpython-3
10
/opt/conda/lib/python3.
10
/site-packages
# Copy builds artifacts from vllm builder
COPY
--from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-3
9
/opt/conda/lib/python3.
9
/site-packages
COPY
--from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-3
10
/opt/conda/lib/python3.
10
/site-packages
# Install flash-attention dependencies
RUN
pip
install
einops
--no-cache-dir
...
...
@@ -201,7 +200,7 @@ COPY server/Makefile server/Makefile
RUN
cd
server
&&
\
make gen-server
&&
\
pip
install
-r
requirements.txt
&&
\
pip
install
".[bnb, accelerate, quantize]"
--no-cache-dir
pip
install
".[bnb, accelerate, quantize
, peft
]"
--no-cache-dir
# Install benchmarker
COPY
--from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
...
...
integration-tests/conftest.py
View file @
35509ff5
...
...
@@ -318,6 +318,7 @@ def launcher(event_loop):
],
volumes
=
volumes
,
ports
=
{
"80/tcp"
:
port
},
shm_size
=
"1G"
)
yield
ContainerLauncherHandle
(
client
,
container
.
name
,
port
)
...
...
server/Makefile
View file @
35509ff5
...
...
@@ -16,17 +16,13 @@ gen-server:
find text_generation_server/pb/
-type
f
-name
"*.py"
-print0
-exec
sed
-i
-e
's/^\(import.*pb2\)/from . \1/g'
{}
\;
touch
text_generation_server/pb/__init__.py
install-torch
:
# Install specific version of torch
pip
install
torch
--extra-index-url
https://download.pytorch.org/whl/cu118
--no-cache-dir
install
:
gen-server install-torch
install
:
gen-server
pip
install
pip
--upgrade
pip
install
-r
requirements.txt
pip
install
-e
".[bnb, accelerate]"
pip
install
-e
".[bnb, accelerate
, quantize, peft
]"
run-dev
:
SAFETENSORS_FAST_GPU
=
1 python
-m
torch.distributed.run
--nproc_per_node
=
2 text_generation_server/cli.py serve bigscience/bloom-560m
--sharded
export-requirements
:
poetry
export
-o
requirements.txt
-E
bnb
-E
quantize
--without-hashes
poetry
export
-o
requirements.txt
-E
bnb
--without-hashes
server/poetry.lock
View file @
35509ff5
This source diff could not be displayed because it is too large. You can
view the blob
instead.
server/pyproject.toml
View file @
35509ff5
...
...
@@ -30,14 +30,16 @@ transformers = "^4.32.1"
einops
=
"^0.6.1"
texttable
=
{
version
=
"^1.6.7"
,
optional
=
true
}
datasets
=
{
version
=
"^2.14.0"
,
optional
=
true
}
peft
=
"^0.4.0"
torch
=
{
version
=
"^2.
0
.1"
}
peft
=
{
version
=
"^0.4.0"
,
optional
=
true
}
torch
=
{
version
=
"^2.
1
.1"
,
optional
=
true
}
scipy
=
"^1.11.1"
pillow
=
"^10.0.0"
[tool.poetry.extras]
torch
=
["torch"]
accelerate
=
["accelerate"]
bnb
=
["bitsandbytes"]
peft
=
["peft"]
quantize
=
[
"texttable"
,
"datasets"
,
"accelerate"
]
[tool.poetry.group.dev.dependencies]
...
...
@@ -47,7 +49,7 @@ pytest = "^7.3.0"
[[tool.poetry.source]]
name
=
"pytorch-gpu-src"
url
=
"https://download.pytorch.org/whl/cu11
8
"
url
=
"https://download.pytorch.org/whl/cu1
2
1"
priority
=
"explicit"
[tool.pytest.ini_options]
...
...
server/requirements.txt
View file @
35509ff5
accelerate==0.20.3 ; python_version >= "3.9" and python_version < "3.13"
aiohttp==3.8.5 ; python_version >= "3.9" and python_version < "3.13"
aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "3.13"
async-timeout==4.0.3 ; python_version >= "3.9" and python_version < "3.13"
attrs==23.1.0 ; python_version >= "3.9" and python_version < "3.13"
backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
bitsandbytes==0.41.
1
; python_version >= "3.9" and python_version < "3.13"
certifi==2023.
7.22
; python_version >= "3.9" and python_version < "3.13"
charset-normalizer==3.
2.0
; python_version >= "3.9" and python_version < "3.13"
bitsandbytes==0.41.
2.post2
; python_version >= "3.9" and python_version < "3.13"
certifi==2023.
11.17
; python_version >= "3.9" and python_version < "3.13"
charset-normalizer==3.
3.2
; python_version >= "3.9" and python_version < "3.13"
click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
datasets==2.14.5 ; python_version >= "3.9" and python_version < "3.13"
deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
dill==0.3.7 ; python_version >= "3.9" and python_version < "3.13"
einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
filelock==3.12.4 ; python_version >= "3.9" and python_version < "3.13"
frozenlist==1.4.0 ; python_version >= "3.9" and python_version < "3.13"
fsspec==2023.6.0 ; python_version >= "3.9" and python_version < "3.13"
fsspec[http]==2023.6.0 ; python_version >= "3.9" and python_version < "3.13"
googleapis-common-protos==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
grpc-interceptor==0.15.3 ; python_version >= "3.9" and python_version < "3.13"
grpcio-reflection==1.58.0 ; python_version >= "3.9" and python_version < "3.13"
grpcio-status==1.58.0 ; python_version >= "3.9" and python_version < "3.13"
grpcio==1.58.0 ; python_version >= "3.9" and python_version < "3.13"
hf-transfer==0.1.3 ; python_version >= "3.9" and python_version < "3.13"
filelock==3.13.1 ; python_version >= "3.9" and python_version < "3.13"
fsspec==2023.10.0 ; python_version >= "3.9" and python_version < "3.13"
googleapis-common-protos==1.61.0 ; python_version >= "3.9" and python_version < "3.13"
grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
grpcio-reflection==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
grpcio-status==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
grpcio==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
hf-transfer==0.1.4 ; python_version >= "3.9" and python_version < "3.13"
huggingface-hub==0.16.4 ; python_version >= "3.9" and python_version < "3.13"
idna==3.4 ; python_version >= "3.9" and python_version < "3.13"
jinja2==3.1.2 ; python_version >= "3.9" and python_version < "3.13"
loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "3.13"
mpmath==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
multidict==6.0.4 ; python_version >= "3.9" and python_version < "3.13"
multiprocess==0.70.15 ; python_version >= "3.9" and python_version < "3.13"
networkx==3.1 ; python_version >= "3.9" and python_version < "3.13"
numpy==1.26.0 ; python_version >= "3.9" and python_version < "3.13"
numpy==1.26.2 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
...
...
@@ -42,34 +27,21 @@ opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_versi
opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
packaging==23.1 ; python_version >= "3.9" and python_version < "3.13"
pandas==2.1.1 ; python_version >= "3.9" and python_version < "3.13"
peft==0.4.0 ; python_version >= "3.9" and python_version < "3.13"
pillow==10.0.1 ; python_version >= "3.9" and python_version < "3.13"
protobuf==4.24.3 ; python_version >= "3.9" and python_version < "3.13"
psutil==5.9.5 ; python_version >= "3.9" and python_version < "3.13"
pyarrow==13.0.0 ; python_version >= "3.9" and python_version < "3.13"
python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "3.13"
pytz==2023.3.post1 ; python_version >= "3.9" and python_version < "3.13"
packaging==23.2 ; python_version >= "3.9" and python_version < "3.13"
pillow==10.1.0 ; python_version >= "3.9" and python_version < "3.13"
protobuf==4.25.1 ; python_version >= "3.9" and python_version < "3.13"
pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
regex==2023.
8.8
; python_version >= "3.9" and python_version < "3.13"
regex==2023.
10.3
; python_version >= "3.9" and python_version < "3.13"
requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
scipy==1.11.
2
; python_version >= "3.9" and python_version < "3.13"
scipy==1.11.
4
; python_version >= "3.9" and python_version < "3.13"
sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
setuptools==68.2.2 ; python_version >= "3.9" and python_version < "3.13"
six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
sympy==1.12 ; python_version >= "3.9" and python_version < "3.13"
texttable==1.6.7 ; python_version >= "3.9" and python_version < "3.13"
setuptools==69.0.2 ; python_version >= "3.9" and python_version < "3.13"
tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "3.13"
torch==2.0.1 ; python_version >= "3.9" and python_version < "3.13"
tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
transformers==4.33.
2
; python_version >= "3.9" and python_version < "3.13"
transformers==4.33.
3
; python_version >= "3.9" and python_version < "3.13"
typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
typing-extensions==4.8.0 ; python_version >= "3.9" and python_version < "3.13"
tzdata==2023.3 ; python_version >= "3.9" and python_version < "3.13"
urllib3==2.0.5 ; python_version >= "3.9" and python_version < "3.13"
urllib3==2.1.0 ; python_version >= "3.9" and python_version < "3.13"
win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
wrapt==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
xxhash==3.3.0 ; python_version >= "3.9" and python_version < "3.13"
yarl==1.9.2 ; python_version >= "3.9" and python_version < "3.13"
wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment