Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7e0861bd
Unverified
Commit
7e0861bd
authored
Aug 01, 2024
by
Sage Moore
Committed by
GitHub
Aug 01, 2024
Browse files
[CI/Build] Update PyTorch to 2.4.0 (#6951)
Co-authored-by: Michael Goin <michael@neuralmagic.com>
parent
a72a424b
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
13 additions
and
13 deletions
+13
-13
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+3
-3
.github/workflows/publish.yml
.github/workflows/publish.yml
+1
-1
CMakeLists.txt
CMakeLists.txt
+1
-1
Dockerfile
Dockerfile
+1
-1
pyproject.toml
pyproject.toml
+1
-1
requirements-build.txt
requirements-build.txt
+1
-1
requirements-cuda.txt
requirements-cuda.txt
+4
-4
vllm/model_executor/layers/ops/sample.py
vllm/model_executor/layers/ops/sample.py
+1
-1
No files found.
.buildkite/test-pipeline.yaml
View file @
7e0861bd
...
@@ -44,7 +44,7 @@ steps:
...
@@ -44,7 +44,7 @@ steps:
fast_check
:
true
fast_check
:
true
commands
:
commands
:
# This flashinfer installation will fail on AMD ROCm, so it is set as optional.
# This flashinfer installation will fail on AMD ROCm, so it is set as optional.
- pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl || true
- pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl || true
-
pytest -v -s basic_correctness/test_basic_correctness.py
-
pytest -v -s basic_correctness/test_basic_correctness.py
-
pytest -v -s basic_correctness/test_cpu_offload.py
-
pytest -v -s basic_correctness/test_cpu_offload.py
-
VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
-
VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
...
@@ -164,7 +164,7 @@ steps:
...
@@ -164,7 +164,7 @@ steps:
-
label
:
Models Test
-
label
:
Models Test
#mirror_hardwares: [amd]
#mirror_hardwares: [amd]
commands
:
commands
:
- pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
- pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
-
pytest -v -s models -m \"not vlm\"
-
pytest -v -s models -m \"not vlm\"
-
label
:
Vision Language Models Test
-
label
:
Vision Language Models Test
...
@@ -281,7 +281,7 @@ steps:
...
@@ -281,7 +281,7 @@ steps:
-
pytest -v -s distributed/test_custom_all_reduce.py
-
pytest -v -s distributed/test_custom_all_reduce.py
-
TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
-
TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
-
TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
-
TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
- pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
- pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
-
VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
-
VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
-
VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=meta-llama/Meta-Llama-3-8B DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
-
VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=meta-llama/Meta-Llama-3-8B DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
-
pytest -v -s -x lora/test_mixtral.py
-
pytest -v -s -x lora/test_mixtral.py
.github/workflows/publish.yml
View file @
7e0861bd
...
@@ -49,7 +49,7 @@ jobs:
...
@@ -49,7 +49,7 @@ jobs:
matrix
:
matrix
:
os
:
[
'
ubuntu-20.04'
]
os
:
[
'
ubuntu-20.04'
]
python-version
:
[
'
3.8'
,
'
3.9'
,
'
3.10'
,
'
3.11'
]
python-version
:
[
'
3.8'
,
'
3.9'
,
'
3.10'
,
'
3.11'
]
pytorch-version: ['2.3.1']  # Must be the most recent version that meets requirements-cuda.txt.
pytorch-version: ['2.4.0']  # Must be the most recent version that meets requirements-cuda.txt.
cuda-version
:
[
'
11.8'
,
'
12.1'
]
cuda-version
:
[
'
11.8'
,
'
12.1'
]
steps
:
steps
:
...
...
CMakeLists.txt
View file @
7e0861bd
...
@@ -32,7 +32,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11
...
@@ -32,7 +32,7 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx11
# requirements.txt files and should be kept consistent. The ROCm torch
# requirements.txt files and should be kept consistent. The ROCm torch
# versions are derived from Dockerfile.rocm
# versions are derived from Dockerfile.rocm
#
#
set(TORCH_SUPPORTED_VERSION_CUDA "2.3.1")
set(TORCH_SUPPORTED_VERSION_CUDA "2.4.0")
set
(
TORCH_SUPPORTED_VERSION_ROCM
"2.5.0"
)
set
(
TORCH_SUPPORTED_VERSION_ROCM
"2.5.0"
)
#
#
...
...
Dockerfile
View file @
7e0861bd
...
@@ -192,7 +192,7 @@ RUN --mount=type=bind,from=mamba-builder,src=/usr/src/mamba,target=/usr/src/mamb
...
@@ -192,7 +192,7 @@ RUN --mount=type=bind,from=mamba-builder,src=/usr/src/mamba,target=/usr/src/mamb
python3
-m
pip
install
/usr/src/mamba/
*
.whl
--no-cache-dir
python3
-m
pip
install
/usr/src/mamba/
*
.whl
--no-cache-dir
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip
\
python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.9/flashinfer-0.0.9+cu121torch2.3-cp310-cp310-linux_x86_64.whl
python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.2/flashinfer-0.1.2+cu121torch2.4-cp310-cp310-linux_x86_64.whl
#################### vLLM installation IMAGE ####################
#################### vLLM installation IMAGE ####################
...
...
pyproject.toml
View file @
7e0861bd
...
@@ -5,7 +5,7 @@ requires = [
...
@@ -5,7 +5,7 @@ requires = [
"ninja"
,
"ninja"
,
"packaging"
,
"packaging"
,
"setuptools >= 49.4.0"
,
"setuptools >= 49.4.0"
,
"torch == 2.3.1",
"torch == 2.4.0",
"wheel"
,
"wheel"
,
]
]
build-backend
=
"setuptools.build_meta"
build-backend
=
"setuptools.build_meta"
...
...
requirements-build.txt
View file @
7e0861bd
...
@@ -3,5 +3,5 @@ cmake>=3.21
...
@@ -3,5 +3,5 @@ cmake>=3.21
ninja
ninja
packaging
packaging
setuptools>=49.4.0
setuptools>=49.4.0
torch==2.3.1
torch==2.4.0
wheel
wheel
requirements-cuda.txt
View file @
7e0861bd
...
@@ -4,8 +4,8 @@
...
@@ -4,8 +4,8 @@
# Dependencies for NVIDIA GPUs
# Dependencies for NVIDIA GPUs
ray >= 2.9
ray >= 2.9
nvidia-ml-py # for pynvml package
nvidia-ml-py # for pynvml package
torch == 2.3.1
torch == 2.4.0
# These must be updated alongside torch
# These must be updated alongside torch
torchvision == 0.18.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
torchvision == 0.19 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
xformers == 0.0.27 # Requires PyTorch 2.3.1
xformers == 0.0.27.post2 # Requires PyTorch 2.4.0
vllm-flash-attn == 2.5.9.post1 # Requires PyTorch 2.3.1
vllm-flash-attn == 2.6.0 # Requires PyTorch 2.4.0
vllm/model_executor/layers/ops/sample.py
View file @
7e0861bd
...
@@ -7,7 +7,7 @@ import triton.language as tl
...
@@ -7,7 +7,7 @@ import triton.language as tl
from
vllm.model_executor.layers.ops.rand
import
seeded_uniform
from
vllm.model_executor.layers.ops.rand
import
seeded_uniform
from
vllm.triton_utils.sample
import
get_num_triton_sampler_splits
from
vllm.triton_utils.sample
import
get_num_triton_sampler_splits
_EPS = 1e-6
_EPS: tl.constexpr = 1e-6
def
_multi_split_sample
(
def
_multi_split_sample
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment