Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
55c5f16f
Commit
55c5f16f
authored
Nov 07, 2024
by
zhuwenwen
Browse files
remove xformers deps
parent
32a996c5
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
40 additions
and
15 deletions
+40
-15
README.md
README.md
+1
-2
requirements-rocm.txt
requirements-rocm.txt
+1
-2
vllm/model_executor/models/blip.py
vllm/model_executor/models/blip.py
+6
-2
vllm/model_executor/models/clip.py
vllm/model_executor/models/clip.py
+6
-2
vllm/model_executor/models/intern_vit.py
vllm/model_executor/models/intern_vit.py
+8
-3
vllm/model_executor/models/pixtral.py
vllm/model_executor/models/pixtral.py
+12
-2
vllm/model_executor/models/siglip.py
vllm/model_executor/models/siglip.py
+6
-2
No files found.
README.md
View file @
55c5f16f
...
...
@@ -19,10 +19,10 @@ vLLM是一个快速且易于使用的LLM推理和服务库,使用PageAttention
| BloomForCausalLM | BLOOM | Yes | Yes |
| InternLMForCausalLM | InternLM | Yes | Yes |
| InternLM2ForCausalLM | InternLM2 | Yes | Yes |
| TeleChat12BForCausalLM (#TelechatForCausalLM) | TeleChat-12B | Yes | Yes |
| MiniCPMForCausalLM | MiniCPM | Yes | Yes |
| MiniCPM3ForCausalLM | MiniCPM3 | Yes | Yes |
| MixtralForCausalLM | Mixtral-8x7B,Mixtral-8x7B-Instruct | Yes | Yes |
| TeleChat12BForCausalLM (#TelechatForCausalLM) | TeleChat-12B | Yes | Yes |
| Qwen2MoeForCausalLM | Qwen2-57B-A14B,Qwen2-57B-A14B-Instruct | Yes | Yes |
| LlavaForConditionalGeneration | LLaMA,LLaMA-2,LLaMA-3 | Yes | Yes |
| Qwen2VLForConditionalGeneration | Qwen2-VL | Yes | Yes |
...
...
@@ -74,7 +74,6 @@ VLLM_INSTALL_PUNICA_KERNELS=1 python3 setup.py install
2、根据pytorch2.3.0、python、dtk及系统下载对应的依赖包:
-
triton:
[
https://cancon.hpccube.com:65024/4/main/triton
](
https://cancon.hpccube.com:65024/4/main/triton/
)
-
xformers:
[
https://cancon.hpccube.com:65024/4/main/xformers
](
https://cancon.hpccube.com:65024/4/main/xformers
)
-
flash_attn:
[
https://cancon.hpccube.com:65024/4/main/flash_attn
](
https://cancon.hpccube.com:65024/4/main/flash_attn
)
-
lmslim:
[
https://cancon.hpccube.com:65024/4/main/lmslim
](
https://cancon.hpccube.com:65024/4/main/lmslim
)
...
...
requirements-rocm.txt
View file @
55c5f16f
...
...
@@ -14,5 +14,4 @@ setuptools_scm>=8
torch == 2.3.0
triton == 2.1.0
flash_attn == 2.6.1
xformers == 0.0.25
lmslim == 0.1.2
\ No newline at end of file
lmslim == 0.1.2 # future version 0.2.0
\ No newline at end of file
vllm/model_executor/models/blip.py
View file @
55c5f16f
...
...
@@ -20,10 +20,14 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
vllm.multimodal.utils
import
(
cached_get_tokenizer
,
repeat_and_pad_placeholder_tokens
)
from
vllm.sequence
import
SequenceData
import
vllm.envs
as
envs
try
:
from
xformers
import
ops
as
xops
USE_XFORMERS_OPS
=
True
if
envs
.
VLLM_ATTENTION_BACKEND
==
"XFormers"
:
from
xformers
import
ops
as
xops
USE_XFORMERS_OPS
=
True
else
:
USE_XFORMERS_OPS
=
False
except
ImportError
:
USE_XFORMERS_OPS
=
False
...
...
vllm/model_executor/models/clip.py
View file @
55c5f16f
...
...
@@ -21,10 +21,14 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
vllm.multimodal.utils
import
(
cached_get_tokenizer
,
repeat_and_pad_placeholder_tokens
)
from
vllm.sequence
import
SequenceData
import
vllm.envs
as
envs
try
:
from
xformers
import
ops
as
xops
USE_XFORMERS_OPS
=
True
if
envs
.
VLLM_ATTENTION_BACKEND
==
"XFormers"
:
from
xformers
import
ops
as
xops
USE_XFORMERS_OPS
=
True
else
:
USE_XFORMERS_OPS
=
False
except
ImportError
:
USE_XFORMERS_OPS
=
False
...
...
vllm/model_executor/models/intern_vit.py
View file @
55c5f16f
...
...
@@ -19,10 +19,14 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
RowParallelLinear
)
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
import
vllm.envs
as
envs
try
:
from
xformers
import
ops
as
xops
USE_XFORMERS_OPS
=
True
if
envs
.
VLLM_ATTENTION_BACKEND
==
"XFormers"
:
from
xformers
import
ops
as
xops
USE_XFORMERS_OPS
=
True
else
:
USE_XFORMERS_OPS
=
False
except
ImportError
:
USE_XFORMERS_OPS
=
False
...
...
@@ -200,7 +204,8 @@ class InternSdpaAttention(nn.Module):
v
=
v
.
transpose
(
1
,
2
)
x
=
F
.
scaled_dot_product_attention
(
q
,
k
,
v
,
scale
=
self
.
scale
)
x
=
x
.
transpose
(
1
,
2
).
view
(
B
,
N
,
-
1
)
# x = x.transpose(1, 2).view(B, N, -1)
x
=
x
.
transpose
(
1
,
2
).
reshape
(
B
,
N
,
-
1
)
x
=
self
.
proj
(
x
)
return
x
...
...
vllm/model_executor/models/pixtral.py
View file @
55c5f16f
...
...
@@ -8,8 +8,18 @@ import torch.nn.functional as F
from
mistral_common.protocol.instruct.messages
import
ImageChunk
from
PIL
import
Image
from
transformers
import
PretrainedConfig
from
xformers.ops.fmha
import
memory_efficient_attention
from
xformers.ops.fmha.attn_bias
import
BlockDiagonalMask
# from xformers.ops.fmha import memory_efficient_attention
# from xformers.ops.fmha.attn_bias import BlockDiagonalMask
import
vllm.envs
as
envs
try
:
if
envs
.
VLLM_ATTENTION_BACKEND
==
"XFormers"
:
from
xformers.ops.fmha
import
memory_efficient_attention
from
xformers.ops.fmha.attn_bias
import
BlockDiagonalMask
else
:
print
(
"INFO: VLLM_ATTENTION_BACKEND is not XFormers.
\n
"
)
except
ImportError
:
print
(
"INFO: Please install xformers if you want to infer pixtral.
\n
"
)
from
vllm.attention
import
AttentionMetadata
from
vllm.config
import
CacheConfig
,
MultiModalConfig
...
...
vllm/model_executor/models/siglip.py
View file @
55c5f16f
...
...
@@ -25,10 +25,14 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
vllm.multimodal.utils
import
(
cached_get_tokenizer
,
repeat_and_pad_placeholder_tokens
)
from
vllm.sequence
import
SequenceData
import
vllm.envs
as
envs
try
:
from
xformers
import
ops
as
xops
USE_XFORMERS_OPS
=
True
if
envs
.
VLLM_ATTENTION_BACKEND
==
"XFormers"
:
from
xformers
import
ops
as
xops
USE_XFORMERS_OPS
=
True
else
:
USE_XFORMERS_OPS
=
False
except
ImportError
:
USE_XFORMERS_OPS
=
False
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment