Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5cdabd7b
Commit
5cdabd7b
authored
Jul 08, 2024
by
zhuwenwen
Browse files
add 7b pad dim
parent
371b1251
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
4 additions
and
6 deletions
+4
-6
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+0
-2
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/baichuan.py
+1
-1
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/chatglm.py
+1
-1
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+1
-1
vllm/model_executor/models/qwen2.py
vllm/model_executor/models/qwen2.py
+1
-1
No files found.
vllm/model_executor/layers/linear.py
View file @
5cdabd7b
...
...
@@ -14,8 +14,6 @@ from vllm.logger import init_logger
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
,
QuantizeMethodBase
)
from
vllm.model_executor.utils
import
set_weight_attrs
from
vllm.logger
import
init_logger
import
os
logger
=
init_logger
(
__name__
)
...
...
vllm/model_executor/models/baichuan.py
View file @
5cdabd7b
...
...
@@ -179,7 +179,7 @@ class BaiChuanAttention(nn.Module):
attn_metadata
:
AttentionMetadata
,
)
->
torch
.
Tensor
:
qkv
,
_
=
self
.
W_pack
(
hidden_states
)
if
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
:
if
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
and
qkv
.
shape
[
-
1
]
==
12320
:
qkv
=
qkv
[...,:
-
32
]
q
,
k
,
v
=
qkv
.
chunk
(
chunks
=
3
,
dim
=-
1
)
if
self
.
postion_embedding
!=
"ALIBI"
:
...
...
vllm/model_executor/models/chatglm.py
View file @
5cdabd7b
...
...
@@ -102,7 +102,7 @@ class GLMAttention(nn.Module):
attn_metadata
:
AttentionMetadata
,
)
->
torch
.
Tensor
:
qkv
,
_
=
self
.
query_key_value
(
hidden_states
)
if
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
:
if
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
and
qkv
.
shape
[
-
1
]
==
12320
:
qkv
=
qkv
[...,:
-
32
]
q
,
k
,
v
=
qkv
.
split
([
self
.
q_size
,
self
.
kv_size
,
self
.
kv_size
],
dim
=-
1
)
q
,
k
=
self
.
rotary_emb
(
position_ids
,
q
,
k
)
...
...
vllm/model_executor/models/llama.py
View file @
5cdabd7b
...
...
@@ -157,7 +157,7 @@ class LlamaAttention(nn.Module):
attn_metadata
:
AttentionMetadata
,
)
->
torch
.
Tensor
:
qkv
,
_
=
self
.
qkv_proj
(
hidden_states
)
if
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
:
if
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
and
qkv
.
shape
[
-
1
]
==
12320
:
qkv
=
qkv
[...,:
-
32
]
q
,
k
,
v
=
qkv
.
split
([
self
.
q_size
,
self
.
kv_size
,
self
.
kv_size
],
dim
=-
1
)
q
,
k
=
self
.
rotary_emb
(
positions
,
q
,
k
)
...
...
vllm/model_executor/models/qwen2.py
View file @
5cdabd7b
...
...
@@ -149,7 +149,7 @@ class Qwen2Attention(nn.Module):
attn_metadata
:
AttentionMetadata
,
)
->
torch
.
Tensor
:
qkv
,
_
=
self
.
qkv_proj
(
hidden_states
)
if
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
:
if
os
.
environ
.
get
(
'FA_PAD'
)
==
'1'
and
qkv
.
shape
[
-
1
]
==
12320
:
qkv
=
qkv
[...,:
-
32
]
q
,
k
,
v
=
qkv
.
split
([
self
.
q_size
,
self
.
kv_size
,
self
.
kv_size
],
dim
=-
1
)
q
,
k
=
self
.
rotary_emb
(
positions
,
q
,
k
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment