Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4ea1f967
Unverified
Commit
4ea1f967
authored
Apr 27, 2024
by
Robert Shaw
Committed by
GitHub
Apr 27, 2024
Browse files
[BugFix] Resolved Issues For LinearMethod --> QuantConfig (#4418)
parent
ba4be44c
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
0 additions
and
10 deletions
+0
-10
vllm/model_executor/models/bloom.py
vllm/model_executor/models/bloom.py
+0
-1
vllm/model_executor/models/falcon.py
vllm/model_executor/models/falcon.py
+0
-1
vllm/model_executor/models/gpt2.py
vllm/model_executor/models/gpt2.py
+0
-1
vllm/model_executor/models/gpt_bigcode.py
vllm/model_executor/models/gpt_bigcode.py
+0
-1
vllm/model_executor/models/gpt_j.py
vllm/model_executor/models/gpt_j.py
+0
-1
vllm/model_executor/models/gpt_neox.py
vllm/model_executor/models/gpt_neox.py
+0
-1
vllm/model_executor/models/mpt.py
vllm/model_executor/models/mpt.py
+0
-1
vllm/model_executor/models/opt.py
vllm/model_executor/models/opt.py
+0
-1
vllm/model_executor/models/phi.py
vllm/model_executor/models/phi.py
+0
-1
vllm/model_executor/models/starcoder2.py
vllm/model_executor/models/starcoder2.py
+0
-1
No files found.
vllm/model_executor/models/bloom.py
View file @
4ea1f967
...
@@ -139,7 +139,6 @@ class BloomMLP(nn.Module):
...
@@ -139,7 +139,6 @@ class BloomMLP(nn.Module):
4
*
hidden_size
,
4
*
hidden_size
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
)
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
gelu_impl
=
get_act_fn
(
"gelu"
,
quant_config
,
4
*
hidden_size
)
self
.
gelu_impl
=
get_act_fn
(
"gelu"
,
quant_config
,
4
*
hidden_size
)
self
.
dense_4h_to_h
=
RowParallelLinear
(
self
.
dense_4h_to_h
=
RowParallelLinear
(
4
*
hidden_size
,
4
*
hidden_size
,
...
...
vllm/model_executor/models/falcon.py
View file @
4ea1f967
...
@@ -203,7 +203,6 @@ class FalconMLP(nn.Module):
...
@@ -203,7 +203,6 @@ class FalconMLP(nn.Module):
bias
=
config
.
bias
,
bias
=
config
.
bias
,
skip_bias_add
=
True
,
skip_bias_add
=
True
,
quant_config
=
quant_config
)
quant_config
=
quant_config
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
act
=
get_act_fn
(
"gelu"
,
quant_config
,
4
*
hidden_size
)
self
.
act
=
get_act_fn
(
"gelu"
,
quant_config
,
4
*
hidden_size
)
self
.
reduce_row_parallel_results
=
not
(
config
.
new_decoder_architecture
self
.
reduce_row_parallel_results
=
not
(
config
.
new_decoder_architecture
or
config
.
parallel_attn
)
or
config
.
parallel_attn
)
...
...
vllm/model_executor/models/gpt2.py
View file @
4ea1f967
...
@@ -107,7 +107,6 @@ class GPT2MLP(nn.Module):
...
@@ -107,7 +107,6 @@ class GPT2MLP(nn.Module):
bias
=
True
,
bias
=
True
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
)
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
act
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
self
.
act
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
intermediate_size
)
intermediate_size
)
...
...
vllm/model_executor/models/gpt_bigcode.py
View file @
4ea1f967
...
@@ -128,7 +128,6 @@ class GPTBigMLP(nn.Module):
...
@@ -128,7 +128,6 @@ class GPTBigMLP(nn.Module):
bias
=
True
,
bias
=
True
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
)
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
act
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
self
.
act
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
intermediate_size
)
intermediate_size
)
...
...
vllm/model_executor/models/gpt_j.py
View file @
4ea1f967
...
@@ -120,7 +120,6 @@ class GPTJMLP(nn.Module):
...
@@ -120,7 +120,6 @@ class GPTJMLP(nn.Module):
hidden_size
,
hidden_size
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
)
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
act
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
self
.
act
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
intermediate_size
)
intermediate_size
)
...
...
vllm/model_executor/models/gpt_neox.py
View file @
4ea1f967
...
@@ -119,7 +119,6 @@ class GPTNeoXMLP(nn.Module):
...
@@ -119,7 +119,6 @@ class GPTNeoXMLP(nn.Module):
config
.
hidden_size
,
config
.
hidden_size
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
)
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
,
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
,
config
.
intermediate_size
)
config
.
intermediate_size
)
...
...
vllm/model_executor/models/mpt.py
View file @
4ea1f967
...
@@ -146,7 +146,6 @@ class MPTMLP(nn.Module):
...
@@ -146,7 +146,6 @@ class MPTMLP(nn.Module):
bias
=
not
config
.
no_bias
,
bias
=
not
config
.
no_bias
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
)
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
act
=
get_act_fn
(
"gelu"
,
quant_config
,
intermediate_size
)
self
.
act
=
get_act_fn
(
"gelu"
,
quant_config
,
intermediate_size
)
self
.
down_proj
=
RowParallelLinear
(
self
.
down_proj
=
RowParallelLinear
(
intermediate_size
,
intermediate_size
,
...
...
vllm/model_executor/models/opt.py
View file @
4ea1f967
...
@@ -130,7 +130,6 @@ class OPTDecoderLayer(nn.Module):
...
@@ -130,7 +130,6 @@ class OPTDecoderLayer(nn.Module):
bias
=
config
.
enable_bias
,
bias
=
config
.
enable_bias
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
)
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
activation_fn
=
get_act_fn
(
config
.
activation_function
,
self
.
activation_fn
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
config
.
ffn_dim
)
quant_config
,
config
.
ffn_dim
)
self
.
fc2
=
RowParallelLinear
(
self
.
fc2
=
RowParallelLinear
(
...
...
vllm/model_executor/models/phi.py
View file @
4ea1f967
...
@@ -142,7 +142,6 @@ class PhiMLP(nn.Module):
...
@@ -142,7 +142,6 @@ class PhiMLP(nn.Module):
config
.
hidden_size
,
config
.
hidden_size
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
)
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
,
n_inner
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
,
n_inner
)
def
forward
(
self
,
hidden_states
):
def
forward
(
self
,
hidden_states
):
...
...
vllm/model_executor/models/starcoder2.py
View file @
4ea1f967
...
@@ -136,7 +136,6 @@ class Starcoder2MLP(nn.Module):
...
@@ -136,7 +136,6 @@ class Starcoder2MLP(nn.Module):
bias
=
config
.
use_bias
,
bias
=
config
.
use_bias
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
)
)
quant_config
=
getattr
(
quant_config
,
"quant_config"
,
None
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
,
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
,
config
.
intermediate_size
)
config
.
intermediate_size
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment