Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
399c7986
Unverified
Commit
399c7986
authored
Nov 06, 2024
by
Michael Goin
Committed by
GitHub
Nov 06, 2024
Browse files
Remove ScaledActivation for AWQ (#10057)
Signed-off-by: mgoin <michael@neuralmagic.com>
parent
406d4cc4
Changes
34
Hide whitespace changes
Inline
Side-by-side
Showing 14 changed files with 15 additions and 26 deletions
+15
-26
vllm/model_executor/layers/quantization/tpu_int8.py
vllm/model_executor/layers/quantization/tpu_int8.py
+0
-3
vllm/model_executor/models/bart.py
vllm/model_executor/models/bart.py
+3
-5
vllm/model_executor/models/bloom.py
vllm/model_executor/models/bloom.py
+1
-1
vllm/model_executor/models/falcon.py
vllm/model_executor/models/falcon.py
+1
-1
vllm/model_executor/models/gpt2.py
vllm/model_executor/models/gpt2.py
+1
-2
vllm/model_executor/models/gpt_bigcode.py
vllm/model_executor/models/gpt_bigcode.py
+1
-2
vllm/model_executor/models/gpt_j.py
vllm/model_executor/models/gpt_j.py
+1
-2
vllm/model_executor/models/gpt_neox.py
vllm/model_executor/models/gpt_neox.py
+1
-2
vllm/model_executor/models/mpt.py
vllm/model_executor/models/mpt.py
+1
-1
vllm/model_executor/models/opt.py
vllm/model_executor/models/opt.py
+1
-2
vllm/model_executor/models/persimmon.py
vllm/model_executor/models/persimmon.py
+1
-1
vllm/model_executor/models/phi.py
vllm/model_executor/models/phi.py
+1
-1
vllm/model_executor/models/qwen.py
vllm/model_executor/models/qwen.py
+1
-1
vllm/model_executor/models/starcoder2.py
vllm/model_executor/models/starcoder2.py
+1
-2
No files found.
vllm/model_executor/layers/quantization/tpu_int8.py
View file @
399c7986
...
...
@@ -50,9 +50,6 @@ class Int8TpuConfig(QuantizationConfig):
return
TPUInt8LinearMethod
(
self
)
return
None
def
get_scaled_act_names
(
self
)
->
List
[
str
]:
return
[]
class
TPUInt8LinearMethod
(
LinearMethodBase
):
"""Int8 Linear method for TPU Quant. """
...
...
vllm/model_executor/models/bart.py
View file @
399c7986
...
...
@@ -393,8 +393,7 @@ class BartEncoderLayer(nn.Module):
cache_config
=
cache_config
,
quant_config
=
quant_config
)
self
.
self_attn_layer_norm
=
nn
.
LayerNorm
(
self
.
embed_dim
)
self
.
activation_fn
=
get_act_fn
(
config
.
activation_function
,
quant_config
)
self
.
activation_fn
=
get_act_fn
(
config
.
activation_function
)
ffn_hidden_size
=
self
.
embed_dim
ffn_intermediate_size
=
config
.
encoder_ffn_dim
...
...
@@ -405,7 +404,7 @@ class BartEncoderLayer(nn.Module):
bias
=
ffn_has_bias
,
quant_config
=
quant_config
,
)
self
.
act
=
get_act_fn
(
"gelu"
,
quant_config
,
ffn_intermediate_size
)
self
.
act
=
get_act_fn
(
"gelu"
)
self
.
fc2
=
RowParallelLinear
(
ffn_intermediate_size
,
ffn_hidden_size
,
...
...
@@ -473,8 +472,7 @@ class BartDecoderLayer(nn.Module):
config
=
config
,
cache_config
=
cache_config
,
quant_config
=
quant_config
)
self
.
activation_fn
=
get_act_fn
(
config
.
activation_function
,
quant_config
)
self
.
activation_fn
=
get_act_fn
(
config
.
activation_function
)
self
.
self_attn_layer_norm
=
nn
.
LayerNorm
(
self
.
embed_dim
)
'''
...
...
vllm/model_executor/models/bloom.py
View file @
399c7986
...
...
@@ -146,7 +146,7 @@ class BloomMLP(nn.Module):
4
*
hidden_size
,
quant_config
=
quant_config
,
)
self
.
gelu_impl
=
get_act_fn
(
"gelu"
,
quant_config
,
4
*
hidden_size
)
self
.
gelu_impl
=
get_act_fn
(
"gelu"
)
self
.
dense_4h_to_h
=
RowParallelLinear
(
4
*
hidden_size
,
hidden_size
,
...
...
vllm/model_executor/models/falcon.py
View file @
399c7986
...
...
@@ -212,7 +212,7 @@ class FalconMLP(nn.Module):
bias
=
config
.
bias
,
skip_bias_add
=
True
,
quant_config
=
quant_config
)
self
.
act
=
get_act_fn
(
"gelu"
,
quant_config
,
4
*
hidden_size
)
self
.
act
=
get_act_fn
(
"gelu"
)
self
.
reduce_row_parallel_results
=
not
(
config
.
new_decoder_architecture
or
config
.
parallel_attn
)
self
.
dense_4h_to_h
=
RowParallelLinear
(
...
...
vllm/model_executor/models/gpt2.py
View file @
399c7986
...
...
@@ -123,8 +123,7 @@ class GPT2MLP(nn.Module):
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.c_proj"
,
)
self
.
act
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
intermediate_size
)
self
.
act
=
get_act_fn
(
config
.
activation_function
)
def
forward
(
self
,
hidden_states
:
torch
.
Tensor
)
->
torch
.
Tensor
:
hidden_states
,
_
=
self
.
c_fc
(
hidden_states
)
...
...
vllm/model_executor/models/gpt_bigcode.py
View file @
399c7986
...
...
@@ -135,8 +135,7 @@ class GPTBigMLP(nn.Module):
bias
=
True
,
quant_config
=
quant_config
,
)
self
.
act
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
intermediate_size
)
self
.
act
=
get_act_fn
(
config
.
activation_function
)
def
forward
(
self
,
hidden_states
:
torch
.
Tensor
)
->
torch
.
Tensor
:
hidden_states
,
_
=
self
.
c_fc
(
hidden_states
)
...
...
vllm/model_executor/models/gpt_j.py
View file @
399c7986
...
...
@@ -130,8 +130,7 @@ class GPTJMLP(nn.Module):
hidden_size
,
quant_config
=
quant_config
,
)
self
.
act
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
intermediate_size
)
self
.
act
=
get_act_fn
(
config
.
activation_function
)
def
forward
(
self
,
hidden_states
:
torch
.
Tensor
)
->
torch
.
Tensor
:
hidden_states
,
_
=
self
.
fc_in
(
hidden_states
)
...
...
vllm/model_executor/models/gpt_neox.py
View file @
399c7986
...
...
@@ -128,8 +128,7 @@ class GPTNeoXMLP(nn.Module):
config
.
hidden_size
,
quant_config
=
quant_config
,
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
,
config
.
intermediate_size
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
)
def
forward
(
self
,
hidden_states
):
hidden_states
,
_
=
self
.
dense_h_to_4h
(
hidden_states
)
...
...
vllm/model_executor/models/mpt.py
View file @
399c7986
...
...
@@ -153,7 +153,7 @@ class MPTMLP(nn.Module):
bias
=
not
config
.
no_bias
,
quant_config
=
quant_config
,
)
self
.
act
=
get_act_fn
(
"gelu"
,
quant_config
,
intermediate_size
)
self
.
act
=
get_act_fn
(
"gelu"
)
self
.
down_proj
=
RowParallelLinear
(
intermediate_size
,
hidden_size
,
...
...
vllm/model_executor/models/opt.py
View file @
399c7986
...
...
@@ -147,8 +147,7 @@ class OPTDecoderLayer(nn.Module):
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.fc1"
,
)
self
.
activation_fn
=
get_act_fn
(
config
.
activation_function
,
quant_config
,
config
.
ffn_dim
)
self
.
activation_fn
=
get_act_fn
(
config
.
activation_function
)
self
.
fc2
=
RowParallelLinear
(
config
.
ffn_dim
,
self
.
embed_dim
,
...
...
vllm/model_executor/models/persimmon.py
View file @
399c7986
...
...
@@ -60,7 +60,7 @@ class PersimmonMLP(nn.Module):
self
.
dense_4h_to_h
=
RowParallelLinear
(
config
.
intermediate_size
,
config
.
hidden_size
,
quant_config
=
quant_config
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
)
def
forward
(
self
,
hidden_states
)
->
torch
.
Tensor
:
hidden_states
,
_
=
self
.
dense_h_to_4h
(
hidden_states
)
...
...
vllm/model_executor/models/phi.py
View file @
399c7986
...
...
@@ -152,7 +152,7 @@ class PhiMLP(nn.Module):
config
.
hidden_size
,
quant_config
=
quant_config
,
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
,
n_inner
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
)
def
forward
(
self
,
hidden_states
):
hidden_states
,
_
=
self
.
fc1
(
hidden_states
)
...
...
vllm/model_executor/models/qwen.py
View file @
399c7986
...
...
@@ -203,7 +203,7 @@ class QwenVMLP(nn.Module):
intermediate_size
,
bias
=
True
,
quant_config
=
quant_config
)
self
.
act_fn
=
get_act_fn
(
"gelu"
,
quant_config
,
intermediate_size
)
self
.
act_fn
=
get_act_fn
(
"gelu"
)
self
.
c_proj
=
RowParallelLinear
(
intermediate_size
,
hidden_size
,
...
...
vllm/model_executor/models/starcoder2.py
View file @
399c7986
...
...
@@ -139,8 +139,7 @@ class Starcoder2MLP(nn.Module):
bias
=
config
.
use_bias
,
quant_config
=
quant_config
,
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
,
quant_config
,
config
.
intermediate_size
)
self
.
act
=
get_act_fn
(
config
.
hidden_act
)
def
forward
(
self
,
hidden_states
:
torch
.
Tensor
)
->
torch
.
Tensor
:
hidden_states
,
_
=
self
.
c_fc
(
hidden_states
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment