Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d6f4bd7c
Unverified
Commit
d6f4bd7c
authored
May 01, 2024
by
Jee Li
Committed by
GitHub
Apr 30, 2024
Browse files
[Misc]Add customized information for models (#4132)
parent
c3845d82
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
87 additions
and
0 deletions
+87
-0
tests/models/test_big_models.py
tests/models/test_big_models.py
+15
-0
tests/models/test_models.py
tests/models/test_models.py
+15
-0
vllm/attention/layer.py
vllm/attention/layer.py
+7
-0
vllm/model_executor/layers/activation.py
vllm/model_executor/layers/activation.py
+3
-0
vllm/model_executor/layers/layernorm.py
vllm/model_executor/layers/layernorm.py
+5
-0
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+22
-0
vllm/model_executor/layers/logits_processor.py
vllm/model_executor/layers/logits_processor.py
+6
-0
vllm/model_executor/layers/rotary_embedding.py
vllm/model_executor/layers/rotary_embedding.py
+6
-0
vllm/model_executor/layers/vocab_parallel_embedding.py
vllm/model_executor/layers/vocab_parallel_embedding.py
+8
-0
No files found.
tests/models/test_big_models.py
View file @
d6f4bd7c
...
...
@@ -43,3 +43,18 @@ def test_models(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
def test_model_print(
    vllm_runner,
    model: str,
    dtype: str,
) -> None:
    """Smoke-test that the loaded model can be printed.

    Printing the module tree invokes every layer's ``extra_repr``, so a
    broken ``extra_repr`` surfaces here as an exception.
    """
    runner = vllm_runner(model, dtype=dtype)
    # This test is for verifying whether the model's extra_repr
    # can be printed correctly.
    print(runner.model.llm_engine.model_executor.driver_worker.model_runner.
          model)
    # Drop the runner explicitly once the repr has been printed.
    del runner
tests/models/test_models.py
View file @
d6f4bd7c
...
...
@@ -49,3 +49,18 @@ def test_models(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_str
!
r
}
\n
vLLM:
{
vllm_output_str
!
r
}
"
)
assert
hf_output_ids
==
vllm_output_ids
,
(
f
"Test
{
i
}
:
\n
HF:
{
hf_output_ids
}
\n
vLLM:
{
vllm_output_ids
}
"
)
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float"])
def test_model_print(
    vllm_runner,
    model: str,
    dtype: str,
) -> None:
    """Smoke-test that the loaded model can be printed.

    Printing the module tree invokes every layer's ``extra_repr``, so a
    broken ``extra_repr`` surfaces here as an exception.
    """
    runner = vllm_runner(model, dtype=dtype)
    # This test is for verifying whether the model's extra_repr
    # can be printed correctly.
    print(runner.model.llm_engine.model_executor.driver_worker.model_runner.
          model)
    # Drop the runner explicitly once the repr has been printed.
    del runner
vllm/attention/layer.py
View file @
d6f4bd7c
...
...
@@ -47,3 +47,10 @@ class Attention(nn.Module):
)
->
torch
.
Tensor
:
return
self
.
impl
.
forward
(
query
,
key
,
value
,
kv_cache
,
attn_metadata
,
kv_scale
)
def extra_repr(self) -> str:
    """Describe the attention configuration for the module's printed repr.

    All values are read from ``self.impl``. The result has the form
    ``head_size=..., num_heads=..., num_kv_heads=..., scale=...``.
    """
    # Adjacent f-strings are concatenated into a single string; each line
    # keeps its own type-ignore, as the original statements did.
    return (
        f"head_size={self.impl.head_size}"  # type: ignore
        f", num_heads={self.impl.num_heads}"  # type: ignore
        f", num_kv_heads={self.impl.num_kv_heads}"  # type: ignore
        f", scale={self.impl.scale}"  # type: ignore
    )
vllm/model_executor/layers/activation.py
View file @
d6f4bd7c
...
...
@@ -67,6 +67,9 @@ class GeluAndMul(nn.Module):
ops
.
gelu_tanh_and_mul
(
out
,
x
)
return
out
def extra_repr(self) -> str:
    """Report the ``approximate`` setting for the module's printed repr.

    The value is rendered with ``repr`` formatting, so a string setting
    appears quoted (for example ``approximate='tanh'``).
    """
    approximate = self.approximate
    return f"approximate={approximate!r}"
class
NewGELU
(
nn
.
Module
):
...
...
vllm/model_executor/layers/layernorm.py
View file @
d6f4bd7c
...
...
@@ -64,3 +64,8 @@ class RMSNorm(nn.Module):
self
.
variance_epsilon
,
)
return
out
def extra_repr(self) -> str:
    """Describe the norm layer for the module's printed repr.

    ``hidden_size`` is taken from the first dimension of the weight and
    ``eps`` from ``self.variance_epsilon``.
    """
    parts = [
        f"hidden_size={self.weight.data.size(0)}",
        f", eps={self.variance_epsilon}",
    ]
    return "".join(parts)
vllm/model_executor/layers/linear.py
View file @
d6f4bd7c
...
...
@@ -181,6 +181,12 @@ class ReplicatedLinear(LinearBase):
output_bias
=
self
.
bias
if
self
.
skip_bias_add
else
None
return
output
,
output_bias
def extra_repr(self) -> str:
    """Describe the replicated linear layer for the module's printed repr.

    Reports the input size, the output size, and whether a bias is
    present (``self.bias is not None``).
    """
    pieces = (
        f"in_features={self.input_size}",
        f", output_features={self.output_size}",
        f", bias={self.bias is not None}",
    )
    return "".join(pieces)
class
ColumnParallelLinear
(
LinearBase
):
"""Linear layer with column parallelism.
...
...
@@ -281,6 +287,14 @@ class ColumnParallelLinear(LinearBase):
output_bias
=
self
.
bias
if
self
.
skip_bias_add
else
None
return
output
,
output_bias
def extra_repr(self) -> str:
    """Describe the column-parallel layer for the module's printed repr.

    ``output_features`` is the per-partition output size, and ``tp_size``
    is queried from ``get_tensor_model_parallel_world_size()`` at call
    time rather than read from the instance.
    """
    return (
        f"in_features={self.input_size}"
        f", output_features={self.output_size_per_partition}"
        f", bias={self.bias is not None}"
        f", tp_size={get_tensor_model_parallel_world_size()}"
        f", gather_output={self.gather_output}"
    )
class
MergedColumnParallelLinear
(
ColumnParallelLinear
):
"""Packed linear layers with column parallelism.
...
...
@@ -685,3 +699,11 @@ class RowParallelLinear(LinearBase):
output
=
output_
output_bias
=
self
.
bias
return
output
,
output_bias
def extra_repr(self) -> str:
    """Describe the row-parallel layer for the module's printed repr.

    The first field is labelled ``in_features`` so that it matches the
    label used by the other linear layers in this file (the original
    emitted ``input_features`` here only).

    Returns:
        A string of the form
        ``in_features=..., output_features=..., bias=..., tp_size=...,
        reduce_results=...`` where ``in_features`` is the per-partition
        input size and ``bias`` is ``True`` when ``self.bias`` is not
        ``None``.
    """
    s = f"in_features={self.input_size_per_partition}"
    s += f", output_features={self.output_size}"
    s += f", bias={self.bias is not None}"
    s += f", tp_size={self.tp_size}"
    s += f", reduce_results={self.reduce_results}"
    return s
vllm/model_executor/layers/logits_processor.py
View file @
d6f4bd7c
...
...
@@ -70,6 +70,12 @@ class LogitsProcessor(nn.Module):
logits
=
logits
[:,
:
self
.
org_vocab_size
]
return
logits
def extra_repr(self) -> str:
    """Describe the logits processor for the module's printed repr.

    Fixes a typo in the emitted label: the original printed
    ``forg_vocab_size=`` for the ``org_vocab_size`` attribute.

    Returns:
        A string of the form
        ``vocab_size=..., org_vocab_size=..., scale=...,
        logits_as_input=...`` built from the attributes of the same
        names.
    """
    s = f"vocab_size={self.vocab_size}"
    s += f", org_vocab_size={self.org_vocab_size}"
    s += f", scale={self.scale}, logits_as_input={self.logits_as_input}"
    return s
def
_prune_hidden_states
(
hidden_states
:
torch
.
Tensor
,
...
...
vllm/model_executor/layers/rotary_embedding.py
View file @
d6f4bd7c
...
...
@@ -156,6 +156,12 @@ class RotaryEmbedding(nn.Module):
self
.
cos_sin_cache
,
self
.
is_neox_style
)
return
query
,
key
def extra_repr(self) -> str:
    """Describe the rotary embedding for the module's printed repr.

    Lists ``head_size``, ``rotary_dim``, ``max_position_embeddings``,
    ``base`` and ``is_neox_style`` in that order, comma-separated.
    """
    segments = []
    segments.append(
        f"head_size={self.head_size}, rotary_dim={self.rotary_dim}")
    segments.append(
        f", max_position_embeddings={self.max_position_embeddings}")
    segments.append(
        f", base={self.base}, is_neox_style={self.is_neox_style}")
    return "".join(segments)
class
LinearScalingRotaryEmbedding
(
RotaryEmbedding
):
"""RotaryEmbedding extended with linear scaling.
...
...
vllm/model_executor/layers/vocab_parallel_embedding.py
View file @
d6f4bd7c
...
...
@@ -105,6 +105,14 @@ class VocabParallelEmbedding(torch.nn.Module):
output
=
tensor_model_parallel_all_reduce
(
output_parallel
)
return
output
def extra_repr(self) -> str:
    """Describe the vocab-parallel embedding for the module's printed repr.

    ``num_embeddings`` is the per-partition embedding count; the original
    and padded vocabulary sizes and ``tp_size`` follow.
    """
    return (
        f"num_embeddings={self.num_embeddings_per_partition}"
        f", embedding_dim={self.embedding_dim}"
        f", org_vocab_size={self.org_vocab_size}"
        f", num_embeddings_padded={self.num_embeddings_padded}"
        f", tp_size={self.tp_size}"
    )
class
ParallelLMHead
(
VocabParallelEmbedding
):
"""Parallelized LM head.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment