Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
572ddf83
Unverified
Commit
572ddf83
authored
Sep 20, 2025
by
Woosuk Kwon
Committed by
GitHub
Sep 20, 2025
Browse files
[Chore] Remove unused sampler in models (#25324)
Signed-off-by:
Woosuk Kwon
<
woosuk.kwon@berkeley.edu
>
parent
86647d1c
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
0 additions
and
49 deletions
+0
-49
tests/lora/conftest.py
tests/lora/conftest.py
+0
-3
vllm/model_executor/models/ernie_mtp.py
vllm/model_executor/models/ernie_mtp.py
+0
-10
vllm/model_executor/models/plamo2.py
vllm/model_executor/models/plamo2.py
+0
-10
vllm/model_executor/models/step3_text.py
vllm/model_executor/models/step3_text.py
+0
-10
vllm/model_executor/models/step3_vl.py
vllm/model_executor/models/step3_vl.py
+0
-16
No files found.
tests/lora/conftest.py
View file @
572ddf83
...
@@ -17,7 +17,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
...
@@ -17,7 +17,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
MergedColumnParallelLinear
,
MergedColumnParallelLinear
,
RowParallelLinear
)
RowParallelLinear
)
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.sampler
import
Sampler
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.model_executor.models.interfaces
import
SupportsLoRA
from
vllm.model_executor.models.interfaces
import
SupportsLoRA
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
...
@@ -97,7 +96,6 @@ def dummy_model() -> nn.Module:
...
@@ -97,7 +96,6 @@ def dummy_model() -> nn.Module:
# Special handling for lm_head & sampler
# Special handling for lm_head & sampler
(
"lm_head"
,
ParallelLMHead
(
512
,
10
)),
(
"lm_head"
,
ParallelLMHead
(
512
,
10
)),
(
"logits_processor"
,
LogitsProcessor
(
512
)),
(
"logits_processor"
,
LogitsProcessor
(
512
)),
(
"sampler"
,
Sampler
())
]))
]))
model
.
config
=
MagicMock
()
model
.
config
=
MagicMock
()
model
.
embedding_modules
=
{
"lm_head"
:
"lm_head"
}
model
.
embedding_modules
=
{
"lm_head"
:
"lm_head"
}
...
@@ -125,7 +123,6 @@ def dummy_model_gate_up() -> nn.Module:
...
@@ -125,7 +123,6 @@ def dummy_model_gate_up() -> nn.Module:
# Special handling for lm_head & sampler
# Special handling for lm_head & sampler
(
"lm_head"
,
ParallelLMHead
(
512
,
10
)),
(
"lm_head"
,
ParallelLMHead
(
512
,
10
)),
(
"logits_processor"
,
LogitsProcessor
(
512
)),
(
"logits_processor"
,
LogitsProcessor
(
512
)),
(
"sampler"
,
Sampler
())
]))
]))
model
.
config
=
MagicMock
()
model
.
config
=
MagicMock
()
model
.
packed_modules_mapping
=
{
model
.
packed_modules_mapping
=
{
...
...
vllm/model_executor/models/ernie_mtp.py
View file @
572ddf83
...
@@ -33,7 +33,6 @@ from vllm.config import CacheConfig, ModelConfig, VllmConfig
...
@@ -33,7 +33,6 @@ from vllm.config import CacheConfig, ModelConfig, VllmConfig
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.sampler
import
SamplerOutput
,
get_sampler
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
ParallelLMHead
,
VocabParallelEmbedding
)
ParallelLMHead
,
VocabParallelEmbedding
)
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
...
@@ -160,7 +159,6 @@ class ErnieMTP(nn.Module, SupportsPP):
...
@@ -160,7 +159,6 @@ class ErnieMTP(nn.Module, SupportsPP):
self
.
lm_head
=
ParallelLMHead
(
self
.
config
.
vocab_size
,
self
.
lm_head
=
ParallelLMHead
(
self
.
config
.
vocab_size
,
self
.
config
.
hidden_size
,
self
.
config
.
hidden_size
,
prefix
=
maybe_prefix
(
prefix
,
"lm_head"
))
prefix
=
maybe_prefix
(
prefix
,
"lm_head"
))
self
.
sampler
=
get_sampler
()
if
self
.
config
.
tie_word_embeddings
:
if
self
.
config
.
tie_word_embeddings
:
self
.
lm_head
.
weight
=
self
.
model
.
embed_tokens
.
weight
self
.
lm_head
.
weight
=
self
.
model
.
embed_tokens
.
weight
...
@@ -188,14 +186,6 @@ class ErnieMTP(nn.Module, SupportsPP):
...
@@ -188,14 +186,6 @@ class ErnieMTP(nn.Module, SupportsPP):
return
self
.
model
.
compute_logits
(
hidden_states
,
self
.
lm_head
,
return
self
.
model
.
compute_logits
(
hidden_states
,
self
.
lm_head
,
sampling_metadata
,
spec_step_idx
)
sampling_metadata
,
spec_step_idx
)
def
sample
(
self
,
logits
:
torch
.
Tensor
,
sampling_metadata
:
SamplingMetadata
,
)
->
Optional
[
SamplerOutput
]:
next_tokens
=
self
.
sampler
(
logits
,
sampling_metadata
)
return
next_tokens
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
torch
.
Tensor
]])
->
set
[
str
]:
stacked_params_mapping
=
[
stacked_params_mapping
=
[
...
...
vllm/model_executor/models/plamo2.py
View file @
572ddf83
...
@@ -41,7 +41,6 @@ from vllm.model_executor.layers.mamba.ops.ssd_combined import (
...
@@ -41,7 +41,6 @@ from vllm.model_executor.layers.mamba.ops.ssd_combined import (
mamba_chunk_scan_combined
)
mamba_chunk_scan_combined
)
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.sampler
import
SamplerOutput
,
get_sampler
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
DEFAULT_VOCAB_PADDING_SIZE
,
ParallelLMHead
,
VocabParallelEmbedding
)
DEFAULT_VOCAB_PADDING_SIZE
,
ParallelLMHead
,
VocabParallelEmbedding
)
from
vllm.model_executor.model_loader.weight_utils
import
(
from
vllm.model_executor.model_loader.weight_utils
import
(
...
@@ -932,7 +931,6 @@ class Plamo2ForCausalLM(torch.nn.Module, HasInnerState, SupportsPP, IsHybrid):
...
@@ -932,7 +931,6 @@ class Plamo2ForCausalLM(torch.nn.Module, HasInnerState, SupportsPP, IsHybrid):
self
.
logits_processor
=
LogitsProcessor
(
self
.
unpadded_vocab_size
,
self
.
logits_processor
=
LogitsProcessor
(
self
.
unpadded_vocab_size
,
self
.
config
.
vocab_size
)
self
.
config
.
vocab_size
)
self
.
sampler
=
get_sampler
()
self
.
make_empty_intermediate_tensors
=
(
self
.
make_empty_intermediate_tensors
=
(
self
.
model
.
make_empty_intermediate_tensors
)
self
.
model
.
make_empty_intermediate_tensors
)
...
@@ -1030,14 +1028,6 @@ class Plamo2ForCausalLM(torch.nn.Module, HasInnerState, SupportsPP, IsHybrid):
...
@@ -1030,14 +1028,6 @@ class Plamo2ForCausalLM(torch.nn.Module, HasInnerState, SupportsPP, IsHybrid):
sampling_metadata
)
sampling_metadata
)
return
logits
return
logits
def
sample
(
self
,
logits
:
Optional
[
torch
.
Tensor
],
sampling_metadata
:
SamplingMetadata
,
)
->
Optional
[
SamplerOutput
]:
next_tokens
=
self
.
sampler
(
logits
,
sampling_metadata
)
return
next_tokens
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]]):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]]):
params_dict
=
dict
(
self
.
named_parameters
())
params_dict
=
dict
(
self
.
named_parameters
())
for
name
,
loaded_weight
in
weights
:
for
name
,
loaded_weight
in
weights
:
...
...
vllm/model_executor/models/step3_text.py
View file @
572ddf83
...
@@ -26,7 +26,6 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
...
@@ -26,7 +26,6 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor
from
vllm.model_executor.layers.quantization.base_config
import
(
from
vllm.model_executor.layers.quantization.base_config
import
(
QuantizationConfig
)
QuantizationConfig
)
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.sampler
import
SamplerOutput
,
get_sampler
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
DEFAULT_VOCAB_PADDING_SIZE
,
ParallelLMHead
,
VocabParallelEmbedding
)
DEFAULT_VOCAB_PADDING_SIZE
,
ParallelLMHead
,
VocabParallelEmbedding
)
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
...
@@ -391,7 +390,6 @@ class Step3TextForCausalLM(nn.Module, SupportsPP):
...
@@ -391,7 +390,6 @@ class Step3TextForCausalLM(nn.Module, SupportsPP):
)
)
self
.
logits_processor
=
LogitsProcessor
(
self
.
unpadded_vocab_size
,
self
.
logits_processor
=
LogitsProcessor
(
self
.
unpadded_vocab_size
,
config
.
vocab_size
)
config
.
vocab_size
)
self
.
sampler
=
get_sampler
()
else
:
else
:
self
.
lm_head
=
PPMissingLayer
()
self
.
lm_head
=
PPMissingLayer
()
...
@@ -413,14 +411,6 @@ class Step3TextForCausalLM(nn.Module, SupportsPP):
...
@@ -413,14 +411,6 @@ class Step3TextForCausalLM(nn.Module, SupportsPP):
sampling_metadata
)
sampling_metadata
)
return
logits
return
logits
def
sample
(
self
,
logits
:
Optional
[
torch
.
Tensor
],
sampling_metadata
:
SamplingMetadata
,
)
->
Optional
[
SamplerOutput
]:
next_tokens
=
self
.
sampler
(
logits
,
sampling_metadata
)
return
next_tokens
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
torch
.
Tensor
]])
->
set
[
str
]:
qkv_params_mapping
=
[
qkv_params_mapping
=
[
...
...
vllm/model_executor/models/step3_vl.py
View file @
572ddf83
...
@@ -2,7 +2,6 @@
...
@@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
math
import
math
from
collections.abc
import
Iterable
,
Mapping
,
Sequence
from
collections.abc
import
Iterable
,
Mapping
,
Sequence
from
functools
import
cached_property
from
itertools
import
product
from
itertools
import
product
from
math
import
ceil
,
sqrt
from
math
import
ceil
,
sqrt
from
typing
import
Any
,
Literal
,
Optional
,
TypedDict
,
Union
from
typing
import
Any
,
Literal
,
Optional
,
TypedDict
,
Union
...
@@ -24,7 +23,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
...
@@ -24,7 +23,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
QKVParallelLinear
,
QKVParallelLinear
,
RowParallelLinear
)
RowParallelLinear
)
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.sampler
import
SamplerOutput
,
get_sampler
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
...
@@ -897,13 +895,6 @@ class Step3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -897,13 +895,6 @@ class Step3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
self
.
make_empty_intermediate_tensors
=
(
self
.
make_empty_intermediate_tensors
=
(
self
.
language_model
.
make_empty_intermediate_tensors
)
self
.
language_model
.
make_empty_intermediate_tensors
)
@
cached_property
def
sampler
(
self
):
if
hasattr
(
self
.
language_model
,
"sampler"
):
return
self
.
language_model
.
sampler
return
get_sampler
()
@
property
@
property
def
device
(
self
):
def
device
(
self
):
return
next
(
self
.
parameters
()).
device
return
next
(
self
.
parameters
()).
device
...
@@ -1069,13 +1060,6 @@ class Step3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -1069,13 +1060,6 @@ class Step3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
return
self
.
language_model
.
compute_logits
(
hidden_states
,
return
self
.
language_model
.
compute_logits
(
hidden_states
,
sampling_metadata
)
sampling_metadata
)
def
sample
(
self
,
logits
:
torch
.
Tensor
,
sampling_metadata
:
SamplingMetadata
,
)
->
Optional
[
SamplerOutput
]:
return
self
.
language_model
.
sample
(
logits
,
sampling_metadata
)
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]]):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]]):
skip_prefixes
=
[]
skip_prefixes
=
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment