Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b411418f
Unverified
Commit
b411418f
authored
Apr 24, 2025
by
Woosuk Kwon
Committed by
GitHub
Apr 24, 2025
Browse files
[Chore] Remove Sampler from Model Code (#17084)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent
2bc0f72a
Changes
103
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
7 additions
and
6 deletions
+7
-6
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+3
-2
vllm/worker/multi_step_model_runner.py
vllm/worker/multi_step_model_runner.py
+1
-2
vllm/worker/xpu_model_runner.py
vllm/worker/xpu_model_runner.py
+3
-2
No files found.
vllm/worker/model_runner.py
View file @
b411418f
...
...
@@ -35,7 +35,7 @@ from vllm.lora.request import LoRARequest
from
vllm.lora.worker_manager
import
LRUCacheWorkerLoRAManager
from
vllm.model_executor
import
SamplingMetadata
,
SamplingMetadataCache
from
vllm.model_executor.layers.rotary_embedding
import
MRotaryEmbedding
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.model_executor.layers.sampler
import
SamplerOutput
,
get_sampler
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.model_loader.tensorizer
import
TensorizerConfig
from
vllm.model_executor.models
import
supports_lora
,
supports_multimodal
...
...
@@ -1094,6 +1094,7 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
# Set after load_model.
self
.
lora_manager
:
Optional
[
LRUCacheWorkerLoRAManager
]
=
None
self
.
prompt_adapter_manager
:
LRUCacheWorkerPromptAdapterManager
=
None
self
.
sampler
=
get_sampler
()
set_cpu_offload_max_bytes
(
int
(
self
.
cache_config
.
cpu_offload_gb
*
1024
**
3
))
...
...
@@ -1832,7 +1833,7 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
model_input
.
async_callback
()
# Sample the next token.
output
:
SamplerOutput
=
self
.
model
.
sample
(
output
:
SamplerOutput
=
self
.
sampler
(
logits
=
logits
,
sampling_metadata
=
model_input
.
sampling_metadata
,
)
...
...
vllm/worker/multi_step_model_runner.py
View file @
b411418f
...
...
@@ -488,8 +488,7 @@ class MultiStepModelRunner(GPUModelRunnerBase[StatefulModelInput]):
device
=
"cpu"
,
pin_memory
=
True
)
self
.
_base_model_runner
.
model
.
sampler
.
include_gpu_probs_tensor
=
(
True
)
self
.
_base_model_runner
.
sampler
.
include_gpu_probs_tensor
=
True
if
frozen_model_input
.
sampling_metadata
:
frozen_model_input
.
sampling_metadata
.
skip_sampler_cpu_output
=
(
True
)
...
...
vllm/worker/xpu_model_runner.py
View file @
b411418f
...
...
@@ -18,7 +18,7 @@ from vllm.forward_context import set_forward_context
from
vllm.inputs
import
INPUT_REGISTRY
,
InputRegistry
from
vllm.logger
import
init_logger
from
vllm.model_executor
import
SamplingMetadataCache
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.model_executor.layers.sampler
import
SamplerOutput
,
get_sampler
from
vllm.model_executor.model_loader
import
get_model
from
vllm.multimodal
import
(
MULTIMODAL_REGISTRY
,
BatchedTensorInputs
,
MultiModalKwargs
,
MultiModalPlaceholderMap
,
...
...
@@ -410,6 +410,7 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
# Lazy initialization.
self
.
model
:
nn
.
Module
# Set after init_Model
self
.
sampler
=
get_sampler
()
self
.
sampling_metadata_cache
:
SamplingMetadataCache
=
\
SamplingMetadataCache
()
\
...
...
@@ -596,7 +597,7 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
model_input
.
async_callback
()
# Sample the next token.
output
:
SamplerOutput
=
self
.
model
.
sample
(
output
:
SamplerOutput
=
self
.
sampler
(
logits
=
logits
,
sampling_metadata
=
model_input
.
sampling_metadata
,
)
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment