Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d7219bcd
Unverified
Commit
d7219bcd
authored
Nov 21, 2025
by
Cyrus Leung
Committed by
GitHub
Nov 21, 2025
Browse files
[Misc] Move dynamic seed initialization to `EngineArgs` (#29165)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
4050bae4
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
23 additions
and
37 deletions
+23
-37
vllm/config/model.py
vllm/config/model.py
+7
-27
vllm/config/speculative.py
vllm/config/speculative.py
+1
-6
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+15
-1
vllm/v1/worker/tpu_worker.py
vllm/v1/worker/tpu_worker.py
+0
-3
No files found.
vllm/config/model.py
View file @
d7219bcd
...
@@ -146,9 +146,12 @@ class ModelConfig:
...
@@ -146,9 +146,12 @@ class ModelConfig:
- "bfloat16" for a balance between precision and range.
\n
- "bfloat16" for a balance between precision and range.
\n
- "float" is shorthand for FP32 precision.
\n
- "float" is shorthand for FP32 precision.
\n
- "float32" for FP32 precision."""
- "float32" for FP32 precision."""
seed
:
int
|
None
=
None
seed
:
int
=
0
"""Random seed for reproducibility. Initialized to None in V0, but
"""Random seed for reproducibility.
initialized to 0 in V1."""
We must set the global seed because otherwise,
different tensor parallel workers would sample different tokens,
leading to inconsistent results."""
hf_config
:
PretrainedConfig
=
field
(
init
=
False
)
hf_config
:
PretrainedConfig
=
field
(
init
=
False
)
"""The Hugging Face config of the model."""
"""The Hugging Face config of the model."""
hf_text_config
:
PretrainedConfig
=
field
(
init
=
False
)
hf_text_config
:
PretrainedConfig
=
field
(
init
=
False
)
...
@@ -415,7 +418,7 @@ class ModelConfig:
...
@@ -415,7 +418,7 @@ class ModelConfig:
def
__post_init__
(
def
__post_init__
(
self
,
self
,
# Multimodal config init vars
# Multimodal config init vars
limit_mm_per_prompt
:
dict
[
str
,
int
]
|
None
,
limit_mm_per_prompt
:
dict
[
str
,
int
|
dict
[
str
,
int
]
]
|
None
,
enable_mm_embeds
:
bool
|
None
,
enable_mm_embeds
:
bool
|
None
,
media_io_kwargs
:
dict
[
str
,
dict
[
str
,
Any
]]
|
None
,
media_io_kwargs
:
dict
[
str
,
dict
[
str
,
Any
]]
|
None
,
mm_processor_kwargs
:
dict
[
str
,
Any
]
|
None
,
mm_processor_kwargs
:
dict
[
str
,
Any
]
|
None
,
...
@@ -428,23 +431,6 @@ class ModelConfig:
...
@@ -428,23 +431,6 @@ class ModelConfig:
skip_mm_profiling
:
bool
|
None
,
skip_mm_profiling
:
bool
|
None
,
video_pruning_rate
:
float
|
None
,
video_pruning_rate
:
float
|
None
,
)
->
None
:
)
->
None
:
# Set the default seed to 0 in V1.
# NOTE(woosuk): In V1, we use separate processes for workers (unless
# VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here
# doesn't affect the user process. However, without a consistent seed,
# different tensor parallel workers would sample different tokens,
# leading to inconsistent results.
if
self
.
seed
is
None
:
self
.
seed
=
0
if
not
envs
.
VLLM_ENABLE_V1_MULTIPROCESSING
:
logger
.
warning
(
"The global random seed is set to %d. Since "
"VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may "
"affect the random state of the Python process that "
"launched vLLM."
,
self
.
seed
,
)
# Keep set served_model_name before maybe_model_redirect(self.model)
# Keep set served_model_name before maybe_model_redirect(self.model)
self
.
served_model_name
=
get_served_model_name
(
self
.
served_model_name
=
get_served_model_name
(
self
.
model
,
self
.
served_model_name
self
.
model
,
self
.
served_model_name
...
@@ -1151,12 +1137,6 @@ class ModelConfig:
...
@@ -1151,12 +1137,6 @@ class ModelConfig:
self
,
self
,
parallel_config
:
ParallelConfig
,
parallel_config
:
ParallelConfig
,
)
->
None
:
)
->
None
:
if
parallel_config
.
distributed_executor_backend
==
"external_launcher"
:
assert
self
.
seed
is
not
None
,
(
"Seed must be set when using external launcher backend to "
"make sure sampling results are the same across workers."
)
total_num_attention_heads
=
getattr
(
total_num_attention_heads
=
getattr
(
self
.
hf_text_config
,
"num_attention_heads"
,
0
self
.
hf_text_config
,
"num_attention_heads"
,
0
)
)
...
...
vllm/config/speculative.py
View file @
d7219bcd
...
@@ -9,6 +9,7 @@ from pydantic import Field, SkipValidation, model_validator
...
@@ -9,6 +9,7 @@ from pydantic import Field, SkipValidation, model_validator
from
pydantic.dataclasses
import
dataclass
from
pydantic.dataclasses
import
dataclass
from
typing_extensions
import
Self
from
typing_extensions
import
Self
from
vllm.config.model
import
ModelConfig
from
vllm.config.parallel
import
ParallelConfig
from
vllm.config.parallel
import
ParallelConfig
from
vllm.config.utils
import
config
from
vllm.config.utils
import
config
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
...
@@ -18,10 +19,8 @@ if TYPE_CHECKING:
...
@@ -18,10 +19,8 @@ if TYPE_CHECKING:
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
import
vllm.model_executor.layers.quantization
as
me_quant
import
vllm.model_executor.layers.quantization
as
me_quant
from
vllm.config
import
ModelConfig
else
:
else
:
PretrainedConfig
=
Any
PretrainedConfig
=
Any
ModelConfig
=
Any
me_quant
=
LazyLoader
(
me_quant
=
LazyLoader
(
"model_executor"
,
globals
(),
"vllm.model_executor.layers.quantization"
"model_executor"
,
globals
(),
"vllm.model_executor.layers.quantization"
...
@@ -316,10 +315,6 @@ class SpeculativeConfig:
...
@@ -316,10 +315,6 @@ class SpeculativeConfig:
self
.
prompt_lookup_min
=
0
self
.
prompt_lookup_min
=
0
if
self
.
model
is
not
None
:
if
self
.
model
is
not
None
:
# TODO: Move this import to the top once `ModelConfig`
# lives in `vllm.config.model`.
from
vllm.config
import
ModelConfig
self
.
draft_model_config
=
ModelConfig
(
self
.
draft_model_config
=
ModelConfig
(
model
=
self
.
model
,
model
=
self
.
model
,
runner
=
"draft"
,
runner
=
"draft"
,
...
...
vllm/engine/arg_utils.py
View file @
d7219bcd
...
@@ -367,7 +367,7 @@ class EngineArgs:
...
@@ -367,7 +367,7 @@ class EngineArgs:
config_format
:
str
=
ModelConfig
.
config_format
config_format
:
str
=
ModelConfig
.
config_format
dtype
:
ModelDType
=
ModelConfig
.
dtype
dtype
:
ModelDType
=
ModelConfig
.
dtype
kv_cache_dtype
:
CacheDType
=
CacheConfig
.
cache_dtype
kv_cache_dtype
:
CacheDType
=
CacheConfig
.
cache_dtype
seed
:
int
|
None
=
ModelConfig
.
seed
seed
:
int
|
None
=
None
max_model_len
:
int
|
None
=
ModelConfig
.
max_model_len
max_model_len
:
int
|
None
=
ModelConfig
.
max_model_len
cuda_graph_sizes
:
list
[
int
]
|
None
=
CompilationConfig
.
cudagraph_capture_sizes
cuda_graph_sizes
:
list
[
int
]
|
None
=
CompilationConfig
.
cudagraph_capture_sizes
cudagraph_capture_sizes
:
list
[
int
]
|
None
=
(
cudagraph_capture_sizes
:
list
[
int
]
|
None
=
(
...
@@ -1188,6 +1188,20 @@ class EngineArgs:
...
@@ -1188,6 +1188,20 @@ class EngineArgs:
if
check_gguf_file
(
self
.
model
):
if
check_gguf_file
(
self
.
model
):
self
.
quantization
=
self
.
load_format
=
"gguf"
self
.
quantization
=
self
.
load_format
=
"gguf"
# NOTE(woosuk): In V1, we use separate processes for workers (unless
# VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here
# doesn't affect the user process.
if
self
.
seed
is
None
:
self
.
seed
=
0
if
not
envs
.
VLLM_ENABLE_V1_MULTIPROCESSING
:
logger
.
warning
(
"The global random seed is set to %d. Since "
"VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may "
"affect the random state of the Python process that "
"launched vLLM."
,
self
.
seed
,
)
if
self
.
disable_mm_preprocessor_cache
:
if
self
.
disable_mm_preprocessor_cache
:
logger
.
warning
(
logger
.
warning
(
"`--disable-mm-preprocessor-cache` is deprecated "
"`--disable-mm-preprocessor-cache` is deprecated "
...
...
vllm/v1/worker/tpu_worker.py
View file @
d7219bcd
...
@@ -106,9 +106,6 @@ class TPUWorker:
...
@@ -106,9 +106,6 @@ class TPUWorker:
"Profiling enabled. Traces will be saved to: %s"
,
self
.
profile_dir
"Profiling enabled. Traces will be saved to: %s"
,
self
.
profile_dir
)
)
if
self
.
model_config
.
seed
is
None
:
self
.
model_config
.
seed
=
0
def
initialize_cache
(
self
,
num_gpu_blocks
:
int
,
num_cpu_blocks
:
int
)
->
None
:
def
initialize_cache
(
self
,
num_gpu_blocks
:
int
,
num_cpu_blocks
:
int
)
->
None
:
self
.
cache_config
.
num_gpu_blocks
=
num_gpu_blocks
self
.
cache_config
.
num_gpu_blocks
=
num_gpu_blocks
self
.
cache_config
.
num_cpu_blocks
=
num_cpu_blocks
self
.
cache_config
.
num_cpu_blocks
=
num_cpu_blocks
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment