Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bb4337b3
Unverified
Commit
bb4337b3
authored Jan 05, 2026 by wangxiyuan
Committed by GitHub Jan 04, 2026
Browse files
[Platform] Deprecate seed_everything (#31659)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
parent
367856de
Changes
77
Show whitespace changes
Inline
Side-by-side
Showing 17 changed files with 50 additions and 42 deletions
+50
-42
tests/kernels/test_fla_layernorm_guard.py
tests/kernels/test_fla_layernorm_guard.py
+8
-8
tests/lora/test_fused_moe_lora_kernel.py
tests/lora/test_fused_moe_lora_kernel.py
+4
-4
tests/lora/test_layers.py
tests/lora/test_layers.py
+1
-1
tests/lora/test_punica_ops.py
tests/lora/test_punica_ops.py
+3
-3
tests/models/test_vision.py
tests/models/test_vision.py
+4
-3
tests/v1/attention/test_attention_backends.py
tests/v1/attention/test_attention_backends.py
+6
-2
tests/v1/kv_offload/test_cpu_gpu.py
tests/v1/kv_offload/test_cpu_gpu.py
+2
-1
tests/v1/tpu/test_mha_attn.py
tests/v1/tpu/test_mha_attn.py
+2
-1
tests/v1/worker/test_gpu_model_runner.py
tests/v1/worker/test_gpu_model_runner.py
+2
-1
vllm/model_executor/__init__.py
vllm/model_executor/__init__.py
+0
-2
vllm/model_executor/utils.py
vllm/model_executor/utils.py
+0
-6
vllm/platforms/interface.py
vllm/platforms/interface.py
+4
-0
vllm/utils/torch_utils.py
vllm/utils/torch_utils.py
+10
-5
vllm/v1/worker/cpu_worker.py
vllm/v1/worker/cpu_worker.py
+1
-1
vllm/v1/worker/gpu_worker.py
vllm/v1/worker/gpu_worker.py
+1
-1
vllm/v1/worker/tpu_worker.py
vllm/v1/worker/tpu_worker.py
+1
-2
vllm/v1/worker/xpu_worker.py
vllm/v1/worker/xpu_worker.py
+1
-1
No files found.
tests/kernels/test_fla_layernorm_guard.py
View file @
bb4337b3
...
...
@@ -10,7 +10,7 @@ from vllm.model_executor.layers.fla.ops.layernorm_guard import (
layernorm_fn
,
rms_norm_ref
,
)
from
vllm.
platforms
import
current_platform
from
vllm.
utils.torch_utils
import
set_random_seed
def
layer_norm_ref
(
...
...
@@ -114,7 +114,7 @@ def test_layer_norm_fwd_basic(
is_rms_norm
:
bool
,
)
->
None
:
"""Test basic layer norm forward pass without z (gate) tensor."""
current_platform
.
seed_everything
(
seed
)
set_random_seed
(
seed
)
device
=
torch
.
device
(
"cuda:0"
)
# Create inputs
...
...
@@ -156,7 +156,7 @@ def test_layer_norm_fwd_with_gate(
is_rms_norm
:
bool
,
)
->
None
:
"""Test layer norm forward pass with z (gate) tensor."""
current_platform
.
seed_everything
(
42
)
set_random_seed
(
42
)
device
=
torch
.
device
(
"cuda:0"
)
# Create inputs
...
...
@@ -213,7 +213,7 @@ def test_layer_norm_fwd_with_groups(
f
"hidden_size
{
hidden_size
}
not divisible by group_size
{
group_size
}
"
)
current_platform
.
seed_everything
(
42
)
set_random_seed
(
42
)
device
=
torch
.
device
(
"cuda:0"
)
# Create inputs
...
...
@@ -253,7 +253,7 @@ def test_layer_norm_rows_per_block(
dtype
:
torch
.
dtype
,
)
->
None
:
"""Test that rows_per_block logic works correctly for various M sizes."""
current_platform
.
seed_everything
(
42
)
set_random_seed
(
42
)
device
=
torch
.
device
(
"cuda:0"
)
hidden_size
=
1024
...
...
@@ -278,7 +278,7 @@ def test_layer_norm_rows_per_block(
def
test_strided_input
(
dtype
:
torch
.
dtype
)
->
None
:
"""Test that the kernel handles non-contiguous (strided)
inputs correctly."""
current_platform
.
seed_everything
(
42
)
set_random_seed
(
42
)
device
=
torch
.
device
(
"cuda:0"
)
num_tokens
=
128
hidden_size
=
1024
...
...
@@ -318,7 +318,7 @@ def test_output_buffer_provided(
dtype
:
torch
.
dtype
,
)
->
None
:
"""Test that the kernel works when an output buffer is provided."""
current_platform
.
seed_everything
(
42
)
set_random_seed
(
42
)
device
=
torch
.
device
(
"cuda:0"
)
# Create inputs
...
...
@@ -359,7 +359,7 @@ def test_multidimensional_input(
dtype
:
torch
.
dtype
,
)
->
None
:
"""Test that the autograd function handles multidimensional inputs."""
current_platform
.
seed_everything
(
42
)
set_random_seed
(
42
)
device
=
torch
.
device
(
"cuda:0"
)
hidden_size
=
shape
[
-
1
]
...
...
tests/lora/test_fused_moe_lora_kernel.py
View file @
bb4337b3
...
...
@@ -18,8 +18,8 @@ from vllm.distributed.parallel_state import (
get_tensor_model_parallel_world_size
,
)
from
vllm.lora.ops.triton_ops
import
fused_moe_lora
from
vllm.platforms
import
current_platform
from
vllm.utils.network_utils
import
get_open_port
from
vllm.utils.torch_utils
import
set_random_seed
@
pytest
.
fixture
(
autouse
=
True
)
...
...
@@ -265,7 +265,7 @@ def test_fused_moe_lora_kernel(
seed
,
):
torch
.
set_default_device
(
device
)
current_platform
.
seed_everything
(
seed
)
set_random_seed
(
seed
)
# the number of randomly generated sentences.
num_sequences
=
10
# generate data
...
...
@@ -358,7 +358,7 @@ def test_fused_moe_lora_kernel_fully_sharded(
seed
,
column_parallel
,
):
current_platform
.
seed_everything
(
seed
)
set_random_seed
(
seed
)
# the number of randomly generated sentences.
num_sequences
=
10
# generate data
...
...
@@ -415,7 +415,7 @@ def use_fused_moe_lora_kernel_tensor_parallel(
def
_get_shard_slice
(
shard_size
):
return
slice
(
local_rank
*
shard_size
,
(
local_rank
+
1
)
*
shard_size
)
current_platform
.
seed_everything
(
seed
)
set_random_seed
(
seed
)
device
=
torch
.
device
(
f
"cuda:
{
local_rank
}
"
)
torch
.
cuda
.
set_device
(
device
)
...
...
tests/lora/test_layers.py
View file @
bb4337b3
...
...
@@ -43,8 +43,8 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
VocabParallelEmbedding
,
get_masked_input_and_mask
,
)
from
vllm.model_executor.utils
import
set_random_seed
from
vllm.platforms
import
current_platform
from
vllm.utils.torch_utils
import
set_random_seed
from
.utils
import
DummyLoRAManager
...
...
tests/lora/test_punica_ops.py
View file @
bb4337b3
...
...
@@ -9,7 +9,7 @@ import vllm.lora.ops.torch_ops as torch_ops
import
vllm.lora.ops.triton_ops
as
triton_ops
from
vllm.lora.ops.triton_ops
import
LoRAKernelMeta
from
vllm.lora.ops.triton_ops.utils
import
_LORA_A_PTR_DICT
,
_LORA_B_PTR_DICT
from
vllm.
platforms
import
current_platform
from
vllm.
utils.torch_utils
import
set_random_seed
from
.utils
import
PunicaTensors
,
assert_close
,
generate_data_for_nslices
...
...
@@ -395,7 +395,7 @@ def test_kernels(
Tests LoRA kernels.
"""
torch
.
set_default_device
(
device
)
current_platform
.
seed_everything
(
seed
)
set_random_seed
(
seed
)
if
op_type
==
"shrink"
:
check_lora_shrink_kernel
(
...
...
@@ -447,7 +447,7 @@ def test_kernels_hidden_size(
Tests SGMV and LoRA kernels.
"""
torch
.
set_default_device
(
device
)
current_platform
.
seed_everything
(
seed
)
set_random_seed
(
seed
)
if
op_type
==
"shrink"
:
check_lora_shrink_kernel
(
...
...
tests/models/test_vision.py
View file @
bb4337b3
...
...
@@ -21,6 +21,7 @@ from vllm.model_executor.models.vision import (
from
vllm.platforms
import
current_platform
from
vllm.utils.network_utils
import
get_open_port
from
vllm.utils.system_utils
import
update_environment_variables
from
vllm.utils.torch_utils
import
set_random_seed
pytestmark
=
pytest
.
mark
.
cpu_test
...
...
@@ -98,7 +99,7 @@ def run_dp_sharded_vision_model_vs_direct(
"""
# Set random seed for reproducibility
current_platform
.
seed_everything
(
0
)
set_random_seed
(
0
)
device
=
f
"
{
current_platform
.
device_name
}
:
{
local_rank
}
"
current_platform
.
set_device
(
device
)
...
...
@@ -284,7 +285,7 @@ def run_dp_sharded_mrope_vision_model_vs_direct(
calling the model directly.
"""
# Set random seed for reproducibility
current_platform
.
seed_everything
(
0
)
set_random_seed
(
0
)
device
=
f
"
{
current_platform
.
device_name
}
:
{
local_rank
}
"
current_platform
.
set_device
(
device
)
torch
.
set_default_device
(
device
)
...
...
@@ -408,7 +409,7 @@ def run_dp_sharded_mrope_vision_model_uneven_load_worker(
):
"""Test run_dp_sharded_mrope_vision_model with uneven load distribution."""
# Set up distributed environment
current_platform
.
seed_everything
(
123
)
set_random_seed
(
123
)
device
=
f
"
{
current_platform
.
device_name
}
:
{
local_rank
}
"
current_platform
.
set_device
(
device
)
torch
.
set_default_device
(
device
)
...
...
tests/v1/attention/test_attention_backends.py
View file @
bb4337b3
...
...
@@ -19,7 +19,11 @@ from vllm.attention.backends.registry import AttentionBackendEnum
from
vllm.config
import
ModelConfig
from
vllm.platforms
import
current_platform
from
vllm.utils.math_utils
import
cdiv
from
vllm.utils.torch_utils
import
STR_DTYPE_TO_TORCH_DTYPE
,
is_torch_equal_or_newer
from
vllm.utils.torch_utils
import
(
STR_DTYPE_TO_TORCH_DTYPE
,
is_torch_equal_or_newer
,
set_random_seed
,
)
from
vllm.v1.attention.backends.utils
import
(
CommonAttentionMetadata
,
set_kv_cache_layout
,
...
...
@@ -320,7 +324,7 @@ def _test_backend_correctness(
multiple GPUs. This tests that backends work correctly with different
head counts.
"""
current_platform
.
seed_everything
(
42
)
set_random_seed
(
42
)
hf_config_override
=
None
if
tensor_parallel_size
>
1
:
...
...
tests/v1/kv_offload/test_cpu_gpu.py
View file @
bb4337b3
...
...
@@ -7,6 +7,7 @@ import pytest
import
torch
from
vllm.platforms
import
current_platform
from
vllm.utils.torch_utils
import
set_random_seed
from
vllm.v1.attention.backends.flash_attn
import
FlashAttentionBackend
from
vllm.v1.kv_offload.mediums
import
CPULoadStoreSpec
,
GPULoadStoreSpec
from
vllm.v1.kv_offload.worker.cpu_gpu
import
CpuGpuOffloadingHandlers
...
...
@@ -62,7 +63,7 @@ def test_transfer(
seed
:
int
,
device
:
str
,
)
->
None
:
current_platform
.
seed_everything
(
seed
)
set_random_seed
(
seed
)
# create per-layer GPU KV caches based on available attn_backends
attn_backends_list
=
BACKENDS_TO_TEST
...
...
tests/v1/tpu/test_mha_attn.py
View file @
bb4337b3
...
...
@@ -15,6 +15,7 @@ import torch_xla.core.xla_model
from
vllm.attention.layers.mm_encoder_attention
import
MMEncoderAttention
from
vllm.attention.selector
import
_cached_get_attn_backend
from
vllm.platforms
import
current_platform
from
vllm.utils.torch_utils
import
set_random_seed
@
pytest
.
fixture
(
autouse
=
True
)
...
...
@@ -63,7 +64,7 @@ def test_mha_attn_forward(
head_size
:
int
,
device
:
str
,
):
current_platform
.
seed_everything
(
0
)
set_random_seed
(
0
)
# These are expected to be f32
q
=
torch
.
randn
(
batch_size
,
seq_len
,
num_heads
*
head_size
,
device
=
device
)
k
=
torch
.
randn
(
batch_size
,
seq_len
,
num_kv_heads
*
head_size
,
device
=
device
)
...
...
tests/v1/worker/test_gpu_model_runner.py
View file @
bb4337b3
...
...
@@ -26,6 +26,7 @@ from vllm.platforms import current_platform
from
vllm.sampling_params
import
SamplingParams
from
vllm.utils.mem_constants
import
GiB_bytes
from
vllm.utils.system_utils
import
update_environment_variables
from
vllm.utils.torch_utils
import
set_random_seed
from
vllm.v1.core.kv_cache_utils
import
estimate_max_model_len
,
get_kv_cache_configs
from
vllm.v1.core.sched.output
import
CachedRequestData
,
NewRequestData
,
SchedulerOutput
from
vllm.v1.kv_cache_interface
import
(
...
...
@@ -776,7 +777,7 @@ def test_hybrid_attention_mamba_tensor_shapes():
will not corrupt an attention block and vice versa
"""
current_platform
.
seed_everything
(
42
)
set_random_seed
(
42
)
update_environment_variables
(
{
...
...
vllm/model_executor/__init__.py
View file @
bb4337b3
...
...
@@ -2,10 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
vllm.model_executor.parameter
import
BasevLLMParameter
,
PackedvLLMParameter
from
vllm.model_executor.utils
import
set_random_seed
__all__
=
[
"set_random_seed"
,
"BasevLLMParameter"
,
"PackedvLLMParameter"
,
]
vllm/model_executor/utils.py
View file @
bb4337b3
...
...
@@ -10,12 +10,6 @@ import torch
from
vllm.utils.torch_utils
import
is_torch_equal_or_newer
def
set_random_seed
(
seed
:
int
|
None
)
->
None
:
from
vllm.platforms
import
current_platform
current_platform
.
seed_everything
(
seed
)
def
set_weight_attrs
(
weight
:
torch
.
Tensor
,
weight_attrs
:
dict
[
str
,
Any
]
|
None
,
...
...
vllm/platforms/interface.py
View file @
bb4337b3
...
...
@@ -372,6 +372,10 @@ class Platform:
Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
"""
logger
.
info_once
(
"`seed_everything` is deprecated. It will be removed in v0.14.0 or later. "
"Please use `vllm.utils.torch_utils.set_random_seed` instead."
)
if
seed
is
not
None
:
random
.
seed
(
seed
)
np
.
random
.
seed
(
seed
)
...
...
vllm/utils/torch_utils.py
View file @
bb4337b3
...
...
@@ -3,6 +3,7 @@
import
contextlib
import
importlib.metadata
import
os
import
random
import
threading
from
collections.abc
import
Callable
,
Collection
from
functools
import
lru_cache
...
...
@@ -278,6 +279,13 @@ def kv_cache_dtype_str_to_dtype(
return
STR_DTYPE_TO_TORCH_DTYPE
[
kv_cache_dtype
]
def
set_random_seed
(
seed
:
int
|
None
)
->
None
:
if
seed
is
not
None
:
random
.
seed
(
seed
)
np
.
random
.
seed
(
seed
)
torch
.
manual_seed
(
seed
)
def
create_kv_caches_with_random_flash
(
num_blocks
:
int
,
block_size
:
int
,
...
...
@@ -290,9 +298,7 @@ def create_kv_caches_with_random_flash(
device
:
str
|
None
=
"cuda"
,
cache_layout
:
str
|
None
=
"NHD"
,
)
->
tuple
[
list
[
torch
.
Tensor
],
list
[
torch
.
Tensor
]]:
from
vllm.platforms
import
current_platform
current_platform
.
seed_everything
(
seed
)
set_random_seed
(
seed
)
dtype
=
get_kv_cache_torch_dtype
(
cache_dtype
,
model_dtype
)
generic_kv_cache_shape
=
(
num_blocks
,
2
,
block_size
,
num_heads
,
head_size
)
...
...
@@ -335,9 +341,8 @@ def create_kv_caches_with_random(
raise
ValueError
(
f
"Does not support key cache of type fp8 with head_size
{
head_size
}
"
)
from
vllm.platforms
import
current_platform
current_platform
.
seed_everything
(
seed
)
set_random_seed
(
seed
)
dtype
=
get_kv_cache_torch_dtype
(
cache_dtype
,
model_dtype
)
...
...
vllm/v1/worker/cpu_worker.py
View file @
bb4337b3
...
...
@@ -10,10 +10,10 @@ import torch
from
vllm
import
envs
from
vllm.config
import
VllmConfig
from
vllm.logger
import
init_logger
from
vllm.model_executor.utils
import
set_random_seed
from
vllm.platforms
import
CpuArchEnum
,
current_platform
from
vllm.platforms.cpu
import
CpuPlatform
,
LogicalCPUInfo
from
vllm.profiler.wrapper
import
TorchProfilerWrapper
from
vllm.utils.torch_utils
import
set_random_seed
from
vllm.v1.worker.cpu_model_runner
import
CPUModelRunner
from
vllm.v1.worker.gpu_worker
import
Worker
,
init_worker_distributed_environment
...
...
vllm/v1/worker/gpu_worker.py
View file @
bb4337b3
...
...
@@ -34,7 +34,6 @@ from vllm.distributed.parallel_state import (
)
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.model_executor
import
set_random_seed
from
vllm.model_executor.models.interfaces
import
is_mixture_of_experts
from
vllm.model_executor.warmup.kernel_warmup
import
kernel_warmup
from
vllm.platforms
import
current_platform
...
...
@@ -43,6 +42,7 @@ from vllm.sequence import IntermediateTensors
from
vllm.tasks
import
SupportedTask
from
vllm.utils.mem_constants
import
GiB_bytes
from
vllm.utils.mem_utils
import
MemorySnapshot
,
memory_profiling
from
vllm.utils.torch_utils
import
set_random_seed
from
vllm.v1.core.sched.output
import
GrammarOutput
,
SchedulerOutput
from
vllm.v1.engine
import
ReconfigureDistributedRequest
,
ReconfigureRankType
from
vllm.v1.kv_cache_interface
import
KVCacheConfig
,
KVCacheSpec
...
...
vllm/v1/worker/tpu_worker.py
View file @
bb4337b3
...
...
@@ -20,12 +20,11 @@ from vllm.distributed.kv_transfer import (
)
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.model_executor
import
set_random_seed
from
vllm.platforms
import
current_platform
from
vllm.platforms.tpu
import
USE_TPU_INFERENCE
from
vllm.tasks
import
SupportedTask
from
vllm.utils.math_utils
import
cdiv
from
vllm.utils.torch_utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
vllm.utils.torch_utils
import
STR_DTYPE_TO_TORCH_DTYPE
,
set_random_seed
from
vllm.v1.core.sched.output
import
GrammarOutput
,
SchedulerOutput
from
vllm.v1.kv_cache_interface
import
AttentionSpec
,
KVCacheConfig
,
KVCacheSpec
from
vllm.v1.outputs
import
ModelRunnerOutput
...
...
vllm/v1/worker/xpu_worker.py
View file @
bb4337b3
...
...
@@ -9,9 +9,9 @@ import torch.distributed
from
vllm.config
import
VllmConfig
from
vllm.distributed
import
get_world_group
from
vllm.logger
import
init_logger
from
vllm.model_executor
import
set_random_seed
from
vllm.platforms
import
current_platform
from
vllm.profiler.wrapper
import
TorchProfilerWrapper
from
vllm.utils.torch_utils
import
set_random_seed
from
vllm.v1.worker.gpu_worker
import
Worker
,
init_worker_distributed_environment
from
vllm.v1.worker.xpu_model_runner
import
XPUModelRunner
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment