Unverified Commit bb4337b3 authored by wangxiyuan, committed by GitHub
Browse files

[Platform] Deprecate seed_everything (#31659)


Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
parent 367856de
......@@ -10,7 +10,7 @@ from vllm.model_executor.layers.fla.ops.layernorm_guard import (
layernorm_fn,
rms_norm_ref,
)
from vllm.platforms import current_platform
from vllm.utils.torch_utils import set_random_seed
def layer_norm_ref(
......@@ -114,7 +114,7 @@ def test_layer_norm_fwd_basic(
is_rms_norm: bool,
) -> None:
"""Test basic layer norm forward pass without z (gate) tensor."""
current_platform.seed_everything(seed)
set_random_seed(seed)
device = torch.device("cuda:0")
# Create inputs
......@@ -156,7 +156,7 @@ def test_layer_norm_fwd_with_gate(
is_rms_norm: bool,
) -> None:
"""Test layer norm forward pass with z (gate) tensor."""
current_platform.seed_everything(42)
set_random_seed(42)
device = torch.device("cuda:0")
# Create inputs
......@@ -213,7 +213,7 @@ def test_layer_norm_fwd_with_groups(
f"hidden_size {hidden_size} not divisible by group_size {group_size}"
)
current_platform.seed_everything(42)
set_random_seed(42)
device = torch.device("cuda:0")
# Create inputs
......@@ -253,7 +253,7 @@ def test_layer_norm_rows_per_block(
dtype: torch.dtype,
) -> None:
"""Test that rows_per_block logic works correctly for various M sizes."""
current_platform.seed_everything(42)
set_random_seed(42)
device = torch.device("cuda:0")
hidden_size = 1024
......@@ -278,7 +278,7 @@ def test_layer_norm_rows_per_block(
def test_strided_input(dtype: torch.dtype) -> None:
"""Test that the kernel handles non-contiguous (strided)
inputs correctly."""
current_platform.seed_everything(42)
set_random_seed(42)
device = torch.device("cuda:0")
num_tokens = 128
hidden_size = 1024
......@@ -318,7 +318,7 @@ def test_output_buffer_provided(
dtype: torch.dtype,
) -> None:
"""Test that the kernel works when an output buffer is provided."""
current_platform.seed_everything(42)
set_random_seed(42)
device = torch.device("cuda:0")
# Create inputs
......@@ -359,7 +359,7 @@ def test_multidimensional_input(
dtype: torch.dtype,
) -> None:
"""Test that the autograd function handles multidimensional inputs."""
current_platform.seed_everything(42)
set_random_seed(42)
device = torch.device("cuda:0")
hidden_size = shape[-1]
......
......@@ -18,8 +18,8 @@ from vllm.distributed.parallel_state import (
get_tensor_model_parallel_world_size,
)
from vllm.lora.ops.triton_ops import fused_moe_lora
from vllm.platforms import current_platform
from vllm.utils.network_utils import get_open_port
from vllm.utils.torch_utils import set_random_seed
@pytest.fixture(autouse=True)
......@@ -265,7 +265,7 @@ def test_fused_moe_lora_kernel(
seed,
):
torch.set_default_device(device)
current_platform.seed_everything(seed)
set_random_seed(seed)
# the number of randomly generated sentences.
num_sequences = 10
# generate data
......@@ -358,7 +358,7 @@ def test_fused_moe_lora_kernel_fully_sharded(
seed,
column_parallel,
):
current_platform.seed_everything(seed)
set_random_seed(seed)
# the number of randomly generated sentences.
num_sequences = 10
# generate data
......@@ -415,7 +415,7 @@ def use_fused_moe_lora_kernel_tensor_parallel(
def _get_shard_slice(shard_size):
return slice(local_rank * shard_size, (local_rank + 1) * shard_size)
current_platform.seed_everything(seed)
set_random_seed(seed)
device = torch.device(f"cuda:{local_rank}")
torch.cuda.set_device(device)
......
......@@ -43,8 +43,8 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
VocabParallelEmbedding,
get_masked_input_and_mask,
)
from vllm.model_executor.utils import set_random_seed
from vllm.platforms import current_platform
from vllm.utils.torch_utils import set_random_seed
from .utils import DummyLoRAManager
......
......@@ -9,7 +9,7 @@ import vllm.lora.ops.torch_ops as torch_ops
import vllm.lora.ops.triton_ops as triton_ops
from vllm.lora.ops.triton_ops import LoRAKernelMeta
from vllm.lora.ops.triton_ops.utils import _LORA_A_PTR_DICT, _LORA_B_PTR_DICT
from vllm.platforms import current_platform
from vllm.utils.torch_utils import set_random_seed
from .utils import PunicaTensors, assert_close, generate_data_for_nslices
......@@ -395,7 +395,7 @@ def test_kernels(
Tests LoRA kernels.
"""
torch.set_default_device(device)
current_platform.seed_everything(seed)
set_random_seed(seed)
if op_type == "shrink":
check_lora_shrink_kernel(
......@@ -447,7 +447,7 @@ def test_kernels_hidden_size(
Tests SGMV and LoRA kernels.
"""
torch.set_default_device(device)
current_platform.seed_everything(seed)
set_random_seed(seed)
if op_type == "shrink":
check_lora_shrink_kernel(
......
......@@ -21,6 +21,7 @@ from vllm.model_executor.models.vision import (
from vllm.platforms import current_platform
from vllm.utils.network_utils import get_open_port
from vllm.utils.system_utils import update_environment_variables
from vllm.utils.torch_utils import set_random_seed
pytestmark = pytest.mark.cpu_test
......@@ -98,7 +99,7 @@ def run_dp_sharded_vision_model_vs_direct(
"""
# Set random seed for reproducibility
current_platform.seed_everything(0)
set_random_seed(0)
device = f"{current_platform.device_name}:{local_rank}"
current_platform.set_device(device)
......@@ -284,7 +285,7 @@ def run_dp_sharded_mrope_vision_model_vs_direct(
calling the model directly.
"""
# Set random seed for reproducibility
current_platform.seed_everything(0)
set_random_seed(0)
device = f"{current_platform.device_name}:{local_rank}"
current_platform.set_device(device)
torch.set_default_device(device)
......@@ -408,7 +409,7 @@ def run_dp_sharded_mrope_vision_model_uneven_load_worker(
):
"""Test run_dp_sharded_mrope_vision_model with uneven load distribution."""
# Set up distributed environment
current_platform.seed_everything(123)
set_random_seed(123)
device = f"{current_platform.device_name}:{local_rank}"
current_platform.set_device(device)
torch.set_default_device(device)
......
......@@ -19,7 +19,11 @@ from vllm.attention.backends.registry import AttentionBackendEnum
from vllm.config import ModelConfig
from vllm.platforms import current_platform
from vllm.utils.math_utils import cdiv
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, is_torch_equal_or_newer
from vllm.utils.torch_utils import (
STR_DTYPE_TO_TORCH_DTYPE,
is_torch_equal_or_newer,
set_random_seed,
)
from vllm.v1.attention.backends.utils import (
CommonAttentionMetadata,
set_kv_cache_layout,
......@@ -320,7 +324,7 @@ def _test_backend_correctness(
multiple GPUs. This tests that backends work correctly with different
head counts.
"""
current_platform.seed_everything(42)
set_random_seed(42)
hf_config_override = None
if tensor_parallel_size > 1:
......
......@@ -7,6 +7,7 @@ import pytest
import torch
from vllm.platforms import current_platform
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
from vllm.v1.kv_offload.worker.cpu_gpu import CpuGpuOffloadingHandlers
......@@ -62,7 +63,7 @@ def test_transfer(
seed: int,
device: str,
) -> None:
current_platform.seed_everything(seed)
set_random_seed(seed)
# create per-layer GPU KV caches based on available attn_backends
attn_backends_list = BACKENDS_TO_TEST
......
......@@ -15,6 +15,7 @@ import torch_xla.core.xla_model
from vllm.attention.layers.mm_encoder_attention import MMEncoderAttention
from vllm.attention.selector import _cached_get_attn_backend
from vllm.platforms import current_platform
from vllm.utils.torch_utils import set_random_seed
@pytest.fixture(autouse=True)
......@@ -63,7 +64,7 @@ def test_mha_attn_forward(
head_size: int,
device: str,
):
current_platform.seed_everything(0)
set_random_seed(0)
# These are expected to be f32
q = torch.randn(batch_size, seq_len, num_heads * head_size, device=device)
k = torch.randn(batch_size, seq_len, num_kv_heads * head_size, device=device)
......
......@@ -26,6 +26,7 @@ from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams
from vllm.utils.mem_constants import GiB_bytes
from vllm.utils.system_utils import update_environment_variables
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.core.kv_cache_utils import estimate_max_model_len, get_kv_cache_configs
from vllm.v1.core.sched.output import CachedRequestData, NewRequestData, SchedulerOutput
from vllm.v1.kv_cache_interface import (
......@@ -776,7 +777,7 @@ def test_hybrid_attention_mamba_tensor_shapes():
will not corrupt an attention block and vice versa
"""
current_platform.seed_everything(42)
set_random_seed(42)
update_environment_variables(
{
......
......@@ -2,10 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.model_executor.parameter import BasevLLMParameter, PackedvLLMParameter
from vllm.model_executor.utils import set_random_seed
__all__ = [
"set_random_seed",
"BasevLLMParameter",
"PackedvLLMParameter",
]
......@@ -10,12 +10,6 @@ import torch
from vllm.utils.torch_utils import is_torch_equal_or_newer
def set_random_seed(seed: int | None) -> None:
    """Seed random number generators through the active platform.

    Args:
        seed: Forwarded unchanged to ``current_platform.seed_everything``.
    """
    # The platform object is imported at call time, not at module import time.
    from vllm.platforms import current_platform

    current_platform.seed_everything(seed)
def set_weight_attrs(
weight: torch.Tensor,
weight_attrs: dict[str, Any] | None,
......
......@@ -372,6 +372,10 @@ class Platform:
Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
"""
logger.info_once(
"`seed_everything` is deprecated. It will be removed in v0.14.0 or later. "
"Please use `vllm.utils.torch_utils.set_random_seed` instead."
)
if seed is not None:
random.seed(seed)
np.random.seed(seed)
......
......@@ -3,6 +3,7 @@
import contextlib
import importlib.metadata
import os
import random
import threading
from collections.abc import Callable, Collection
from functools import lru_cache
......@@ -278,6 +279,13 @@ def kv_cache_dtype_str_to_dtype(
return STR_DTYPE_TO_TORCH_DTYPE[kv_cache_dtype]
def set_random_seed(seed: int | None) -> None:
if seed is not None:
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
def create_kv_caches_with_random_flash(
num_blocks: int,
block_size: int,
......@@ -290,9 +298,7 @@ def create_kv_caches_with_random_flash(
device: str | None = "cuda",
cache_layout: str | None = "NHD",
) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
from vllm.platforms import current_platform
current_platform.seed_everything(seed)
set_random_seed(seed)
dtype = get_kv_cache_torch_dtype(cache_dtype, model_dtype)
generic_kv_cache_shape = (num_blocks, 2, block_size, num_heads, head_size)
......@@ -335,9 +341,8 @@ def create_kv_caches_with_random(
raise ValueError(
f"Does not support key cache of type fp8 with head_size {head_size}"
)
from vllm.platforms import current_platform
current_platform.seed_everything(seed)
set_random_seed(seed)
dtype = get_kv_cache_torch_dtype(cache_dtype, model_dtype)
......
......@@ -10,10 +10,10 @@ import torch
from vllm import envs
from vllm.config import VllmConfig
from vllm.logger import init_logger
from vllm.model_executor.utils import set_random_seed
from vllm.platforms import CpuArchEnum, current_platform
from vllm.platforms.cpu import CpuPlatform, LogicalCPUInfo
from vllm.profiler.wrapper import TorchProfilerWrapper
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.worker.cpu_model_runner import CPUModelRunner
from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment
......
......@@ -34,7 +34,6 @@ from vllm.distributed.parallel_state import (
)
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.model_executor import set_random_seed
from vllm.model_executor.models.interfaces import is_mixture_of_experts
from vllm.model_executor.warmup.kernel_warmup import kernel_warmup
from vllm.platforms import current_platform
......@@ -43,6 +42,7 @@ from vllm.sequence import IntermediateTensors
from vllm.tasks import SupportedTask
from vllm.utils.mem_constants import GiB_bytes
from vllm.utils.mem_utils import MemorySnapshot, memory_profiling
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType
from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
......
......@@ -20,12 +20,11 @@ from vllm.distributed.kv_transfer import (
)
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.model_executor import set_random_seed
from vllm.platforms import current_platform
from vllm.platforms.tpu import USE_TPU_INFERENCE
from vllm.tasks import SupportedTask
from vllm.utils.math_utils import cdiv
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig, KVCacheSpec
from vllm.v1.outputs import ModelRunnerOutput
......
......@@ -9,9 +9,9 @@ import torch.distributed
from vllm.config import VllmConfig
from vllm.distributed import get_world_group
from vllm.logger import init_logger
from vllm.model_executor import set_random_seed
from vllm.platforms import current_platform
from vllm.profiler.wrapper import TorchProfilerWrapper
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment
from vllm.v1.worker.xpu_model_runner import XPUModelRunner
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment