Unverified commit db318326, authored by Cyrus Leung and committed by GitHub
Browse files

[Misc] Use `deprecated` for `seed_everything` (#31780)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 799b5721
...@@ -8,10 +8,9 @@ import torch ...@@ -8,10 +8,9 @@ import torch
import vllm.model_executor.layers.activation # noqa F401 import vllm.model_executor.layers.activation # noqa F401
from vllm.model_executor.custom_op import CustomOp from vllm.model_executor.custom_op import CustomOp
from vllm.platforms import current_platform
from vllm.triton_utils import triton from vllm.triton_utils import triton
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
batch_size_range = [1, 16, 128] batch_size_range = [1, 16, 128]
seq_len_range = [1, 16, 64, 1024, 4096] seq_len_range = [1, 16, 64, 1024, 4096]
...@@ -30,7 +29,7 @@ def benchmark_activation( ...@@ -30,7 +29,7 @@ def benchmark_activation(
device = "cuda" device = "cuda"
num_tokens = batch_size * seq_len num_tokens = batch_size * seq_len
dim = intermediate_size dim = intermediate_size
current_platform.seed_everything(42) set_random_seed(42)
torch.set_default_device(device) torch.set_default_device(device)
if func_name == "gelu_and_mul": if func_name == "gelu_and_mul":
......
...@@ -6,9 +6,8 @@ import time ...@@ -6,9 +6,8 @@ import time
import torch import torch
from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.layernorm import RMSNorm
from vllm.platforms import current_platform
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
@torch.inference_mode() @torch.inference_mode()
...@@ -22,7 +21,7 @@ def main( ...@@ -22,7 +21,7 @@ def main(
num_warmup_iters: int = 5, num_warmup_iters: int = 5,
num_iters: int = 100, num_iters: int = 100,
) -> None: ) -> None:
current_platform.seed_everything(seed) set_random_seed(seed)
torch.set_default_device("cuda") torch.set_default_device("cuda")
layer = RMSNorm(hidden_size).to(dtype=dtype) layer = RMSNorm(hidden_size).to(dtype=dtype)
......
...@@ -24,6 +24,7 @@ from vllm.platforms import current_platform ...@@ -24,6 +24,7 @@ from vllm.platforms import current_platform
from vllm.transformers_utils.config import get_config from vllm.transformers_utils.config import get_config
from vllm.triton_utils import triton from vllm.triton_utils import triton
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.torch_utils import set_random_seed
FP8_DTYPE = current_platform.fp8_dtype() FP8_DTYPE = current_platform.fp8_dtype()
...@@ -431,7 +432,7 @@ def merge_unique_dicts(list1, list2): ...@@ -431,7 +432,7 @@ def merge_unique_dicts(list1, list2):
class BenchmarkWorker: class BenchmarkWorker:
def __init__(self, seed: int) -> None: def __init__(self, seed: int) -> None:
torch.set_default_device("cuda") torch.set_default_device("cuda")
current_platform.seed_everything(seed) set_random_seed(seed)
self.seed = seed self.seed = seed
# Get the device ID to allocate tensors and kernels # Get the device ID to allocate tensors and kernels
# on the respective GPU. This is required for Ray to work # on the respective GPU. This is required for Ray to work
...@@ -451,7 +452,7 @@ class BenchmarkWorker: ...@@ -451,7 +452,7 @@ class BenchmarkWorker:
block_quant_shape: list[int] = None, block_quant_shape: list[int] = None,
use_deep_gemm: bool = False, use_deep_gemm: bool = False,
) -> tuple[dict[str, int], float]: ) -> tuple[dict[str, int], float]:
current_platform.seed_everything(self.seed) set_random_seed(self.seed)
dtype_str = _get_config_dtype_str( dtype_str = _get_config_dtype_str(
dtype, use_int8_w8a16=use_int8_w8a16, use_fp8_w8a8=use_fp8_w8a8 dtype, use_int8_w8a16=use_int8_w8a16, use_fp8_w8a8=use_fp8_w8a8
) )
......
...@@ -18,6 +18,7 @@ from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import ( ...@@ -18,6 +18,7 @@ from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
from vllm.model_executor.layers.fused_moe.utils import _fp8_quantize from vllm.model_executor.layers.fused_moe.utils import _fp8_quantize
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.torch_utils import set_random_seed
FP8_DTYPE = current_platform.fp8_dtype() FP8_DTYPE = current_platform.fp8_dtype()
...@@ -261,7 +262,7 @@ def benchmark_unpermute( ...@@ -261,7 +262,7 @@ def benchmark_unpermute(
class BenchmarkWorker: class BenchmarkWorker:
def __init__(self, seed: int) -> None: def __init__(self, seed: int) -> None:
torch.set_default_device("cuda") torch.set_default_device("cuda")
current_platform.seed_everything(seed) set_random_seed(seed)
self.seed = seed self.seed = seed
# Get the device ID to allocate tensors and kernels # Get the device ID to allocate tensors and kernels
# on the respective GPU. This is required for Ray to work # on the respective GPU. This is required for Ray to work
...@@ -279,7 +280,7 @@ class BenchmarkWorker: ...@@ -279,7 +280,7 @@ class BenchmarkWorker:
use_int8_w8a16: bool, use_int8_w8a16: bool,
use_customized_permute: bool = False, use_customized_permute: bool = False,
) -> tuple[dict[str, int], float]: ) -> tuple[dict[str, int], float]:
current_platform.seed_everything(self.seed) set_random_seed(self.seed)
permute_time = benchmark_permute( permute_time = benchmark_permute(
num_tokens, num_tokens,
......
...@@ -37,9 +37,9 @@ import numpy as np ...@@ -37,9 +37,9 @@ import numpy as np
import torch import torch
from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.platforms import current_platform
from vllm.transformers_utils.config import get_config from vllm.transformers_utils.config import get_config
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.torch_utils import set_random_seed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
...@@ -94,7 +94,7 @@ def benchmark_mrope( ...@@ -94,7 +94,7 @@ def benchmark_mrope(
benchmark_iter: int = 100, benchmark_iter: int = 100,
csv_writer=None, csv_writer=None,
): ):
current_platform.seed_everything(seed) set_random_seed(seed)
torch.set_default_device(device) torch.set_default_device(device)
# the parameters to compute the q k v size based on tp_size # the parameters to compute the q k v size based on tp_size
mrope_helper_class = get_rope( mrope_helper_class = get_rope(
......
...@@ -13,6 +13,7 @@ from vllm.utils.argparse_utils import FlexibleArgumentParser ...@@ -13,6 +13,7 @@ from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.torch_utils import ( from vllm.utils.torch_utils import (
STR_DTYPE_TO_TORCH_DTYPE, STR_DTYPE_TO_TORCH_DTYPE,
create_kv_caches_with_random, create_kv_caches_with_random,
set_random_seed,
) )
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -38,7 +39,7 @@ def main( ...@@ -38,7 +39,7 @@ def main(
device: str = "cuda", device: str = "cuda",
kv_cache_dtype: str | None = None, kv_cache_dtype: str | None = None,
) -> None: ) -> None:
current_platform.seed_everything(seed) set_random_seed(seed)
scale = float(1.0 / (head_size**0.5)) scale = float(1.0 / (head_size**0.5))
query = torch.empty( query = torch.empty(
......
...@@ -6,9 +6,8 @@ import time ...@@ -6,9 +6,8 @@ import time
import torch import torch
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.platforms import current_platform
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
@torch.inference_mode() @torch.inference_mode()
...@@ -23,7 +22,7 @@ def main( ...@@ -23,7 +22,7 @@ def main(
num_warmup_iters: int = 5, num_warmup_iters: int = 5,
num_iters: int = 100, num_iters: int = 100,
) -> None: ) -> None:
current_platform.seed_everything(seed) set_random_seed(seed)
torch.set_default_device("cuda") torch.set_default_device("cuda")
x = torch.randn(num_tokens, hidden_size, dtype=dtype) x = torch.randn(num_tokens, hidden_size, dtype=dtype)
......
...@@ -8,11 +8,11 @@ from tabulate import tabulate ...@@ -8,11 +8,11 @@ from tabulate import tabulate
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.torch_utils import ( from vllm.utils.torch_utils import (
STR_DTYPE_TO_TORCH_DTYPE, STR_DTYPE_TO_TORCH_DTYPE,
create_kv_caches_with_random, create_kv_caches_with_random,
set_random_seed,
) )
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -36,7 +36,7 @@ def run_benchmark( ...@@ -36,7 +36,7 @@ def run_benchmark(
if kv_cache_dtype == "fp8" and head_size % 16: if kv_cache_dtype == "fp8" and head_size % 16:
raise ValueError("fp8 kv-cache requires head_size to be a multiple of 16.") raise ValueError("fp8 kv-cache requires head_size to be a multiple of 16.")
current_platform.seed_everything(42) set_random_seed(42)
torch.set_default_device(device) torch.set_default_device(device)
# create random key / value tensors [T, H, D]. # create random key / value tensors [T, H, D].
......
...@@ -11,11 +11,11 @@ from vllm.attention.ops.triton_reshape_and_cache_flash import ( ...@@ -11,11 +11,11 @@ from vllm.attention.ops.triton_reshape_and_cache_flash import (
triton_reshape_and_cache_flash, triton_reshape_and_cache_flash,
) )
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.utils.argparse_utils import FlexibleArgumentParser from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.torch_utils import ( from vllm.utils.torch_utils import (
STR_DTYPE_TO_TORCH_DTYPE, STR_DTYPE_TO_TORCH_DTYPE,
create_kv_caches_with_random_flash, create_kv_caches_with_random_flash,
set_random_seed,
) )
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -49,7 +49,7 @@ def run_benchmark( ...@@ -49,7 +49,7 @@ def run_benchmark(
if implementation == "triton" and kv_cache_layout == "HND": if implementation == "triton" and kv_cache_layout == "HND":
return float("nan") # Triton does not support HND layout yet. return float("nan") # Triton does not support HND layout yet.
current_platform.seed_everything(42) set_random_seed(42)
torch.set_default_device(device) torch.set_default_device(device)
# create random key / value tensors [T, H, D]. # create random key / value tensors [T, H, D].
......
...@@ -23,9 +23,9 @@ import torch ...@@ -23,9 +23,9 @@ import torch
from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import ( from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
persistent_masked_m_silu_mul_quant, persistent_masked_m_silu_mul_quant,
) )
from vllm.platforms import current_platform
from vllm.triton_utils import tl, triton from vllm.triton_utils import tl, triton
from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used
from vllm.utils.torch_utils import set_random_seed
@triton.jit @triton.jit
...@@ -207,7 +207,7 @@ def benchmark( ...@@ -207,7 +207,7 @@ def benchmark(
): ):
def generate_data(seed_offset=0): def generate_data(seed_offset=0):
"""Generate input data with given seed offset""" """Generate input data with given seed offset"""
current_platform.seed_everything(42 + seed_offset) set_random_seed(42 + seed_offset)
y = torch.rand((E, T, 2 * H), dtype=torch.bfloat16, device="cuda").contiguous() y = torch.rand((E, T, 2 * H), dtype=torch.bfloat16, device="cuda").contiguous()
if gen_strategy == "random_imbalanced": if gen_strategy == "random_imbalanced":
......
...@@ -154,4 +154,4 @@ The interface for the model/module may change during vLLM's development. If you ...@@ -154,4 +154,4 @@ The interface for the model/module may change during vLLM's development. If you
!!! warning "Deprecations" !!! warning "Deprecations"
- `use_v1` parameter in `Platform.get_attn_backend_cls` is deprecated. It has been removed in v0.13.0. - `use_v1` parameter in `Platform.get_attn_backend_cls` is deprecated. It has been removed in v0.13.0.
- `_Backend` in `vllm.attention` is deprecated. It has been removed in v0.13.0. Please use `vllm.attention.backends.registry.register_backend` to add new attention backend to `AttentionBackendEnum` instead. - `_Backend` in `vllm.attention` is deprecated. It has been removed in v0.13.0. Please use `vllm.attention.backends.registry.register_backend` to add new attention backend to `AttentionBackendEnum` instead.
- `seed_everything` platform interface is deprecated. It will be removed in v0.14.0 or later. Please use `vllm.utils.torch_utils.set_random_seed` instead. - `seed_everything` platform interface is deprecated. It will be removed in v0.15.0 or later. Please use `vllm.utils.torch_utils.set_random_seed` instead.
...@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any, NamedTuple, Optional ...@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING, Any, NamedTuple, Optional
import numpy as np import numpy as np
import torch import torch
from typing_extensions import deprecated
from vllm.attention.backends.registry import AttentionBackendEnum from vllm.attention.backends.registry import AttentionBackendEnum
from vllm.logger import init_logger from vllm.logger import init_logger
...@@ -365,6 +366,10 @@ class Platform: ...@@ -365,6 +366,10 @@ class Platform:
return torch.inference_mode(mode=True) return torch.inference_mode(mode=True)
@classmethod @classmethod
@deprecated(
"`seed_everything` is deprecated. It will be removed in v0.15.0 or later. "
"Please use `vllm.utils.torch_utils.set_random_seed` instead."
)
def seed_everything(cls, seed: int | None = None) -> None: def seed_everything(cls, seed: int | None = None) -> None:
""" """
Set the seed of each random module. Set the seed of each random module.
...@@ -372,10 +377,6 @@ class Platform: ...@@ -372,10 +377,6 @@ class Platform:
Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20 Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
""" """
logger.info_once(
"`seed_everything` is deprecated. It will be removed in v0.14.0 or later. "
"Please use `vllm.utils.torch_utils.set_random_seed` instead."
)
if seed is not None: if seed is not None:
random.seed(seed) random.seed(seed)
np.random.seed(seed) np.random.seed(seed)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment