Unverified commit bb4337b3, authored by wangxiyuan and committed by GitHub
Browse files

[Platform] Deprecate seed_everything (#31659)


Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
parent 367856de
...@@ -10,7 +10,7 @@ from vllm.model_executor.layers.fla.ops.layernorm_guard import ( ...@@ -10,7 +10,7 @@ from vllm.model_executor.layers.fla.ops.layernorm_guard import (
layernorm_fn, layernorm_fn,
rms_norm_ref, rms_norm_ref,
) )
from vllm.platforms import current_platform from vllm.utils.torch_utils import set_random_seed
def layer_norm_ref( def layer_norm_ref(
...@@ -114,7 +114,7 @@ def test_layer_norm_fwd_basic( ...@@ -114,7 +114,7 @@ def test_layer_norm_fwd_basic(
is_rms_norm: bool, is_rms_norm: bool,
) -> None: ) -> None:
"""Test basic layer norm forward pass without z (gate) tensor.""" """Test basic layer norm forward pass without z (gate) tensor."""
current_platform.seed_everything(seed) set_random_seed(seed)
device = torch.device("cuda:0") device = torch.device("cuda:0")
# Create inputs # Create inputs
...@@ -156,7 +156,7 @@ def test_layer_norm_fwd_with_gate( ...@@ -156,7 +156,7 @@ def test_layer_norm_fwd_with_gate(
is_rms_norm: bool, is_rms_norm: bool,
) -> None: ) -> None:
"""Test layer norm forward pass with z (gate) tensor.""" """Test layer norm forward pass with z (gate) tensor."""
current_platform.seed_everything(42) set_random_seed(42)
device = torch.device("cuda:0") device = torch.device("cuda:0")
# Create inputs # Create inputs
...@@ -213,7 +213,7 @@ def test_layer_norm_fwd_with_groups( ...@@ -213,7 +213,7 @@ def test_layer_norm_fwd_with_groups(
f"hidden_size {hidden_size} not divisible by group_size {group_size}" f"hidden_size {hidden_size} not divisible by group_size {group_size}"
) )
current_platform.seed_everything(42) set_random_seed(42)
device = torch.device("cuda:0") device = torch.device("cuda:0")
# Create inputs # Create inputs
...@@ -253,7 +253,7 @@ def test_layer_norm_rows_per_block( ...@@ -253,7 +253,7 @@ def test_layer_norm_rows_per_block(
dtype: torch.dtype, dtype: torch.dtype,
) -> None: ) -> None:
"""Test that rows_per_block logic works correctly for various M sizes.""" """Test that rows_per_block logic works correctly for various M sizes."""
current_platform.seed_everything(42) set_random_seed(42)
device = torch.device("cuda:0") device = torch.device("cuda:0")
hidden_size = 1024 hidden_size = 1024
...@@ -278,7 +278,7 @@ def test_layer_norm_rows_per_block( ...@@ -278,7 +278,7 @@ def test_layer_norm_rows_per_block(
def test_strided_input(dtype: torch.dtype) -> None: def test_strided_input(dtype: torch.dtype) -> None:
"""Test that the kernel handles non-contiguous (strided) """Test that the kernel handles non-contiguous (strided)
inputs correctly.""" inputs correctly."""
current_platform.seed_everything(42) set_random_seed(42)
device = torch.device("cuda:0") device = torch.device("cuda:0")
num_tokens = 128 num_tokens = 128
hidden_size = 1024 hidden_size = 1024
...@@ -318,7 +318,7 @@ def test_output_buffer_provided( ...@@ -318,7 +318,7 @@ def test_output_buffer_provided(
dtype: torch.dtype, dtype: torch.dtype,
) -> None: ) -> None:
"""Test that the kernel works when an output buffer is provided.""" """Test that the kernel works when an output buffer is provided."""
current_platform.seed_everything(42) set_random_seed(42)
device = torch.device("cuda:0") device = torch.device("cuda:0")
# Create inputs # Create inputs
...@@ -359,7 +359,7 @@ def test_multidimensional_input( ...@@ -359,7 +359,7 @@ def test_multidimensional_input(
dtype: torch.dtype, dtype: torch.dtype,
) -> None: ) -> None:
"""Test that the autograd function handles multidimensional inputs.""" """Test that the autograd function handles multidimensional inputs."""
current_platform.seed_everything(42) set_random_seed(42)
device = torch.device("cuda:0") device = torch.device("cuda:0")
hidden_size = shape[-1] hidden_size = shape[-1]
......
...@@ -18,8 +18,8 @@ from vllm.distributed.parallel_state import ( ...@@ -18,8 +18,8 @@ from vllm.distributed.parallel_state import (
get_tensor_model_parallel_world_size, get_tensor_model_parallel_world_size,
) )
from vllm.lora.ops.triton_ops import fused_moe_lora from vllm.lora.ops.triton_ops import fused_moe_lora
from vllm.platforms import current_platform
from vllm.utils.network_utils import get_open_port from vllm.utils.network_utils import get_open_port
from vllm.utils.torch_utils import set_random_seed
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
...@@ -265,7 +265,7 @@ def test_fused_moe_lora_kernel( ...@@ -265,7 +265,7 @@ def test_fused_moe_lora_kernel(
seed, seed,
): ):
torch.set_default_device(device) torch.set_default_device(device)
current_platform.seed_everything(seed) set_random_seed(seed)
# the number of randomly generated sentences. # the number of randomly generated sentences.
num_sequences = 10 num_sequences = 10
# generate data # generate data
...@@ -358,7 +358,7 @@ def test_fused_moe_lora_kernel_fully_sharded( ...@@ -358,7 +358,7 @@ def test_fused_moe_lora_kernel_fully_sharded(
seed, seed,
column_parallel, column_parallel,
): ):
current_platform.seed_everything(seed) set_random_seed(seed)
# the number of randomly generated sentences. # the number of randomly generated sentences.
num_sequences = 10 num_sequences = 10
# generate data # generate data
...@@ -415,7 +415,7 @@ def use_fused_moe_lora_kernel_tensor_parallel( ...@@ -415,7 +415,7 @@ def use_fused_moe_lora_kernel_tensor_parallel(
def _get_shard_slice(shard_size): def _get_shard_slice(shard_size):
return slice(local_rank * shard_size, (local_rank + 1) * shard_size) return slice(local_rank * shard_size, (local_rank + 1) * shard_size)
current_platform.seed_everything(seed) set_random_seed(seed)
device = torch.device(f"cuda:{local_rank}") device = torch.device(f"cuda:{local_rank}")
torch.cuda.set_device(device) torch.cuda.set_device(device)
......
...@@ -43,8 +43,8 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ...@@ -43,8 +43,8 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
VocabParallelEmbedding, VocabParallelEmbedding,
get_masked_input_and_mask, get_masked_input_and_mask,
) )
from vllm.model_executor.utils import set_random_seed
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.torch_utils import set_random_seed
from .utils import DummyLoRAManager from .utils import DummyLoRAManager
......
...@@ -9,7 +9,7 @@ import vllm.lora.ops.torch_ops as torch_ops ...@@ -9,7 +9,7 @@ import vllm.lora.ops.torch_ops as torch_ops
import vllm.lora.ops.triton_ops as triton_ops import vllm.lora.ops.triton_ops as triton_ops
from vllm.lora.ops.triton_ops import LoRAKernelMeta from vllm.lora.ops.triton_ops import LoRAKernelMeta
from vllm.lora.ops.triton_ops.utils import _LORA_A_PTR_DICT, _LORA_B_PTR_DICT from vllm.lora.ops.triton_ops.utils import _LORA_A_PTR_DICT, _LORA_B_PTR_DICT
from vllm.platforms import current_platform from vllm.utils.torch_utils import set_random_seed
from .utils import PunicaTensors, assert_close, generate_data_for_nslices from .utils import PunicaTensors, assert_close, generate_data_for_nslices
...@@ -395,7 +395,7 @@ def test_kernels( ...@@ -395,7 +395,7 @@ def test_kernels(
Tests LoRA kernels. Tests LoRA kernels.
""" """
torch.set_default_device(device) torch.set_default_device(device)
current_platform.seed_everything(seed) set_random_seed(seed)
if op_type == "shrink": if op_type == "shrink":
check_lora_shrink_kernel( check_lora_shrink_kernel(
...@@ -447,7 +447,7 @@ def test_kernels_hidden_size( ...@@ -447,7 +447,7 @@ def test_kernels_hidden_size(
Tests SGMV and LoRA kernels. Tests SGMV and LoRA kernels.
""" """
torch.set_default_device(device) torch.set_default_device(device)
current_platform.seed_everything(seed) set_random_seed(seed)
if op_type == "shrink": if op_type == "shrink":
check_lora_shrink_kernel( check_lora_shrink_kernel(
......
...@@ -21,6 +21,7 @@ from vllm.model_executor.models.vision import ( ...@@ -21,6 +21,7 @@ from vllm.model_executor.models.vision import (
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.network_utils import get_open_port from vllm.utils.network_utils import get_open_port
from vllm.utils.system_utils import update_environment_variables from vllm.utils.system_utils import update_environment_variables
from vllm.utils.torch_utils import set_random_seed
pytestmark = pytest.mark.cpu_test pytestmark = pytest.mark.cpu_test
...@@ -98,7 +99,7 @@ def run_dp_sharded_vision_model_vs_direct( ...@@ -98,7 +99,7 @@ def run_dp_sharded_vision_model_vs_direct(
""" """
# Set random seed for reproducibility # Set random seed for reproducibility
current_platform.seed_everything(0) set_random_seed(0)
device = f"{current_platform.device_name}:{local_rank}" device = f"{current_platform.device_name}:{local_rank}"
current_platform.set_device(device) current_platform.set_device(device)
...@@ -284,7 +285,7 @@ def run_dp_sharded_mrope_vision_model_vs_direct( ...@@ -284,7 +285,7 @@ def run_dp_sharded_mrope_vision_model_vs_direct(
calling the model directly. calling the model directly.
""" """
# Set random seed for reproducibility # Set random seed for reproducibility
current_platform.seed_everything(0) set_random_seed(0)
device = f"{current_platform.device_name}:{local_rank}" device = f"{current_platform.device_name}:{local_rank}"
current_platform.set_device(device) current_platform.set_device(device)
torch.set_default_device(device) torch.set_default_device(device)
...@@ -408,7 +409,7 @@ def run_dp_sharded_mrope_vision_model_uneven_load_worker( ...@@ -408,7 +409,7 @@ def run_dp_sharded_mrope_vision_model_uneven_load_worker(
): ):
"""Test run_dp_sharded_mrope_vision_model with uneven load distribution.""" """Test run_dp_sharded_mrope_vision_model with uneven load distribution."""
# Set up distributed environment # Set up distributed environment
current_platform.seed_everything(123) set_random_seed(123)
device = f"{current_platform.device_name}:{local_rank}" device = f"{current_platform.device_name}:{local_rank}"
current_platform.set_device(device) current_platform.set_device(device)
torch.set_default_device(device) torch.set_default_device(device)
......
...@@ -19,7 +19,11 @@ from vllm.attention.backends.registry import AttentionBackendEnum ...@@ -19,7 +19,11 @@ from vllm.attention.backends.registry import AttentionBackendEnum
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.math_utils import cdiv from vllm.utils.math_utils import cdiv
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, is_torch_equal_or_newer from vllm.utils.torch_utils import (
STR_DTYPE_TO_TORCH_DTYPE,
is_torch_equal_or_newer,
set_random_seed,
)
from vllm.v1.attention.backends.utils import ( from vllm.v1.attention.backends.utils import (
CommonAttentionMetadata, CommonAttentionMetadata,
set_kv_cache_layout, set_kv_cache_layout,
...@@ -320,7 +324,7 @@ def _test_backend_correctness( ...@@ -320,7 +324,7 @@ def _test_backend_correctness(
multiple GPUs. This tests that backends work correctly with different multiple GPUs. This tests that backends work correctly with different
head counts. head counts.
""" """
current_platform.seed_everything(42) set_random_seed(42)
hf_config_override = None hf_config_override = None
if tensor_parallel_size > 1: if tensor_parallel_size > 1:
......
...@@ -7,6 +7,7 @@ import pytest ...@@ -7,6 +7,7 @@ import pytest
import torch import torch
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec
from vllm.v1.kv_offload.worker.cpu_gpu import CpuGpuOffloadingHandlers from vllm.v1.kv_offload.worker.cpu_gpu import CpuGpuOffloadingHandlers
...@@ -62,7 +63,7 @@ def test_transfer( ...@@ -62,7 +63,7 @@ def test_transfer(
seed: int, seed: int,
device: str, device: str,
) -> None: ) -> None:
current_platform.seed_everything(seed) set_random_seed(seed)
# create per-layer GPU KV caches based on available attn_backends # create per-layer GPU KV caches based on available attn_backends
attn_backends_list = BACKENDS_TO_TEST attn_backends_list = BACKENDS_TO_TEST
......
...@@ -15,6 +15,7 @@ import torch_xla.core.xla_model ...@@ -15,6 +15,7 @@ import torch_xla.core.xla_model
from vllm.attention.layers.mm_encoder_attention import MMEncoderAttention from vllm.attention.layers.mm_encoder_attention import MMEncoderAttention
from vllm.attention.selector import _cached_get_attn_backend from vllm.attention.selector import _cached_get_attn_backend
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.torch_utils import set_random_seed
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
...@@ -63,7 +64,7 @@ def test_mha_attn_forward( ...@@ -63,7 +64,7 @@ def test_mha_attn_forward(
head_size: int, head_size: int,
device: str, device: str,
): ):
current_platform.seed_everything(0) set_random_seed(0)
# These are expected to be f32 # These are expected to be f32
q = torch.randn(batch_size, seq_len, num_heads * head_size, device=device) q = torch.randn(batch_size, seq_len, num_heads * head_size, device=device)
k = torch.randn(batch_size, seq_len, num_kv_heads * head_size, device=device) k = torch.randn(batch_size, seq_len, num_kv_heads * head_size, device=device)
......
...@@ -26,6 +26,7 @@ from vllm.platforms import current_platform ...@@ -26,6 +26,7 @@ from vllm.platforms import current_platform
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_constants import GiB_bytes
from vllm.utils.system_utils import update_environment_variables from vllm.utils.system_utils import update_environment_variables
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.core.kv_cache_utils import estimate_max_model_len, get_kv_cache_configs from vllm.v1.core.kv_cache_utils import estimate_max_model_len, get_kv_cache_configs
from vllm.v1.core.sched.output import CachedRequestData, NewRequestData, SchedulerOutput from vllm.v1.core.sched.output import CachedRequestData, NewRequestData, SchedulerOutput
from vllm.v1.kv_cache_interface import ( from vllm.v1.kv_cache_interface import (
...@@ -776,7 +777,7 @@ def test_hybrid_attention_mamba_tensor_shapes(): ...@@ -776,7 +777,7 @@ def test_hybrid_attention_mamba_tensor_shapes():
will not corrupt an attention block and vice versa will not corrupt an attention block and vice versa
""" """
current_platform.seed_everything(42) set_random_seed(42)
update_environment_variables( update_environment_variables(
{ {
......
...@@ -2,10 +2,8 @@ ...@@ -2,10 +2,8 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.model_executor.parameter import BasevLLMParameter, PackedvLLMParameter from vllm.model_executor.parameter import BasevLLMParameter, PackedvLLMParameter
from vllm.model_executor.utils import set_random_seed
__all__ = [ __all__ = [
"set_random_seed",
"BasevLLMParameter", "BasevLLMParameter",
"PackedvLLMParameter", "PackedvLLMParameter",
] ]
...@@ -10,12 +10,6 @@ import torch ...@@ -10,12 +10,6 @@ import torch
from vllm.utils.torch_utils import is_torch_equal_or_newer from vllm.utils.torch_utils import is_torch_equal_or_newer
def set_random_seed(seed: int | None) -> None:
from vllm.platforms import current_platform
current_platform.seed_everything(seed)
def set_weight_attrs( def set_weight_attrs(
weight: torch.Tensor, weight: torch.Tensor,
weight_attrs: dict[str, Any] | None, weight_attrs: dict[str, Any] | None,
......
...@@ -372,6 +372,10 @@ class Platform: ...@@ -372,6 +372,10 @@ class Platform:
Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20 Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
""" """
logger.info_once(
"`seed_everything` is deprecated. It will be removed in v0.14.0 or later. "
"Please use `vllm.utils.torch_utils.set_random_seed` instead."
)
if seed is not None: if seed is not None:
random.seed(seed) random.seed(seed)
np.random.seed(seed) np.random.seed(seed)
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import contextlib import contextlib
import importlib.metadata import importlib.metadata
import os import os
import random
import threading import threading
from collections.abc import Callable, Collection from collections.abc import Callable, Collection
from functools import lru_cache from functools import lru_cache
...@@ -278,6 +279,13 @@ def kv_cache_dtype_str_to_dtype( ...@@ -278,6 +279,13 @@ def kv_cache_dtype_str_to_dtype(
return STR_DTYPE_TO_TORCH_DTYPE[kv_cache_dtype] return STR_DTYPE_TO_TORCH_DTYPE[kv_cache_dtype]
def set_random_seed(seed: int | None) -> None:
if seed is not None:
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
def create_kv_caches_with_random_flash( def create_kv_caches_with_random_flash(
num_blocks: int, num_blocks: int,
block_size: int, block_size: int,
...@@ -290,9 +298,7 @@ def create_kv_caches_with_random_flash( ...@@ -290,9 +298,7 @@ def create_kv_caches_with_random_flash(
device: str | None = "cuda", device: str | None = "cuda",
cache_layout: str | None = "NHD", cache_layout: str | None = "NHD",
) -> tuple[list[torch.Tensor], list[torch.Tensor]]: ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
from vllm.platforms import current_platform set_random_seed(seed)
current_platform.seed_everything(seed)
dtype = get_kv_cache_torch_dtype(cache_dtype, model_dtype) dtype = get_kv_cache_torch_dtype(cache_dtype, model_dtype)
generic_kv_cache_shape = (num_blocks, 2, block_size, num_heads, head_size) generic_kv_cache_shape = (num_blocks, 2, block_size, num_heads, head_size)
...@@ -335,9 +341,8 @@ def create_kv_caches_with_random( ...@@ -335,9 +341,8 @@ def create_kv_caches_with_random(
raise ValueError( raise ValueError(
f"Does not support key cache of type fp8 with head_size {head_size}" f"Does not support key cache of type fp8 with head_size {head_size}"
) )
from vllm.platforms import current_platform
current_platform.seed_everything(seed) set_random_seed(seed)
dtype = get_kv_cache_torch_dtype(cache_dtype, model_dtype) dtype = get_kv_cache_torch_dtype(cache_dtype, model_dtype)
......
...@@ -10,10 +10,10 @@ import torch ...@@ -10,10 +10,10 @@ import torch
from vllm import envs from vllm import envs
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.utils import set_random_seed
from vllm.platforms import CpuArchEnum, current_platform from vllm.platforms import CpuArchEnum, current_platform
from vllm.platforms.cpu import CpuPlatform, LogicalCPUInfo from vllm.platforms.cpu import CpuPlatform, LogicalCPUInfo
from vllm.profiler.wrapper import TorchProfilerWrapper from vllm.profiler.wrapper import TorchProfilerWrapper
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.worker.cpu_model_runner import CPUModelRunner from vllm.v1.worker.cpu_model_runner import CPUModelRunner
from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment
......
...@@ -34,7 +34,6 @@ from vllm.distributed.parallel_state import ( ...@@ -34,7 +34,6 @@ from vllm.distributed.parallel_state import (
) )
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.model_executor import set_random_seed
from vllm.model_executor.models.interfaces import is_mixture_of_experts from vllm.model_executor.models.interfaces import is_mixture_of_experts
from vllm.model_executor.warmup.kernel_warmup import kernel_warmup from vllm.model_executor.warmup.kernel_warmup import kernel_warmup
from vllm.platforms import current_platform from vllm.platforms import current_platform
...@@ -43,6 +42,7 @@ from vllm.sequence import IntermediateTensors ...@@ -43,6 +42,7 @@ from vllm.sequence import IntermediateTensors
from vllm.tasks import SupportedTask from vllm.tasks import SupportedTask
from vllm.utils.mem_constants import GiB_bytes from vllm.utils.mem_constants import GiB_bytes
from vllm.utils.mem_utils import MemorySnapshot, memory_profiling from vllm.utils.mem_utils import MemorySnapshot, memory_profiling
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType
from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec from vllm.v1.kv_cache_interface import KVCacheConfig, KVCacheSpec
......
...@@ -20,12 +20,11 @@ from vllm.distributed.kv_transfer import ( ...@@ -20,12 +20,11 @@ from vllm.distributed.kv_transfer import (
) )
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.model_executor import set_random_seed
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.platforms.tpu import USE_TPU_INFERENCE from vllm.platforms.tpu import USE_TPU_INFERENCE
from vllm.tasks import SupportedTask from vllm.tasks import SupportedTask
from vllm.utils.math_utils import cdiv from vllm.utils.math_utils import cdiv
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig, KVCacheSpec from vllm.v1.kv_cache_interface import AttentionSpec, KVCacheConfig, KVCacheSpec
from vllm.v1.outputs import ModelRunnerOutput from vllm.v1.outputs import ModelRunnerOutput
......
...@@ -9,9 +9,9 @@ import torch.distributed ...@@ -9,9 +9,9 @@ import torch.distributed
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.distributed import get_world_group from vllm.distributed import get_world_group
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor import set_random_seed
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.profiler.wrapper import TorchProfilerWrapper from vllm.profiler.wrapper import TorchProfilerWrapper
from vllm.utils.torch_utils import set_random_seed
from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment
from vllm.v1.worker.xpu_model_runner import XPUModelRunner from vllm.v1.worker.xpu_model_runner import XPUModelRunner
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment