[Platform] Deprecate seed_everything (#31659)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>

[Platform] Deprecate seed_everything (#31659)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
bb4337b3 · wangxiyuan · GitHub · 367856de · bb4337b3 · bb4337b3
Unverified commit bb4337b3, authored Jan 05, 2026 by wangxiyuan; committed by GitHub on Jan 04, 2026
20 changed files
--- a/tests/kernels/moe/test_nvfp4_moe.py
+++ b/tests/kernels/moe/test_nvfp4_moe.py
@@ -16,6 +16,7 @@ from vllm.model_executor.layers.fused_moe.config import nvfp4_moe_quant_config
 from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp4
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 if not current_platform.has_device_capability(100):
    pytest.skip(
@@ -42,7 +43,7 @@ MNK_FACTORS = [
 def test_cutlass_fp4_moe_no_graph(
    m: int, n: int, k: int, e: int, topk: int, dtype: torch.dtype, workspace_init
 ):
-    current_platform.seed_everything(7)
+    set_random_seed(7)
    with set_current_vllm_config(
        VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1))
    ):

--- a/tests/kernels/moe/test_pplx_cutlass_moe.py
+++ b/tests/kernels/moe/test_pplx_cutlass_moe.py
@@ -14,6 +14,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import cdiv
+from vllm.utils.torch_utils import set_random_seed

 from ...utils import multi_gpu_test
 from .parallel_utils import ProcessGroupInfo, parallel_launch
@@ -290,7 +291,7 @@ def test_cutlass_moe_pplx(
    world_dp_size: tuple[int, int],
    use_internode: bool,
 ):
-    current_platform.seed_everything(7)
+    set_random_seed(7)

    with set_current_vllm_config(vllm_config):
        dtype = torch.half

--- a/tests/kernels/moe/test_pplx_moe.py
+++ b/tests/kernels/moe/test_pplx_moe.py
@@ -44,8 +44,8 @@ from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularK
 from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
    TopKWeightAndReduceDelegate,
 )
-from vllm.platforms import current_platform
 from vllm.utils.math_utils import round_up
+from vllm.utils.torch_utils import set_random_seed
 from vllm.v1.worker.workspace import init_workspace_manager

 from ...utils import multi_gpu_test
@@ -184,7 +184,7 @@ def test_fused_moe_batched_experts(
    dtype: torch.dtype,
    workspace_init,
 ):
-    current_platform.seed_everything(7)
+    set_random_seed(7)

    a = torch.randn((m, k), device="cuda", dtype=dtype) / 10
    w1 = torch.randn((e, 2 * n, k), device="cuda", dtype=dtype) / 10
@@ -491,7 +491,7 @@ def test_pplx_prepare_finalize_slow(
    if per_act_token_quant and block_shape is not None:
        pytest.skip("Skip illegal quantization combination")

-    current_platform.seed_everything(7)
+    set_random_seed(7)
    m, n, k = mnk
    world_size, dp_size = world_dp_size
    device = "cuda"
@@ -809,7 +809,7 @@ def test_pplx_moe_slow(
    block_shape: list[int] | None,
    use_internode: bool,
 ):
-    current_platform.seed_everything(7)
+    set_random_seed(7)
    m, n, k = mnk
    world_size, dp_size = world_dp_size

@@ -888,7 +888,7 @@ def _pplx_test_loop(
        new_vllm_config.parallel_config.enable_expert_parallel = True
        _set_vllm_config(new_vllm_config, pgi.world_size, pgi.rank, pgi.local_rank)

-    current_platform.seed_everything(7)
+    set_random_seed(7)
    combos = itertools.product(
        PPLX_COMBOS, NUM_EXPERTS, TOP_KS, DTYPES, [False, True], [None, [128, 128]]
    )
@@ -982,7 +982,7 @@ def test_pplx_prepare_finalize(
    world_dp_size: tuple[int, int],
    use_internode: bool,
 ):
-    current_platform.seed_everything(7)
+    set_random_seed(7)
    world_size, dp_size = world_dp_size
    parallel_launch(
        world_size * dp_size,
@@ -1005,7 +1005,7 @@ def test_pplx_moe(
    use_internode: bool,
    use_shared_experts: bool,
 ):
-    current_platform.seed_everything(7)
+    set_random_seed(7)
    world_size, dp_size = world_dp_size
    parallel_launch(
        world_size,

--- a/tests/kernels/moe/test_silu_mul_fp8_quant_deep_gemm.py
+++ b/tests/kernels/moe/test_silu_mul_fp8_quant_deep_gemm.py
@@ -13,6 +13,7 @@ from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
 from vllm.platforms import current_platform
 from vllm.utils.deep_gemm import DeepGemmQuantScaleFMT, has_deep_gemm
 from vllm.utils.math_utils import cdiv, round_up
+from vllm.utils.torch_utils import set_random_seed

 if current_platform.is_fp8_fnuz():
    pytest.skip(
@@ -201,7 +202,7 @@ def token_random(E, T, H2, tokens_per_expert):
 @torch.inference_mode()
 def test_silu_mul_fp8_quant_deep_gemm(E: int, T: int, H: int, fp8_type: torch.dtype):
    group_size = 128
-    current_platform.seed_everything(42)
+    set_random_seed(42)

    tokens_per_expert = torch.randint(
        low=0,

--- a/tests/kernels/moe/test_silu_mul_per_token_group_quant_fp8_colmajor.py
+++ b/tests/kernels/moe/test_silu_mul_per_token_group_quant_fp8_colmajor.py
@@ -11,6 +11,7 @@ from vllm.model_executor.layers.quantization.utils.fp8_utils import (
 from vllm.platforms import current_platform
 from vllm.triton_utils import triton
 from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used
+from vllm.utils.torch_utils import set_random_seed

 FLOAT8_DTYPE = torch.float8_e4m3fn
 GROUP_SIZE = 128
@@ -72,7 +73,7 @@ def reference(x: torch.Tensor, use_ue8m0: bool) -> tuple[torch.Tensor, torch.Ten
    reason="ROCm does not support DeepGemm.",
 )
 def test_silu_mul_fp8_quant_deep_gemm(T: int, N: int):
-    current_platform.seed_everything(42)
+    set_random_seed(42)

    input = torch.rand((T, N), dtype=torch.bfloat16, device="cuda")


--- a/tests/kernels/quantization/test_awq_triton.py
+++ b/tests/kernels/quantization/test_awq_triton.py
@@ -13,7 +13,7 @@ from vllm.model_executor.layers.quantization.awq_triton import (
    awq_dequantize_triton,
    awq_gemm_triton,
 )
-from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 device = "cuda"

@@ -86,7 +86,7 @@ def test_dequantize(qweight_rows, qweight_cols, group_size):
    zeros_cols = qweight_cols
    zeros_dtype = torch.int32

-    current_platform.seed_everything(0)
+    set_random_seed(0)

    qweight = torch.randint(
        0,
@@ -141,7 +141,7 @@ def test_gemm(N, K, M, splitK, group_size):
    qzeros_rows = scales_rows
    qzeros_cols = qweight_cols

-    current_platform.seed_everything(0)
+    set_random_seed(0)

    input = torch.rand((input_rows, input_cols), dtype=input_dtype, device=device)
    qweight = torch.randint(

--- a/tests/kernels/quantization/test_cutlass_w4a8_moe.py
+++ b/tests/kernels/quantization/test_cutlass_w4a8_moe.py
@@ -17,6 +17,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
 )
 from vllm.platforms import current_platform
 from vllm.scalar_type import ScalarType, scalar_types
+from vllm.utils.torch_utils import set_random_seed

 IS_SUPPORTED_BY_GPU = (
    current_platform.is_cuda() and current_platform.get_device_capability()[0] >= 9
@@ -248,7 +249,7 @@ def compute_moe_reference_output(setup: MoETestSetup) -> torch.Tensor:
 @pytest.mark.parametrize("random_zero", [True, False])
 def test_cutlass_w4a8_moe_mm_end_to_end(shape, random_zero):
    num_experts, N, K = shape
-    current_platform.seed_everything(42)
+    set_random_seed(42)
    setup = make_moe_test_setup(
        num_experts=num_experts, K=K, N=N, max_blocks=64, random_zero=random_zero
    )
@@ -308,7 +309,7 @@ class W4A8MoELayer(torch.nn.Module):
    reason="W4A8 Grouped GEMM is not supported on this GPU type.",
 )
 def test_cutlass_w4a8_moe_mm_cuda_graph():
-    current_platform.seed_everything(42)
+    set_random_seed(42)
    # Fixed config for CUDA graph test (single parameter point).
    num_experts = 8
    K = 512

--- a/tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
+++ b/tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py
@@ -12,6 +12,7 @@ from nvfp4_utils import (
 from vllm import _custom_ops as ops
 from vllm.platforms import current_platform
 from vllm.utils.flashinfer import flashinfer_scaled_fp4_mm
+from vllm.utils.torch_utils import set_random_seed

 if not current_platform.has_device_capability(100):
    pytest.skip(
@@ -72,7 +73,7 @@ def test_flashinfer_nvfp4_gemm(
    if backend == "trtllm" and dtype == torch.float16:
        pytest.skip("Only torch.bfloat16 is supported for TRTLLM FP4 GEMM operations")

-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
    m, n, packed_k = shape
    k = packed_k * 2
    block_size = 16

--- a/tests/kernels/quantization/test_flashinfer_scaled_mm.py
+++ b/tests/kernels/quantization/test_flashinfer_scaled_mm.py
@@ -6,6 +6,7 @@ import torch
 from vllm import _custom_ops as ops
 from vllm.platforms import current_platform
 from vllm.utils.flashinfer import flashinfer_scaled_fp8_mm
+from vllm.utils.torch_utils import set_random_seed

 if not current_platform.has_device_capability(100):
    pytest.skip(
@@ -38,7 +39,7 @@ def test_flashinfer_fp8_gemm(
    device: str,
    autotune: bool,
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
    m, n, k = shape
    a = torch.randn((m, k), dtype=dtype, device=device)
    b = torch.randn((n, k), dtype=dtype, device=device) / k

--- a/tests/kernels/quantization/test_fp8_quant.py
+++ b/tests/kernels/quantization/test_fp8_quant.py
@@ -11,7 +11,7 @@ from tests.kernels.quant_utils import (
    ref_dynamic_per_token_quant,
 )
 from tests.kernels.utils import opcheck
-from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 DTYPES = [torch.bfloat16, torch.float]
 HIDDEN_SIZES = [17, 1024, 1025, 1026, 5137, 8193]
@@ -51,7 +51,7 @@ def opcheck_fp8_quant(
 def test_dynamic_per_token_fp8_quant(
    num_tokens: int, hidden_size: int, dtype: torch.dtype, scale_ub: bool, seed: int
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)

    x = (
        torch.rand(num_tokens, hidden_size, dtype=dtype, device="cuda") + 1e-6
@@ -81,7 +81,7 @@ def test_dynamic_per_token_fp8_quant(
 def test_dynamic_per_tensor_fp8_quant(
    num_tokens: int, hidden_size: int, dtype: torch.dtype, seed: int
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)

    x = torch.rand(num_tokens, hidden_size, dtype=dtype, device="cuda")

@@ -101,7 +101,7 @@ def test_dynamic_per_tensor_fp8_quant(
 @torch.inference_mode()
 @pytest.mark.parametrize("seed", SEEDS)
 def test_fp8_quant_large(seed: int) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)

    num_tokens = 1024000  # Mistral-Nemo's max_position_embeddings
    hidden_size = 1152  # Smallest hidden_size to reproduce the error

--- a/tests/kernels/quantization/test_fp8_quant_group.py
+++ b/tests/kernels/quantization/test_fp8_quant_group.py
@@ -7,7 +7,7 @@ import torch

 from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8
 from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
-from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed


 @pytest.mark.parametrize(
@@ -30,7 +30,7 @@ def test_quantfp8_group_functionality(
    Tests both CUDA and native implementations, column-major scales,
    and verifies consistency between implementations.
    """
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)

    x = torch.randn((batch_size, hidden_dim), dtype=torch.bfloat16, device="cuda") * 8
    expected_num_groups = (hidden_dim + group_size - 1) // group_size
@@ -83,7 +83,7 @@ def test_quantfp8_group_functionality(
 @pytest.mark.parametrize("use_ue8m0", [True, False])
 @torch.inference_mode()
 def test_quantfp8_group_multidimensional(seed: int, use_ue8m0: bool) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)

    group_size = 64

@@ -136,7 +136,7 @@ def test_quantfp8_group_multidimensional(seed: int, use_ue8m0: bool) -> None:
 @pytest.mark.parametrize("seed", [42])
 @torch.inference_mode()
 def test_quantfp8_group_edge_cases(seed: int) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)

    batch_size = 16
    group_size = 64

--- a/tests/kernels/quantization/test_gguf.py
+++ b/tests/kernels/quantization/test_gguf.py
@@ -11,7 +11,7 @@ from huggingface_hub import snapshot_download
 import vllm._custom_ops as ops
 from vllm.model_executor.layers.fused_moe import fused_experts
 from vllm.model_executor.layers.quantization.gguf import _fused_moe_gguf
-from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 GGUF_SAMPLE = snapshot_download("Isotr0py/test-gguf-sample")
 GGUF_SAMPLE_MOE = snapshot_download("SzymonOzog/test-gguf-moe-sample")
@@ -91,7 +91,7 @@ def test_dequantize(
 @pytest.mark.parametrize("quant_type", QUANT_TYPES)
 @torch.inference_mode()
 def test_mmvq(hidden_size: int, dtype: torch.dtype, quant_type: GGMLQuantizationType):
-    current_platform.seed_everything(0)
+    set_random_seed(0)

    tensors = get_gguf_sample_tensors(hidden_size, quant_type)
    x = torch.rand((1, hidden_size), dtype=dtype, device="cuda")
@@ -134,7 +134,7 @@ def test_mmq(
    dtype: torch.dtype,
    quant_type: GGMLQuantizationType,
 ):
-    current_platform.seed_everything(0)
+    set_random_seed(0)

    tensors = get_gguf_sample_tensors(hidden_size, quant_type)
    x = torch.rand((num_tokens, hidden_size), dtype=dtype, device="cuda")
@@ -169,7 +169,7 @@ def test_moe(
    quant_type: GGMLQuantizationType,
    top_k: int,
 ):
-    current_platform.seed_everything(0)
+    set_random_seed(0)
    H, E = 1024, 256

    x = torch.rand((num_tokens, H), dtype=dtype, device="cuda")

--- a/tests/kernels/quantization/test_int8_quant.py
+++ b/tests/kernels/quantization/test_int8_quant.py
@@ -7,7 +7,7 @@ import torch
 from tests.kernels.quant_utils import ref_dynamic_per_token_quant
 from tests.kernels.utils import opcheck
 from vllm._custom_ops import scaled_int8_quant
-from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 DTYPES = [torch.bfloat16, torch.float]
 HIDDEN_SIZES = [17, 1024, 1025, 1026, 5137, 8193]
@@ -46,7 +46,7 @@ def opcheck_int8_quant_dynamic(output, input, symmetric=True):
 def test_dynamic_scaled_int8_quant(
    num_tokens: int, hidden_size: int, dtype: torch.dtype, seed: int
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)

    x = torch.rand(num_tokens, hidden_size, dtype=dtype, device="cuda") * 1000

@@ -70,7 +70,7 @@ def test_dynamic_scaled_int8_quant(
 def test_dynamic_scaled_int8_azp_quant(
    num_tokens: int, hidden_size: int, dtype: torch.dtype, seed: int
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
    int8_traits = torch.iinfo(torch.int8)

    x = torch.rand(num_tokens, hidden_size, dtype=dtype, device="cuda") * 1000 - 300
@@ -111,7 +111,7 @@ def test_dynamic_scaled_int8_azp_quant(
 def test_static_scaled_int8_quant(
    num_tokens: int, hidden_size: int, dtype: torch.dtype, seed: int, scale: float
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
    int8_traits = torch.iinfo(torch.int8)

    x = torch.rand(num_tokens, hidden_size, dtype=dtype, device="cuda") * 1000
@@ -144,7 +144,7 @@ def test_static_scaled_int8_azp_quant(
    scale: float,
    azp: int,
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
    int8_traits = torch.iinfo(torch.int8)

    x = torch.rand(num_tokens, hidden_size, dtype=dtype, device="cuda") * 1000 - 300

--- a/tests/kernels/quantization/test_mxfp4_qutlass.py
+++ b/tests/kernels/quantization/test_mxfp4_qutlass.py
@@ -24,6 +24,7 @@ from compressed_tensors.transform.utils.hadamard import deterministic_hadamard_m
 from vllm._custom_ops import fusedQuantizeMx, matmul_mxf4_bf16_tn
 from vllm.model_executor.layers.quantization.qutlass_utils import to_blocked
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 if not torch.cuda.is_available():
    pytest.skip("CUDA required for these tests.", allow_module_level=True)
@@ -205,7 +206,7 @@ LLAMA_MODELS = {

 @pytest.fixture(autouse=True)
 def _seed_each_test():
-    current_platform.seed_everything(0)
+    set_random_seed(0)
    np.random.seed(0)
    torch.random.manual_seed(0)


--- a/tests/kernels/quantization/test_nvfp4_quant.py
+++ b/tests/kernels/quantization/test_nvfp4_quant.py
@@ -6,6 +6,7 @@ import torch
 from vllm import _custom_ops as ops
 from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
+from vllm.utils.torch_utils import set_random_seed

 if not current_platform.has_device_capability(100):
    pytest.skip(
@@ -134,7 +135,7 @@ def test_quantize_to_fp4(
    seed: int,
    device: str,
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
    torch.set_default_device(device)

    m, n = shape
@@ -156,7 +157,7 @@ def test_quantize_to_fp4(
 @torch.inference_mode()
 def test_quantize_to_fp4_padded(pad_shape: tuple[int, int]) -> None:
    dtype = torch.float16
-    current_platform.seed_everything(42)
+    set_random_seed(42)
    torch.set_default_device("cuda:0")

    m, n = pad_shape

--- a/tests/kernels/quantization/test_nvfp4_qutlass.py
+++ b/tests/kernels/quantization/test_nvfp4_qutlass.py
@@ -25,6 +25,7 @@ from vllm import _custom_ops as ops  # use existing nvfp4 gemm in vllm
 from vllm._custom_ops import fusedQuantizeNv
 from vllm.model_executor.layers.quantization.qutlass_utils import to_blocked
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 if not torch.cuda.is_available():
    pytest.skip("CUDA required for these tests.", allow_module_level=True)
@@ -193,7 +194,7 @@ LLAMA_MODELS = {

 @pytest.fixture(autouse=True)
 def _seed_each_test():
-    current_platform.seed_everything(0)
+    set_random_seed(0)
    np.random.seed(0)
    torch.random.manual_seed(0)


--- a/tests/kernels/quantization/test_nvfp4_scaled_mm.py
+++ b/tests/kernels/quantization/test_nvfp4_scaled_mm.py
@@ -6,6 +6,7 @@ from nvfp4_utils import FLOAT4_E2M1_MAX, FLOAT8_E4M3_MAX, dequantize_nvfp4_to_dt

 from vllm import _custom_ops as ops
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 if not current_platform.has_device_capability(100):
    pytest.skip(
@@ -59,7 +60,7 @@ def test_nvfp4_gemm(
    seed: int,
    device: str,
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
    m, n, packed_k = shape
    k = packed_k * 2
    block_size = 16

--- a/tests/kernels/quantization/test_silu_mul_nvfp4_quant.py
+++ b/tests/kernels/quantization/test_silu_mul_nvfp4_quant.py
@@ -11,6 +11,7 @@ from tests.kernels.quantization.nvfp4_utils import (
 from vllm._custom_ops import scaled_fp4_quant
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 if not current_platform.has_device_capability(100):
    pytest.skip(
@@ -33,7 +34,7 @@ def test_silu_mul_nvfp4_quant(
    dtype: torch.dtype,
    shape: tuple[int, int],
 ) -> None:
-    current_platform.seed_everything(42)
+    set_random_seed(42)
    device = "cuda:0"
    torch.set_default_device(device)


--- a/tests/kernels/quantization/test_triton_scaled_mm.py
+++ b/tests/kernels/quantization/test_triton_scaled_mm.py
@@ -11,6 +11,7 @@ import pytest
 import torch

 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 device = "cuda"

@@ -85,7 +86,7 @@ def test_scaled_mm(
 ):
    is_floating_point_type = lambda t: torch.tensor([1, 1], dtype=t).is_floating_point()

-    current_platform.seed_everything(0)
+    set_random_seed(0)

    # NOTE: There are cases, where if the matrix is large enough, an output
    # like 65504.4 can be produced, and can easily turn into inf when

--- a/tests/kernels/test_apply_repetition_penalties.py
+++ b/tests/kernels/test_apply_repetition_penalties.py
@@ -9,6 +9,7 @@ from vllm._custom_ops import (
    apply_repetition_penalties_torch,
 )
 from vllm.platforms import current_platform
+from vllm.utils.torch_utils import set_random_seed

 NUM_SEQS = [1, 2, 3, 4, 8, 13, 17, 32, 37, 256, 1023, 1024, 1025]
 # [stress, stress, stress, Qwen, llama 4]
@@ -38,7 +39,7 @@ def test_apply_repetition_penalties(
    Test the apply_repetition_penalties custom op
    against a reference implementation.
    """
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
    torch.set_default_device("cuda:0")

    # Create test data
@@ -95,7 +96,7 @@ def test_apply_repetition_penalties_zero_seqs() -> None:
    dtype = torch.float32
    seed = 0

-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
    torch.set_default_device("cuda:0")

    # Create test data