Unverified commit 1e4ecca1, authored by Cyrus Leung, committed by GitHub
Browse files

[V0 Deprecation] Remove `VLLM_USE_V1` from tests (#26341)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent c0a7b89d
...@@ -296,6 +296,7 @@ steps: ...@@ -296,6 +296,7 @@ steps:
- tests/v1 - tests/v1
commands: commands:
# split the test to avoid interference # split the test to avoid interference
- pytest -v -s -m 'not cpu_test' v1/core
- pytest -v -s v1/executor - pytest -v -s v1/executor
- pytest -v -s v1/kv_offload - pytest -v -s v1/kv_offload
- pytest -v -s v1/sample - pytest -v -s v1/sample
...@@ -317,7 +318,7 @@ steps: ...@@ -317,7 +318,7 @@ steps:
no_gpu: true no_gpu: true
commands: commands:
# split the test to avoid interference # split the test to avoid interference
- pytest -v -s v1/core - pytest -v -s -m 'cpu_test' v1/core
- pytest -v -s v1/structured_output - pytest -v -s v1/structured_output
- pytest -v -s v1/test_serial_utils.py - pytest -v -s v1/test_serial_utils.py
- pytest -v -s -m 'cpu_test' v1/kv_connector/unit - pytest -v -s -m 'cpu_test' v1/kv_connector/unit
......
...@@ -13,7 +13,7 @@ import pytest ...@@ -13,7 +13,7 @@ import pytest
import torch import torch
from vllm import LLM from vllm import LLM
from vllm.v1.engine.llm_engine import LLMEngine as LLMEngineV1 from vllm.v1.engine.llm_engine import LLMEngine
from ..conftest import HfRunner, VllmRunner from ..conftest import HfRunner, VllmRunner
from ..models.utils import check_outputs_equal from ..models.utils import check_outputs_equal
...@@ -211,16 +211,11 @@ def test_models_distributed( ...@@ -211,16 +211,11 @@ def test_models_distributed(
def test_failed_model_execution(vllm_runner, monkeypatch) -> None: def test_failed_model_execution(vllm_runner, monkeypatch) -> None:
from vllm.envs import VLLM_USE_V1
if not VLLM_USE_V1:
pytest.skip("Skipping V0 test, dump input not supported")
# Needed to mock an error in the same process # Needed to mock an error in the same process
monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0") monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
with vllm_runner("facebook/opt-125m", enforce_eager=True) as vllm_model: with vllm_runner("facebook/opt-125m", enforce_eager=True) as vllm_model:
if isinstance(vllm_model.llm.llm_engine, LLMEngineV1): if isinstance(vllm_model.llm.llm_engine, LLMEngine):
v1_test_failed_model_execution(vllm_model) v1_test_failed_model_execution(vllm_model)
......
...@@ -117,18 +117,15 @@ def test_cumem_with_cudagraph(): ...@@ -117,18 +117,15 @@ def test_cumem_with_cudagraph():
@create_new_process_for_each_test() @create_new_process_for_each_test()
@pytest.mark.parametrize( @pytest.mark.parametrize(
"model, use_v1", "model",
[ [
# sleep mode with safetensors # sleep mode with safetensors
("meta-llama/Llama-3.2-1B", True), "meta-llama/Llama-3.2-1B",
# sleep mode with pytorch checkpoint # sleep mode with pytorch checkpoint
("facebook/opt-125m", True), "facebook/opt-125m",
], ],
) )
def test_end_to_end(monkeypatch: pytest.MonkeyPatch, model: str, use_v1: bool): def test_end_to_end(model: str):
with monkeypatch.context() as m:
assert use_v1
m.setenv("VLLM_USE_V1", "1")
free, total = torch.cuda.mem_get_info() free, total = torch.cuda.mem_get_info()
used_bytes_baseline = total - free # in case other process is running used_bytes_baseline = total - free # in case other process is running
llm = LLM(model, enable_sleep_mode=True) llm = LLM(model, enable_sleep_mode=True)
...@@ -151,10 +148,7 @@ def test_end_to_end(monkeypatch: pytest.MonkeyPatch, model: str, use_v1: bool): ...@@ -151,10 +148,7 @@ def test_end_to_end(monkeypatch: pytest.MonkeyPatch, model: str, use_v1: bool):
# therefore high memory usage after `llm.sleep` is called is expected. # therefore high memory usage after `llm.sleep` is called is expected.
# FIXME(youkaichao & ywang96): Fix memory buffer issue with sleep mode # FIXME(youkaichao & ywang96): Fix memory buffer issue with sleep mode
# in V1. # in V1.
if use_v1:
assert used_bytes < 7 * GiB_bytes assert used_bytes < 7 * GiB_bytes
else:
assert used_bytes < 2 * GiB_bytes
llm.wake_up() llm.wake_up()
output2 = llm.generate(prompt, sampling_params) output2 = llm.generate(prompt, sampling_params)
...@@ -168,10 +162,7 @@ def test_end_to_end(monkeypatch: pytest.MonkeyPatch, model: str, use_v1: bool): ...@@ -168,10 +162,7 @@ def test_end_to_end(monkeypatch: pytest.MonkeyPatch, model: str, use_v1: bool):
used_bytes = total - free_gpu_bytes_wake_up_w - used_bytes_baseline used_bytes = total - free_gpu_bytes_wake_up_w - used_bytes_baseline
# should just reallocate memory for weights (1B model, ~2GiB weights) # should just reallocate memory for weights (1B model, ~2GiB weights)
if use_v1:
assert used_bytes < 10 * GiB_bytes assert used_bytes < 10 * GiB_bytes
else:
assert used_bytes < 6 * GiB_bytes
# now allocate kv cache memory # now allocate kv cache memory
llm.wake_up(tags=["kv_cache"]) llm.wake_up(tags=["kv_cache"])
......
...@@ -66,7 +66,6 @@ def llm_pair(request): ...@@ -66,7 +66,6 @@ def llm_pair(request):
pytest.skip("Only Blackwell GPUs support Cutlass MLA") pytest.skip("Only Blackwell GPUs support Cutlass MLA")
env_vars = { env_vars = {
"VLLM_USE_V1": "1",
# Force native sampler to avoid potential nondeterminism in FlashInfer # Force native sampler to avoid potential nondeterminism in FlashInfer
# when per-request generators are not used in V1. # when per-request generators are not used in V1.
"VLLM_USE_FLASHINFER_SAMPLER": "0", "VLLM_USE_FLASHINFER_SAMPLER": "0",
...@@ -161,7 +160,6 @@ def test_full_cudagraph_with_invalid_backend(): ...@@ -161,7 +160,6 @@ def test_full_cudagraph_with_invalid_backend():
with ( with (
temporary_environ( temporary_environ(
{ {
"VLLM_USE_V1": "1",
"VLLM_ATTENTION_BACKEND": "FLEX_ATTENTION", "VLLM_ATTENTION_BACKEND": "FLEX_ATTENTION",
# Flex_Attention is not supported with full cuda graph # Flex_Attention is not supported with full cuda graph
} }
......
...@@ -18,7 +18,6 @@ from vllm.config import ( ...@@ -18,7 +18,6 @@ from vllm.config import (
VllmConfig, VllmConfig,
set_current_vllm_config, set_current_vllm_config,
) )
from vllm.envs import VLLM_USE_V1
from vllm.forward_context import BatchDescriptor, set_forward_context from vllm.forward_context import BatchDescriptor, set_forward_context
from vllm.utils import is_torch_equal_or_newer from vllm.utils import is_torch_equal_or_newer
...@@ -127,7 +126,6 @@ def _run_simple_model( ...@@ -127,7 +126,6 @@ def _run_simple_model(
@pytest.mark.parametrize("use_inductor", [True, False]) @pytest.mark.parametrize("use_inductor", [True, False])
@torch.inference_mode() @torch.inference_mode()
def test_simple_piecewise_compile(use_inductor): def test_simple_piecewise_compile(use_inductor):
assert VLLM_USE_V1
_run_simple_model( _run_simple_model(
splitting_ops=["silly.attention"], splitting_ops=["silly.attention"],
use_inductor_graph_partition=False, use_inductor_graph_partition=False,
...@@ -146,7 +144,6 @@ def test_simple_piecewise_compile(use_inductor): ...@@ -146,7 +144,6 @@ def test_simple_piecewise_compile(use_inductor):
@torch.inference_mode() @torch.inference_mode()
@pytest.mark.parametrize("splitting_ops", [["silly.attention"], []]) @pytest.mark.parametrize("splitting_ops", [["silly.attention"], []])
def test_simple_inductor_graph_partition(splitting_ops): def test_simple_inductor_graph_partition(splitting_ops):
assert VLLM_USE_V1
if not is_torch_equal_or_newer("2.9.0.dev"): if not is_torch_equal_or_newer("2.9.0.dev"):
pytest.skip("inductor graph partition is only available in PyTorch 2.9+") pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
......
...@@ -388,10 +388,6 @@ def test_async_tp_pass_correctness( ...@@ -388,10 +388,6 @@ def test_async_tp_pass_correctness(
"pass_config": {"enable_async_tp": async_tp_enabled}, "pass_config": {"enable_async_tp": async_tp_enabled},
} }
async_tp_env = tp_env = {
"VLLM_USE_V1": "1",
}
async_tp_args = [ async_tp_args = [
*common_args, *common_args,
"--tensor-parallel-size", "--tensor-parallel-size",
...@@ -410,6 +406,4 @@ def test_async_tp_pass_correctness( ...@@ -410,6 +406,4 @@ def test_async_tp_pass_correctness(
"mp", "mp",
] ]
compare_two_settings( compare_two_settings(model_id, async_tp_args, tp_args, method="generate")
model_id, async_tp_args, tp_args, async_tp_env, tp_env, method="generate"
)
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest import pytest
import vllm
from vllm.compilation.counter import compilation_counter from vllm.compilation.counter import compilation_counter
from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig
from vllm.utils import _is_torch_equal_or_newer from vllm.utils import _is_torch_equal_or_newer
...@@ -16,15 +15,10 @@ def test_version(): ...@@ -16,15 +15,10 @@ def test_version():
assert not _is_torch_equal_or_newer("2.7.1", "2.8.0.dev") assert not _is_torch_equal_or_newer("2.7.1", "2.8.0.dev")
def test_use_cudagraphs_dynamic(monkeypatch): def test_use_cudagraphs_dynamic():
assert vllm.envs.VLLM_USE_V1
vllm_config = VllmConfig() vllm_config = VllmConfig()
assert vllm_config.compilation_config.use_cudagraph assert vllm_config.compilation_config.use_cudagraph
monkeypatch.setenv("VLLM_USE_V1", "0")
vllm_config = VllmConfig()
assert not vllm_config.compilation_config.use_cudagraph
def test_custom_op(): def test_custom_op():
# proper syntax # proper syntax
...@@ -41,8 +35,6 @@ def test_custom_op(): ...@@ -41,8 +35,6 @@ def test_custom_op():
# may be influenced by other tests. # may be influenced by other tests.
@pytest.mark.parametrize("val", ["1"]) @pytest.mark.parametrize("val", ["1"])
def test_VLLM_DISABLE_COMPILE_CACHE(vllm_runner, monkeypatch, val): def test_VLLM_DISABLE_COMPILE_CACHE(vllm_runner, monkeypatch, val):
assert vllm.envs.VLLM_USE_V1
# Disable multiprocessing so that the counter is in the same process # Disable multiprocessing so that the counter is in the same process
monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0") monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", val) monkeypatch.setenv("VLLM_DISABLE_COMPILE_CACHE", val)
...@@ -68,8 +60,6 @@ def test_VLLM_DISABLE_COMPILE_CACHE(vllm_runner, monkeypatch, val): ...@@ -68,8 +60,6 @@ def test_VLLM_DISABLE_COMPILE_CACHE(vllm_runner, monkeypatch, val):
@pytest.mark.forked @pytest.mark.forked
@pytest.mark.parametrize("enabled", [True, False]) @pytest.mark.parametrize("enabled", [True, False])
def test_use_cudagraphs(vllm_runner, monkeypatch, enabled): def test_use_cudagraphs(vllm_runner, monkeypatch, enabled):
assert vllm.envs.VLLM_USE_V1
# Disable multiprocessing so that the counter is in the same process # Disable multiprocessing so that the counter is in the same process
monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0") monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
......
...@@ -303,7 +303,6 @@ def test_attention_quant_pattern( ...@@ -303,7 +303,6 @@ def test_attention_quant_pattern(
model_class: type[AttentionQuantPatternModel], model_class: type[AttentionQuantPatternModel],
backend: _Backend, backend: _Backend,
use_inductor_graph_partition: bool, use_inductor_graph_partition: bool,
monkeypatch,
dist_init, dist_init,
caplog_vllm, caplog_vllm,
): ):
...@@ -312,8 +311,6 @@ def test_attention_quant_pattern( ...@@ -312,8 +311,6 @@ def test_attention_quant_pattern(
if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"): if use_inductor_graph_partition and not is_torch_equal_or_newer("2.9.0.dev"):
pytest.skip("inductor graph partition is only available in PyTorch 2.9+") pytest.skip("inductor graph partition is only available in PyTorch 2.9+")
monkeypatch.setenv("VLLM_USE_V1", "1")
device = torch.device("cuda:0") device = torch.device("cuda:0")
torch.manual_seed(42) torch.manual_seed(42)
......
...@@ -8,16 +8,13 @@ from vllm.engine.arg_utils import AsyncEngineArgs ...@@ -8,16 +8,13 @@ from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.v1.engine.async_llm import AsyncLLM from vllm.v1.engine.async_llm import AsyncLLM
def test_mp_reducer(monkeypatch): def test_mp_reducer():
""" """
Test that _reduce_config reducer is registered when AsyncLLM is instantiated Test that _reduce_config reducer is registered when AsyncLLM is instantiated
without transformers_modules. This is a regression test for without transformers_modules. This is a regression test for
https://github.com/vllm-project/vllm/pull/18640. https://github.com/vllm-project/vllm/pull/18640.
""" """
# Use V1 AsyncLLM which calls maybe_register_config_serialize_by_value
monkeypatch.setenv("VLLM_USE_V1", "1")
# Ensure transformers_modules is not in sys.modules # Ensure transformers_modules is not in sys.modules
if "transformers_modules" in sys.modules: if "transformers_modules" in sys.modules:
del sys.modules["transformers_modules"] del sys.modules["transformers_modules"]
......
...@@ -5,7 +5,7 @@ from typing import Any, Optional ...@@ -5,7 +5,7 @@ from typing import Any, Optional
import pytest import pytest
from vllm import LLM, SamplingParams, envs from vllm import LLM, SamplingParams
MODEL = "meta-llama/llama-2-7b-hf" MODEL = "meta-llama/llama-2-7b-hf"
MAX_TOKENS = 200 MAX_TOKENS = 200
...@@ -111,9 +111,7 @@ def _stop_token_id(llm): ...@@ -111,9 +111,7 @@ def _stop_token_id(llm):
@pytest.mark.skip_global_cleanup @pytest.mark.skip_global_cleanup
def test_stop_strings(): def test_stop_strings():
# If V0, must set enforce_eager=False since we use llm = LLM(MODEL, enforce_eager=True)
# async output processing below.
llm = LLM(MODEL, enforce_eager=envs.VLLM_USE_V1)
_stop_basic(llm) _stop_basic(llm)
_stop_multi_tokens(llm) _stop_multi_tokens(llm)
......
...@@ -42,24 +42,10 @@ class CPTestOptions(NamedTuple): ...@@ -42,24 +42,10 @@ class CPTestOptions(NamedTuple):
@dataclass @dataclass
class CPTestSettings: class CPTestSettings:
parallel_setups: list[ParallelSetup] parallel_setups: list[ParallelSetup]
# NOTE: the length of distributed_backends and
# vllm_major_versions should be the same, and they
# are first zipped together to iterate over all
# test settings.
distributed_backends: list[str] distributed_backends: list[str]
# vllm major version: "0" for V0, "1" for V1
vllm_major_versions: list[str]
runner: RunnerOption runner: RunnerOption
test_options: CPTestOptions test_options: CPTestOptions
def __post_init__(self):
if len(self.distributed_backends) != len(self.vllm_major_versions):
raise ValueError(
f"Length mismatch: distributed_backends "
f"({len(self.distributed_backends)}) != "
f"vllm_major_versions ({len(self.vllm_major_versions)})"
)
@staticmethod @staticmethod
def detailed( def detailed(
*, *,
...@@ -87,7 +73,6 @@ class CPTestSettings: ...@@ -87,7 +73,6 @@ class CPTestSettings:
return CPTestSettings( return CPTestSettings(
parallel_setups=parallel_setups, parallel_setups=parallel_setups,
distributed_backends=["mp"], distributed_backends=["mp"],
vllm_major_versions=["1"],
runner=runner, runner=runner,
test_options=CPTestOptions( test_options=CPTestOptions(
multi_node_only=multi_node_only, load_format=load_format multi_node_only=multi_node_only, load_format=load_format
...@@ -98,14 +83,11 @@ class CPTestSettings: ...@@ -98,14 +83,11 @@ class CPTestSettings:
opts = self.test_options opts = self.test_options
for parallel_setup in self.parallel_setups: for parallel_setup in self.parallel_setups:
for backend, vllm_major_version in zip( for backend in self.distributed_backends:
self.distributed_backends, self.vllm_major_versions
):
yield ( yield (
model_id, model_id,
parallel_setup, parallel_setup,
backend, backend,
vllm_major_version,
self.runner, self.runner,
opts, opts,
) )
...@@ -115,7 +97,6 @@ def _compare_cp_with_tp( ...@@ -115,7 +97,6 @@ def _compare_cp_with_tp(
model_id: str, model_id: str,
parallel_setup: ParallelSetup, parallel_setup: ParallelSetup,
distributed_backend: str, distributed_backend: str,
vllm_major_version: str,
runner: RunnerOption, runner: RunnerOption,
test_options: CPTestOptions, test_options: CPTestOptions,
num_gpus_available: int, num_gpus_available: int,
...@@ -191,10 +172,6 @@ def _compare_cp_with_tp( ...@@ -191,10 +172,6 @@ def _compare_cp_with_tp(
if hf_overrides: if hf_overrides:
common_args.extend(["--hf-overrides", json.dumps(hf_overrides)]) common_args.extend(["--hf-overrides", json.dumps(hf_overrides)])
cp_env = tp_env = {
"VLLM_USE_V1": vllm_major_version, # Note(hc): DCP only support V1 engine only
}
cp_args = [ cp_args = [
*common_args, *common_args,
"--tensor-parallel-size", "--tensor-parallel-size",
...@@ -217,24 +194,13 @@ def _compare_cp_with_tp( ...@@ -217,24 +194,13 @@ def _compare_cp_with_tp(
distributed_backend, distributed_backend,
] ]
try:
compare_two_settings( compare_two_settings(
model_id, model_id,
cp_args, cp_args,
tp_args, tp_args,
cp_env,
tp_env,
method=method, method=method,
max_wait_seconds=720, max_wait_seconds=720,
) )
except Exception:
testing_ray_compiled_graph = cp_env is not None
if testing_ray_compiled_graph and vllm_major_version == "0":
# Ray Compiled Graph tests are flaky for V0,
# so we don't want to fail the test
logger.exception("Ray Compiled Graph tests failed")
else:
raise
CP_TEXT_GENERATION_MODELS = { CP_TEXT_GENERATION_MODELS = {
...@@ -257,7 +223,6 @@ CP_TEST_MODELS = [ ...@@ -257,7 +223,6 @@ CP_TEST_MODELS = [
"model_id", "model_id",
"parallel_setup", "parallel_setup",
"distributed_backend", "distributed_backend",
"vllm_major_version",
"runner", "runner",
"test_options", "test_options",
), ),
...@@ -274,7 +239,6 @@ def test_cp_generation( ...@@ -274,7 +239,6 @@ def test_cp_generation(
model_id: str, model_id: str,
parallel_setup: ParallelSetup, parallel_setup: ParallelSetup,
distributed_backend: str, distributed_backend: str,
vllm_major_version: str,
runner: RunnerOption, runner: RunnerOption,
test_options: CPTestOptions, test_options: CPTestOptions,
num_gpus_available, num_gpus_available,
...@@ -283,7 +247,6 @@ def test_cp_generation( ...@@ -283,7 +247,6 @@ def test_cp_generation(
model_id, model_id,
parallel_setup, parallel_setup,
distributed_backend, distributed_backend,
vllm_major_version,
runner, runner,
test_options, test_options,
num_gpus_available, num_gpus_available,
......
...@@ -307,7 +307,6 @@ def _compare_tp( ...@@ -307,7 +307,6 @@ def _compare_tp(
if distributed_backend == "ray": if distributed_backend == "ray":
# For V1, test Ray Compiled Graph for all the tests # For V1, test Ray Compiled Graph for all the tests
pp_env = { pp_env = {
"VLLM_USE_V1": "1",
"VLLM_USE_RAY_COMPILED_DAG": "1", "VLLM_USE_RAY_COMPILED_DAG": "1",
"VLLM_USE_RAY_SPMD_WORKER": "1", "VLLM_USE_RAY_SPMD_WORKER": "1",
"VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL": "1", "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL": "1",
...@@ -316,15 +315,11 @@ def _compare_tp( ...@@ -316,15 +315,11 @@ def _compare_tp(
# terminate because of a Ray Compiled Graph issue. # terminate because of a Ray Compiled Graph issue.
common_args.append("--disable-frontend-multiprocessing") common_args.append("--disable-frontend-multiprocessing")
elif distributed_backend == "mp": elif distributed_backend == "mp":
pp_env = { pp_env = None
"VLLM_USE_V1": "1",
}
else: else:
pp_env = None pp_env = None
tp_env = { tp_env = None
"VLLM_USE_V1": "1",
}
pp_args = [ pp_args = [
*common_args, *common_args,
......
...@@ -42,24 +42,10 @@ class SPTestOptions(NamedTuple): ...@@ -42,24 +42,10 @@ class SPTestOptions(NamedTuple):
@dataclass @dataclass
class SPTestSettings: class SPTestSettings:
parallel_setups: list[ParallelSetup] parallel_setups: list[ParallelSetup]
# NOTE: the length of distributed_backends and
# vllm_major_versions should be the same, and they
# are first zipped together to iterate over all
# test settings.
distributed_backends: list[str] distributed_backends: list[str]
# vllm major version: "0" for V0, "1" for V1
vllm_major_versions: list[str]
runner: RunnerOption runner: RunnerOption
test_options: SPTestOptions test_options: SPTestOptions
def __post_init__(self):
if len(self.distributed_backends) != len(self.vllm_major_versions):
raise ValueError(
f"Length mismatch: distributed_backends "
f"({len(self.distributed_backends)}) != "
f"vllm_major_versions ({len(self.vllm_major_versions)})"
)
@staticmethod @staticmethod
def detailed( def detailed(
*, *,
...@@ -85,7 +71,6 @@ class SPTestSettings: ...@@ -85,7 +71,6 @@ class SPTestSettings:
return SPTestSettings( return SPTestSettings(
parallel_setups=parallel_setups, parallel_setups=parallel_setups,
distributed_backends=["mp", "ray"], distributed_backends=["mp", "ray"],
vllm_major_versions=["1", "1"],
runner=runner, runner=runner,
test_options=SPTestOptions( test_options=SPTestOptions(
multi_node_only=multi_node_only, load_format=load_format multi_node_only=multi_node_only, load_format=load_format
...@@ -117,7 +102,6 @@ class SPTestSettings: ...@@ -117,7 +102,6 @@ class SPTestSettings:
return SPTestSettings( return SPTestSettings(
parallel_setups=parallel_setups, parallel_setups=parallel_setups,
distributed_backends=["mp", "ray"], distributed_backends=["mp", "ray"],
vllm_major_versions=["1", "1"],
runner=runner, runner=runner,
test_options=SPTestOptions( test_options=SPTestOptions(
multi_node_only=multi_node_only, load_format=load_format multi_node_only=multi_node_only, load_format=load_format
...@@ -147,7 +131,6 @@ class SPTestSettings: ...@@ -147,7 +131,6 @@ class SPTestSettings:
return SPTestSettings( return SPTestSettings(
parallel_setups=parallel_setups, parallel_setups=parallel_setups,
distributed_backends=["mp", "ray"], distributed_backends=["mp", "ray"],
vllm_major_versions=["1", "1"],
runner=runner, runner=runner,
test_options=SPTestOptions( test_options=SPTestOptions(
multi_node_only=multi_node_only, load_format=load_format multi_node_only=multi_node_only, load_format=load_format
...@@ -158,14 +141,11 @@ class SPTestSettings: ...@@ -158,14 +141,11 @@ class SPTestSettings:
opts = self.test_options opts = self.test_options
for parallel_setup in self.parallel_setups: for parallel_setup in self.parallel_setups:
for backend, vllm_major_version in zip( for backend in self.distributed_backends:
self.distributed_backends, self.vllm_major_versions
):
yield ( yield (
model_id, model_id,
parallel_setup, parallel_setup,
backend, backend,
vllm_major_version,
self.runner, self.runner,
opts, opts,
) )
...@@ -175,7 +155,6 @@ def _compare_sp( ...@@ -175,7 +155,6 @@ def _compare_sp(
model_id: str, model_id: str,
parallel_setup: ParallelSetup, parallel_setup: ParallelSetup,
distributed_backend: str, distributed_backend: str,
vllm_major_version: str,
runner: RunnerOption, runner: RunnerOption,
test_options: SPTestOptions, test_options: SPTestOptions,
num_gpus_available: int, num_gpus_available: int,
...@@ -265,10 +244,6 @@ def _compare_sp( ...@@ -265,10 +244,6 @@ def _compare_sp(
}, },
} }
tp_sp_env = tp_env = {
"VLLM_USE_V1": vllm_major_version,
}
tp_sp_args = [ tp_sp_args = [
*common_args, *common_args,
"--tensor-parallel-size", "--tensor-parallel-size",
...@@ -281,9 +256,6 @@ def _compare_sp( ...@@ -281,9 +256,6 @@ def _compare_sp(
json.dumps(compilation_config), json.dumps(compilation_config),
] ]
tp_env = {
"VLLM_USE_V1": vllm_major_version,
}
tp_args = [ tp_args = [
*common_args, *common_args,
"--tensor-parallel-size", "--tensor-parallel-size",
...@@ -292,18 +264,7 @@ def _compare_sp( ...@@ -292,18 +264,7 @@ def _compare_sp(
"mp", "mp",
] ]
try: compare_two_settings(model_id, tp_sp_args, tp_args, method=method)
compare_two_settings(
model_id, tp_sp_args, tp_args, tp_sp_env, tp_env, method=method
)
except Exception:
testing_ray_compiled_graph = tp_sp_env is not None
if testing_ray_compiled_graph and vllm_major_version == "0":
# Ray Compiled Graph tests are flaky for V0,
# so we don't want to fail the test
logger.exception("Ray Compiled Graph tests failed")
else:
raise
SP_TEXT_GENERATION_MODELS = { SP_TEXT_GENERATION_MODELS = {
...@@ -325,7 +286,6 @@ SP_TEST_MODELS = [ ...@@ -325,7 +286,6 @@ SP_TEST_MODELS = [
"model_id", "model_id",
"parallel_setup", "parallel_setup",
"distributed_backend", "distributed_backend",
"vllm_major_version",
"runner", "runner",
"test_options", "test_options",
), ),
...@@ -341,7 +301,6 @@ def test_tp_sp_generation( ...@@ -341,7 +301,6 @@ def test_tp_sp_generation(
model_id: str, model_id: str,
parallel_setup: ParallelSetup, parallel_setup: ParallelSetup,
distributed_backend: str, distributed_backend: str,
vllm_major_version: str,
runner: RunnerOption, runner: RunnerOption,
test_options: SPTestOptions, test_options: SPTestOptions,
num_gpus_available, num_gpus_available,
...@@ -350,7 +309,6 @@ def test_tp_sp_generation( ...@@ -350,7 +309,6 @@ def test_tp_sp_generation(
model_id, model_id,
parallel_setup, parallel_setup,
distributed_backend, distributed_backend,
vllm_major_version,
runner, runner,
test_options, test_options,
num_gpus_available, num_gpus_available,
......
...@@ -61,17 +61,10 @@ def run_test(model_name, more_args=None): ...@@ -61,17 +61,10 @@ def run_test(model_name, more_args=None):
TPU_TP_TEST_STR = "" # "tensor_parallel_size=4" TPU_TP_TEST_STR = "" # "tensor_parallel_size=4"
@pytest.mark.skipif(
not current_platform.is_cuda() and not current_platform.is_tpu(),
reason="V1 is currently only supported on CUDA and TPU",
)
@pytest.mark.parametrize("model", MODEL_NAMES) @pytest.mark.parametrize("model", MODEL_NAMES)
def test_lm_eval_accuracy_v1_engine(model, monkeypatch: pytest.MonkeyPatch): def test_lm_eval_accuracy_v1_engine(model):
"""Run with the V1 Engine.""" """Run with the V1 Engine."""
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
more_args = None more_args = None
if current_platform.is_tpu(): if current_platform.is_tpu():
# Limit compilation time for TPU V1 # Limit compilation time for TPU V1
...@@ -85,19 +78,10 @@ def test_lm_eval_accuracy_v1_engine(model, monkeypatch: pytest.MonkeyPatch): ...@@ -85,19 +78,10 @@ def test_lm_eval_accuracy_v1_engine(model, monkeypatch: pytest.MonkeyPatch):
run_test(model, more_args) run_test(model, more_args)
@pytest.mark.skipif(
not current_platform.is_cuda() and not current_platform.is_tpu(),
reason="V1 is currently only supported on CUDA and TPU",
)
@pytest.mark.parametrize("model", FP8_KV_MODEL_NAMES) @pytest.mark.parametrize("model", FP8_KV_MODEL_NAMES)
def test_lm_eval_accuracy_v1_engine_fp8_kv_cache( def test_lm_eval_accuracy_v1_engine_fp8_kv_cache(model):
model, monkeypatch: pytest.MonkeyPatch
):
"""Run with the V1 Engine.""" """Run with the V1 Engine."""
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
more_args = None more_args = None
if current_platform.is_tpu(): if current_platform.is_tpu():
# Limit compilation time for TPU V1 # Limit compilation time for TPU V1
......
...@@ -10,7 +10,6 @@ AsyncLLMEngine are working correctly. ...@@ -10,7 +10,6 @@ AsyncLLMEngine are working correctly.
""" """
import lm_eval import lm_eval
import pytest
from vllm.platforms import current_platform from vllm.platforms import current_platform
...@@ -67,17 +66,9 @@ def run_test(more_args): ...@@ -67,17 +66,9 @@ def run_test(more_args):
), f"Expected: {EXPECTED_VALUE} | Measured: {measured_value}" ), f"Expected: {EXPECTED_VALUE} | Measured: {measured_value}"
@pytest.mark.skipif( def test_lm_eval_accuracy_v1_engine():
not current_platform.is_cuda()
and not current_platform.is_tpu()
and not current_platform.is_xpu(),
reason="V1 currently only supported on CUDA, XPU and TPU",
)
def test_lm_eval_accuracy_v1_engine(monkeypatch: pytest.MonkeyPatch):
"""Run with the V1 Engine.""" """Run with the V1 Engine."""
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
more_args = [] more_args = []
# Limit compilation time for V1 # Limit compilation time for V1
......
...@@ -21,18 +21,7 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" ...@@ -21,18 +21,7 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def monkeypatch_module(): def server(zephyr_lora_files): # noqa: F811
from _pytest.monkeypatch import MonkeyPatch
mpatch = MonkeyPatch()
yield mpatch
mpatch.undo()
@pytest.fixture(scope="module")
def server(monkeypatch_module, zephyr_lora_files): # noqa: F811
monkeypatch_module.setenv("VLLM_USE_V1", "1")
args = [ args = [
# use half precision for speed and memory savings in CI environment # use half precision for speed and memory savings in CI environment
"--dtype", "--dtype",
......
...@@ -37,21 +37,8 @@ BADREQUEST_CASES = [ ...@@ -37,21 +37,8 @@ BADREQUEST_CASES = [
] ]
@pytest.fixture(scope="module")
def monkeypatch_module():
from _pytest.monkeypatch import MonkeyPatch
mpatch = MonkeyPatch()
yield mpatch
mpatch.undo()
@pytest.fixture(scope="module", params=[True]) @pytest.fixture(scope="module", params=[True])
def server_with_lora_modules_json(request, monkeypatch_module, zephyr_lora_files): def server_with_lora_modules_json(request, zephyr_lora_files):
use_v1 = request.param
assert use_v1
monkeypatch_module.setenv("VLLM_USE_V1", "1")
# Define the json format LoRA module configurations # Define the json format LoRA module configurations
lora_module_1 = { lora_module_1 = {
"name": "zephyr-lora", "name": "zephyr-lora",
......
...@@ -22,24 +22,6 @@ MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" ...@@ -22,24 +22,6 @@ MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
PREV_MINOR_VERSION = version._prev_minor_version() PREV_MINOR_VERSION = version._prev_minor_version()
@pytest.fixture(scope="module", params=[True])
def use_v1(request):
# Module-scoped variant of run_with_both_engines
#
# Use this fixture to run a test with both v0 and v1, and
# also to conditionalize the test logic e.g.
#
# def test_metrics_exist(use_v1, server, client):
# ...
# expected = EXPECTED_V1_METRICS if use_v1 else EXPECTED_METRICS
# for metric in expected:
# assert metric in response.text
#
# @skip_v1 wouldn't work here because this is a module-level
# fixture - per-function decorators would have no effect
yield request.param
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def default_server_args(): def default_server_args():
return [ return [
...@@ -63,13 +45,11 @@ def default_server_args(): ...@@ -63,13 +45,11 @@ def default_server_args():
f"--show-hidden-metrics-for-version={PREV_MINOR_VERSION}", f"--show-hidden-metrics-for-version={PREV_MINOR_VERSION}",
], ],
) )
def server(use_v1, default_server_args, request): def server(default_server_args, request):
if request.param: if request.param:
default_server_args.append(request.param) default_server_args.append(request.param)
env_dict = dict(VLLM_USE_V1="1" if use_v1 else "0")
with RemoteOpenAIServer( with RemoteOpenAIServer(MODEL_NAME, default_server_args) as remote_server:
MODEL_NAME, default_server_args, env_dict=env_dict
) as remote_server:
yield remote_server yield remote_server
...@@ -129,7 +109,8 @@ EXPECTED_VALUES = { ...@@ -129,7 +109,8 @@ EXPECTED_VALUES = {
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_metrics_counts( async def test_metrics_counts(
server: RemoteOpenAIServer, client: openai.AsyncClient, use_v1: bool server: RemoteOpenAIServer,
client: openai.AsyncClient,
): ):
for _ in range(_NUM_REQUESTS): for _ in range(_NUM_REQUESTS):
# sending a request triggers the metrics to be logged. # sending a request triggers the metrics to be logged.
...@@ -145,7 +126,7 @@ async def test_metrics_counts( ...@@ -145,7 +126,7 @@ async def test_metrics_counts(
# Loop over all expected metric_families # Loop over all expected metric_families
for metric_family, suffix_values_list in EXPECTED_VALUES.items(): for metric_family, suffix_values_list in EXPECTED_VALUES.items():
if (use_v1 and metric_family not in EXPECTED_METRICS_V1) or ( if (metric_family not in EXPECTED_METRICS_V1) or (
not server.show_hidden_metrics not server.show_hidden_metrics
and metric_family in HIDDEN_DEPRECATED_METRICS and metric_family in HIDDEN_DEPRECATED_METRICS
): ):
...@@ -183,62 +164,6 @@ async def test_metrics_counts( ...@@ -183,62 +164,6 @@ async def test_metrics_counts(
assert found_metric, f"Did not find {metric_family} in prom endpoint" assert found_metric, f"Did not find {metric_family} in prom endpoint"
EXPECTED_METRICS = [
"vllm:num_requests_running",
"vllm:num_requests_waiting",
"vllm:gpu_cache_usage_perc",
"vllm:time_to_first_token_seconds_sum",
"vllm:time_to_first_token_seconds_bucket",
"vllm:time_to_first_token_seconds_count",
"vllm:time_per_output_token_seconds_sum",
"vllm:time_per_output_token_seconds_bucket",
"vllm:time_per_output_token_seconds_count",
"vllm:e2e_request_latency_seconds_sum",
"vllm:e2e_request_latency_seconds_bucket",
"vllm:e2e_request_latency_seconds_count",
"vllm:request_queue_time_seconds_sum",
"vllm:request_queue_time_seconds_bucket",
"vllm:request_queue_time_seconds_count",
"vllm:request_inference_time_seconds_sum",
"vllm:request_inference_time_seconds_bucket",
"vllm:request_inference_time_seconds_count",
"vllm:request_prefill_time_seconds_sum",
"vllm:request_prefill_time_seconds_bucket",
"vllm:request_prefill_time_seconds_count",
"vllm:request_decode_time_seconds_sum",
"vllm:request_decode_time_seconds_bucket",
"vllm:request_decode_time_seconds_count",
"vllm:request_prompt_tokens_sum",
"vllm:request_prompt_tokens_bucket",
"vllm:request_prompt_tokens_count",
"vllm:request_generation_tokens_sum",
"vllm:request_generation_tokens_bucket",
"vllm:request_generation_tokens_count",
"vllm:request_params_n_sum",
"vllm:request_params_n_bucket",
"vllm:request_params_n_count",
"vllm:request_params_max_tokens_sum",
"vllm:request_params_max_tokens_bucket",
"vllm:request_params_max_tokens_count",
"vllm:iteration_tokens_total",
"vllm:num_preemptions_total",
"vllm:prompt_tokens_total",
"vllm:generation_tokens_total",
"vllm:request_success_total",
"vllm:cache_config_info",
# labels in cache_config_info
"block_size",
"cache_dtype",
"cpu_offload_gb",
"enable_prefix_caching",
"gpu_memory_utilization",
"num_cpu_blocks",
"num_gpu_blocks",
"num_gpu_blocks_override",
"sliding_window",
"swap_space_bytes",
]
EXPECTED_METRICS_V1 = [ EXPECTED_METRICS_V1 = [
"vllm:num_requests_running", "vllm:num_requests_running",
"vllm:num_requests_waiting", "vllm:num_requests_waiting",
...@@ -304,17 +229,21 @@ HIDDEN_DEPRECATED_METRICS: list[str] = [ ...@@ -304,17 +229,21 @@ HIDDEN_DEPRECATED_METRICS: list[str] = [
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_metrics_exist( async def test_metrics_exist(
server: RemoteOpenAIServer, client: openai.AsyncClient, use_v1: bool server: RemoteOpenAIServer,
client: openai.AsyncClient,
): ):
# sending a request triggers the metrics to be logged. # sending a request triggers the metrics to be logged.
await client.completions.create( await client.completions.create(
model=MODEL_NAME, prompt="Hello, my name is", max_tokens=5, temperature=0.0 model=MODEL_NAME,
prompt="Hello, my name is",
max_tokens=5,
temperature=0.0,
) )
response = requests.get(server.url_for("metrics")) response = requests.get(server.url_for("metrics"))
assert response.status_code == HTTPStatus.OK assert response.status_code == HTTPStatus.OK
for metric in EXPECTED_METRICS_V1 if use_v1 else EXPECTED_METRICS: for metric in EXPECTED_METRICS_V1:
if metric in HIDDEN_DEPRECATED_METRICS and not server.show_hidden_metrics: if metric in HIDDEN_DEPRECATED_METRICS and not server.show_hidden_metrics:
continue continue
assert metric in response.text assert metric in response.text
...@@ -322,10 +251,11 @@ async def test_metrics_exist( ...@@ -322,10 +251,11 @@ async def test_metrics_exist(
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_abort_metrics_reset( async def test_abort_metrics_reset(
server: RemoteOpenAIServer, client: openai.AsyncClient, use_v1: bool server: RemoteOpenAIServer,
client: openai.AsyncClient,
): ):
running_requests, waiting_requests, kv_cache_usage = _get_running_metrics_from_api( running_requests, waiting_requests, kv_cache_usage = _get_running_metrics_from_api(
server, use_v1 server
) )
# Expect no running requests or kvcache usage # Expect no running requests or kvcache usage
...@@ -351,7 +281,7 @@ async def test_abort_metrics_reset( ...@@ -351,7 +281,7 @@ async def test_abort_metrics_reset(
# Check that we have running requests # Check that we have running requests
running_requests, waiting_requests, kv_cache_usage = _get_running_metrics_from_api( running_requests, waiting_requests, kv_cache_usage = _get_running_metrics_from_api(
server, use_v1 server
) )
# Expect running requests and kvcache usage # Expect running requests and kvcache usage
...@@ -371,7 +301,7 @@ async def test_abort_metrics_reset( ...@@ -371,7 +301,7 @@ async def test_abort_metrics_reset(
# Verify running and waiting requests counts and KV cache usage are zero # Verify running and waiting requests counts and KV cache usage are zero
running_requests_after, waiting_requests_after, kv_cache_usage_after = ( running_requests_after, waiting_requests_after, kv_cache_usage_after = (
_get_running_metrics_from_api(server, use_v1) _get_running_metrics_from_api(server)
) )
assert running_requests_after == 0, ( assert running_requests_after == 0, (
...@@ -385,7 +315,7 @@ async def test_abort_metrics_reset( ...@@ -385,7 +315,7 @@ async def test_abort_metrics_reset(
) )
def _get_running_metrics_from_api(server: RemoteOpenAIServer, use_v1: bool): def _get_running_metrics_from_api(server: RemoteOpenAIServer):
"""Return (running_count, waiting_count, kv_cache_usage)""" """Return (running_count, waiting_count, kv_cache_usage)"""
response = requests.get(server.url_for("metrics")) response = requests.get(server.url_for("metrics"))
...@@ -394,9 +324,7 @@ def _get_running_metrics_from_api(server: RemoteOpenAIServer, use_v1: bool): ...@@ -394,9 +324,7 @@ def _get_running_metrics_from_api(server: RemoteOpenAIServer, use_v1: bool):
# Verify running and waiting requests counts and KV cache usage are zero # Verify running and waiting requests counts and KV cache usage are zero
running_requests, waiting_requests, kv_cache_usage = None, None, None running_requests, waiting_requests, kv_cache_usage = None, None, None
kv_cache_usage_metric = ( kv_cache_usage_metric = "vllm:kv_cache_usage_perc"
"vllm:kv_cache_usage_perc" if use_v1 else "vllm:gpu_cache_usage_perc"
)
for family in text_string_to_metric_families(response.text): for family in text_string_to_metric_families(response.text):
if family.name == "vllm:num_requests_running": if family.name == "vllm:num_requests_running":
...@@ -422,7 +350,7 @@ def _get_running_metrics_from_api(server: RemoteOpenAIServer, use_v1: bool): ...@@ -422,7 +350,7 @@ def _get_running_metrics_from_api(server: RemoteOpenAIServer, use_v1: bool):
return running_requests, waiting_requests, kv_cache_usage return running_requests, waiting_requests, kv_cache_usage
def test_metrics_exist_run_batch(use_v1: bool): def test_metrics_exist_run_batch():
input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are a helpful assistant."}}""" # noqa: E501 input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/multilingual-e5-small", "input": "You are a helpful assistant."}}""" # noqa: E501
base_url = "0.0.0.0" base_url = "0.0.0.0"
...@@ -452,7 +380,6 @@ def test_metrics_exist_run_batch(use_v1: bool): ...@@ -452,7 +380,6 @@ def test_metrics_exist_run_batch(use_v1: bool):
"--port", "--port",
port, port,
], ],
env={"VLLM_USE_V1": "1"},
) )
def is_server_up(url): def is_server_up(url):
......
...@@ -15,11 +15,6 @@ from vllm.entrypoints.renderer import BaseRenderer ...@@ -15,11 +15,6 @@ from vllm.entrypoints.renderer import BaseRenderer
from ...utils import RemoteOpenAIServer from ...utils import RemoteOpenAIServer
@pytest.fixture(scope="function", autouse=True)
def use_v1_only(monkeypatch):
monkeypatch.setenv("VLLM_USE_V1", "1")
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_empty_prompt(): async def test_empty_prompt():
model_name = "gpt2" model_name = "gpt2"
......
...@@ -80,7 +80,6 @@ def test_env( ...@@ -80,7 +80,6 @@ def test_env(
): ):
"""Test attention backend selection with valid device-backend pairs.""" """Test attention backend selection with valid device-backend pairs."""
with monkeypatch.context() as m: with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
m.setenv(STR_BACKEND_ENV_VAR, name) m.setenv(STR_BACKEND_ENV_VAR, name)
m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0") m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0")
...@@ -212,14 +211,8 @@ def test_env( ...@@ -212,14 +211,8 @@ def test_env(
@pytest.mark.parametrize("device", ["cpu", "cuda"]) @pytest.mark.parametrize("device", ["cpu", "cuda"])
def test_fp32_fallback( def test_fp32_fallback(device: str):
device: str,
monkeypatch: pytest.MonkeyPatch,
):
"""Test attention backend selection with fp32.""" """Test attention backend selection with fp32."""
with monkeypatch.context() as m:
m.setenv("VLLM_USE_V1", "1")
if device == "cpu": if device == "cpu":
with patch("vllm.attention.selector.current_platform", CpuPlatform()): with patch("vllm.attention.selector.current_platform", CpuPlatform()):
backend = get_attn_backend(16, torch.float32, None, 16) backend = get_attn_backend(16, torch.float32, None, 16)
...@@ -233,9 +226,6 @@ def test_fp32_fallback( ...@@ -233,9 +226,6 @@ def test_fp32_fallback(
def test_flash_attn(monkeypatch: pytest.MonkeyPatch): def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
"""Test FlashAttn validation.""" """Test FlashAttn validation."""
# TODO: When testing for v1, pipe in `use_v1` as an argument to
# get_attn_backend
pytest.skip( pytest.skip(
"Skipping as current backend selector does not " "Skipping as current backend selector does not "
"handle fallbacks when a backend is set via env var." "handle fallbacks when a backend is set via env var."
...@@ -289,7 +279,6 @@ def test_invalid_env(monkeypatch: pytest.MonkeyPatch): ...@@ -289,7 +279,6 @@ def test_invalid_env(monkeypatch: pytest.MonkeyPatch):
monkeypatch.context() as m, monkeypatch.context() as m,
patch("vllm.attention.selector.current_platform", CudaPlatform()), patch("vllm.attention.selector.current_platform", CudaPlatform()),
): ):
m.setenv("VLLM_USE_V1", "1")
m.setenv(STR_BACKEND_ENV_VAR, STR_INVALID_VAL) m.setenv(STR_BACKEND_ENV_VAR, STR_INVALID_VAL)
# Should raise ValueError for invalid backend # Should raise ValueError for invalid backend
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment