remove unused code

84dfdb17 · zhuwenwen · f137e58c
Commit 84dfdb17 authored Jul 31, 2025 by zhuwenwen
5 changed files
--- a/tests/spec_decode/test_ngram_worker.py
+++ b/tests/spec_decode/test_ngram_worker.py
--- a/tests/spec_decode/test_scorer.py
+++ b/tests/spec_decode/test_scorer.py
--- a/tests/spec_decode/test_spec_decode_worker.py
+++ b/tests/spec_decode/test_spec_decode_worker.py
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -39,7 +39,7 @@ def test_max_tokens_none():
    sampling_params = SamplingParams(temperature=0.01,
                                     top_p=0.1,
                                     max_tokens=None)
-    if gpuname.startswith('BW') and envs.VLLM_FLASH_ATTN_BACKEND:
+    if gpuname.startswith('BW'):
        llm = LLM(model=os.path.join(models_path_prefix, "distilbert/distilgpt2"),
                max_num_batched_tokens=4096,
                tensor_parallel_size=1,
@@ -75,7 +75,7 @@ def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
        # Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
        # with 400 Client Error: Bad Request.
        m.setenv("HF_TOKEN", "")
-        if gpuname.startswith('BW') and envs.VLLM_FLASH_ATTN_BACKEND:
+        if envs.VLLM_USE_FLASH_ATTN_PA:
            llm = LLM(model=os.path.join(models_path_prefix, "qwen/Qwen1.5-0.5B-Chat"), block_size=64)
        else:
            llm = LLM(model=os.path.join(models_path_prefix, "qwen/Qwen1.5-0.5B-Chat"))

--- a/tests/utils.py
+++ b/tests/utils.py
@@ -769,36 +769,6 @@ def fork_new_process_for_each_test(
    return wrapper
-def large_gpu_test(*, min_gb: int):
-    """
-    Decorate a test to be skipped if no GPU is available or it does not have
-    sufficient memory.
-    Currently, the CI machine uses L4 GPU which has 24 GB VRAM.
-    """
-    try:
-        if current_platform.is_cpu():
-            memory_gb = 0
-        else:
-            memory_gb = current_platform.get_device_total_memory() / GB_bytes
-    except Exception as e:
-        warnings.warn(
-            f"An error occurred when finding the available memory: {e}",
-            stacklevel=2,
-        )
-        memory_gb = 0
-    test_skipif = pytest.mark.skipif(
-        memory_gb < min_gb,
-        reason=f"Need at least {memory_gb}GB GPU memory to run the test.",
-    )
-    def wrapper(f: Callable[_P, None]) -> Callable[_P, None]:
-        return test_skipif(fork_new_process_for_each_test(f))
-    return wrapper
 def spawn_new_process_for_each_test(
        f: Callable[_P, None]) -> Callable[_P, None]: