Update DCU info and skip the same tests as on NV

129fce94 · zhuwenwen · 3f78216a · 129fce94 · 129fce94 · 129fce94
Commit 129fce94 authored Dec 05, 2024 by zhuwenwen
8 changed files
--- a/tests/kernels/test_cache.py
+++ b/tests/kernels/test_cache.py
@@ -6,8 +6,7 @@ import torch
 from tests.kernels.utils import DEFAULT_OPCHECK_TEST_UTILS
 from vllm import _custom_ops as ops
-from vllm.utils import seed_everything
+from vllm.utils import seed_everything, is_hip
-from vllm.utils import is_hip
 from .utils import torch_version
 COPYING_DIRECTION = [('cuda', 'cpu'), ('cuda', 'cuda'), ('cpu', 'cuda')]

--- a/tests/models/decoder_only/vision_language/test_llava_next_video.py
+++ b/tests/models/decoder_only/vision_language/test_llava_next_video.py
@@ -12,6 +12,7 @@ from vllm.sequence import SampleLogprobs
 from ....conftest import VIDEO_ASSETS, HfRunner, VllmRunner, _VideoAssets
 from ...utils import check_logprobs_close
 from ....utils import models_path_prefix
+from vllm.utils import is_hip
 _PREFACE = (
    "A chat between a curious human and an artificial intelligence assistant. "
@@ -160,12 +161,11 @@ def run_test(
        )
-@pytest.mark.skipif(transformers.__version__ < "4.45",
+def get_size_factors():
-                    reason="Waiting for next transformers release")
+    if is_hip():
-@pytest.mark.parametrize("model", models)
+        return [[],]  
-@pytest.mark.parametrize(
+    else:
-    "size_factors",
+        return [
-    [
            # No video
            [],
            # Single-scale
@@ -175,6 +175,14 @@ def run_test(
            # Multi-scale
            [0.25, 0.5, 1.0],
        ],
+@pytest.mark.skipif(transformers.__version__ < "4.45",
+                    reason="Waiting for next transformers release")
+@pytest.mark.parametrize("model", models)
+@pytest.mark.parametrize(
+    "size_factors",
+    get_size_factors()
 )
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", [128])
@@ -205,6 +213,8 @@ def test_models(hf_runner, vllm_runner, video_assets, model, size_factors,
    )
+@pytest.mark.skipif(is_hip(),
+                    reason="Consistent with NV.")
 @pytest.mark.skipif(transformers.__version__ < "4.45",
                    reason="Waiting for next transformers release")
 @pytest.mark.parametrize("model", models)

--- a/tests/models/decoder_only/vision_language/test_paligemma.py
+++ b/tests/models/decoder_only/vision_language/test_paligemma.py
@@ -125,10 +125,11 @@ def run_test(
        )
-@pytest.mark.parametrize("model", models)
+def get_size_factors():
-@pytest.mark.parametrize(
+    if is_hip():
-    "size_factors",
+        return [[],]  
-    [
+    else:
+        return [
            # No image
            [],
            # Single-scale
@@ -137,7 +138,12 @@ def run_test(
            [1.0, 1.0, 1.0],
            # Multi-scale
            [0.25, 0.5, 1.0],  
-    ],
+        ]
+@pytest.mark.parametrize("model", models)
+@pytest.mark.parametrize(
+    "size_factors",
+    get_size_factors()
 )
 @pytest.mark.parametrize("dtype", [
    pytest.param(

--- a/tests/samplers/test_rejection_sampler.py
+++ b/tests/samplers/test_rejection_sampler.py
@@ -201,6 +201,8 @@ def test_deterministic_when_seeded(k: int, vocab_size: int, batch_size: int,
                assert torch.equal(results[j][i], results[0][i])
+@pytest.mark.skipif(is_hip(),
+                    reason="Consistent with NV.")
 @pytest.mark.parametrize("k", [1, 3, 6])
 @pytest.mark.parametrize("vocab_size", [30_000, 50_000])
 @pytest.mark.parametrize("batch_size", [1, 8, 32, 128])

--- a/tests/tokenization/test_get_eos.py
+++ b/tests/tokenization/test_get_eos.py
@@ -7,8 +7,12 @@ from vllm.transformers_utils.config import try_get_generation_config
 from vllm.transformers_utils.tokenizer import get_tokenizer
 from ..utils import models_path_prefix
 import os
+import pytest
+from vllm.utils import is_hip
+@pytest.mark.skipif(is_hip(),
+                    reason="Consistent with NV.")
 def test_get_llama3_eos_token():
    model_name = os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct")

--- a/vllm/model_executor/layers/fused_moe/configs/E=64,N=1408,device_name=DCU_K100_AI.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=64,N=1408,device_name=DCU_K100_AI.json
-{
-    "1": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "2": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 6,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "4": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "8": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "16": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "32": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "1024": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 2
-    },
-    "2048": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 4,
-        "num_stages": 2
-    },
-    "4096": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 6,
-        "num_warps": 4,
-        "num_stages": 1
-    },
-    "6144": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 6,
-        "num_warps": 8,
-        "num_stages": 1
-    },
-    "8192": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4,
-        "num_stages": 1
-    },
-    "12288": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 8,
-        "num_stages": 1
-    },
-    "16384": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 4,
-        "num_stages": 1
-    },
-    "32786": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 6,
-        "num_warps": 4,
-        "num_stages": 1
-    }
-}
--- a/vllm/model_executor/layers/fused_moe/configs/E=64,N=704,device_name=DCU_K100_AI.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=64,N=704,device_name=DCU_K100_AI.json
-{
-    "1": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4,
-        "num_stages": 2
-    },
-    "2": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "4": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "8": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "16": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "32": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 8,
-        "num_stages": 2
-    },
-    "1024": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 2
-    },
-    "2048": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 2
-    },
-    "4096": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 1
-    },
-    "6144": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 1
-    },
-    "8192": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 4,
-        "num_stages": 1
-    },
-    "12288": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 6,
-        "num_warps": 4,
-        "num_stages": 1
-    },
-    "16384": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 1
-    },
-    "32786": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 1
-    }
-}
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -1418,5 +1418,5 @@ class W8a8GetCacheJSON:
    def get_w8a8json_name(self,n,k):
        device_name = current_platform.get_device_name().replace(" ", "_")
-        return self.triton_json_dir+f"/W8A8_{n}_{k}_DCU{device_name}.json"
+        return self.triton_json_dir+f"/W8A8_{n}_{k}_HCU{device_name}.json"
\ No newline at end of file