Commit 129fce94 authored by zhuwenwen
Browse files

Update DCU info and skip the same tests as on NV

parent 3f78216a
...@@ -6,8 +6,7 @@ import torch ...@@ -6,8 +6,7 @@ import torch
from tests.kernels.utils import DEFAULT_OPCHECK_TEST_UTILS from tests.kernels.utils import DEFAULT_OPCHECK_TEST_UTILS
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.utils import seed_everything from vllm.utils import seed_everything, is_hip
from vllm.utils import is_hip
from .utils import torch_version from .utils import torch_version
COPYING_DIRECTION = [('cuda', 'cpu'), ('cuda', 'cuda'), ('cpu', 'cuda')] COPYING_DIRECTION = [('cuda', 'cpu'), ('cuda', 'cuda'), ('cpu', 'cuda')]
......
...@@ -12,6 +12,7 @@ from vllm.sequence import SampleLogprobs ...@@ -12,6 +12,7 @@ from vllm.sequence import SampleLogprobs
from ....conftest import VIDEO_ASSETS, HfRunner, VllmRunner, _VideoAssets from ....conftest import VIDEO_ASSETS, HfRunner, VllmRunner, _VideoAssets
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix from ....utils import models_path_prefix
from vllm.utils import is_hip
_PREFACE = ( _PREFACE = (
"A chat between a curious human and an artificial intelligence assistant. " "A chat between a curious human and an artificial intelligence assistant. "
...@@ -160,12 +161,11 @@ def run_test( ...@@ -160,12 +161,11 @@ def run_test(
) )
@pytest.mark.skipif(transformers.__version__ < "4.45", def get_size_factors():
reason="Waiting for next transformers release") if is_hip():
@pytest.mark.parametrize("model", models) return [[],]
@pytest.mark.parametrize( else:
"size_factors", return [
[
# No video # No video
[], [],
# Single-scale # Single-scale
...@@ -175,6 +175,14 @@ def run_test( ...@@ -175,6 +175,14 @@ def run_test(
# Multi-scale # Multi-scale
[0.25, 0.5, 1.0], [0.25, 0.5, 1.0],
], ],
@pytest.mark.skipif(transformers.__version__ < "4.45",
reason="Waiting for next transformers release")
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize(
"size_factors",
get_size_factors()
) )
@pytest.mark.parametrize("dtype", ["half"]) @pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("max_tokens", [128]) @pytest.mark.parametrize("max_tokens", [128])
...@@ -205,6 +213,8 @@ def test_models(hf_runner, vllm_runner, video_assets, model, size_factors, ...@@ -205,6 +213,8 @@ def test_models(hf_runner, vllm_runner, video_assets, model, size_factors,
) )
@pytest.mark.skipif(is_hip(),
reason="Consistent with NV.")
@pytest.mark.skipif(transformers.__version__ < "4.45", @pytest.mark.skipif(transformers.__version__ < "4.45",
reason="Waiting for next transformers release") reason="Waiting for next transformers release")
@pytest.mark.parametrize("model", models) @pytest.mark.parametrize("model", models)
......
...@@ -125,10 +125,11 @@ def run_test( ...@@ -125,10 +125,11 @@ def run_test(
) )
@pytest.mark.parametrize("model", models) def get_size_factors():
@pytest.mark.parametrize( if is_hip():
"size_factors", return [[],]
[ else:
return [
# No image # No image
[], [],
# Single-scale # Single-scale
...@@ -137,7 +138,12 @@ def run_test( ...@@ -137,7 +138,12 @@ def run_test(
[1.0, 1.0, 1.0], [1.0, 1.0, 1.0],
# Multi-scale # Multi-scale
[0.25, 0.5, 1.0], [0.25, 0.5, 1.0],
], ]
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize(
"size_factors",
get_size_factors()
) )
@pytest.mark.parametrize("dtype", [ @pytest.mark.parametrize("dtype", [
pytest.param( pytest.param(
......
...@@ -201,6 +201,8 @@ def test_deterministic_when_seeded(k: int, vocab_size: int, batch_size: int, ...@@ -201,6 +201,8 @@ def test_deterministic_when_seeded(k: int, vocab_size: int, batch_size: int,
assert torch.equal(results[j][i], results[0][i]) assert torch.equal(results[j][i], results[0][i])
@pytest.mark.skipif(is_hip(),
reason="Consistent with NV.")
@pytest.mark.parametrize("k", [1, 3, 6]) @pytest.mark.parametrize("k", [1, 3, 6])
@pytest.mark.parametrize("vocab_size", [30_000, 50_000]) @pytest.mark.parametrize("vocab_size", [30_000, 50_000])
@pytest.mark.parametrize("batch_size", [1, 8, 32, 128]) @pytest.mark.parametrize("batch_size", [1, 8, 32, 128])
......
...@@ -7,8 +7,12 @@ from vllm.transformers_utils.config import try_get_generation_config ...@@ -7,8 +7,12 @@ from vllm.transformers_utils.config import try_get_generation_config
from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizer import get_tokenizer
from ..utils import models_path_prefix from ..utils import models_path_prefix
import os import os
import pytest
from vllm.utils import is_hip
@pytest.mark.skipif(is_hip(),
reason="Consistent with NV.")
def test_get_llama3_eos_token(): def test_get_llama3_eos_token():
model_name = os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct") model_name = os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct")
......
{
"1": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"num_warps": 8,
"num_stages": 2
},
"2": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 6,
"num_warps": 8,
"num_stages": 2
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 8,
"num_stages": 2
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 8,
"num_stages": 2
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 8,
"num_stages": 2
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 8,
"num_stages": 2
},
"1024": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 2
},
"2048": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 4,
"num_stages": 2
},
"4096": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 6,
"num_warps": 4,
"num_stages": 1
},
"6144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 6,
"num_warps": 8,
"num_stages": 1
},
"8192": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"num_warps": 4,
"num_stages": 1
},
"12288": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"num_warps": 8,
"num_stages": 1
},
"16384": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 4,
"num_stages": 1
},
"32786": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 6,
"num_warps": 4,
"num_stages": 1
}
}
{
"1": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 32,
"GROUP_SIZE_M": 8,
"num_warps": 4,
"num_stages": 2
},
"2": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"num_warps": 8,
"num_stages": 2
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 8,
"num_stages": 2
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 8,
"num_stages": 2
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 8,
"num_stages": 2
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 8,
"num_stages": 2
},
"1024": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 2
},
"2048": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 2
},
"4096": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 1
},
"6144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 1
},
"8192": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"num_warps": 4,
"num_stages": 1
},
"12288": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 6,
"num_warps": 4,
"num_stages": 1
},
"16384": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 1
},
"32786": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 4,
"num_stages": 1
}
}
...@@ -1418,5 +1418,5 @@ class W8a8GetCacheJSON: ...@@ -1418,5 +1418,5 @@ class W8a8GetCacheJSON:
def get_w8a8json_name(self,n,k): def get_w8a8json_name(self,n,k):
device_name = current_platform.get_device_name().replace(" ", "_") device_name = current_platform.get_device_name().replace(" ", "_")
return self.triton_json_dir+f"/W8A8_{n}_{k}_DCU{device_name}.json" return self.triton_json_dir+f"/W8A8_{n}_{k}_HCU{device_name}.json"
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment