Unverified commit fa96fb9c, authored by Fardin Hoque, committed by GitHub
Browse files

Pruning kernel Core Tests (#26727)


Signed-off-by: Fardin Hoque <kfhfar@amazon.com>
parent e3fdb627
......@@ -15,7 +15,6 @@ VEC_HIDDEN_SIZES = range(1024, 1030)
# Avoid combinatorial explosion with full Cartesian product
NUM_TOKENS_HIDDEN_SIZES = [
*[(1, i) for i in [1, 64, *VEC_HIDDEN_SIZES, 5120, 5137]],
*[(83, i) for i in [1, 1033, 2048, 5120]],
*[(2048, i) for i in [1, 64, *VEC_HIDDEN_SIZES, 5137]],
*[(4096, i) for i in [1, 64, 5137]],
]
......
......@@ -11,19 +11,7 @@ from vllm.platforms import current_platform
DTYPES = [torch.half, torch.bfloat16, torch.float]
NUM_TOKENS = [7, 83, 4096] # Arbitrary values for testing
HIDDEN_SIZES = [
8,
768,
769,
770,
771,
5120,
5124,
5125,
5126,
8192,
8199,
] # Arbitrary values for testing
HIDDEN_SIZES = [8, 768, 769, 5120, 5125, 8192] # Arbitrary values for testing
ADD_RESIDUAL = [False, True]
SEEDS = [0]
CUDA_DEVICES = [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]
......@@ -118,7 +106,7 @@ def test_poly_norm(
@pytest.mark.parametrize("hidden_size", HIDDEN_SIZES)
@pytest.mark.parametrize("add_residual", ADD_RESIDUAL)
@pytest.mark.parametrize("dtype", DTYPES)
@pytest.mark.parametrize("quant_scale", [1.0, 0.01, 10.0])
@pytest.mark.parametrize("quant_scale", [0.01, 1.0, 10.0])
@pytest.mark.parametrize("seed", SEEDS)
@pytest.mark.parametrize("device", CUDA_DEVICES)
@pytest.mark.parametrize("strided_input", [False, True])
......
......@@ -9,7 +9,7 @@ from vllm._custom_ops import permute_cols
@pytest.mark.parametrize("shape", [(1, 512), (544, 4096), (67, 8192)])
@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16])
@pytest.mark.parametrize("dtype", [torch.bfloat16])
def test_permute_cols(shape, dtype):
x = torch.randn(shape, dtype=dtype).cuda()
perm = torch.randperm(x.shape[1]).to(torch.int).cuda()
......
......@@ -12,8 +12,8 @@ from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.platforms import current_platform
IS_NEOX_STYLE = [True, False]
DTYPES = [torch.half, torch.bfloat16, torch.float]
HEAD_SIZES = [64, 80, 112, 120, 256]
DTYPES = [torch.bfloat16, torch.float]
HEAD_SIZES = [64, 80, 120, 256]
ROTARY_DIMS = [None, 32] # None means rotary dim == head size
NUM_HEADS = [17] # Arbitrary values for testing
BATCH_SIZES = [5] # Arbitrary values for testing
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment