"docs/contributing/overview.md" did not exist on "3a7f15a398727887137a021b8b32dc372b532087"
test_gptq.py 1.14 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import torch

from tests.kernels.utils import opcheck
from vllm import _custom_ops as ops  # noqa: F401


def test_gptq_shuffle_opcheck():
    """Run ``opcheck`` against the ``gptq_shuffle`` custom op.

    Inputs are a random int32 CUDA tensor of shape (1792, 4096), an empty
    int32 permutation tensor and a ``bit`` argument of 4.
    """
    # Both tensor arguments share the same placement and dtype.
    int32_on_gpu = dict(device='cuda', dtype=torch.int32)

    q_weight = torch.randint(-2000000, 2000000, (1792, 4096), **int32_on_gpu)
    # Zero-length permutation tensor.
    # NOTE(review): presumably this means "no reordering" - confirm.
    empty_perm = torch.empty((0, ), **int32_on_gpu)
    num_bits = 4

    op_args = (q_weight, empty_perm, num_bits)
    opcheck(torch.ops._C.gptq_shuffle, op_args)


def test_gptq_gemm_opcheck():
    """Run ``opcheck`` against the ``gptq_gemm`` custom op.

    Inputs, all on CUDA: float16 activations of shape (240, 4096), an int32
    weight of shape (512, 6144), int32 zeros of shape (32, 768), float16
    scales of shape (32, 6144) and an empty int32 index tensor. The op is
    called with ``use_exllama=True`` and ``bit=4``.
    """

    def fp16(shape):
        # Uniform random float16 tensor on the GPU.
        return torch.rand(shape, device='cuda', dtype=torch.float16)

    int32_on_gpu = dict(device='cuda', dtype=torch.int32)

    # The random draws below happen in the same order as the argument list.
    activations = fp16((240, 4096))
    # NOTE(review): 512 == 4096 / 8 and 768 == 6144 / 8, which looks like
    # eight 4-bit values packed per int32 - confirm against the kernel.
    q_weight = torch.randint(-2000000, 2000000, (512, 6144), **int32_on_gpu)
    q_zeros = torch.zeros((32, 768), **int32_on_gpu)
    q_scales = fp16((32, 6144))
    empty_idx = torch.empty((0, ), **int32_on_gpu)

    exllama_enabled, num_bits = True, 4
    op_args = (activations, q_weight, q_zeros, q_scales, empty_idx,
               exllama_enabled, num_bits)
    opcheck(torch.ops._C.gptq_gemm, op_args)