# test_layernorm.py
import torch
import torch.nn as nn

# Blame-view annotation: committed by Woosuk Kwon.
from vllm import layernorm_ops
5
6
7
8
9
10


class RefRMSNorm(nn.Module):
    """Pure-PyTorch RMS normalization used as the reference implementation.

    Each vector along the last dimension is divided by the root of its mean
    square (plus ``eps``) and then multiplied element-wise by a learned
    ``weight`` of length ``hidden_size``.
    """

    def __init__(self, hidden_size: int, eps: float = 1e-6) -> None:
        """Create the scale parameter and store the stabilizing epsilon.

        Args:
            hidden_size: Length of the ``weight`` vector.
            eps: Constant added to the mean square before the reciprocal
                square root is taken.
        """
        super().__init__()
        # The scale is drawn uniformly from [-1e-3, 1e-3) rather than being
        # set to ones.
        weight = torch.empty(hidden_size)
        weight.uniform_(-1e-3, 1e-3)
        self.weight = nn.Parameter(weight)
        self.variance_epsilon = eps

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Return ``weight * hidden_states / sqrt(mean(hidden_states**2) + eps)``.

        Args:
            hidden_states: Tensor normalized along its last dimension.

        Returns:
            A tensor with the same shape as ``hidden_states``.
        """
        # The mean of squares is accumulated in float32 regardless of the
        # input dtype.
        variance = hidden_states.to(torch.float32).pow(2).mean(-1,
                                                               keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance +
                                                    self.variance_epsilon)
        # With a 16-bit weight, cast the normalized values to the weight's
        # dtype before scaling. (torch.half is an alias of torch.float16, so
        # listing both was redundant.)
        if self.weight.dtype in (torch.float16, torch.bfloat16):
            hidden_states = hidden_states.to(self.weight.dtype)
        return self.weight * hidden_states


@torch.inference_mode()
def run_rms_norm(
    num_tokens: int,
    hidden_size: int,
    dtype: torch.dtype,
) -> None:
    """Compare ``layernorm_ops.rms_norm`` with ``RefRMSNorm`` on random input.

    Args:
        num_tokens: Number of rows in the random input.
        hidden_size: Number of columns in the input and length of the weight.
        dtype: Data type used for both the input and the reference module.

    Raises:
        AssertionError: If the two outputs differ beyond ``atol=1e-3`` /
            ``rtol=1e-5``.
    """
    x = torch.randn(num_tokens, hidden_size, dtype=dtype, device='cuda')
    ref = RefRMSNorm(hidden_size).to(dtype).cuda()

    # NOTE(review): the op is presumed to write its result into ``out`` in
    # place, since its return value is not used here; confirm against the
    # kernel binding.
    out = torch.empty_like(x)
    layernorm_ops.rms_norm(
        out,
        x,
        ref.weight.data,
        ref.variance_epsilon,
    )
    ref_out = ref(x)
    assert torch.allclose(out, ref_out, atol=1e-3, rtol=1e-5)


46
def test_rms_norm() -> None:
    """Run ``run_rms_norm`` over every dtype / token-count / width combination.

    Covers half, bfloat16 and float inputs, and includes sizes that are not
    powers of two (7 tokens, hidden size 13).
    """
    for dtype in [torch.half, torch.bfloat16, torch.float]:
        for num_tokens in [7, 128, 2048]:
            for hidden_size in [13, 64, 1024, 5120]:
                print(f'Testing RMS kernel with dtype={dtype}, num_tokens='
                      f'{num_tokens}, hidden_size={hidden_size}')
                run_rms_norm(
                    num_tokens=num_tokens,
                    hidden_size=hidden_size,
                    dtype=dtype,
                )