# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.

from typing import Tuple
import math
import os
import pathlib
import pytest
import torch
import transformer_engine as te
import transformer_engine_torch as tex
from transformer_engine.pytorch.fp8 import FP8GlobalStateManager
from transformer_engine.common.recipe import Float8BlockScaling
from transformer_engine.pytorch.constants import TE_DType
from transformer_engine.pytorch.tensor.float8_blockwise_tensor import (
    Float8BlockQuantizer,
    Float8BlockwiseQTensor,
)

from references.blockwise_quantizer_reference import (
    BlockwiseQuantizerReference,
    QuantizeResult,
)
from test_float8_current_scaling_exact import (
    TestFP8RecipeLinearBase,
    TestFP8RecipeLayerNormLinearBase,
)

# Set NVTE_TEST_BLOCK_CURRENT_SCALING_EXACT_TENSOR_DUMP_DIR to override the
# default tensor dump directory.
TENSOR_DUMP_DIR = pathlib.Path(__file__).resolve().parent.parent.parent / "tensor_dumps"
tensor_dump_dir_env = os.getenv("NVTE_TEST_BLOCK_CURRENT_SCALING_EXACT_TENSOR_DUMP_DIR")
if tensor_dump_dir_env is not None:
    TENSOR_DUMP_DIR = pathlib.Path(tensor_dump_dir_env)
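# Example override (assumption: POSIX shell; the dump path is illustrative):
#   NVTE_TEST_BLOCK_CURRENT_SCALING_EXACT_TENSOR_DUMP_DIR=/path/to/tensor_dumps pytest -v <this file>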
recipe_available, reason_for_no_recipe = FP8GlobalStateManager.is_fp8_block_scaling_available()


class GetRecipes:

    @staticmethod
    def none():
        return None

    @staticmethod
    def fp8_blockwise():
        # Return the default Float8BlockScaling configuration.
        return Float8BlockScaling()

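
# A minimal sketch of how a recipe from GetRecipes.fp8_blockwise is typically
# consumed via te.pytorch.fp8_autocast. The helper name and layer sizes are
# illustrative only; pytest does not collect it (no "test_" prefix).
def _example_blockwise_autocast_usage():
    recipe = GetRecipes.fp8_blockwise()
    linear = te.pytorch.Linear(256, 128, bias=False)
    x = torch.randn(16, 256, device="cuda", dtype=torch.bfloat16)
    # Supported GEMMs inside this context run with FP8 blockwise scaling.
    with te.pytorch.fp8_autocast(enabled=True, fp8_recipe=recipe):
        y = linear(x)
    return y
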

# FP8 blockwise scaling
@pytest.mark.skipif(not recipe_available, reason=reason_for_no_recipe)
class TestFP8BlockScalingRecipeLinear(TestFP8RecipeLinearBase):

    @classmethod
    def setup_class(cls) -> None:
        # Configure RNG
        seed = 1234
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

    @pytest.mark.parametrize(
        "batch_size, hidden_size, out_size",
        [
            (16, 256, 128),
        ],
    )
    @pytest.mark.parametrize("dtype", [torch.bfloat16], ids=["bf16"])
    @pytest.mark.parametrize(
        "recipe1, recipe2",
        [
            (GetRecipes.none, GetRecipes.fp8_blockwise),
        ],
    )
    def test_fp8_block_scaling_with_linear_module(
        self,
        recipe1,
        recipe2,
        batch_size,
        hidden_size,
        out_size,
        dtype,
        use_bias=False,
    ):
        fp8_zero_tolerance_tensor_dumps_recipe2 = None
        # If the tensor dump directory exists, read the golden y, dgrad, wgrad,
        # and bgrad tensors from it; if any of the four is missing, leave the
        # dump map as None.
        tensor_map = self._check_golden_tensor_dumps(
            TENSOR_DUMP_DIR, recipe2, (batch_size, hidden_size, out_size), dtype, use_bias
        )
        if tensor_map is not None:
            fp8_zero_tolerance_tensor_dumps_recipe2 = tensor_map

        assert recipe1 == GetRecipes.none, "Only None recipe is supported for recipe1"
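        # The *_error arguments bound the allowed mismatch between the FP8
        # blockwise run (recipe2) and the unquantized reference (recipe1);
        # golden dumps, when present, are compared with zero tolerance.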
        self.compare_recipe(
            recipe1,
            recipe2,
            batch_size,
            hidden_size,
            out_size,
            use_bias,
            seed=torch.initial_seed(),
            dtype=dtype,
            y_error=0.5,
            dgrad_error=1,
            wgrad_error=1,
            bgrad_error=0.5,
            recipe1_golden_tensors=None,
            recipe2_golden_tensors=fp8_zero_tolerance_tensor_dumps_recipe2,
        )


@pytest.mark.skipif(not recipe_available, reason=reason_for_no_recipe)
class TestFP8BlockScalingRecipeLayerNormLinear(TestFP8RecipeLayerNormLinearBase):

    @classmethod
    def setup_class(cls) -> None:
        # Configure RNG
        seed = 1234
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)

    @pytest.mark.parametrize(
        "batch_size, hidden_size, out_size",
        [
            (16, 256, 128),
        ],
    )
    @pytest.mark.parametrize("dtype", [torch.bfloat16], ids=["bf16"])
    @pytest.mark.parametrize(
        "recipe1, recipe2",
        [
            (GetRecipes.none, GetRecipes.fp8_blockwise),
        ],
    )
    def test_fp8_block_scaling_with_layernorm_linear_module(
        self,
        recipe1,
        recipe2,
        batch_size,
        hidden_size,
        out_size,
        dtype,
        use_bias=False,
    ):
        fp8_zero_tolerance_tensor_dumps_recipe2 = None
        # If the tensor dump directory exists, read the golden y, dgrad, wgrad,
        # and bgrad tensors from it; if any of the four is missing, leave the
        # dump map as None.
        tensor_map = self._check_golden_tensor_dumps(
            TENSOR_DUMP_DIR,
            recipe2,
            (batch_size, hidden_size, out_size),
            dtype,
            use_bias,
            "LayerNorm",
        )
        if tensor_map is not None:
            fp8_zero_tolerance_tensor_dumps_recipe2 = tensor_map

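        # As above, the *_error arguments bound the recipe2 vs. recipe1
        # mismatch; golden dumps, when present, are compared with zero
        # tolerance.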
        self.compare_recipe(
            recipe1,
            recipe2,
            batch_size,
            hidden_size,
            out_size,
            use_bias,
            seed=torch.initial_seed(),
            dtype=dtype,
            y_error=0.9,
            ln_out_error=0.5,
            dgrad_error=1,
            wgrad_error=1,
            bgrad_error=0.5,
            recipe1_golden_tensors=None,
            recipe2_golden_tensors=fp8_zero_tolerance_tensor_dumps_recipe2,
        )