conv.py 7.09 KB
Newer Older
1
import torch
PanZezhongQY's avatar
PanZezhongQY committed
2
import ctypes
3
from ctypes import c_uint64
PanZezhongQY's avatar
PanZezhongQY committed
4

5
6
7
8
from libinfiniop import (
    LIBINFINIOP,
    TestTensor,
    get_test_devices,
PanZezhongQY's avatar
PanZezhongQY committed
9
    check_error,
10
11
12
13
14
15
16
17
18
19
    test_operator,
    get_args,
    debug,
    get_tolerance,
    profile_operation,
    TestWorkspace,
    InfiniDtype,
    InfiniDtypeNames,
    InfiniDeviceNames,
    infiniopOperatorDescriptor_t,
PanZezhongQY's avatar
PanZezhongQY committed
20
)
21
22
from enum import Enum, auto
from typing import List, Tuple
PanZezhongQY's avatar
PanZezhongQY committed
23
24
25
26
27
28
29
30
31
import math
from torch.nn import functional as F

# Flag controlling whether to profile both the PyTorch reference and the
# library implementation of the operator.
# NOTE: a device synchronization function must be added manually to the lib
#       function when profiling, e.g. cudaDeviceSynchronize() for CUDA.
PROFILE = False
# Number of warm-up runs executed before timing starts.
NUM_PRERUN = 10
# Number of timed iterations per profiled operation.
NUM_ITERATIONS = 1000
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
_TEST_CASES = [
    # x_shape, x_stride, w_shape, w_stride, pads, strides, dilations, x_strides
    (
        (32, 3, 4),
        (12, 4, 1),
        (32, 3, 5),
        (15, 5, 1),
        (1,),
        (1,),
        (1,),
    ),
    (
        (1, 3, 4, 4),
        (48, 16, 4, 1), 
        (2, 3, 3, 3),
        (27, 9, 3, 1),
        (1, 1),
        (1, 2),
        (2, 1),
    ),
    (
        (32, 3, 32, 32),
        (32 * 32 * 3, 32 * 32, 32, 1),
        (64, 3, 5, 5),
        (75, 25, 5, 1),
        (2, 2),
        (2, 2),
        (1, 1),
    ),
    (
        (1, 1, 4, 4, 4),
        (64, 64, 16, 4, 1),
        (1, 1, 5, 5, 5),
        (125, 125, 25, 5, 1),
        (1, 1, 1),
        (1, 1, 1),
        (1, 1, 1),
    ),
    (
        (32, 3, 32, 32, 32),
        (32 * 32 * 32 * 3, 32 * 32 * 32, 32 * 32, 32, 1),
        (64, 3, 5, 5, 5),
        (375, 125, 25, 5, 1),
        (3, 2, 2),
        (4, 3, 3),
        (2, 2, 1),
    ),
]


# Data types used for testing
_TENSOR_DTYPES = [InfiniDtype.F16, InfiniDtype.F32, InfiniDtype.BF16]

# Tolerance map for different data types
_TOLERANCE_MAP = {
    InfiniDtype.F16: {"atol": 1e-3, "rtol": 1e-3},
    InfiniDtype.F32: {"atol": 1e-6, "rtol": 1e-6},
    InfiniDtype.BF16: {"atol": 1e-3, "rtol": 1e-2},
}

# Debug flag; overridden from the CLI arguments in __main__.
# NOTE: PROFILE, NUM_PRERUN and NUM_ITERATIONS are defined once near the top
# of the file; the duplicate redefinitions that used to live here were removed.
DEBUG = False
PanZezhongQY's avatar
PanZezhongQY committed
96

97
def conv(x, w, stride, padding, dilation, y_tensor, bias=None):
PanZezhongQY's avatar
PanZezhongQY committed
98
99
    match len(x.shape) - 2:
        case 1:
100
            y_tensor.copy_(F.conv1d(x, w, bias=bias, stride=stride, padding=padding, dilation=dilation))
PanZezhongQY's avatar
PanZezhongQY committed
101
        case 2:
102
            y_tensor.copy_(F.conv2d(x, w, bias=bias, stride=stride, padding=padding, dilation=dilation))
PanZezhongQY's avatar
PanZezhongQY committed
103
        case 3:
104
            y_tensor.copy_(F.conv3d(x, w, bias=bias, stride=stride, padding=padding, dilation=dilation))
PanZezhongQY's avatar
PanZezhongQY committed
105
106
107
108
109
        case _:
            print("Error: Pytorch -> Unsupported tensor dimension")


# infer the shape of the output given the inputs for a N-ary convolution
110
def inferShapeStride(
    x_shape: List[int],
    w_shape: List[int],
    pads: List[int],
    strides: List[int],
    dilations: List[int],
) -> Tuple[Tuple[int, ...], Tuple[int, ...]]:
    """Infer the output shape and contiguous strides of an N-ary convolution.

    Args:
        x_shape: input shape (batch, in_channels, *spatial).
        w_shape: weight shape (out_channels, in_channels, *kernel).
        pads, strides, dilations: per-spatial-dimension parameters; each must
            have exactly len(x_shape) - 2 entries.

    Returns:
        A (shape, strides) pair for the contiguous row-major output tensor
        of shape (batch, out_channels, *out_spatial).
    """
    assert (
        len(x_shape)
        == len(w_shape)
        == len(pads) + 2
        == len(dilations) + 2
        == len(strides) + 2
    ), "x and w should have the same length; pads, strides, and dilations should have the same length; the length of pads should be that of x - 2"
    # Standard convolution output-size formula. Floor division keeps the
    # computation in exact integer arithmetic (the previous
    # math.floor(a / b) form went through floating point).
    output_dims = [
        (x_shape[i + 2] + 2 * pads[i] - dilations[i] * (w_shape[i + 2] - 1) - 1)
        // strides[i]
        + 1
        for i in range(len(pads))
    ]
    output_shape = (x_shape[0], w_shape[0]) + tuple(output_dims)
    # Build row-major (contiguous) strides from the innermost dimension out.
    output_strides = [1]
    for s in reversed(output_shape[1:]):
        output_strides.insert(0, output_strides[0] * s)
    output_strides = tuple(output_strides)
    return output_shape, output_strides
PanZezhongQY's avatar
PanZezhongQY committed
138
139
140
141
142
143
144
145
146
147
148


# Convert a Python tuple of integers into a ctypes void pointer backed by a
# freshly allocated C array of int64 values.
def tuple_to_void_p(py_tuple: Tuple):
    c_array = (ctypes.c_int64 * len(py_tuple))(*py_tuple)
    # The cast result keeps a reference to c_array, keeping the buffer alive.
    return ctypes.cast(c_array, ctypes.c_void_p)


def test(
    handle,
    device,
    x_shape,
    x_stride,
    w_shape,
    w_stride,
    pads,
    strides,
    dilations,
    tensor_dtype=InfiniDtype.F16,
    sync=None,
):
    """Run one Conv test case end to end.

    Builds the input/weight/bias tensors, computes the PyTorch reference,
    creates and runs the library Conv operator, and asserts the results match
    within the dtype-specific tolerance.

    Args:
        handle: infiniop library handle.
        device: device identifier (index into InfiniDeviceNames).
        x_shape, x_stride: input tensor shape and strides.
        w_shape, w_stride: weight tensor shape and strides.
        pads, strides, dilations: per-spatial-dimension conv parameters.
        tensor_dtype: element dtype used for all tensors.
        sync: optional device synchronization callable.
    """
    assert len(pads) == len(strides) == len(dilations)
    x = TestTensor(x_shape, x_stride, dt=tensor_dtype, device=device, scale=0.01)
    w = TestTensor(w_shape, w_stride, dt=tensor_dtype, device=device, scale=0.01)
    y_shape, y_stride = inferShapeStride(x_shape, w_shape, pads, strides, dilations)
    y = TestTensor(y_shape, y_stride, dt=tensor_dtype, device=device)

    # One bias element per output channel; omitted for a single output channel
    # so the optional-bias code path is exercised too.
    b = TestTensor((w.shape[0],), (1,), dt=tensor_dtype, device=device, scale=0.01) if w.shape[0] > 1 else None

    # BUGFIX: the original message printed "dtype:" twice with no separator
    # (raw enum value immediately followed by its name).
    print(
        f"Testing Conv on {InfiniDeviceNames[device]} with x_shape: {x_shape},"
        f" w_shape: {w_shape}, b_shape: {w_shape[0]}, pads: {pads},"
        f" strides: {strides}, dilations: {dilations}, x_stride: {x_stride}"
        f" dtype:{InfiniDtypeNames[tensor_dtype]}"
    )
    # PyTorch reference result, written into y's torch tensor in place.
    conv(
        x.torch_tensor(),
        w.torch_tensor(),
        strides,
        pads,
        dilations,
        y.torch_tensor(),
        b.torch_tensor() if b is not None else None,
    )

    if sync is not None:
        sync()

    descriptor = infiniopOperatorDescriptor_t()
    check_error(
        LIBINFINIOP.infiniopCreateConvDescriptor(
            handle,
            ctypes.byref(descriptor),
            y.descriptor,
            x.descriptor,
            w.descriptor,
            b.descriptor if b is not None else None,
            tuple_to_void_p(pads),
            tuple_to_void_p(strides),
            tuple_to_void_p(dilations),
            len(pads),
        )
    )

    # Invalidate the shape and strides in the descriptor to prevent them from
    # being directly used by the kernel.
    for tensor in [x, y, w, b]:
        if tensor is not None:
            tensor.destroy_desc()

    workspace_size = ctypes.c_uint64(0)
    check_error(
        LIBINFINIOP.infiniopGetConvWorkspaceSize(descriptor, ctypes.byref(workspace_size))
    )
    workspace = TestWorkspace(workspace_size.value, y.device)

    def lib_conv():
        # Executes the library Conv operator into y's actual buffer.
        check_error(
            LIBINFINIOP.infiniopConv(
                descriptor,
                workspace.data(),
                workspace_size.value,
                y.data(),
                x.data(),
                w.data(),
                b.data() if b is not None else None,
                None,
            )
        )

    lib_conv()
    atol, rtol = get_tolerance(_TOLERANCE_MAP, tensor_dtype)
    if DEBUG:
        debug(y.actual_tensor(), y.torch_tensor(), atol=atol, rtol=rtol)
    assert torch.allclose(y.actual_tensor(), y.torch_tensor(), atol=atol, rtol=rtol)

    # Profiling workflow
    if PROFILE:
        # fmt: off
        # BUGFIX: the PyTorch lambda previously omitted the y_tensor argument,
        # passing the bias where y_tensor belongs.
        profile_operation("PyTorch", lambda: conv(x.torch_tensor(), w.torch_tensor(), strides, pads, dilations, y.torch_tensor(), b.torch_tensor() if b is not None else None), device, NUM_PRERUN, NUM_ITERATIONS)
        profile_operation("    lib", lambda: lib_conv(), device, NUM_PRERUN, NUM_ITERATIONS)
        # fmt: on
    check_error(LIBINFINIOP.infiniopDestroyConvDescriptor(descriptor))
PanZezhongQY's avatar
PanZezhongQY committed
230
231
232
233
234


if __name__ == "__main__":
    # Parse CLI options (device selection, debug/profile flags, iteration counts).
    args = get_args()

    # Configure testing options.
    # These assignments rebind the module-level globals that test() reads.
    DEBUG = args.debug
    PROFILE = args.profile
    NUM_PRERUN = args.num_prerun
    NUM_ITERATIONS = args.num_iterations
    # Run every test case on every requested device (and each dtype in
    # _TENSOR_DTYPES, handled inside test_operator).
    for device in get_test_devices(args):
        test_operator(device, test, _TEST_CASES, _TENSOR_DTYPES)

    print("\033[92mTest passed!\033[0m")
244