# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import time
from pathlib import Path

import numpy as np
import torch
yanyan's avatar
yanyan committed
20
from torch import nn
yan.yan's avatar
v2.1  
yan.yan committed
21
from cumm import tensorview as tv
22
from spconv.core import ConvAlgo
yanyan's avatar
yanyan committed
23

yan.yan's avatar
yan.yan committed
24
25
import spconv.pytorch as spconv
from spconv.utils import Point2VoxelCPU3d
26

yan.yan's avatar
yan.yan committed
27
# torch.backends.cudnn.enabled = False
yan.yan's avatar
yan.yan committed
28
def waymo_data(batch_size=1, num_features=-1):
    """Voxelize the bundled benchmark point cloud into sparse-conv inputs.

    Reads ``data/benchmark-pc.npz`` located next to this file, passes its
    ``"pc"`` array through a ``Point2VoxelCPU3d`` generator and returns the
    resulting voxel features and voxel coordinates.

    Args:
        batch_size: Accepted for interface compatibility but not used; every
            voxel is tagged with batch index 0.
        num_features: When positive, the voxel features are replaced by an
            all-zero array of shape ``(num_voxels, num_features)`` with the
            same dtype. Otherwise the generator output reshaped to
            ``(-1, 3)`` is returned.

    Returns:
        Tuple ``(voxels, coors, grid_size)``: the feature array, the voxel
        indices with a leading column of zeros prepended, and
        ``gen.grid_size``.
    """
    gen = Point2VoxelCPU3d([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3,
                           150000, 1)
    # ``np.load`` on an .npz returns an NpzFile that keeps the archive open
    # until it is closed; read the array inside ``with`` so the file
    # descriptor is released immediately.
    with np.load(Path(__file__).parent / "data" / "benchmark-pc.npz") as data:
        pc = np.ascontiguousarray(data["pc"])
    print(pc.shape)

    voxels_tv, indices_tv, _ = gen.point_to_voxel(tv.from_numpy(pc))
    voxels = voxels_tv.numpy().reshape(-1, 3)
    if num_features > 0:
        # Only the shape matters to the caller here: swap the real features
        # for zeros with the requested channel count.
        voxels = np.zeros((voxels.shape[0], num_features), dtype=voxels.dtype)

    coors = indices_tv.numpy()
    N = coors.shape[0]
    # Prepend a constant 0 column (the batch index) to the voxel indices.
    coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1)
    return voxels, coors, gen.grid_size

yan.yan's avatar
yan.yan committed
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def waymo_data_large(batch_size=1):
    """Build a five-times larger voxel sample from the benchmark point cloud.

    Loads ``data/benchmark-pc.npz`` located next to this file, stacks the
    original ``"pc"`` array with four copies whose column 1 is shifted by
    1, 2, 3 and 4, and voxelizes the result with ``Point2VoxelCPU3d``.

    Args:
        batch_size: Accepted for interface compatibility but not used; every
            voxel is tagged with batch index 0.

    Returns:
        Tuple ``(voxels, coors, grid_size)``: the generator features reshaped
        to ``(-1, 3)``, the voxel indices with a leading column of zeros
        prepended, and ``gen.grid_size``.
    """
    gen = Point2VoxelCPU3d([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3,
                           1200000, 1)
    # ``np.load`` on an .npz returns an NpzFile that keeps the archive open
    # until it is closed; read the array inside ``with`` so the file
    # descriptor is released immediately.
    with np.load(Path(__file__).parent / "data" / "benchmark-pc.npz") as data:
        pc = np.ascontiguousarray(data["pc"])

    # Original cloud followed by four copies shifted along column 1.
    clouds = [pc]
    for offset in range(1, 5):
        shifted = pc.copy()
        shifted[:, 1] += offset
        clouds.append(shifted)
    pc = np.concatenate(clouds)
    print(pc.shape)

    voxels_tv, indices_tv, _ = gen.point_to_voxel(tv.from_numpy(pc))
    voxels = voxels_tv.numpy().reshape(-1, 3)
    coors = indices_tv.numpy()
    N = coors.shape[0]
    print("num voxels", N)
    # Prepend a constant 0 column (the batch index) to the voxel indices.
    coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1)
    return voxels, coors, gen.grid_size

yanyan's avatar
yanyan committed
71

yanyan's avatar
yanyan committed
72
class Net(nn.Module):
    """Sparse 3D convolution stack used as the benchmark workload.

    The network consists of seven stages (indice keys ``c0`` .. ``c6``) of
    two ``SubMConv3d`` layers each, with channel widths
    3 -> 64 -> 96 -> 128 -> 160 -> 192 -> 224 -> 256, separated by six
    ``SparseMaxPool3d(2, 2)`` layers. No normalization or activation layers
    are included.
    """

    def __init__(self, shape, algo):
        """Build the layer stack and the pre-allocated index grid.

        Args:
            shape: Spatial shape of the input; stored as ``self.shape`` and
                used for the trailing dimensions of the grid.
            algo: Value forwarded as ``algo`` to every convolution and
                pooling layer.
        """
        super().__init__()
        # Pooling uses the same algorithm as the convolutions; assign e.g.
        # ``ConvAlgo.Native`` here to benchmark pooling with another one.
        pool_algo = algo

        def subm(c_in, c_out, key):
            # 3x3x3 submanifold convolution without bias.
            return spconv.SubMConv3d(c_in,
                                     c_out,
                                     3,
                                     bias=False,
                                     indice_key=key,
                                     algo=algo)

        def pool(key=None):
            # Only pass ``indice_key`` when one is requested so the layers
            # built without a key receive exactly the original arguments.
            extra = {} if key is None else {"indice_key": key}
            return spconv.SparseMaxPool3d(2, 2, algo=pool_algo, **extra)

        # Variations that can be swapped in for experiments: a strided
        # ``spconv.SparseConv3d(C, C, 2, 2, bias=False, indice_key="mN")``
        # in place of each pooling layer, ``nn.BatchNorm1d`` / ``nn.ReLU``
        # after each stage, and trailing
        # ``spconv.SparseInverseConv3d(256, 128, 2, indice_key="m5", ...)`` /
        # ``spconv.SparseInverseConv3d(128, 64, 2, indice_key="m4", ...)``.
        self.net = spconv.SparseSequential(
            subm(3, 64, "c0"),
            subm(64, 64, "c0"),
            pool(),
            subm(64, 96, "c1"),
            subm(96, 96, "c1"),
            pool(),
            subm(96, 128, "c2"),
            subm(128, 128, "c2"),
            pool(),
            subm(128, 160, "c3"),
            subm(160, 160, "c3"),
            pool(),
            subm(160, 192, "c4"),
            subm(192, 192, "c4"),
            pool("m4"),
            subm(192, 224, "c5"),
            subm(224, 224, "c5"),
            pool("m5"),
            subm(224, 256, "c6"),
            subm(256, 256, "c6"),
        )
        max_batch_size = 1
        # The grid (dense map) is used for indice generation; a pre-allocated
        # grid can run faster. It is created directly on the CUDA device so
        # no equally large host buffer has to be allocated and copied first.
        self.grid = torch.full([max_batch_size, *shape],
                               -1,
                               dtype=torch.int32,
                               device="cuda")
        # self.grid = None
        self.shape = shape

    def forward(self, features, coors, batch_size, enable_timer: bool = False):
        """Wrap the inputs in a ``SparseConvTensor`` and run the stack.

        Args:
            features: Voxel features passed to ``SparseConvTensor``.
            coors: Voxel coordinates passed to ``SparseConvTensor``.
            batch_size: Batch size passed to ``SparseConvTensor``.
            enable_timer: Forwarded as ``enable_timer`` to
                ``SparseConvTensor``.

        Returns:
            The output of ``self.net`` for the constructed sparse tensor.
        """
        x = spconv.SparseConvTensor(features,
                                    coors,
                                    self.shape,
                                    batch_size,
                                    self.grid,
                                    enable_timer=enable_timer)
        return self.net(x)

235

yan.yan's avatar
yan.yan committed
236
237
238
239
class Net2(nn.Module):
    """Minimal two-layer sparse network for isolated convolution timing.

    Contains two ``SubMConv3d`` layers (3 -> 128 -> 128 channels) sharing
    the indice key ``c0``, without pooling, normalization or activation.
    """

    def __init__(self, shape, algo):
        """Build the two convolutions and the pre-allocated index grid.

        Args:
            shape: Spatial shape of the input; stored as ``self.shape`` and
                used for the trailing dimensions of the grid.
            algo: Value forwarded as ``algo`` to both convolutions.
        """
        super().__init__()
        conv_kwargs = dict(bias=False, indice_key="c0", algo=algo)
        # A wider second stage (``SparseMaxPool3d(2, 2)`` followed by
        # 256 -> 512 -> 512 ``SubMConv3d`` layers with indice key ``c1``) has
        # been used here for experiments and can be appended if needed.
        self.net = spconv.SparseSequential(
            spconv.SubMConv3d(3, 128, 3, **conv_kwargs),
            spconv.SubMConv3d(128, 128, 3, **conv_kwargs),
        )
        max_batch_size = 1
        # The grid (dense map) is used for indice generation; a pre-allocated
        # grid can run faster. It is created directly on the CUDA device so
        # no equally large host buffer has to be allocated and copied first.
        self.grid = torch.full([max_batch_size, *shape],
                               -1,
                               dtype=torch.int32,
                               device="cuda")
        # self.grid = None
        self.shape = shape

    def forward(self, features, coors, batch_size):
        """Wrap the inputs in a ``SparseConvTensor`` and run both layers.

        Args:
            features: Voxel features passed to ``SparseConvTensor``.
            coors: Voxel coordinates passed to ``SparseConvTensor``.
            batch_size: Batch size passed to ``SparseConvTensor``.

        Returns:
            The output of ``self.net`` for the constructed sparse tensor.
        """
        x = spconv.SparseConvTensor(features, coors, self.shape, batch_size,
                                    self.grid)
        return self.net(x)

292
293
294

import numpy as np
from cumm import tensorview as tv
yan.yan's avatar
v2.1  
yan.yan committed
295
from spconv.core_cc.csrc.sparse.all import SpconvOps
296
import pickle
yan.yan's avatar
v2.1  
yan.yan committed
297
298
import torch

299
300
from spconv.pytorch.cppcore import torch_tensor_to_tv

yan.yan's avatar
v2.1  
yan.yan committed
301
302
303
304
305

def sort_bench(path="/home/yy/asd.pkl", num_iters=10):
    """Repeatedly run ``SpconvOps.sort_1d_by_key`` on a pickled tensor.

    Args:
        path: Pickle file containing the tensor to sort. The default is the
            location that used to be hard-coded, so existing calls without
            arguments behave as before.
        num_iters: Number of sort invocations to issue.
    """
    # NOTE(review): ``pickle.load`` can execute arbitrary code contained in
    # the file -- only point this at trusted, locally produced dumps.
    with open(path, "rb") as f:
        a_th = pickle.load(f)
    # Buffer handed to the sort as its second argument; one row with as many
    # entries as ``a_th`` has columns, on the same device.
    mask_argsort = torch.empty((1, a_th.shape[1]),
                               dtype=torch.int32,
                               device=a_th.device)

    a_tv = torch_tensor_to_tv(a_th)
    mask_argsort_tv = torch_tensor_to_tv(mask_argsort)
    for _ in range(num_iters):
        # Each round works on a fresh clone -- presumably because the sort
        # modifies its key argument in place; TODO confirm.
        a_tv_1 = a_tv.clone()
        SpconvOps.sort_1d_by_key(a_tv_1[0], mask_argsort_tv[0])
yan.yan's avatar
yan.yan committed
315
import json
316

yanyan's avatar
yanyan committed
317
def main():
    """Time the forward pass of ``Net`` on the bundled voxel sample.

    Loads ``data/test_spconv.pkl`` located next to this file, moves the data
    to ``cuda:0`` as float16, runs one forward pass with gradients enabled
    and prints a few statistics, then runs 100 forward passes under
    ``torch.no_grad()`` and prints the mean duration of all but the first 10.
    """
    import pickle

    np.random.seed(50051)
    torch.manual_seed(50051)

    # The sample was presumably produced along these lines:
    #     voxels, coors, spatial_shape = waymo_data(num_features=128)
    #     with open("/home/yy/test_spconv.pkl", "wb") as f:
    #         pickle.dump((voxels, coors, spatial_shape), f)
    # ``waymo_data_large()`` can be substituted for a larger input.
    sample_path = Path(__file__).parent / "data" / "test_spconv.pkl"
    with open(sample_path, "rb") as f:
        voxels, coors, spatial_shape = pickle.load(f)

    print(spatial_shape)
    print(voxels.shape)
    # voxels = voxels[:100]
    # coors = coors[:100]

    dtype = torch.float16
    device = torch.device("cuda:0")
    voxels_th = torch.from_numpy(voxels).to(device).to(dtype)
    coors_th = torch.from_numpy(coors).to(device).int()
    voxels_th.requires_grad = True

    algo = spconv.ConvAlgo.MaskImplicitGemm
    print("ALGO")
    # Reference timings recorded on a 3080 Laptop:
    #   MaskImpGemm: 11.2ms
    #   MaskSplitImpGemm: 12.2ms
    #   Native: 13.7ms
    # F32
    #   MaskSplitImpGemm: 22ms
    #   MaskImplicitGemm: 23.5ms
    #   Native: 21.7ms
    # Pure Gemm
    #   Native: 6.6ms
    #   MaskImpGemm: 4.3ms
    #   MaskSplitImpGemm: 4.0ms
    # F16 Bwd
    #   MaskSplitImpGemm: 12.2ms
    #   MaskImpGemm: 13.8ms
    #   Native: 25.2ms
    # F32 Bwd
    #   Native: 41.9ms
    #   MaskImpGemm: 51.0ms
    #   MaskSplitImpGemm: 41.1ms
    # algo = None
    net = Net(spatial_shape, algo)
    net = net.to(device).eval().to(dtype)  # .train()
    spconv.assign_name_for_sparse_modules(net)

    print(coors_th.shape)
    out = net(voxels_th, coors_th, 1)
    print(out.spatial_shape)
    print(voxels.mean(), voxels.max(), voxels.min())
    # Random upstream gradient; only consumed by the optional backward
    # benchmark kept in the comment at the end of this function.
    dout = np.random.uniform(-0.2, 0.2, out.features.shape).astype(np.float32)
    dout_t = torch.from_numpy(dout).to(device).to(dtype)

    print(out.spatial_shape, out.features.sum(1).mean(), out.features.max(),
          out.features.min())

    num_iters = 100
    num_warmup = 10  # leading iterations excluded from the reported mean
    show_metrics = False
    times = []
    with torch.no_grad():
        for _ in range(num_iters):
            with tv.measure_duration() as measure:
                out_nograd = net(voxels_th, coors_th, 1, show_metrics)
            times.append(measure.duration)
            if not show_metrics:
                continue
            # Per-section timings collected by the sparse tensor's timer,
            # ordered by section name.
            timer = out_nograd._timer
            items = sorted(timer.get_all_pair_time().items(),
                           key=lambda kv: kv[0])
            print("SUM TIME:",  sum(v for _, v in items))
            print(json.dumps(dict(items), indent=2))
            inds_sum = sum(v for k, v in items if "gen_pairs" in k)
            print("SUM GEN INDS:",  inds_sum)

    print("spconv time", np.mean(times[num_warmup:]))

    # Optional backward benchmark:
    # times = []
    # for i in range(10):
    #     out = net(voxels_th, coors_th, 1)
    #     print("------------")
    #     torch.cuda.synchronize()
    #     t = time.time()
    #     out.features.backward(dout_t)
    #     torch.cuda.synchronize()
    #     times.append(time.time() - t)
    # print("spconv bw time", np.mean(times[5:]))
yanyan's avatar
yanyan committed
420

yanyan's avatar
yanyan committed
421

yanyan's avatar
yanyan committed
422
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()