# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

yanyan's avatar
yanyan committed
15
16
17
18
19
import time
from pathlib import Path

import numpy as np
import torch
yanyan's avatar
yanyan committed
20
from torch import nn
yan.yan's avatar
v2.1  
yan.yan committed
21
from cumm import tensorview as tv
22
from spconv.core import ConvAlgo
yanyan's avatar
yanyan committed
23

yan.yan's avatar
yan.yan committed
24
25
import spconv.pytorch as spconv
from spconv.utils import Point2VoxelCPU3d
26

yan.yan's avatar
yan.yan committed
27
# torch.backends.cudnn.enabled = False
yan.yan's avatar
yan.yan committed
28
def waymo_data(batch_size=1, num_features=-1):
    """Voxelize the bundled benchmark point cloud.

    Loads ``data/benchmark-pc.npz`` from the directory containing this file,
    reads its ``"pc"`` array and runs it through ``Point2VoxelCPU3d``.

    Args:
        batch_size: Unused by this function; kept so existing callers that
            pass it keep working.
        num_features: When positive, the voxel features are replaced by an
            all-zero array of shape ``(num_voxels, num_features)`` with the
            same dtype. Otherwise the generator output reshaped to
            ``(-1, 3)`` is returned.

    Returns:
        Tuple ``(voxels, coors, grid_size)``. ``coors`` is the index array
        produced by the generator with one extra leading column filled with
        zeros; ``grid_size`` is ``gen.grid_size``.
    """
    # NOTE(review): arguments are presumably (voxel size, point range,
    # features per point, max voxels, max points per voxel) -- confirm
    # against the spconv Point2VoxelCPU3d signature.
    gen = Point2VoxelCPU3d([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3,
                           150000, 1)
    # np.load on an .npz archive returns an NpzFile that keeps the file open;
    # the context manager closes it once the array has been materialized.
    with np.load(Path(__file__).parent / "data" / "benchmark-pc.npz") as data:
        pc = np.ascontiguousarray(data["pc"])
    voxels_tv, indices_tv, _ = gen.point_to_voxel(tv.from_numpy(pc))
    voxels = voxels_tv.numpy().reshape(-1, 3)

    if num_features > 0:
        # Only the row count and dtype of the real features are kept.
        voxels = np.zeros((voxels.shape[0], num_features), dtype=voxels.dtype)
    coors = indices_tv.numpy()
    N = coors.shape[0]
    # Prepend a zero column (presumably the batch index of a single sample).
    coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1)
    return voxels, coors, gen.grid_size

yan.yan's avatar
yan.yan committed
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def waymo_data_large(batch_size=1):
    """Voxelize an enlarged copy of the bundled benchmark point cloud.

    The ``"pc"`` array from ``data/benchmark-pc.npz`` is stacked together
    with seven copies of itself whose column 1 is shifted by 1, 2, ..., 7,
    and the result is voxelized with ``Point2VoxelCPU3d``. The stacked point
    array shape and the voxel count are printed.

    Args:
        batch_size: Unused by this function; kept so existing callers that
            pass it keep working.

    Returns:
        Tuple ``(voxels, coors, grid_size)``. ``voxels`` is the generator
        output reshaped to ``(-1, 3)``; ``coors`` is the generator's index
        array with one extra leading column filled with zeros; ``grid_size``
        is ``gen.grid_size``.
    """
    # NOTE(review): arguments are presumably (voxel size, point range,
    # features per point, max voxels, max points per voxel) -- confirm
    # against the spconv Point2VoxelCPU3d signature.
    gen = Point2VoxelCPU3d([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3,
                           1200000, 1)
    # np.load on an .npz archive returns an NpzFile that keeps the file open;
    # the context manager closes it once the array has been materialized.
    with np.load(Path(__file__).parent / "data" / "benchmark-pc.npz") as data:
        pc = np.ascontiguousarray(data["pc"])

    # Original cloud followed by seven copies shifted along column 1.
    clouds = [pc]
    for offset in range(1, 8):
        shifted = pc.copy()
        shifted[:, 1] += offset
        clouds.append(shifted)
    pc = np.concatenate(clouds)

    print(pc.shape)
    voxels_tv, indices_tv, _ = gen.point_to_voxel(tv.from_numpy(pc))
    voxels = voxels_tv.numpy().reshape(-1, 3)
    coors = indices_tv.numpy()
    N = coors.shape[0]
    print("num voxels", N)
    # Prepend a zero column (presumably the batch index of a single sample).
    coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1)
    return voxels, coors, gen.grid_size

yanyan's avatar
yanyan committed
77

yanyan's avatar
yanyan committed
78
class Net(nn.Module):
    """Sparse 3D network used by the forward benchmark.

    The network has seven stages. Each stage is two ``SubMConv3d`` layers
    (third positional argument 3, no bias) sharing one ``indice_key``; the
    channel widths go 3 -> 64 -> 96 -> 128 -> 160 -> 192 -> 224 -> 256.
    Every stage except the last is followed by ``SparseMaxPool3d(2, 2)``.
    """

    def __init__(self, shape, algo):
        """Build the layer stack and the pre-allocated index grid.

        Args:
            shape: Spatial shape; stored as ``self.shape`` and used as the
                trailing dimensions of the pre-allocated grid.
            algo: Algorithm selector passed as ``algo=`` to every
                convolution and pooling layer.
        """
        super().__init__()
        # Pooling layers use the same algorithm as the convolutions.
        pool_algo = algo

        # (in_channels, out_channels, conv indice_key, extra pooling kwargs).
        # Only the last two pooling layers are given an indice_key.
        pooled_stages = [
            (3, 64, "c0", {}),
            (64, 96, "c1", {}),
            (96, 128, "c2", {}),
            (128, 160, "c3", {}),
            (160, 192, "c4", {"indice_key": "m4"}),
            (192, 224, "c5", {"indice_key": "m5"}),
        ]

        def conv(c_in, c_out, key):
            return spconv.SubMConv3d(c_in,
                                     c_out,
                                     3,
                                     bias=False,
                                     indice_key=key,
                                     algo=algo)

        layers = []
        for c_in, c_out, key, pool_kwargs in pooled_stages:
            layers.append(conv(c_in, c_out, key))
            layers.append(conv(c_out, c_out, key))
            layers.append(
                spconv.SparseMaxPool3d(2, 2, algo=pool_algo, **pool_kwargs))
        # Final stage: no pooling afterwards.
        layers.append(conv(224, 256, "c6"))
        layers.append(conv(256, 256, "c6"))
        self.net = spconv.SparseSequential(*layers)

        max_batch_size = 1
        # Dense grid handed to SparseConvTensor. Per the original author's
        # note it is used for indice generation, and pre-allocating it makes
        # the run faster.
        grid_dims = [max_batch_size, *shape]
        self.grid = torch.full(grid_dims, -1, dtype=torch.int32).cuda()
        self.shape = shape

    def forward(self, features, coors, batch_size, enable_timer: bool = False):
        """Wrap the inputs in a ``SparseConvTensor`` and run the layer stack.

        Args:
            features: Per-voxel feature tensor.
            coors: Per-voxel index tensor.
            batch_size: Batch size forwarded to ``SparseConvTensor``.
            enable_timer: Forwarded as ``enable_timer=`` to
                ``SparseConvTensor``.

        Returns:
            The output of ``self.net`` applied to the sparse tensor.
        """
        sparse_input = spconv.SparseConvTensor(features,
                                               coors,
                                               self.shape,
                                               batch_size,
                                               self.grid,
                                               enable_timer=enable_timer)
        return self.net(sparse_input)

241

yan.yan's avatar
yan.yan committed
242
243
244
245
class Net2(nn.Module):
    """Two-layer sparse network: ``SubMConv3d`` 3 -> 128 -> 128 channels.

    Both layers share ``indice_key="c0"`` and have no bias.
    """

    def __init__(self, shape, algo):
        """Build the two convolutions and the pre-allocated index grid.

        Args:
            shape: Spatial shape; stored as ``self.shape`` and used as the
                trailing dimensions of the pre-allocated grid.
            algo: Algorithm selector passed as ``algo=`` to both layers.
        """
        super().__init__()
        conv_kwargs = dict(bias=False, indice_key="c0", algo=algo)
        first = spconv.SubMConv3d(3, 128, 3, **conv_kwargs)
        second = spconv.SubMConv3d(128, 128, 3, **conv_kwargs)
        self.net = spconv.SparseSequential(first, second)

        max_batch_size = 1
        # Dense grid handed to SparseConvTensor. Per the original author's
        # note it is used for indice generation, and pre-allocating it makes
        # the run faster.
        grid_dims = [max_batch_size, *shape]
        self.grid = torch.full(grid_dims, -1, dtype=torch.int32).cuda()
        self.shape = shape

    def forward(self, features, coors, batch_size):
        """Wrap the inputs in a ``SparseConvTensor`` and run both layers.

        Args:
            features: Per-voxel feature tensor.
            coors: Per-voxel index tensor.
            batch_size: Batch size forwarded to ``SparseConvTensor``.

        Returns:
            The output of ``self.net`` applied to the sparse tensor.
        """
        sparse_input = spconv.SparseConvTensor(features, coors, self.shape,
                                               batch_size, self.grid)
        return self.net(sparse_input)

298

EvernightAurora's avatar
EvernightAurora committed
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341

class NetSm(nn.Module):
    """Small sparse network: four ``SubMConv3d`` layers, 3 -> 8 -> 16 -> 32 -> 64.

    All layers share ``indice_key="c0"`` and have no bias.
    """

    def __init__(self, shape, algo):
        """Build the convolution chain and the pre-allocated index grid.

        Args:
            shape: Spatial shape; stored as ``self.shape`` and used as the
                trailing dimensions of the pre-allocated grid.
            algo: Algorithm selector passed as ``algo=`` to every layer.
        """
        super().__init__()
        widths = [3, 8, 16, 32, 64]
        # One convolution per consecutive pair of channel widths.
        convs = [
            spconv.SubMConv3d(c_in,
                              c_out,
                              3,
                              bias=False,
                              indice_key="c0",
                              algo=algo)
            for c_in, c_out in zip(widths[:-1], widths[1:])
        ]
        self.net = spconv.SparseSequential(*convs)

        max_batch_size = 1
        # Dense grid handed to SparseConvTensor. Per the original author's
        # note it is used for indice generation, and pre-allocating it makes
        # the run faster.
        grid_dims = [max_batch_size, *shape]
        self.grid = torch.full(grid_dims, -1, dtype=torch.int32).cuda()
        self.shape = shape

    def forward(self, features, coors, batch_size, enable_timer: bool = False):
        """Wrap the inputs in a ``SparseConvTensor`` and run the chain.

        Args:
            features: Per-voxel feature tensor.
            coors: Per-voxel index tensor.
            batch_size: Batch size forwarded to ``SparseConvTensor``.
            enable_timer: Forwarded as ``enable_timer=`` to
                ``SparseConvTensor``.

        Returns:
            The output of ``self.net`` applied to the sparse tensor.
        """
        sparse_input = spconv.SparseConvTensor(features,
                                               coors,
                                               self.shape,
                                               batch_size,
                                               self.grid,
                                               enable_timer=enable_timer)
        return self.net(sparse_input)

342
343
import numpy as np
from cumm import tensorview as tv
yan.yan's avatar
v2.1  
yan.yan committed
344
from spconv.core_cc.csrc.sparse.all import SpconvOps
345
import pickle
yan.yan's avatar
v2.1  
yan.yan committed
346
347
import torch

348
349
from spconv.pytorch.cppcore import torch_tensor_to_tv

yan.yan's avatar
v2.1  
yan.yan committed
350
351
352
353
354

def sort_bench(path="/home/yy/asd.pkl", num_iters=10):
    """Repeatedly run ``SpconvOps.sort_1d_by_key`` on a pickled tensor.

    Args:
        path: Pickle file containing a torch tensor. The first row is
            sorted, and ``shape[1]`` sizes the argsort buffer. Defaults to
            the original author's local file.
        num_iters: Number of times the sort is repeated. Each repetition
            works on a fresh clone so the source data is never modified.

    Returns:
        None. The function only exercises the sort kernel.
    """
    # NOTE(review): unpickling can execute arbitrary code; only point this
    # at files from a trusted source.
    with open(path, "rb") as f:
        a_th = pickle.load(f)
    # int32 buffer with one row, handed to the sort as its second argument.
    mask_argsort = torch.empty((1, a_th.shape[1]),
                               dtype=torch.int32,
                               device=a_th.device)

    a_tv = torch_tensor_to_tv(a_th)
    mask_argsort_tv = torch_tensor_to_tv(mask_argsort)
    for _ in range(num_iters):
        a_tv_1 = a_tv.clone()
        SpconvOps.sort_1d_by_key(a_tv_1[0], mask_argsort_tv[0])
yan.yan's avatar
yan.yan committed
364
import json
365

yanyan's avatar
yanyan committed
366
def main():
    """Benchmark the forward pass of ``Net`` on the bundled sample.

    Loads ``data/test_spconv.pkl`` (next to this file), builds ``Net`` in
    float16 on ``cuda:0`` with ``ConvAlgo.MaskImplicitGemm``, runs one
    forward pass with gradients enabled and prints some statistics. It then
    times 100 forward passes under ``torch.no_grad()`` and prints the mean
    duration of all but the first 10 passes.
    """
    import pickle

    seed = 50051
    np.random.seed(seed)
    torch.manual_seed(seed)

    # The sample can be regenerated with waymo_data(num_features=128) and
    # pickled; waymo_data_large() is an alternative, larger input.
    sample_path = Path(__file__).parent / "data" / "test_spconv.pkl"
    with open(sample_path, "rb") as f:
        voxels, coors, spatial_shape = pickle.load(f)

    print(spatial_shape)
    print(voxels.shape)

    dtype = torch.float16
    device = torch.device("cuda:0")
    voxels_th = torch.from_numpy(voxels).to(device).to(dtype)
    coors_th = torch.from_numpy(coors).to(device).int()
    voxels_th.requires_grad_(True)

    algo = spconv.ConvAlgo.MaskImplicitGemm
    print("ALGO")
    # Reference timings noted by the original author (3080 Laptop):
    #   MaskImpGemm: 11.2ms
    #   MaskSplitImpGemm: 12.2ms
    #   Native: 13.7ms
    # F32
    #   MaskSplitImpGemm: 22ms
    #   MaskImplicitGemm: 23.5ms
    #   Native: 21.7ms
    # Pure Gemm
    #   Native: 6.6ms
    #   MaskImpGemm: 4.3ms
    #   MaskSplitImpGemm: 4.0ms
    # F16 Bwd
    #   MaskSplitImpGemm: 12.2ms
    #   MaskImpGemm: 13.8ms
    #   Native: 25.2ms
    # F32 Bwd
    #   Native: 41.9ms
    #   MaskImpGemm: 51.0ms
    #   MaskSplitImpGemm: 41.1ms
    net = Net(spatial_shape, algo).to(device).eval().to(dtype)
    spconv.assign_name_for_sparse_modules(net)

    print(coors_th.shape)
    out = net(voxels_th, coors_th, 1)
    print(out.spatial_shape)
    print(voxels.mean(), voxels.max(), voxels.min())

    # Random upstream gradient; only the disabled backward benchmark at the
    # end of this function consumes it.
    dout = np.random.uniform(-0.2, 0.2, out.features.shape).astype(np.float32)
    dout_t = torch.from_numpy(dout).to(device).to(dtype)

    out_features = out.features
    print(out.spatial_shape, out_features.sum(1).mean(), out_features.max(),
          out_features.min())

    times = []
    show_metrics = False
    with torch.no_grad():
        for _ in range(100):
            with tv.measure_duration() as measure:
                out_nograd = net(voxels_th, coors_th, 1, show_metrics)
            times.append(measure.duration)
            if not show_metrics:
                continue
            # Per-section timings recorded on the output tensor's timer.
            pair_times = out_nograd._timer.get_all_pair_time()
            items = sorted(pair_times.items(), key=lambda kv: kv[0])
            print("SUM TIME:", sum(v for _, v in items))
            inds_sum = sum(v for k, v in items if "gen_pairs" in k)
            gemm_sum = sum(v for k, v in items if "gemm" in k)
            print("SUM GEN INDS:", inds_sum, "GEMM:", gemm_sum)

    # Drop the first 10 iterations before averaging (presumably warm-up).
    print("spconv time", np.mean(times[10:]))

    # Disabled backward benchmark, kept for reference:
    # times = []
    # for i in range(10):
    #     out = net(voxels_th, coors_th, 1)
    #     print("------------")
    #     torch.cuda.synchronize()
    #     t = time.time()
    #     out.features.backward(dout_t)
    #     torch.cuda.synchronize()
    #     times.append(time.time() - t)
    # print("spconv bw time", np.mean(times[5:]))
yanyan's avatar
yanyan committed
467

yanyan's avatar
yanyan committed
468

yanyan's avatar
yanyan committed
469
if __name__ == "__main__":
    # Run the forward benchmark when this file is executed as a script.
    main()