# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import time
from pathlib import Path

import numpy as np
import torch
yanyan's avatar
yanyan committed
20
from torch import nn
yan.yan's avatar
v2.1  
yan.yan committed
21
from cumm import tensorview as tv
22
from spconv.core import ConvAlgo
yanyan's avatar
yanyan committed
23

yan.yan's avatar
yan.yan committed
24
25
import spconv.pytorch as spconv
from spconv.utils import Point2VoxelCPU3d
26

yan.yan's avatar
yan.yan committed
27
# torch.backends.cudnn.enabled = False
yan.yan's avatar
yan.yan committed
28
def waymo_data(batch_size=1, num_features=-1):
    """Voxelize the bundled benchmark point cloud.

    Loads ``data/benchmark-pc.npz`` (next to this file), runs it through a
    ``Point2VoxelCPU3d`` generator and returns the result as NumPy arrays.

    Args:
        batch_size: Currently unused; kept for interface compatibility.
        num_features: When positive, the voxel features are replaced by an
            all-zero array with this many columns (same row count and dtype).
            Non-positive values keep the 3 columns produced by the generator.

    Returns:
        A ``(voxels, coors, grid_size)`` tuple: ``voxels`` is a 2-D feature
        array, ``coors`` is the generator's index array with one extra leading
        column of zeros, and ``grid_size`` is ``gen.grid_size``.
    """
    # NOTE(review): arguments are presumably voxel size, coordinate range,
    # point feature count, max voxels and max points per voxel -- confirm
    # against the Point2VoxelCPU3d signature.
    gen = Point2VoxelCPU3d([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3,
                           150000, 1)
    # gen = VoxelGeneratorV2([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 1,
    #                        150000)
    # np.load on an .npz archive returns an NpzFile that holds an open file
    # handle; the context manager closes it once the array has been read.
    with np.load(Path(__file__).parent / "data" / "benchmark-pc.npz") as data:
        pc = np.ascontiguousarray(data["pc"])
    voxels_tv, indices_tv, _ = gen.point_to_voxel(tv.from_numpy(pc))
    voxels = voxels_tv.numpy().reshape(-1, 3)

    if num_features > 0:
        # Only the row count and dtype are kept; the values become zeros.
        voxels = np.zeros((voxels.shape[0], num_features), dtype=voxels.dtype)
    coors = indices_tv.numpy()
    N = coors.shape[0]
    # Prepend a zero column (presumably the batch index of a single sample).
    coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1)
    return voxels, coors, gen.grid_size

yan.yan's avatar
yan.yan committed
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def waymo_data_large(batch_size=1):
    """Voxelize an enlarged version of the bundled benchmark point cloud.

    The point cloud from ``data/benchmark-pc.npz`` is concatenated with four
    copies of itself whose column 1 is shifted by 1, 2, 3 and 4 respectively,
    then voxelized with a ``Point2VoxelCPU3d`` generator. The concatenated
    point-cloud shape and the number of voxels are printed.

    Args:
        batch_size: Currently unused; kept for interface compatibility.

    Returns:
        A ``(voxels, coors, grid_size)`` tuple: ``voxels`` is reshaped to
        ``(-1, 3)``, ``coors`` is the generator's index array with one extra
        leading column of zeros, and ``grid_size`` is ``gen.grid_size``.
    """
    gen = Point2VoxelCPU3d([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3,
                           1200000, 1)
    # gen = VoxelGeneratorV2([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 1,
    #                        150000)
    # np.load on an .npz archive returns an NpzFile that holds an open file
    # handle; the context manager closes it once the array has been read.
    with np.load(Path(__file__).parent / "data" / "benchmark-pc.npz") as data:
        pc = np.ascontiguousarray(data["pc"])

    # Original cloud followed by copies shifted along column 1 by 1..4.
    clouds = [pc]
    for offset in range(1, 5):
        shifted = pc.copy()
        shifted[:, 1] += offset
        clouds.append(shifted)

    pc = np.concatenate(clouds)
    print(pc.shape)
    voxels_tv, indices_tv, _ = gen.point_to_voxel(tv.from_numpy(pc))
    voxels = voxels_tv.numpy().reshape(-1, 3)
    coors = indices_tv.numpy()
    N = coors.shape[0]
    print("num voxels", N)
    # Prepend a zero column (presumably the batch index of a single sample).
    coors = np.concatenate([np.full([N, 1], 0, coors.dtype), coors], axis=1)
    return voxels, coors, gen.grid_size

yanyan's avatar
yanyan committed
71

yanyan's avatar
yanyan committed
72
class Net(nn.Module):
    """Seven-stage sparse convolution stack used as a benchmark workload.

    Every stage is a pair of ``SubMConv3d`` layers (third positional argument
    3, no bias) sharing one ``indice_key``; consecutive stages are separated
    by ``SparseMaxPool3d(2, 2)``. Channel widths grow
    3 -> 64 -> 96 -> 128 -> 160 -> 192 -> 224 -> 256.

    Variants the original author left commented out: BatchNorm1d/ReLU after
    each stage, ``SparseConv3d(c, c, 2, 2)`` in place of the max pooling
    (indice keys "m0".."m5"), and two trailing ``SparseInverseConv3d`` layers
    keyed on "m5" and "m4".
    """

    def __init__(self, shape, algo):
        """Build the layer stack and pre-allocate the index grid.

        Args:
            shape: Spatial shape; unpacked into the grid dimensions and
                passed to ``SparseConvTensor`` in ``forward``.
            algo: Algorithm selector handed to every conv and pooling layer.
        """
        super().__init__()
        # Pooling uses the same algorithm as the convolutions
        # (alternative tried by the author: ConvAlgo.Native).
        pool_algo = algo

        # (in_channels, out_channels, conv indice_key, pooling kwargs).
        # Pooling kwargs of None means the stage is not followed by a pool.
        stage_specs = [
            (3, 64, "c0", {}),
            (64, 96, "c1", {}),
            (96, 128, "c2", {}),
            (128, 160, "c3", {}),
            (160, 192, "c4", {"indice_key": "m4"}),
            (192, 224, "c5", {"indice_key": "m5"}),
            (224, 256, "c6", None),
        ]

        layers = []
        for in_ch, out_ch, conv_key, pool_kwargs in stage_specs:
            layers.append(
                spconv.SubMConv3d(in_ch,
                                  out_ch,
                                  3,
                                  bias=False,
                                  indice_key=conv_key,
                                  algo=algo))
            layers.append(
                spconv.SubMConv3d(out_ch,
                                  out_ch,
                                  3,
                                  bias=False,
                                  indice_key=conv_key,
                                  algo=algo))
            if pool_kwargs is not None:
                layers.append(
                    spconv.SparseMaxPool3d(2, 2, algo=pool_algo,
                                           **pool_kwargs))
        self.net = spconv.SparseSequential(*layers)

        max_batch_size = 1
        # grid (dense map) is used for indice generation; a pre-allocated
        # grid can run faster (set self.grid = None to disable).
        grid_dims = [max_batch_size, *shape]
        self.grid = torch.full(grid_dims, -1, dtype=torch.int32).cuda()
        self.shape = shape

    def forward(self, features, coors, batch_size, enable_timer: bool = False):
        """Wrap the inputs in a ``SparseConvTensor`` and run the stack.

        Args:
            features: Per-voxel feature tensor.
            coors: Per-voxel coordinate tensor.
            batch_size: Batch size forwarded to ``SparseConvTensor``.
            enable_timer: Forwarded to ``SparseConvTensor`` as ``enable_timer``.

        Returns:
            Whatever ``self.net`` returns for the constructed sparse tensor.
        """
        sparse_input = spconv.SparseConvTensor(features,
                                               coors,
                                               self.shape,
                                               batch_size,
                                               self.grid,
                                               enable_timer=enable_timer)
        return self.net(sparse_input)

235

yan.yan's avatar
yan.yan committed
236
237
238
239
class Net2(nn.Module):
    """Two-layer submanifold convolution stack (3 -> 128 -> 128 channels).

    Both ``SubMConv3d`` layers share the indice key "c0". A deeper variant
    (max pooling followed by 256 -> 512 -> 512 convolutions keyed on "c1")
    was left commented out by the original author.
    """

    def __init__(self, shape, algo):
        """Build the layer stack and pre-allocate the index grid.

        Args:
            shape: Spatial shape; unpacked into the grid dimensions and
                passed to ``SparseConvTensor`` in ``forward``.
            algo: Algorithm selector handed to every conv layer.
        """
        super().__init__()
        self.net = spconv.SparseSequential(
            spconv.SubMConv3d(3,
                              128,
                              3,
                              bias=False,
                              indice_key="c0",
                              algo=algo),
            spconv.SubMConv3d(128,
                              128,
                              3,
                              bias=False,
                              indice_key="c0",
                              algo=algo),
        )
        max_batch_size = 1
        # grid (dense map) is used for indice generation. use pre-allocated grid can run faster.
        self.grid = torch.full([max_batch_size, *shape], -1,
                               dtype=torch.int32).cuda()
        # self.grid = None
        self.shape = shape

    def forward(self, features, coors, batch_size, enable_timer: bool = False):
        """Wrap the inputs in a ``SparseConvTensor`` and run the stack.

        The ``enable_timer`` parameter mirrors ``Net.forward`` and
        ``NetSm.forward`` so the three networks can be swapped in the
        benchmark loop, which passes it as a fourth positional argument.

        Args:
            features: Per-voxel feature tensor.
            coors: Per-voxel coordinate tensor.
            batch_size: Batch size forwarded to ``SparseConvTensor``.
            enable_timer: Forwarded to ``SparseConvTensor`` as ``enable_timer``.

        Returns:
            Whatever ``self.net`` returns for the constructed sparse tensor.
        """
        x = spconv.SparseConvTensor(features, coors, self.shape, batch_size,
                                    self.grid, enable_timer=enable_timer)
        return self.net(x)

292

EvernightAurora's avatar
EvernightAurora committed
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335

class NetSm(nn.Module):
    """Small benchmark network: four submanifold convolutions, one indice key.

    Channel widths run 3 -> 8 -> 16 -> 32 -> 64; every layer uses the third
    positional argument 3, no bias, and the shared indice key "c0".
    """

    def __init__(self, shape, algo):
        """Build the layer stack and pre-allocate the index grid.

        Args:
            shape: Spatial shape; unpacked into the grid dimensions and
                passed to ``SparseConvTensor`` in ``forward``.
            algo: Algorithm selector handed to every conv layer.
        """
        super().__init__()
        widths = (3, 8, 16, 32, 64)
        # One conv per consecutive (in, out) pair, created in order.
        convs = [
            spconv.SubMConv3d(c_in,
                              c_out,
                              3,
                              bias=False,
                              indice_key="c0",
                              algo=algo)
            for c_in, c_out in zip(widths[:-1], widths[1:])
        ]
        self.net = spconv.SparseSequential(*convs)

        max_batch_size = 1
        # grid (dense map) is used for indice generation; a pre-allocated
        # grid can run faster (set self.grid = None to disable).
        grid_dims = [max_batch_size, *shape]
        self.grid = torch.full(grid_dims, -1, dtype=torch.int32).cuda()
        self.shape = shape

    def forward(self, features, coors, batch_size, enable_timer: bool = False):
        """Wrap the inputs in a ``SparseConvTensor`` and run the stack.

        Args:
            features: Per-voxel feature tensor.
            coors: Per-voxel coordinate tensor.
            batch_size: Batch size forwarded to ``SparseConvTensor``.
            enable_timer: Forwarded to ``SparseConvTensor`` as ``enable_timer``.

        Returns:
            Whatever ``self.net`` returns for the constructed sparse tensor.
        """
        sparse_input = spconv.SparseConvTensor(features,
                                               coors,
                                               self.shape,
                                               batch_size,
                                               self.grid,
                                               enable_timer=enable_timer)
        return self.net(sparse_input)

336
337
import numpy as np
from cumm import tensorview as tv
yan.yan's avatar
v2.1  
yan.yan committed
338
from spconv.core_cc.csrc.sparse.all import SpconvOps
339
import pickle
yan.yan's avatar
v2.1  
yan.yan committed
340
341
import torch

342
343
from spconv.pytorch.cppcore import torch_tensor_to_tv

yan.yan's avatar
v2.1  
yan.yan committed
344
345
346
347
348

def sort_bench(path="/home/yy/asd.pkl", num_iters=10):
    """Repeatedly run ``SpconvOps.sort_1d_by_key`` on a pickled tensor.

    Args:
        path: Pickle file holding a torch tensor with at least two
            dimensions (``shape[1]`` is read). Defaults to the original
            author's local path, so most callers must override it.
        num_iters: How many times the sort is executed. Each iteration works
            on a fresh clone of the input, so the input is never modified.

    Returns:
        None. The function only exercises the sort; nothing is timed or
        printed here.
    """
    # NOTE: pickle.load executes arbitrary code from the file; only point
    # this at files you created yourself.
    with open(path, "rb") as f:
        a_th = pickle.load(f)
    # Output buffer for the sort, one row, same width and device as the input.
    mask_argsort = torch.empty((1, a_th.shape[1]),
                               dtype=torch.int32,
                               device=a_th.device)

    a_tv = torch_tensor_to_tv(a_th)
    mask_argsort_tv = torch_tensor_to_tv(mask_argsort)
    for _ in range(num_iters):
        a_tv_1 = a_tv.clone()
        SpconvOps.sort_1d_by_key(a_tv_1[0], mask_argsort_tv[0])
yan.yan's avatar
yan.yan committed
358
import json
359

yanyan's avatar
yanyan committed
360
def main():
    """Run the forward-pass benchmark on the pickled test data.

    Loads ``data/test_spconv.pkl`` (next to this file), moves the voxels and
    coordinates to ``cuda:0`` (features as float16), builds a ``NetSm`` with
    the ``MaskImplicitGemm`` algorithm, runs one forward pass with gradients
    enabled and then 100 timed forward passes under ``torch.no_grad()``.
    The mean of the measured durations, skipping the first 10 iterations,
    is printed as "spconv time".
    """
    import pickle

    np.random.seed(50051)
    torch.manual_seed(50051)

    # The pickle was produced with (kept for reference):
    # voxels, coors, spatial_shape = waymo_data(num_features=128)
    # with open("/home/yy/test_spconv.pkl", "wb") as f:
    #     pickle.dump((voxels, coors, spatial_shape), f)
    data_path = Path(__file__).parent / "data" / "test_spconv.pkl"
    with open(data_path, "rb") as f:
        voxels, coors, spatial_shape = pickle.load(f)
    # Alternative input: voxels, coors, spatial_shape = waymo_data_large()

    print(spatial_shape)
    print(voxels.shape)
    # To benchmark on a subset: voxels = voxels[:100]; coors = coors[:100]

    dtype = torch.float16
    device = torch.device("cuda:0")
    voxels_th = torch.from_numpy(voxels).to(device).to(dtype)
    coors_th = torch.from_numpy(coors).to(device).int()
    voxels_th.requires_grad = True

    algo = spconv.ConvAlgo.MaskImplicitGemm
    print("ALGO")
    # Reference numbers recorded by the original author:
    # 3080 Laptop
    # MaskImpGemm: 11.2ms
    # MaskSplitImpGemm: 12.2ms
    # Native: 13.7ms
    # F32
    # MaskSplitImpGemm: 22ms
    # MaskImplicitGemm: 23.5ms
    # Native: 21.7ms
    # Pure Gemm
    # Native: 6.6ms
    # MaskImpGemm: 4.3ms
    # MaskSplitImpGemm: 4.0ms
    # F16 Bwd
    # MaskSplitImpGemm: 12.2ms
    # MaskImpGemm: 13.8ms
    # Native: 25.2ms
    #
    # F32 Bwd
    # Native: 41.9ms
    # MaskImpGemm: 51.0ms
    # MaskSplitImpGemm: 41.1ms
    # algo = None

    # Evaluation mode (use .train() instead of .eval() to benchmark training).
    net = NetSm(spatial_shape, algo)
    net = net.to(device).eval().to(dtype)
    # net.load_state_dict(net.state_dict())
    spconv.assign_name_for_sparse_modules(net)

    print(coors_th.shape)
    out = net(voxels_th, coors_th, 1)
    print(out.spatial_shape)
    print(voxels.mean(), voxels.max(), voxels.min())

    # Random output gradient; only consumed by the disabled backward
    # benchmark at the end of this function.
    dout = np.random.uniform(-0.2, 0.2, out.features.shape).astype(np.float32)
    dout_t = torch.from_numpy(dout).to(device).to(dtype)

    out_feats = out.features
    print(out.spatial_shape, out_feats.sum(1).mean(), out_feats.max(),
          out_feats.min())

    times = []
    show_metrics = False
    with torch.no_grad():
        for _ in range(100):
            with tv.measure_duration() as measure:
                out_nograd = net(voxels_th, coors_th, 1, show_metrics)
            times.append(measure.duration)
            if not show_metrics:
                continue
            # Per-section timings collected by the sparse tensor's timer,
            # ordered by section name.
            pair_times = sorted(
                out_nograd._timer.get_all_pair_time().items(),
                key=lambda entry: entry[0])
            print("SUM TIME:",  sum(elapsed for _, elapsed in pair_times))
            print(json.dumps(dict(pair_times), indent=2))
            gen_pairs_total = sum(elapsed for name, elapsed in pair_times
                                  if "gen_pairs" in name)
            print("SUM GEN INDS:",  gen_pairs_total)

    # state = net.state_dict()
    # state.pop("net.2.max_num_voxels_during_training")
    # net.load_state_dict(state)
    # breakpoint()

    # The first 10 iterations are treated as warm-up and excluded.
    print("spconv time", np.mean(times[10:]))

    # Disabled backward benchmark:
    # times = []
    #
    # for i in range(10):
    #     out = net(voxels_th, coors_th, 1)
    #     print("------------")
    #     torch.cuda.synchronize()
    #     t = time.time()
    #     out.features.backward(dout_t)
    #     torch.cuda.synchronize()
    #     times.append(time.time() - t)
    #
    # # # print((net.grid == -1).float().sum(), net.grid.numel())
    # # # print("spconv time", time.time() - t)
    # print("spconv bw time", np.mean(times[5:]))
yanyan's avatar
yanyan committed
456

yanyan's avatar
yanyan committed
457

yanyan's avatar
yanyan committed
458
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()