# Copyright 2021 Yan Yan
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import time
from pathlib import Path

import numpy as np
import torch
from torch import nn
from cumm import tensorview as tv
from spconv.core import ConvAlgo

import spconv.pytorch as spconv
from spconv.utils import Point2VoxelCPU3d


def waymo_data(batch_size=1):
    """Voxelize the bundled benchmark point cloud.

    Loads ``data/benchmark-pc.npz`` located next to this file, reads its
    ``"pc"`` array and converts it to voxels with ``Point2VoxelCPU3d``.
    The point-cloud shape is printed as a side effect.

    Args:
        batch_size: Accepted for interface compatibility but currently
            unused; every voxel is assigned batch index 0.

    Returns:
        A ``(voxels, coors, grid_size)`` tuple where ``voxels`` is the
        voxel array reshaped to ``(-1, 3)``, ``coors`` is the voxel index
        array with a column of zeros (the batch index) prepended, and
        ``grid_size`` is ``gen.grid_size`` of the voxel generator.
    """
    # NOTE(review): positional arguments are presumably voxel size, point
    # cloud range, features per point, max voxels and max points per voxel
    # -- confirm against the spconv Point2VoxelCPU3d signature.
    gen = Point2VoxelCPU3d([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3,
                           150000, 1)

    # np.load on an .npz archive returns a lazily-reading NpzFile that keeps
    # the underlying file open; the context manager closes it as soon as the
    # "pc" array has been materialized.
    npz_path = Path(__file__).parent / "data" / "benchmark-pc.npz"
    with np.load(npz_path) as data:
        pc = np.ascontiguousarray(data["pc"])
    print(pc.shape)

    voxels_tv, indices_tv, _ = gen.point_to_voxel(tv.from_numpy(pc))
    voxels = voxels_tv.numpy().reshape(-1, 3)
    coors = indices_tv.numpy()

    # Prepend the batch index (always 0) as the first coordinate column.
    batch_col = np.zeros((coors.shape[0], 1), dtype=coors.dtype)
    coors = np.concatenate([batch_col, coors], axis=1)
    return voxels, coors, gen.grid_size

class Net(nn.Module):
    """Sparse 3D convolution stack used as the benchmark workload.

    The network has seven stages with output widths 64, 96, 128, 160, 192,
    224 and 256. Each stage is two ``spconv.SubMConv3d`` layers (kernel
    size 3, no bias) sharing the indice key ``"c<stage>"``. Every stage
    except the last is followed by ``spconv.SparseMaxPool3d(2, 2)``; only
    the pooling layers after stages 4 and 5 carry an indice key (``"m4"``
    and ``"m5"``). No normalization or activation layers are included.

    Args:
        shape: Spatial shape of the sparse tensor; also used to size the
            pre-allocated grid.
        algo: Convolution algorithm passed to every conv and pooling layer.
        max_batch_size: Leading dimension of the pre-allocated grid.
            Defaults to 1.
    """

    def __init__(self, shape, algo, max_batch_size=1):
        super().__init__()
        pool_algo = algo
        stage_widths = (64, 96, 128, 160, 192, 224, 256)
        # Pooling layers after these stages are created with an indice_key;
        # the earlier ones are created without one.
        pool_keys = {4: "m4", 5: "m5"}
        final_stage = len(stage_widths) - 1

        layers = []
        in_channels = 3
        for stage, out_channels in enumerate(stage_widths):
            conv_key = f"c{stage}"
            layers.append(
                spconv.SubMConv3d(in_channels,
                                  out_channels,
                                  3,
                                  bias=False,
                                  indice_key=conv_key,
                                  algo=algo))
            layers.append(
                spconv.SubMConv3d(out_channels,
                                  out_channels,
                                  3,
                                  bias=False,
                                  indice_key=conv_key,
                                  algo=algo))
            if stage != final_stage:
                pool_kwargs = {"algo": pool_algo}
                if stage in pool_keys:
                    pool_kwargs["indice_key"] = pool_keys[stage]
                # A strided spconv.SparseConv3d(c, c, 2, 2, bias=False) can
                # be substituted here to benchmark strided convolution
                # instead of max pooling.
                layers.append(spconv.SparseMaxPool3d(2, 2, **pool_kwargs))
            in_channels = out_channels
        self.net = spconv.SparseSequential(*layers)

        # grid (dense map) is used for indice generation; using a
        # pre-allocated grid can run faster. It is allocated directly on the
        # CUDA device so no equally large host tensor is created first.
        self.grid = torch.full([max_batch_size, *shape],
                               -1,
                               dtype=torch.int32,
                               device="cuda")
        self.shape = shape

    def forward(self, features, coors, batch_size):
        """Run the stack on one sparse input.

        Args:
            features: Per-voxel feature tensor; the first layer expects 3
                input channels.
            coors: Voxel coordinate tensor. NOTE(review): presumably int32
                with the batch index in column 0 -- confirm against
                ``spconv.SparseConvTensor``.
            batch_size: Batch size passed to ``spconv.SparseConvTensor``.

        Returns:
            The output of ``self.net`` for the constructed sparse tensor.
        """
        x = spconv.SparseConvTensor(features, coors, self.shape, batch_size,
                                    self.grid)
        return self.net(x)

class Net2(nn.Module):
    """Minimal two-layer sparse network for benchmarking wide convolutions.

    Consists of two ``spconv.SubMConv3d`` layers (3 -> 128 and 128 -> 128
    channels, kernel size 3, no bias) sharing the indice key ``"c0"``.

    Args:
        shape: Spatial shape of the sparse tensor; also used to size the
            pre-allocated grid.
        algo: Convolution algorithm passed to both conv layers.
        max_batch_size: Leading dimension of the pre-allocated grid.
            Defaults to 1.
    """

    def __init__(self, shape, algo, max_batch_size=1):
        super().__init__()
        self.net = spconv.SparseSequential(
            spconv.SubMConv3d(3,
                              128,
                              3,
                              bias=False,
                              indice_key="c0",
                              algo=algo),
            spconv.SubMConv3d(128,
                              128,
                              3,
                              bias=False,
                              indice_key="c0",
                              algo=algo),
        )
        # grid (dense map) is used for indice generation; using a
        # pre-allocated grid can run faster. It is allocated directly on the
        # CUDA device so no equally large host tensor is created first.
        self.grid = torch.full([max_batch_size, *shape],
                               -1,
                               dtype=torch.int32,
                               device="cuda")
        self.shape = shape

    def forward(self, features, coors, batch_size):
        """Run both conv layers on one sparse input.

        Args:
            features: Per-voxel feature tensor; the first layer expects 3
                input channels.
            coors: Voxel coordinate tensor. NOTE(review): presumably int32
                with the batch index in column 0 -- confirm against
                ``spconv.SparseConvTensor``.
            batch_size: Batch size passed to ``spconv.SparseConvTensor``.

        Returns:
            The output of ``self.net`` for the constructed sparse tensor.
        """
        x = spconv.SparseConvTensor(features, coors, self.shape, batch_size,
                                    self.grid)
        return self.net(x)

import numpy as np
from cumm import tensorview as tv
from spconv.core_cc.csrc.sparse.all import SpconvOps
import pickle
import torch

from spconv.pytorch.cppcore import torch_tensor_to_tv

def sort_bench(path="/home/yy/asd.pkl", repeats=10):
    """Repeatedly run ``SpconvOps.sort_1d_by_key`` on a pickled tensor.

    Args:
        path: Pickle file holding a torch tensor. The code indexes
            ``shape[1]`` and row 0, so the tensor needs at least two
            dimensions. Defaults to the developer-local dump used when
            this benchmark was written.
        repeats: Number of sort invocations. Defaults to 10.

    Returns:
        None. The function exists only to exercise the sort kernel.
    """
    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only point this at dumps you created yourself.
    with open(path, "rb") as f:
        a_th = pickle.load(f)

    # Output buffer for the sort, one int32 per key, on the keys' device.
    mask_argsort = torch.empty((1, a_th.shape[1]),
                               dtype=torch.int32,
                               device=a_th.device)

    a_tv = torch_tensor_to_tv(a_th)
    mask_argsort_tv = torch_tensor_to_tv(mask_argsort)
    for _ in range(repeats):
        # Work on a fresh copy each round so every call sees the same keys
        # (presumably sort_1d_by_key sorts in place -- TODO confirm).
        keys_tv = a_tv.clone()
        SpconvOps.sort_1d_by_key(keys_tv[0], mask_argsort_tv[0])

def main():
    """Time the forward pass of ``Net`` on the pickled benchmark input.

    Loads ``(voxels, coors, spatial_shape)`` from ``data/test_spconv.pkl``
    next to this file and moves the arrays to ``cuda:0`` (features cast to
    float16, coordinates to int32). It runs one forward pass with autograd
    enabled, then 20 timed forward passes under ``torch.no_grad()``. The
    mean of the last 10 timings, in seconds, is printed as ``spconv time``.
    Several shapes and statistics are printed along the way.
    """
    import pickle

    num_iters = 20
    num_warmup = 10  # leading iterations excluded from the reported mean

    np.random.seed(50051)
    torch.manual_seed(50051)

    # The pickle presumably was produced once from waymo_data():
    #     voxels, coors, spatial_shape = waymo_data()
    #     pickle.dump((voxels, coors, spatial_shape), f)
    # NOTE: pickle.load can execute arbitrary code contained in the file;
    # only use a data file you trust.
    data_path = Path(__file__).parent / "data" / "test_spconv.pkl"
    with open(data_path, "rb") as f:
        voxels, coors, spatial_shape = pickle.load(f)
    print(spatial_shape)
    print(voxels.shape)

    dtype = torch.float16
    device = torch.device("cuda:0")
    voxels_th = torch.from_numpy(voxels).to(device).to(dtype)
    coors_th = torch.from_numpy(coors).to(device).int()
    voxels_th.requires_grad = True

    algo = spconv.ConvAlgo.MaskImplicitGemm
    # The module ends up in training mode, as before.
    net = Net(spatial_shape, algo).to(device).to(dtype).train()

    print(coors_th.shape)
    out = net(voxels_th, coors_th, 1)
    print(out.spatial_shape)
    print(voxels.mean(), voxels.max(), voxels.min())

    # Random upstream gradient; only consumed by the disabled backward
    # benchmark at the end of this function.
    dout = np.random.uniform(-0.2, 0.2,
                             out.features.shape).astype(np.float32)
    dout_t = torch.from_numpy(dout).to(device).to(dtype)

    print(out.spatial_shape, out.features.mean(), out.features.max(),
          out.features.min())

    times = []
    with torch.no_grad():
        for _ in range(num_iters):
            print("------------")
            # Synchronize before and after so that only this iteration's
            # GPU work falls inside the measured interval.
            torch.cuda.synchronize()
            # perf_counter is monotonic and high resolution, unlike the
            # adjustable wall clock returned by time.time().
            start = time.perf_counter()
            net(voxels_th, coors_th, 1)
            torch.cuda.synchronize()
            times.append(time.perf_counter() - start)
    print("spconv time", np.mean(times[num_warmup:]))

    # Disabled backward benchmark:
    # times = []
    # for i in range(10):
    #     out = net(voxels_th, coors_th, 1)
    #     print("------------")
    #     torch.cuda.synchronize()
    #     t = time.perf_counter()
    #     out.features.backward(dout_t)
    #     torch.cuda.synchronize()
    #     times.append(time.perf_counter() - t)
    # print((net.grid == -1).float().sum(), net.grid.numel())
    # print("spconv bw time", np.mean(times[5:]))


# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()