Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
f0d7a46d
Unverified
Commit
f0d7a46d
authored
May 20, 2020
by
Yan Yan
Committed by
GitHub
May 20, 2020
Browse files
Merge branch 'master' into master
parents
999c834c
83344f71
Changes
26
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
515 additions
and
9 deletions
+515
-9
src/spconv/CMakeLists.txt
src/spconv/CMakeLists.txt
+1
-1
src/spconv/all.cc
src/spconv/all.cc
+5
-6
src/spconv/spconv_ops.cc
src/spconv/spconv_ops.cc
+234
-0
test/fake_dist_train.py
test/fake_dist_train.py
+142
-0
test/fake_train.py
test/fake_train.py
+131
-0
test/test_conv.py
test/test_conv.py
+2
-2
No files found.
src/spconv/CMakeLists.txt
View file @
f0d7a46d
set
(
ALL_FILES all.cc indice.cc reordering.cc maxpool.cc nms.cc
)
set
(
ALL_FILES all.cc indice.cc reordering.cc maxpool.cc nms.cc
spconv_ops.cc
)
if
(
SPCONV_BuildCUDA
)
set
(
ALL_FILES
${
ALL_FILES
}
indice.cu reordering.cu maxpool.cu pillar_scatter.cu
)
endif
()
...
...
src/spconv/all.cc
View file @
f0d7a46d
...
...
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <torch/script.h>
#include <spconv/pool_ops.h>
#include <spconv/spconv_ops.h>
#include <spconv/pillar_scatter_ops.h>
...
...
@@ -19,16 +20,14 @@
#include <spconv/nms_ops.h>
static
auto
registry
=
torch
::
RegisterOperators
(
"spconv::get_indice_pairs_2d"
,
&
spconv
::
getIndicePair
<
2
>
)
torch
::
RegisterOperators
()
.
op
(
"spconv::get_indice_pairs_2d"
,
&
spconv
::
getIndicePair
<
2
>
)
.
op
(
"spconv::get_indice_pairs_3d"
,
&
spconv
::
getIndicePair
<
3
>
)
.
op
(
"spconv::get_indice_pairs_4d"
,
&
spconv
::
getIndicePair
<
4
>
)
.
op
(
"spconv::get_indice_pairs_grid_2d"
,
&
spconv
::
getIndicePairPreGrid
<
2
>
)
.
op
(
"spconv::get_indice_pairs_grid_3d"
,
&
spconv
::
getIndicePairPreGrid
<
3
>
)
.
op
(
"spconv::indice_conv_fp32"
,
&
spconv
::
indiceConv
<
float
>
)
.
op
(
"spconv::indice_conv_backward_fp32"
,
&
spconv
::
indiceConvBackward
<
float
>
)
.
op
(
"spconv::indice_conv_half"
,
&
spconv
::
indiceConv
<
at
::
Half
>
)
.
op
(
"spconv::indice_conv_backward_half"
,
&
spconv
::
indiceConvBackward
<
at
::
Half
>
)
.
op
(
"spconv::indice_conv"
,
&
spconv
::
indiceConv
)
.
op
(
"spconv::indice_conv_backward"
,
&
spconv
::
indiceConvBackward
)
.
op
(
"spconv::fused_indice_conv_fp32"
,
&
spconv
::
fusedIndiceConvBatchNorm
<
float
>
)
.
op
(
"spconv::fused_indice_conv_half"
,
&
spconv
::
fusedIndiceConvBatchNorm
<
at
::
Half
>
)
.
op
(
"spconv::indice_maxpool_fp32"
,
&
spconv
::
indiceMaxPool
<
float
>
)
...
...
src/spconv/spconv_ops.cc
0 → 100644
View file @
f0d7a46d
#include <spconv/spconv_ops.h>
namespace
spconv
{
/// Forward pass of an indice-based sparse convolution.
///
/// For every kernel offset the paired input rows are gathered into a dense
/// buffer, multiplied with that offset's filter slice, and the product is
/// scatter-added into the output rows.
///
/// @param features    Input features; size(1) is the number of input planes.
/// @param filters     Filter weights. Viewed here as
///                    [-1, numInPlanes, numOutPlanes], where numOutPlanes is
///                    the size of the last filter dimension.
/// @param indicePairs Index tensor read as `int`; size(0) is the kernel volume
///                    and subview(i, inverse) / subview(i, !inverse) select the
///                    gather / scatter indices of kernel offset i.
/// @param indiceNum   Tensor read as `int` holding the number of pairs of each
///                    kernel offset (copied to the CPU here).
/// @param numActOut   Number of rows of the returned tensor.
/// @param _inverse    Non-zero swaps which index row is used for gathering and
///                    which for scattering.
/// @param _subM       Non-zero enables the shortcut where the kernel offset
///                    with the largest pair count is applied as one dense
///                    matrix product instead of gather/scatter.
/// @return Tensor of shape [numActOut, numOutPlanes] with the dtype and device
///         of `features`.
torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
                         torch::Tensor indicePairs, torch::Tensor indiceNum,
                         int64_t numActOut, int64_t _inverse, int64_t _subM) {
  bool subM = _subM != 0;
  bool inverse = _inverse != 0;
  auto device = features.device().type();
  auto ndim = filters.dim() - 2;
  auto kernelVolume = indicePairs.size(0);
  auto numInPlanes = features.size(1);
  auto numOutPlanes = filters.size(ndim + 1);
  auto indicePairNumCpu = indiceNum.to({torch::kCPU});

  // std::max_element on an empty range returns the end pointer, and reading
  // past the end of indiceNum is undefined behaviour; reject both up front.
  TV_ASSERT_INVALID_ARG(kernelVolume > 0,
                        "indicePairs must contain at least one kernel offset");
  TV_ASSERT_INVALID_ARG(
      indicePairNumCpu.numel() >= kernelVolume,
      "indiceNum has fewer entries than indicePairs has kernel offsets");

  // The per-offset pair counts are loop-invariant; fetch the pointer once.
  const int *indicePairNumPtr = indicePairNumCpu.data_ptr<int>();
  auto indicePairMaxSizeIter =
      std::max_element(indicePairNumPtr, indicePairNumPtr + kernelVolume);
  int indicePairMaxOffset = indicePairMaxSizeIter - indicePairNumPtr;
  int indicePairMaxSize = *indicePairMaxSizeIter;

  auto options =
      torch::TensorOptions().dtype(features.dtype()).device(features.device());

  torch::Tensor output = torch::zeros({numActOut, numOutPlanes}, options);
  // Scratch buffers sized for the largest kernel offset so they can be reused
  // by every offset.
  torch::Tensor inputBuffer =
      torch::zeros({indicePairMaxSize, numInPlanes}, options);
  torch::Tensor outputBuffer =
      torch::zeros({indicePairMaxSize, numOutPlanes}, options);
  filters = filters.view({-1, numInPlanes, numOutPlanes});
  if (subM) {
    // the center index of subm conv don't need gather and scatter
    // add.
    // NOTE(review): this treats the offset with the largest pair count as the
    // center and assumes features has numActOut rows - confirm with callers.
    torch::mm_out(output, features, filters[indicePairMaxOffset]);
  }

  tv::torch_dispatch<float, double, at::Half>(
      features.scalar_type(), [&](auto I) {
        using T = decltype(I);
        for (int i = 0; i < kernelVolume; ++i) {
          auto nHot = indicePairNumPtr[i];
          // Skip empty offsets and, for subM, the offset already handled by
          // the dense product above.
          if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
            continue;
          }
          // Views over the first nHot rows of the scratch buffers.
          auto outputBufferBlob = torch::from_blob(
              outputBuffer.data_ptr<T>(), {nHot, numOutPlanes}, options);
          auto inputBufferBlob = torch::from_blob(
              inputBuffer.data_ptr<T>(), {nHot, numInPlanes}, options);

          // 1) gather the paired input rows into inputBuffer.
          if (device == torch::kCPU) {
            functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtor;
            gatherFtor(tv::CPU(), tv::torch2tv<T>(inputBuffer),
                       tv::torch2tv<const T>(features),
                       tv::torch2tv<const int>(indicePairs).subview(i, inverse),
                       nHot);
          }
#ifdef SPCONV_CUDA
          else if (device == torch::kCUDA) {
            functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtor;
            gatherFtor(tv::TorchGPU(), tv::torch2tv<T>(inputBuffer),
                       tv::torch2tv<const T>(features),
                       tv::torch2tv<const int>(indicePairs).subview(i, inverse),
                       nHot);
            TV_CHECK_CUDA_ERR();
            // Original author's note: torch::index_select_out was slower than
            // SparseGatherFunctor here, possibly due to int->long conversion.
          }
#endif
          else {
            TV_ASSERT_INVALID_ARG(false, "unknown device type");
          }

          // 2) dense GEMM for this kernel offset.
          torch::mm_out(outputBufferBlob, inputBufferBlob, filters[i]);

          // 3) scatter-add the product into the output rows.
          if (device == torch::kCPU) {
            functor::SparseScatterAddFunctor<tv::CPU, T, int> scatterFtor;
            scatterFtor(
                tv::CPU(), tv::torch2tv<T>(output),
                tv::torch2tv<const T>(outputBuffer),
                tv::torch2tv<const int>(indicePairs).subview(i, !inverse), nHot,
                true);
          }
#ifdef SPCONV_CUDA
          else if (device == torch::kCUDA) {
            functor::SparseScatterAddFunctor<tv::GPU, T, int> scatterFtor;
            scatterFtor(
                tv::TorchGPU(), tv::torch2tv<T>(output),
                tv::torch2tv<const T>(outputBuffer),
                tv::torch2tv<const int>(indicePairs).subview(i, !inverse), nHot,
                true);
            TV_CHECK_CUDA_ERR();
          }
#endif
          else {
            TV_ASSERT_INVALID_ARG(false, "unknown device type");
          }
        }
      });
  return output;
}
/// Backward pass of an indice-based sparse convolution.
///
/// For every kernel offset the paired rows of `features` and `outGrad` are
/// gathered into dense buffers; the filter gradient of that offset is their
/// product, and the input gradient is the gathered output gradient times the
/// transposed filter slice, scatter-added back into the input rows.
///
/// @param features    Input features of the forward pass; size(1) is the
///                    number of input planes.
/// @param filters     Filter weights. Viewed here as
///                    [-1, numInPlanes, numOutPlanes], where numOutPlanes is
///                    the size of the last filter dimension.
/// @param outGrad     Gradient with respect to the forward output.
/// @param indicePairs Index tensor read as `int`; size(0) is the kernel volume.
/// @param indiceNum   Tensor read as `int` holding the number of pairs of each
///                    kernel offset (copied to the CPU here).
/// @param _inverse    Non-zero swaps which index row addresses `features` and
///                    which addresses `outGrad`.
/// @param _subM       Non-zero enables the shortcut where the kernel offset
///                    with the largest pair count is handled by dense matrix
///                    products instead of gather/scatter.
/// @return {inputGrad, filtersGrad}: inputGrad has the shape of `features`,
///         filtersGrad has the original shape of `filters`.
std::vector<torch::Tensor>
indiceConvBackward(torch::Tensor features, torch::Tensor filters,
                   torch::Tensor outGrad, torch::Tensor indicePairs,
                   torch::Tensor indiceNum, int64_t _inverse, int64_t _subM) {
  bool subM = _subM != 0;
  bool inverse = _inverse != 0;

  auto device = features.device().type();
  auto ndim = filters.dim() - 2;
  auto kernelVolume = indicePairs.size(0);
  auto numInPlanes = features.size(1);
  auto numOutPlanes = filters.size(ndim + 1);
  auto indicePairNumCpu = indiceNum.to({torch::kCPU});

  // std::max_element on an empty range returns the end pointer, and reading
  // past the end of indiceNum is undefined behaviour; reject both up front.
  TV_ASSERT_INVALID_ARG(kernelVolume > 0,
                        "indicePairs must contain at least one kernel offset");
  TV_ASSERT_INVALID_ARG(
      indicePairNumCpu.numel() >= kernelVolume,
      "indiceNum has fewer entries than indicePairs has kernel offsets");

  // The per-offset pair counts are loop-invariant; fetch the pointer once.
  const int *indicePairNumPtr = indicePairNumCpu.data_ptr<int>();
  auto indicePairMaxSizeIter =
      std::max_element(indicePairNumPtr, indicePairNumPtr + kernelVolume);
  int indicePairMaxOffset = indicePairMaxSizeIter - indicePairNumPtr;
  int indicePairMaxSize = *indicePairMaxSizeIter;

  auto options =
      torch::TensorOptions().dtype(features.dtype()).device(features.device());
  // Copy the shape: sizes() returns a non-owning view into the tensor, and
  // `filters` is rebound to a reshaped view below while the shape is still
  // needed for the final view() of the filter gradient.
  auto filterShape = filters.sizes().vec();

  torch::Tensor inputGrad = torch::zeros(features.sizes(), options);
  torch::Tensor filtersGrad = torch::zeros(filterShape, options);
  // Scratch buffers sized for the largest kernel offset so they can be reused
  // by every offset.
  torch::Tensor inputBuffer =
      torch::zeros({indicePairMaxSize, numInPlanes}, options);
  torch::Tensor outputBuffer =
      torch::zeros({indicePairMaxSize, numOutPlanes}, options);

  filters = filters.view({-1, numInPlanes, numOutPlanes});
  filtersGrad = filtersGrad.view({-1, numInPlanes, numOutPlanes});
  if (subM) {
    // Dense shortcut for the offset with the largest pair count.
    // NOTE(review): assumes features and outGrad have matching row counts in
    // the subM case - confirm with callers.
    auto filterGradSub = filtersGrad[indicePairMaxOffset];
    torch::mm_out(filterGradSub, features.t(), outGrad);
    torch::mm_out(inputGrad, outGrad, filters[indicePairMaxOffset].t());
  }

  tv::torch_dispatch<float, double, at::Half>(
      features.scalar_type(), [&](auto I) {
        using T = decltype(I);
        for (int i = 0; i < kernelVolume; ++i) {
          auto nHot = indicePairNumPtr[i];
          // Skip empty offsets and, for subM, the offset already handled by
          // the dense products above.
          if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
            continue;
          }

          // 1) gather paired rows of features and outGrad into the buffers.
          if (device == torch::kCPU) {
            functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtor;
            functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtorOut;
            gatherFtor(tv::CPU(), tv::torch2tv<T>(inputBuffer),
                       tv::torch2tv<const T>(features),
                       tv::torch2tv<const int>(indicePairs).subview(i, inverse),
                       nHot);
            gatherFtorOut(
                tv::CPU(), tv::torch2tv<T>(outputBuffer),
                tv::torch2tv<const T>(outGrad),
                tv::torch2tv<const int>(indicePairs).subview(i, !inverse),
                nHot);
          }
#ifdef SPCONV_CUDA
          else if (device == torch::kCUDA) {
            functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtor;
            functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtorOut;
            gatherFtor(tv::TorchGPU(), tv::torch2tv<T>(inputBuffer),
                       tv::torch2tv<const T>(features),
                       tv::torch2tv<const int>(indicePairs).subview(i, inverse),
                       nHot);
            TV_CHECK_CUDA_ERR();
            gatherFtorOut(
                tv::TorchGPU(), tv::torch2tv<T>(outputBuffer),
                tv::torch2tv<const T>(outGrad),
                tv::torch2tv<const int>(indicePairs).subview(i, !inverse),
                nHot);
            TV_CHECK_CUDA_ERR();
          }
#endif
          else {
            TV_ASSERT_INVALID_ARG(false, "unknown device type");
          }

          // 2) dense GEMMs on views over the first nHot buffer rows. The
          //    filter gradient must be computed first: the second product
          //    overwrites the gathered inputs with the input gradient.
          auto filterGradSub = filtersGrad[i];
          auto outputBufferBlob = torch::from_blob(
              outputBuffer.data_ptr<T>(), {nHot, numOutPlanes}, options);
          auto inputBufferBlob = torch::from_blob(
              inputBuffer.data_ptr<T>(), {nHot, numInPlanes}, options);

          torch::mm_out(filterGradSub, inputBufferBlob.t(), outputBufferBlob);
          torch::mm_out(inputBufferBlob, outputBufferBlob, filters[i].t());

          // 3) scatter-add the per-offset input gradient into inputGrad.
          if (device == torch::kCPU) {
            functor::SparseScatterAddFunctor<tv::CPU, T, int> scatterFtor;
            scatterFtor(
                tv::CPU(), tv::torch2tv<T>(inputGrad),
                tv::torch2tv<const T>(inputBuffer),
                tv::torch2tv<const int>(indicePairs).subview(i, inverse), nHot);
          }
#ifdef SPCONV_CUDA
          else if (device == torch::kCUDA) {
            functor::SparseScatterAddFunctor<tv::GPU, T, int> scatterFtor;
            scatterFtor(
                tv::TorchGPU(), tv::torch2tv<T>(inputGrad),
                tv::torch2tv<const T>(inputBuffer),
                tv::torch2tv<const int>(indicePairs).subview(i, inverse), nHot);
            TV_CHECK_CUDA_ERR();
          }
#endif
          else {
            TV_ASSERT_INVALID_ARG(false, "unknown device type");
          }
        }
      });
  return {inputGrad, filtersGrad.view(filterShape)};
}
}
// namespace spconv
\ No newline at end of file
test/fake_dist_train.py
0 → 100644
View file @
f0d7a46d
import
horovod.torch
as
hvd
import
time
from
pathlib
import
Path
import
fire
import
numpy
as
np
import
torch
import
torch.nn.functional
as
F
import
tqdm
from
torch
import
distributed
,
nn
from
torch.utils
import
data
from
torch.utils.data
import
DataLoader
,
Dataset
from
torchvision
import
datasets
,
transforms
import
spconv
from
spconv.test_utils
import
generate_sparse_data
class FakeSparseDataset(Dataset):
    """Synthetic dataset whose items are two random sparse samples stacked.

    Every item draws two class labels in [0, 4); each label selects the value
    range passed to ``generate_sparse_data`` for one of the two samples.
    """

    def __len__(self):
        # Fixed nominal length; items are generated on the fly.
        return 500

    def __getitem__(self, idx):
        """Return ``(features, indices, labels)`` for one two-sample item.

        ``idx`` is not used: the content depends only on numpy's global
        random state.
        """
        # Label -> data_range argument handed to generate_sparse_data.
        range_by_label = {
            0: [-1, 1],
            1: [0, 2],
            2: [-2, 0],
            3: [-2, -2],
        }
        labels = np.random.randint(0, 4, size=[2])

        feature_chunks = []
        index_chunks = []
        for label in labels:
            sample = generate_sparse_data([16, 64, 64], [16 * 64 * 64 // 2],
                                          3,
                                          data_range=range_by_label[label],
                                          with_dense=False)
            feature_chunks.append(
                np.ascontiguousarray(sample["features"]).astype(np.float32))
            # Reorder the index columns so that column 3 comes first.
            index_chunks.append(
                np.ascontiguousarray(
                    sample["indices"][:, [3, 0, 1, 2]]).astype(np.int32))

        features = np.ascontiguousarray(np.concatenate(feature_chunks))
        indices = np.ascontiguousarray(np.concatenate(index_chunks))
        return features, indices, labels
class FakeClassifier(nn.Module):
    """Small sparse 3D conv net followed by a linear layer with 4 outputs.

    The backbone alternates submanifold convolutions with stride-2 sparse
    convolutions, each followed by BatchNorm1d and ReLU, and ends with
    ``spconv.ToDense``.
    """

    def __init__(self):
        super().__init__()
        layers = []
        channels = 3
        # Modules are created in exactly the order they run in, so parameter
        # initialisation order and sub-module indices stay the same.
        for stage, width in enumerate([8, 16, 32, 64], start=1):
            if stage > 1:
                # Stride-2 sparse convolution that widens the channels.
                layers += [
                    spconv.SparseConv3d(channels,
                                        width,
                                        3,
                                        stride=2,
                                        padding=1,
                                        use_hash=False),
                    nn.BatchNorm1d(width),
                    nn.ReLU(),
                ]
                channels = width
            layers += [
                spconv.SubMConv3d(channels,
                                  width,
                                  3,
                                  indice_key="subm{}".format(stage),
                                  padding=1,
                                  use_hash=False),
                nn.BatchNorm1d(width),
                nn.ReLU(),
            ]
            channels = width
        layers.append(spconv.ToDense())  # [64, 2, 8, 8]
        self.net = spconv.SparseSequential(*layers)
        self.linear = nn.Linear(64 * 2 * 8 * 8, 4)

    def forward(self, features, indices):
        """Run the network on one batch of two samples; returns the logits."""
        sparse_input = spconv.SparseConvTensor(features, indices.int(),
                                               [16, 64, 64], 2)
        dense = self.net(sparse_input)
        # Flatten everything except the batch dimension of size 2.
        flat = dense.view(2, -1)
        return self.linear(flat)
def run():
    """Train FakeClassifier for 100 steps on CUDA using Horovod.

    Initialises Horovod, pins this process to its local-rank GPU, seeds numpy
    per local rank, broadcasts the initial model and optimizer state from
    rank 0, and then runs a cross-entropy training loop with a Horovod
    distributed Adam optimizer.
    """
    hvd.init()
    torch.cuda.set_device(hvd.local_rank())
    np.random.seed(50051 + hvd.local_rank())

    dataset = FakeSparseDataset()
    device = torch.device('cuda')
    model = FakeClassifier()
    model.to(device)

    base_optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # Every worker starts from rank 0's parameters and optimizer state.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(base_optimizer, root_rank=0)
    optimizer = hvd.DistributedOptimizer(
        base_optimizer,
        named_parameters=model.named_parameters(),
        compression=hvd.Compression.none,
        op=hvd.Average)

    for step in tqdm.tqdm(list(range(100))):
        features, indices, label = dataset[step % len(dataset)]
        features_t = torch.from_numpy(features).to(device)
        indices_t = torch.from_numpy(indices).to(device)
        target = torch.from_numpy(label).to(device)

        output = model(features_t, indices_t)
        loss = F.cross_entropy(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# dev: developer smoke check. Pulls items from FakeSparseDataset, prints the
# first ten index rows, moves features/indices to the GPU and runs a freshly
# constructed FakeClassifier on them. Requires CUDA.
# NOTE(review): this flattened listing does not show where the `for` body
# ends, so it is unclear whether the network runs once per item or once after
# the loop - confirm against the original file.
def
dev
():
ds
=
FakeSparseDataset
()
for
i
in
range
(
10
):
features
,
indices
,
label
=
ds
[
i
]
print
(
indices
[:
10
])
features_t
=
torch
.
from_numpy
(
features
.
astype
(
np
.
float32
)).
cuda
()
indices_t
=
torch
.
from_numpy
(
indices
.
astype
(
np
.
int32
)).
cuda
()
net
=
FakeClassifier
().
cuda
()
net
(
features_t
,
indices_t
)
def main():
    """Command-line entry point: runs the distributed training loop."""
    run()


if __name__ == "__main__":
    # python-fire exposes main() as the command-line interface.
    fire.Fire(main)
test/fake_train.py
0 → 100644
View file @
f0d7a46d
import
time
from
pathlib
import
Path
import
fire
import
numpy
as
np
import
torch
import
torch.nn.functional
as
F
import
tqdm
from
torch
import
distributed
,
nn
from
torch.utils
import
data
from
torch.utils.data
import
DataLoader
,
Dataset
from
torchvision
import
datasets
,
transforms
import
spconv
from
spconv.test_utils
import
generate_sparse_data
class FakeSparseDataset(Dataset):
    """Synthetic dataset; each item stacks two randomly generated samples.

    Two class labels in [0, 4) are drawn per item, and each label picks the
    value range given to ``generate_sparse_data`` for one sample.
    """

    def __len__(self):
        # Nominal size only; nothing is stored.
        return 500

    def __getitem__(self, idx):
        """Build one item and return ``(features, indices, labels)``.

        ``idx`` is ignored; the result depends on numpy's global random state.
        """
        # Maps a label to the data_range argument of generate_sparse_data.
        label_ranges = {
            0: [-1, 1],
            1: [0, 2],
            2: [-2, 0],
            3: [-2, -2],
        }
        labels = np.random.randint(0, 4, size=[2])

        all_features = []
        all_indices = []
        for label in labels:
            generated = generate_sparse_data([16, 64, 64],
                                             [16 * 64 * 64 // 2],
                                             3,
                                             data_range=label_ranges[label],
                                             with_dense=False)
            sample_features = np.ascontiguousarray(generated["features"])
            all_features.append(sample_features.astype(np.float32))
            # Move index column 3 to the front.
            sample_indices = np.ascontiguousarray(
                generated["indices"][:, [3, 0, 1, 2]])
            all_indices.append(sample_indices.astype(np.int32))

        features = np.ascontiguousarray(np.concatenate(all_features))
        indices = np.ascontiguousarray(np.concatenate(all_indices))
        return features, indices, labels
class FakeClassifier(nn.Module):
    """Sparse 3D convolutional classifier with four output logits.

    Submanifold convolutions alternate with stride-2 sparse convolutions
    (each followed by BatchNorm1d and ReLU); the result is densified with
    ``spconv.ToDense`` and fed to a linear layer.
    """

    def __init__(self):
        super().__init__()

        def norm_act(width):
            # Normalisation + activation that follows every convolution.
            return [nn.BatchNorm1d(width), nn.ReLU()]

        # Layers are listed in execution order so parameter initialisation
        # order and sub-module indices stay the same.
        modules = [
            spconv.SubMConv3d(3,
                              8,
                              3,
                              indice_key="subm1",
                              padding=1,
                              use_hash=False)
        ]
        modules += norm_act(8)
        for stage, (narrow, wide) in enumerate([(8, 16), (16, 32), (32, 64)],
                                               start=2):
            # Stride-2 sparse convolution that widens the channels.
            modules.append(
                spconv.SparseConv3d(narrow,
                                    wide,
                                    3,
                                    stride=2,
                                    padding=1,
                                    use_hash=False))
            modules += norm_act(wide)
            modules.append(
                spconv.SubMConv3d(wide,
                                  wide,
                                  3,
                                  indice_key="subm{}".format(stage),
                                  padding=1,
                                  use_hash=False))
            modules += norm_act(wide)
        modules.append(spconv.ToDense())  # [64, 2, 8, 8]

        self.net = spconv.SparseSequential(*modules)
        self.linear = nn.Linear(64 * 2 * 8 * 8, 4)

    def forward(self, features, indices):
        """Compute logits for one batch of two samples."""
        x = spconv.SparseConvTensor(features, indices.int(), [16, 64, 64], 2)
        # Dense backbone output, flattened per sample (batch size 2).
        x = self.net(x).view(2, -1)
        return self.linear(x)
def run():
    """Train FakeClassifier for 100 steps on a single CUDA device.

    Seeds numpy with 50051, then optimises the model with Adam (lr=1e-3) on
    a cross-entropy loss using items from FakeSparseDataset.
    """
    np.random.seed(50051)
    dataset = FakeSparseDataset()
    device = torch.device('cuda')
    model = FakeClassifier()
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    for step in tqdm.tqdm(list(range(100))):
        features, indices, label = dataset[step % len(dataset)]
        features_t = torch.from_numpy(features).to(device)
        indices_t = torch.from_numpy(indices).to(device)
        target = torch.from_numpy(label).to(device)

        output = model(features_t, indices_t)
        loss = F.cross_entropy(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# dev: developer smoke check. Pulls items from FakeSparseDataset, prints the
# first ten index rows, moves features/indices to the GPU and runs a freshly
# constructed FakeClassifier on them. Requires CUDA.
# NOTE(review): this flattened listing does not show where the `for` body
# ends, so it is unclear whether the network runs once per item or once after
# the loop - confirm against the original file.
def
dev
():
ds
=
FakeSparseDataset
()
for
i
in
range
(
10
):
features
,
indices
,
label
=
ds
[
i
]
print
(
indices
[:
10
])
features_t
=
torch
.
from_numpy
(
features
.
astype
(
np
.
float32
)).
cuda
()
indices_t
=
torch
.
from_numpy
(
indices
.
astype
(
np
.
int32
)).
cuda
()
net
=
FakeClassifier
().
cuda
()
net
(
features_t
,
indices_t
)
def main():
    """Command-line entry point: runs the single-process training loop."""
    run()


if __name__ == "__main__":
    # python-fire exposes main() as the command-line interface.
    fire.Fire(main)
test/test_conv.py
View file @
f0d7a46d
...
...
@@ -581,7 +581,7 @@ def main():
if
all
([
s
>
1
,
d
>
1
]):
continue
device
=
torch
.
device
(
dev
)
num_points
=
[
5
]
*
bs
num_points
=
[
5
00
]
*
bs
sparse_dict
=
generate_sparse_data
(
shape
,
num_points
,
IC
)
...
...
@@ -601,7 +601,7 @@ def main():
net
.
net
[
0
].
weight
[:]
=
filters_t
out_ref
=
net_ref
(
features_dense_t
)
times
=
[]
for
i
in
range
(
0
):
for
i
in
range
(
1
0
):
t
=
time
.
time
()
out
=
net
(
features_t
,
indices_t
,
bs
)
torch
.
cuda
.
synchronize
()
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment