Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
19e73bbe
Commit
19e73bbe
authored
May 20, 2020
by
Yan Yan
Browse files
format code with clang-format, better c++ code
parent
c336139f
Changes
77
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2521 additions
and
1058 deletions
+2521
-1058
include/spconv/pillar_scatter_functor.h
include/spconv/pillar_scatter_functor.h
+7
-10
include/spconv/pillar_scatter_ops.h
include/spconv/pillar_scatter_ops.h
+4
-3
include/spconv/point2voxel.h
include/spconv/point2voxel.h
+10
-11
include/spconv/pool_ops.h
include/spconv/pool_ops.h
+16
-17
include/spconv/reordering.cu.h
include/spconv/reordering.cu.h
+1
-1
include/spconv/reordering.h
include/spconv/reordering.h
+14
-16
include/spconv/spconv_ops.h
include/spconv/spconv_ops.h
+5
-5
include/tensorview/common.h
include/tensorview/common.h
+94
-0
include/tensorview/cuda_utils.h
include/tensorview/cuda_utils.h
+31
-0
include/tensorview/eigen_utils.h
include/tensorview/eigen_utils.h
+41
-0
include/tensorview/helper_launch.h
include/tensorview/helper_launch.h
+0
-21
include/tensorview/kernel_utils.h
include/tensorview/kernel_utils.h
+22
-31
include/tensorview/mp_helper.h
include/tensorview/mp_helper.h
+6
-5
include/tensorview/prettyprint.h
include/tensorview/prettyprint.h
+475
-0
include/tensorview/pybind_utils.h
include/tensorview/pybind_utils.h
+170
-0
include/tensorview/tensor.h
include/tensorview/tensor.h
+598
-217
include/tensorview/tensorview.h
include/tensorview/tensorview.h
+830
-715
include/tensorview/tools.h
include/tensorview/tools.h
+58
-0
include/tensorview/torch_utils.h
include/tensorview/torch_utils.h
+133
-0
include/torch_utils.h
include/torch_utils.h
+6
-6
No files found.
include/spconv/pillar_scatter_functor.h
View file @
19e73bbe
// Copyright 2019 Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
...
@@ -16,14 +16,11 @@
#define POINTPILLARS_SCATTER_FUNCTOR_H_
#include <tensorview/tensorview.h>
namespace
spconv
{
namespace
functor
{
namespace
spconv
{
namespace
functor
{
template
<
typename
Device
,
typename
T
,
typename
Index
>
struct
PointPillarScatter
{
void
operator
()(
const
Device
&
d
,
tv
::
TensorView
<
T
>
canvas
,
struct
PointPillarScatter
{
void
operator
()(
const
Device
&
d
,
tv
::
TensorView
<
T
>
canvas
,
tv
::
TensorView
<
const
T
>
features
,
tv
::
TensorView
<
const
T
>
coors
);
};
...
...
include/spconv/pillar_scatter_ops.h
View file @
19e73bbe
...
...
@@ -16,8 +16,8 @@
#define PILLAR_SCATTER_OP_H_
#include <spconv/pillar_scatter_functor.h>
#include <tensorview/torch_utils.h>
#include <torch/script.h>
#include <torch_utils.h>
#include <utility/timer.h>
namespace
spconv
{
...
...
@@ -42,9 +42,10 @@ torch::Tensor pointPillarScatter(torch::Tensor features, torch::Tensor coors,
torch
::
zeros
({
shapeData
[
0
],
shapeData
[
1
],
shapeData
[
2
],
shapeData
[
3
]},
features
.
options
());
TV_ASSERT_RT_ERR
(
shapeData
[
1
]
==
features
.
size
(
1
),
"error"
);
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
functor
::
PointPillarScatter
<
tv
::
GPU
,
T
,
int
>
ftor
;
ftor
(
tv
::
TorchGPU
(),
tv
::
torch2tv
<
T
>
(
canvas
),
tv
::
torch2tv
<
const
T
>
(
features
.
squeeze
()),
ftor
(
tv
::
TorchGPU
(),
tv
::
torch2tv
<
T
>
(
canvas
),
tv
::
torch2tv
<
const
T
>
(
features
.
squeeze
()),
tv
::
torch2tv
<
const
T
>
(
coors
.
squeeze
()));
#endif
return
canvas
;
...
...
include/spconv/point2voxel.h
View file @
19e73bbe
...
...
@@ -29,7 +29,8 @@ using namespace pybind11::literals;
template
<
typename
DType
,
int
NDim
>
int
points_to_voxel_3d_np
(
py
::
array_t
<
DType
>
points
,
py
::
array_t
<
DType
>
voxels
,
py
::
array_t
<
DType
>
voxel_point_mask
,
py
::
array_t
<
int
>
coors
,
py
::
array_t
<
DType
>
voxel_point_mask
,
py
::
array_t
<
int
>
coors
,
py
::
array_t
<
int
>
num_points_per_voxel
,
py
::
array_t
<
int
>
coor_to_voxelidx
,
std
::
vector
<
DType
>
voxel_size
,
...
...
@@ -94,14 +95,12 @@ int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
}
template
<
typename
DType
,
int
NDim
>
int
points_to_voxel_3d_np_mean
(
py
::
array_t
<
DType
>
points
,
py
::
array_t
<
DType
>
voxel_point_mask
,
py
::
array_t
<
DType
>
voxels
,
py
::
array_t
<
DType
>
means
,
py
::
array_t
<
int
>
coors
,
py
::
array_t
<
int
>
num_points_per_voxel
,
py
::
array_t
<
int
>
coor_to_voxelidx
,
std
::
vector
<
DType
>
voxel_size
,
std
::
vector
<
DType
>
coors_range
,
int
max_points
,
int
max_voxels
)
{
int
points_to_voxel_3d_np_mean
(
py
::
array_t
<
DType
>
points
,
py
::
array_t
<
DType
>
voxel_point_mask
,
py
::
array_t
<
DType
>
voxels
,
py
::
array_t
<
DType
>
means
,
py
::
array_t
<
int
>
coors
,
py
::
array_t
<
int
>
num_points_per_voxel
,
py
::
array_t
<
int
>
coor_to_voxelidx
,
std
::
vector
<
DType
>
voxel_size
,
std
::
vector
<
DType
>
coors_range
,
int
max_points
,
int
max_voxels
)
{
auto
points_rw
=
points
.
template
mutable_unchecked
<
2
>();
auto
means_rw
=
means
.
template
mutable_unchecked
<
2
>();
auto
voxels_rw
=
voxels
.
template
mutable_unchecked
<
3
>();
...
...
@@ -174,8 +173,8 @@ int points_to_voxel_3d_np_mean(py::array_t<DType> points,
template
<
typename
DType
,
int
NDim
>
int
points_to_voxel_3d_with_filtering
(
py
::
array_t
<
DType
>
points
,
py
::
array_t
<
DType
>
voxels
,
py
::
array_t
<
DType
>
voxel_point_mask
,
py
::
array_t
<
int
>
voxel_mask
,
py
::
array_t
<
DType
>
mins
,
py
::
array_t
<
DType
>
maxs
,
py
::
array_t
<
int
>
coors
,
py
::
array_t
<
DType
>
voxel_point_mask
,
py
::
array_t
<
int
>
voxel_mask
,
py
::
array_t
<
DType
>
mins
,
py
::
array_t
<
DType
>
maxs
,
py
::
array_t
<
int
>
coors
,
py
::
array_t
<
int
>
num_points_per_voxel
,
py
::
array_t
<
int
>
coor_to_voxelidx
,
std
::
vector
<
DType
>
voxel_size
,
std
::
vector
<
DType
>
coors_range
,
int
max_points
,
int
max_voxels
,
int
block_factor
,
int
block_size
,
...
...
include/spconv/pool_ops.h
View file @
19e73bbe
// Copyright 2019 Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
...
@@ -16,14 +16,14 @@
#define SPARSE_POOL_OP_H_
#include <spconv/maxpool.h>
#include <tensorview/torch_utils.h>
#include <torch/script.h>
#include <torch_utils.h>
#include <utility/timer.h>
namespace
spconv
{
template
<
typename
T
>
torch
::
Tensor
indiceMaxPool
(
torch
::
Tensor
features
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
,
int64_t
numAct
)
{
torch
::
Tensor
indiceNum
,
int64_t
numAct
)
{
auto
device
=
features
.
device
().
type
();
auto
kernelVolume
=
indicePairs
.
size
(
0
);
auto
numInPlanes
=
features
.
size
(
1
);
...
...
@@ -43,8 +43,8 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
forwardFtor
(
tv
::
CPU
(),
tv
::
torch2tv
<
T
>
(
output
),
tv
::
torch2tv
<
const
T
>
(
features
),
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
),
nHot
);
}
#ifdef
SPCON
V_CUDA
}
#ifdef
T
V_CUDA
else
if
(
device
==
torch
::
kCUDA
)
{
functor
::
SparseMaxPoolForwardFunctor
<
tv
::
GPU
,
T
,
int
>
forwardFtor
;
forwardFtor
(
tv
::
TorchGPU
(),
tv
::
torch2tv
<
T
>
(
output
),
...
...
@@ -53,7 +53,7 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
TV_CHECK_CUDA_ERR
();
}
#endif
else
{
else
{
TV_ASSERT_INVALID_ARG
(
false
,
"unknown device type"
);
}
// totalTime += timer.report() / 1000.0;
...
...
@@ -63,17 +63,17 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
}
template
<
typename
T
>
torch
::
Tensor
indiceMaxPoolBackward
(
torch
::
Tensor
features
,
torch
::
Tensor
outFeatures
,
torch
::
Tensor
outGrad
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
)
{
torch
::
Tensor
indiceMaxPoolBackward
(
torch
::
Tensor
features
,
torch
::
Tensor
outFeatures
,
torch
::
Tensor
outGrad
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
)
{
auto
device
=
features
.
device
().
type
();
auto
numInPlanes
=
features
.
size
(
1
);
auto
indicePairNumCpu
=
indiceNum
.
to
({
torch
::
kCPU
});
auto
options
=
torch
::
TensorOptions
().
dtype
(
features
.
dtype
()).
device
(
features
.
device
());
torch
::
Tensor
inputGrad
=
torch
::
zeros
(
features
.
sizes
(),
options
);
auto
kernelVolume
=
indicePairs
.
size
(
0
);
auto
kernelVolume
=
indicePairs
.
size
(
0
);
for
(
int
i
=
0
;
i
<
kernelVolume
;
++
i
)
{
auto
nHot
=
indicePairNumCpu
.
data_ptr
<
int
>
()[
i
];
if
(
nHot
<=
0
)
{
...
...
@@ -85,8 +85,8 @@ torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
tv
::
torch2tv
<
const
T
>
(
features
),
tv
::
torch2tv
<
const
T
>
(
outGrad
),
tv
::
torch2tv
<
T
>
(
inputGrad
),
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
),
nHot
);
}
#ifdef
SPCON
V_CUDA
}
#ifdef
T
V_CUDA
else
if
(
device
==
torch
::
kCUDA
)
{
functor
::
SparseMaxPoolBackwardFunctor
<
tv
::
GPU
,
T
,
int
>
backwardFtor
;
backwardFtor
(
tv
::
TorchGPU
(),
tv
::
torch2tv
<
const
T
>
(
outFeatures
),
...
...
@@ -96,10 +96,9 @@ torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
TV_CHECK_CUDA_ERR
();
}
#endif
else
{
else
{
TV_ASSERT_INVALID_ARG
(
false
,
"unknown device type"
);
}
}
return
inputGrad
;
}
...
...
include/spconv/reordering.cu.h
View file @
19e73bbe
...
...
@@ -14,7 +14,7 @@
#ifndef REORDERING_CU_H_
#define REORDERING_CU_H_
#include <tensorview/
helper_kernel.cu
.h>
#include <tensorview/
kernel_utils
.h>
// see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.
namespace
spconv
{
...
...
include/spconv/reordering.h
View file @
19e73bbe
// Copyright 2019 Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
...
@@ -16,23 +16,21 @@
#define SPARSE_REORDERING_FUNCTOR_H_
#include <tensorview/tensorview.h>
namespace
spconv
{
namespace
functor
{
namespace
spconv
{
namespace
functor
{
template
<
typename
Device
,
typename
T
,
typename
Index
>
struct
SparseGatherFunctor
{
void
operator
()(
const
Device
&
d
,
tv
::
TensorView
<
T
>
buffer
,
tv
::
TensorView
<
const
T
>
features
,
tv
::
TensorView
<
const
Index
>
indices
,
int
size
);
struct
SparseGatherFunctor
{
void
operator
()(
const
Device
&
d
,
tv
::
TensorView
<
T
>
buffer
,
tv
::
TensorView
<
const
T
>
features
,
tv
::
TensorView
<
const
Index
>
indices
,
int
size
);
};
template
<
typename
Device
,
typename
T
,
typename
Index
>
struct
SparseScatterAddFunctor
{
void
operator
()(
const
Device
&
d
,
tv
::
TensorView
<
T
>
out_features
,
tv
::
TensorView
<
const
T
>
buffer
,
tv
::
TensorView
<
const
Index
>
indices
,
int
size
,
bool
stable
=
false
);
struct
SparseScatterAddFunctor
{
void
operator
()(
const
Device
&
d
,
tv
::
TensorView
<
T
>
out_features
,
tv
::
TensorView
<
const
T
>
buffer
,
tv
::
TensorView
<
const
Index
>
indices
,
int
size
,
bool
stable
=
false
);
};
}
// namespace functor
}
// namespace spconv
...
...
include/spconv/spconv_ops.h
View file @
19e73bbe
...
...
@@ -17,8 +17,8 @@
#include <spconv/indice.h>
#include <spconv/reordering.h>
#include <tensorview/torch_utils.h>
#include <torch/script.h>
#include <torch_utils.h>
#include <utility/timer.h>
namespace
spconv
{
...
...
@@ -101,7 +101,7 @@ getIndicePair(torch::Tensor indices, int64_t batchSize,
tv
::
torch2tv
<
int
>
(
indiceNum
),
kernelSize32
,
stride32
,
padding32
,
dilation32
,
outSpatialShape32
,
transpose
,
false
,
useHash
);
}
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
else
if
(
indices
.
device
().
type
()
==
torch
::
kCUDA
)
{
auto
getIndicePairFtor
=
functor
::
CreateSubMIndicePairFunctor
<
tv
::
GPU
,
int
,
int
,
NDim
>
();
...
...
@@ -149,7 +149,7 @@ getIndicePair(torch::Tensor indices, int64_t batchSize,
kernelSize32
,
stride32
,
padding32
,
dilation32
,
outSpatialShape32
,
transpose
);
}
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
else
if
(
indices
.
device
().
type
()
==
torch
::
kCUDA
)
{
auto
getIndicePairFtorP1
=
functor
::
CreateConvIndicePairFunctorP1
<
tv
::
GPU
,
int
,
int
,
NDim
>
();
...
...
@@ -269,7 +269,7 @@ std::vector<torch::Tensor> getIndicePairPreGrid(
dilation32
,
outSpatialShape32
,
transpose
);
gridOut
.
fill_
(
-
1
);
}
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
else
if
(
indices
.
device
().
type
()
==
torch
::
kCUDA
)
{
auto
getIndicePairFtor
=
functor
::
CreateSubMIndicePairFunctor
<
tv
::
GPU
,
int
,
int
,
NDim
>
();
...
...
@@ -299,7 +299,7 @@ std::vector<torch::Tensor> getIndicePairPreGrid(
transpose
,
true
);
gridOut
.
fill_
(
-
1
);
}
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
else
if
(
indices
.
device
().
type
()
==
torch
::
kCUDA
)
{
auto
getIndicePairFtorP1
=
functor
::
CreateConvIndicePairFunctorP1
<
tv
::
GPU
,
int
,
int
,
NDim
>
();
...
...
include/tensorview/common.h
0 → 100644
View file @
19e73bbe
// Copyright 2019-2020 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <sstream>
#include <stdexcept>
#ifdef TV_USE_STACKTRACE
#if defined(WIN32) || defined(_WIN32) || \
defined(__WIN32) && !defined(__CYGWIN__)
#define BOOST_STACKTRACE_USE_WINDBG
#else
// require linking with -ldl and -lbacktrace in linux
#define BOOST_STACKTRACE_USE_BACKTRACE
#endif
#include <boost/stacktrace.hpp>
#endif
namespace tv {

// Terminal overload: streams a single value into `ss`.
// The value is taken by copy, exactly as callers have always passed it.
template <class SStream, class T> void sstream_print(SStream &ss, T val) {
  ss << val;
}

// Streams every argument into `ss`, separating consecutive values with one
// space. No trailing space is written because the last value is handled by
// the single-value overload above.
template <class SStream, class T, class... TArgs>
void sstream_print(SStream &ss, T val, TArgs... args) {
  ss << val << " ";
  sstream_print(ss, args...);
}

// Formats all arguments (space separated) into a temporary string stream and
// writes the result to std::cout followed by std::endl.
// At least one argument is required: sstream_print has no zero-value overload.
template <class... TArgs> void ssprint(TArgs... args) {
  std::stringstream ss;
  sstream_print(ss, args...);
  std::cout << ss.str() << std::endl;
}

} // namespace tv

// Macros are not scoped by namespaces, so they are defined after `tv` closes.

// Appends a stack trace to `ss` when TV_USE_STACKTRACE is defined; expands to
// nothing otherwise.
#ifdef TV_USE_STACKTRACE
#define TV_BACKTRACE_PRINT(ss)                                                 \
  ss << std::endl << boost::stacktrace::stacktrace();
#else
#define TV_BACKTRACE_PRINT(ss)
#endif

// Throws std::runtime_error whose message is "<file> <line>\n" followed by the
// space-separated arguments.
#define TV_THROW_RT_ERR(...)                                                   \
  {                                                                            \
    std::stringstream tv_macro_ss_;                                            \
    tv_macro_ss_ << __FILE__ << " " << __LINE__ << "\n";                       \
    tv::sstream_print(tv_macro_ss_, __VA_ARGS__);                              \
    TV_BACKTRACE_PRINT(tv_macro_ss_);                                          \
    throw std::runtime_error(tv_macro_ss_.str());                              \
  }

// Same as TV_THROW_RT_ERR but throws std::invalid_argument.
#define TV_THROW_INVALID_ARG(...)                                              \
  {                                                                            \
    std::stringstream tv_macro_ss_;                                            \
    tv_macro_ss_ << __FILE__ << " " << __LINE__ << "\n";                       \
    tv::sstream_print(tv_macro_ss_, __VA_ARGS__);                              \
    TV_BACKTRACE_PRINT(tv_macro_ss_);                                          \
    throw std::invalid_argument(tv_macro_ss_.str());                           \
  }

// If `expr` is false, throws std::runtime_error. The message contains the
// file, the line, the stringified expression and the space-separated
// arguments. `expr` is evaluated exactly once.
#define TV_ASSERT_RT_ERR(expr, ...)                                            \
  {                                                                            \
    if (!(expr)) {                                                             \
      std::stringstream tv_macro_ss_;                                          \
      tv_macro_ss_ << __FILE__ << " " << __LINE__ << "\n";                     \
      tv_macro_ss_ << #expr << " assert failed. ";                             \
      tv::sstream_print(tv_macro_ss_, __VA_ARGS__);                            \
      TV_BACKTRACE_PRINT(tv_macro_ss_);                                        \
      throw std::runtime_error(tv_macro_ss_.str());                            \
    }                                                                          \
  }

// Same as TV_ASSERT_RT_ERR but throws std::invalid_argument.
#define TV_ASSERT_INVALID_ARG(expr, ...)                                       \
  {                                                                            \
    if (!(expr)) {                                                             \
      std::stringstream tv_macro_ss_;                                          \
      tv_macro_ss_ << __FILE__ << " " << __LINE__ << "\n";                     \
      tv_macro_ss_ << #expr << " assert failed. ";                             \
      tv::sstream_print(tv_macro_ss_, __VA_ARGS__);                            \
      TV_BACKTRACE_PRINT(tv_macro_ss_);                                        \
      throw std::invalid_argument(tv_macro_ss_.str());                         \
    }                                                                          \
  }
\ No newline at end of file
include/tensorview/cuda_utils.h
0 → 100644
View file @
19e73bbe
#pragma once
// from pytorch.aten
#include "tensorview.h"
#include <type_traits>
namespace
tv
{
namespace
cuda
{
// Ceiling division for positive operands: the smallest integer q such that
// q * b >= a. The arithmetic is carried out in the common type of T1 and T2
// and the quotient is converted to int on return.
template <typename T1, typename T2>
inline int DivUp(const T1 a, const T2 b) {
  const auto biased_numerator = a + b - 1;
  return biased_numerator / b;
}
// Upper bound on threads per block; 1024 requires cuda sm_2x or above.
constexpr int CUDA_NUM_THREADS = 1024;

// Threads per block to use for N work items: N rounded up to the next
// multiple of 32, never exceeding CUDA_NUM_THREADS.
inline int getNumThreads(const int N) {
  return (N > CUDA_NUM_THREADS) ? CUDA_NUM_THREADS : DivUp(N, 32) * 32;
}
// Number of blocks needed to cover N work items when each block runs
// getNumThreads(N) threads. N must be positive; TV_ASSERT_RT_ERR rejects
// anything else before the division is performed.
inline int getBlocks(const int N) {
  TV_ASSERT_RT_ERR(N > 0,
                   "CUDA kernel launch blocks must be positive, but got N=", N);
  const int threads_per_block = getNumThreads(N);
  return DivUp(N, threads_per_block);
}
}
// namespace cuda
}
// namespace tv
\ No newline at end of file
include/tensorview/eigen_utils.h
0 → 100644
View file @
19e73bbe
// Copyright 2019-2020 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "tensor.h"
#include "tensorview.h"
#include <eigen3/Eigen/Dense>
namespace
tv
{
// Wraps a 1-D or 2-D TensorView in a row-major Eigen::Map without copying.
//
// A 2-D view of shape (r, c) maps to an r x c matrix. A 1-D view of length n
// maps to a 1 x n row vector. The map aliases view.data(), so it is only
// valid while the memory behind `view` is alive.
//
// Template parameters:
//   Row, Col - compile-time extents, or Eigen::Dynamic (the default). A fixed
//              extent must equal the corresponding runtime extent of the
//              mapped matrix.
//
// TV_ASSERT_INVALID_ARG fires when the view is not 1-D/2-D or when a fixed
// extent does not match.
template <typename T, int Row = Eigen::Dynamic, int Col = Eigen::Dynamic>
Eigen::Map<Eigen::Matrix<T, Row, Col, Eigen::RowMajor>>
tv2eigen(TensorView<T> view) {
  TV_ASSERT_INVALID_ARG(view.ndim() <= 2 && view.ndim() > 0, "error");
  // Derive both extents from the rank first. The previous code always read
  // view.dim(1) for the column count, which is out of range for a 1-D view.
  int row = 1;
  int col = view.dim(0);
  if (view.ndim() == 2) {
    row = view.dim(0);
    col = view.dim(1);
  }
  // Validate fixed extents against the extents actually handed to Eigen, so
  // that a 1-D view is checked as the 1 x n matrix it is mapped to.
  if (Row != Eigen::Dynamic) {
    TV_ASSERT_INVALID_ARG(row == Row, "error");
  }
  if (Col != Eigen::Dynamic) {
    TV_ASSERT_INVALID_ARG(col == Col, "error");
  }
  Eigen::Map<Eigen::Matrix<T, Row, Col, Eigen::RowMajor>> eigen_map(
      view.data(), row, col);
  return eigen_map;
}
}
// namespace tv
include/tensorview/helper_launch.h
deleted
100644 → 0
View file @
c336139f
#pragma once
// from pytorch.aten
#include "tensorview.h"
namespace
tv
{
namespace
launch
{
// Rounded-up integer division: returns (a + b - 1) / b converted to int.
// For positive operands this is the number of chunks of size b that are
// needed to hold a items.
template <typename T1, typename T2>
inline int DivUp(const T1 a, const T2 b) {
  const auto padded = a + b - 1;
  const auto quotient = padded / b;
  return quotient;
}
// Threads per block; 1024 requires cuda sm_2x or above.
constexpr int CUDA_NUM_THREADS = 1024;

// Number of CUDA_NUM_THREADS-sized blocks needed to cover N work items.
// N must be positive; TV_ASSERT_RT_ERR rejects anything else.
inline int getBlocks(const int N) {
  TV_ASSERT_RT_ERR(N > 0,
                   "CUDA kernel launch blocks must be positive, but got N=", N);
  const int block_count = DivUp(N, CUDA_NUM_THREADS);
  return block_count;
}
}
// namespace launch
}
// namespace tv
\ No newline at end of file
include/tensorview/
helper_kernel.cu
.h
→
include/tensorview/
kernel_utils
.h
View file @
19e73bbe
#pragma once
// from tensorflow
namespace
tv
{
namespace
detail
{
namespace
tv
{
namespace
detail
{
template
<
typename
T
>
class
KernelLoop
{
struct
Iterator
{
__forceinline__
__device__
Iterator
(
T
index
,
T
delta
)
:
index_
(
index
),
delta_
(
delta
)
{}
template
<
typename
T
>
class
KernelLoop
{
struct
Iterator
{
__forceinline__
__device__
Iterator
(
T
index
,
T
delta
)
:
index_
(
index
),
delta_
(
delta
)
{}
__forceinline__
__device__
T
operator
*
()
const
{
return
index_
;
}
__forceinline__
__device__
Iterator
&
operator
++
()
{
__forceinline__
__device__
Iterator
&
operator
++
()
{
index_
+=
delta_
;
return
*
this
;
}
__forceinline__
__device__
bool
operator
!=
(
const
Iterator
&
other
)
const
{
__forceinline__
__device__
bool
operator
!=
(
const
Iterator
&
other
)
const
{
bool
greater
=
index_
>
other
.
index_
;
bool
less
=
index_
<
other
.
index_
;
// Anything past an end iterator (delta_ == 0) is equal.
// In range-based for loops, this optimizes to 'return less'.
if
(
!
other
.
delta_
)
{
if
(
!
other
.
delta_
)
{
return
less
;
}
if
(
!
delta_
)
{
if
(
!
delta_
)
{
return
greater
;
}
return
less
||
greater
;
...
...
@@ -43,7 +35,9 @@ public:
__forceinline__
__device__
KernelLoop
(
T
begin
,
T
delta
,
T
end
)
:
begin_
(
begin
),
delta_
(
delta
),
end_
(
end
)
{}
__forceinline__
__device__
Iterator
begin
()
const
{
return
Iterator
{
begin_
,
delta_
};
}
__forceinline__
__device__
Iterator
begin
()
const
{
return
Iterator
{
begin_
,
delta_
};
}
__forceinline__
__device__
Iterator
end
()
const
{
return
Iterator
{
end_
,
0
};
}
private:
...
...
@@ -53,29 +47,26 @@ private:
};
}
// namespace detail
template
<
typename
T
,
int
NumILP
=
1
>
__forceinline__
__device__
detail
::
KernelLoop
<
T
>
KernelLoopX
(
T
count
)
{
template
<
typename
T
,
int
NumILP
=
1
>
__forceinline__
__device__
detail
::
KernelLoop
<
T
>
KernelLoopX
(
T
count
)
{
return
detail
::
KernelLoop
<
T
>
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
,
gridDim
.
x
*
blockDim
.
x
*
NumILP
,
count
);
gridDim
.
x
*
blockDim
.
x
*
NumILP
,
count
);
}
// Helper to visit indices in the range 0 <= i < count using the y-coordinate.
// Usage: for(int i : KernelLoopY(count)) { visit(i); }
template
<
typename
T
,
int
NumILP
=
1
>
__forceinline__
__device__
detail
::
KernelLoop
<
T
>
KernelLoopY
(
T
count
)
{
template
<
typename
T
,
int
NumILP
=
1
>
__forceinline__
__device__
detail
::
KernelLoop
<
T
>
KernelLoopY
(
T
count
)
{
return
detail
::
KernelLoop
<
T
>
(
blockIdx
.
y
*
blockDim
.
y
+
threadIdx
.
y
,
gridDim
.
y
*
blockDim
.
y
*
NumILP
,
count
);
gridDim
.
y
*
blockDim
.
y
*
NumILP
,
count
);
}
// Helper to visit indices in the range 0 <= i < count using the z-coordinate.
// Usage: for(int i : KernelLoopZ(count)) { visit(i); }
template
<
typename
T
,
int
NumILP
=
1
>
__forceinline__
__device__
detail
::
KernelLoop
<
T
>
KernelLoopZ
(
T
count
)
{
template
<
typename
T
,
int
NumILP
=
1
>
__forceinline__
__device__
detail
::
KernelLoop
<
T
>
KernelLoopZ
(
T
count
)
{
return
detail
::
KernelLoop
<
T
>
(
blockIdx
.
z
*
blockDim
.
z
+
threadIdx
.
z
,
gridDim
.
z
*
blockDim
.
z
*
NumILP
,
count
);
gridDim
.
z
*
blockDim
.
z
*
NumILP
,
count
);
}
}
// namespace tv
\ No newline at end of file
include/
spconv
/mp_helper.h
→
include/
tensorview
/mp_helper.h
View file @
19e73bbe
...
...
@@ -3,7 +3,7 @@
#include <type_traits>
#include <utility>
namespace
spcon
v
{
namespace
t
v
{
template
<
class
...
T
>
struct
mp_list
{};
template
<
class
T
,
T
...
I
>
...
...
@@ -11,9 +11,10 @@ using mp_list_c = mp_list<std::integral_constant<T, I>...>;
namespace
detail
{
template
<
class
...
T
,
class
F
>
constexpr
F
mp_for_each_impl
(
mp_list
<
T
...
>
,
F
&&
f
)
{
return
std
::
initializer_list
<
int
>
{(
f
(
T
()),
0
)...},
std
::
forward
<
F
>
(
f
);
template
<
class
...
Ts
,
class
F
>
constexpr
F
mp_for_each_impl
(
mp_list
<
Ts
...
>
,
F
&&
f
)
{
return
(
void
)(
std
::
initializer_list
<
int
>
{(
f
(
Ts
()),
0
)...}),
std
::
forward
<
F
>
(
f
);
}
template
<
class
F
>
constexpr
F
mp_for_each_impl
(
mp_list
<>
,
F
&&
f
)
{
...
...
@@ -42,6 +43,6 @@ using mp_rename = typename detail::mp_rename_impl<A, B>::type;
template
<
class
L
,
class
F
>
constexpr
F
mp_for_each
(
F
&&
f
)
{
return
detail
::
mp_for_each_impl
(
mp_rename
<
L
,
mp_list
>
(),
std
::
forward
<
F
>
(
f
));
}
}
// namespace
spcon
v
}
// namespace
t
v
#endif
\ No newline at end of file
include/tensorview/prettyprint.h
0 → 100644
View file @
19e73bbe
// Copyright Louis Delacroix 2010 - 2014.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// A pretty printing library for C++
//
// Usage:
// Include this header, and operator<< will "just work".
#ifndef H_PRETTY_PRINT
#define H_PRETTY_PRINT
#include <cstddef>
#include <iterator>
#include <memory>
#include <ostream>
#include <set>
#include <tuple>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <valarray>
namespace
pretty_print
{
namespace detail {

// Result types for the sizeof-based overload probes below: `yes` and `no`
// have different sizes, so sizeof(probe(...)) tells which overload was picked.
struct sfinae_base {
  using yes = char;
  using no = yes[2];
};

// value is true when T declares a nested type named const_iterator.
template <typename T> struct has_const_iterator : private sfinae_base {
private:
  // Viable only when U::const_iterator names a type.
  template <typename U> static yes &test(typename U::const_iterator *);
  // Fallback chosen when the overload above is removed by SFINAE.
  template <typename U> static no &test(...);

public:
  static const bool value = sizeof(test<T>(nullptr)) == sizeof(yes);
  using type = T;
};

// beg_value / end_value are true when T has a const member function
// begin() / end() that can be selected as `T::const_iterator (T::*)() const`.
template <typename T> struct has_begin_end : private sfinae_base {
private:
  // Pointer-to-member type of a const, argument-less function returning
  // U::const_iterator.
  template <typename U>
  using const_getter = typename U::const_iterator (U::*)() const;

  // Viable only when &U::begin can be cast to const_getter<U>.
  template <typename U>
  static yes &
  f(typename std::enable_if<
      std::is_same<decltype(static_cast<const_getter<U>>(&U::begin)),
                   const_getter<U>>::value>::type *);
  template <typename U> static no &f(...);

  // Viable only when &U::end can be cast to const_getter<U>.
  template <typename U>
  static yes &
  g(typename std::enable_if<
      std::is_same<decltype(static_cast<const_getter<U>>(&U::end)),
                   const_getter<U>>::value,
      void>::type *);
  template <typename U> static no &g(...);

public:
  static bool const beg_value = sizeof(f<T>(nullptr)) == sizeof(yes);
  static bool const end_value = sizeof(g<T>(nullptr)) == sizeof(yes);
};

} // namespace detail
// Aggregate of the three strings used when printing a container with
// characters of type TChar. Members are initialised positionally as
// { prefix, delimiter, postfix }.
template <typename TChar> struct delimiters_values {
  using char_type = TChar;

  const char_type *prefix;    // written before the first element
  const char_type *delimiter; // written between consecutive elements
  const char_type *postfix;   // written after the last element
};
// Primary template that associates a delimiters_values<TChar> instance with a
// container type T. Only the static member `values` is declared here; its
// definitions are provided by specializations.
template <typename T, typename TChar> struct delimiters {
  using type = delimiters_values<TChar>;

  static const type values;
};
// Functor to print containers. You can use this directly if you want
// to specify a non-default delimiters type. The printing logic can
// be customized by specializing the nested `printer` template.
//
// The helper stores a reference to the container it was constructed with, so
// the container must outlive the helper.
template <typename T, typename TChar = char,
          typename TCharTraits = ::std::char_traits<TChar>,
          typename TDelimiters = delimiters<T, TChar>>
struct print_container_helper {
  using delimiters_type = TDelimiters;
  using ostream_type = std::basic_ostream<TChar, TCharTraits>;

  // Default body printer: streams every element of `c`, writing the
  // delimiter (when it is not null) between consecutive elements.
  template <typename U> struct printer {
    static void print_body(const U &c, ostream_type &stream) {
      // Unqualified calls so that ADL can find begin/end for user types.
      using std::begin;
      using std::end;

      auto it = begin(c);
      const auto the_end = end(c);

      if (it != the_end) {
        for (;;) {
          stream << *it;

          if (++it == the_end)
            break;

          if (delimiters_type::values.delimiter != nullptr)
            stream << delimiters_type::values.delimiter;
        }
      }
    }
  };

  print_container_helper(const T &container) : container_(container) {}

  // Writes prefix, body and postfix to `stream`; a null prefix or postfix is
  // skipped.
  inline void operator()(ostream_type &stream) const {
    if (delimiters_type::values.prefix != nullptr)
      stream << delimiters_type::values.prefix;

    printer<T>::print_body(container_, stream);

    if (delimiters_type::values.postfix != nullptr)
      stream << delimiters_type::values.postfix;
  }

private:
  const T &container_;
};
// Specialization for pairs: prints `first`, then the delimiter (when it is
// not null), then `second`.
template <typename T, typename TChar, typename TCharTraits,
          typename TDelimiters>
template <typename T1, typename T2>
struct print_container_helper<T, TChar, TCharTraits,
                              TDelimiters>::printer<std::pair<T1, T2>> {
  using ostream_type =
      typename print_container_helper<T, TChar, TCharTraits,
                                      TDelimiters>::ostream_type;

  static void print_body(const std::pair<T1, T2> &c, ostream_type &stream) {
    stream << c.first;
    if (print_container_helper<T, TChar, TCharTraits,
                               TDelimiters>::delimiters_type::values
            .delimiter != nullptr)
      stream << print_container_helper<T, TChar, TCharTraits,
                                       TDelimiters>::delimiters_type::values
                    .delimiter;
    stream << c.second;
  }
};
// Specialization for tuples: prints the elements in index order, writing the
// delimiter (when it is not null) before every element except the first.
template <typename T, typename TChar, typename TCharTraits,
          typename TDelimiters>
template <typename... Args>
struct print_container_helper<T, TChar, TCharTraits,
                              TDelimiters>::printer<std::tuple<Args...>> {
  using ostream_type =
      typename print_container_helper<T, TChar, TCharTraits,
                                      TDelimiters>::ostream_type;
  using element_type = std::tuple<Args...>;

  // Tag type that carries the index of the element to print next.
  template <std::size_t I> struct Int {};

  static void print_body(const element_type &c, ostream_type &stream) {
    tuple_print(c, stream, Int<0>());
  }

  // End of recursion: the index equals the tuple size, nothing to print.
  static void tuple_print(const element_type &, ostream_type &,
                          Int<sizeof...(Args)>) {}

  // First element: printed without a leading delimiter. For an empty tuple
  // the tag parameter becomes std::nullptr_t, so this overload does not
  // collide with the end-of-recursion overload above.
  static void
  tuple_print(const element_type &c, ostream_type &stream,
              typename std::conditional<sizeof...(Args) != 0, Int<0>,
                                        std::nullptr_t>::type) {
    stream << std::get<0>(c);
    tuple_print(c, stream, Int<1>());
  }

  // Elements 1..size-1: delimiter first, then the element, then recurse.
  template <std::size_t N>
  static void tuple_print(const element_type &c, ostream_type &stream,
                          Int<N>) {
    if (print_container_helper<T, TChar, TCharTraits,
                               TDelimiters>::delimiters_type::values
            .delimiter != nullptr)
      stream << print_container_helper<T, TChar, TCharTraits,
                                       TDelimiters>::delimiters_type::values
                    .delimiter;

    stream << std::get<N>(c);

    tuple_print(c, stream, Int<N + 1>());
  }
};
// Prints a print_container_helper to the specified stream.
template
<
typename
T
,
typename
TChar
,
typename
TCharTraits
,
typename
TDelimiters
>
inline
std
::
basic_ostream
<
TChar
,
TCharTraits
>
&
operator
<<
(
std
::
basic_ostream
<
TChar
,
TCharTraits
>
&
stream
,
const
print_container_helper
<
T
,
TChar
,
TCharTraits
,
TDelimiters
>
&
helper
)
{
helper
(
stream
);
return
stream
;
}
// Basic is_container template; specialize to derive from std::true_type for
// all desired container types. By default a type counts as a container when
// it has a nested const_iterator type and const begin()/end() members that
// return it.
template <typename T>
struct is_container
    : public std::integral_constant<
          bool, detail::has_const_iterator<T>::value &&
                    detail::has_begin_end<T>::beg_value &&
                    detail::has_begin_end<T>::end_value> {};

// Built-in arrays are containers ...
template <typename T, std::size_t N>
struct is_container<T[N]> : std::true_type {};

// ... except arrays of char, which are excluded.
template <std::size_t N> struct is_container<char[N]> : std::false_type {};

// std::valarray is explicitly enabled.
template <typename T>
struct is_container<std::valarray<T>> : std::true_type {};

// std::pair is explicitly enabled.
template <typename T1, typename T2>
struct is_container<std::pair<T1, T2>> : std::true_type {};

// std::tuple is explicitly enabled.
template <typename... Args>
struct is_container<std::tuple<Args...>> : std::true_type {};
// Default delimiters: any container without a more specific specialization is
// printed as "[a, b, c]".

// Narrow-character variant.
template <typename T> struct delimiters<T, char> {
  static const delimiters_values<char> values;
};

template <typename T>
const delimiters_values<char> delimiters<T, char>::values = {"[", ", ", "]"};

// Wide-character variant.
template <typename T> struct delimiters<T, wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T>
const delimiters_values<wchar_t> delimiters<T, wchar_t>::values = {
    L"[", L", ", L"]"};
// Delimiters for (multi)set and unordered_(multi)set.
// std::set uses "{", ", ", "}" instead of the default square brackets.

// std::set, narrow characters.
template <typename T, typename TComp, typename TAllocator>
struct delimiters< ::std::set<T, TComp, TAllocator>, char> {
  static const delimiters_values<char> values;
};

template <typename T, typename TComp, typename TAllocator>
const delimiters_values<char>
    delimiters< ::std::set<T, TComp, TAllocator>, char>::values = {"{", ", ",
                                                                    "}"};

// std::set, wide characters.
template <typename T, typename TComp, typename TAllocator>
struct delimiters< ::std::set<T, TComp, TAllocator>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T, typename TComp, typename TAllocator>
const delimiters_values<wchar_t>
    delimiters< ::std::set<T, TComp, TAllocator>, wchar_t>::values = {
        L"{", L", ", L"}"};
// std::multiset uses "{", ", ", "}".

// Narrow characters.
template <typename T, typename TComp, typename TAllocator>
struct delimiters< ::std::multiset<T, TComp, TAllocator>, char> {
  static const delimiters_values<char> values;
};

template <typename T, typename TComp, typename TAllocator>
const delimiters_values<char>
    delimiters< ::std::multiset<T, TComp, TAllocator>, char>::values = {
        "{", ", ", "}"};

// Wide characters.
template <typename T, typename TComp, typename TAllocator>
struct delimiters< ::std::multiset<T, TComp, TAllocator>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T, typename TComp, typename TAllocator>
const delimiters_values<wchar_t>
    delimiters< ::std::multiset<T, TComp, TAllocator>, wchar_t>::values = {
        L"{", L", ", L"}"};
// std::unordered_set uses "{", ", ", "}".

// Narrow characters.
template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters< ::std::unordered_set<T, THash, TEqual, TAllocator>, char> {
  static const delimiters_values<char> values;
};

template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<char> delimiters<
    ::std::unordered_set<T, THash, TEqual, TAllocator>, char>::values = {
    "{", ", ", "}"};

// Wide characters.
template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters< ::std::unordered_set<T, THash, TEqual, TAllocator>,
                   wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<wchar_t> delimiters<
    ::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t>::values = {
    L"{", L", ", L"}"};
// std::unordered_multiset uses "{", ", ", "}".

// Narrow characters.
template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters< ::std::unordered_multiset<T, THash, TEqual, TAllocator>,
                   char> {
  static const delimiters_values<char> values;
};

template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<char> delimiters<
    ::std::unordered_multiset<T, THash, TEqual, TAllocator>, char>::values = {
    "{", ", ", "}"};

// Wide characters.
template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters< ::std::unordered_multiset<T, THash, TEqual, TAllocator>,
                   wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<wchar_t> delimiters<
    ::std::unordered_multiset<T, THash, TEqual, TAllocator>,
    wchar_t>::values = {L"{", L", ", L"}"};
// Delimiters for pair and tuple.
// std::pair uses "(", ", ", ")".

// Narrow characters.
template <typename T1, typename T2>
struct delimiters< ::std::pair<T1, T2>, char> {
  static const delimiters_values<char> values;
};

template <typename T1, typename T2>
const delimiters_values<char> delimiters< ::std::pair<T1, T2>, char>::values =
    {"(", ", ", ")"};

// Wide characters.
template <typename T1, typename T2>
struct delimiters< ::std::pair<T1, T2>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T1, typename T2>
const delimiters_values<wchar_t>
    delimiters< ::std::pair<T1, T2>, wchar_t>::values = {L"(", L", ", L")"};
// std::tuple uses "(", ", ", ")", the same as std::pair.

// Narrow characters.
template <typename... Args>
struct delimiters< ::std::tuple<Args...>, char> {
  static const delimiters_values<char> values;
};

template <typename... Args>
const delimiters_values<char> delimiters< ::std::tuple<Args...>, char>::values =
    {"(", ", ", ")"};

// Wide characters.
template <typename... Args>
struct delimiters< ::std::tuple<Args...>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename... Args>
const delimiters_values<wchar_t>
    delimiters< ::std::tuple<Args...>, wchar_t>::values = {L"(", L", ", L")"};
// Type-erasing helper classes for easy use of custom delimiters.
// Requires TCharTraits = std::char_traits<TChar> and TChar = char or wchar_t,
// and MyDelims needs to be defined for TChar.
// Usage: "cout << pretty_print::custom_delims<MyDelims>(x)".
//
// custom_delims_base is the abstract interface: one stream() overload per
// supported stream type, each returning the stream it was given.
struct custom_delims_base {
  virtual ~custom_delims_base() = default;

  // Write the wrapped object to a narrow stream.
  virtual ::std::ostream &stream(::std::ostream &) = 0;

  // Write the wrapped object to a wide stream.
  virtual ::std::wostream &stream(::std::wostream &) = 0;
};
// Concrete implementation of custom_delims_base for a container of type T
// printed with the delimiter set Delims.
//
// The wrapper keeps only a reference to the container, so the container
// must outlive the wrapper.
template <typename T, typename Delims>
struct custom_delims_wrapper : custom_delims_base {
  custom_delims_wrapper(const T &t_) : t(t_) {}

  // `override` makes the compiler verify that these really implement the
  // pure virtuals of custom_delims_base; without it a signature drift would
  // silently introduce a new overload instead.
  std::ostream &stream(std::ostream &s) override {
    return s << print_container_helper<T, char, std::char_traits<char>,
                                       Delims>(t);
  }

  std::wostream &stream(std::wostream &s) override {
    return s << print_container_helper<T, wchar_t, std::char_traits<wchar_t>,
                                       Delims>(t);
  }

private:
  const T &t; // non-owning reference to the container being printed
};
// User-facing handle: custom_delims<MyDelims>(container) can be streamed
// with operator<<. The container type is erased behind custom_delims_base.
//
// The heap-allocated custom_delims_wrapper stores a reference to `c`, so
// `c` must stay alive for as long as this object is used.
template <typename Delims>
struct custom_delims {
  template <typename Container>
  custom_delims(const Container &c)
      : base(new custom_delims_wrapper<Container, Delims>(c)) {}

  // Owning pointer to the type-erased printer.
  std::unique_ptr<custom_delims_base> base;
};
template
<
typename
TChar
,
typename
TCharTraits
,
typename
Delims
>
inline
std
::
basic_ostream
<
TChar
,
TCharTraits
>
&
operator
<<
(
std
::
basic_ostream
<
TChar
,
TCharTraits
>
&
s
,
const
custom_delims
<
Delims
>
&
p
)
{
return
p
.
base
->
stream
(
s
);
}
// A non-owning view over a C-style array given as pointer plus element count.
// It exposes const_iterator / begin() / end() so the array can be iterated
// like a container.
// Usage: std::cout << pretty_print_array(arr, n) << std::endl;
template <typename T>
struct array_wrapper_n {
  using const_iterator = const T *;
  using value_type = T;

  // `a` points at the first element, `n` is the number of elements.
  array_wrapper_n(const T *const a, size_t n) : _array(a), _n(n) {}

  // Pointer to the first element.
  inline const_iterator begin() const { return _array; }

  // Pointer one past the last element.
  inline const_iterator end() const {
    const_iterator first = _array;
    return first + _n;
  }

private:
  const T *const _array; // start of the viewed range (not owned)
  size_t _n;             // element count
};
// A view over a single bucket of a hash-table based container that offers
// local iterators (cbegin(n) / cend(n)).
// Usage: std::cout << bucket_print(m, 4) << std::endl;
// (Prints the bucket with index 4 of container m.)
//
// Only a reference to the container is stored, so the container must outlive
// the wrapper.
template <typename T>
struct bucket_print_wrapper {
  using const_iterator = typename T::const_local_iterator;
  using size_type = typename T::size_type;

  bucket_print_wrapper(const T &m, size_type bucket) : m_map(m), n(bucket) {}

  // Local iterator to the first element of the selected bucket.
  const_iterator begin() const { return m_map.cbegin(n); }

  // Local past-the-end iterator of the selected bucket.
  const_iterator end() const { return m_map.cend(n); }

private:
  const T &m_map;    // container being inspected (not owned)
  const size_type n; // index of the bucket to iterate
};
}
// namespace pretty_print
// Global accessor functions for the convenience wrappers.

// Wrap the `n` elements starting at `a` so they can be iterated (and
// streamed) like a container. The data is not copied.
template <typename T>
inline pretty_print::array_wrapper_n<T> pretty_print_array(const T *const a,
                                                           size_t n) {
  using wrapper_t = pretty_print::array_wrapper_n<T>;
  return wrapper_t(a, n);
}
// Wrap bucket number `n` of the hash container `m` so that just that bucket
// can be iterated (and streamed). `m` is held by reference.
template <typename T>
pretty_print::bucket_print_wrapper<T> bucket_print(const T &m,
                                                   typename T::size_type n) {
  using wrapper_t = pretty_print::bucket_print_wrapper<T>;
  return wrapper_t(m, n);
}
// Main magic entry point: An overload snuck into namespace std.
// Can we do better?
namespace std {

// Prints a container to the stream using default delimiters. The overload
// only participates when ::pretty_print::is_container<T>::value is true.
template <typename T, typename TChar, typename TCharTraits>
inline typename enable_if< ::pretty_print::is_container<T>::value,
                           basic_ostream<TChar, TCharTraits> &>::type
operator<<(basic_ostream<TChar, TCharTraits> &stream, const T &container) {
  using helper_t =
      ::pretty_print::print_container_helper<T, TChar, TCharTraits>;
  return stream << helper_t(container);
}

} // namespace std
#endif // H_PRETTY_PRINT
include/tensorview/pybind_utils.h
0 → 100644
View file @
19e73bbe
// Copyright 2019-2020 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "tensor.h"
#include "tensorview.h"
#include <algorithm>
#include <array>
#include <iostream>
#include <pybind11/functional.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
namespace
py
=
pybind11
;
namespace
tv
{
template
<
typename
Tarr
>
bool
is_c_stype
(
const
Tarr
&
arr
)
{
return
bool
(
arr
.
flags
()
&
py
::
array
::
c_style
);
}
// Build a mutable TensorView over the data of a typed numpy array.
//
// The array must have the c_style flag set. When Rank is non-negative the
// array must have exactly Rank dimensions. The view is built from
// arr.mutable_data(); nothing is copied here.
template <typename T, int Rank = -1>
TensorView<T, Rank> arrayt2tv(py::array_t<T> arr) {
  TV_ASSERT_INVALID_ARG(is_c_stype(arr), "array must be c-contiguous array");

  // Mirror the numpy shape into a tv::Shape.
  Shape shape;
  int axis = 0;
  while (axis < arr.ndim()) {
    shape.push_back(arr.shape(axis));
    ++axis;
  }

  if (Rank >= 0) {
    TV_ASSERT_INVALID_ARG(shape.ndim() == Rank, "error");
  }
  T *raw = arr.mutable_data();
  return TensorView<T, Rank>(raw, shape);
}
// Build a read-only TensorView over the data of a typed numpy array.
//
// The array must have the c_style flag set. When Rank is non-negative the
// array must have exactly Rank dimensions. The view is built from
// arr.data(); nothing is copied here.
//
// The declared return type now carries Rank, matching both the object that
// is actually constructed and the mutable sibling arrayt2tv. Previously it
// was TensorView<const T>, which disagrees with the returned
// TensorView<const T, Rank> for any explicit Rank. For the default Rank
// (-1) the type is unchanged.
template <typename T, int Rank = -1>
TensorView<const T, Rank> carrayt2tv(py::array_t<T> arr) {
  TV_ASSERT_INVALID_ARG(is_c_stype(arr), "array must be c-contiguous array");

  // Mirror the numpy shape into a tv::Shape.
  Shape shape;
  for (int i = 0; i < arr.ndim(); ++i) {
    shape.push_back(arr.shape(i));
  }

  if (Rank >= 0) {
    TV_ASSERT_INVALID_ARG(shape.ndim() == Rank, "error");
  }
  return TensorView<const T, Rank>(arr.data(), shape);
}
// Map the dtype of a numpy array to the matching tv::DType.
//
// The decision is made from arr.dtype().kind() ('b', 'i', 'u', 'f') together
// with arr.itemsize(). Any combination not listed below raises via
// TV_THROW_RT_ERR.
//
// Every outer case ends in an explicit `break`. The original relied on an
// inner `default: break;` and then fell through into the next kind's switch,
// which only worked because the size tables happen not to collide.
template <typename Tarr>
tv::DType get_array_tv_dtype(const Tarr &arr) {
  switch (arr.dtype().kind()) {
  case 'b':
    return tv::bool_;
  case 'i':
    switch (arr.itemsize()) {
    case 1:
      return tv::int8;
    case 2:
      return tv::int16;
    case 4:
      return tv::int32;
    case 8:
      return tv::int64;
    default:
      break;
    }
    break; // unsupported signed-integer width
  case 'u':
    switch (arr.itemsize()) {
    case 1:
      return tv::uint8;
    case 2:
      return tv::uint16;
    case 4:
      return tv::uint32;
    case 8:
      return tv::uint64;
    default:
      break;
    }
    break; // unsupported unsigned-integer width
  case 'f':
    switch (arr.itemsize()) {
    case 2:
      return tv::float16;
    case 4:
      return tv::float32;
    case 8:
      return tv::float64;
    default:
      break;
    }
    break; // unsupported floating-point width
  default:
    break; // unsupported kind
  }
  TV_THROW_RT_ERR("unknown dtype", arr.dtype().kind(), arr.itemsize());
}
// Convert a numpy array into a tv::Tensor via tv::from_blob.
//
// The array must have the c_style flag set. The dtype is derived at run time
// with get_array_tv_dtype, and the device passed to from_blob is -1.
// NOTE(review): from_blob presumably wraps arr.mutable_data() without
// copying, in which case the array must outlive the tensor; confirm.
template <typename Tarr>
Tensor array2tensor(Tarr &arr) {
  TV_ASSERT_INVALID_ARG(is_c_stype(arr), "array must be c-contiguous array");

  TensorShape shape;
  int axis = 0;
  while (axis < arr.ndim()) {
    shape.push_back(arr.shape(axis));
    ++axis;
  }
  return tv::from_blob(arr.mutable_data(), shape, get_array_tv_dtype(arr), -1);
}
// Convert a typed numpy array into a tv::Tensor via tv::from_blob.
//
// Same as array2tensor, except that the dtype comes from the static element
// type through tv::type_v<T>. The device passed to from_blob is -1.
// NOTE(review): from_blob presumably wraps arr.mutable_data() without
// copying, in which case the array must outlive the tensor; confirm.
template <typename T>
Tensor arrayt2tensor(py::array_t<T> &arr) {
  TV_ASSERT_INVALID_ARG(is_c_stype(arr), "array must be c-contiguous array");

  TensorShape shape;
  int axis = 0;
  while (axis < arr.ndim()) {
    shape.push_back(arr.shape(axis));
    ++axis;
  }
  return tv::from_blob(arr.mutable_data(), shape, tv::type_v<T>, -1);
}
// Translate a tv dtype enumerator into the corresponding py::dtype.
//
// Each supported enumerator maps to a dtype name string. Anything else
// raises via TV_THROW_INVALID_ARG.
template <typename TDType>
py::dtype tv_dtype_to_py(TDType d) {
  const char *name = nullptr;
  switch (d) {
  case float32:
    name = "float32";
    break;
  case float64:
    name = "float64";
    break;
  case float16:
    name = "float16";
    break;
  case int32:
    name = "int32";
    break;
  case int16:
    name = "int16";
    break;
  case int8:
    name = "int8";
    break;
  case int64:
    name = "int64";
    break;
  case uint32:
    name = "uint32";
    break;
  case uint16:
    name = "uint16";
    break;
  case uint8:
    name = "uint8";
    break;
  case uint64:
    name = "uint64";
    break;
  case bool_:
    name = "bool_";
    break;
  default:
    break;
  }
  if (name != nullptr) {
    return py::dtype(name);
  }
  TV_THROW_INVALID_ARG("unknown dtype", d);
}
// Convert a CPU tv tensor into a numpy array.
//
// This is a template only so that the function can be defined in a header.
// It must not be called while the GIL is released.
// The tensor must live on device -1 (CPU), otherwise an invalid-argument
// error is raised.
template <typename Ttensor>
py::array tensor2array(Ttensor &tensor) {
  TV_ASSERT_INVALID_ARG(tensor.device() == -1, "must be cpu tensor");
  auto shape = tensor.shape();
  // Keep the extents at full width. Copying them into std::vector<int>
  // would narrow 64-bit extents before pybind11 sees them.
  std::vector<py::ssize_t> shape_vec(shape.begin(), shape.end());
  auto dtype = tv_dtype_to_py(tensor.dtype());
  // Constructing py::array from a pointer without a base object copies the
  // content. That is intended: ownership cannot be transferred from the
  // C++ tv::Tensor to numpy, and the C++ object may be destroyed first.
  return py::array(dtype, shape_vec, {}, tensor.raw_data());
}
}
// namespace tv
include/tensorview/tensor.h
View file @
19e73bbe
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
...
...
@@ -12,19 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/*
tv::Tensor is a lightweight, header-only tensor container
without templates or annoying dependencies. No algorithms are implemented.
It should only be used when you want a simple, non-template container but
don't want to link against libtorch.
If you can use libtorch, don't use tv::Tensor.
*/
#pragma once
#include "mp_helper.h"
#include "tensorview.h"
#include <cstring>
#include <iomanip>
#include <memory>
#include <spconv/mp_helper.h>
#ifdef SPCONV_CUDA
#include <type_traits>
#ifdef TV_CUDA
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#endif
namespace
tv
{
enum
DType
{
namespace
tv
{
enum
DType
{
float32
,
int32
,
int16
,
...
...
@@ -39,51 +50,46 @@ enum DType
uint64
};
namespace
detail
{
namespace
detail
{
template
<
typename
T
>
class
TensorStorage
{
using
all_tensor_types_t
=
std
::
tuple
<
float
,
double
,
int8_t
,
int16_t
,
int32_t
,
int64_t
,
uint8_t
,
uint16_t
,
uint32_t
,
uint64_t
,
bool
>
;
template
<
typename
T
>
class
TensorStorage
{
public:
TensorStorage
(
size_t
size
,
int
device
=
-
1
,
bool
managed
=
false
)
:
mSize
(
size
),
device_
(
device
),
managed_
(
managed
)
{
if
(
size
==
0
)
{
TensorStorage
(
size_t
size
,
int
device
=
-
1
,
bool
managed
=
false
,
bool
pinned
=
false
)
:
mSize
(
size
),
device_
(
device
),
managed_
(
managed
),
pinned_
(
pinned
)
{
if
(
size
==
0
)
{
mPtr
=
nullptr
;
}
else
{
if
(
device
==
-
1
)
{
#ifdef SPCONV_CUDA
checkCudaErrors
(
cudaMallocHost
(
&
mPtr
,
size
*
sizeof
(
T
)));
}
else
{
if
(
device
==
-
1
)
{
if
(
pinned_
)
{
#ifdef TV_CUDA
checkCudaErrors
(
cudaMallocHost
(
&
mPtr
,
size
*
sizeof
(
T
)));
#else
mPtr
=
new
T
[
size
]
;
TV_THROW_INVALID_ARG
(
"you need to define TV_CUDA to use pinned"
)
;
#endif
}
else
{
#ifdef SPCONV_CUDA
}
else
{
mPtr
=
new
T
[
size
];
}
}
else
{
#ifdef TV_CUDA
int
deviceCount
;
cudaGetDeviceCount
(
&
deviceCount
);
if
(
device
>=
deviceCount
)
{
TV_ASSERT_INVALID_ARG
(
"you provide device "
,
device
,
" but you only have "
,
deviceCount
,
" device."
);
if
(
device
>=
deviceCount
)
{
TV_THROW_INVALID_ARG
(
"you provide device "
,
device
,
" but you only have "
,
deviceCount
,
" device."
);
}
cudaSetDevice
(
device
);
if
(
managed
)
{
if
(
managed
)
{
checkCudaErrors
(
cudaMallocManaged
(
&
this
->
mPtr
,
size
*
sizeof
(
T
)));
}
else
{
}
else
{
checkCudaErrors
(
cudaMalloc
(
&
mPtr
,
size
*
sizeof
(
T
)));
}
#else
TV_
ASSERT
_INVALID_ARG
(
false
,
"don't compiled with cuda"
);
TV_
THROW
_INVALID_ARG
(
"don't compiled with cuda"
);
#endif
}
}
...
...
@@ -91,27 +97,23 @@ public:
TensorStorage
(
T
*
ptr
,
size_t
size
,
int
device
)
:
mSize
(
size
),
mPtr
(
ptr
),
from_blob_
(
true
),
device_
(
device
)
{}
virtual
~
TensorStorage
()
{
if
(
empty
())
{
virtual
~
TensorStorage
()
{
if
(
empty
())
{
return
;
}
if
(
from_blob_
)
{
if
(
from_blob_
)
{
return
;
}
if
(
device_
==
-
1
)
{
#ifdef SPCONV_CUDA
cudaFreeHost
(
mPtr
);
#else
delete
[]
mPtr
;
if
(
device_
==
-
1
)
{
if
(
pinned_
)
{
#ifdef TV_CUDA
cudaFreeHost
(
mPtr
);
#endif
}
else
{
#ifdef SPCONV_CUDA
}
else
{
delete
[]
mPtr
;
}
}
else
{
#ifdef TV_CUDA
cudaFree
(
mPtr
);
#endif
}
...
...
@@ -124,36 +126,33 @@ public:
bool
empty
()
const
{
return
mPtr
==
nullptr
||
mSize
==
0
;
}
bool
managed
()
const
{
return
managed_
;
}
bool
pinned
()
const
{
return
pinned_
;
}
int
device
()
const
{
return
device_
;
}
void
zero_
()
{
if
(
device_
==
-
1
)
{
void
zero_
()
{
if
(
device_
==
-
1
)
{
std
::
memset
(
data
(),
0
,
mSize
);
// std::fill(data(), data() + mSize, 0);
}
else
{
#ifdef SPCONV_CUDA
}
else
{
#ifdef TV_CUDA
checkCudaErrors
(
cudaMemset
(
data
(),
0
,
mSize
/
sizeof
(
T
)));
#else
TV_
ASSERT
_INVALID_ARG
(
false
,
"don't compiled with cuda"
);
TV_
THROW
_INVALID_ARG
(
"don't compiled with cuda"
);
#endif
}
}
private:
T
*
mPtr
=
nullptr
;
size_t
mSize
=
0
;
int
device_
=
-
1
;
T
*
mPtr
=
nullptr
;
bool
from_blob_
=
false
;
int
device_
=
-
1
;
bool
managed_
=
false
;
bool
pinned_
=
false
;
};
size_t
sizeof_dtype
(
DType
dtype
)
{
switch
(
dtype
)
{
template
<
typename
T
>
size_t
sizeof_dtype
(
T
dtype
)
{
switch
(
dtype
)
{
case
float32
:
return
sizeof
(
float
);
case
int8
:
...
...
@@ -176,20 +175,16 @@ size_t sizeof_dtype(DType dtype)
return
sizeof
(
uint32_t
);
case
uint64
:
return
sizeof
(
uint64_t
);
#ifdef SPCONV_CUDA
case
float16
:
return
sizeof
(
__half
);
#endif
return
2
;
default:
TV_THROW_RT_ERR
(
"unsupported dtype"
);
}
return
0
;
}
std
::
string
typeString
(
DType
t
)
{
switch
(
t
)
{
template
<
typename
T
>
std
::
string
typeString
(
T
t
)
{
switch
(
t
)
{
case
DType
::
bool_
:
return
"bool"
;
case
DType
::
float32
:
...
...
@@ -212,165 +207,477 @@ std::string typeString(DType t)
return
"uint32"
;
case
DType
::
uint64
:
return
"uint64"
;
#ifdef SPCONV_CUDA
case
DType
::
float16
:
return
"half"
;
#endif
default:
return
""
;
}
}
template
<
typename
T
>
struct
TypeToDtypeTraits
;
template
<
typename
T
>
struct
TypeToDtypeTraits
;
template
<
>
struct
TypeToDtypeTraits
<
int32_t
>
{
static
constexpr
DType
dtype
=
int32
;
};
#ifdef TV_CUDA
template
<
>
struct
TypeToDtypeTraits
<
__half
>
{
static
constexpr
DType
dtype
=
float16
;
};
#endif
template
<
>
struct
TypeToDtypeTraits
<
int32_t
>
{
template
<
>
struct
TypeToDtypeTraits
<
float
>
{
static
constexpr
DType
dtype
=
float32
;
};
template
<
>
struct
TypeToDtypeTraits
<
double
>
{
static
constexpr
DType
dtype
=
float64
;
};
template
<
>
struct
TypeToDtypeTraits
<
int16_t
>
{
static
constexpr
DType
dtype
=
int16
;
};
template
<
>
struct
TypeToDtypeTraits
<
int8_t
>
{
static
constexpr
DType
dtype
=
int8
;
};
template
<
>
struct
TypeToDtypeTraits
<
int64_t
>
{
static
constexpr
DType
dtype
=
int64
;
};
template
<
>
struct
TypeToDtypeTraits
<
uint8_t
>
{
static
constexpr
DType
dtype
=
uint8
;
};
template
<
>
struct
TypeToDtypeTraits
<
uint16_t
>
{
static
constexpr
DType
dtype
=
uint16
;
};
template
<
>
struct
TypeToDtypeTraits
<
uint32_t
>
{
static
constexpr
DType
dtype
=
uint32
;
};
template
<
>
struct
TypeToDtypeTraits
<
uint64_t
>
{
static
constexpr
DType
dtype
=
uint64
;
};
template
<
>
struct
TypeToDtypeTraits
<
bool
>
{
static
constexpr
DType
dtype
=
bool_
;
};
template
<
>
struct
TypeToDtypeTraits
<
const
int32_t
>
{
static
constexpr
DType
dtype
=
int32
;
};
#ifdef SPCONV_CUDA
template
<
>
struct
TypeToDtypeTraits
<
__half
>
{
#ifdef TV_CUDA
template
<
>
struct
TypeToDtypeTraits
<
const
__half
>
{
static
constexpr
DType
dtype
=
float16
;
};
#endif
template
<
>
struct
TypeToDtypeTraits
<
float
>
{
template
<
>
struct
TypeToDtypeTraits
<
const
float
>
{
static
constexpr
DType
dtype
=
float32
;
};
template
<
>
struct
TypeToDtypeTraits
<
double
>
{
template
<
>
struct
TypeToDtypeTraits
<
const
double
>
{
static
constexpr
DType
dtype
=
float64
;
};
template
<
>
struct
TypeToDtypeTraits
<
int16_t
>
{
template
<
>
struct
TypeToDtypeTraits
<
const
int16_t
>
{
static
constexpr
DType
dtype
=
int16
;
};
template
<
>
struct
TypeToDtypeTraits
<
int8_t
>
{
template
<
>
struct
TypeToDtypeTraits
<
const
int8_t
>
{
static
constexpr
DType
dtype
=
int8
;
};
template
<
>
struct
TypeToDtypeTraits
<
int64_t
>
{
template
<
>
struct
TypeToDtypeTraits
<
const
int64_t
>
{
static
constexpr
DType
dtype
=
int64
;
};
template
<
>
struct
TypeToDtypeTraits
<
uint8_t
>
{
template
<
>
struct
TypeToDtypeTraits
<
const
uint8_t
>
{
static
constexpr
DType
dtype
=
uint8
;
};
template
<
>
struct
TypeToDtypeTraits
<
uint16_t
>
{
template
<
>
struct
TypeToDtypeTraits
<
const
uint16_t
>
{
static
constexpr
DType
dtype
=
uint16
;
};
template
<
>
struct
TypeToDtypeTraits
<
uint32_t
>
{
template
<
>
struct
TypeToDtypeTraits
<
const
uint32_t
>
{
static
constexpr
DType
dtype
=
uint32
;
};
template
<
>
struct
TypeToDtypeTraits
<
uint64_t
>
{
template
<
>
struct
TypeToDtypeTraits
<
const
uint64_t
>
{
static
constexpr
DType
dtype
=
uint64
;
};
template
<
>
struct
TypeToDtypeTraits
<
const
bool
>
{
static
constexpr
DType
dtype
=
bool_
;
};
}
// namespace detail
template
<
class
T
>
constexpr
DType
type_v
=
detail
::
TypeToDtypeTraits
<
T
>::
dtype
;
template
<
class
T
>
constexpr
DType
type_v
=
detail
::
TypeToDtypeTraits
<
T
>::
dtype
;
struct
Tensor
{
// Run-time to compile-time dtype dispatch.
//
// Walks the candidate types Ts... and, for each one whose type_v equals `t`,
// calls `f` with a value-initialized instance of that type, so the callee
// can recover the static type from its argument. If no candidate matches,
// raises a runtime error listing the available type names.
template <class... Ts, typename F>
void dispatch(DType t, F &&f) {
  static_assert(sizeof...(Ts) > 0, "you need to provide at least one type");

  bool matched = false;
  mp_for_each<mp_list<Ts...>>([&matched, &f, t](auto candidate) {
    using candidate_t = decltype(candidate);
    if (type_v<candidate_t> == t) {
      std::forward<F>(f)(candidate_t());
      matched = true;
    }
  });
  if (matched) {
    return;
  }

  // Nothing matched: collect the candidate names for the error message.
  std::stringstream names;
  mp_for_each<mp_list<Ts...>>([&names](auto candidate) {
    names << detail::TypeToString<decltype(candidate)>::value << " ";
  });
  TV_THROW_RT_ERR("unknown type", detail::typeString(t),
                  ", available:", names.str());
}
// Run-time to compile-time dispatch over a list of constants of type T.
//
// For each candidate in Is... whose value equals `idx`, calls `f` with the
// candidate object handed out by mp_for_each, so the value is available at
// compile time inside `f`. If no candidate matches, raises a runtime error
// listing the available values.
template <typename T, T... Is, typename F>
void dispatch_scalar(T idx, F &&f) {
  static_assert(sizeof...(Is) > 0,
                "you need to provide at least one candidate");

  bool matched = false;
  mp_for_each<mp_list_c<T, Is...>>([&matched, &f, idx](auto candidate) {
    if (T(candidate) == idx) {
      std::forward<F>(f)(candidate);
      matched = true;
    }
  });
  if (matched) {
    return;
  }

  // Nothing matched: collect the candidate values for the error message.
  std::stringstream values;
  mp_for_each<mp_list_c<T, Is...>>(
      [&values](auto candidate) { values << T(candidate) << " "; });
  TV_THROW_RT_ERR("unknown value", idx, ", available:", values.str());
}
// Integer flavour of dispatch_scalar, used for kernel parameter selection.
//
// For each candidate in Is... equal to `idx`, calls `f` with the candidate
// object handed out by mp_for_each. If no candidate matches, raises a
// runtime error listing the available values.
template <int... Is, typename F>
void dispatch_int(int idx, F &&f) {
  static_assert(sizeof...(Is) > 0,
                "you need to provide at least one candidate");

  bool matched = false;
  mp_for_each<mp_list_c<int, Is...>>([&matched, &f, idx](auto candidate) {
    if (int(candidate) == idx) {
      std::forward<F>(f)(candidate);
      matched = true;
    }
  });
  if (matched) {
    return;
  }

  // Nothing matched: collect the candidate values for the error message.
  std::stringstream values;
  mp_for_each<mp_list_c<int, Is...>>(
      [&values](auto candidate) { values << int(candidate) << " "; });
  TV_THROW_RT_ERR("unknown value", idx, ", available:", values.str());
}
/*
template <int... Is, typename F> void dispatch_int(int idx, F &&f) {
return dispatch_scalar<int, Is...>(idx, f);
}
*/
// Functor form of dispatch(): the candidate types are taken from the
// template arguments of a type list, e.g. Dispatch<std::tuple<float, int>>.
template <class TypeList>
struct Dispatch;

template <template <class...> class List, class... Args>
struct Dispatch<List<Args...>> {
  // Forwards to dispatch<Args...>(t, f).
  template <typename F>
  inline void operator()(DType t, F &&f) {
    dispatch<Args...>(t, std::forward<F>(f));
  }
};
// Functor form of dispatch_int(): the candidate values are taken from the
// template arguments of an integer list type.
template <class IntList>
struct DispatchInt;

template <template <int...> class List, int... Ints>
struct DispatchInt<List<Ints...>> {
  // Forwards to dispatch_int<Ints...>(t, f).
  template <typename F>
  inline void operator()(int t, F &&f) {
    dispatch_int<Ints...>(t, std::forward<F>(f));
  }
};
// Capacity used for the shape/stride storage of a tv::Tensor.
constexpr size_t kTensorMaxDim = 10;

// Shape (and stride) type of tv::Tensor: up to kTensorMaxDim entries of
// int64_t.
using TensorShape = ShapeBase<kTensorMaxDim, int64_t>;
struct
Tensor
{
// Default constructor: performs no work of its own.
Tensor() {}
Tensor
(
Shape
shape
,
DType
dtype
,
int
device
=
-
1
,
bool
managed
=
false
)
:
dtype_
(
dtype
)
{
// Allocate a new tensor with an explicit stride.
//
// Storage of shape.size() * sizeof_dtype(dtype) bytes is allocated on
// `device`, with the given `pinned` / `managed` flags. The shape must not
// be empty. `stride` is stored as given and is not validated against
// `shape`.
Tensor(TensorShape shape, TensorShape stride, DType dtype, int device = -1,
       bool pinned = false, bool managed = false)
    : dtype_(dtype) {
  TV_ASSERT_INVALID_ARG(!shape.empty(), "dont support empty shape");
  const auto num_bytes = shape.size() * detail::sizeof_dtype(dtype);
  // TensorStorage takes (size, device, managed, pinned), in that order.
  storage_ = std::make_shared<detail::TensorStorage<uint8_t>>(
      num_bytes, device, managed, pinned);
  shape_ = shape;
  stride_ = stride;
}
// Allocate a new tensor with a row-major stride.
//
// Storage of shape.size() * sizeof_dtype(dtype) bytes is allocated on
// `device`, with the given `pinned` / `managed` flags. The shape must not
// be empty. The stride is derived from shape.stride_rowmajor().
Tensor(TensorShape shape, DType dtype, int device = -1, bool pinned = false,
       bool managed = false)
    : dtype_(dtype) {
  TV_ASSERT_INVALID_ARG(!shape.empty(), "dont support empty shape");
  const auto num_bytes = shape.size() * detail::sizeof_dtype(dtype);
  // TensorStorage takes (size, device, managed, pinned), in that order.
  storage_ = std::make_shared<detail::TensorStorage<uint8_t>>(
      num_bytes, device, managed, pinned);
  shape_ = shape;
  stride_ = shape.stride_rowmajor();
}
Tensor
(
void
*
ptr
,
TensorShape
shape
,
TensorShape
stride
,
DType
dtype
,
int
device
=
-
1
)
:
dtype_
(
dtype
)
{
TV_ASSERT_INVALID_ARG
(
!
shape
.
empty
(),
"dont support empty shape"
);
storage_
=
std
::
make_shared
<
detail
::
TensorStorage
<
uint8_t
>>
(
shape
.
size
()
*
detail
::
sizeof_dtype
(
dtype
),
device
,
managed
);
reinterpret_cast
<
uint8_t
*>
(
ptr
),
shape
.
size
()
*
detail
::
sizeof_dtype
(
dtype
),
device
);
shape_
=
shape
;
stride_
=
stride
;
}
Tensor
(
void
*
ptr
,
Shape
shape
,
DType
dtype
,
int
device
=
-
1
)
:
dtype_
(
dtype
)
{
Tensor
(
void
*
ptr
,
TensorShape
shape
,
DType
dtype
,
int
device
=
-
1
)
:
dtype_
(
dtype
)
{
TV_ASSERT_INVALID_ARG
(
!
shape
.
empty
(),
"dont support empty shape"
);
storage_
=
std
::
make_shared
<
detail
::
TensorStorage
<
uint8_t
>>
(
reinterpret_cast
<
uint8_t
*>
(
ptr
),
shape
.
size
()
*
detail
::
sizeof_dtype
(
dtype
),
device
);
shape_
=
shape
;
stride_
=
shape
.
stride_rowmajor
();
}
template
<
typename
T
>
TensorView
<
T
>
tview
()
{
Tensor
(
const
void
*
ptr
,
TensorShape
shape
,
TensorShape
stride
,
DType
dtype
,
int
device
=
-
1
)
:
dtype_
(
dtype
),
writeable_
(
false
)
{
TV_ASSERT_INVALID_ARG
(
!
shape
.
empty
(),
"dont support empty shape"
);
storage_
=
std
::
make_shared
<
detail
::
TensorStorage
<
uint8_t
>>
(
reinterpret_cast
<
uint8_t
*>
(
const_cast
<
void
*>
(
ptr
)),
shape
.
size
()
*
detail
::
sizeof_dtype
(
dtype
),
device
);
shape_
=
shape
;
stride_
=
stride
;
}
Tensor
(
const
void
*
ptr
,
TensorShape
shape
,
DType
dtype
,
int
device
=
-
1
)
:
dtype_
(
dtype
),
writeable_
(
false
)
{
TV_ASSERT_INVALID_ARG
(
!
shape
.
empty
(),
"dont support empty shape"
);
storage_
=
std
::
make_shared
<
detail
::
TensorStorage
<
uint8_t
>>
(
reinterpret_cast
<
uint8_t
*>
(
const_cast
<
void
*>
(
ptr
)),
shape
.
size
()
*
detail
::
sizeof_dtype
(
dtype
),
device
);
shape_
=
shape
;
stride_
=
shape
.
stride_rowmajor
();
}
// Build a 1-D int32 tensor holding the listed values.
Tensor(std::initializer_list<int32_t> init)
    : Tensor({int(init.size())}, tv::int32) {
  int32_t *dst = data<int32_t>();
  for (const int32_t &value : init) {
    *dst = value;
    ++dst;
  }
}
// Build a 1-D int64 tensor holding the listed values.
Tensor(std::initializer_list<int64_t> init)
    : Tensor({int(init.size())}, tv::int64) {
  int64_t *dst = data<int64_t>();
  for (const int64_t &value : init) {
    *dst = value;
    ++dst;
  }
}
// Build a 1-D float32 tensor holding the listed values.
Tensor(std::initializer_list<float> init)
    : Tensor({int(init.size())}, tv::float32) {
  float *dst = data<float>();
  for (const float &value : init) {
    *dst = value;
    ++dst;
  }
}
// Build a 1-D float64 tensor holding the listed values.
Tensor(std::initializer_list<double> init)
    : Tensor({int(init.size())}, tv::float64) {
  double *dst = data<double>();
  for (const double &value : init) {
    *dst = value;
    ++dst;
  }
}
template
<
typename
T
,
int
Rank
=
-
1
,
template
<
class
>
class
PtrTraits
=
DefaultPtrTraits
,
typename
Tindex
=
int
,
typename
std
::
enable_if
<
(
Rank
>
0
),
int
>::
type
=
0
>
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>
tview
()
{
using
tv_shape_t
=
typename
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>::
tv_shape_t
;
writable_check
();
static_assert
(
Rank
==
-
1
||
Rank
>
0
,
"error"
);
TV_ASSERT_RT_ERR
(
dtype_
==
type_v
<
T
>
,
"error"
);
TV_ASSERT_RT_ERR
(
shape_
.
size
()
==
storage_
->
size
()
/
sizeof
(
T
),
"error"
);
return
TensorView
<
T
>
(
reinterpret_cast
<
T
*>
(
storage_
->
data
()),
shape_
);
tv_shape_t
shape
(
Rank
),
stride
(
Rank
);
for
(
int
i
=
0
;
i
<
Rank
;
++
i
)
{
shape
[
i
]
=
shape_
[
i
];
stride
[
i
]
=
stride_
[
i
];
}
return
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>
(
reinterpret_cast
<
T
*>
(
data
<
T
>
()),
shape
,
stride
);
}
template
<
typename
T
>
TensorView
<
T
>
tview
()
const
{
TV_ASSERT_RT_ERR
(
shape_
.
size
()
==
storage_
->
size
()
/
sizeof
(
T
),
"error"
);
template
<
typename
T
,
int
Rank
=
-
1
,
template
<
class
>
class
PtrTraits
=
DefaultPtrTraits
,
typename
Tindex
=
int
,
typename
std
::
enable_if
<
Rank
==
-
1
,
int
>
::
type
=
0
>
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>
tview
()
{
using
tv_shape_t
=
typename
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>::
tv_shape_t
;
writable_check
();
static_assert
(
Rank
==
-
1
||
Rank
>
0
,
"error"
);
TV_ASSERT_RT_ERR
(
dtype_
==
type_v
<
T
>
,
"error"
);
return
TensorView
<
const
std
::
remove_const_t
<
T
>>
(
reinterpret_cast
<
const
std
::
remove_const_t
<
T
>
*>
(
storage_
->
data
()),
shape_
);
ShapeBase
<
TV_MAX_DIM
,
Tindex
>
shape
(
ndim
()),
stride
(
ndim
());
for
(
int
i
=
0
;
i
<
ndim
();
++
i
)
{
shape
[
i
]
=
shape_
[
i
];
stride
[
i
]
=
stride_
[
i
];
}
return
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>
(
reinterpret_cast
<
T
*>
(
data
<
T
>
()),
shape
,
stride
);
}
// Read-only typed view with a compile-time rank (Rank > 0).
//
// Raises a runtime error when Rank differs from ndim() or when T does not
// match the stored dtype. The first Rank shape and stride entries are
// copied into the view.
template <typename T, int Rank = -1,
          template <class> class PtrTraits = DefaultPtrTraits,
          typename Tindex = int,
          typename std::enable_if<(Rank > 0), int>::type = 0>
TensorView<const std::remove_const_t<T>, Rank, PtrTraits, Tindex>
tview() const {
  using elem_t = const std::remove_const_t<T>;
  using view_t = TensorView<elem_t, Rank, PtrTraits, Tindex>;

  static_assert(Rank == -1 || Rank > 0, "error");
  // This overload is only enabled for Rank > 0, so the rank check always
  // applies.
  TV_ASSERT_RT_ERR(Rank == ndim(), "error");
  TV_ASSERT_RT_ERR(dtype_ == type_v<T>, "error");

  ShapeBase<Rank == -1 ? TV_MAX_DIM : Rank, Tindex> view_shape(Rank);
  ShapeBase<Rank == -1 ? TV_MAX_DIM : Rank, Tindex> view_stride(Rank);
  for (int axis = 0; axis < Rank; ++axis) {
    view_shape[axis] = shape_[axis];
    view_stride[axis] = stride_[axis];
  }
  return view_t(reinterpret_cast<elem_t *>(data<T>()), view_shape,
                view_stride);
}
// Read-only typed view with a run-time rank (Rank == -1).
//
// Raises a runtime error when T does not match the stored dtype. All
// ndim() shape and stride entries are copied into the view.
template <typename T, int Rank = -1,
          template <class> class PtrTraits = DefaultPtrTraits,
          typename Tindex = int,
          typename std::enable_if<Rank == -1, int>::type = 0>
TensorView<const std::remove_const_t<T>, Rank, PtrTraits, Tindex>
tview() const {
  using elem_t = const std::remove_const_t<T>;
  using view_t = TensorView<elem_t, Rank, PtrTraits, Tindex>;

  static_assert(Rank == -1 || Rank > 0, "error");
  // This overload is only enabled for Rank == -1, so there is no static
  // rank to compare with ndim().
  TV_ASSERT_RT_ERR(dtype_ == type_v<T>, "error");

  ShapeBase<TV_MAX_DIM, Tindex> view_shape(ndim());
  ShapeBase<TV_MAX_DIM, Tindex> view_stride(ndim());
  for (int axis = 0; axis < ndim(); ++axis) {
    view_shape[axis] = shape_[axis];
    view_stride[axis] = stride_[axis];
  }
  return view_t(reinterpret_cast<elem_t *>(data<T>()), view_shape,
                view_stride);
}
// Reshape to the given dimensions without copying; the result shares this
// tensor's storage and gets row-major strides.
//
// At most one dimension may be -1; it is inferred from the element count.
// Every other dimension must be > 0. Throws std::invalid_argument-style
// errors (TV_ASSERT_INVALID_ARG) on bad dimensions and a runtime error when
// the requested shape does not hold exactly size() elements.
template <class... Inds> Tensor view(Inds... newShapes) const {
  static_assert(sizeof...(newShapes) > 0, "dont support empty for now");
  TensorShape shape{int(newShapes)...};

  // Pass 1: validate every entry before doing any arithmetic. Inferring the
  // -1 entry while later entries are still unchecked could divide by zero
  // when one of those later entries is 0.
  bool found_minus_1 = false;
  size_t infer_axis = 0;
  for (size_t i = 0; i < shape.ndim(); ++i) {
    if (shape[i] == -1) {
      TV_ASSERT_INVALID_ARG(!found_minus_1, "multiple -1 in your argument.");
      found_minus_1 = true;
      infer_axis = i;
    } else {
      TV_ASSERT_INVALID_ARG(shape[i] > 0,
                            "shape except -1 must larger than 0");
    }
  }

  // Pass 2: all known entries are positive, so their product is non-zero.
  if (found_minus_1) {
    shape[infer_axis] = 1; // neutral element while taking the product
    shape[infer_axis] = size() / shape.size();
  }

  // Catches element counts that are not divisible by the known dimensions.
  TV_ASSERT_RT_ERR(shape.size() == size(), "error");
  Tensor res(*this);
  res.shape_ = shape;
  res.stride_ = shape.stride_rowmajor();
  return res;
}
// Reshape to an explicit shape without copying. The element count of the
// new shape must equal size(); strides are recomputed as row-major.
Tensor view(TensorShape shape) const {
  TV_ASSERT_RT_ERR(shape.size() == size(), "error");
  Tensor reshaped(*this);
  reshaped.shape_ = shape;
  reshaped.stride_ = shape.stride_rowmajor();
  return reshaped;
}
// View of this tensor reshaped to whatever shape_.squeeze() returns.
Tensor squeeze() const {
  const auto squeezed = shape_.squeeze();
  return view(squeezed);
}
// View with the given axis squeezed via shape_.squeeze(axis). A negative
// axis counts from the end (axis + ndim()).
Tensor squeeze(int axis) const {
  const int resolved = axis < 0 ? int(ndim() + axis) : axis;
  return view(shape_.squeeze(resolved));
}
// View with an axis inserted via shape_.unsqueeze(axis). A negative axis is
// resolved as axis + ndim().
Tensor unsqueeze(int axis) const {
  const int resolved = axis < 0 ? int(ndim() + axis) : axis;
  return view(shape_.unsqueeze(resolved));
}
// Forwards to the storage's pinned() flag.
bool pinned() const {
  const bool is_pinned = storage_->pinned();
  return is_pinned;
}
// Returns a view of rows [start, end) along axis 0, sharing storage with
// this tensor. Negative start/end count from the end of axis 0.
//
// Requires a contiguous tensor. Throws (TV_ASSERT_INVALID_ARG) when the
// normalised range is empty or falls outside [0, shape_[0]].
Tensor slice_first_axis(int start, int end) const {
  TV_ASSERT_INVALID_ARG(contiguous_, "only support contiguous for now");
  if (start < 0) {
    start = shape_[0] + start;
  }
  if (end < 0) {
    end = shape_[0] + end;
  }
  // A start that is still negative would wrap around in the size_t offset
  // computation below.
  TV_ASSERT_INVALID_ARG(start >= 0, "start must not be negative");
  TV_ASSERT_INVALID_ARG(start < shape_[0], "start must small than dim 0");
  TV_ASSERT_INVALID_ARG(start < end, "start must small than end");
  // Without this bound the view would extend past the parent's data.
  TV_ASSERT_INVALID_ARG(end <= shape_[0], "end must not exceed dim 0");

  // Byte distance from this tensor's first element to row `start`.
  size_t new_offset = start * shape_.prod(1) * itemsize();
  Tensor res(*this);
  TensorShape newshape(shape_);
  newshape[0] = end - start;
  res.shape_ = newshape;
  res.stride_ = stride_;
  // offset_ is relative to the storage base (see raw_data()), so the slice
  // offset must be added to the offset this tensor already has; otherwise
  // slicing a slice would address the wrong rows.
  res.offset_ = offset_ + new_offset;
  return res;
}
// Forwards to the storage's empty() flag.
bool empty() const {
  const bool storage_is_empty = storage_->empty();
  return storage_is_empty;
}
// Element type tag of this tensor.
DType dtype() const { return this->dtype_; }
// Device id reported by the storage. Other methods in this class treat -1
// as host memory and values >= 0 as a device.
int device() const {
  const int device_id = storage_->device();
  return device_id;
}
const
Shape
&
shape
()
const
{
return
shape_
;
}
int
dim
(
int
idx
)
const
{
TV_ASSERT_RT_ERR
(
idx
<
shape_
.
size
(),
"error"
);
return
shape_
[
idx
];
// Number of dimensions (rank) of the tensor.
size_t ndim() const {
  const size_t rank = shape_.ndim();
  return rank;
}
// Per-axis extents; the reference stays valid as long as this tensor does.
const TensorShape &shape() const { return this->shape_; }
// Per-axis strides; the reference stays valid as long as this tensor does.
const TensorShape &stride() const { return this->stride_; }
// Extent of axis `idx`. Negative indices count from the last axis
// (-1 is the last axis). Throws a runtime error when idx is out of range.
int dim(int idx) const {
  // Bounds must be checked against the rank. shape_.size() is the product
  // of all extents (the element count), not the number of axes, so using it
  // here both mis-validates idx and indexes outside the shape array.
  const int rank = int(shape_.ndim());
  if (idx < 0) {
    TV_ASSERT_RT_ERR(rank + idx >= 0, idx, shape_);
    return shape_[rank + idx];
  } else {
    TV_ASSERT_RT_ERR(idx < rank, idx, shape_);
    return shape_[idx];
  }
}
const
uint8_t
*
raw_data
()
const
{
return
storage_
->
data
();
}
// Read-only byte pointer to this tensor's first element: the storage base
// advanced by offset_ bytes.
const uint8_t *raw_data() const {
  const uint8_t *base = storage_->data();
  return base + offset_;
}
// Size of the tensor's elements in bytes (element count * element size).
size_t raw_size() const {
  const size_t count = size();
  return count * itemsize();
}
// Element count as reported by shape_.size().
size_t size() const {
  const size_t count = shape_.size();
  return count;
}
Tensor
&
zero_
()
{
// Size in bytes of one element of the current dtype.
size_t itemsize() const {
  const size_t bytes_per_item = detail::sizeof_dtype(dtype_);
  return bytes_per_item;
}
// Zeroes the tensor in place and returns *this. Throws if the tensor is not
// writable.
// NOTE(review): this delegates to storage_->zero_(); presumably that clears
// the whole storage rather than only this view's range — confirm for
// sliced tensors.
Tensor &zero_() {
  this->writable_check();
  this->storage_->zero_();
  return *this;
}
uint8_t
*
raw_data
()
{
return
storage_
->
data
();
}
template
<
typename
T
>
Tensor
&
fill_
(
T
value
)
{
TV_ASSERT_RT_ERR
(
dtype_
==
type_v
<
T
>
,
"error"
);
auto
ptr
=
reinterpret_cast
<
T
*>
(
raw_data
());
std
::
fill
(
ptr
,
ptr
+
size
(),
value
);
// Mutable byte pointer to this tensor's first element (storage base plus
// offset_ bytes). Throws if the tensor is not writable.
uint8_t *raw_data() {
  writable_check();
  uint8_t *base = storage_->data();
  return base + offset_;
}
// Sets every element to `value`, converted to the tensor's element type.
// Only host tensors (device() == -1) are accepted, and the tensor must be
// writable. Throws an invalid-argument error when T is not convertible to
// the stored element type. Returns *this.
template <typename T> Tensor &fill_(T value) {
  writable_check();
  TV_ASSERT_RT_ERR(device() == -1, "error");
  // Dispatch on the runtime dtype; the lambda argument only carries the
  // element type.
  Dispatch<detail::all_tensor_types_t>()(dtype_, [&](auto type_tag) {
    using Treal = decltype(type_tag);
    if (!std::is_convertible<T, Treal>::value) {
      TV_THROW_INVALID_ARG("not convertable from", type_s<T>, "to",
                           type_s<Treal>);
    } else {
      auto first = reinterpret_cast<Treal *>(raw_data());
      auto last = first + size();
      std::fill(first, last, Treal(value));
    }
  });
  return *this;
}
template
<
typename
T
>
T
*
data
()
{
template
<
typename
T
>
T
*
data
()
{
TV_ASSERT_RT_ERR
(
dtype_
==
type_v
<
T
>
,
"error"
);
writable_check
();
return
reinterpret_cast
<
T
*>
(
raw_data
());
}
template
<
typename
T
>
const
T
*
data
()
const
{
template
<
typename
T
>
const
T
*
data
()
const
{
TV_ASSERT_RT_ERR
(
dtype_
==
type_v
<
T
>
,
"error"
);
return
reinterpret_cast
<
const
T
*>
(
raw_data
());
}
void
copy_
(
const
Tensor
&
tensor
)
{
void
copy_
(
const
Tensor
&
tensor
)
{
writable_check
();
TV_ASSERT_INVALID_ARG
(
contiguous_
,
"only support contiguous for now"
);
TV_ASSERT_RT_ERR
(
!
empty
()
&&
!
tensor
.
empty
(),
"must not empty"
);
TV_ASSERT_RT_ERR
(
size
()
==
tensor
.
size
(),
"must have same size"
);
TV_ASSERT_RT_ERR
(
dtype
()
==
tensor
.
dtype
(),
"must have same dtype"
);
if
(
device
()
==
-
1
&&
tensor
.
device
()
==
-
1
)
{
#ifdef SPCONV_CUDA
TV_ASSERT_RT_ERR
(
dtype
()
==
tensor
.
dtype
(),
"must have same dtype"
,
detail
::
typeString
(
dtype
()),
detail
::
typeString
(
tensor
.
dtype
()));
if
(
device
()
==
-
1
&&
tensor
.
device
()
==
-
1
)
{
#ifdef TV_CUDA
host2host
(
storage_
->
data
(),
tensor
.
raw_data
(),
size
()
*
detail
::
sizeof_dtype
(
dtype_
));
#else
...
...
@@ -379,88 +686,162 @@ struct Tensor
storage_
->
data
());
#endif
}
#ifdef SPCONV_CUDA
else
if
(
device
()
>=
0
&&
tensor
.
device
()
==
-
1
)
{
// host2dev
#ifdef TV_CUDA
else
if
(
device
()
>=
0
&&
tensor
.
device
()
==
-
1
)
{
host2dev
(
storage_
->
data
(),
tensor
.
raw_data
(),
size
()
*
detail
::
sizeof_dtype
(
dtype_
));
}
else
if
(
device
()
==
-
1
&&
tensor
.
device
()
>=
0
)
{
// dev2host
}
else
if
(
device
()
==
-
1
&&
tensor
.
device
()
>=
0
)
{
dev2host
(
storage_
->
data
(),
tensor
.
raw_data
(),
size
()
*
detail
::
sizeof_dtype
(
dtype_
));
}
else
if
(
device
()
>=
0
&&
tensor
.
device
()
>=
0
)
{
// dev2dev
}
else
if
(
device
()
>=
0
&&
tensor
.
device
()
>=
0
)
{
dev2dev
(
storage_
->
data
(),
tensor
.
raw_data
(),
size
()
*
detail
::
sizeof_dtype
(
dtype_
));
}
#endif
else
{
TV_ASSERT_RT_ERR
(
false
,
"only support cpu tensor"
);
else
{
TV_THROW_RT_ERR
(
"only support cpu tensor"
);
}
}
Tensor
cpu
()
const
{
if
(
storage_
->
device
()
==
-
1
)
{
return
*
this
;
#ifdef TV_CUDA
void
copy_
(
const
Tensor
&
tensor
,
cudaStream_t
stream
)
{
writable_check
();
TV_ASSERT_INVALID_ARG
(
contiguous_
,
"only support contiguous for now"
);
TV_ASSERT_RT_ERR
(
!
empty
()
&&
!
tensor
.
empty
(),
"must not empty"
);
TV_ASSERT_RT_ERR
(
size
()
==
tensor
.
size
(),
"must have same size"
);
TV_ASSERT_RT_ERR
(
dtype
()
==
tensor
.
dtype
(),
"must have same dtype"
,
detail
::
typeString
(
dtype
()),
detail
::
typeString
(
tensor
.
dtype
()));
if
(
device
()
==
-
1
&&
tensor
.
device
()
==
-
1
)
{
host2host
(
storage_
->
data
(),
tensor
.
raw_data
(),
size
()
*
detail
::
sizeof_dtype
(
dtype_
),
stream
);
}
else
if
(
device
()
>=
0
&&
tensor
.
device
()
==
-
1
)
{
host2dev
(
storage_
->
data
(),
tensor
.
raw_data
(),
size
()
*
detail
::
sizeof_dtype
(
dtype_
),
stream
);
}
else
if
(
device
()
==
-
1
&&
tensor
.
device
()
>=
0
)
{
dev2host
(
storage_
->
data
(),
tensor
.
raw_data
(),
size
()
*
detail
::
sizeof_dtype
(
dtype_
),
stream
);
}
else
if
(
device
()
>=
0
&&
tensor
.
device
()
>=
0
)
{
dev2dev
(
storage_
->
data
(),
tensor
.
raw_data
(),
size
()
*
detail
::
sizeof_dtype
(
dtype_
),
stream
);
}
else
{
TV_THROW_RT_ERR
(
"only support cpu tensor"
);
}
Tensor
res
(
shape_
,
dtype_
,
-
1
,
storage_
->
managed
());
}
#endif
// Returns a host copy of this tensor. A tensor that already lives on the
// host (device -1) is cloned, so the result never aliases this tensor.
Tensor cpu() const {
  const bool already_on_host = storage_->device() == -1;
  if (already_on_host) {
    // cpu() should always copy tensor.
    return clone();
  }
  Tensor host_copy(shape_, stride_, dtype_, -1, storage_->managed());
  host_copy.copy_(*this);
  return host_copy;
}
template
<
typename
T
>
void
copy_
(
const
TensorView
<
T
>
&
tensor
,
int
device
)
{
template
<
typename
T
>
void
copy_
(
const
TensorView
<
T
>
&
tensor
,
int
device
)
{
writable_check
();
TV_ASSERT_INVALID_ARG
(
contiguous_
,
"only support contiguous for now"
);
Tensor
src
=
from_blob
(
tensor
,
device
);
return
copy_
(
src
);
}
Tensor
&
operator
=
(
const
Tensor
&
tensor
)
{
dtype_
=
tensor
.
dtype_
;
storage_
=
tensor
.
storage_
;
shape_
=
tensor
.
shape_
;
writeable_
=
tensor
.
writeable_
;
offset_
=
tensor
.
offset_
;
stride_
=
tensor
.
stride_
;
return
*
this
;
}
Tensor
(
const
Tensor
&
tensor
)
{
dtype_
=
tensor
.
dtype_
;
storage_
=
tensor
.
storage_
;
shape_
=
tensor
.
shape_
;
writeable_
=
tensor
.
writeable_
;
offset_
=
tensor
.
offset_
;
stride_
=
tensor
.
stride_
;
}
// Deep copy on the same device. `pinned` is forwarded to the constructor of
// the new tensor. Throws for empty or non-contiguous tensors.
Tensor clone(bool pinned = false) const {
  TV_ASSERT_RT_ERR(!empty(), "clone a empty tensor");
  TV_ASSERT_INVALID_ARG(contiguous_, "only support contiguous for now");
  Tensor duplicate(shape_, stride_, dtype_, device(), pinned,
                   storage_->managed());
  duplicate.copy_(*this);
  return duplicate;
}
// Returns a copy of this tensor converted element-wise to `dtype`.
//
// When `dtype` equals the current dtype this is a plain clone(). Otherwise
// the tensor must be a non-empty, contiguous host tensor. Throws an
// invalid-argument error when the element types are not convertible.
//
// The method is const: it only reads this tensor. Being const also makes
// the element read below use the const data<T>() overload, so converting a
// read-only tensor no longer trips writable_check().
Tensor astype(DType dtype) const {
  if (dtype == dtype_) {
    return clone();
  }
  TV_ASSERT_INVALID_ARG(device() == -1, "only support cpu tensor");
  TV_ASSERT_INVALID_ARG(!empty(), "can't be used in empty tensor");
  TV_ASSERT_INVALID_ARG(contiguous_, "only support contiguous for now");
  auto tensor = Tensor();
  // Outer dispatch resolves the destination element type, inner dispatch
  // the current one; the lambda arguments only carry the types.
  Dispatch<detail::all_tensor_types_t>()(dtype, [&](auto Idst) {
    using Tdst = decltype(Idst);
    Dispatch<detail::all_tensor_types_t>()(dtype_, [&](auto Icur) {
      using Tcur = decltype(Icur);
      if (std::is_convertible<Tcur, Tdst>::value) {
        auto ptr = data<Tcur>();
        tensor =
            Tensor(shape_, stride_, dtype, device(), pinned(),
                   storage_->managed());
        std::copy(ptr, ptr + size(), tensor.data<Tdst>());
      } else {
        TV_THROW_INVALID_ARG("not convertable from", type_s<Tcur>, "to",
                             type_s<Tdst>);
      }
    });
  });
  return tensor;
}
// Invokes tv::dispatch<Ts...> with this tensor's dtype, forwarding `f`.
template <class... Ts, typename F> inline void dispatch(F &&f) {
  tv::dispatch<Ts...>(this->dtype_, std::forward<F>(f));
}
protected:
inline
void
writable_check
()
{
TV_ASSERT_RT_ERR
(
writeable_
,
"you cant do non-const operation when not writable"
);
}
DType
dtype_
;
std
::
shared_ptr
<
detail
::
TensorStorage
<
uint8_t
>>
storage_
;
Shape
shape_
;
TensorShape
shape_
;
size_t
offset_
=
0
;
TensorShape
stride_
;
private:
bool
writeable_
=
true
;
bool
contiguous_
=
true
;
};
inline
Tensor
from_blob
(
void
*
ptr
,
Shape
shape
,
DType
dtype
,
int
device
)
{
return
Tensor
(
ptr
,
shape
,
dtype
,
device
);
// Streams a textual representation of a host tensor. Throws an
// invalid-argument error for device tensors. float/double tensors are
// formatted through a scratch stream with precision 4.
template <typename Os> Os &operator<<(Os &os, const Tensor &tensor) {
  TV_ASSERT_INVALID_ARG(tensor.device() == -1, "must be cpu tensor");
  Dispatch<detail::all_tensor_types_t>()(tensor.dtype(), [&](auto type_tag) {
    using T = decltype(type_tag);
    constexpr bool is_floating =
        std::is_same<T, float>::value || std::is_same<T, double>::value;
    std::stringstream ss;
    if (is_floating) {
      ss << std::setprecision(4);
    }
    os << tensor.tview<T, -1, DefaultPtrTraits, int64_t>().repr(ss);
  });
  return os;
}
template
<
typename
T
>
Tensor
from_blob
(
TensorView
<
T
>
tensor
,
int
device
)
{
return
Tensor
(
tensor
.
data
(),
tensor
.
shape
,
type_v
<
T
>
,
device
);
// Builds a Tensor from an existing mutable buffer by forwarding all
// arguments to the Tensor(ptr, shape, dtype, device) constructor.
inline Tensor from_blob(void *ptr, TensorShape shape, DType dtype,
                        int device) {
  Tensor wrapped(ptr, shape, dtype, device);
  return wrapped;
}
template
<
class
...
Ts
,
typename
F
>
void
dispatch
(
DType
t
,
F
&&
f
)
{
static_assert
(
sizeof
...(
Ts
)
>
0
,
"you need to provide at least one type"
);
bool
notFound
=
true
;
spconv
::
mp_for_each
<
spconv
::
mp_list
<
Ts
...
>>
([
=
,
&
notFound
,
&
f
](
auto
I
)
{
if
(
type_v
<
decltype
(
I
)
>
==
t
)
{
std
::
forward
<
F
>
(
f
)(
decltype
(
I
)());
notFound
=
false
;
}
});
if
(
notFound
)
{
std
::
stringstream
ss
;
spconv
::
mp_for_each
<
spconv
::
mp_list
<
Ts
...
>>
([
=
,
&
ss
](
auto
I
)
{
ss
<<
detail
::
TypeToString
<
decltype
(
I
)
>::
value
<<
" "
;
});
TV_THROW_RT_ERR
(
"unknown type"
,
detail
::
typeString
(
t
),
", available: "
,
ss
.
str
());
}
// Builds a Tensor from an existing read-only buffer by forwarding all
// arguments to the Tensor(const void *, shape, dtype, device) constructor.
inline Tensor from_blob(const void *ptr, TensorShape shape, DType dtype,
                        int device) {
  Tensor wrapped(ptr, shape, dtype, device);
  return wrapped;
}
}
// namespace tv
\ No newline at end of file
include/tensorview/tensorview.h
View file @
19e73bbe
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
...
...
@@ -17,19 +17,19 @@
#include <cassert>
#include <cstdlib>
#include "common.h"
#include "prettyprint.h"
#include <iostream>
#include <memory>
#include <sstream>
#include <type_traits>
#include <vector>
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
#include <cuda_runtime_api.h>
#endif
namespace
tv
{
#ifdef
__NVCC__
#if
(
def
ined(__clang__) && defined(__CUDA__)) || defined(
__NVCC__
)
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__ __host__
#define TV_DEVICE_INLINE __forceinline__ __device__
...
...
@@ -54,54 +54,6 @@ namespace tv {
} \
}
#define TV_DEVICE_REQUIRE(expr, ...) \
{ \
if (!(expr) && threadIdx.x == 0) \
printf(__VA_ARGS__); \
assert(expr); \
}
template
<
class
SStream
,
class
T
>
void
sstream_print
(
SStream
&
ss
,
T
val
)
{
ss
<<
val
;
}
template
<
class
SStream
,
class
T
,
class
...
TArgs
>
void
sstream_print
(
SStream
&
ss
,
T
val
,
TArgs
...
args
)
{
ss
<<
val
<<
" "
;
sstream_print
(
ss
,
args
...);
}
template
<
class
...
TArgs
>
void
ssprint
(
TArgs
...
args
)
{
std
::
stringstream
ss
;
sstream_print
(
ss
,
args
...);
std
::
cout
<<
ss
.
str
()
<<
std
::
endl
;
}
#define TV_THROW_RT_ERR(...) \
{ \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::runtime_error(__macro_s.str()); \
}
#define TV_ASSERT_RT_ERR(expr, ...) \
{ \
if (!(expr)) \
TV_THROW_RT_ERR(__VA_ARGS__); \
}
#define TV_ASSERT_INVALID_ARG(expr, ...) \
{ \
if (!(expr)) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << #expr << " assert faild. "; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::invalid_argument(__macro_s.str()); \
} \
}
#define TV_CHECK_CUDA_ERR() \
{ \
auto __macro_err = cudaGetLastError(); \
...
...
@@ -109,6 +61,7 @@ template <class... TArgs> void ssprint(TArgs... args) {
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << "cuda execution failed with error " << __macro_err; \
TV_BACKTRACE_PRINT(__macro_s); \
throw std::runtime_error(__macro_s.str()); \
} \
}
...
...
@@ -122,11 +75,12 @@ template <class... TArgs> void ssprint(TArgs... args) {
__macro_s << "cuda execution failed with error " << __macro_err; \
__macro_s << " " << cudaGetErrorString(__macro_err) << "\n"; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
TV_BACKTRACE_PRINT(__macro_s); \
throw std::runtime_error(__macro_s.str()); \
} \
}
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
struct
GPU
{
GPU
(
cudaStream_t
s
=
0
)
:
mStream
(
s
)
{}
virtual
cudaStream_t
getStream
()
const
{
return
mStream
;
}
...
...
@@ -135,7 +89,18 @@ struct GPU {
#endif
struct
CPU
{};
#ifndef TV_MAX_DIM
#define TV_MAX_DIM 6
#endif
template
<
typename
T
>
struct
DefaultPtrTraits
{
typedef
T
*
type
;
};
#if defined(__CUDACC__) || defined(__HIPCC__)
template
<
typename
T
>
struct
RestrictPtrTraits
{
typedef
T
*
__restrict__
type
;
};
#endif
/*
template <typename T>
constexpr size_t calc_align(size_t ndim)
...
...
@@ -160,57 +125,73 @@ template <typename T, size_t MaxDim = TV_MAX_DIM>
struct
/*alignas(calc_align<T>(MaxDim))*/
SimpleVector
{
public:
TV_HOST_DEVICE_INLINE
SimpleVector
(){};
TV_HOST_DEVICE_INLINE
SimpleVector
(
size_t
count
,
T
init
=
T
())
:
size_
(
count
)
{
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
array_
[
i
]
=
init
;
}
};
template
<
typename
Iterator
>
SimpleVector
(
Iterator
first
,
Iterator
last
)
{
size_
=
0
;
for
(;
first
!=
last
;
++
first
)
{
if
(
size_
>=
MaxDim
)
{
TV_THROW_INVALID_ARG
(
"iterator too long"
);
}
array_
[
size_
++
]
=
*
first
;
}
};
TV_HOST_DEVICE_INLINE
SimpleVector
(
std
::
initializer_list
<
T
>
q
)
{
TV_ASSERT
(
q
.
size
()
<=
MaxDim
);
mS
ize
=
0
;
s
ize
_
=
0
;
for
(
T
s
:
q
)
{
mA
rray
[
mS
ize
++
]
=
s
;
a
rray
_
[
s
ize
_
++
]
=
s
;
}
mS
ize
=
q
.
size
();
s
ize
_
=
q
.
size
();
}
SimpleVector
(
const
std
::
vector
<
T
>
&
arr
)
{
TV_ASSERT
(
arr
.
size
()
<=
MaxDim
);
for
(
size_t
i
=
0
;
i
<
arr
.
size
();
++
i
)
{
mA
rray
[
i
]
=
arr
[
i
];
a
rray
_
[
i
]
=
arr
[
i
];
}
mS
ize
=
arr
.
size
();
s
ize
_
=
arr
.
size
();
}
TV_HOST_DEVICE_INLINE
SimpleVector
(
const
SimpleVector
<
T
,
MaxDim
>
&
arr
)
{
TV_ASSERT
(
arr
.
size
()
<=
MaxDim
);
for
(
size_t
i
=
0
;
i
<
arr
.
size
();
++
i
)
{
mA
rray
[
i
]
=
arr
[
i
];
a
rray
_
[
i
]
=
arr
[
i
];
}
mS
ize
=
arr
.
size
();
s
ize
_
=
arr
.
size
();
}
TV_HOST_DEVICE_INLINE
T
&
operator
[](
int
idx
)
{
#ifdef TV_DEBUG
TV_ASSERT
(
idx
>=
0
&&
idx
<
mS
ize
);
TV_ASSERT
(
idx
>=
0
&&
idx
<
s
ize
_
);
#endif
return
mA
rray
[
idx
];
return
a
rray
_
[
idx
];
}
TV_HOST_DEVICE_INLINE
const
T
&
operator
[](
int
idx
)
const
{
#ifdef TV_DEBUG
TV_ASSERT
(
idx
>=
0
&&
idx
<
mS
ize
);
TV_ASSERT
(
idx
>=
0
&&
idx
<
s
ize
_
);
#endif
return
mA
rray
[
idx
];
return
a
rray
_
[
idx
];
}
TV_HOST_DEVICE_INLINE
void
push_back
(
T
s
)
{
#ifdef TV_DEBUG
TV_ASSERT
(
mS
ize
<
MaxDim
);
TV_ASSERT
(
s
ize
_
<
MaxDim
);
#endif
mA
rray
[
mS
ize
]
=
s
;
mS
ize
++
;
a
rray
_
[
s
ize
_
]
=
s
;
s
ize
_
++
;
}
TV_HOST_DEVICE_INLINE
void
pop_back
()
{
#ifdef TV_DEBUG
TV_ASSERT
(
mS
ize
>
0
);
TV_ASSERT
(
s
ize
_
>
0
);
#endif
mS
ize
--
;
s
ize
_
--
;
}
TV_HOST_DEVICE_INLINE
size_t
size
()
const
{
return
mSize
;
}
TV_HOST_DEVICE_INLINE
const
T
*
data
()
const
{
return
mArray
;
}
TV_HOST_DEVICE_INLINE
size_t
empty
()
const
{
return
mSize
==
0
;
}
TV_HOST_DEVICE_INLINE
size_t
size
()
const
{
return
size_
;
}
TV_HOST_DEVICE_INLINE
const
T
*
data
()
const
{
return
array_
;
}
TV_HOST_DEVICE_INLINE
T
*
data
()
{
return
array_
;
}
TV_HOST_DEVICE_INLINE
size_t
empty
()
const
{
return
size_
==
0
;
}
typedef
size_t
size_type
;
...
...
@@ -234,10 +215,10 @@ public:
}
TV_HOST_DEVICE_INLINE
reference
operator
*
()
{
return
*
ptr_
;
}
TV_HOST_DEVICE_INLINE
pointer
operator
->
()
{
return
ptr_
;
}
TV_HOST_DEVICE_INLINE
bool
operator
==
(
const
self_type
&
rhs
)
{
TV_HOST_DEVICE_INLINE
bool
operator
==
(
const
self_type
&
rhs
)
const
{
return
ptr_
==
rhs
.
ptr_
;
}
TV_HOST_DEVICE_INLINE
bool
operator
!=
(
const
self_type
&
rhs
)
{
TV_HOST_DEVICE_INLINE
bool
operator
!=
(
const
self_type
&
rhs
)
const
{
return
ptr_
!=
rhs
.
ptr_
;
}
...
...
@@ -265,10 +246,10 @@ public:
}
TV_HOST_DEVICE_INLINE
reference
operator
*
()
{
return
*
ptr_
;
}
TV_HOST_DEVICE_INLINE
pointer
operator
->
()
{
return
ptr_
;
}
TV_HOST_DEVICE_INLINE
bool
operator
==
(
const
self_type
&
rhs
)
{
TV_HOST_DEVICE_INLINE
bool
operator
==
(
const
self_type
&
rhs
)
const
{
return
ptr_
==
rhs
.
ptr_
;
}
TV_HOST_DEVICE_INLINE
bool
operator
!=
(
const
self_type
&
rhs
)
{
TV_HOST_DEVICE_INLINE
bool
operator
!=
(
const
self_type
&
rhs
)
const
{
return
ptr_
!=
rhs
.
ptr_
;
}
...
...
@@ -276,28 +257,28 @@ public:
pointer
ptr_
;
};
TV_HOST_DEVICE_INLINE
iterator
begin
()
{
return
iterator
(
mA
rray
);
}
TV_HOST_DEVICE_INLINE
iterator
begin
()
{
return
iterator
(
a
rray
_
);
}
TV_HOST_DEVICE_INLINE
iterator
end
()
{
return
iterator
(
mA
rray
+
mS
ize
);
}
TV_HOST_DEVICE_INLINE
iterator
end
()
{
return
iterator
(
a
rray
_
+
s
ize
_
);
}
TV_HOST_DEVICE_INLINE
const_iterator
begin
()
const
{
return
const_iterator
(
mA
rray
);
return
const_iterator
(
a
rray
_
);
}
TV_HOST_DEVICE_INLINE
const_iterator
end
()
const
{
return
const_iterator
(
mA
rray
+
mS
ize
);
return
const_iterator
(
a
rray
_
+
s
ize
_
);
}
TV_HOST_DEVICE_INLINE
const_iterator
cbegin
()
const
{
return
const_iterator
(
mA
rray
);
return
const_iterator
(
a
rray
_
);
}
TV_HOST_DEVICE_INLINE
const_iterator
cend
()
const
{
return
const_iterator
(
mA
rray
+
mS
ize
);
return
const_iterator
(
a
rray
_
+
s
ize
_
);
}
protected:
T
mA
rray
[
MaxDim
];
size_t
mS
ize
=
0
;
T
a
rray
_
[
MaxDim
];
size_t
s
ize
_
=
0
;
};
template
<
typename
T
,
size_t
MaxDim
>
...
...
@@ -323,28 +304,28 @@ struct Slice {
template
<
class
...
Integers
>
TV_HOST_DEVICE_INLINE
Slice
(
Integers
...
ints
)
{
static_assert
(
sizeof
...(
ints
)
<=
3
,
"slice init must smaller than 3"
);
SimpleVector
<
int
,
3
>
slices
{
int
(
ints
)...};
mS
lices
[
0
]
=
-
1
;
mS
lices
[
1
]
=
-
1
;
mS
lices
[
2
]
=
-
1
;
s
lices
_
[
0
]
=
-
1
;
s
lices
_
[
1
]
=
-
1
;
s
lices
_
[
2
]
=
-
1
;
for
(
size_t
i
=
0
;
i
<
slices
.
size
();
++
i
)
{
mS
lices
[
i
]
=
slices
[
i
];
s
lices
_
[
i
]
=
slices
[
i
];
}
}
TV_HOST_DEVICE_INLINE
Slice
()
{
mS
lices
[
0
]
=
-
1
;
mS
lices
[
1
]
=
-
1
;
mS
lices
[
2
]
=
-
1
;
s
lices
_
[
0
]
=
-
1
;
s
lices
_
[
1
]
=
-
1
;
s
lices
_
[
2
]
=
-
1
;
}
template
<
typename
T
>
TV_HOST_DEVICE_INLINE
Slice
(
std
::
initializer_list
<
T
>
slice
)
{
mS
lices
[
0
]
=
-
1
;
mS
lices
[
1
]
=
-
1
;
mS
lices
[
2
]
=
-
1
;
s
lices
_
[
0
]
=
-
1
;
s
lices
_
[
1
]
=
-
1
;
s
lices
_
[
2
]
=
-
1
;
TV_ASSERT
(
slice
.
size
()
<=
3
);
int
idx
=
0
;
for
(
T
s
:
slice
)
{
mS
lices
[
idx
]
=
int
(
s
);
s
lices
_
[
idx
]
=
int
(
s
);
++
idx
;
}
}
...
...
@@ -352,90 +333,124 @@ struct Slice {
#ifdef TV_DEBUG
TV_ASSERT
(
idx
>=
0
&&
idx
<
3
);
#endif
return
mS
lices
[
idx
];
return
s
lices
_
[
idx
];
}
TV_HOST_DEVICE_INLINE
const
int
&
operator
[](
int
idx
)
const
{
#ifdef TV_DEBUG
TV_ASSERT
(
idx
>=
0
&&
idx
<
3
);
#endif
return
mS
lices
[
idx
];
return
s
lices
_
[
idx
];
}
protected:
int
mS
lices
[
3
];
int
s
lices
_
[
3
];
};
template
<
size_t
MaxDim
=
TV_MAX_DIM
>
struct
ShapeBase
:
public
SimpleVector
<
in
t
,
MaxDim
>
{
TV_HOST_DEVICE_INLINE
ShapeBase
()
:
SimpleVector
<
in
t
,
MaxDim
>
(){};
TV_HOST_DEVICE_INLINE
ShapeBase
(
std
::
initializer_list
<
in
t
>
shape
)
:
SimpleVector
<
in
t
,
MaxDim
>
(
shape
)
{}
TV_HOST_DEVICE_INLINE
ShapeBase
(
SimpleVector
<
in
t
,
MaxDim
>
vec
)
:
SimpleVector
<
in
t
,
MaxDim
>
(
vec
)
{}
template
<
size_t
MaxDim
=
TV_MAX_DIM
,
typename
Tindex
=
int
>
struct
ShapeBase
:
public
SimpleVector
<
T
in
dex
,
MaxDim
>
{
TV_HOST_DEVICE_INLINE
ShapeBase
()
:
SimpleVector
<
T
in
dex
,
MaxDim
>
(){};
TV_HOST_DEVICE_INLINE
ShapeBase
(
std
::
initializer_list
<
T
in
dex
>
shape
)
:
SimpleVector
<
T
in
dex
,
MaxDim
>
(
shape
)
{}
TV_HOST_DEVICE_INLINE
ShapeBase
(
SimpleVector
<
T
in
dex
,
MaxDim
>
vec
)
:
SimpleVector
<
T
in
dex
,
MaxDim
>
(
vec
)
{}
template
<
typename
T
,
template
<
class
...
>
class
Container
>
ShapeBase
(
Container
<
T
>
shape
)
:
SimpleVector
<
in
t
,
MaxDim
>
(
shape
)
{}
ShapeBase
(
Container
<
T
>
shape
)
:
SimpleVector
<
T
in
dex
,
MaxDim
>
(
shape
)
{}
TV_HOST_DEVICE_INLINE
ShapeBase
(
const
ShapeBase
<
MaxDim
>
&
shape
)
:
SimpleVector
<
int
,
MaxDim
>
(
shape
)
{}
ShapeBase
(
const
std
::
vector
<
int
>
&
arr
)
:
SimpleVector
<
int
,
MaxDim
>
(
arr
)
{}
ShapeBase
<
MaxDim
>
&
operator
=
(
const
ShapeBase
<
MaxDim
>
&
shape
)
=
default
;
TV_HOST_DEVICE_INLINE
ShapeBase
<
MaxDim
>
subshape
(
int
start
,
int
end
)
const
{
:
SimpleVector
<
Tindex
,
MaxDim
>
(
shape
)
{}
ShapeBase
(
const
std
::
vector
<
Tindex
>
&
arr
)
:
SimpleVector
<
Tindex
,
MaxDim
>
(
arr
)
{}
ShapeBase
<
MaxDim
,
Tindex
>
&
operator
=
(
const
ShapeBase
<
MaxDim
,
Tindex
>
&
shape
)
=
default
;
TV_HOST_DEVICE
ShapeBase
<
MaxDim
,
Tindex
>
subshape
(
Tindex
start
,
Tindex
end
)
const
{
#ifdef TV_DEBUG
TV_ASSERT
(
start
>=
0
&&
end
<=
this
->
mS
ize
&&
end
>
start
);
TV_ASSERT
(
start
>=
0
&&
end
<=
this
->
s
ize
_
&&
end
>
start
);
#endif
ShapeBase
<
MaxDim
>
shape
;
for
(
in
t
i
=
start
;
i
<
end
;
++
i
)
{
shape
.
push_back
(
this
->
mA
rray
[
i
]);
ShapeBase
<
MaxDim
,
Tindex
>
shape
;
for
(
T
in
dex
i
=
start
;
i
<
end
;
++
i
)
{
shape
.
push_back
(
this
->
a
rray
_
[
i
]);
}
return
shape
;
}
TV_HOST_DEVICE
_INLINE
ShapeBase
<
MaxDim
>
subshape
(
in
t
start
)
const
{
TV_HOST_DEVICE
ShapeBase
<
MaxDim
,
Tindex
>
subshape
(
T
in
dex
start
)
const
{
#ifdef TV_DEBUG
TV_ASSERT
(
start
>=
0
&&
start
<=
this
->
mS
ize
);
TV_ASSERT
(
start
>=
0
&&
start
<=
this
->
s
ize
_
);
#endif
ShapeBase
<
MaxDim
>
shape
;
for
(
in
t
i
=
start
;
i
<
this
->
mS
ize
;
++
i
)
{
shape
.
push_back
(
this
->
mA
rray
[
i
]);
ShapeBase
<
MaxDim
,
Tindex
>
shape
;
for
(
size_
t
i
=
start
;
i
<
this
->
s
ize
_
;
++
i
)
{
shape
.
push_back
(
this
->
a
rray
_
[
i
]);
}
return
shape
;
}
TV_HOST_DEVICE
_INLINE
size_t
size
()
const
{
if
(
this
->
mS
ize
==
0
)
TV_HOST_DEVICE
size_t
size
()
const
{
if
(
this
->
s
ize
_
==
0
)
return
0
;
size_t
s
=
1
;
for
(
int
i
=
0
;
i
<
int
(
this
->
mS
ize
);
++
i
)
{
s
*=
this
->
mA
rray
[
i
];
for
(
int
i
=
0
;
i
<
int
(
this
->
s
ize
_
);
++
i
)
{
s
*=
this
->
a
rray
_
[
i
];
}
return
s
;
}
TV_HOST_DEVICE_INLINE
size_t
ndim
()
const
{
return
this
->
mSize
;
}
TV_HOST_DEVICE_INLINE
ShapeBase
<
MaxDim
>
squeeze
()
const
{
ShapeBase
<
MaxDim
>
shape
;
for
(
int
i
=
0
;
i
<
this
->
mSize
;
++
i
)
{
if
(
this
->
mArray
[
i
]
!=
1
)
shape
.
push_back
(
this
->
mArray
[
i
]);
TV_HOST_DEVICE_INLINE
size_t
ndim
()
const
{
return
this
->
size_
;
}
TV_HOST_DEVICE
ShapeBase
<
MaxDim
,
Tindex
>
squeeze
()
const
{
ShapeBase
<
MaxDim
,
Tindex
>
shape
;
for
(
size_t
i
=
0
;
i
<
this
->
size_
;
++
i
)
{
if
(
this
->
array_
[
i
]
!=
1
)
shape
.
push_back
(
this
->
array_
[
i
]);
}
if
(
shape
.
empty
())
{
// dont support empty shape for now
shape
.
push_back
(
1
);
}
return
shape
;
}
// Copy of this shape with axis `dim` dropped when its extent is 1; all
// other axes (and a non-unit `dim`) are kept in order.
template <size_t MaxDim2 = MaxDim>
TV_HOST_DEVICE ShapeBase<MaxDim2, Tindex> squeeze(int dim) const {
  static_assert(MaxDim2 >= MaxDim - 1, "error");
  ShapeBase<MaxDim2, Tindex> result;
  const size_t target = size_t(dim);
  for (size_t axis = 0; axis < this->size_; ++axis) {
    const bool drop = (axis == target) && (this->array_[axis] == 1);
    if (!drop) {
      result.push_back(this->array_[axis]);
    }
  }
  return result;
}
TV_HOST_DEVICE_INLINE
ShapeBase
<
MaxDim
>
squeeze
(
int
dim
)
const
{
ShapeBase
<
MaxDim
>
shape
;
for
(
int
i
=
0
;
i
<
this
->
mSize
;
++
i
)
{
if
(
i
!=
dim
||
this
->
mArray
[
i
]
!=
1
)
shape
.
push_back
(
this
->
mArray
[
i
]);
// Copy of this shape with a new axis of extent 1 inserted before position
// `dim`. `dim == ndim()` appends the new axis at the end.
// NOTE(review): the result holds ndim() + 1 entries; push_back only checks
// capacity under TV_DEBUG, so callers must leave room in MaxDim2 — confirm
// whether the static_assert bound should be tightened.
template <size_t MaxDim2 = MaxDim>
TV_HOST_DEVICE ShapeBase<MaxDim2, Tindex> unsqueeze(int dim) const {
  static_assert(MaxDim2 >= MaxDim - 1, "error");
  ShapeBase<MaxDim2, Tindex> shape;
  for (size_t i = 0; i < this->size_; ++i) {
    if (i == size_t(dim))
      shape.push_back(1);
    shape.push_back(this->array_[i]);
  }
  // The loop only inserts in front of an existing axis, so a trailing axis
  // (dim == ndim) has to be appended explicitly; without this the call
  // silently returned the shape unchanged.
  if (size_t(dim) == this->size_)
    shape.push_back(1);
  return shape;
}
TV_HOST_DEVICE
size_t
prod
()
const
{
TV_HOST_DEVICE
size_t
prod
(
Tindex
start
=
0
)
const
{
size_t
res
=
1
;
for
(
size_t
i
=
0
;
i
<
this
->
mSize
;
++
i
)
{
res
*=
this
->
mArray
[
i
];
for
(
size_t
i
=
start
;
i
<
this
->
size_
;
++
i
)
{
res
*=
this
->
array_
[
i
];
}
return
res
;
}
// Row-major (C-order) strides, in elements, for this shape: the last axis
// has stride 1 and each earlier axis has the product of all later extents.
// Does not modify the shape, hence const.
template <size_t MaxDim2 = MaxDim>
TV_HOST_DEVICE ShapeBase<MaxDim2, Tindex> stride_rowmajor() const {
  static_assert(MaxDim2 >= MaxDim, "error");
  Tindex p = Tindex(1);
  ShapeBase<MaxDim2, Tindex> res(this->size_);
  // Count down with an unsigned index and a post-decrement test. A
  // `Tindex i >= 0` loop never terminates when Tindex is unsigned and
  // relies on a narrowing wrap-around for an empty shape.
  for (size_t i = this->size_; i-- > 0;) {
    res[int(i)] = p;
    p *= this->array_[i];
  }
  return res;
}
};
using
Shape
=
ShapeBase
<
TV_MAX_DIM
>
;
using
Shape
=
ShapeBase
<
TV_MAX_DIM
,
int
>
;
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
unsigned
rowArrayIdx
(
std
::
vector
<
int
>
&
shape
,
...
...
@@ -446,7 +461,9 @@ TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int> &shape,
#ifdef TV_DEBUG
TV_ASSERT
(
sizeof
...(
indexes
)
==
shape
.
size
());
#endif
#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
for
(
int
i
=
sizeof
...(
indexes
)
-
1
;
i
>=
0
;
--
i
)
{
offset
+=
m
*
indexes_vec
[
i
];
m
*=
shape
[
i
];
...
...
@@ -471,7 +488,9 @@ TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape &shape,
unsigned
offset
=
0
;
unsigned
m
=
1
;
int
indexes_vec
[
sizeof
...(
indexes
)]
=
{
indexes
...};
#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
for
(
int
i
=
sizeof
...(
indexes
)
-
1
;
i
>=
0
;
--
i
)
{
offset
+=
m
*
indexes_vec
[
i
];
m
*=
shape
[
i
];
...
...
@@ -495,7 +514,9 @@ TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Index *indexes,
const
Index
*
shape
)
{
unsigned
offset
=
0
;
unsigned
m
=
1
;
#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
for
(
int
i
=
NDim
-
1
;
i
>=
0
;
--
i
)
{
offset
+=
m
*
indexes
[
i
];
m
*=
shape
[
i
];
...
...
@@ -515,416 +536,501 @@ TV_HOST_DEVICE_INLINE Index rowArrayIdxInv(Index index, Index *output,
return
index
;
}
template
<
int
N
>
struct
ArrayIndexRowMajor
{
// mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
TV_HOST_DEVICE_INLINE
static
unsigned
run
(
const
Shape
&
shape
,
const
Shape
&
indexes
)
{
return
indexes
[
N
-
1
]
+
shape
[
N
-
1
]
*
ArrayIndexRowMajor
<
N
-
1
>::
run
(
shape
,
indexes
);
// Decomposes a flat row-major `index` into per-axis coordinates.
// Writes ndim coordinates into `output` (last axis first) using the extents
// in `shape`, and returns what is left of `index` after all axes have been
// divided out.
template <typename Index>
TV_HOST_DEVICE Index rowArrayIdxInv(Index index, Index *output,
                                    const Index *shape, int ndim) {
  int axis = ndim;
  while (axis-- > 0) {
    output[axis] = index % shape[axis];
    index = (index - output[axis]) / shape[axis];
  }
  return index;
}
template
<
int
N
>
struct
ArrayIndexRowMajorReverse
{
template
<
typename
TShape
,
typename
T
,
class
...
Ts
>
TV_HOST_DEVICE_INLINE
static
unsigned
run
(
const
TShape
*
shape
,
T
index
,
Ts
...
inds
)
{
return
index
+
shape
[
N
-
1
]
*
ArrayIndexRowMajorReverse
<
N
-
1
>::
run
(
shape
,
inds
...);
}
template
<
typename
T
,
class
...
Ts
>
TV_HOST_DEVICE_INLINE
static
unsigned
runShape
(
const
Shape
&
shape
,
T
index
,
Ts
...
inds
)
{
return
index
+
shape
[
N
-
1
]
*
ArrayIndexRowMajorReverse
<
N
-
1
>::
run
(
shape
,
inds
...);
}
};
template
<
>
struct
ArrayIndexRowMajorReverse
<
1
>
{
template
<
typename
TShape
,
typename
T
>
TV_HOST_DEVICE_INLINE
static
unsigned
run
(
const
TShape
*
shape
,
T
idx
)
{
return
idx
;
}
template
<
typename
T
>
TV_HOST_DEVICE_INLINE
static
unsigned
runShape
(
const
Shape
&
shape
,
T
idx
)
{
return
idx
;
}
};
template
<
int
N
,
int
Ndim
>
struct
ArrayIndexRowMajor
{
// this array index provide almost same compiled code. compile it in
// https://godbolt.org/ for more details.
template
<
typename
TShape
,
typename
Tinit
,
typename
T
,
class
...
Ts
>
TV_HOST_DEVICE_INLINE
static
unsigned
run
(
const
TShape
*
shape
,
Tinit
start
,
T
index
,
Ts
...
inds
)
{
return
ArrayIndexRowMajor
<
N
-
1
,
Ndim
>::
run
(
shape
,
(
index
+
start
)
*
shape
[
Ndim
-
N
+
1
],
inds
...);
}
template
<
typename
Tinit
,
typename
T
,
class
...
Ts
>
TV_HOST_DEVICE_INLINE
static
unsigned
runShape
(
const
Shape
&
shape
,
Tinit
start
,
T
index
,
Ts
...
inds
)
{
return
ArrayIndexRowMajor
<
N
-
1
,
Ndim
>::
runShape
(
shape
,
(
index
+
start
)
*
shape
[
Ndim
-
N
+
1
],
inds
...);
}
};
template
<
int
Ndim
>
struct
ArrayIndexRowMajor
<
1
,
Ndim
>
{
template
<
typename
TShape
,
typename
Tinit
,
typename
T
>
TV_HOST_DEVICE_INLINE
static
unsigned
run
(
const
TShape
*
shape
,
Tinit
start
,
T
idx
)
{
return
start
+
idx
;
}
template
<
typename
Tinit
,
typename
T
>
TV_HOST_DEVICE_INLINE
static
unsigned
runShape
(
const
Shape
&
shape
,
Tinit
start
,
T
idx
)
{
return
start
+
idx
;
}
};
template
<
>
struct
ArrayIndexRowMajor
<
0
>
{
TV_HOST_DEVICE_INLINE
static
unsigned
run
(
const
Shape
&
shape
,
const
Shape
&
indexes
)
{
template
<
>
struct
ArrayIndexRowMajor
<
0
,
0
>
{
template
<
typename
TShape
,
typename
Tinit
>
TV_HOST_DEVICE_INLINE
static
unsigned
run
(
const
TShape
*
shape
,
Tinit
start
)
{
return
0
;
}
template
<
typename
Tinit
>
TV_HOST_DEVICE_INLINE
static
unsigned
runShape
(
const
Shape
&
shape
,
Tinit
start
)
{
return
0
;
}
};
namespace
detail
{
template
<
typename
T
>
constexpr
const
char
*
simpleTypeName
(
T
val
=
T
());
template
<
>
constexpr
const
char
*
simpleTypeName
(
float
val
)
{
return
"float32"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
double
val
)
{
return
"float64"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
int
val
)
{
return
"int32"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
unsigned
val
)
{
return
"uint32"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
long
val
)
{
return
"int64"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
unsigned
long
val
)
{
return
"uint64"
;
/// Flat offset from per-axis strides, unrolled through template recursion.
///
/// `N` is the number of indices still to be consumed and `Ndim` the total
/// number of indices, so the index handled at a given level belongs to axis
/// `Ndim - N`. Callers start the recursion with `N == Ndim` and `start == 0`;
/// the result is `sum(index[k] * stride[k])` for `k` in `[0, Ndim)`.
template <int N, int Ndim> struct ArrayIndexStride {
  // This array index provides almost the same compiled code. Compile it on
  // https://godbolt.org/ for more details.
  //
  // stride: pointer to at least Ndim per-axis strides.
  // start:  offset accumulated by the outer levels.
  // index:  index along axis Ndim - N.
  // inds:   the remaining N - 1 indices.
  template <typename TShape, typename Tinit, typename T, class... Ts>
  TV_HOST_DEVICE_INLINE static unsigned run(const TShape *stride, Tinit start,
                                            T index, Ts... inds) {
    // Axis Ndim - N pairs with stride[Ndim - N]. Using stride[Ndim - N + 1]
    // here would apply the following axis' stride and never read stride[0].
    return ArrayIndexStride<N - 1, Ndim>::run(
        stride, start + index * stride[Ndim - N], inds...);
  }
};

/// Last step of the recursion: the final index belongs to axis `Ndim - 1`.
template <int Ndim> struct ArrayIndexStride<1, Ndim> {
  template <typename TShape, typename Tinit, typename T>
  TV_HOST_DEVICE_INLINE static unsigned run(const TShape *stride, Tinit start,
                                            T idx) {
    return start + idx * stride[Ndim - 1];
  }
};
#if __cplusplus >= 201703L
/// Sums `stride[N] * ids[N]` over every axis number listed in `N...`.
///
/// The axis numbers must be given explicitly as template arguments; each one
/// selects both the stride entry and the positional index it is multiplied
/// with. The sum is returned converted to the stride element type `T`.
template <size_t... N, class T, class... Ts>
TV_HOST_DEVICE_INLINE T array_index_stride(const T *stride, Ts... ids) {
  const auto indices = std::forward_as_tuple(ids...);
  return ((stride[N] * std::get<N>(indices)) + ...);
}
};
// namespace detail
#endif
template
<
typename
T
,
int
Rank
=
-
1
>
struct
TensorView
{
TV_HOST_DEVICE_INLINE
TensorView
()
{}
explicit
TV_HOST_DEVICE_INLINE
TensorView
(
T
*
ptr
,
Shape
shape
)
:
mPtr
(
ptr
),
mShape
(
shape
)
{}
// explicit TV_HOST_DEVICE_INLINE TensorView(const
// TensorView<std::remove_const_t<T>> &tview) : mPtr(tview.data()),
// mShape(tview.shape()) {}
template
<
class
...
Integers
>
explicit
TV_HOST_DEVICE_INLINE
TensorView
(
T
*
ptr
,
Integers
...
shapes
)
:
mPtr
(
ptr
)
{
mShape
=
{
int
(
shapes
)...};
namespace detail {

/// Maps an element type to a short printable name, exposed as the static
/// member `value`. Only the specializations below are defined; the primary
/// template is declared but left incomplete for every other type.
template <typename T> struct TypeToString;

// Non-const element types.
template <> struct TypeToString<bool> {
  static constexpr const char *value = "bool";
};
template <> struct TypeToString<int32_t> {
  static constexpr const char *value = "int32";
};
template <> struct TypeToString<float> {
  static constexpr const char *value = "float";
};
template <> struct TypeToString<double> {
  static constexpr const char *value = "double";
};
template <> struct TypeToString<int16_t> {
  static constexpr const char *value = "int16";
};
template <> struct TypeToString<int8_t> {
  static constexpr const char *value = "int8";
};
template <> struct TypeToString<int64_t> {
  static constexpr const char *value = "int64";
};
template <> struct TypeToString<uint8_t> {
  static constexpr const char *value = "uint8";
};
template <> struct TypeToString<uint16_t> {
  static constexpr const char *value = "uint16";
};
template <> struct TypeToString<uint32_t> {
  static constexpr const char *value = "uint32";
};
template <> struct TypeToString<uint64_t> {
  static constexpr const char *value = "uint64";
};

// Const-qualified element types report the same name as their non-const
// counterpart, so each one simply inherits `value` from it.
template <> struct TypeToString<const bool> : TypeToString<bool> {};
template <> struct TypeToString<const int32_t> : TypeToString<int32_t> {};
template <> struct TypeToString<const float> : TypeToString<float> {};
template <> struct TypeToString<const double> : TypeToString<double> {};
template <> struct TypeToString<const int16_t> : TypeToString<int16_t> {};
template <> struct TypeToString<const int8_t> : TypeToString<int8_t> {};
template <> struct TypeToString<const int64_t> : TypeToString<int64_t> {};
template <> struct TypeToString<const uint8_t> : TypeToString<uint8_t> {};
template <> struct TypeToString<const uint16_t> : TypeToString<uint16_t> {};
template <> struct TypeToString<const uint32_t> : TypeToString<uint32_t> {};
template <> struct TypeToString<const uint64_t> : TypeToString<uint64_t> {};

} // namespace detail

/// Short printable name of `T`, e.g. `type_s<float>` is "float".
template <typename T>
constexpr const char *type_s = detail::TypeToString<T>::value;
namespace
detail
{
template
<
typename
T
,
int
Rank
,
template
<
class
>
class
PtrTraits
=
DefaultPtrTraits
,
typename
Tindex
=
int
>
struct
TensorAccesserBase
{
static
constexpr
int
rank_value
=
Rank
;
using
ptr_t
=
typename
PtrTraits
<
T
>::
type
;
static_assert
(
Rank
>
0
,
"error"
);
explicit
TV_HOST_DEVICE_INLINE
TensorAccesserBase
(
ptr_t
ptr
,
const
Tindex
*
stride_ptr
)
:
ptr_
(
ptr
),
stride_ptr_
(
stride_ptr
)
{}
TV_HOST_DEVICE_INLINE
ptr_t
data
()
{
return
ptr_
;
}
TV_HOST_DEVICE_INLINE
const
ptr_t
data
()
const
{
return
ptr_
;
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
T
&
operator
()(
Inds
...
inds
)
{
static_assert
(
sizeof
...(
inds
)
==
Rank
,
"error"
);
return
ptr_
[
ArrayIndexStride
<
Rank
,
Rank
>::
run
(
stride_ptr_
,
0
,
inds
...)];
}
operator
TensorView
<
const
T
>
()
{
return
TensorView
<
const
T
>
(
mPtr
,
mShape
);
}
// conversion function
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
&
assign
(
const
TensorView
<
T
,
Rank
>
&
tensor
)
{
TV_REQUIRE
(
tensor
.
shape
()
==
shape
(),
"you must provide same input size%s"
,
"
\n
"
);
T
*
ptr
=
mPtr
;
const
T
*
other_ptr
=
tensor
.
data
();
for
(
size_t
i
=
0
;
i
<
size
();
++
i
)
*
(
ptr
++
)
=
*
(
other_ptr
++
);
return
*
this
;
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
const
T
&
operator
()(
Inds
...
inds
)
const
{
static_assert
(
sizeof
...(
inds
)
==
Rank
,
"error"
);
return
ptr_
[
ArrayIndexStride
<
Rank
,
Rank
>::
run
(
stride_ptr_
,
0
,
inds
...)];
}
template
<
typename
T1
>
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
&
assign
(
std
::
initializer_list
<
T1
>
seq
)
{
TV_REQUIRE
(
seq
.
size
()
==
size
(),
"you must provide same input size%s"
,
"
\n
"
);
T
*
ptr
=
mPtr
;
for
(
const
T1
&
s
:
seq
)
*
(
ptr
++
)
=
T
(
s
);
return
*
this
;
protected:
const
Tindex
*
stride_ptr_
;
ptr_t
ptr_
;
};
}
// namespace detail
/// Accessor over a data pointer and a pointer to per-axis strides.
///
/// Indexing with `[]` peels off the leading axis: the result is an accessor
/// of rank `Rank - 1` whose data pointer is advanced by `stride_ptr_[0] * i`
/// and whose stride pointer is advanced by one entry.
template <typename T, int Rank, template <class> class PtrTraits = DefaultPtrTraits,
          typename Tindex = int>
struct TensorAccesser
    : public detail::TensorAccesserBase<T, Rank, PtrTraits, Tindex> {
  using ptr_t = typename PtrTraits<T>::type;
  static_assert(Rank > 0, "error");

  /// Forwards the data pointer and the stride pointer to the base class.
  explicit TV_HOST_DEVICE_INLINE TensorAccesser(ptr_t ptr,
                                                const Tindex *stride_ptr)
      : detail::TensorAccesserBase<T, Rank, PtrTraits, Tindex>(ptr,
                                                               stride_ptr) {}

  /// Accessor for the slice at position `i` of the leading axis.
  TV_HOST_DEVICE_INLINE TensorAccesser<T, Rank - 1, PtrTraits, Tindex>
  operator[](int i) {
    using sub_accessor_t = TensorAccesser<T, Rank - 1, PtrTraits, Tindex>;
    const Tindex *inner_strides = this->stride_ptr_ + 1;
    return sub_accessor_t(this->ptr_ + this->stride_ptr_[0] * i,
                          inner_strides);
  }

  /// Same as the non-const overload; the returned accessor type is identical.
  TV_HOST_DEVICE_INLINE TensorAccesser<T, Rank - 1, PtrTraits, Tindex>
  operator[](int i) const {
    using sub_accessor_t = TensorAccesser<T, Rank - 1, PtrTraits, Tindex>;
    const Tindex *inner_strides = this->stride_ptr_ + 1;
    return sub_accessor_t(this->ptr_ + this->stride_ptr_[0] * i,
                          inner_strides);
  }
};
/// Rank-1 accessor: `[]` yields an element reference instead of a
/// lower-rank accessor.
template <typename T, template <class> class PtrTraits, typename Tindex>
struct TensorAccesser<T, 1, PtrTraits, Tindex>
    : public detail::TensorAccesserBase<T, 1, PtrTraits, Tindex> {
  using ptr_t = typename PtrTraits<T>::type;

  /// Forwards the data pointer and the stride pointer to the base class.
  explicit TV_HOST_DEVICE_INLINE TensorAccesser(ptr_t ptr,
                                                const Tindex *stride_ptr)
      : detail::TensorAccesserBase<T, 1, PtrTraits, Tindex>(ptr, stride_ptr) {}

  /// Element at position `i`, located at offset `stride_ptr_[0] * i`.
  TV_HOST_DEVICE_INLINE T &operator[](int i) {
    const auto offset = this->stride_ptr_[0] * i;
    return this->ptr_[offset];
  }

  /// Const overload; it also returns a mutable reference, because only the
  /// accessor object is const here, not the element type.
  TV_HOST_DEVICE_INLINE T &operator[](int i) const {
    const auto offset = this->stride_ptr_[0] * i;
    return this->ptr_[offset];
  }
};
template
<
typename
T
,
int
Rank
=
-
1
,
template
<
class
>
class
PtrTraits
=
DefaultPtrTraits
,
typename
Tindex
=
int
>
struct
TensorView
{
static
constexpr
int
rank_value
=
Rank
;
using
ptr_t
=
typename
PtrTraits
<
T
>::
type
;
using
tv_shape_t
=
ShapeBase
<
Rank
==
-
1
?
TV_MAX_DIM
:
Rank
,
Tindex
>
;
using
no_cv_type
=
typename
std
::
remove_cv
<
T
>::
type
;
static_assert
(
Rank
==
-
1
||
Rank
>
0
,
"error"
);
TV_HOST_DEVICE_INLINE
TensorView
()
{}
explicit
TV_HOST_DEVICE_INLINE
TensorView
(
ptr_t
ptr
,
tv_shape_t
shape
)
:
ptr_
(
ptr
),
shape_
(
shape
),
stride_
(
shape
.
stride_rowmajor
())
{}
explicit
TV_HOST_DEVICE_INLINE
TensorView
(
ptr_t
ptr
,
tv_shape_t
shape
,
tv_shape_t
stride
)
:
ptr_
(
ptr
),
shape_
(
shape
),
stride_
(
stride
)
{}
// Conversion function: yields a view over const elements that refers to the
// same data. The stride is forwarded together with the shape; building the
// result from (ptr_, shape_) alone would recompute a row-major stride and
// silently drop a custom stride given to the three-argument constructor.
operator TensorView<const no_cv_type, Rank, PtrTraits, Tindex>() {
  return TensorView<const no_cv_type, Rank, PtrTraits, Tindex>(ptr_, shape_,
                                                               stride_);
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
T
&
operator
()(
Inds
...
inds
)
{
#ifdef TV_DEBUG
static_assert
(
Rank
==
-
1
||
sizeof
...(
inds
)
==
Rank
,
"error"
);
#if defined TV_DEBUG
int
idxes
[
sizeof
...(
Inds
)]{
int
(
inds
)...};
TV_REQUIRE
(
sizeof
...(
inds
)
==
mS
hape
.
ndim
(),
TV_REQUIRE
(
sizeof
...(
inds
)
==
s
hape
_
.
ndim
(),
"you provide %d indexes, but dim is %d
\n
"
,
sizeof
...(
inds
),
mS
hape
.
ndim
());
s
hape
_
.
ndim
());
for
(
int
i
=
0
;
i
<
sizeof
...(
inds
);
++
i
)
{
TV_REQUIRE
(
idxes
[
i
]
>=
0
&&
idxes
[
i
]
<
mS
hape
[
i
],
TV_REQUIRE
(
idxes
[
i
]
>=
0
&&
idxes
[
i
]
<
s
hape
_
[
i
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
i
,
idxes
[
i
],
mS
hape
[
i
]);
s
hape
_
[
i
]);
}
#endif
return
mPtr
[
rowArrayIdx
(
mShape
,
int
(
inds
)...)];
constexpr
int
Ndim
=
sizeof
...(
Inds
);
return
ptr_
[
ArrayIndexRowMajor
<
Ndim
,
Ndim
>::
runShape
(
shape_
,
0
,
inds
...)];
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
const
T
&
operator
()(
Inds
...
inds
)
const
{
#ifdef TV_DEBUG
static_assert
(
Rank
==
-
1
||
sizeof
...(
inds
)
==
Rank
,
"error"
);
#if defined TV_DEBUG
int
idxes
[
sizeof
...(
Inds
)]{
int
(
inds
)...};
TV_REQUIRE
(
sizeof
...(
inds
)
==
mS
hape
.
ndim
(),
TV_REQUIRE
(
sizeof
...(
inds
)
==
s
hape
_
.
ndim
(),
"you provide %d indexes, but dim is %d
\n
"
,
sizeof
...(
inds
),
mS
hape
.
ndim
());
s
hape
_
.
ndim
());
for
(
int
i
=
0
;
i
<
sizeof
...(
inds
);
++
i
)
{
TV_REQUIRE
(
idxes
[
i
]
>=
0
&&
idxes
[
i
]
<
mS
hape
[
i
],
TV_REQUIRE
(
idxes
[
i
]
>=
0
&&
idxes
[
i
]
<
s
hape
_
[
i
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
i
,
idxes
[
i
],
mS
hape
[
i
]);
s
hape
_
[
i
]);
}
#endif
return
mPtr
[
rowArrayIdx
(
mShape
,
int
(
inds
)...)];
constexpr
int
Ndim
=
sizeof
...(
Inds
);
return
ptr_
[
ArrayIndexRowMajor
<
Ndim
,
Ndim
>::
runShape
(
shape_
,
0
,
inds
...)];
}
TV_HOST_DEVICE_INLINE
T
&
operator
()()
{
static_assert
(
Rank
==
-
1
||
0
==
Rank
,
"error"
);
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mPtr
!=
nullptr
,
"you want get value but the view is empty.%s"
,
"
\n
"
);
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
#else
TV_REQUIRE
(
mPtr
!=
nullptr
,
"you want get value but the view is empty.%s"
,
TV_REQUIRE
(
ptr_
!=
nullptr
,
"you want get value but the view is empty.%s"
,
"
\n
"
);
TV_REQUIRE
(
mS
hape
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
mS
hape
.
ndim
());
TV_REQUIRE
(
s
hape
_
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
s
hape
_
.
ndim
());
#endif
#endif
return
mPtr
[
0
];
return
ptr_
[
0
];
}
TV_HOST_DEVICE_INLINE
const
T
&
operator
()()
const
{
static_assert
(
Rank
==
-
1
||
0
==
Rank
,
"error"
);
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mPtr
!=
nullptr
,
"you want get value but the view is empty.%s"
,
"
\n
"
);
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
#else
TV_REQUIRE
(
mPtr
!=
nullptr
,
"you want get value but the view is empty.%s"
,
TV_REQUIRE
(
ptr_
!=
nullptr
,
"you want get value but the view is empty.%s"
,
"
\n
"
);
TV_REQUIRE
(
mS
hape
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
mS
hape
.
ndim
());
TV_REQUIRE
(
s
hape
_
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
s
hape
_
.
ndim
());
#endif
#endif
return
mPtr
[
0
];
return
ptr_
[
0
];
}
template
<
class
T1
>
TV_HOST_DEVICE_INLINE
T
&
operator
()(
T1
i1
)
{
static_assert
(
Rank
==
-
1
||
1
==
Rank
,
"error"
);
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
i1
,
mShape
[
0
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
i1
,
mShape
[
0
]);
#endif
TV_REQUIRE
(
shape_
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
shape_
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
shape_
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
i1
,
shape_
[
0
]);
#endif
return
mP
tr
[
i1
];
return
p
tr
_
[
i1
];
}
template
<
class
T1
,
class
T2
>
TV_HOST_DEVICE_INLINE
T
&
operator
()(
T1
i1
,
T2
i2
)
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
#endif
static_assert
(
Rank
==
-
1
||
2
==
Rank
,
"error"
);
#if defined TV_DEBUG
TV_REQUIRE
(
shape_
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
shape_
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
shape_
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
shape_
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
shape_
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
shape_
[
1
]);
#endif
return
mP
tr
[
i1
*
mS
hape
[
1
]
+
i2
];
return
p
tr
_
[
i1
*
s
hape
_
[
1
]
+
i2
];
}
template
<
class
T1
,
class
T2
,
class
T3
>
TV_HOST_DEVICE_INLINE
T
&
operator
()(
T1
i1
,
T2
i2
,
T3
i3
)
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_DEVICE_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
#endif
static_assert
(
Rank
==
-
1
||
3
==
Rank
,
"error"
);
#if defined TV_DEBUG
TV_REQUIRE
(
shape_
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
shape_
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
shape_
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
shape_
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
shape_
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
shape_
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
shape_
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
shape_
[
2
]);
#endif
return
mP
tr
[(
i1
*
mS
hape
[
1
]
+
i2
)
*
mS
hape
[
2
]
+
i3
];
return
p
tr
_
[(
i1
*
s
hape
_
[
1
]
+
i2
)
*
s
hape
_
[
2
]
+
i3
];
}
template
<
class
T1
,
class
T2
,
class
T3
,
class
T4
>
TV_HOST_DEVICE_INLINE
T
&
operator
()(
T1
i1
,
T2
i2
,
T3
i3
,
T4
i4
)
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_DEVICE_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
TV_DEVICE_REQUIRE
(
i4
>=
0
&&
i4
<
mShape
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
mShape
[
3
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
TV_REQUIRE
(
i4
>=
0
&&
i4
<
mShape
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
mShape
[
3
]);
#endif
static_assert
(
Rank
==
-
1
||
4
==
Rank
,
"error"
);
#if defined TV_DEBUG
TV_REQUIRE
(
shape_
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
shape_
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
shape_
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
shape_
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
shape_
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
shape_
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
shape_
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
shape_
[
2
]);
TV_REQUIRE
(
i4
>=
0
&&
i4
<
shape_
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
shape_
[
3
]);
#endif
return
mP
tr
[((
i1
*
mS
hape
[
1
]
+
i2
)
*
mS
hape
[
2
]
+
i3
)
*
mS
hape
[
3
]
+
i4
];
return
p
tr
_
[((
i1
*
s
hape
_
[
1
]
+
i2
)
*
s
hape
_
[
2
]
+
i3
)
*
s
hape
_
[
3
]
+
i4
];
}
template
<
class
T1
>
TV_HOST_DEVICE_INLINE
const
T
&
operator
()(
T1
i1
)
const
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
#endif
static_assert
(
Rank
==
-
1
||
1
==
Rank
,
"error"
);
#if defined TV_DEBUG
TV_REQUIRE
(
shape_
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
shape_
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
shape_
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
shape_
[
0
]);
#endif
return
mP
tr
[
i1
];
return
p
tr
_
[
i1
];
}
template
<
class
T1
,
class
T2
>
TV_HOST_DEVICE_INLINE
const
T
&
operator
()(
T1
i1
,
T2
i2
)
const
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
#endif
static_assert
(
Rank
==
-
1
||
2
==
Rank
,
"error"
);
#if defined TV_DEBUG
TV_REQUIRE
(
shape_
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
shape_
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
shape_
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
shape_
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
shape_
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
shape_
[
1
]);
#endif
return
mP
tr
[
i1
*
mS
hape
[
1
]
+
i2
];
return
p
tr
_
[
i1
*
s
hape
_
[
1
]
+
i2
];
}
template
<
class
T1
,
class
T2
,
class
T3
>
TV_HOST_DEVICE_INLINE
const
T
&
operator
()(
T1
i1
,
T2
i2
,
T3
i3
)
const
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_DEVICE_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
#endif
static_assert
(
Rank
==
-
1
||
3
==
Rank
,
"error"
);
#if defined TV_DEBUG
TV_REQUIRE
(
shape_
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
shape_
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
shape_
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
shape_
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
shape_
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
shape_
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
shape_
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
shape_
[
2
]);
#endif
return
mP
tr
[(
i1
*
mS
hape
[
1
]
+
i2
)
*
mS
hape
[
2
]
+
i3
];
return
p
tr
_
[(
i1
*
s
hape
_
[
1
]
+
i2
)
*
s
hape
_
[
2
]
+
i3
];
}
template
<
class
T1
,
class
T2
,
class
T3
,
class
T4
>
TV_HOST_DEVICE_INLINE
const
T
&
operator
()(
T1
i1
,
T2
i2
,
T3
i3
,
T4
i4
)
const
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_DEVICE_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
TV_DEVICE_REQUIRE
(
i4
>=
0
&&
i4
<
mShape
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
mShape
[
3
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
TV_REQUIRE
(
i4
>=
0
&&
i4
<
mShape
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
mShape
[
3
]);
#endif
static_assert
(
Rank
==
-
1
||
4
==
Rank
,
"error"
);
#if defined TV_DEBUG
TV_REQUIRE
(
shape_
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
shape_
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
shape_
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
shape_
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
shape_
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
shape_
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
shape_
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
shape_
[
2
]);
TV_REQUIRE
(
i4
>=
0
&&
i4
<
shape_
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
shape_
[
3
]);
#endif
return
mP
tr
[((
i1
*
mS
hape
[
1
]
+
i2
)
*
mS
hape
[
2
]
+
i3
)
*
mS
hape
[
3
]
+
i4
];
return
p
tr
_
[((
i1
*
s
hape
_
[
1
]
+
i2
)
*
s
hape
_
[
2
]
+
i3
)
*
s
hape
_
[
3
]
+
i4
];
}
TV_HOST_DEVICE_INLINE
T
&
operator
[](
int
idx
)
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
idx
>=
0
&&
idx
<
size
(),
"index(%d) out-of-range: [0, %ld)
\n
"
,
int
(
idx
),
size
());
#else
TV_REQUIRE
(
idx
>=
0
&&
idx
<
size
(),
"index(%d) out-of-range: [0, %ld)
\n
"
,
int
(
idx
),
size
());
#endif
#endif
return
mPtr
[
idx
];
return
ptr_
[
idx
];
}
TV_HOST_DEVICE_INLINE
const
T
&
operator
[](
int
idx
)
const
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
idx
>=
0
&&
idx
<
size
(),
"index(%d) out-of-range: [0, %ld)
\n
"
,
int
(
idx
),
size
());
#else
TV_REQUIRE
(
idx
>=
0
&&
idx
<
size
(),
"index(%d) out-of-range: [0, %ld)
\n
"
,
int
(
idx
),
size
());
#endif
#endif
return
mPtr
[
idx
];
return
ptr_
[
idx
];
}
// TODO: this conflicts with operator[](SimpleVector<Slice> slice_vec).
/*TV_HOST_DEVICE_INLINE T &operator[](const Shape index) {
int idx = rowArrayIdx(mShape, index);
#ifdef TV_DEBUG
TV_REQUIRE(idx >= 0 && idx < size(), "index(%d) out-of-range: [0, %ld)\n",
int(idx), size());
#endif
return mPtr[idx];
TV_HOST_DEVICE_INLINE
TensorAccesser
<
T
,
Rank
-
1
,
PtrTraits
,
Tindex
>
accessor
(
Tindex
idx
)
{
static_assert
(
Rank
>
1
,
"for Rank == 1, use accessor() or just use []"
);
return
TensorAccesser
<
T
,
Rank
-
1
,
PtrTraits
,
Tindex
>
(
ptr_
+
stride_
[
0
]
*
idx
,
stride_
.
data
()
+
1
);
}
TV_HOST_DEVICE_INLINE const T &operator[](const Shape index) const {
int idx = rowArrayIdx(mShape, index);
#ifdef TV_DEBUG
TV_REQUIRE(idx >= 0 && idx < size(), "index(%d) out-of-range: [0, %ld)\n",
int(idx), size());
#endif
return mPtr[idx];
}*/
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
operator
[](
SimpleVector
<
Slice
>
slice_vec
)
{
return
_subview
(
slice_vec
);
}
TV_HOST_DEVICE_INLINE
const
TensorView
<
T
,
Rank
>
operator
[](
SimpleVector
<
Slice
>
slice_vec
)
const
{
return
_subview
(
slice_vec
);
}
TV_HOST_DEVICE_INLINE
bool
empty
()
const
{
return
mPtr
==
nullptr
;
}
TV_HOST_DEVICE_INLINE
T
*
data
()
{
return
mPtr
;
}
TV_HOST_DEVICE_INLINE
const
T
*
data
()
const
{
return
mPtr
;
}
TV_HOST_DEVICE_INLINE
const
Shape
&
shape
()
const
{
return
mShape
;
}
TV_HOST_DEVICE_INLINE
int
dim
(
int
idx
)
const
{
return
mShape
[
idx
];
}
TV_HOST_DEVICE_INLINE
int
ndim
()
const
{
return
mShape
.
ndim
();
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
&
reshape
(
Inds
...
newShapes
)
{
Shape
shapes
{
int
(
newShapes
)...};
TV_ASSERT
(
shapes
.
size
()
==
size
());
mShape
=
shapes
;
return
*
this
;
/// Returns an accessor of the same rank over this view's data pointer and
/// stride array. Only available when Rank is greater than zero.
TV_HOST_DEVICE_INLINE TensorAccesser<T, Rank, PtrTraits, Tindex> accessor() {
  static_assert(Rank > 0, "rank must higher than zero");
  using accessor_t = TensorAccesser<T, Rank, PtrTraits, Tindex>;
  return accessor_t(ptr_, stride_.data());
}
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
&
reshape
(
Shape
shapes
)
{
TV_ASSERT
(
shapes
.
size
()
==
size
());
mShape
=
shapes
;
return
*
this
;
/// Returns a rank `Rank - 1` accessor for position `idx` of the leading axis:
/// the data pointer is advanced by `stride_[0] * idx` and the stride pointer
/// skips the first entry. Only available when Rank is greater than one.
TV_HOST_DEVICE_INLINE TensorAccesser<T, Rank - 1, PtrTraits, Tindex>
accessor(Tindex idx) const {
  static_assert(Rank > 1, "for Rank == 1, use accessor() or just use []");
  using sub_accessor_t = TensorAccesser<T, Rank - 1, PtrTraits, Tindex>;
  return sub_accessor_t(ptr_ + stride_[0] * idx, stride_.data() + 1);
}
/// Const overload of accessor(): returns an accessor of the same rank over
/// this view's data pointer and stride array. Only available when Rank is
/// greater than zero.
TV_HOST_DEVICE_INLINE TensorAccesser<T, Rank, PtrTraits, Tindex>
accessor() const {
  // The message belongs to the static_assert. It used to be passed as a
  // third constructor argument, which TensorAccesser(ptr, stride_ptr) does
  // not accept.
  static_assert(Rank > 0, "rank must higher than zero");
  return TensorAccesser<T, Rank, PtrTraits, Tindex>(ptr_, stride_.data());
}
TV_HOST_DEVICE_INLINE
bool
empty
()
const
{
return
ptr_
==
nullptr
;
}
TV_HOST_DEVICE_INLINE
ptr_t
data
()
{
return
ptr_
;
}
TV_HOST_DEVICE_INLINE
const
ptr_t
data
()
const
{
return
ptr_
;
}
TV_HOST_DEVICE_INLINE
const
tv_shape_t
&
shape
()
const
{
return
shape_
;
}
TV_HOST_DEVICE_INLINE
const
tv_shape_t
&
stride
()
const
{
return
stride_
;
}
TV_HOST_DEVICE_INLINE
int
dim
(
int
idx
)
const
{
return
shape_
[
idx
];
}
TV_HOST_DEVICE_INLINE
int
ndim
()
const
{
return
shape_
.
ndim
();
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
view
(
Inds
...
newShapes
)
const
{
Shape
shapes
{
int
(
newShapes
)...};
for
(
size_t
i
=
0
;
i
<
shapes
.
ndim
();
++
i
)
{
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
==
-
1
?
-
1
:
sizeof
...(
Inds
),
PtrTraits
,
Tindex
>
view
(
Inds
...
newShapes
)
const
{
ShapeBase
<
Rank
==
-
1
?
TV_MAX_DIM
:
sizeof
...(
Inds
),
Tindex
>
shapes
{
int
(
newShapes
)...};
for
(
size_t
i
=
0
;
i
<
sizeof
...(
newShapes
);
++
i
)
{
if
(
shapes
[
i
]
==
-
1
)
{
shapes
[
i
]
=
1
;
shapes
[
i
]
=
size
()
/
shapes
.
size
();
...
...
@@ -932,220 +1038,221 @@ template <typename T, int Rank = -1> struct TensorView {
}
}
TV_ASSERT
(
shapes
.
size
()
==
size
());
return
TensorView
<
T
,
Rank
>
(
mPtr
,
shapes
);
return
TensorView
<
T
,
Rank
==
-
1
?
-
1
:
sizeof
...(
Inds
),
PtrTraits
,
Tindex
>
(
ptr_
,
shapes
);
}
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
view
(
Shape
shapes
)
const
{
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
-
1
,
PtrTraits
,
Tindex
>
view
(
Shape
shapes
)
const
{
TV_ASSERT
(
shapes
.
size
()
==
size
());
return
TensorView
<
T
,
Rank
>
(
mPtr
,
shapes
);
}
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
squeeze
()
const
{
return
TensorView
<
T
,
Rank
>
(
mPtr
,
mShape
.
squeeze
());
}
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
squeeze
(
int
dim
)
const
{
return
TensorView
<
T
,
Rank
>
(
mPtr
,
mShape
.
squeeze
(
dim
));
}
TV_HOST_DEVICE_INLINE
size_t
size
()
const
{
return
mShape
.
size
();
}
template
<
class
...
Slices
>
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
subview
(
Slice
slice
,
Slices
...
slices
)
const
{
return
subview
<
float
,
Slice
,
Slices
...
>
(
slice
,
slices
...);
}
template
<
class
T2
=
float
,
class
...
Slices
>
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
subview
(
Slices
...
slices
)
const
{
Slice
slice_vec
[
sizeof
...(
Slices
)]
=
{
to_slice
(
slices
)...};
Shape
new_shape
{
to_slice
(
slices
)[
0
]...};
Shape
start
{
to_slice
(
slices
)[
0
]...};
TV_ASSERT
(
new_shape
.
ndim
()
<=
mShape
.
ndim
());
TV_ASSERT
(
new_shape
.
ndim
()
!=
0
);
size_t
idxsize
=
new_shape
.
ndim
();
for
(
size_t
i
=
idxsize
;
i
<
mShape
.
ndim
();
++
i
)
{
new_shape
.
push_back
(
0
);
return
TensorView
<
T
,
-
1
,
PtrTraits
,
Tindex
>
(
ptr_
,
shapes
);
}
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
-
1
,
PtrTraits
,
Tindex
>
squeeze
()
const
{
return
TensorView
<
T
,
-
1
,
PtrTraits
,
Tindex
>
(
ptr_
,
shape_
.
squeeze
());
}
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
==
-
1
?
-
1
:
Rank
-
1
,
PtrTraits
,
Tindex
>
squeeze
(
int
dim
)
const
{
return
TensorView
<
T
,
Rank
==
-
1
?
-
1
:
Rank
-
1
,
PtrTraits
,
Tindex
>
(
ptr_
,
shape_
.
squeeze
<
Rank
==
-
1
?
TV_MAX_DIM
:
Rank
-
1
>
(
dim
));
}
TV_HOST_DEVICE_INLINE
size_t
size
()
const
{
return
shape_
.
size
();
}
template
<
class
...
Integers
>
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
-
1
,
PtrTraits
,
Tindex
>
subview
(
int
id
,
Integers
...
ints
)
{
tv_shape_t
start
=
{
id
,
ints
...};
for
(
int
i
=
1
+
sizeof
...(
ints
);
i
<
ndim
();
++
i
)
{
start
.
push_back
(
0
);
}
#pragma unroll
for
(
size_t
i
=
0
;
i
<
sizeof
...(
Slices
);
++
i
)
{
if
(
slice_vec
[
i
][
1
]
!=
-
1
)
{
new_shape
[
i
]
=
slice_vec
[
i
][
1
]
-
slice_vec
[
i
][
0
];
TV_ASSERT
(
new_shape
[
i
]
>=
0
);
}
else
{
new_shape
[
i
]
=
1
;
// reduce dim
}
}
auto
offset
=
rowArrayIdx
(
mShape
,
start
);
#pragma unroll
for
(
size_t
i
=
sizeof
...(
Slices
);
i
<
mShape
.
ndim
();
++
i
)
{
new_shape
[
i
]
=
mShape
[
i
];
TV_ASSERT
(
new_shape
[
i
]
>=
0
);
}
Shape
reduced_shape
;
#pragma unroll
for
(
size_t
i
=
0
;
i
<
sizeof
...(
Slices
);
++
i
)
{
if
(
slice_vec
[
i
][
1
]
!=
-
1
)
{
reduced_shape
.
push_back
(
new_shape
[
i
]);
}
}
#pragma unroll
for
(
size_t
i
=
sizeof
...(
Slices
);
i
<
mShape
.
ndim
();
++
i
)
{
reduced_shape
.
push_back
(
new_shape
[
i
]);
}
return
TensorView
<
T
,
Rank
>
(
mPtr
+
offset
,
reduced_shape
);
return
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>
(
ptr_
+
rowArrayIdx
(
shape_
,
start
),
shape_
.
subshape
(
sizeof
...(
ints
)
+
1
));
}
template
<
class
...
Integers
>
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
subview
(
int
id
,
Integers
...
ints
)
{
Shape
start
=
{
id
,
ints
...};
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
-
1
,
PtrTraits
,
Tindex
>
subview
(
int
id
,
Integers
...
ints
)
const
{
tv_shape_t
start
=
{
id
,
ints
...};
for
(
int
i
=
1
+
sizeof
...(
ints
);
i
<
ndim
();
++
i
)
{
start
.
push_back
(
0
);
}
return
TensorView
<
T
,
Rank
>
(
mPtr
+
rowArrayIdx
(
mShape
,
start
),
mShape
.
subshape
(
sizeof
...(
ints
)
+
1
));
return
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>
(
ptr_
+
rowArrayIdx
(
shape_
,
start
),
shape_
.
subshape
(
sizeof
...(
ints
)
+
1
));
}
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
subview
_ints
(
SimpleVector
<
int
>
ids
)
const
{
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
-
1
,
PtrTraits
,
Tindex
>
subview
(
SimpleVector
<
int
>
ids
)
const
{
Shape
start
=
ids
;
for
(
int
i
=
ids
.
size
();
i
<
ndim
();
++
i
)
{
start
.
push_back
(
0
);
}
return
TensorView
<
T
,
Rank
>
(
mPtr
+
rowArrayIdx
(
mShape
,
start
),
mShape
.
subshape
(
ids
.
size
()));
}
std
::
string
print_vec
(
TensorView
<
T
>
tensor
)
const
{
std
::
ostringstream
ss
;
ss
<<
"["
;
for
(
size_t
i
=
0
;
i
<
tensor
.
dim
(
0
)
-
1
;
++
i
)
{
ss
<<
tensor
(
i
)
<<
", "
;
}
ss
<<
tensor
(
tensor
.
dim
(
0
)
-
1
)
<<
"]"
;
return
ss
.
str
();
return
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>
(
ptr_
+
rowArrayIdx
(
shape_
,
start
),
shape_
.
subshape
(
ids
.
size
()));
}
std
::
string
repr
()
const
{
std
::
ostringstream
ss
;
template
<
typename
Os
>
std
::
string
repr
(
Os
&
ss
,
int
limit
=
1000
,
int
limit_axis
=
6
)
const
{
if
(
empty
())
return
""
;
if
(
mShape
.
ndim
()
==
0
)
{
ss
<<
*
mPtr
;
// ss << fmt::format("\nTensor: shape={}, dtype={}", mShape,
// detail::simpleTypeName<T>());
ss
<<
"Tensor: dtype="
<<
detail
::
simpleTypeName
<
T
>
();
if
(
shape_
.
ndim
()
==
0
)
{
ss
<<
"Tensor["
<<
type_s
<
T
>
<<
"]"
<<
std
::
endl
;
ss
<<
*
ptr_
;
return
ss
.
str
();
}
Shape
counter
=
mShape
;
auto
tensor_flat
=
this
->
view
(
-
1
);
bool
enable_limit
=
size
()
>
limit
;
for
(
int
i
=
0
;
i
<
counter
.
ndim
()
-
1
;
++
i
)
{
counter
[
i
]
=
0
;
// ss << "[";
SimpleVector
<
int64_t
,
TV_MAX_DIM
>
prev
(
ndim
(),
-
1
);
SimpleVector
<
int64_t
,
TV_MAX_DIM
>
nd_index
(
ndim
());
SimpleVector
<
int64_t
,
TV_MAX_DIM
>
_shape
;
for
(
auto
s
:
shape
())
{
_shape
.
push_back
(
s
);
}
for
(
size_t
i
=
0
;
i
<
this
->
size
()
/
this
->
dim
(
this
->
ndim
()
-
1
);
++
i
)
{
for
(
int
i
=
0
;
i
<
counter
.
ndim
()
-
1
;
++
i
)
{
if
(
counter
[
i
]
==
0
)
{
ss
<<
"["
;
ss
<<
"Tensor["
<<
type_s
<
T
>
<<
"]: shape="
<<
shape
()
<<
", stride="
<<
stride
()
<<
std
::
endl
;
auto
ndimValue
=
ndim
();
for
(
int64_t
i
=
0
;
i
<
size
();
++
i
)
{
rowArrayIdxInv
(
i
,
nd_index
.
data
(),
_shape
.
data
(),
ndimValue
);
bool
newline
=
false
;
int
end_count
=
0
;
for
(
int
j
=
0
;
j
<
ndimValue
;
++
j
)
{
if
(
nd_index
[
j
]
!=
prev
[
j
]
&&
nd_index
[
j
]
==
0
&&
prev
[
j
]
!=
0
&&
prev
[
j
]
!=
-
1
)
{
ss
<<
"]"
;
++
end_count
;
newline
=
true
;
}
}
std
::
cout
<<
"counter.ndim() "
<<
counter
.
ndim
()
<<
std
::
endl
;
auto
counter_
=
counter
.
subshape
(
0
,
counter
.
ndim
()
-
1
);
std
::
cout
<<
counter
.
subshape
(
0
,
counter
.
ndim
()
-
1
)
<<
std
::
endl
;
ss
<<
print_vec
(
this
->
subview_ints
(
counter_
))
<<
"
\n
"
;
std
::
cout
<<
"after counter.ndim() "
<<
counter
.
ndim
()
<<
std
::
endl
;
for
(
int
i
=
0
;
i
<
counter
.
ndim
()
-
1
;
++
i
)
{
if
(
counter
[
i
]
==
this
->
dim
(
i
)
-
1
)
{
ss
<<
"]"
;
if
(
prev
[
0
]
==
-
1
)
{
end_count
=
ndimValue
;
}
if
(
newline
)
{
ss
<<
"
\n
"
;
}
int
starts_count
=
0
;
for
(
int
j
=
0
;
j
<
ndimValue
;
++
j
)
{
if
(
nd_index
[
j
]
!=
prev
[
j
]
&&
nd_index
[
j
]
==
0
&&
prev
[
j
]
!=
0
)
{
++
starts_count
;
}
}
}
// ss << "]";
// ss << fmt::format("\nTensor: shape={}, dtype={}", mShape,
// detail::simpleTypeName<T>());
ss
<<
"Tensor: dtype="
<<
detail
::
simpleTypeName
<
T
>
();
return
ss
.
str
();
}
protected:
// TODO: make this function public.
// currently this function is called unexpectedly when using subview({0, 0}).
TV_HOST_DEVICE_INLINE
TensorView
<
T
,
Rank
>
_subview
(
SimpleVector
<
Slice
>
slice_vec
)
{
Shape
new_shape
;
for
(
int
i
=
0
;
i
<
slice_vec
.
size
();
++
i
)
{
new_shape
.
push_back
(
slice_vec
[
i
][
0
]);
}
Shape
start
=
new_shape
;
TV_ASSERT
(
new_shape
.
ndim
()
<=
mShape
.
ndim
());
TV_ASSERT
(
new_shape
.
ndim
()
!=
0
);
size_t
idxsize
=
new_shape
.
ndim
();
for
(
size_t
i
=
idxsize
;
i
<
mShape
.
ndim
();
++
i
)
{
new_shape
.
push_back
(
0
);
start
.
push_back
(
0
);
}
for
(
size_t
i
=
0
;
i
<
slice_vec
.
size
();
++
i
)
{
if
(
slice_vec
[
i
][
1
]
!=
-
1
)
{
new_shape
[
i
]
=
slice_vec
[
i
][
1
]
-
slice_vec
[
i
][
0
];
TV_ASSERT
(
new_shape
[
i
]
>=
0
);
if
(
starts_count
>
0
)
{
for
(
int
j
=
0
;
j
<
ndimValue
-
end_count
;
++
j
)
{
ss
<<
" "
;
}
for
(
int
j
=
0
;
j
<
starts_count
;
++
j
)
{
ss
<<
"["
;
}
}
if
(
std
::
is_same
<
T
,
uint8_t
>::
value
||
std
::
is_same
<
T
,
const
uint8_t
>::
value
)
{
ss
<<
unsigned
((
*
this
)[
i
]);
}
else
{
new_shape
[
i
]
=
1
;
// reduce dim
ss
<<
(
*
this
)[
i
];
}
}
auto
offset
=
rowArrayIdx
(
mShape
,
start
);
for
(
size_t
i
=
slice_vec
.
size
();
i
<
mShape
.
ndim
();
++
i
)
{
new_shape
[
i
]
=
mShape
[
i
];
TV_ASSERT
(
new_shape
[
i
]
>=
0
);
}
Shape
reduced_shape
;
for
(
size_t
i
=
0
;
i
<
slice_vec
.
size
();
++
i
)
{
if
(
slice_vec
[
i
][
1
]
!=
-
1
)
{
reduced_shape
.
push_back
(
new_shape
[
i
]);
if
(
nd_index
[
ndimValue
-
1
]
!=
_shape
[
ndimValue
-
1
]
-
1
)
{
ss
<<
","
;
}
for
(
int
j
=
0
;
j
<
ndimValue
;
++
j
)
{
prev
[
j
]
=
nd_index
[
j
];
}
}
for
(
size_
t
i
=
slice_vec
.
size
()
;
i
<
mShape
.
ndim
()
;
++
i
)
{
reduced_shape
.
push_back
(
new_shape
[
i
])
;
for
(
in
t
j
=
0
;
j
<
ndimValue
;
++
j
)
{
ss
<<
"]"
;
}
return
TensorView
<
T
,
Rank
>
(
mPtr
+
offset
,
reduced_shape
);
return
ss
.
str
();
}
// Convenience overload: formats the view into a temporary string stream and
// returns the resulting text, using the default limits of repr(Os &, ...).
std::string repr() const {
  std::ostringstream buffer;
  return repr(buffer);
}
protected:
// Wraps a single index into a Slice whose first field is int(s) and whose
// remaining two fields are -1.
template <typename T1> TV_HOST_DEVICE_INLINE Slice to_slice(T1 s) const {
  const int index = int(s);
  return Slice{index, -1, -1};
}
// Overload for arguments that already are a Slice: returns a copy unchanged.
TV_HOST_DEVICE_INLINE Slice to_slice(Slice s) const {
  Slice copy(s);
  return copy;
}
T
*
mPtr
=
nullptr
;
Shape
mShape
;
ptr_t
ptr_
=
nullptr
;
tv_shape_t
shape_
;
tv_shape_t
stride_
;
};
template
<
typename
Os
,
typename
T
,
int
Rank
>
Os
&
operator
<<
(
Os
&
os
,
const
TensorView
<
T
,
Rank
>
&
dt
)
{
// Wraps a std::vector as a one-dimensional, non-owning TensorView of length
// arr.size(). The view aliases arr.data(); it is only valid while `arr` is
// alive and not reallocated.
template <typename T> TensorView<T> vector2tv(std::vector<T> &arr) {
  T *first = arr.data();
  return TensorView<T>(first, {arr.size()});
}
// Wraps a std::vector as a non-owning TensorView with the given shape.
// The argument check fails unless shape.prod() equals arr.size().
// The view aliases arr.data(); it is only valid while `arr` is alive and not
// reallocated.
template <typename T>
TensorView<T> vector2tv(std::vector<T> &arr, Shape shape) {
  TV_ASSERT_INVALID_ARG(shape.prod() == arr.size(), "error");
  T *first = arr.data();
  return TensorView<T>(first, shape);
}
// Read-only counterpart: wraps a const std::vector as a one-dimensional,
// non-owning TensorView<const T> of length arr.size().
template <typename T>
TensorView<const T> vector2tv(const std::vector<T> &arr) {
  const T *first = arr.data();
  return TensorView<const T>(first, {arr.size()});
}
// Streams the textual representation of a view (dt.repr()) into `os` and
// returns `os` itself, so the caller keeps its concrete stream type.
template <typename Os, typename T, int Rank, template <class> class PtrTraits,
          typename Tindex>
Os &operator<<(Os &os, const TensorView<T, Rank, PtrTraits, Tindex> &dt) {
  const std::string text = dt.repr();
  os << text;
  return os;
}
template
<
typename
Os
,
typename
T
,
int
Rank
>
Os
&
operator
<<
(
Os
&
os
,
const
TensorView
<
const
T
,
Rank
>
&
dt
)
{
// Overload for views over const elements: streams dt.repr() into `os` and
// returns `os` itself, so the caller keeps its concrete stream type.
template <typename Os, typename T, int Rank, template <class> class PtrTraits,
          typename Tindex>
Os &operator<<(Os &os,
               const TensorView<const T, Rank, PtrTraits, Tindex> &dt) {
  const std::string text = dt.repr();
  os << text;
  return os;
}
namespace detail {

// Maps a scalar type to the printf conversion used to print one element.
// The primary template is only declared; just the specializations below are
// defined. The argument exists for type deduction only and is never read.
template <typename T> constexpr const char *printfTypeFormat(T val = T());

// Floating point values print with two decimals.
template <> constexpr const char *printfTypeFormat<float>(float) {
  return "%.2f";
}
template <> constexpr const char *printfTypeFormat<double>(double) {
  return "%.2f";
}

// Signed integers.
template <> constexpr const char *printfTypeFormat<int>(int) { return "%d"; }
template <> constexpr const char *printfTypeFormat<long>(long) {
  return "%ld";
}

// Unsigned integers.
template <> constexpr const char *printfTypeFormat<unsigned>(unsigned) {
  return "%u";
}
template <>
constexpr const char *printfTypeFormat<unsigned long>(unsigned long) {
  return "%lu";
}

}
// namespace detail
// Compile-time map from an element type to the printf conversion used to
// print one value of that type. Only the specializations below exist; any
// other type fails to compile.
template <typename T> struct TypePrintfFormat;

// Floating point values print with two decimals (float is promoted to double
// when passed through printf's variadic arguments).
template <> struct TypePrintfFormat<float> {
  static constexpr const char *value = "%.2f";
};
template <> struct TypePrintfFormat<double> {
  static constexpr const char *value = "%.2f";
};

// Signed integers narrower than int are promoted to int by printf.
template <> struct TypePrintfFormat<int8_t> {
  static constexpr const char *value = "%d";
};
template <> struct TypePrintfFormat<int16_t> {
  static constexpr const char *value = "%d";
};
template <> struct TypePrintfFormat<int32_t> {
  static constexpr const char *value = "%d";
};

// Unsigned integers.
template <> struct TypePrintfFormat<uint8_t> {
  static constexpr const char *value = "%u";
};
template <> struct TypePrintfFormat<uint16_t> {
  static constexpr const char *value = "%u";
};
template <> struct TypePrintfFormat<uint32_t> {
  static constexpr const char *value = "%u";
};

// 64-bit integers: the length modifier must match the underlying type.
// int64_t is `long` on LP64 platforms but `long long` elsewhere (e.g. 64-bit
// Windows), where "%ld" would read only part of the argument.
template <> struct TypePrintfFormat<int64_t> {
  static constexpr const char *value =
      std::is_same<int64_t, long>::value ? "%ld" : "%lld";
};
template <> struct TypePrintfFormat<uint64_t> {
  static constexpr const char *value =
      std::is_same<uint64_t, unsigned long>::value ? "%lu" : "%llu";
};

// bool is promoted to int and prints as 0 or 1.
template <> struct TypePrintfFormat<bool> {
  static constexpr const char *value = "%d";
};
template
<
typename
T
>
TV_HOST_DEVICE
void
printTensorView
(
const
TensorView
<
T
>
tensor
,
const
char
*
format
)
{
constexpr
const
char
*
type_printf_format_v
=
TypePrintfFormat
<
T
>::
value
;
};
// namespace detail
template
<
typename
T
,
int
Rank
,
template
<
class
>
class
PtrTraits
,
typename
Tindex
>
TV_HOST_DEVICE
void
printTensorView
(
const
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>
&
tensor
,
const
char
*
format
)
{
// used to print tensor in cuda kernel.
if
(
tensor
.
empty
())
return
;
if
(
tensor
.
ndim
()
==
0
)
{
...
...
@@ -1153,51 +1260,69 @@ TV_HOST_DEVICE void printTensorView(const TensorView<T> tensor,
printf
(
"
\n
"
);
return
;
}
Shape
counter
=
tensor
.
shape
();
auto
tensor_flat
=
tensor
.
view
(
-
1
);
for
(
int
i
=
0
;
i
<
counter
.
ndim
();
++
i
)
{
counter
[
i
]
=
0
;
printf
(
"["
);
}
for
(
size_t
i
=
0
;
i
<
tensor
.
size
();
++
i
)
{
printf
(
format
,
tensor_flat
(
rowArrayIdx
(
tensor
.
shape
(),
counter
)));
counter
[
counter
.
ndim
()
-
1
]
+=
1
;
int
inc_count
=
0
;
bool
print_comma
=
true
;
for
(
int
c
=
counter
.
ndim
()
-
1
;
c
>=
0
;
--
c
)
{
if
(
counter
[
c
]
==
tensor
.
dim
(
c
)
&&
c
>
0
)
{
++
inc_count
;
counter
[
c
-
1
]
+=
1
;
counter
[
c
]
=
0
;
print_comma
=
false
;
SimpleVector
<
int64_t
,
TV_MAX_DIM
>
prev
(
tensor
.
ndim
(),
-
1
);
SimpleVector
<
int64_t
,
TV_MAX_DIM
>
nd_index
(
tensor
.
ndim
());
SimpleVector
<
int64_t
,
TV_MAX_DIM
>
shape
(
tensor
.
shape
());
auto
ndim
=
tensor
.
ndim
();
for
(
int64_t
i
=
0
;
i
<
tensor
.
size
();
++
i
)
{
rowArrayIdxInv
(
i
,
nd_index
.
data
(),
shape
.
data
(),
ndim
);
bool
newline
=
false
;
int
end_count
=
0
;
for
(
int
j
=
0
;
j
<
ndim
;
++
j
)
{
if
(
nd_index
[
j
]
!=
prev
[
j
]
&&
nd_index
[
j
]
==
0
&&
prev
[
j
]
!=
0
&&
prev
[
j
]
!=
-
1
)
{
printf
(
"]"
);
++
end_count
;
newline
=
true
;
}
}
if
(
print_comma
&&
i
!=
tensor
.
size
()
-
1
)
printf
(
", "
);
for
(
int
j
=
0
;
j
<
inc_count
;
++
j
)
{
printf
(
"]"
);
if
(
prev
[
0
]
==
-
1
)
{
end_count
=
ndim
;
}
if
(
newline
)
{
printf
(
"
\n
"
);
}
int
starts_count
=
0
;
for
(
int
j
=
0
;
j
<
ndim
;
++
j
)
{
if
(
nd_index
[
j
]
!=
prev
[
j
]
&&
nd_index
[
j
]
==
0
&&
prev
[
j
]
!=
0
)
{
++
starts_count
;
}
}
if
(
i
!=
tensor
.
size
()
-
1
)
{
if
(
inc_count
!=
0
)
printf
(
"
\n
"
);
for
(
int
j
=
0
;
j
<
inc_count
;
++
j
)
{
printf
(
"["
);
if
(
starts_count
>
0
)
{
for
(
int
j
=
0
;
j
<
ndim
-
end_count
;
++
j
)
{
printf
(
" "
);
}
for
(
int
j
=
0
;
j
<
starts_count
;
++
j
)
{
printf
(
"]"
);
}
}
printf
(
format
,
tensor
[
i
]);
if
(
nd_index
[
ndim
-
1
]
!=
shape
[
ndim
-
1
]
-
1
)
{
printf
(
","
);
}
for
(
int
j
=
0
;
j
<
ndim
;
++
j
)
{
prev
[
j
]
=
nd_index
[
j
];
}
}
for
(
int
j
=
0
;
j
<
ndim
;
++
j
)
{
printf
(
"]"
);
}
printf
(
"
]
\n
"
);
printf
(
"
\n
"
);
}
template
<
typename
T
>
TV_HOST_DEVICE
void
printTensorView
(
TensorView
<
T
>
tensor
)
{
template
<
typename
T
,
int
Rank
,
template
<
class
>
class
PtrTraits
,
typename
Tindex
>
TV_HOST_DEVICE
void
printTensorView
(
TensorView
<
T
,
Rank
,
PtrTraits
,
Tindex
>
tensor
)
{
using
Traw
=
typename
std
::
remove_const
<
T
>::
type
;
return
printTensorView
(
tensor
,
detail
::
printf
TypeF
ormat
<
Traw
>
()
);
return
printTensorView
(
tensor
,
detail
::
type_
printf
_f
ormat
_v
<
Traw
>
);
}
template
<
typename
T
>
TV_HOST_DEVICE
void
printTensorView
(
const
T
*
ptr
,
Shape
shape
)
{
using
Traw
=
typename
std
::
remove_const
<
T
>::
type
;
return
printTensorView
(
TensorView
<
const
T
>
(
ptr
,
shape
),
detail
::
printf
TypeF
ormat
<
Traw
>
()
);
detail
::
type_
printf
_f
ormat
_v
<
Traw
>
);
}
template
<
typename
T
>
TV_HOST_DEVICE
void
printTensorView
(
const
T
*
ptr
,
Shape
shape
,
...
...
@@ -1205,7 +1330,7 @@ TV_HOST_DEVICE void printTensorView(const T *ptr, Shape shape,
return
printTensorView
(
TensorView
<
const
T
>
(
ptr
,
shape
),
format
);
}
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
#ifdef __DRIVER_TYPES_H__
#ifndef DEVICE_RESET
...
...
@@ -1229,20 +1354,25 @@ void check(T result, char const *const func, const char *const file,
}
}
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
#define checkCudaErrors(val)
tv::
check((val), #val, __FILE__, __LINE__)
// Enqueues an asynchronous host-to-device copy of `size` elements of type T
// (size * sizeof(T) bytes) from `src` to `dst` on stream `s`. Any CUDA error
// is reported through checkCudaErrors.
template <typename T>
void host2dev(T *dst, const T *src, size_t size, cudaStream_t s = 0) {
  checkCudaErrors(
      cudaMemcpyAsync(dst, src, size * sizeof(T), cudaMemcpyHostToDevice, s));
}
template
<
typename
T
>
void
host2dev
(
TensorView
<
T
>
dst
,
const
TensorView
<
const
T
>
src
,
// View overload (const-element source): copies from host view `src` to device
// view `dst` on stream `s`. Only min(dst.size(), src.size()) elements are
// transferred, so a size mismatch truncates instead of overrunning.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void host2dev(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
              const TensorView<const T, Rank, PtrTraits2, Tindex2> src,
              cudaStream_t s = 0) {
  const auto count = std::min(dst.size(), src.size());
  host2dev(dst.data(), src.data(), count, s);
}
template
<
typename
T
>
void
host2dev
(
TensorView
<
T
>
dst
,
const
TensorView
<
T
>
src
,
cudaStream_t
s
=
0
)
{
// View overload (mutable-element source): copies from host view `src` to
// device view `dst` on stream `s`. Only min(dst.size(), src.size()) elements
// are transferred.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void host2dev(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
              const TensorView<T, Rank, PtrTraits2, Tindex2> src,
              cudaStream_t s = 0) {
  const auto count = std::min(dst.size(), src.size());
  host2dev(dst.data(), src.data(), count, s);
}
...
...
@@ -1250,12 +1380,16 @@ template <typename T> void host2dev_sync(T *dst, const T *src, size_t size) {
checkCudaErrors
(
cudaMemcpy
(
dst
,
src
,
size
*
sizeof
(
T
),
cudaMemcpyHostToDevice
));
}
template
<
typename
T
>
void
host2dev_sync
(
TensorView
<
T
>
dst
,
const
TensorView
<
const
T
>
src
)
{
// View overload (const-element source) of the host-to-device copy that takes
// no stream argument. Only min(dst.size(), src.size()) elements are
// transferred.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void host2dev_sync(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
                   const TensorView<const T, Rank, PtrTraits2, Tindex2> src) {
  const auto count = std::min(dst.size(), src.size());
  host2dev_sync(dst.data(), src.data(), count);
}
template
<
typename
T
>
void
host2dev_sync
(
TensorView
<
T
>
dst
,
const
TensorView
<
T
>
src
)
{
// View overload (mutable-element source) of the host-to-device copy that
// takes no stream argument. Only min(dst.size(), src.size()) elements are
// transferred.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void host2dev_sync(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
                   const TensorView<T, Rank, PtrTraits2, Tindex2> src) {
  const auto count = std::min(dst.size(), src.size());
  host2dev_sync(dst.data(), src.data(), count);
}
...
...
@@ -1265,14 +1399,18 @@ void dev2host(T *dst, const T *src, size_t size, cudaStream_t s = 0) {
cudaMemcpyAsync
(
dst
,
src
,
size
*
sizeof
(
T
),
cudaMemcpyDeviceToHost
,
s
));
}
template
<
typename
T
>
void
dev2host
(
TensorView
<
T
>
dst
,
const
TensorView
<
const
T
>
src
,
// View overload (const-element source): copies from device view `src` to host
// view `dst` on stream `s`. Only min(dst.size(), src.size()) elements are
// transferred.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void dev2host(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
              const TensorView<const T, Rank, PtrTraits2, Tindex2> src,
              cudaStream_t s = 0) {
  const auto count = std::min(dst.size(), src.size());
  dev2host(dst.data(), src.data(), count, s);
}
template
<
typename
T
>
void
dev2host
(
TensorView
<
T
>
dst
,
const
TensorView
<
T
>
src
,
cudaStream_t
s
=
0
)
{
// View overload (mutable-element source): copies from device view `src` to
// host view `dst` on stream `s`. Only min(dst.size(), src.size()) elements
// are transferred.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void dev2host(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
              const TensorView<T, Rank, PtrTraits2, Tindex2> src,
              cudaStream_t s = 0) {
  const auto count = std::min(dst.size(), src.size());
  dev2host(dst.data(), src.data(), count, s);
}
...
...
@@ -1282,13 +1420,18 @@ void dev2dev(T *dst, const T *src, size_t size, cudaStream_t s = 0) {
cudaMemcpyAsync
(
dst
,
src
,
size
*
sizeof
(
T
),
cudaMemcpyDeviceToDevice
,
s
));
}
template
<
typename
T
>
void
dev2dev
(
TensorView
<
T
>
dst
,
const
TensorView
<
const
T
>
src
,
// View overload (const-element source): device-to-device copy from `src` to
// `dst` on stream `s`. Only min(dst.size(), src.size()) elements are
// transferred.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void dev2dev(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
             const TensorView<const T, Rank, PtrTraits2, Tindex2> src,
             cudaStream_t s = 0) {
  const auto count = std::min(dst.size(), src.size());
  dev2dev(dst.data(), src.data(), count, s);
}
template
<
typename
T
>
void
dev2dev
(
TensorView
<
T
>
dst
,
const
TensorView
<
T
>
src
,
cudaStream_t
s
=
0
)
{
// View overload (mutable-element source): device-to-device copy from `src` to
// `dst` on stream `s`. Only min(dst.size(), src.size()) elements are
// transferred.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void dev2dev(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
             const TensorView<T, Rank, PtrTraits2, Tindex2> src,
             cudaStream_t s = 0) {
  const auto count = std::min(dst.size(), src.size());
  dev2dev(dst.data(), src.data(), count, s);
}
...
...
@@ -1298,67 +1441,39 @@ void host2host(T *dst, const T *src, size_t size, cudaStream_t s = 0) {
cudaMemcpyAsync
(
dst
,
src
,
size
*
sizeof
(
T
),
cudaMemcpyHostToHost
,
s
));
}
template
<
typename
T
>
void
host2host
(
TensorView
<
T
>
dst
,
const
TensorView
<
const
T
>
src
,
// View overload (const-element source): host-to-host copy from `src` to `dst`
// on stream `s`. Only min(dst.size(), src.size()) elements are transferred.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void host2host(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
               const TensorView<const T, Rank, PtrTraits2, Tindex2> src,
               cudaStream_t s = 0) {
  const auto count = std::min(dst.size(), src.size());
  host2host(dst.data(), src.data(), count, s);
}
template
<
typename
T
>
void
host2host
(
TensorView
<
T
>
dst
,
const
TensorView
<
T
>
src
,
cudaStream_t
s
=
0
)
{
// View overload (mutable-element source): host-to-host copy from `src` to
// `dst` on stream `s`. Only min(dst.size(), src.size()) elements are
// transferred.
template <typename T, int Rank, template <class> class PtrTraits1,
          template <class> class PtrTraits2, typename Tindex1,
          typename Tindex2>
void host2host(TensorView<T, Rank, PtrTraits1, Tindex1> dst,
               const TensorView<T, Rank, PtrTraits2, Tindex2> src,
               cudaStream_t s = 0) {
  const auto count = std::min(dst.size(), src.size());
  host2host(dst.data(), src.data(), count, s);
}
template
<
typename
T
>
void
zero_dev
(
TensorView
<
T
>
tensor
)
{
// Sets every byte of the view's buffer (tensor.size() * sizeof(T) bytes) to
// zero with cudaMemset. Any CUDA error is reported through checkCudaErrors.
template <typename T, int Rank, template <class> class PtrTraits,
          typename Tindex>
void zero_dev(TensorView<T, Rank, PtrTraits, Tindex> tensor) {
  checkCudaErrors(cudaMemset(tensor.data(), 0, tensor.size() * sizeof(T)));
}
template
<
typename
T
>
void
zero_dev
(
TensorView
<
T
>
tensor
,
cudaStream_t
s
)
{
// Stream-ordered variant: enqueues a cudaMemsetAsync on stream `s` that sets
// every byte of the view's buffer (tensor.size() * sizeof(T) bytes) to zero.
// Any CUDA error is reported through checkCudaErrors.
template <typename T, int Rank, template <class> class PtrTraits,
          typename Tindex>
void zero_dev(TensorView<T, Rank, PtrTraits, Tindex> tensor, cudaStream_t s) {
  checkCudaErrors(
      cudaMemsetAsync(tensor.data(), 0, tensor.size() * sizeof(T), s));
}
template
<
typename
T
>
void
zero_host
(
TensorView
<
T
>
tensor
)
{
// Assigns 0 to every element of the view with std::fill. Unlike zero_dev this
// writes through the pointer directly and makes no CUDA call.
template <typename T, int Rank, template <class> class PtrTraits,
          typename Tindex>
void zero_host(TensorView<T, Rank, PtrTraits, Tindex> tensor) {
  auto first = tensor.data();
  auto last = first + tensor.size();
  std::fill(first, last, 0);
}
#endif
namespace detail {

// Compile-time map from an element type to a short human-readable name.
// Only the specializations below exist; any other type fails to compile.
template <typename T> struct TypeToString;

// Boolean and floating point types.
template <> struct TypeToString<bool> {
  static constexpr const char *value = "bool";
};
template <> struct TypeToString<float> {
  static constexpr const char *value = "float";
};
template <> struct TypeToString<double> {
  static constexpr const char *value = "double";
};

// Signed fixed-width integers.
template <> struct TypeToString<int8_t> {
  static constexpr const char *value = "int8";
};
template <> struct TypeToString<int16_t> {
  static constexpr const char *value = "int16";
};
template <> struct TypeToString<int32_t> {
  static constexpr const char *value = "int32";
};
template <> struct TypeToString<int64_t> {
  static constexpr const char *value = "int64";
};

// Unsigned fixed-width integers.
template <> struct TypeToString<uint8_t> {
  static constexpr const char *value = "uint8";
};
template <> struct TypeToString<uint16_t> {
  static constexpr const char *value = "uint16";
};
template <> struct TypeToString<uint32_t> {
  static constexpr const char *value = "uint32";
};
template <> struct TypeToString<uint64_t> {
  static constexpr const char *value = "uint64";
};

}
// namespace detail
}
// namespace tv
\ No newline at end of file
include/tensorview/tools.h
0 → 100644
View file @
19e73bbe
// Copyright 2019-2020 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <chrono>
#ifdef TV_CUDA
#include <cuda_runtime_api.h>
#endif
#include <iostream>
namespace
tv
{
#ifdef TV_CUDA
// Interval timer for code that launches CUDA work. Each measurement point
// first calls cudaDeviceSynchronize() and then reads std::chrono::steady_clock.
//
// TimeT is the std::chrono duration type the result is expressed in
// (microseconds by default).
//
// NOTE(review): the return value of cudaDeviceSynchronize() is ignored, as in
// the original code, so a failed synchronization is not reported here.
template <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {
  // Synchronizes the device, then starts the first interval.
  CudaContextTimer() {
    cudaDeviceSynchronize();
    mCurTime = std::chrono::steady_clock::now();
  }

  // Synchronizes the device, returns the time elapsed since construction or
  // the previous report() (in TimeT ticks), and starts the next interval.
  typename TimeT::rep report() {
    cudaDeviceSynchronize();
    // Read the clock once and use the same sample as the end of this interval
    // and the start of the next, so consecutive intervals are contiguous.
    const auto now = std::chrono::steady_clock::now();
    const auto elapsed = std::chrono::duration_cast<TimeT>(now - mCurTime);
    mCurTime = now;
    return elapsed.count();
  }

private:
  // Start of the interval currently being measured.
  std::chrono::time_point<std::chrono::steady_clock> mCurTime;
};
#endif
// Wall-clock interval timer based on std::chrono::steady_clock.
//
// TimeT is the std::chrono duration type the result is expressed in
// (microseconds by default).
template <typename TimeT = std::chrono::microseconds> struct CPUTimer {
  // Starts the first interval at construction time.
  CPUTimer() : mCurTime(std::chrono::steady_clock::now()) {}

  // Returns the time elapsed since construction or the previous report()
  // (in TimeT ticks) and starts the next interval.
  typename TimeT::rep report() {
    // Read the clock once and use the same sample as the end of this interval
    // and the start of the next, so consecutive intervals are contiguous.
    const auto now = std::chrono::steady_clock::now();
    const auto elapsed = std::chrono::duration_cast<TimeT>(now - mCurTime);
    mCurTime = now;
    return elapsed.count();
  }

private:
  // Start of the interval currently being measured.
  std::chrono::time_point<std::chrono::steady_clock> mCurTime;
};
}
// namespace tv
include/tensorview/torch_utils.h
0 → 100644
View file @
19e73bbe
// Copyright 2019-2020 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "mp_helper.h"
#include <tensorview/tensorview.h>
#include <ATen/ATen.h>
#include <torch/script.h>
#ifdef TV_CUDA
#include <ATen/cuda/CUDAContext.h>
#endif
namespace
tv
{
#ifdef TV_CUDA
struct
TorchGPU
:
public
tv
::
GPU
{
virtual
cudaStream_t
getStream
()
const
override
{
return
at
::
cuda
::
getCurrentCUDAStream
();
}
};
#endif
namespace
detail
{
template
<
typename
T
>
struct
TypeToTorchDtypeTraits
;
template
<
>
struct
TypeToTorchDtypeTraits
<
int32_t
>
{
static
constexpr
decltype
(
torch
::
kInt32
)
value
=
torch
::
kInt32
;
};
template
<
>
struct
TypeToTorchDtypeTraits
<
int16_t
>
{
static
constexpr
decltype
(
torch
::
kInt32
)
value
=
torch
::
kInt16
;
};
template
<
>
struct
TypeToTorchDtypeTraits
<
int8_t
>
{
static
constexpr
decltype
(
torch
::
kInt8
)
value
=
torch
::
kInt8
;
};
template
<
>
struct
TypeToTorchDtypeTraits
<
int64_t
>
{
static
constexpr
decltype
(
torch
::
kInt32
)
value
=
torch
::
kInt64
;
};
template
<
>
struct
TypeToTorchDtypeTraits
<
uint8_t
>
{
static
constexpr
decltype
(
torch
::
kInt32
)
value
=
torch
::
kUInt8
;
};
template
<
>
struct
TypeToTorchDtypeTraits
<
bool
>
{
static
constexpr
decltype
(
torch
::
kInt32
)
value
=
torch
::
kBool
;
};
template
<
>
struct
TypeToTorchDtypeTraits
<
float
>
{
static
constexpr
decltype
(
torch
::
kInt32
)
value
=
torch
::
kFloat32
;
};
template
<
>
struct
TypeToTorchDtypeTraits
<
double
>
{
static
constexpr
decltype
(
torch
::
kInt32
)
value
=
torch
::
kFloat64
;
};
template
<
>
struct
TypeToTorchDtypeTraits
<
at
::
Half
>
{
static
constexpr
decltype
(
torch
::
kInt32
)
value
=
torch
::
kHalf
;
};
using
all_torch_types_t
=
std
::
tuple
<
float
,
double
,
int8_t
,
int16_t
,
int32_t
,
int64_t
,
uint8_t
,
bool
,
at
::
Half
>
;
}
// namespace detail
// Shorthand variable template for the torch dtype that corresponds to the
// C++ element type T (see detail::TypeToTorchDtypeTraits).
template <typename T>
constexpr auto torch_type_v = detail::TypeToTorchDtypeTraits<T>::value;
// Runtime-to-compile-time dtype dispatch.
//
// Walks the candidate types Ts... and, for each one whose torch dtype equals
// `t`, calls `f` with a value-initialized object of that type, so the callee
// can recover the type with decltype. Throws through TV_THROW_RT_ERR, listing
// the candidate type names, when no candidate matches.
template <class... Ts, typename F>
void dispatch_torch(at::ScalarType t, F &&f) {
  static_assert(sizeof...(Ts) > 0, "you need to provide at least one type");
  using candidates_t = mp_list<Ts...>;

  bool matched = false;
  tv::mp_for_each<candidates_t>([&matched, &f, t](auto I) {
    using candidate_t = decltype(I);
    if (detail::TypeToTorchDtypeTraits<candidate_t>::value == t) {
      std::forward<F>(f)(candidate_t());
      matched = true;
    }
  });
  if (matched) {
    return;
  }

  // Nothing matched: collect the names of the accepted types for the message.
  std::stringstream available;
  tv::mp_for_each<candidates_t>([&available](auto I) {
    available << tv::detail::TypeToString<decltype(I)>::value << " ";
  });
  TV_THROW_RT_ERR("unknown type", t, ", available:", available.str());
}
// Functor form of dispatch_torch that takes its candidate types from a type
// list (any class template instantiated with a pack of types, such as
// std::tuple<...>) instead of an explicit template argument pack.
template <class T> struct DispatchTorch;

template <template <class...> class TypeList, class... Args>
struct DispatchTorch<TypeList<Args...>> {
  // Forwards to dispatch_torch<Args...>(t, f).
  template <typename F> void operator()(at::ScalarType t, F &&f) {
    dispatch_torch<Args...>(t, std::forward<F>(f));
  }
};
// Verifies that the tensor's runtime dtype corresponds to the element type T
// (cv-qualifiers on T are ignored). The tensor's dtype is resolved to a C++
// type by dispatching over detail::all_torch_types_t; a mismatch is reported
// through TV_ASSERT_RT_ERR.
template <typename T> void check_torch_dtype(const torch::Tensor &tensor) {
  DispatchTorch<detail::all_torch_types_t> dispatcher;
  dispatcher(tensor.scalar_type(), [&](auto I) {
    using actual_t = decltype(I);
    using expected_t = std::remove_cv_t<T>;
    constexpr bool val = std::is_same<expected_t, actual_t>::value;
    TV_ASSERT_RT_ERR(val, "error");
  });
}
// Creates a non-owning TensorView over the storage of a torch tensor.
//
// The tensor's dtype must match T (checked by check_torch_dtype); when Rank is
// positive the tensor must have exactly Rank dimensions. The view's shape is
// copied from tensor.sizes().
//
// NOTE(review): only the sizes are transferred; torch strides are not (see
// the TODO below), so this presumably expects a contiguous tensor. Confirm at
// call sites.
template <typename T, int Rank = -1,
          template <class> class PtrTraits = DefaultPtrTraits,
          typename Tindex = int>
TensorView<T, Rank, PtrTraits, Tindex> torch2tv(const torch::Tensor &tensor) {
  using view_t = TensorView<T, Rank, PtrTraits, Tindex>;
  using tv_shape_t = typename view_t::tv_shape_t;

  check_torch_dtype<T>(tensor);
  // TODO stride
  if (Rank > 0) {
    TV_ASSERT_INVALID_ARG(tensor.dim() == Rank, "error");
  }

  tv_shape_t shape;
  for (auto extent : tensor.sizes()) {
    shape.push_back(extent);
  }
  // data_ptr is instantiated with the non-const element type; the pointer is
  // then handed to a view whose element type may be const.
  auto *data = tensor.data_ptr<std::remove_const_t<T>>();
  return view_t(data, shape);
}
namespace detail {

// Human-readable name for torch's half-precision type. It is what
// dispatch_torch prints for at::Half when it lists the available types.
template <> struct TypeToString<at::Half> {
  static constexpr const char *value = "half";
};

}
// namespace detail
}
// namespace tv
\ No newline at end of file
include/torch_utils.h
View file @
19e73bbe
...
...
@@ -13,18 +13,18 @@
// limitations under the License.
#pragma once
#include <
spconv
/mp_helper.h>
#include <
tensorview
/mp_helper.h>
#include <tensorview/tensorview.h>
#include <ATen/ATen.h>
#include <torch/script.h>
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
#include <ATen/cuda/CUDAContext.h>
#endif
namespace
tv
{
#ifdef
SPCON
V_CUDA
#ifdef
T
V_CUDA
struct
TorchGPU
:
public
tv
::
GPU
{
virtual
cudaStream_t
getStream
()
const
override
{
return
at
::
cuda
::
getCurrentCUDAStream
();
...
...
@@ -103,10 +103,10 @@ template <> struct TypeToString<at::Half> {
};
}
// namespace detail
template
<
class
...
Ts
,
typename
F
>
void
torch_
dispatch
(
at
::
ScalarType
t
,
F
&&
f
)
{
void
dispat
ch_tor
ch
(
at
::
ScalarType
t
,
F
&&
f
)
{
static_assert
(
sizeof
...(
Ts
)
>
0
,
"you need to provide at least one type"
);
bool
notFound
=
true
;
spconv
::
mp_for_each
<
spconv
::
mp_list
<
Ts
...
>>
([
=
,
&
notFound
,
&
f
](
auto
I
)
{
spconv
::
tv
::
mp_for_each
<
spconv
::
mp_list
<
Ts
...
>>
([
=
,
&
notFound
,
&
f
](
auto
I
)
{
if
(
torch_type_v
<
decltype
(
I
)
>
==
t
)
{
std
::
forward
<
F
>
(
f
)(
decltype
(
I
)());
notFound
=
false
;
...
...
@@ -114,7 +114,7 @@ void torch_dispatch(at::ScalarType t, F &&f) {
});
if
(
notFound
)
{
std
::
stringstream
ss
;
spconv
::
mp_for_each
<
spconv
::
mp_list
<
Ts
...
>>
([
=
,
&
ss
](
auto
I
)
{
spconv
::
tv
::
mp_for_each
<
spconv
::
mp_list
<
Ts
...
>>
([
=
,
&
ss
](
auto
I
)
{
ss
<<
tv
::
detail
::
TypeToString
<
decltype
(
I
)
>::
value
<<
" "
;
});
TV_THROW_RT_ERR
(
"unknown type"
,
t
,
", available: "
,
ss
.
str
());
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment