OpenDAS / MMCV · Commits

Commit fdeee889
authored May 25, 2025 by limm

release v1.6.1 of mmcv

parent df465820

Changes: 457. Showing 20 changed files with 3236 additions and 4 deletions (+3236, -4).
mmcv/ops/csrc/common/mps/MPSLibrary.mm                            +110   -0
mmcv/ops/csrc/common/mps/MPSStream.h                              +132   -0
mmcv/ops/csrc/common/mps/MPSUtils.h                                +51   -0
mmcv/ops/csrc/common/pytorch_cpp_helper.hpp                         +7   -4
mmcv/ops/csrc/common/pytorch_mlu_helper.hpp                        +28   -0
mmcv/ops/csrc/common/utils/spconv/paramsgrid.h                     +70   -0
mmcv/ops/csrc/common/utils/spconv/prettyprint.h                   +493   -0
mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h                 +60   -0
mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h               +297   -0
mmcv/ops/csrc/common/utils/spconv/spconv/indice.h                  +78   -0
mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h                 +37   -0
mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h               +50   -0
mmcv/ops/csrc/common/utils/spconv/spconv/point2voxel.h            +385   -0
mmcv/ops/csrc/common/utils/spconv/spconv/reordering.h              +36   -0
mmcv/ops/csrc/common/utils/spconv/tensorview/helper_kernel.cuh     +75   -0
mmcv/ops/csrc/common/utils/spconv/tensorview/helper_launch.h       +19   -0
mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h        +1119   -0
mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp              +7   -0
mmcv/ops/csrc/onnxruntime/cpu/rotated_feature_align.cpp           +132   -0
mmcv/ops/csrc/onnxruntime/rotated_feature_align.h                  +50   -0
Too many changes to show: to preserve performance, only 457 of 457+ changed files are displayed.
mmcv/ops/csrc/common/mps/MPSLibrary.mm (new file, mode 100644)
#include "MPSLibrary.h"
#include <c10/util/CallOnce.h>
#include "MPSDevice.h"

static std::unique_ptr<MPSLibraryManager> mps_library_manager;
static c10::once_flag mpsdev_init;

MPSLibraryManager* MPSLibraryManager::getInstance() {
  c10::call_once(mpsdev_init, [] {
    mps_library_manager =
        std::unique_ptr<MPSLibraryManager>(new MPSLibraryManager());
  });
  return mps_library_manager.get();
}

MPSLibraryManager::~MPSLibraryManager() {}

MPSLibraryManager::MPSLibraryManager() {}

bool MPSLibraryManager::hasLibrary(const std::string& name) {
  return _library_map.find(name) != _library_map.end();
}

MPSLibrary* MPSLibraryManager::getLibrary(const std::string& library_url) {
  if (_library_map.find(library_url) != _library_map.end()) {
    return _library_map[library_url].get();
  }
  _library_map.emplace(std::make_pair(
      library_url,
      std::unique_ptr<MPSLibrary>(MPSLibrary::createFromUrl(library_url))));
  return _library_map[library_url].get();
}

MPSLibrary* MPSLibraryManager::createLibraryFromSouce(const std::string& name,
                                                      const std::string& source) {
  NSString* ns_name = [NSString stringWithCString:name.c_str()];
  if (_library_map.find(name) != _library_map.end()) {
    NSLog(@"Library %@ already exist.", ns_name);
    return nullptr;
  }
  _library_map.emplace(std::make_pair(
      name,
      std::unique_ptr<MPSLibrary>(MPSLibrary::createFromSource(source))));
  return _library_map[name].get();
}

MPSLibrary* MPSLibrary::createFromUrl(const std::string& library_url) {
  MPSLibrary* library = new MPSLibrary();
  @autoreleasepool {
    NSError* error = nil;

    // load library and func
    NSString* utl_str = [NSString stringWithCString:library_url.c_str()];
    NSURL* metal_url = [NSURL fileURLWithPath:utl_str];
    library->_library =
        [at::mps::MPSDevice::getInstance()->device() newLibraryWithURL:metal_url
                                                                 error:&error];
    if (library->_library == nil) {
      NSLog(@"Failed to find library, error %@.", error);
      exit(1);
    }
  }
  return library;
}

MPSLibrary* MPSLibrary::createFromSource(const std::string& sources) {
  MPSLibrary* library = new MPSLibrary();
  @autoreleasepool {
    NSError* error = nil;

    // load library and func
    NSString* code_str = [NSString stringWithCString:sources.c_str()];
    library->_library = [at::mps::MPSDevice::getInstance()->device()
        newLibraryWithSource:code_str
                     options:nil
                       error:&error];
    if (library->_library == nil) {
      NSLog(@"Failed to find library, error %@.", error);
      exit(1);
    }
  }
  return library;
}

MPSLibrary::~MPSLibrary() {
  [_library release];
  _library = nil;
}

MTLComputePipelineState_t MPSLibrary::getComputePipelineState(
    const std::string& function_name) {
  if (_pso_map.find(function_name) != _pso_map.end()) {
    return _pso_map[function_name];
  }

  MTLComputePipelineState_t pso;
  @autoreleasepool {
    NSError* error = nil;

    // create function
    NSString* function_name_str =
        [NSString stringWithCString:function_name.c_str()];
    id<MTLFunction> func = [_library newFunctionWithName:function_name_str];
    if (func == nil) {
      NSLog(@"Failed to created pipeline state object, error %@.", error);
      exit(1);
    }
    // create pipeline
    pso = [at::mps::MPSDevice::getInstance()->device()
        newComputePipelineStateWithFunction:func
                                      error:&error];
    _pso_map.emplace(std::make_pair(function_name, pso));
  }
  return _pso_map[function_name];
}
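
The manager keeps one MPSLibrary per URL or source string and one pipeline state per kernel name, so repeated launches reuse compiled objects. A minimal usage sketch, not part of the commit; the key, source string, and kernel name here are placeholders:

#include <string>
#include "MPSLibrary.h"

// Sketch only (names illustrative): compile a Metal source once under a key,
// then fetch the cached compute pipeline state for one of its kernels.
MTLComputePipelineState_t getPipeline(const std::string& key,
                                      const std::string& kernel_source,
                                      const std::string& kernel_name) {
  MPSLibraryManager* manager = MPSLibraryManager::getInstance();
  MPSLibrary* lib = manager->hasLibrary(key)
                        ? manager->getLibrary(key)
                        : manager->createLibraryFromSouce(key, kernel_source);
  // Compiled on first request, returned from _pso_map afterwards.
  return lib->getComputePipelineState(kernel_name);
}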
mmcv/ops/csrc/common/mps/MPSStream.h (new file, mode 100644)
// Copyright © 2022 Apple Inc.
// This file is modified from:
// https://github.com/pytorch/pytorch/blob/a85d1f0bcdd02cf18d3b0517337458cb51a18cdb/aten/src/ATen/mps/MPSStream.h
#pragma once
#include <cstdint>
#include <utility>
#include <c10/core/DeviceGuard.h>
#include <c10/core/Stream.h>
#include <c10/util/Exception.h>
#include "MPSDevice.h"
#ifdef __OBJC__
#include <Foundation/Foundation.h>
#include <Metal/Metal.h>
#include <MetalPerformanceShaders/MetalPerformanceShaders.h>
#include <MetalPerformanceShadersGraph/MetalPerformanceShadersGraph.h>
typedef id<MTLCommandQueue> MTLCommandQueue_t;
typedef id<MTLCommandBuffer> MTLCommandBuffer_t;
typedef id<MTLSharedEvent> MTLSharedEvent_t;
typedef id<MTLDevice> MTLDevice_t;
#else
typedef void* MTLCommandQueue_t;
typedef void* MTLCommandQueue;
typedef void* MTLCommandBuffer_t;
typedef void* MTLCommandBuffer;
typedef void* MTLSharedEvent_t;
typedef void* dispatch_queue_t;
typedef void* MTLDevice_t;
#define nil NULL;
#endif

namespace at {
namespace mps {

//-----------------------------------------------------------------
// MPSStream
//-----------------------------------------------------------------

class TORCH_API MPSStream {
 public:
  enum Unchecked { UNCHECKED };

  /// Construct a MPSStream from a Stream. This construction is checked,
  /// and will raise an error if the Stream is not, in fact, a MPS stream.
  explicit MPSStream(Stream stream);

  ~MPSStream();
  MTLCommandQueue_t commandQueue() const { return _commandQueue; };
  dispatch_queue_t queue() const { return _serialQueue; }

  MTLCommandBuffer_t commandBuffer();
  void commit(bool flush);
  void commitAndWait();
  void synchronize();

  void flush();

  /// Get the MPS device index that this stream is associated with.
  c10::DeviceIndex device_index() const { return _stream.device_index(); }

  MTLCommandQueue_t stream() const { return _commandQueue; };

  MTLDevice_t device() const { return [_commandQueue device]; }

  /// Explicit conversion to Stream.
  Stream unwrap() const { return _stream; }

 private:
  Stream _stream;
  MTLCommandQueue_t _commandQueue = nil;
  MTLCommandBuffer_t _commandBuffer = nil;
  void _flush(bool commitAndWait) const;

  dispatch_queue_t _serialQueue = nullptr;
};

/**
 * Get the current MPS stream
 */
TORCH_API MPSStream* getCurrentMPSStream();

/**
 * Get the default MPS stream
 */
TORCH_API MPSStream* getDefaultMPSStream();

//-----------------------------------------------------------------
// MPSStreamImpl
//-----------------------------------------------------------------

class TORCH_API MPSStreamImpl {
 public:
  /**
   * Gets single instance of the MPSStream.
   */
  static MPSStream* getInstance();

 private:
  static MPSStream* _stream;
  MPSStreamImpl();
};

//-----------------------------------------------------------------
// MPSEvent
//-----------------------------------------------------------------

struct TORCH_API MPSEvent {
  MPSEvent();
  // MPSEvent(id<MTLDevice> device);

  ~MPSEvent();
  MTLSharedEvent_t event() const { return _event; }

  void recordEvent(MPSStream* stream);
  void waitForEvent(MPSStream* queue);
  // waits on the cpu
  bool queryEvent();
  uint64_t getCurrentValue() { return _currentValue; }
  void setCurrentValue(uint64_t currValue) { _currentValue = currValue; }

 private:
  bool _isRecorded = false;
  uint64_t _currentValue = 0;
  MTLSharedEvent_t _event;
};

typedef MPSEvent* mpsEvent_t;

}  // namespace mps
}  // namespace at
mmcv/ops/csrc/common/mps/MPSUtils.h (new file, mode 100644)
#ifndef _MPS_UTILS_H_
#define _MPS_UTILS_H_
#include <torch/extension.h>
#ifdef __OBJC__
#include <Foundation/Foundation.h>
#include <Metal/Metal.h>
#include <MetalPerformanceShaders/MetalPerformanceShaders.h>
typedef id<MTLBuffer> MTLBuffer_t;
typedef id<MTLComputeCommandEncoder> MTLComputeCommandEncoder_t;
#else
typedef void* MTLBuffer;
typedef void* MTLBuffer_t;
typedef void* MTLComputeCommandEncoder;
typedef void* MTLComputeCommandEncoder_t;
#endif

// utils
static inline MTLBuffer_t getMTLBufferStorage(const at::Tensor& tensor) {
  return __builtin_bit_cast(MTLBuffer_t, tensor.storage().data());
}

template <typename T,
          std::enable_if_t<!std::is_same<std::decay_t<T>, at::Tensor>::value,
                           bool> = true>
void setMTLArg(MTLComputeCommandEncoder_t encoder, int index, T&& t);

template <typename T,
          std::enable_if_t<std::is_same<std::decay_t<T>, at::Tensor>::value,
                           bool> = true>
void setMTLArg(MTLComputeCommandEncoder_t encoder, int index, T&& t) {
  [encoder setBuffer:getMTLBufferStorage(t) offset:0 atIndex:index];
}

template <typename T,
          std::enable_if_t<!std::is_same<std::decay_t<T>, at::Tensor>::value,
                           bool>>
void setMTLArg(MTLComputeCommandEncoder_t encoder, int index, T&& t) {
  [encoder setBytes:&t length:sizeof(t) atIndex:index];
}

inline void setMTLArgsImpl(MTLComputeCommandEncoder_t, int) {}

template <typename T, typename... Args>
void setMTLArgsImpl(MTLComputeCommandEncoder_t encoder, int index, T&& t,
                    Args&&... args) {
  setMTLArg(encoder, index, std::forward<T>(t));
  setMTLArgsImpl(encoder, index + 1, std::forward<Args>(args)...);
}

template <typename... Args>
void setMTLArgs(MTLComputeCommandEncoder_t encoder,
                MTLComputePipelineState_t pso, Args&&... args) {
  [encoder setComputePipelineState:pso];
  setMTLArgsImpl(encoder, 0, std::forward<Args>(args)...);
}
#endif
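
The two setMTLArg overloads dispatch on whether the argument decays to at::Tensor: tensors are bound with setBuffer:, everything else is copied by value with setBytes:, and setMTLArgsImpl assigns consecutive buffer indices in argument order. A hedged sketch of a call site; the encoder, pipeline state, and tensors are assumed to come from elsewhere, and all names are illustrative:

#include "MPSLibrary.h"  // assumed to define MTLComputePipelineState_t
#include "MPSUtils.h"

// Sketch only: binds `input` and `output` at buffer indices 0 and 1 (via
// setBuffer:) and the scalar `numel` at index 2 (via setBytes:).
void encodeKernelArgs(MTLComputeCommandEncoder_t encoder,
                      MTLComputePipelineState_t pso, const at::Tensor& input,
                      const at::Tensor& output, int numel) {
  setMTLArgs(encoder, pso, input, output, numel);
}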
mmcv/ops/csrc/common/pytorch_cpp_helper.hpp (modified)
#ifndef PYTORCH_CPP_HELPER
#define PYTORCH_CPP_HELPER
#include <torch/extension.h>
#include <torch/types.h>

#include <vector>

using namespace at;

#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

#define CHECK_CUDA(x) \
  TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_MLU(x) \
  TORCH_CHECK(x.device().type() == at::kMLU, #x " must be a MLU tensor")
#define CHECK_CPU(x) \
-  TORCH_CHECK(!x.device().is_cuda(), #x " must be a CPU tensor")
+  TORCH_CHECK(x.device().type() == at::kCPU, #x " must be a CPU tensor")
#define CHECK_CONTIGUOUS(x) \
  TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_CUDA_INPUT(x) \
  CHECK_CUDA(x);            \
  CHECK_CONTIGUOUS(x)
#define CHECK_MLU_INPUT(x) \
  CHECK_MLU(x);            \
  CHECK_CONTIGUOUS(x)
#define CHECK_CPU_INPUT(x) \
  CHECK_CPU(x);            \
  CHECK_CONTIGUOUS(x)
...
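
DIVUP is the usual integer ceil-division helper: how many size-n blocks cover m elements, for m >= 0 and n > 0. A quick check of what it evaluates to:

#include <cassert>

#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

int main() {
  assert(DIVUP(10, 3) == 4);  // 10 elements need 4 blocks of 3
  assert(DIVUP(9, 3) == 3);   // exact multiples do not round up
  assert(DIVUP(0, 3) == 0);
  return 0;
}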
mmcv/ops/csrc/common/pytorch_mlu_helper.hpp (new file, mode 100644)
/*************************************************************************
* Copyright (C) 2021 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#ifndef PYTORCH_MLU_HELPER_HPP_
#define PYTORCH_MLU_HELPER_HPP_
#ifdef MMCV_WITH_MLU
#include "aten.h"
#define NFU_ALIGN_SIZE 128
#define PAD_UP(x, y) (((x) / (y) + (int)((x) % (y) > 0)) * (y))
#define PAD_DOWN(x, y) (((x) / (y)) * (y))
#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y))
#endif
#endif // PYTORCH_MLU_HELPER_HPP_
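
PAD_UP and CEIL_ALIGN both round up to the next multiple of y (two spellings of the same idea), while PAD_DOWN truncates to the previous multiple; NFU_ALIGN_SIZE is the 128-byte alignment they are typically paired with. For example:

#include <cassert>

#define NFU_ALIGN_SIZE 128
#define PAD_UP(x, y) (((x) / (y) + (int)((x) % (y) > 0)) * (y))
#define PAD_DOWN(x, y) (((x) / (y)) * (y))
#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y))

int main() {
  assert(PAD_UP(100, NFU_ALIGN_SIZE) == 128);      // up to next multiple
  assert(PAD_DOWN(100, NFU_ALIGN_SIZE) == 0);      // down to previous multiple
  assert(CEIL_ALIGN(129, NFU_ALIGN_SIZE) == 256);  // same rounding as PAD_UP
  return 0;
}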
mmcv/ops/csrc/common/utils/spconv/paramsgrid.h (new file, mode 100644)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PARAMS_GRID_H_
#define PARAMS_GRID_H_
#include <tuple>
#include <vector>

namespace detail {
template <class scalar_t>
int getTotalSize(std::vector<scalar_t> arg) {
  return arg.size();
}

template <class scalar_t, class... TArgs>
int getTotalSize(std::vector<scalar_t> arg, std::vector<TArgs>... args) {
  return arg.size() * getTotalSize(args...);
}
template <typename scalar_t>
int getSize(std::vector<scalar_t> arg) {
  return arg.size();
}

template <int Idx, class TT, class scalar_t>
void assigner(TT& src, std::vector<int> counter, std::vector<scalar_t>& arg) {
  std::get<Idx>(src) = arg[counter[Idx]];
}

template <int Idx, class TT, class scalar_t, class... TArgs>
void assigner(TT& src, std::vector<int> counter, std::vector<scalar_t>& arg,
              std::vector<TArgs>&... args) {
  std::get<Idx>(src) = arg[counter[Idx]];
  assigner<Idx + 1>(src, counter, args...);
}
}  // namespace detail

template <class... TArgs>
std::vector<std::tuple<TArgs...>> paramsGrid(std::vector<TArgs>... args) {
  int length = detail::getTotalSize(args...);
  std::vector<int> sizes = {detail::getSize(args)...};
  int size = sizes.size();

  std::vector<std::tuple<TArgs...>> params(length);
  std::vector<int> counter(size);
  for (int i = 0; i < length; ++i) {
    detail::assigner<0>(params[i], counter, args...);
    counter[size - 1] += 1;
    for (int c = size - 1; c >= 0; --c) {
      if (counter[c] == sizes[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  return params;
}
#endif
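
paramsGrid builds the Cartesian product of its argument vectors: getTotalSize multiplies the lengths, and the counter array is stepped like an odometer with the last argument varying fastest. A small usage sketch, assuming this header is on the include path:

#include <iostream>
#include <string>
#include <tuple>
#include <vector>

#include "paramsgrid.h"  // this header; exact path is an assumption

int main() {
  // 3 * 2 = 6 tuples, last argument varying fastest.
  auto grid = paramsGrid(std::vector<int>{1, 2, 3},
                         std::vector<std::string>{"a", "b"});
  for (auto& p : grid)
    std::cout << std::get<0>(p) << " " << std::get<1>(p) << "\n";
  // prints: 1 a, 1 b, 2 a, 2 b, 3 a, 3 b (one pair per line)
  return 0;
}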
mmcv/ops/csrc/common/utils/spconv/prettyprint.h (new file, mode 100644)
// Copyright Louis Delacroix 2010 - 2014.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// A pretty printing library for C++
//
// Usage:
// Include this header, and operator<< will "just work".
#ifndef H_PRETTY_PRINT
#define H_PRETTY_PRINT
#include <cstddef>
#include <iterator>
#include <memory>
#include <ostream>
#include <set>
#include <tuple>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <valarray>
namespace pretty_print {
namespace detail {
// SFINAE type trait to detect whether T::const_iterator exists.

struct sfinae_base {
  using yes = char;
  using no = yes[2];
};

template <typename T>
struct has_const_iterator : private sfinae_base {
 private:
  template <typename C>
  static yes& test(typename C::const_iterator*);
  template <typename C>
  static no& test(...);

 public:
  static const bool value = sizeof(test<T>(nullptr)) == sizeof(yes);
  using type = T;
};

template <typename T>
struct has_begin_end : private sfinae_base {
 private:
  template <typename C>
  static yes& f(typename std::enable_if<
                std::is_same<decltype(static_cast<typename C::const_iterator (
                                          C::*)() const>(&C::begin)),
                             typename C::const_iterator (C::*)()
                                 const>::value>::type*);

  template <typename C>
  static no& f(...);

  template <typename C>
  static yes& g(typename std::enable_if<
                std::is_same<decltype(static_cast<typename C::const_iterator (
                                          C::*)() const>(&C::end)),
                             typename C::const_iterator (C::*)() const>::value,
                void>::type*);

  template <typename C>
  static no& g(...);

 public:
  static bool const beg_value = sizeof(f<T>(nullptr)) == sizeof(yes);
  static bool const end_value = sizeof(g<T>(nullptr)) == sizeof(yes);
};

}  // namespace detail

// Holds the delimiter values for a specific character type
template <typename TChar>
struct delimiters_values {
  using char_type = TChar;
  const char_type* prefix;
  const char_type* delimiter;
  const char_type* postfix;
};

// Defines the delimiter values for a specific container and character type
template <typename T, typename TChar>
struct delimiters {
  using type = delimiters_values<TChar>;
  static const type values;
};

// Functor to print containers. You can use this directly if you want
// to specify a non-default delimiters type. The printing logic can
// be customized by specializing the nested template.
template <typename T, typename TChar = char,
          typename TCharTraits = ::std::char_traits<TChar>,
          typename TDelimiters = delimiters<T, TChar>>
struct print_container_helper {
  using delimiters_type = TDelimiters;
  using ostream_type = std::basic_ostream<TChar, TCharTraits>;

  template <typename U>
  struct printer {
    static void print_body(const U& c, ostream_type& stream) {
      using std::begin;
      using std::end;

      auto it = begin(c);
      const auto the_end = end(c);

      if (it != the_end) {
        for (;;) {
          stream << *it;

          if (++it == the_end) break;

          if (delimiters_type::values.delimiter != NULL)
            stream << delimiters_type::values.delimiter;
        }
      }
    }
  };

  print_container_helper(const T& container) : container_(container) {}

  inline void operator()(ostream_type& stream) const {
    if (delimiters_type::values.prefix != NULL)
      stream << delimiters_type::values.prefix;

    printer<T>::print_body(container_, stream);

    if (delimiters_type::values.postfix != NULL)
      stream << delimiters_type::values.postfix;
  }

 private:
  const T& container_;
};

// Specialization for pairs
template <typename T, typename TChar, typename TCharTraits,
          typename TDelimiters>
template <typename T1, typename T2>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<
    std::pair<T1, T2>> {
  using ostream_type =
      typename print_container_helper<T, TChar, TCharTraits,
                                      TDelimiters>::ostream_type;

  static void print_body(const std::pair<T1, T2>& c, ostream_type& stream) {
    stream << c.first;
    if (print_container_helper<T, TChar, TCharTraits,
                               TDelimiters>::delimiters_type::values
            .delimiter != NULL)
      stream << print_container_helper<T, TChar, TCharTraits,
                                       TDelimiters>::delimiters_type::values
                    .delimiter;
    stream << c.second;
  }
};

// Specialization for tuples
template <typename T, typename TChar, typename TCharTraits,
          typename TDelimiters>
template <typename... Args>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<
    std::tuple<Args...>> {
  using ostream_type =
      typename print_container_helper<T, TChar, TCharTraits,
                                      TDelimiters>::ostream_type;
  using element_type = std::tuple<Args...>;

  template <std::size_t I>
  struct Int {};

  static void print_body(const element_type& c, ostream_type& stream) {
    tuple_print(c, stream, Int<0>());
  }

  static void tuple_print(const element_type&, ostream_type&,
                          Int<sizeof...(Args)>) {}

  static void tuple_print(
      const element_type& c, ostream_type& stream,
      typename std::conditional<sizeof...(Args) != 0, Int<0>,
                                std::nullptr_t>::type) {
    stream << std::get<0>(c);
    tuple_print(c, stream, Int<1>());
  }

  template <std::size_t N>
  static void tuple_print(const element_type& c, ostream_type& stream,
                          Int<N>) {
    if (print_container_helper<T, TChar, TCharTraits,
                               TDelimiters>::delimiters_type::values
            .delimiter != NULL)
      stream << print_container_helper<T, TChar, TCharTraits,
                                       TDelimiters>::delimiters_type::values
                    .delimiter;

    stream << std::get<N>(c);

    tuple_print(c, stream, Int<N + 1>());
  }
};

// Prints a print_container_helper to the specified stream.
template <typename T, typename TChar, typename TCharTraits,
          typename TDelimiters>
inline std::basic_ostream<TChar, TCharTraits>& operator<<(
    std::basic_ostream<TChar, TCharTraits>& stream,
    const print_container_helper<T, TChar, TCharTraits, TDelimiters>& helper) {
  helper(stream);
  return stream;
}

// Basic is_container template; specialize to derive from std::true_type for
// all desired container types
template <typename T>
struct is_container
    : public std::integral_constant<bool,
                                    detail::has_const_iterator<T>::value &&
                                        detail::has_begin_end<T>::beg_value &&
                                        detail::has_begin_end<T>::end_value> {
};

template <typename T, std::size_t N>
struct is_container<T[N]> : std::true_type {};

template <std::size_t N>
struct is_container<char[N]> : std::false_type {};

template <typename T>
struct is_container<std::valarray<T>> : std::true_type {};

template <typename T1, typename T2>
struct is_container<std::pair<T1, T2>> : std::true_type {};

template <typename... Args>
struct is_container<std::tuple<Args...>> : std::true_type {};

// Default delimiters
template <typename T>
struct delimiters<T, char> {
  static const delimiters_values<char> values;
};
template <typename T>
const delimiters_values<char> delimiters<T, char>::values = {"[", ", ", "]"};
template <typename T>
struct delimiters<T, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename T>
const delimiters_values<wchar_t> delimiters<T, wchar_t>::values = {L"[", L", ",
                                                                   L"]"};

// Delimiters for (multi)set and unordered_(multi)set
template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::set<T, TComp, TAllocator>, char> {
  static const delimiters_values<char> values;
};

template <typename T, typename TComp, typename TAllocator>
const delimiters_values<char>
    delimiters<::std::set<T, TComp, TAllocator>, char>::values = {"{", ", ",
                                                                  "}"};

template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::set<T, TComp, TAllocator>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T, typename TComp, typename TAllocator>
const delimiters_values<wchar_t>
    delimiters<::std::set<T, TComp, TAllocator>, wchar_t>::values = {
        L"{", L", ", L"}"};

template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::multiset<T, TComp, TAllocator>, char> {
  static const delimiters_values<char> values;
};

template <typename T, typename TComp, typename TAllocator>
const delimiters_values<char>
    delimiters<::std::multiset<T, TComp, TAllocator>, char>::values = {
        "{", ", ", "}"};

template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::multiset<T, TComp, TAllocator>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T, typename TComp, typename TAllocator>
const delimiters_values<wchar_t>
    delimiters<::std::multiset<T, TComp, TAllocator>, wchar_t>::values = {
        L"{", L", ", L"}"};

template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>, char> {
  static const delimiters_values<char> values;
};

template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<char> delimiters<
    ::std::unordered_set<T, THash, TEqual, TAllocator>, char>::values = {
    "{", ", ", "}"};

template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>,
                  wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<wchar_t> delimiters<
    ::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t>::values = {
    L"{", L", ", L"}"};

template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>,
                  char> {
  static const delimiters_values<char> values;
};

template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<char> delimiters<
    ::std::unordered_multiset<T, THash, TEqual, TAllocator>, char>::values = {
    "{", ", ", "}"};

template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>,
                  wchar_t> {
  static const delimiters_values<wchar_t> values;
};

template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<wchar_t>
    delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>,
               wchar_t>::values = {L"{", L", ", L"}"};

// Delimiters for pair and tuple
template <typename T1, typename T2>
struct delimiters<std::pair<T1, T2>, char> {
  static const delimiters_values<char> values;
};
template <typename T1, typename T2>
const delimiters_values<char> delimiters<std::pair<T1, T2>, char>::values = {
    "(", ", ", ")"};
template <typename T1, typename T2>
struct delimiters<::std::pair<T1, T2>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename T1, typename T2>
const delimiters_values<wchar_t>
    delimiters<::std::pair<T1, T2>, wchar_t>::values = {L"(", L", ", L")"};

template <typename... Args>
struct delimiters<std::tuple<Args...>, char> {
  static const delimiters_values<char> values;
};
template <typename... Args>
const delimiters_values<char> delimiters<std::tuple<Args...>, char>::values =
    {"(", ", ", ")"};
template <typename... Args>
struct delimiters<::std::tuple<Args...>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename... Args>
const delimiters_values<wchar_t>
    delimiters<::std::tuple<Args...>, wchar_t>::values = {L"(", L", ", L")"};

// Type-erasing helper class for easy use of custom delimiters.
// Requires TCharTraits = std::char_traits<TChar> and TChar = char or wchar_t,
// and MyDelims needs to be defined for TChar. Usage: "cout <<
// pretty_print::custom_delims<MyDelims>(x)".
struct custom_delims_base {
  virtual ~custom_delims_base() {}
  virtual std::ostream& stream(::std::ostream&) = 0;
  virtual std::wostream& stream(::std::wostream&) = 0;
};

template <typename T, typename Delims>
struct custom_delims_wrapper : custom_delims_base {
  custom_delims_wrapper(const T& t_) : t(t_) {}

  std::ostream& stream(std::ostream& s) {
    return s << print_container_helper<T, char, std::char_traits<char>,
                                       Delims>(t);
  }

  std::wostream& stream(std::wostream& s) {
    return s << print_container_helper<T, wchar_t, std::char_traits<wchar_t>,
                                       Delims>(t);
  }

 private:
  const T& t;
};

template <typename Delims>
struct custom_delims {
  template <typename Container>
  custom_delims(const Container& c)
      : base(new custom_delims_wrapper<Container, Delims>(c)) {}

  std::unique_ptr<custom_delims_base> base;
};

template <typename TChar, typename TCharTraits, typename Delims>
inline std::basic_ostream<TChar, TCharTraits>& operator<<(
    std::basic_ostream<TChar, TCharTraits>& s,
    const custom_delims<Delims>& p) {
  return p.base->stream(s);
}

// A wrapper for a C-style array given as pointer-plus-size.
// Usage: std::cout << pretty_print_array(arr, n) << std::endl;
template <typename T>
struct array_wrapper_n {
  typedef const T* const_iterator;
  typedef T value_type;

  array_wrapper_n(const T* const a, size_t n) : _array(a), _n(n) {}
  inline const_iterator begin() const { return _array; }
  inline const_iterator end() const { return _array + _n; }

 private:
  const T* const _array;
  size_t _n;
};

// A wrapper for hash-table based containers that offer local iterators to
// each bucket. Usage: std::cout << bucket_print(m, 4) << std::endl; (Prints
// bucket 5 of container m.)
template <typename T>
struct bucket_print_wrapper {
  typedef typename T::const_local_iterator const_iterator;
  typedef typename T::size_type size_type;

  const_iterator begin() const { return m_map.cbegin(n); }

  const_iterator end() const { return m_map.cend(n); }

  bucket_print_wrapper(const T& m, size_type bucket) : m_map(m), n(bucket) {}

 private:
  const T& m_map;
  const size_type n;
};

}  // namespace pretty_print

// Global accessor functions for the convenience wrappers
template <typename T>
inline pretty_print::array_wrapper_n<T> pretty_print_array(const T* const a,
                                                           size_t n) {
  return pretty_print::array_wrapper_n<T>(a, n);
}

template <typename T>
pretty_print::bucket_print_wrapper<T> bucket_print(const T& m,
                                                   typename T::size_type n) {
  return pretty_print::bucket_print_wrapper<T>(m, n);
}

// Main magic entry point: An overload snuck into namespace std.
// Can we do better?
namespace std {
// Prints a container to the stream using default delimiters
template <typename T, typename TChar, typename TCharTraits>
inline typename enable_if<::pretty_print::is_container<T>::value,
                          basic_ostream<TChar, TCharTraits>&>::type
operator<<(basic_ostream<TChar, TCharTraits>& stream, const T& container) {
  return stream
         << ::pretty_print::print_container_helper<T, TChar, TCharTraits>(
                container);
}
}  // namespace std

#endif  // H_PRETTY_PRINT
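
With the header included, any type that passes the is_container trait streams with its registered delimiters: [ ] for generic containers, { } for sets, ( ) for pairs and tuples. A small usage sketch, assuming the header is on the include path:

#include <iostream>
#include <set>
#include <string>
#include <tuple>
#include <vector>

#include "prettyprint.h"  // this header; exact path is an assumption

int main() {
  std::vector<int> v{1, 2, 3};
  std::set<int> s{4, 5};
  auto t = std::make_tuple(6, std::string("seven"));
  std::cout << v << "\n";  // [1, 2, 3]
  std::cout << s << "\n";  // {4, 5}
  std::cout << t << "\n";  // (6, seven)
  return 0;
}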
mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h (new file, mode 100644)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/embed.h>
#include <pybind11/functional.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <spconv/tensorview/tensorview.h>
#include <algorithm>
#include <iostream>
namespace py = pybind11;

template <typename scalar_t, typename TPyObject>
std::vector<scalar_t> array2Vector(TPyObject arr) {
  py::array arr_np = arr;
  size_t size = arr.attr("size").template cast<size_t>();
  py::array_t<scalar_t> arr_cc = arr_np;
  std::vector<scalar_t> data(arr_cc.data(), arr_cc.data() + size);
  return data;
}

template <typename scalar_t>
std::vector<scalar_t> arrayT2Vector(py::array_t<scalar_t> arr) {
  std::vector<scalar_t> data(arr.data(), arr.data() + arr.size());
  return data;
}

template <typename scalar_t, typename TPyObject>
tv::TensorView<scalar_t> array2TensorView(TPyObject arr) {
  py::array arr_np = arr;
  py::array_t<scalar_t> arr_cc = arr_np;
  tv::Shape shape;
  for (int i = 0; i < arr_cc.ndim(); ++i) {
    shape.push_back(arr_cc.shape(i));
  }
  return tv::TensorView<scalar_t>(arr_cc.mutable_data(), shape);
}

template <typename scalar_t>
tv::TensorView<scalar_t> arrayT2TensorView(py::array_t<scalar_t> arr) {
  tv::Shape shape;
  for (int i = 0; i < arr.ndim(); ++i) {
    shape.push_back(arr.shape(i));
  }
  return tv::TensorView<scalar_t>(arr.mutable_data(), shape);
}
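
arrayT2Vector copies the numpy buffer into a std::vector, while array2TensorView/arrayT2TensorView wrap the buffer without copying, so the TensorView is only valid while the array stays alive. A hedged sketch using an embedded interpreter (required to create numpy arrays from C++; header path is an assumption):

#include <iostream>
#include <vector>

#include <pybind11/embed.h>

#include "pybind11_utils.h"  // this header; exact path is an assumption

int main() {
  py::scoped_interpreter guard{};  // numpy arrays need a live interpreter
  py::array_t<float> arr({2, 3});  // 2x3 float array
  auto view = arrayT2TensorView<float>(arr);           // zero-copy view
  std::vector<float> vec = arrayT2Vector<float>(arr);  // copies 6 elements
  std::cout << vec.size() << "\n";  // 6
  (void)view;                       // valid only while `arr` is alive
  return 0;
}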
mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h (new file, mode 100644)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPCONV_GEOMETRY_H_
#define SPCONV_GEOMETRY_H_
#include <utils/spconv/tensorview/tensorview.h>
#include <iostream>
#include <limits>
template <typename Index, unsigned NDim>
TV_HOST_DEVICE Index getValidOutPos(const Index* input_pos,
                                    const Index* kernelSize,
                                    const Index* stride, const Index* padding,
                                    const Index* dilation,
                                    const Index* outSpatialShape, Index* out) {
  Index lowers[NDim];
  Index uppers[NDim];
  Index counter[NDim];
  Index counterSize[NDim];
  Index pointCounter = 0;
  Index val;
  Index numPoints = 1;
  Index m, offset;
  bool valid = false;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 +
                 stride[i] + padding[i]) /
                stride[i];
    uppers[i] = (input_pos[i] + padding[i]) / stride[i];
  }
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
    numPoints *= counterSize[i];
  }
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    counter[i] = 0;
  }
  for (int i = 0; i < numPoints; ++i) {
    valid = true;
    m = 1;
    offset = 0;
#pragma unroll
    for (int j = NDim - 1; j >= 0; --j) {
      val = uppers[j] - counter[j] * dilation[j];
      out[pointCounter * (NDim + 1) + j] = val;
      if (val < 0 || (val > outSpatialShape[j] - 1)) {
        valid = false;
        // break;
      }
      offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j];
      m *= kernelSize[j];
    }
    out[pointCounter * (NDim + 1) + NDim] = offset;
    if (valid) ++pointCounter;
    counter[NDim - 1] += 1;
#pragma unroll
    for (int c = NDim - 1; c >= 0; --c) {
      if (counter[c] == counterSize[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  return pointCounter;
}

template <typename Index, unsigned NDim>
TV_HOST_DEVICE Index getValidOutPosTranspose(
    const Index* input_pos, const Index* kernelSize, const Index* stride,
    const Index* padding, const Index* dilation, const Index* outSpatialShape,
    Index* out) {
  Index lowers[NDim];
  Index uppers[NDim];
  Index counter[NDim];
  Index counterSize[NDim];
  Index pointCounter = 0;
  Index val;
  Index numPoints = 1;
  Index m, offset;
  bool valid = false;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    lowers[i] = input_pos[i] * stride[i] - padding[i];
    uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i];
  }
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
    numPoints *= counterSize[i];
  }
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    counter[i] = 0;
  }
  for (int i = 0; i < numPoints; ++i) {
    valid = true;
    m = 1;
    offset = 0;
#pragma unroll
    for (int j = NDim - 1; j >= 0; --j) {
      val = uppers[j] - counter[j] * dilation[j];
      out[pointCounter * (NDim + 1) + j] = val;
      if (val < 0 || (val > outSpatialShape[j] - 1)) {
        valid = false;
      }
      offset += m * (val - lowers[j]) / dilation[j];
      m *= kernelSize[j];
    }
    out[pointCounter * (NDim + 1) + NDim] = offset;
    if (valid) ++pointCounter;
    counter[NDim - 1] += 1;
#pragma unroll
    for (int c = NDim - 1; c >= 0; --c) {
      if (counter[c] == counterSize[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  return pointCounter;
}

template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsConv(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<Index> indicesOut,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index* kernelSize, const Index* stride,
                         const Index* padding, const Index* dilation,
                         const Index* outSpatialShape) {
  // indicesOut: num_active * kernelVolume * (NDim + 1)
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index* validPoints = validPoints_.data();
  Index* pointPtr = nullptr;
  for (int j = 0; j < numActIn; ++j) {
    batchIdx = indicesIn(j, 0);
    numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      if (gridsOut[index] == -1) {
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        gridsOut[index] = numAct++;
      }
      // indicePairs: [K, 2, L]
      indicePairs(offset, 0, indiceNum[offset]) = j;
      indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
    }
  }
  return numAct;
}

template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn,
                           tv::TensorView<Index> indicesOut,
                           tv::TensorView<IndexGrid> gridsOut,
                           tv::TensorView<Index> indicePairs,
                           tv::TensorView<Index> indiceNum,
                           const Index* kernelSize, const Index* stride,
                           const Index* padding, const Index* dilation,
                           const Index* outSpatialShape) {
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index* validPoints = validPoints_.data();
  Index* pointPtr = nullptr;
  for (int j = 0; j < numActIn; ++j) {
    batchIdx = indicesIn(j, 0);
    numValidPoints = getValidOutPosTranspose<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      if (gridsOut[index] == -1) {
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        gridsOut[index] = numAct++;
      }
      // indicePairs: [K, 2, L]
      indicePairs(offset, 0, indiceNum[offset]) = j;
      indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
    }
  }
  return numAct;
}

template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index* const kernelSize,
                         const Index* const stride,
                         const Index* const padding, const Index* dilation,
                         const Index* const outSpatialShape) {
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  // Index validPoints[kernelVolume * (NDim + 1)];
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index* validPoints = validPoints_.data();
  Index* pointPtr = nullptr;
  Index index = 0;
  for (int j = 0; j < numActIn; ++j) {
    index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1,
                                         outSpatialShape) +
            spatialVolume * indicesIn(j, 0);
    gridsOut[index] = j;
  }
  for (int j = 0; j < numActIn; ++j) {
    numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
              spatialVolume * indicesIn(j, 0);
      if (gridsOut[index] > -1) {
        indicePairs(offset, 0, indiceNum[offset]) = j;
        indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
      }
    }
  }
  return numActIn;
}
#endif
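
All three pair-builders share one enumeration pattern: an NDim-digit counter over counterSize[] is stepped like an odometer, the last digit incrementing each iteration and carrying upward when it reaches its size. A standalone sketch of just that loop (illustrative, not from the diff):

#include <iostream>

int main() {
  const int NDim = 2;
  int counterSize[NDim] = {2, 3};
  int counter[NDim] = {0, 0};
  int numPoints = counterSize[0] * counterSize[1];
  for (int i = 0; i < numPoints; ++i) {
    std::cout << "(" << counter[0] << "," << counter[1] << ")\n";
    counter[NDim - 1] += 1;                // step the last digit
    for (int c = NDim - 1; c >= 0; --c) {  // propagate carries upward
      if (counter[c] == counterSize[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  // prints (0,0) (0,1) (0,2) (1,0) (1,1) (1,2)
  return 0;
}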
mmcv/ops/csrc/common/utils/spconv/spconv/indice.h (new file, mode 100644)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_CONV_INDICE_FUNCTOR_H_
#define SPARSE_CONV_INDICE_FUNCTOR_H_
#include <utils/spconv/tensorview/tensorview.h>
namespace functor {
template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctorP1 {
  Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut,
                   tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs,
                   tv::TensorView<Index> indiceNum,
                   tv::TensorView<Index> indicePairUnique,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape,
                   bool transpose);
};

template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctorP2 {
  Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut,
                   tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs,
                   tv::TensorView<Index> indiceNum,
                   tv::TensorView<Index> indicePairUnique,
                   const tv::SimpleVector<Index, NDim> outSpatialShape,
                   bool transpose, bool resetGrid = false);
};

template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctor {
  Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut,
                   tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs,
                   tv::TensorView<Index> indiceNum,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape,
                   bool transpose, bool resetGrid = false);
};

template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateSubMIndicePairFunctor {
  Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs,
                   tv::TensorView<Index> indiceNum,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape,
                   bool transpose, bool resetGrid = false);
};
}  // namespace functor
#endif
mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h (new file, mode 100644)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_MAXPOOL_FUNCTOR_H_
#define SPARSE_MAXPOOL_FUNCTOR_H_
#include <utils/spconv/tensorview/tensorview.h>
namespace functor {
template <typename Device, typename scalar_t, typename Index>
struct SparseMaxPoolForwardFunctor {
  void operator()(const Device& d, tv::TensorView<scalar_t> outFeatures,
                  tv::TensorView<const scalar_t> inFeatures,
                  tv::TensorView<const Index> indices, int size);
};

template <typename Device, typename scalar_t, typename Index>
struct SparseMaxPoolBackwardFunctor {
  void operator()(const Device& d, tv::TensorView<const scalar_t> outFeatures,
                  tv::TensorView<const scalar_t> inFeatures,
                  tv::TensorView<const scalar_t> fout,
                  tv::TensorView<scalar_t> fin,
                  tv::TensorView<const Index> indices, int size);
};
}  // namespace functor
#endif
mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h (new file, mode 100644)
#ifndef MP_HELPER_H_
#define MP_HELPER_H_
#include <type_traits>
#include <utility>

template <class... T>
struct mp_list {};

template <class T, T... I>
using mp_list_c = mp_list<std::integral_constant<T, I>...>;

namespace detail {
template <class... T, class F>
constexpr F mp_for_each_impl(mp_list<T...>, F&& f) {
  return std::initializer_list<int>{(f(T()), 0)...}, std::forward<F>(f);
}

template <class F>
constexpr F mp_for_each_impl(mp_list<>, F&& f) {
  return std::forward<F>(f);
}
}  // namespace detail

namespace detail {
template <class A, template <class...> class B>
struct mp_rename_impl {
  // An error "no type named 'type'" here means that the first argument to
  // mp_rename is not a list
};

template <template <class...> class A, class... T, template <class...> class B>
struct mp_rename_impl<A<T...>, B> {
  using type = B<T...>;
};
}  // namespace detail

template <class A, template <class...> class B>
using mp_rename = typename ::detail::mp_rename_impl<A, B>::type;

template <class L, class F>
constexpr F mp_for_each(F&& f) {
  return ::detail::mp_for_each_impl(mp_rename<L, mp_list>(),
                                    std::forward<F>(f));
}
#endif
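
mp_for_each instantiates f once per element of a compile-time list; with mp_list_c the elements are std::integral_constant types, so a generic lambda can read each constant from its default-constructed instance. A small usage sketch, assuming the header is on the include path:

#include <iostream>

#include "mp_helper.h"  // this header; exact path is an assumption

int main() {
  // Visits integral_constant<int, 1>, <int, 2>, <int, 3> in order.
  mp_for_each<mp_list_c<int, 1, 2, 3>>([](auto I) {
    std::cout << I() << " ";  // prints: 1 2 3
  });
  std::cout << "\n";
  return 0;
}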
mmcv/ops/csrc/common/utils/spconv/spconv/point2voxel.h (new file, mode 100644)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <algorithm>
#include <iostream>
namespace
py
=
pybind11
;
using
namespace
pybind11
::
literals
;
template
<
typename
DType
,
int
NDim
>
int
points_to_voxel_3d_np
(
py
::
array_t
<
DType
>
points
,
py
::
array_t
<
DType
>
voxels
,
py
::
array_t
<
int
>
coors
,
py
::
array_t
<
int
>
num_points_per_voxel
,
py
::
array_t
<
int
>
coor_to_voxelidx
,
std
::
vector
<
DType
>
voxel_size
,
std
::
vector
<
DType
>
coors_range
,
int
max_points
,
int
max_voxels
)
{
auto
points_rw
=
points
.
template
mutable_unchecked
<
2
>();
auto
voxels_rw
=
voxels
.
template
mutable_unchecked
<
3
>();
auto
coors_rw
=
coors
.
mutable_unchecked
<
2
>
();
auto
num_points_per_voxel_rw
=
num_points_per_voxel
.
mutable_unchecked
<
1
>
();
auto
coor_to_voxelidx_rw
=
coor_to_voxelidx
.
mutable_unchecked
<
NDim
>
();
auto
N
=
points_rw
.
shape
(
0
);
auto
num_features
=
points_rw
.
shape
(
1
);
constexpr
int
ndim_minus_1
=
NDim
-
1
;
int
voxel_num
=
0
;
bool
failed
=
false
;
int
coor
[
NDim
];
int
c
;
int
grid_size
[
NDim
];
for
(
int
i
=
0
;
i
<
NDim
;
++
i
)
{
grid_size
[
i
]
=
round
((
coors_range
[
NDim
+
i
]
-
coors_range
[
i
])
/
voxel_size
[
i
]);
}
int
voxelidx
,
num
;
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
failed
=
false
;
for
(
int
j
=
0
;
j
<
NDim
;
++
j
)
{
c
=
floor
((
points_rw
(
i
,
j
)
-
coors_range
[
j
])
/
voxel_size
[
j
]);
if
((
c
<
0
||
c
>=
grid_size
[
j
]))
{
failed
=
true
;
break
;
}
coor
[
ndim_minus_1
-
j
]
=
c
;
}
if
(
failed
)
continue
;
voxelidx
=
coor_to_voxelidx_rw
(
coor
[
0
],
coor
[
1
],
coor
[
2
]);
if
(
voxelidx
==
-
1
)
{
voxelidx
=
voxel_num
;
if
(
voxel_num
>=
max_voxels
)
continue
;
voxel_num
+=
1
;
coor_to_voxelidx_rw
(
coor
[
0
],
coor
[
1
],
coor
[
2
])
=
voxelidx
;
for
(
int
k
=
0
;
k
<
NDim
;
++
k
)
{
coors_rw
(
voxelidx
,
k
)
=
coor
[
k
];
}
}
num
=
num_points_per_voxel_rw
(
voxelidx
);
if
(
num
<
max_points
)
{
for
(
int
k
=
0
;
k
<
num_features
;
++
k
)
{
voxels_rw
(
voxelidx
,
num
,
k
)
=
points_rw
(
i
,
k
);
}
num_points_per_voxel_rw
(
voxelidx
)
+=
1
;
}
}
for
(
int
i
=
0
;
i
<
voxel_num
;
++
i
)
{
coor_to_voxelidx_rw
(
coors_rw
(
i
,
0
),
coors_rw
(
i
,
1
),
coors_rw
(
i
,
2
))
=
-
1
;
}
return
voxel_num
;
}
template
<
typename
DType
,
int
NDim
>
int
points_to_voxel_3d_np_mean
(
py
::
array_t
<
DType
>
points
,
py
::
array_t
<
DType
>
voxels
,
py
::
array_t
<
DType
>
means
,
py
::
array_t
<
int
>
coors
,
py
::
array_t
<
int
>
num_points_per_voxel
,
py
::
array_t
<
int
>
coor_to_voxelidx
,
std
::
vector
<
DType
>
voxel_size
,
std
::
vector
<
DType
>
coors_range
,
int
max_points
,
int
max_voxels
)
{
auto
points_rw
=
points
.
template
mutable_unchecked
<
2
>();
auto
means_rw
=
means
.
template
mutable_unchecked
<
2
>();
auto
voxels_rw
=
voxels
.
template
mutable_unchecked
<
3
>();
auto
coors_rw
=
coors
.
mutable_unchecked
<
2
>
();
auto
num_points_per_voxel_rw
=
num_points_per_voxel
.
mutable_unchecked
<
1
>
();
auto
coor_to_voxelidx_rw
=
coor_to_voxelidx
.
mutable_unchecked
<
NDim
>
();
auto
N
=
points_rw
.
shape
(
0
);
auto
num_features
=
points_rw
.
shape
(
1
);
constexpr
int
ndim_minus_1
=
NDim
-
1
;
int
voxel_num
=
0
;
bool
failed
=
false
;
int
coor
[
NDim
];
int
c
;
int
grid_size
[
NDim
];
for
(
int
i
=
0
;
i
<
NDim
;
++
i
)
{
grid_size
[
i
]
=
round
((
coors_range
[
NDim
+
i
]
-
coors_range
[
i
])
/
voxel_size
[
i
]);
}
int
voxelidx
,
num
;
for
(
int
i
=
0
;
i
<
N
;
++
i
)
{
failed
=
false
;
for
(
int
j
=
0
;
j
<
NDim
;
++
j
)
{
c
=
floor
((
points_rw
(
i
,
j
)
-
coors_range
[
j
])
/
voxel_size
[
j
]);
if
((
c
<
0
||
c
>=
grid_size
[
j
]))
{
failed
=
true
;
break
;
}
coor
[
ndim_minus_1
-
j
]
=
c
;
}
if
(
failed
)
continue
;
voxelidx
=
coor_to_voxelidx_rw
(
coor
[
0
],
coor
[
1
],
coor
[
2
]);
if
(
voxelidx
==
-
1
)
{
voxelidx
=
voxel_num
;
if
(
voxel_num
>=
max_voxels
)
continue
;
voxel_num
+=
1
;
coor_to_voxelidx_rw
(
coor
[
0
],
coor
[
1
],
coor
[
2
])
=
voxelidx
;
for
(
int
k
=
0
;
k
<
NDim
;
++
k
)
{
coors_rw
(
voxelidx
,
k
)
=
coor
[
k
];
}
}
num
=
num_points_per_voxel_rw
(
voxelidx
);
if
(
num
<
max_points
)
{
for
(
int
k
=
0
;
k
<
num_features
;
++
k
)
{
voxels_rw
(
voxelidx
,
num
,
k
)
=
points_rw
(
i
,
k
);
}
num_points_per_voxel_rw
(
voxelidx
)
+=
1
;
for
(
int
k
=
0
;
k
<
num_features
;
++
k
)
{
means_rw
(
voxelidx
,
k
)
+=
(
points_rw
(
i
,
k
)
-
means_rw
(
voxelidx
,
k
))
/
DType
(
num
+
1
);
}
}
}
for
(
int
i
=
0
;
i
<
voxel_num
;
++
i
)
{
coor_to_voxelidx_rw
(
coors_rw
(
i
,
0
),
coors_rw
(
i
,
1
),
coors_rw
(
i
,
2
))
=
-
1
;
num
=
num_points_per_voxel_rw
(
i
);
for
(
int
j
=
num
;
j
<
max_points
;
++
j
)
{
for
(
int
k
=
0
;
k
<
num_features
;
++
k
)
{
voxels_rw
(
i
,
j
,
k
)
=
means_rw
(
i
,
k
);
}
}
}
return
voxel_num
;
}
template <typename DType, int NDim>
int points_to_voxel_3d_np_height(
    py::array_t<DType> points, py::array_t<DType> voxels,
    py::array_t<DType> height, py::array_t<DType> maxs, py::array_t<int> coors,
    py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,
    std::vector<DType> voxel_size, std::vector<DType> coors_range,
    int max_points, int max_voxels) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto height_rw = height.template mutable_unchecked<2>();
  auto maxs_rw = maxs.template mutable_unchecked<2>();
  auto voxels_rw = voxels.template mutable_unchecked<3>();
  auto coors_rw = coors.mutable_unchecked<2>();
  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] =
        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int voxelidx, num;
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) continue;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
        coors_rw(voxelidx, k) = coor[k];
      }
    }
    num = num_points_per_voxel_rw(voxelidx);
    if (num < max_points) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(voxelidx, num, k) = points_rw(i, k);
        height_rw(voxelidx, k) =
            std::min(points_rw(i, k), height_rw(voxelidx, k));
        maxs_rw(voxelidx, k) = std::max(points_rw(i, k), maxs_rw(voxelidx, k));
      }
      num_points_per_voxel_rw(voxelidx) += 1;
    }
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
    for (int k = 0; k < num_features; ++k) {
      height_rw(i, k) = maxs_rw(i, k) - height_rw(i, k);
    }
  }
  return voxel_num;
}
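// Caller-side note (an assumption based on the min/max updates above, not
// stated in this header): because this variant folds each point into `height`
// with std::min and into `maxs` with std::max, and finally rewrites `height`
// as the per-feature range (maxs - height), the caller is expected to
// pre-fill `height` with a large sentinel (e.g. +inf), `maxs` with a small
// one (e.g. -inf), and `coor_to_voxelidx` with -1 so the voxelidx == -1 test
// correctly identifies untouched cells.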
template <typename DType, int NDim>
int block_filtering(py::array_t<DType> points, py::array_t<int> mask,
                    py::array_t<DType> height, py::array_t<DType> maxs,
                    py::array_t<int> coor_to_voxelidx,
                    std::vector<DType> voxel_size,
                    std::vector<DType> coors_range, int max_voxels,
                    DType eps) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto mask_rw = mask.template mutable_unchecked<1>();
  auto height_rw = height.template mutable_unchecked<1>();
  auto maxs_rw = maxs.template mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] =
        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int voxelidx, num;
  // first pass: record the per-voxel z minimum (height) and maximum (maxs)
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
    }
    height_rw(voxelidx) = std::min(points_rw(i, 2), height_rw(voxelidx));
    maxs_rw(voxelidx) = std::max(points_rw(i, 2), maxs_rw(voxelidx));
  }
  // second pass: mask out points whose voxel has a z-range smaller than eps
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if ((maxs_rw(voxelidx) - height_rw(voxelidx)) < eps) {
      mask_rw(i) = 0;
    }
  }
  return voxel_num;
}
template <typename DType, int NDim>
int points_to_voxel_3d_with_filtering(
    py::array_t<DType> points, py::array_t<DType> voxels,
    py::array_t<int> voxel_mask, py::array_t<DType> mins,
    py::array_t<DType> maxs, py::array_t<int> coors,
    py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,
    std::vector<DType> voxel_size, std::vector<DType> coors_range,
    int max_points, int max_voxels, int block_factor, int block_size,
    DType height_threshold) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto mins_rw = mins.template mutable_unchecked<2>();
  auto maxs_rw = maxs.template mutable_unchecked<2>();
  auto voxels_rw = voxels.template mutable_unchecked<3>();
  auto voxel_mask_rw = voxel_mask.template mutable_unchecked<1>();
  auto coors_rw = coors.mutable_unchecked<2>();
  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  DType max_value, min_value;
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] =
        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int block_shape_H = grid_size[1] / block_factor;
  int block_shape_W = grid_size[0] / block_factor;
  int voxelidx, num;
  int block_coor[2];
  int startx, stopx, starty, stopy;
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) continue;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
        coors_rw(voxelidx, k) = coor[k];
      }
    }
    num = num_points_per_voxel_rw(voxelidx);
    if (num < max_points) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(voxelidx, num, k) = points_rw(i, k);
      }
      block_coor[0] = coor[1] / block_factor;
      block_coor[1] = coor[2] / block_factor;
      mins_rw(block_coor[0], block_coor[1]) =
          std::min(points_rw(i, 2), mins_rw(block_coor[0], block_coor[1]));
      maxs_rw(block_coor[0], block_coor[1]) =
          std::max(points_rw(i, 2), maxs_rw(block_coor[0], block_coor[1]));
      num_points_per_voxel_rw(voxelidx) += 1;
    }
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor[1] = coors_rw(i, 1);
    coor[2] = coors_rw(i, 2);
    coor_to_voxelidx_rw(coors_rw(i, 0), coor[1], coor[2]) = -1;
    block_coor[0] = coor[1] / block_factor;
    block_coor[1] = coor[2] / block_factor;
    min_value = mins_rw(block_coor[0], block_coor[1]);
    max_value = maxs_rw(block_coor[0], block_coor[1]);
    startx = std::max(0, block_coor[0] - block_size / 2);
    stopx =
        std::min(block_shape_H, block_coor[0] + block_size - block_size / 2);
    starty = std::max(0, block_coor[1] - block_size / 2);
    stopy =
        std::min(block_shape_W, block_coor[1] + block_size - block_size / 2);
    for (int j = startx; j < stopx; ++j) {
      for (int k = starty; k < stopy; ++k) {
        min_value = std::min(min_value, mins_rw(j, k));
        max_value = std::max(max_value, maxs_rw(j, k));
      }
    }
    voxel_mask_rw(i) = (max_value - min_value) > height_threshold;
  }
  return voxel_num;
}
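// A hedged binding sketch (illustrative, not part of this header): these
// templates are typically instantiated and exposed to Python via pybind11,
// roughly as below. The module name and docstring are placeholders.
//
//   #include <pybind11/numpy.h>
//   #include <pybind11/pybind11.h>
//   #include <pybind11/stl.h>
//
//   PYBIND11_MODULE(point2voxel_example, m) {
//     m.def("points_to_voxel_3d_np_height",
//           &points_to_voxel_3d_np_height<float, 3>,
//           "voxelize a float32 point cloud and track per-feature ranges");
//   }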
mmcv/ops/csrc/common/utils/spconv/spconv/reordering.h
0 → 100644
View file @ fdeee889
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_REORDERING_FUNCTOR_H_
#define SPARSE_REORDERING_FUNCTOR_H_
#include <utils/spconv/tensorview/tensorview.h>
namespace functor {
template <typename Device, typename scalar_t, typename Index>
struct SparseGatherFunctor {
  void operator()(const Device& d, tv::TensorView<scalar_t> buffer,
                  tv::TensorView<const scalar_t> features,
                  tv::TensorView<const Index> indices, int size);
};

template <typename Device, typename scalar_t, typename Index>
struct SparseScatterAddFunctor {
  void operator()(const Device& d, tv::TensorView<scalar_t> out_features,
                  tv::TensorView<const scalar_t> buffer,
                  tv::TensorView<const Index> indices, int size,
                  bool stable = false);
};
}  // namespace functor
#endif
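// The functors above only declare the gather/scatter interface; the
// device-specific definitions live in the implementation files. As a
// reference for the intended semantics, a minimal CPU-style sketch (an
// assumption for illustration, not the actual mmcv implementation):
//
//   // gather:      buffer[i, :]                = features[indices[i], :]
//   // scatter-add: out_features[indices[i], :] += buffer[i, :]
//   template <typename scalar_t, typename Index>
//   void sparse_gather_ref(scalar_t* buffer, const scalar_t* features,
//                          const Index* indices, int size, int num_planes) {
//     for (int i = 0; i < size; ++i)
//       for (int p = 0; p < num_planes; ++p)
//         buffer[i * num_planes + p] = features[indices[i] * num_planes + p];
//   }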
mmcv/ops/csrc/common/utils/spconv/tensorview/helper_kernel.cuh
0 → 100644
View file @ fdeee889
#pragma once
namespace tv {
namespace detail {

template <typename scalar_t>
class KernelLoop {
  struct Iterator {
    __forceinline__ __device__ Iterator(scalar_t index, scalar_t delta)
        : index_(index), delta_(delta) {}
    __forceinline__ __device__ scalar_t operator*() const { return index_; }
    __forceinline__ __device__ Iterator& operator++() {
      index_ += delta_;
      return *this;
    }
    __forceinline__ __device__ bool operator!=(const Iterator& other) const {
      bool greater = index_ > other.index_;
      bool less = index_ < other.index_;
      if (!other.delta_) {
        return less;
      }
      if (!delta_) {
        return greater;
      }
      return less || greater;
    }

   private:
    scalar_t index_;
    const scalar_t delta_;
  };

 public:
  __forceinline__ __device__ KernelLoop(scalar_t begin, scalar_t delta,
                                        scalar_t end)
      : begin_(begin), delta_(delta), end_(end) {}
  __forceinline__ __device__ Iterator begin() const {
    return Iterator{begin_, delta_};
  }
  __forceinline__ __device__ Iterator end() const { return Iterator{end_, 0}; }

 private:
  scalar_t begin_;
  scalar_t delta_;
  scalar_t end_;
};

}  // namespace detail

template <typename scalar_t, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<scalar_t> KernelLoopX(
    scalar_t count) {
  return detail::KernelLoop<scalar_t>(blockIdx.x * blockDim.x + threadIdx.x,
                                      gridDim.x * blockDim.x * NumILP, count);
}

// Helper to visit indices in the range 0 <= i < count using the y-coordinate.
// Usage: for(int i : KernelLoopY(count)) { visit(i); }
template <typename scalar_t, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<scalar_t> KernelLoopY(
    scalar_t count) {
  return detail::KernelLoop<scalar_t>(blockIdx.y * blockDim.y + threadIdx.y,
                                      gridDim.y * blockDim.y * NumILP, count);
}

// Helper to visit indices in the range 0 <= i < count using the z-coordinate.
// Usage: for(int i : KernelLoopZ(count)) { visit(i); }
template <typename scalar_t, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<scalar_t> KernelLoopZ(
    scalar_t count) {
  return detail::KernelLoop<scalar_t>(blockIdx.z * blockDim.z + threadIdx.z,
                                      gridDim.z * blockDim.z * NumILP, count);
}

}  // namespace tv
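// Usage sketch (illustrative, not part of this header): KernelLoopX yields a
// grid-stride loop, so a kernel remains correct when `count` exceeds the
// total number of launched threads. `scale_kernel` is a hypothetical kernel.
//
//   __global__ void scale_kernel(float* data, float alpha, int count) {
//     for (int i : tv::KernelLoopX<int>(count)) {
//       data[i] *= alpha;  // each thread visits i, i + stride, i + 2*stride...
//     }
//   }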
mmcv/ops/csrc/common/utils/spconv/tensorview/helper_launch.h
0 → 100644
View file @ fdeee889
#pragma once
// from pytorch.aten
#include "tensorview.h"
namespace tv {
namespace launch {

template <typename T1, typename T2>
inline int DivUp(const T1 a, const T2 b) {
  return (a + b - 1) / b;
}

constexpr int CUDA_NUM_THREADS = 1024;

inline int getBlocks(const int N) {
  TV_ASSERT_RT_ERR(N > 0,
                   "CUDA kernel launch blocks must be positive, but got N=",
                   N);
  return DivUp(N, CUDA_NUM_THREADS);
}

}  // namespace launch
}  // namespace tv
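// Host-side launch sketch (illustrative) pairing getBlocks with the
// hypothetical scale_kernel from the helper_kernel.cuh usage note above;
// `device_data` is assumed to be a device pointer holding `count` floats:
//
//   int count = 1 << 20;
//   scale_kernel<<<tv::launch::getBlocks(count),
//                  tv::launch::CUDA_NUM_THREADS>>>(device_data, 2.0f, count);
//   TV_CHECK_CUDA_ERR();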
mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h
0 → 100644
View file @ fdeee889
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
#include <type_traits>
#include <vector>
#include "pytorch_cpp_helper.hpp"
namespace tv {
#ifdef __NVCC__
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__ __host__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#define TV_ASSERT(expr) assert(expr)
#elif defined(__CUDACC_RTC__)
#define TV_ASSERT(expr) assert(expr)
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#else
#define TV_ASSERT(x) assert(x)
#define TV_HOST_DEVICE_INLINE inline
#define TV_HOST_DEVICE
#endif
#define TV_REQUIRE(expr, ...) \
{ \
if (!(expr)) { \
printf(__VA_ARGS__); \
assert(expr); \
} \
}
#define TV_DEVICE_REQUIRE(expr, ...) \
{ \
if (!(expr) && threadIdx.x == 0) printf(__VA_ARGS__); \
assert(expr); \
}
template <class SStream, class T>
void sstream_print(SStream& ss, T val) {
  ss << val;
}

template <class SStream, class T, class... TArgs>
void sstream_print(SStream& ss, T val, TArgs... args) {
  ss << val << " ";
  sstream_print(ss, args...);
}
#define TV_ASSERT_RT_ERR(expr, ...) \
{ \
if (!(expr)) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << #expr << " assert failed. "; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::runtime_error(__macro_s.str()); \
} \
}
#define TV_ASSERT_INVALID_ARG(expr, ...) \
{ \
if (!(expr)) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << #expr << " assert failed. "; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::invalid_argument(__macro_s.str()); \
} \
}
#define TV_CHECK_CUDA_ERR() \
{ \
auto err = cudaGetLastError(); \
if (err != cudaSuccess) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << "cuda execution failed with error " << err; \
throw std::runtime_error(__macro_s.str()); \
} \
}
struct CPU {};
#define TV_MAX_DIM 6
template <typename scalar_t, size_t MaxDim = TV_MAX_DIM>
struct SimpleVector {
 public:
  TV_HOST_DEVICE_INLINE SimpleVector(){};
  TV_HOST_DEVICE_INLINE SimpleVector(std::initializer_list<scalar_t> q) {
    TV_ASSERT(q.size() <= MaxDim);
    mSize = 0;
    for (scalar_t s : q) {
      mArray[mSize++] = s;
    }
    mSize = q.size();
  }
  SimpleVector(const std::vector<scalar_t>& arr) {
    TV_ASSERT(arr.size() <= MaxDim);
    for (size_t i = 0; i < arr.size(); ++i) {
      mArray[i] = arr[i];
    }
    mSize = arr.size();
  }
  TV_HOST_DEVICE_INLINE
  SimpleVector(const SimpleVector<scalar_t, MaxDim>& arr) {
    TV_ASSERT(arr.size() <= MaxDim);
    for (size_t i = 0; i < arr.size(); ++i) {
      mArray[i] = arr[i];
    }
    mSize = arr.size();
  }
  TV_HOST_DEVICE_INLINE scalar_t& operator[](int idx) {
#ifdef TV_DEBUG
    TV_ASSERT(idx >= 0 && idx < mSize);
#endif
    return mArray[idx];
  }
  TV_HOST_DEVICE_INLINE const scalar_t& operator[](int idx) const {
#ifdef TV_DEBUG
    TV_ASSERT(idx >= 0 && idx < mSize);
#endif
    return mArray[idx];
  }
  TV_HOST_DEVICE_INLINE void push_back(scalar_t s) {
#ifdef TV_DEBUG
    TV_ASSERT(mSize < MaxDim);
#endif
    mArray[mSize] = s;
    mSize++;
  }
  TV_HOST_DEVICE_INLINE void pop_back() {
#ifdef TV_DEBUG
    TV_ASSERT(mSize > 0);
#endif
    mSize--;
  }
  TV_HOST_DEVICE_INLINE size_t size() const { return mSize; }
  TV_HOST_DEVICE_INLINE const scalar_t* data() const { return mArray; }
  TV_HOST_DEVICE_INLINE size_t empty() const { return mSize == 0; }

  typedef size_t size_type;

  class iterator {
   public:
    typedef iterator self_type;
    typedef scalar_t value_type;
    typedef scalar_t& reference;
    typedef scalar_t* pointer;
    typedef std::forward_iterator_tag iterator_category;
    typedef std::ptrdiff_t difference_type;
    TV_HOST_DEVICE_INLINE iterator(pointer ptr) : ptr_(ptr) {}
    TV_HOST_DEVICE_INLINE self_type operator++(int junk) {
      self_type i = *this;
      ptr_++;
      return i;
    }
    TV_HOST_DEVICE_INLINE self_type operator++() {
      ptr_++;
      return *this;
    }
    TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }
    TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }
    TV_HOST_DEVICE_INLINE bool operator==(const self_type& rhs) {
      return ptr_ == rhs.ptr_;
    }
    TV_HOST_DEVICE_INLINE bool operator!=(const self_type& rhs) {
      return ptr_ != rhs.ptr_;
    }

   private:
    pointer ptr_;
  };

  class const_iterator {
   public:
    typedef const_iterator self_type;
    typedef scalar_t value_type;
    typedef const scalar_t& reference;
    typedef const scalar_t* pointer;
    typedef std::ptrdiff_t difference_type;
    typedef std::forward_iterator_tag iterator_category;
    TV_HOST_DEVICE_INLINE const_iterator(pointer ptr) : ptr_(ptr) {}
    TV_HOST_DEVICE_INLINE self_type operator++(int junk) {
      self_type i = *this;
      ptr_++;
      return i;
    }
    TV_HOST_DEVICE_INLINE self_type operator++() {
      ptr_++;
      return *this;
    }
    TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }
    TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }
    TV_HOST_DEVICE_INLINE bool operator==(const self_type& rhs) {
      return ptr_ == rhs.ptr_;
    }
    TV_HOST_DEVICE_INLINE bool operator!=(const self_type& rhs) {
      return ptr_ != rhs.ptr_;
    }

   private:
    pointer ptr_;
  };

  TV_HOST_DEVICE_INLINE iterator begin() { return iterator(mArray); }
  TV_HOST_DEVICE_INLINE iterator end() { return iterator(mArray + mSize); }
  TV_HOST_DEVICE_INLINE const_iterator begin() const {
    return const_iterator(mArray);
  }
  TV_HOST_DEVICE_INLINE const_iterator end() const {
    return const_iterator(mArray + mSize);
  }
  TV_HOST_DEVICE_INLINE const_iterator cbegin() const {
    return const_iterator(mArray);
  }
  TV_HOST_DEVICE_INLINE const_iterator cend() const {
    return const_iterator(mArray + mSize);
  }

 protected:
  scalar_t mArray[MaxDim];
  size_t mSize = 0;
};

template <typename scalar_t, size_t MaxDim>
bool operator==(const SimpleVector<scalar_t, MaxDim>& lfs,
                const SimpleVector<scalar_t, MaxDim>& rfs) {
  if (lfs.size() != rfs.size()) return false;
  for (size_t i = 0; i < lfs.size(); ++i) {
    if (lfs[i] != rfs[i]) return false;
  }
  return true;
}

template <typename scalar_t, size_t MaxDim>
bool operator!=(const SimpleVector<scalar_t, MaxDim>& lfs,
                const SimpleVector<scalar_t, MaxDim>& rfs) {
  return !(lfs == rfs);
}
struct Slice {
  template <class... Integers>
  TV_HOST_DEVICE_INLINE Slice(Integers... ints) {
    static_assert(sizeof...(ints) <= 3, "slice init must smaller than 3");
    SimpleVector<int, 3> slices{int(ints)...};
    mSlices[0] = -1;
    mSlices[1] = -1;
    mSlices[2] = -1;
    for (size_t i = 0; i < slices.size(); ++i) {
      mSlices[i] = slices[i];
    }
  }
  TV_HOST_DEVICE_INLINE Slice() {
    mSlices[0] = -1;
    mSlices[1] = -1;
    mSlices[2] = -1;
  }
  template <typename scalar_t>
  TV_HOST_DEVICE_INLINE Slice(std::initializer_list<scalar_t> slice) {
    mSlices[0] = -1;
    mSlices[1] = -1;
    mSlices[2] = -1;
    TV_ASSERT(slice.size() <= 3);
    int idx = 0;
    for (scalar_t s : slice) {
      mSlices[idx] = int(s);
      ++idx;
    }
  }
  TV_HOST_DEVICE_INLINE int& operator[](int idx) {
#ifdef TV_DEBUG
    TV_ASSERT(idx >= 0 && idx < 3);
#endif
    return mSlices[idx];
  }
  TV_HOST_DEVICE_INLINE const int& operator[](int idx) const {
#ifdef TV_DEBUG
    TV_ASSERT(idx >= 0 && idx < 3);
#endif
    return mSlices[idx];
  }

 protected:
  int mSlices[3];
};
template <size_t MaxDim = TV_MAX_DIM>
struct ShapeBase : public SimpleVector<int, MaxDim> {
  TV_HOST_DEVICE_INLINE ShapeBase() : SimpleVector<int, MaxDim>(){};
  TV_HOST_DEVICE_INLINE ShapeBase(std::initializer_list<int> shape)
      : SimpleVector<int, MaxDim>(shape) {}
  template <typename scalar_t, template <class...> class Container>
  ShapeBase(Container<scalar_t> shape) : SimpleVector<int, MaxDim>(shape) {}
  TV_HOST_DEVICE_INLINE ShapeBase(const ShapeBase<MaxDim>& shape)
      : SimpleVector<int, MaxDim>(shape) {}
  ShapeBase(const std::vector<int>& arr) : SimpleVector<int, MaxDim>(arr) {}
  ShapeBase<MaxDim>& operator=(const ShapeBase<MaxDim>& shape) = default;
  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start, int end) const {
#ifdef TV_DEBUG
    TV_ASSERT(start >= 0 && end < this->mSize && end > start);
#endif
    ShapeBase<MaxDim> shape;
    for (int i = start; i < end; ++i) {
      shape.push_back(this->mArray[i]);
    }
    return shape;
  }
  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start) const {
#ifdef TV_DEBUG
    TV_ASSERT(start >= 0 && start <= this->mSize);
#endif
    ShapeBase<MaxDim> shape;
    for (int i = start; i < this->mSize; ++i) {
      shape.push_back(this->mArray[i]);
    }
    return shape;
  }
  TV_HOST_DEVICE_INLINE size_t size() const {
    if (this->mSize == 0) return 0;
    size_t s = 1;
    for (int i = 0; i < int(this->mSize); ++i) {
      s *= this->mArray[i];
    }
    return s;
  }
  TV_HOST_DEVICE_INLINE size_t ndim() const { return this->mSize; }
  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze() const {
    ShapeBase<MaxDim> shape;
    for (int i = 0; i < this->mSize; ++i) {
      if (this->mArray[i] != 1) shape.push_back(this->mArray[i]);
    }
    return shape;
  }
  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze(int dim) const {
    ShapeBase<MaxDim> shape;
    for (int i = 0; i < this->mSize; ++i) {
      if (i != dim || this->mArray[i] != 1) shape.push_back(this->mArray[i]);
    }
    return shape;
  }
};

using Shape = ShapeBase<TV_MAX_DIM>;
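// Quick reference (illustrative): for a Shape, size() is the element count
// (the product of all dims), ndim() is the number of dims, and squeeze()
// drops size-1 dims:
//
//   tv::Shape s{4, 1, 3};
//   // s.ndim() == 3, s.size() == 12, s.squeeze() == tv::Shape{4, 3}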
template <class... Inds>
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int>& shape,
                                           Inds... indexes) {
  unsigned offset = 0;
  unsigned m = 1;
  int indexes_vec[sizeof...(indexes)] = {indexes...};
#ifdef TV_DEBUG
  TV_ASSERT(sizeof...(indexes) == shape.size());
#endif
#pragma unroll
  for (int i = sizeof...(indexes) - 1; i >= 0; --i) {
    offset += m * indexes_vec[i];
    m *= shape[i];
  }
  return offset;
}

TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int>& shape,
                                           std::vector<int>& indexes_vec) {
  unsigned offset = 0;
  unsigned m = 1;
  for (int i = shape.size() - 1; i >= 0; --i) {
    offset += m * indexes_vec[i];
    m *= shape[i];
  }
  return offset;
}

template <class... Inds>
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape& shape,
                                           Inds... indexes) {
  unsigned offset = 0;
  unsigned m = 1;
  int indexes_vec[sizeof...(indexes)] = {indexes...};
#pragma unroll
  for (int i = sizeof...(indexes) - 1; i >= 0; --i) {
    offset += m * indexes_vec[i];
    m *= shape[i];
  }
  return offset;
}

TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape& shape,
                                           const Shape& indexes_vec) {
  unsigned offset = 0;
  unsigned m = 1;
  for (int i = indexes_vec.ndim() - 1; i >= 0; --i) {
    offset += m * indexes_vec[i];
    m *= shape[i];
  }
  return offset;
}

template <typename Index, unsigned NDim>
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Index* indexes,
                                           const Index* shape) {
  unsigned offset = 0;
  unsigned m = 1;
#pragma unroll
  for (int i = NDim - 1; i >= 0; --i) {
    offset += m * indexes[i];
    m *= shape[i];
  }
  return offset;
}

template <typename Index, unsigned NDim>
TV_HOST_DEVICE_INLINE Index rowArrayIdxInv(Index index, Index* output,
                                           const Index* shape) {
#pragma unroll
  for (int i = NDim - 1; i >= 0; --i) {
    output[i] = index % shape[i];
    index -= output[i];
    index /= shape[i];
  }
  return index;
}
template <int N>
struct ArrayIndexRowMajor {
  TV_HOST_DEVICE_INLINE static unsigned run(const Shape& shape,
                                            const Shape& indexes) {
    return indexes[N - 1] +
           shape[N - 1] * ArrayIndexRowMajor<N - 1>::run(shape, indexes);
  }
};

template <>
struct ArrayIndexRowMajor<0> {
  TV_HOST_DEVICE_INLINE static unsigned run(const Shape& shape,
                                            const Shape& indexes) {
    return 0;
  }
};
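// Worked example (illustrative): for shape {2, 3, 4} the row-major strides
// are {12, 4, 1}, so rowArrayIdx(shape, 1, 2, 3) = 1*12 + 2*4 + 3 = 23, and
// rowArrayIdxInv(23, out, shape) recovers out = {1, 2, 3}.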
namespace detail {
template <typename scalar_t>
constexpr const char* simpleTypeName(scalar_t val = scalar_t());
template <>
constexpr const char* simpleTypeName(float val) {
  return "float32";
}
template <>
constexpr const char* simpleTypeName(double val) {
  return "float64";
}
template <>
constexpr const char* simpleTypeName(int val) {
  return "int32";
}
template <>
constexpr const char* simpleTypeName(unsigned val) {
  return "uint32";
}
template <>
constexpr const char* simpleTypeName(long val) {
  return "int64";
}
template <>
constexpr const char* simpleTypeName(unsigned long val) {
  return "uint64";
}
};  // namespace detail
template <typename scalar_t, int Rank = -1>
struct TensorView {
  TV_HOST_DEVICE_INLINE TensorView() {}
  explicit TV_HOST_DEVICE_INLINE TensorView(scalar_t* ptr, Shape shape)
      : mPtr(ptr), mShape(shape) {}
  template <class... Integers>
  explicit TV_HOST_DEVICE_INLINE TensorView(scalar_t* ptr, Integers... shapes)
      : mPtr(ptr) {
    mShape = {int(shapes)...};
  }

  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank>& assign(
      const TensorView<scalar_t, Rank>& tensor) {
    TV_REQUIRE(tensor.shape() == shape(), "you must provide same input size%s",
               "\n");
    scalar_t* ptr = mPtr;
    const scalar_t* other_ptr = tensor.data();
    for (size_t i = 0; i < size(); ++i) *(ptr++) = *(other_ptr++);
    return *this;
  }

  template <typename T1>
  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank>& assign(
      std::initializer_list<T1> seq) {
    TV_REQUIRE(seq.size() == size(), "you must provide same input size%s",
               "\n");
    scalar_t* ptr = mPtr;
    for (const T1& s : seq) *(ptr++) = scalar_t(s);
    return *this;
  }

  template <class... Inds>
  TV_HOST_DEVICE_INLINE scalar_t& operator()(Inds... inds) {
#ifdef TV_DEBUG
    int idxes[sizeof...(Inds)]{int(inds)...};
    TV_REQUIRE(sizeof...(inds) == mShape.ndim(),
               "you provide %d indexes, but dim is %d\n", sizeof...(inds),
               mShape.ndim());
    for (int i = 0; i < sizeof...(inds); ++i) {
      TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],
                 "index-%d(%d) out-of-range: [0, %d)\n", i, idxes[i],
                 mShape[i]);
    }
#endif
    return mPtr[rowArrayIdx(mShape, int(inds)...)];
  }

  template <class... Inds>
  TV_HOST_DEVICE_INLINE const scalar_t& operator()(Inds... inds) const {
#ifdef TV_DEBUG
    int idxes[sizeof...(Inds)]{int(inds)...};
    TV_REQUIRE(sizeof...(inds) == mShape.ndim(),
               "you provide %d indexes, but dim is %d\n", sizeof...(inds),
               mShape.ndim());
    for (int i = 0; i < sizeof...(inds); ++i) {
      TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],
                 "index-%d(%d) out-of-range: [0, %d)\n", i, idxes[i],
                 mShape[i]);
    }
#endif
    return mPtr[rowArrayIdx(mShape, int(inds)...)];
  }

  TV_HOST_DEVICE_INLINE scalar_t& operator()() {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mPtr != nullptr,
                      "you want get value but the view is empty.%s", "\n");
    TV_DEVICE_REQUIRE(mShape.ndim() == 0,
                      "you provide 0 indexes, but dim is %ld\n",
                      mShape.ndim());
#else
    TV_REQUIRE(mPtr != nullptr, "you want get value but the view is empty.%s",
               "\n");
    TV_REQUIRE(mShape.ndim() == 0, "you provide 0 indexes, but dim is %ld\n",
               mShape.ndim());
#endif
#endif
    return mPtr[0];
  }

  TV_HOST_DEVICE_INLINE const scalar_t& operator()() const {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mPtr != nullptr,
                      "you want get value but the view is empty.%s", "\n");
    TV_DEVICE_REQUIRE(mShape.ndim() == 0,
                      "you provide 0 indexes, but dim is %ld\n",
                      mShape.ndim());
#else
    TV_REQUIRE(mPtr != nullptr, "you want get value but the view is empty.%s",
               "\n");
    TV_REQUIRE(mShape.ndim() == 0, "you provide 0 indexes, but dim is %ld\n",
               mShape.ndim());
#endif
#endif
    return mPtr[0];
  }

  template <class T1>
  TV_HOST_DEVICE_INLINE scalar_t& operator()(T1 i1) {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 1,
                      "you provide 1 indexes, but dim is %ld\n",
                      mShape.ndim());
    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
                      "index-%d(%d) out-of-range: [0, %d)\n", 0, i1,
                      mShape[0]);
#else
    TV_REQUIRE(mShape.ndim() == 1, "you provide 1 indexes, but dim is %ld\n",
               mShape.ndim());
    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
               "index-%d(%d) out-of-range: [0, %d)\n", 0, i1, mShape[0]);
#endif
#endif
    return mPtr[i1];
  }

  template <class T1, class T2>
  TV_HOST_DEVICE_INLINE scalar_t& operator()(T1 i1, T2 i2) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 2,
                      "you provide 2 indexes, but dim is %ld\n",
                      mShape.ndim());
    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
                      "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
                      mShape[0]);
    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
                      "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
                      mShape[1]);
#else
    TV_REQUIRE(mShape.ndim() == 2, "you provide 2 indexes, but dim is %ld\n",
               mShape.ndim());
    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
               "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
               "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
#endif
#endif
    return mPtr[i1 * mShape[1] + i2];
  }

  template <class T1, class T2, class T3>
  TV_HOST_DEVICE_INLINE scalar_t& operator()(T1 i1, T2 i2, T3 i3) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 3,
                      "you provide 3 indexes, but dim is %ld\n",
                      mShape.ndim());
    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
                      "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
                      mShape[0]);
    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
                      "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
                      mShape[1]);
    TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
                      "index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
                      mShape[2]);
#else
    TV_REQUIRE(mShape.ndim() == 3, "you provide 3 indexes, but dim is %ld\n",
               mShape.ndim());
    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
               "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
               "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
    TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
               "index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
#endif
#endif
    return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];
  }

  template <class T1, class T2, class T3, class T4>
  TV_HOST_DEVICE_INLINE scalar_t& operator()(T1 i1, T2 i2, T3 i3, T4 i4) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 4,
                      "you provide 4 indexes, but dim is %ld\n",
                      mShape.ndim());
    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
                      "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
                      mShape[0]);
    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
                      "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
                      mShape[1]);
    TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
                      "index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
                      mShape[2]);
    TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],
                      "index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4),
                      mShape[3]);
#else
    TV_REQUIRE(mShape.ndim() == 4, "you provide 4 indexes, but dim is %ld\n",
               mShape.ndim());
    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
               "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
               "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
    TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
               "index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
    TV_REQUIRE(i4 >= 0 && i4 < mShape[3],
               "index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4), mShape[3]);
#endif
#endif
    return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
  }

  template <class T1>
  TV_HOST_DEVICE_INLINE const scalar_t& operator()(T1 i1) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 1,
                      "you provide 1 indexes, but dim is %ld\n",
                      mShape.ndim());
    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
                      "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
                      mShape[0]);
#else
    TV_REQUIRE(mShape.ndim() == 1, "you provide 1 indexes, but dim is %ld\n",
               mShape.ndim());
    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
               "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
#endif
#endif
    return mPtr[i1];
  }

  template <class T1, class T2>
  TV_HOST_DEVICE_INLINE const scalar_t& operator()(T1 i1, T2 i2) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 2,
                      "you provide 2 indexes, but dim is %ld\n",
                      mShape.ndim());
    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
                      "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
                      mShape[0]);
    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
                      "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
                      mShape[1]);
#else
    TV_REQUIRE(mShape.ndim() == 2, "you provide 2 indexes, but dim is %ld\n",
               mShape.ndim());
    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
               "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
               "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
#endif
#endif
    return mPtr[i1 * mShape[1] + i2];
  }

  template <class T1, class T2, class T3>
  TV_HOST_DEVICE_INLINE const scalar_t& operator()(T1 i1, T2 i2, T3 i3) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 3,
                      "you provide 3 indexes, but dim is %ld\n",
                      mShape.ndim());
    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
                      "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
                      mShape[0]);
    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
                      "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
                      mShape[1]);
    TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
                      "index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
                      mShape[2]);
#else
    TV_REQUIRE(mShape.ndim() == 3, "you provide 3 indexes, but dim is %ld\n",
               mShape.ndim());
    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
               "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
               "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
    TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
               "index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
#endif
#endif
    return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];
  }

  template <class T1, class T2, class T3, class T4>
  TV_HOST_DEVICE_INLINE const scalar_t& operator()(T1 i1, T2 i2, T3 i3,
                                                   T4 i4) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 4,
                      "you provide 4 indexes, but dim is %ld\n",
                      mShape.ndim());
    TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
                      "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
                      mShape[0]);
    TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
                      "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
                      mShape[1]);
    TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
                      "index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
                      mShape[2]);
    TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],
                      "index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4),
                      mShape[3]);
#else
    TV_REQUIRE(mShape.ndim() == 4, "you provide 4 indexes, but dim is %ld\n",
               mShape.ndim());
    TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
               "index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
    TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
               "index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
    TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
               "index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
    TV_REQUIRE(i4 >= 0 && i4 < mShape[3],
               "index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4), mShape[3]);
#endif
#endif
    return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
  }

  TV_HOST_DEVICE_INLINE scalar_t& operator[](int idx) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(idx >= 0 && idx < size(),
                      "index(%d) out-of-range: [0, %ld)\n", int(idx), size());
#else
    TV_REQUIRE(idx >= 0 && idx < size(),
               "index(%d) out-of-range: [0, %ld)\n", int(idx), size());
#endif
#endif
    return mPtr[idx];
  }

  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> operator[](
      SimpleVector<Slice> slice_vec) {
    return _subview(slice_vec);
  }
  TV_HOST_DEVICE_INLINE const TensorView<scalar_t, Rank> operator[](
      SimpleVector<Slice> slice_vec) const {
    return _subview(slice_vec);
  }
  TV_HOST_DEVICE_INLINE bool empty() const { return mPtr == nullptr; }
  TV_HOST_DEVICE_INLINE scalar_t* data() { return mPtr; }
  TV_HOST_DEVICE_INLINE const scalar_t* data() const { return mPtr; }
  TV_HOST_DEVICE_INLINE const Shape& shape() const { return mShape; }
  TV_HOST_DEVICE_INLINE int dim(int idx) const { return mShape[idx]; }
  TV_HOST_DEVICE_INLINE int ndim() const { return mShape.ndim(); }

  template <class... Inds>
  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank>& reshape(
      Inds... newShapes) {
    Shape shapes{int(newShapes)...};
    TV_ASSERT(shapes.size() == size());
    mShape = shapes;
    return *this;
  }

  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank>& reshape(Shape shapes) {
    TV_ASSERT(shapes.size() == size());
    mShape = shapes;
    return *this;
  }

  template <class... Inds>
  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> view(
      Inds... newShapes) const {
    Shape shapes{int(newShapes)...};
    for (size_t i = 0; i < shapes.ndim(); ++i) {
      if (shapes[i] == -1) {
        shapes[i] = 1;
        shapes[i] = size() / shapes.size();
        break;
      }
    }
    TV_ASSERT(shapes.size() == size());
    return TensorView<scalar_t, Rank>(mPtr, shapes);
  }

  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> view(Shape shapes) const {
    TV_ASSERT(shapes.size() == size());
    return TensorView<scalar_t, Rank>(mPtr, shapes);
  }

  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> squeeze() const {
    return TensorView<scalar_t, Rank>(mPtr, mShape.squeeze());
  }

  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> squeeze(int dim) const {
    return TensorView<scalar_t, Rank>(mPtr, mShape.squeeze(dim));
  }

  TV_HOST_DEVICE_INLINE size_t size() const { return mShape.size(); }

  template <class... Slices>
  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> subview(
      Slice slice, Slices... slices) const {
    return subview<float, Slice, Slices...>(slice, slices...);
  }

  template <class T2 = float, class... Slices>
  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> subview(
      Slices... slices) const {
    Slice slice_vec[sizeof...(Slices)] = {to_slice(slices)...};
    Shape new_shape{to_slice(slices)[0]...};
    Shape start{to_slice(slices)[0]...};
    TV_ASSERT(new_shape.ndim() <= mShape.ndim());
    TV_ASSERT(new_shape.ndim() != 0);
    size_t idxsize = new_shape.ndim();
    for (size_t i = idxsize; i < mShape.ndim(); ++i) {
      new_shape.push_back(0);
      start.push_back(0);
    }
#pragma unroll
    for (size_t i = 0; i < sizeof...(Slices); ++i) {
      if (slice_vec[i][1] != -1) {
        new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
        TV_ASSERT(new_shape[i] >= 0);
      } else {
        new_shape[i] = 1;
      }
    }
    auto offset = rowArrayIdx(mShape, start);
#pragma unroll
    for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {
      new_shape[i] = mShape[i];
      TV_ASSERT(new_shape[i] >= 0);
    }
    Shape reduced_shape;
#pragma unroll
    for (size_t i = 0; i < sizeof...(Slices); ++i) {
      if (slice_vec[i][1] != -1) {
        reduced_shape.push_back(new_shape[i]);
      }
    }
#pragma unroll
    for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {
      reduced_shape.push_back(new_shape[i]);
    }
    return TensorView<scalar_t, Rank>(mPtr + offset, reduced_shape);
  }

  template <class... Integers>
  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> subview(int id,
                                                           Integers... ints) {
    Shape start = {id, ints...};
    for (int i = 1 + sizeof...(ints); i < ndim(); ++i) {
      start.push_back(0);
    }
    return TensorView<scalar_t, Rank>(mPtr + rowArrayIdx(mShape, start),
                                      mShape.subshape(sizeof...(ints) + 1));
  }

  std::string repr() const {
    std::ostringstream ss;
    if (empty()) return "";
    if (mShape.ndim() == 0) {
      ss << *mPtr;
      ss << "Tensor: dtype=" << detail::simpleTypeName<scalar_t>();
      return ss.str();
    }
    Shape counter = mShape;
    auto tensor_flat = this->view(-1);
    for (int i = 0; i < counter.ndim(); ++i) {
      counter[i] = 0;
      ss << "[";
    }
    for (size_t i = 0; i < this->size(); ++i) {
      ss << tensor_flat(rowArrayIdx(mShape, counter));
      counter[counter.ndim() - 1] += 1;
      int inc_count = 0;
      bool print_comma = true;
      for (int c = counter.ndim() - 1; c >= 0; --c) {
        if (counter[c] == this->dim(c) && c > 0) {
          ++inc_count;
          counter[c - 1] += 1;
          counter[c] = 0;
          print_comma = false;
        }
      }
      if (print_comma && i != this->size() - 1) ss << ", ";
      for (int j = 0; j < inc_count; ++j) {
        ss << "]";
      }
      if (i != this->size() - 1) {
        if (inc_count != 0) ss << "\n";
        for (int j = 0; j < inc_count; ++j) {
          ss << "[";
        }
      }
    }
    ss << "]";
    ss << "Tensor: dtype=" << detail::simpleTypeName<scalar_t>();
    return ss.str();
  }

 protected:
  // TODO: make this function public.
  // currently this function is called unexpectedly when using subview({0, 0}).
  TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> _subview(
      SimpleVector<Slice> slice_vec) {
    Shape new_shape;
    for (int i = 0; i < slice_vec.size(); ++i) {
      new_shape.push_back(slice_vec[i][0]);
    }
    Shape start = new_shape;
    TV_ASSERT(new_shape.ndim() <= mShape.ndim());
    TV_ASSERT(new_shape.ndim() != 0);
    size_t idxsize = new_shape.ndim();
    for (size_t i = idxsize; i < mShape.ndim(); ++i) {
      new_shape.push_back(0);
      start.push_back(0);
    }
    for (size_t i = 0; i < slice_vec.size(); ++i) {
      if (slice_vec[i][1] != -1) {
        new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
        TV_ASSERT(new_shape[i] >= 0);
      } else {
        new_shape[i] = 1;  // reduce dim
      }
    }
    auto offset = rowArrayIdx(mShape, start);
    for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {
      new_shape[i] = mShape[i];
      TV_ASSERT(new_shape[i] >= 0);
    }
    Shape reduced_shape;
    for (size_t i = 0; i < slice_vec.size(); ++i) {
      if (slice_vec[i][1] != -1) {
        reduced_shape.push_back(new_shape[i]);
      }
    }
    for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {
      reduced_shape.push_back(new_shape[i]);
    }
    return TensorView<scalar_t, Rank>(mPtr + offset, reduced_shape);
  }

  template <typename T1>
  TV_HOST_DEVICE_INLINE Slice to_slice(T1 s) const {
    return Slice{int(s), -1, -1};
  }

  TV_HOST_DEVICE_INLINE Slice to_slice(Slice s) const { return Slice(s); }

  scalar_t* mPtr = nullptr;
  Shape mShape;
};
template <typename Os, typename scalar_t, int Rank>
Os& operator<<(Os& os, const TensorView<scalar_t, Rank>& dt) {
  os << dt.repr();
  return os;
}

template <typename Os, typename scalar_t, int Rank>
Os& operator<<(Os& os, const TensorView<const scalar_t, Rank>& dt) {
  os << dt.repr();
  return os;
}
namespace detail {
template <typename scalar_t>
constexpr const char* printfTypeFormat(scalar_t val = scalar_t());
template <>
constexpr const char* printfTypeFormat(float val) {
  return "%.2f";
}
template <>
constexpr const char* printfTypeFormat(double val) {
  return "%.2f";
}
template <>
constexpr const char* printfTypeFormat(int val) {
  return "%d";
}
template <>
constexpr const char* printfTypeFormat(unsigned val) {
  return "%u";
}
template <>
constexpr const char* printfTypeFormat(long val) {
  return "%ld";
}
template <>
constexpr const char* printfTypeFormat(unsigned long val) {
  return "%lu";
}
};  // namespace detail
template <typename scalar_t>
TV_HOST_DEVICE void printTensorView(const TensorView<scalar_t> tensor,
                                    const char* format) {
  if (tensor.empty()) return;
  if (tensor.ndim() == 0) {
    printf(format, tensor());
    printf("\n");
    return;
  }
  Shape counter = tensor.shape();
  auto tensor_flat = tensor.view(-1);
  for (int i = 0; i < counter.ndim(); ++i) {
    counter[i] = 0;
    printf("[");
  }
  for (size_t i = 0; i < tensor.size(); ++i) {
    printf(format, tensor_flat(rowArrayIdx(tensor.shape(), counter)));
    counter[counter.ndim() - 1] += 1;
    int inc_count = 0;
    bool print_comma = true;
    for (int c = counter.ndim() - 1; c >= 0; --c) {
      if (counter[c] == tensor.dim(c) && c > 0) {
        ++inc_count;
        counter[c - 1] += 1;
        counter[c] = 0;
        print_comma = false;
      }
    }
    if (print_comma && i != tensor.size() - 1) printf(", ");
    for (int j = 0; j < inc_count; ++j) {
      printf("]");
    }
    if (i != tensor.size() - 1) {
      if (inc_count != 0) printf("\n");
      for (int j = 0; j < inc_count; ++j) {
        printf("[");
      }
    }
  }
  printf("]\n");
}

template <typename scalar_t>
TV_HOST_DEVICE void printTensorView(TensorView<scalar_t> tensor) {
  using Traw = typename std::remove_const<scalar_t>::type;
  return printTensorView(tensor, detail::printfTypeFormat<Traw>());
}

template <typename scalar_t>
TV_HOST_DEVICE void printTensorView(const scalar_t* ptr, Shape shape) {
  using Traw = typename std::remove_const<scalar_t>::type;
  return printTensorView(TensorView<const scalar_t>(ptr, shape),
                         detail::printfTypeFormat<Traw>());
}

template <typename scalar_t>
TV_HOST_DEVICE void printTensorView(const scalar_t* ptr, Shape shape,
                                    const char* format) {
  return printTensorView(TensorView<const scalar_t>(ptr, shape), format);
}

}  // namespace tv
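// End-to-end usage sketch (illustrative, not part of this header). A
// TensorView is a non-owning view over an existing buffer, so the caller
// keeps the storage alive for the lifetime of the view:
//
//   float buf[6] = {0, 1, 2, 3, 4, 5};
//   tv::TensorView<float> view(buf, 2, 3);  // row-major view of shape {2, 3}
//   float x = view(1, 2);                   // buf[1 * 3 + 2] == 5
//   auto row = view.subview(1);             // shape {3} view of the 2nd row
//   tv::printTensorView(row);               // prints [3.00, 4.00, 5.00]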
mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp
View file @ fdeee889
...
...
@@ -10,6 +10,7 @@
#include "reduce_ops.h"
#include "roi_align.h"
#include "roi_align_rotated.h"
#include "rotated_feature_align.h"
#include "soft_nms.h"
const char *c_MMCVOpDomain = "mmcv";
...
...
@@ -17,6 +18,7 @@ SoftNmsOp c_SoftNmsOp;
NmsOp c_NmsOp;
MMCVRoiAlignCustomOp c_MMCVRoiAlignCustomOp;
MMCVRoIAlignRotatedCustomOp c_MMCVRoIAlignRotatedCustomOp;
MMCVRotatedFeatureAlignCustomOp c_MMCVRotatedFeatureAlignCustomOp;
GridSampleOp c_GridSampleOp;
MMCVCumMaxCustomOp c_MMCVCumMaxCustomOp;
MMCVCumMinCustomOp c_MMCVCumMinCustomOp;
...
...
@@ -77,5 +79,10 @@ OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options,
    return status;
  }

  if (auto status = ortApi->CustomOpDomain_Add(
          domain, &c_MMCVRotatedFeatureAlignCustomOp)) {
    return status;
  }

  return ortApi->AddCustomOpDomain(options, domain);
}
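// Caller-side sketch (illustrative, not part of this commit): the exported
// RegisterCustomOps above is what makes "MMCVRotatedFeatureAlign" and the
// other ops in the "mmcv" domain resolvable at session-creation time. The
// model path below is a placeholder.
//
//   Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "mmcv-demo");
//   Ort::SessionOptions session_options;
//   // Ort::SessionOptions converts to OrtSessionOptions*, so the entry point
//   // above can be called directly when the custom-op code is linked in.
//   if (OrtStatus* status =
//           RegisterCustomOps(session_options, OrtGetApiBase())) {
//     // handle and release the status
//   }
//   Ort::Session session(env, "model_with_mmcv_ops.onnx", session_options);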
mmcv/ops/csrc/onnxruntime/cpu/rotated_feature_align.cpp
0 → 100644
View file @ fdeee889
// Modified from
// https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_kernel.cu
#include "rotated_feature_align.h"
#include "../ort_mmcv_utils.h"
template <typename T>
T bilinear_interpolate(const T* input, const int height, const int width, T y,
                       T x, const int index /* index for debug only*/) {
  // deal with cases that inverse elements are out of feature map boundary
  if (y < -1.0 || y > height || x < -1.0 || x > width) return 0;
  if (y <= 0) y = 0;
  if (x <= 0) x = 0;
  int y_low = (int)y;
  int x_low = (int)x;
  int y_high;
  int x_high;
  if (y_low >= height - 1) {
    y_high = y_low = height - 1;
    y = (T)y_low;
  } else {
    y_high = y_low + 1;
  }
  if (x_low >= width - 1) {
    x_high = x_low = width - 1;
    x = (T)x_low;
  } else {
    x_high = x_low + 1;
  }
  T ly = y - y_low;
  T lx = x - x_low;
  T hy = 1. - ly, hx = 1. - lx;
  // do bilinear interpolation
  T v1 = input[int(fma(y_low, width, x_low))];
  T v2 = input[int(fma(y_low, width, x_high))];
  T v3 = input[int(fma(y_high, width, x_low))];
  T v4 = input[int(fma(y_high, width, x_high))];
  T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
  T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}
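// Worked example (illustrative): on a 2x2 feature map [[1, 2], [3, 4]] the
// point (y=0.5, x=0.5) has corners v1..v4 = 1, 2, 3, 4 and weights
// w1..w4 = 0.25 each, so bilinear_interpolate returns
// 0.25*1 + 0.25*2 + 0.25*3 + 0.25*4 = 2.5.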
template <typename scalar_t>
void rotated_feature_align_forward_cpu_kernel(
    const int nthreads, const int points, const scalar_t* bottom_data,
    const scalar_t* best_bboxes, const scalar_t spatial_scale,
    const int channels, const int height, const int width,
    scalar_t* top_data) {
  for (int index = 0; index < nthreads; index++) {
    int w = index % width;
    int h = (index / width) % height;
    int c = (index / width / height) % channels;
    int n = index / width / height / channels;
    const scalar_t* bbox_offset =
        best_bboxes + ((n * height + h) * width + w) * 5;
    scalar_t roi_y = bbox_offset[0] * spatial_scale;
    scalar_t roi_x = bbox_offset[1] * spatial_scale;
    scalar_t px[5] = {roi_x, 0, 0, 0, 0};
    scalar_t py[5] = {roi_y, 0, 0, 0, 0};
    if (points > 1) {
      scalar_t roi_w = bbox_offset[2] * spatial_scale;
      scalar_t roi_h = bbox_offset[3] * spatial_scale;
      scalar_t roi_a = bbox_offset[4];
      scalar_t w_2 = roi_w / 2, h_2 = roi_h / 2;
      scalar_t cosa = cosf(roi_a), sina = sinf(roi_a);
      scalar_t wx = cosa * w_2, wy = sina * w_2;
      scalar_t hx = -sina * h_2, hy = cosa * h_2;
      px[1] = roi_x + wx + hx;
      py[1] = roi_y + wy + hy;
      px[2] = roi_x - wx + hx;
      py[2] = roi_y - wy + hy;
      px[3] = roi_x - wx - hx;
      py[3] = roi_y - wy - hy;
      px[4] = roi_x + wx - hx;
      py[4] = roi_y + wy - hy;
    }
    const scalar_t* offset_bottom_data =
        bottom_data + (n * channels + c) * height * width;
    scalar_t output_val = bottom_data[index];
    for (int i = 0; i < points; i++) {
      output_val += bilinear_interpolate<scalar_t>(offset_bottom_data, height,
                                                   width, py[i], px[i], i);
    }
    top_data[index] = output_val;
  }
}
void MMCVRotatedFeatureAlignKernel::Compute(OrtKernelContext* context) {
  // Setup inputs
  const OrtValue* input_features = ort_.KernelContext_GetInput(context, 0);
  const float* features_data = reinterpret_cast<const float*>(
      ort_.GetTensorData<float>(input_features));
  const OrtValue* input_best_rbboxes = ort_.KernelContext_GetInput(context, 1);
  const float* best_rbboxes = reinterpret_cast<const float*>(
      ort_.GetTensorData<const float*>(input_best_rbboxes));
  // Setup output
  OrtTensorDimensions out_dimensions(ort_, input_features);
  int batch_size = out_dimensions.data()[0];
  int input_channels = out_dimensions.data()[1];
  int input_height = out_dimensions.data()[2];
  int input_width = out_dimensions.data()[3];
  OrtValue* output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  float* out = ort_.GetTensorMutableData<float>(output);
  OrtTensorTypeAndShapeInfo* output_info = ort_.GetTensorTypeAndShape(output);
  ort_.ReleaseTensorTypeAndShapeInfo(output_info);
  // TODO: forward here
  int output_size = out_dimensions.data()[0];
  for (auto i = 1; i < out_dimensions.size(); ++i) {
    output_size *= out_dimensions.data()[i];
  }
  rotated_feature_align_forward_cpu_kernel<float>(
      output_size, points_, features_data, best_rbboxes, spatial_scale_,
      input_channels, input_height, input_width, out);
}
mmcv/ops/csrc/onnxruntime/rotated_feature_align.h
0 → 100644
View file @ fdeee889
#ifndef ONNXRUNTIME_ROTATED_FEATURE_ALIGN_H
#define ONNXRUNTIME_ROTATED_FEATURE_ALIGN_H
#include <onnxruntime_cxx_api.h>
#include <cmath>
struct MMCVRotatedFeatureAlignKernel {
 public:
  MMCVRotatedFeatureAlignKernel(Ort::CustomOpApi ort,
                                const OrtKernelInfo* info)
      : ort_(ort) {
    spatial_scale_ = ort_.KernelInfoGetAttribute<float>(info, "spatial_scale");
    points_ = ort_.KernelInfoGetAttribute<int64_t>(info, "points");
  }

  void Compute(OrtKernelContext* context);

 private:
  Ort::CustomOpApi ort_;
  float spatial_scale_;
  int points_;
};

struct MMCVRotatedFeatureAlignCustomOp
    : Ort::CustomOpBase<MMCVRotatedFeatureAlignCustomOp,
                        MMCVRotatedFeatureAlignKernel> {
  void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const {
    return new MMCVRotatedFeatureAlignKernel(api, info);
  }
  const char* GetName() const { return "MMCVRotatedFeatureAlign"; }
  size_t GetInputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }
  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }
  // force cpu
  const char* GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif // ONNXRUNTIME_ROTATED_FEATURE_ALIGN_H