OpenDAS / MMCV · Commits

Commit 6f3c5f1c, authored Jul 11, 2024 by limm
support v1.4.0
Parent: 6f674c7e

Changes: 339 files; showing 20 changed files with 1146 additions and 2813 deletions (+1146, -2813)
mmcv/ops/csrc/common/pytorch_mlu_helper.hpp                      +0    -61
mmcv/ops/csrc/common/pytorch_npu_helper.hpp                      +0    -35
mmcv/ops/csrc/common/utils/spconv/paramsgrid.h                   +0    -70
mmcv/ops/csrc/common/utils/spconv/prettyprint.h                  +0    -493
mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h               +0    -60
mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h              +0    -295
mmcv/ops/csrc/common/utils/spconv/spconv/indice.h                +0    -78
mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h               +0    -37
mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h             +0    -50
mmcv/ops/csrc/common/utils/spconv/spconv/point2voxel.h           +0    -385
mmcv/ops/csrc/common/utils/spconv/spconv/reordering.h            +0    -36
mmcv/ops/csrc/common/utils/spconv/tensorview/helper_kernel.cuh   +0    -75
mmcv/ops/csrc/common/utils/spconv/tensorview/helper_launch.h     +0    -19
mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h        +0    -1119
mmcv/ops/csrc/onnxruntime/corner_pool.h                          +46   -0
mmcv/ops/csrc/onnxruntime/cpu/corner_pool.cpp                    +123  -0
mmcv/ops/csrc/onnxruntime/cpu/deform_conv.cpp                    +263  -0
mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp                     +314  -0
mmcv/ops/csrc/onnxruntime/cpu/modulated_deform_conv.cpp          +292  -0
mmcv/ops/csrc/onnxruntime/cpu/nms.cpp                            +108  -0
mmcv/ops/csrc/common/pytorch_mlu_helper.hpp  (deleted, 100644 → 0)
/*************************************************************************
* Copyright (C) 2021 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#ifndef PYTORCH_MLU_HELPER_HPP_
#define PYTORCH_MLU_HELPER_HPP_
#ifdef MMCV_WITH_MLU
#include "aten.h"
#define NFU_ALIGN_SIZE 128
#define PAD_UP(x, y) (((x) / (y) + (int)((x) % (y) > 0)) * (y))
#define PAD_DOWN(x, y) (((x) / (y)) * (y))
#define CEIL_DIV(x, y) (((x) + (y)-1) / (y))
#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y))
inline int32_t getJobLimitCapability() {
  CNcontext drv_ctx;
  TORCH_CHECK(CN_SUCCESS == cnCtxGetCurrent(&drv_ctx), "cnCtxGetCurrent fails");
  CNctxConfigParam ctx_conf_param;
  TORCH_CHECK(CN_SUCCESS == cnGetCtxConfigParam(drv_ctx, CN_CTX_CONFIG_UNION_LIMIT,
                                                &ctx_conf_param),
              "cnGetCtxConfigParam fails.");
  return (int32_t)ctx_conf_param.unionLimit;
}

inline int32_t getCoreNumOfJobLimitCapability() {
  switch (getJobLimitCapability()) {
    default:
      return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) *
             getJobLimitCapability();
    case CN_KERNEL_CLASS_BLOCK:
      return 1;
    case CN_KERNEL_CLASS_UNION:
      return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster);
    case CN_KERNEL_CLASS_UNION2:
      return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) * 2;
    case CN_KERNEL_CLASS_UNION4:
      return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) * 4;
    case CN_KERNEL_CLASS_UNION8:
      return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) * 8;
    case CN_KERNEL_CLASS_UNION16:
      return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) * 16;
  }
}
#endif // MMCV_WITH_MLU
#endif // PYTORCH_MLU_HELPER_HPP_
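For context, a minimal sketch of what the alignment macros at the top of this header compute. The macro definitions are copied here verbatim so the snippet compiles without the MLU toolchain; the numeric values are illustrative only, not from the diff.

// Self-contained sketch of the header's alignment macros (copied definitions).
#include <cassert>

#define PAD_UP(x, y) (((x) / (y) + (int)((x) % (y) > 0)) * (y))
#define PAD_DOWN(x, y) (((x) / (y)) * (y))
#define CEIL_DIV(x, y) (((x) + (y)-1) / (y))
#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y))

int main() {
  assert(PAD_UP(100, 128) == 128);    // round up to the next multiple of 128
  assert(PAD_DOWN(300, 128) == 256);  // round down to a multiple of 128
  assert(CEIL_DIV(300, 128) == 3);    // number of 128-wide tiles covering 300
  assert(CEIL_ALIGN(300, 128) == 384);
  return 0;
}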
mmcv/ops/csrc/common/pytorch_npu_helper.hpp  (deleted, 100644 → 0)
/******************************************************************************
* Copyright (c) 2022 Huawei Technologies Co., Ltd
* All rights reserved.
*
* Licensed under the BSD 3-Clause License (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://opensource.org/licenses/BSD-3-Clause
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#ifndef PYTORCH_NPU_HELPER_HPP_
#define PYTORCH_NPU_HELPER_HPP_
#include <torch_npu/csrc/aten/NPUNativeFunctions.h>
#include <torch_npu/csrc/framework/utils/CalcuOpUtil.h>
#include <torch_npu/csrc/framework/utils/OpAdapter.h>
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#define NPU_NAME_SPACE at_npu::native
#define REGISTER_NPU_IMPL(key, value) REGISTER_DEVICE_IMPL(key, XLA, value)
#define CHECK_NPU(x) \
TORCH_CHECK(x.device().type() == at::kXLA, #x " must be a NPU tensor")
#endif // PYTORCH_NPU_HELPER_HPP_
mmcv/ops/csrc/common/utils/spconv/paramsgrid.h  (deleted, 100644 → 0)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PARAMS_GRID_H_
#define PARAMS_GRID_H_
#include <tuple>
#include <vector>
namespace detail {
template <class scalar_t>
int getTotalSize(std::vector<scalar_t> arg) {
  return arg.size();
}

template <class scalar_t, class... TArgs>
int getTotalSize(std::vector<scalar_t> arg, std::vector<TArgs>... args) {
  return arg.size() * getTotalSize(args...);
}

template <typename scalar_t>
int getSize(std::vector<scalar_t> arg) {
  return arg.size();
}

template <int Idx, class TT, class scalar_t>
void assigner(TT &src, std::vector<int> counter, std::vector<scalar_t> &arg) {
  std::get<Idx>(src) = arg[counter[Idx]];
}

template <int Idx, class TT, class scalar_t, class... TArgs>
void assigner(TT &src, std::vector<int> counter, std::vector<scalar_t> &arg,
              std::vector<TArgs> &... args) {
  std::get<Idx>(src) = arg[counter[Idx]];
  assigner<Idx + 1>(src, counter, args...);
}
}  // namespace detail

template <class... TArgs>
std::vector<std::tuple<TArgs...>> paramsGrid(std::vector<TArgs>... args) {
  int length = detail::getTotalSize(args...);
  std::vector<int> sizes = {detail::getSize(args)...};
  int size = sizes.size();

  std::vector<std::tuple<TArgs...>> params(length);
  std::vector<int> counter(size);
  for (int i = 0; i < length; ++i) {
    detail::assigner<0>(params[i], counter, args...);
    counter[size - 1] += 1;
    for (int c = size - 1; c >= 0; --c) {
      if (counter[c] == sizes[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  return params;
}
#endif
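For reference, a minimal usage sketch of paramsGrid (not part of this commit): it enumerates the Cartesian product of the given parameter vectors as a vector of tuples. The include path and the sample values are assumptions for illustration.

#include <iostream>
#include <tuple>
#include <vector>
#include "paramsgrid.h"  // assumed include path

int main() {
  std::vector<int> batch_sizes = {1, 2};
  std::vector<float> scales = {0.5f, 1.0f, 2.0f};
  // grid holds all 2 * 3 = 6 combinations as std::tuple<int, float>.
  auto grid = paramsGrid(batch_sizes, scales);
  for (auto &p : grid) {
    std::cout << std::get<0>(p) << " " << std::get<1>(p) << "\n";
  }
  return 0;
}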
mmcv/ops/csrc/common/utils/spconv/prettyprint.h  (deleted, 100644 → 0)
// Copyright Louis Delacroix 2010 - 2014.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// A pretty printing library for C++
//
// Usage:
// Include this header, and operator<< will "just work".
#ifndef H_PRETTY_PRINT
#define H_PRETTY_PRINT
#include <cstddef>
#include <iterator>
#include <memory>
#include <ostream>
#include <set>
#include <tuple>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <valarray>
namespace pretty_print {
namespace detail {
// SFINAE type trait to detect whether T::const_iterator exists.

struct sfinae_base {
  using yes = char;
  using no = yes[2];
};

template <typename T>
struct has_const_iterator : private sfinae_base {
 private:
  template <typename C>
  static yes &test(typename C::const_iterator *);
  template <typename C>
  static no &test(...);

 public:
  static const bool value = sizeof(test<T>(nullptr)) == sizeof(yes);
  using type = T;
};

template <typename T>
struct has_begin_end : private sfinae_base {
 private:
  template <typename C>
  static yes &f(typename std::enable_if<
                std::is_same<decltype(static_cast<typename C::const_iterator (C::*)() const>(
                                 &C::begin)),
                             typename C::const_iterator (C::*)() const>::value>::type *);

  template <typename C>
  static no &f(...);

  template <typename C>
  static yes &g(typename std::enable_if<
                std::is_same<decltype(static_cast<typename C::const_iterator (C::*)() const>(
                                 &C::end)),
                             typename C::const_iterator (C::*)() const>::value,
                void>::type *);

  template <typename C>
  static no &g(...);

 public:
  static bool const beg_value = sizeof(f<T>(nullptr)) == sizeof(yes);
  static bool const end_value = sizeof(g<T>(nullptr)) == sizeof(yes);
};

}  // namespace detail

// Holds the delimiter values for a specific character type
template <typename TChar>
struct delimiters_values {
  using char_type = TChar;
  const char_type *prefix;
  const char_type *delimiter;
  const char_type *postfix;
};

// Defines the delimiter values for a specific container and character type
template <typename T, typename TChar>
struct delimiters {
  using type = delimiters_values<TChar>;
  static const type values;
};

// Functor to print containers. You can use this directly if you want
// to specify a non-default delimiters type. The printing logic can
// be customized by specializing the nested template.
template <typename T, typename TChar = char,
          typename TCharTraits = ::std::char_traits<TChar>,
          typename TDelimiters = delimiters<T, TChar>>
struct print_container_helper {
  using delimiters_type = TDelimiters;
  using ostream_type = std::basic_ostream<TChar, TCharTraits>;

  template <typename U>
  struct printer {
    static void print_body(const U &c, ostream_type &stream) {
      using std::begin;
      using std::end;

      auto it = begin(c);
      const auto the_end = end(c);

      if (it != the_end) {
        for (;;) {
          stream << *it;
          if (++it == the_end) break;
          if (delimiters_type::values.delimiter != NULL)
            stream << delimiters_type::values.delimiter;
        }
      }
    }
  };

  print_container_helper(const T &container) : container_(container) {}

  inline void operator()(ostream_type &stream) const {
    if (delimiters_type::values.prefix != NULL)
      stream << delimiters_type::values.prefix;

    printer<T>::print_body(container_, stream);

    if (delimiters_type::values.postfix != NULL)
      stream << delimiters_type::values.postfix;
  }

 private:
  const T &container_;
};

// Specialization for pairs
template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
template <typename T1, typename T2>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::pair<T1, T2>> {
  using ostream_type =
      typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;

  static void print_body(const std::pair<T1, T2> &c, ostream_type &stream) {
    stream << c.first;
    if (print_container_helper<T, TChar, TCharTraits,
                               TDelimiters>::delimiters_type::values.delimiter != NULL)
      stream << print_container_helper<T, TChar, TCharTraits,
                                       TDelimiters>::delimiters_type::values.delimiter;
    stream << c.second;
  }
};

// Specialization for tuples
template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
template <typename... Args>
struct print_container_helper<T, TChar, TCharTraits, TDelimiters>::printer<std::tuple<Args...>> {
  using ostream_type =
      typename print_container_helper<T, TChar, TCharTraits, TDelimiters>::ostream_type;
  using element_type = std::tuple<Args...>;

  template <std::size_t I>
  struct Int {};

  static void print_body(const element_type &c, ostream_type &stream) {
    tuple_print(c, stream, Int<0>());
  }

  static void tuple_print(const element_type &, ostream_type &, Int<sizeof...(Args)>) {}

  static void tuple_print(const element_type &c, ostream_type &stream,
                          typename std::conditional<sizeof...(Args) != 0, Int<0>,
                                                    std::nullptr_t>::type) {
    stream << std::get<0>(c);
    tuple_print(c, stream, Int<1>());
  }

  template <std::size_t N>
  static void tuple_print(const element_type &c, ostream_type &stream, Int<N>) {
    if (print_container_helper<T, TChar, TCharTraits,
                               TDelimiters>::delimiters_type::values.delimiter != NULL)
      stream << print_container_helper<T, TChar, TCharTraits,
                                       TDelimiters>::delimiters_type::values.delimiter;

    stream << std::get<N>(c);

    tuple_print(c, stream, Int<N + 1>());
  }
};

// Prints a print_container_helper to the specified stream.
template <typename T, typename TChar, typename TCharTraits, typename TDelimiters>
inline std::basic_ostream<TChar, TCharTraits> &operator<<(
    std::basic_ostream<TChar, TCharTraits> &stream,
    const print_container_helper<T, TChar, TCharTraits, TDelimiters> &helper) {
  helper(stream);
  return stream;
}

// Basic is_container template; specialize to derive from std::true_type for all
// desired container types
template <typename T>
struct is_container
    : public std::integral_constant<bool, detail::has_const_iterator<T>::value &&
                                              detail::has_begin_end<T>::beg_value &&
                                              detail::has_begin_end<T>::end_value> {};

template <typename T, std::size_t N>
struct is_container<T[N]> : std::true_type {};

template <std::size_t N>
struct is_container<char[N]> : std::false_type {};

template <typename T>
struct is_container<std::valarray<T>> : std::true_type {};

template <typename T1, typename T2>
struct is_container<std::pair<T1, T2>> : std::true_type {};

template <typename... Args>
struct is_container<std::tuple<Args...>> : std::true_type {};

// Default delimiters
template <typename T>
struct delimiters<T, char> {
  static const delimiters_values<char> values;
};
template <typename T>
const delimiters_values<char> delimiters<T, char>::values = {"[", ", ", "]"};

template <typename T>
struct delimiters<T, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename T>
const delimiters_values<wchar_t> delimiters<T, wchar_t>::values = {L"[", L", ", L"]"};

// Delimiters for (multi)set and unordered_(multi)set
template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::set<T, TComp, TAllocator>, char> {
  static const delimiters_values<char> values;
};
template <typename T, typename TComp, typename TAllocator>
const delimiters_values<char>
    delimiters<::std::set<T, TComp, TAllocator>, char>::values = {"{", ", ", "}"};

template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::set<T, TComp, TAllocator>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename T, typename TComp, typename TAllocator>
const delimiters_values<wchar_t>
    delimiters<::std::set<T, TComp, TAllocator>, wchar_t>::values = {L"{", L", ", L"}"};

template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::multiset<T, TComp, TAllocator>, char> {
  static const delimiters_values<char> values;
};
template <typename T, typename TComp, typename TAllocator>
const delimiters_values<char>
    delimiters<::std::multiset<T, TComp, TAllocator>, char>::values = {"{", ", ", "}"};

template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::multiset<T, TComp, TAllocator>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename T, typename TComp, typename TAllocator>
const delimiters_values<wchar_t>
    delimiters<::std::multiset<T, TComp, TAllocator>, wchar_t>::values = {L"{", L", ", L"}"};

template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>, char> {
  static const delimiters_values<char> values;
};
template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<char>
    delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>, char>::values = {"{", ", ",
                                                                                    "}"};

template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<wchar_t>
    delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t>::values = {L"{", L", ",
                                                                                       L"}"};

template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>, char> {
  static const delimiters_values<char> values;
};
template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<char>
    delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>, char>::values = {"{", ", ",
                                                                                         "}"};

template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<wchar_t>
    delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>, wchar_t>::values = {
        L"{", L", ", L"}"};

// Delimiters for pair and tuple
template <typename T1, typename T2>
struct delimiters<std::pair<T1, T2>, char> {
  static const delimiters_values<char> values;
};
template <typename T1, typename T2>
const delimiters_values<char> delimiters<std::pair<T1, T2>, char>::values = {"(", ", ", ")"};

template <typename T1, typename T2>
struct delimiters<::std::pair<T1, T2>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename T1, typename T2>
const delimiters_values<wchar_t> delimiters<::std::pair<T1, T2>, wchar_t>::values = {L"(", L", ",
                                                                                     L")"};

template <typename... Args>
struct delimiters<std::tuple<Args...>, char> {
  static const delimiters_values<char> values;
};
template <typename... Args>
const delimiters_values<char> delimiters<std::tuple<Args...>, char>::values = {"(", ", ", ")"};

template <typename... Args>
struct delimiters<::std::tuple<Args...>, wchar_t> {
  static const delimiters_values<wchar_t> values;
};
template <typename... Args>
const delimiters_values<wchar_t> delimiters<::std::tuple<Args...>, wchar_t>::values = {L"(", L", ",
                                                                                       L")"};

// Type-erasing helper class for easy use of custom delimiters.
// Requires TCharTraits = std::char_traits<TChar> and TChar = char or wchar_t,
// and MyDelims needs to be defined for TChar. Usage: "cout <<
// pretty_print::custom_delims<MyDelims>(x)".
struct custom_delims_base {
  virtual ~custom_delims_base() {}
  virtual std::ostream &stream(::std::ostream &) = 0;
  virtual std::wostream &stream(::std::wostream &) = 0;
};

template <typename T, typename Delims>
struct custom_delims_wrapper : custom_delims_base {
  custom_delims_wrapper(const T &t_) : t(t_) {}

  std::ostream &stream(std::ostream &s) {
    return s << print_container_helper<T, char, std::char_traits<char>, Delims>(t);
  }

  std::wostream &stream(std::wostream &s) {
    return s << print_container_helper<T, wchar_t, std::char_traits<wchar_t>, Delims>(t);
  }

 private:
  const T &t;
};

template <typename Delims>
struct custom_delims {
  template <typename Container>
  custom_delims(const Container &c) : base(new custom_delims_wrapper<Container, Delims>(c)) {}

  std::unique_ptr<custom_delims_base> base;
};

template <typename TChar, typename TCharTraits, typename Delims>
inline std::basic_ostream<TChar, TCharTraits> &operator<<(
    std::basic_ostream<TChar, TCharTraits> &s, const custom_delims<Delims> &p) {
  return p.base->stream(s);
}

// A wrapper for a C-style array given as pointer-plus-size.
// Usage: std::cout << pretty_print_array(arr, n) << std::endl;
template <typename T>
struct array_wrapper_n {
  typedef const T *const_iterator;
  typedef T value_type;

  array_wrapper_n(const T *const a, size_t n) : _array(a), _n(n) {}
  inline const_iterator begin() const { return _array; }
  inline const_iterator end() const { return _array + _n; }

 private:
  const T *const _array;
  size_t _n;
};

// A wrapper for hash-table based containers that offer local iterators to each
// bucket. Usage: std::cout << bucket_print(m, 4) << std::endl; (Prints bucket
// 5 of container m.)
template <typename T>
struct bucket_print_wrapper {
  typedef typename T::const_local_iterator const_iterator;
  typedef typename T::size_type size_type;

  const_iterator begin() const { return m_map.cbegin(n); }
  const_iterator end() const { return m_map.cend(n); }

  bucket_print_wrapper(const T &m, size_type bucket) : m_map(m), n(bucket) {}

 private:
  const T &m_map;
  const size_type n;
};

}  // namespace pretty_print

// Global accessor functions for the convenience wrappers
template <typename T>
inline pretty_print::array_wrapper_n<T> pretty_print_array(const T *const a, size_t n) {
  return pretty_print::array_wrapper_n<T>(a, n);
}

template <typename T>
pretty_print::bucket_print_wrapper<T> bucket_print(const T &m, typename T::size_type n) {
  return pretty_print::bucket_print_wrapper<T>(m, n);
}

// Main magic entry point: An overload snuck into namespace std.
// Can we do better?
namespace std {
// Prints a container to the stream using default delimiters
template <typename T, typename TChar, typename TCharTraits>
inline typename enable_if<::pretty_print::is_container<T>::value,
                          basic_ostream<TChar, TCharTraits> &>::type
operator<<(basic_ostream<TChar, TCharTraits> &stream, const T &container) {
  return stream << ::pretty_print::print_container_helper<T, TChar, TCharTraits>(container);
}
}  // namespace std
#endif // H_PRETTY_PRINT
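For reference, a minimal usage sketch matching the header's own "Usage" comment (not part of this commit; the include path is assumed): once the header is included, operator<< works on standard containers, pairs, and tuples with the default delimiters defined above.

#include <iostream>
#include <set>
#include <tuple>
#include <vector>
#include "prettyprint.h"  // assumed include path

int main() {
  std::vector<int> v = {1, 2, 3};
  std::set<int> s = {4, 5, 6};
  std::tuple<int, double> t{7, 8.5};
  std::cout << v << "\n";  // prints [1, 2, 3]
  std::cout << s << "\n";  // prints {4, 5, 6}
  std::cout << t << "\n";  // prints (7, 8.5)
  return 0;
}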
mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h  (deleted, 100644 → 0)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/embed.h>
#include <pybind11/functional.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <spconv/tensorview/tensorview.h>
#include <algorithm>
#include <iostream>
namespace py = pybind11;

template <typename scalar_t, typename TPyObject>
std::vector<scalar_t> array2Vector(TPyObject arr) {
  py::array arr_np = arr;
  size_t size = arr.attr("size").template cast<size_t>();
  py::array_t<scalar_t> arr_cc = arr_np;
  std::vector<scalar_t> data(arr_cc.data(), arr_cc.data() + size);
  return data;
}

template <typename scalar_t>
std::vector<scalar_t> arrayT2Vector(py::array_t<scalar_t> arr) {
  std::vector<scalar_t> data(arr.data(), arr.data() + arr.size());
  return data;
}

template <typename scalar_t, typename TPyObject>
tv::TensorView<scalar_t> array2TensorView(TPyObject arr) {
  py::array arr_np = arr;
  py::array_t<scalar_t> arr_cc = arr_np;
  tv::Shape shape;
  for (int i = 0; i < arr_cc.ndim(); ++i) {
    shape.push_back(arr_cc.shape(i));
  }
  return tv::TensorView<scalar_t>(arr_cc.mutable_data(), shape);
}

template <typename scalar_t>
tv::TensorView<scalar_t> arrayT2TensorView(py::array_t<scalar_t> arr) {
  tv::Shape shape;
  for (int i = 0; i < arr.ndim(); ++i) {
    shape.push_back(arr.shape(i));
  }
  return tv::TensorView<scalar_t>(arr.mutable_data(), shape);
}
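A hypothetical binding sketch showing how these conversion helpers are typically used (not from this commit; the module name, function name, and include path are illustrative assumptions).

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <vector>
#include "pybind11_utils.h"  // assumed include path

namespace py = pybind11;

// Copies a NumPy float32 array into a std::vector via arrayT2Vector and sums it.
float sum_array(py::array_t<float> arr) {
  std::vector<float> data = arrayT2Vector<float>(arr);
  float total = 0.f;
  for (float x : data) total += x;
  return total;
}

PYBIND11_MODULE(example_module, m) { m.def("sum_array", &sum_array); }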
mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h  (deleted, 100644 → 0)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPCONV_GEOMETRY_H_
#define SPCONV_GEOMETRY_H_
#include <utils/spconv/tensorview/tensorview.h>
#include <iostream>
#include <limits>
template <typename Index, unsigned NDim>
TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos, const Index *kernelSize,
                                    const Index *stride, const Index *padding,
                                    const Index *dilation, const Index *outSpatialShape,
                                    Index *out) {
  Index lowers[NDim];
  Index uppers[NDim];
  Index counter[NDim];
  Index counterSize[NDim];
  Index pointCounter = 0;
  Index val;
  Index numPoints = 1;
  Index m, offset;
  bool valid = false;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 + stride[i] + padding[i]) /
                stride[i];
    uppers[i] = (input_pos[i] + padding[i]) / stride[i];
  }
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
    numPoints *= counterSize[i];
  }
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    counter[i] = 0;
  }
  for (int i = 0; i < numPoints; ++i) {
    valid = true;
    m = 1;
    offset = 0;
#pragma unroll
    for (int j = NDim - 1; j >= 0; --j) {
      val = uppers[j] - counter[j] * dilation[j];
      out[pointCounter * (NDim + 1) + j] = val;
      if (val < 0 || (val > outSpatialShape[j] - 1)) {
        valid = false;
        // break;
      }
      offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j];
      m *= kernelSize[j];
    }
    out[pointCounter * (NDim + 1) + NDim] = offset;
    if (valid) ++pointCounter;
    counter[NDim - 1] += 1;
#pragma unroll
    for (int c = NDim - 1; c >= 0; --c) {
      if (counter[c] == counterSize[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  return pointCounter;
}

template <typename Index, unsigned NDim>
TV_HOST_DEVICE Index getValidOutPosTranspose(const Index *input_pos, const Index *kernelSize,
                                             const Index *stride, const Index *padding,
                                             const Index *dilation, const Index *outSpatialShape,
                                             Index *out) {
  Index lowers[NDim];
  Index uppers[NDim];
  Index counter[NDim];
  Index counterSize[NDim];
  Index pointCounter = 0;
  Index val;
  Index numPoints = 1;
  Index m, offset;
  bool valid = false;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    lowers[i] = input_pos[i] * stride[i] - padding[i];
    uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i];
  }
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
    numPoints *= counterSize[i];
  }
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    counter[i] = 0;
  }
  for (int i = 0; i < numPoints; ++i) {
    valid = true;
    m = 1;
    offset = 0;
#pragma unroll
    for (int j = NDim - 1; j >= 0; --j) {
      val = uppers[j] - counter[j] * dilation[j];
      out[pointCounter * (NDim + 1) + j] = val;
      if (val < 0 || (val > outSpatialShape[j] - 1)) {
        valid = false;
      }
      offset += m * (val - lowers[j]) / dilation[j];
      m *= kernelSize[j];
    }
    out[pointCounter * (NDim + 1) + NDim] = offset;
    if (valid) ++pointCounter;
    counter[NDim - 1] += 1;
#pragma unroll
    for (int c = NDim - 1; c >= 0; --c) {
      if (counter[c] == counterSize[c] && c > 0) {
        counter[c - 1] += 1;
        counter[c] = 0;
      }
    }
  }
  return pointCounter;
}

template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsConv(tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut,
                         tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum, const Index *kernelSize,
                         const Index *stride, const Index *padding, const Index *dilation,
                         const Index *outSpatialShape) {
  // indicesOut: num_active * kernelVolume * (NDim + 1)
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  Index *pointPtr = nullptr;
  for (int j = 0; j < numActIn; ++j) {
    batchIdx = indicesIn(j, 0);
    numValidPoints = getValidOutPos<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1, kernelSize,
                                                 stride, padding, dilation, outSpatialShape,
                                                 validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      if (gridsOut[index] == -1) {
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        gridsOut[index] = numAct++;
      }
      // indicePairs: [K, 2, L]
      indicePairs(offset, 0, indiceNum[offset]) = j;
      indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
    }
  }
  return numAct;
}

template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut,
                           tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,
                           tv::TensorView<Index> indiceNum, const Index *kernelSize,
                           const Index *stride, const Index *padding, const Index *dilation,
                           const Index *outSpatialShape) {
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  Index *pointPtr = nullptr;
  for (int j = 0; j < numActIn; ++j) {
    batchIdx = indicesIn(j, 0);
    numValidPoints = getValidOutPosTranspose<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1,
                                                          kernelSize, stride, padding, dilation,
                                                          outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      if (gridsOut[index] == -1) {
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        gridsOut[index] = numAct++;
      }
      // indicePairs: [K, 2, L]
      indicePairs(offset, 0, indiceNum[offset]) = j;
      indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
    }
  }
  return numAct;
}

template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum, const Index *const kernelSize,
                         const Index *const stride, const Index *const padding,
                         const Index *dilation, const Index *const outSpatialShape) {
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (int i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  // Index validPoints[kernelVolume * (NDim + 1)];
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  Index *pointPtr = nullptr;
  Index index = 0;
  for (int j = 0; j < numActIn; ++j) {
    index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1, outSpatialShape) +
            spatialVolume * indicesIn(j, 0);
    gridsOut[index] = j;
  }
  for (int j = 0; j < numActIn; ++j) {
    numValidPoints = getValidOutPos<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1, kernelSize,
                                                 stride, padding, dilation, outSpatialShape,
                                                 validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
              spatialVolume * indicesIn(j, 0);
      if (gridsOut[index] > -1) {
        indicePairs(offset, 0, indiceNum[offset]) = j;
        indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
      }
    }
  }
  return numActIn;
}
#endif
mmcv/ops/csrc/common/utils/spconv/spconv/indice.h  (deleted, 100644 → 0)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_CONV_INDICE_FUNCTOR_H_
#define SPARSE_CONV_INDICE_FUNCTOR_H_
#include <utils/spconv/tensorview/tensorview.h>
namespace functor {
template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctorP1 {
  Index operator()(const Device &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum,
                   tv::TensorView<Index> indicePairUnique,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape, bool transpose);
};

template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctorP2 {
  Index operator()(const Device &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum,
                   tv::TensorView<Index> indicePairUnique,
                   const tv::SimpleVector<Index, NDim> outSpatialShape, bool transpose,
                   bool resetGrid = false);
};

template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctor {
  Index operator()(const Device &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape, bool transpose,
                   bool resetGrid = false);
};

template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateSubMIndicePairFunctor {
  Index operator()(const Device &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,
                   tv::TensorView<Index> indiceNum,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape, bool transpose,
                   bool resetGrid = false);
};
}  // namespace functor
#endif
mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h  (deleted, 100644 → 0)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_MAXPOOL_FUNCTOR_H_
#define SPARSE_MAXPOOL_FUNCTOR_H_
#include <utils/spconv/tensorview/tensorview.h>
namespace functor {
template <typename Device, typename scalar_t, typename Index>
struct SparseMaxPoolForwardFunctor {
  void operator()(const Device &d, tv::TensorView<scalar_t> outFeatures,
                  tv::TensorView<const scalar_t> inFeatures,
                  tv::TensorView<const Index> indices, int size);
};

template <typename Device, typename scalar_t, typename Index>
struct SparseMaxPoolBackwardFunctor {
  void operator()(const Device &d, tv::TensorView<const scalar_t> outFeatures,
                  tv::TensorView<const scalar_t> inFeatures,
                  tv::TensorView<const scalar_t> fout, tv::TensorView<scalar_t> fin,
                  tv::TensorView<const Index> indices, int size);
};
}  // namespace functor
#endif
mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h  (deleted, 100644 → 0)
#ifndef MP_HELPER_H_
#define MP_HELPER_H_
#include <type_traits>
#include <utility>
template <class... T>
struct mp_list {};

template <class T, T... I>
using mp_list_c = mp_list<std::integral_constant<T, I>...>;

namespace detail {

template <class... T, class F>
constexpr F mp_for_each_impl(mp_list<T...>, F &&f) {
  return std::initializer_list<int>{(f(T()), 0)...}, std::forward<F>(f);
}

template <class F>
constexpr F mp_for_each_impl(mp_list<>, F &&f) {
  return std::forward<F>(f);
}

}  // namespace detail

namespace detail {

template <class A, template <class...> class B>
struct mp_rename_impl {
  // An error "no type named 'type'" here means that the first argument to
  // mp_rename is not a list
};

template <template <class...> class A, class... T, template <class...> class B>
struct mp_rename_impl<A<T...>, B> {
  using type = B<T...>;
};

}  // namespace detail

template <class A, template <class...> class B>
using mp_rename = typename ::detail::mp_rename_impl<A, B>::type;

template <class L, class F>
constexpr F mp_for_each(F &&f) {
  return ::detail::mp_for_each_impl(mp_rename<L, mp_list>(), std::forward<F>(f));
}
#endif
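For reference, a minimal usage sketch of mp_for_each (not part of this commit; the include path is assumed): it invokes a callable once per element of a compile-time list, passing each element as a default-constructed std::integral_constant.

#include <iostream>
#include "mp_helper.h"  // assumed include path

int main() {
  using sizes = mp_list_c<int, 1, 2, 4, 8>;
  mp_for_each<sizes>([](auto I) {
    // I is std::integral_constant<int, N>; its ::value is the constant.
    std::cout << decltype(I)::value << "\n";
  });
  return 0;
}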
mmcv/ops/csrc/common/utils/spconv/spconv/point2voxel.h  (deleted, 100644 → 0)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <algorithm>
#include <iostream>
namespace py = pybind11;
using namespace pybind11::literals;

template <typename DType, int NDim>
int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
                          py::array_t<int> coors, py::array_t<int> num_points_per_voxel,
                          py::array_t<int> coor_to_voxelidx, std::vector<DType> voxel_size,
                          std::vector<DType> coors_range, int max_points, int max_voxels) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto voxels_rw = voxels.template mutable_unchecked<3>();
  auto coors_rw = coors.mutable_unchecked<2>();
  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int voxelidx, num;
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) continue;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
        coors_rw(voxelidx, k) = coor[k];
      }
    }
    num = num_points_per_voxel_rw(voxelidx);
    if (num < max_points) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(voxelidx, num, k) = points_rw(i, k);
      }
      num_points_per_voxel_rw(voxelidx) += 1;
    }
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
  }
  return voxel_num;
}

template <typename DType, int NDim>
int points_to_voxel_3d_np_mean(py::array_t<DType> points, py::array_t<DType> voxels,
                               py::array_t<DType> means, py::array_t<int> coors,
                               py::array_t<int> num_points_per_voxel,
                               py::array_t<int> coor_to_voxelidx, std::vector<DType> voxel_size,
                               std::vector<DType> coors_range, int max_points, int max_voxels) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto means_rw = means.template mutable_unchecked<2>();
  auto voxels_rw = voxels.template mutable_unchecked<3>();
  auto coors_rw = coors.mutable_unchecked<2>();
  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int voxelidx, num;
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) continue;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
        coors_rw(voxelidx, k) = coor[k];
      }
    }
    num = num_points_per_voxel_rw(voxelidx);
    if (num < max_points) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(voxelidx, num, k) = points_rw(i, k);
      }
      num_points_per_voxel_rw(voxelidx) += 1;
      for (int k = 0; k < num_features; ++k) {
        means_rw(voxelidx, k) += (points_rw(i, k) - means_rw(voxelidx, k)) / DType(num + 1);
      }
    }
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
    num = num_points_per_voxel_rw(i);
    for (int j = num; j < max_points; ++j) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(i, j, k) = means_rw(i, k);
      }
    }
  }
  return voxel_num;
}

template <typename DType, int NDim>
int points_to_voxel_3d_np_height(py::array_t<DType> points, py::array_t<DType> voxels,
                                 py::array_t<DType> height, py::array_t<DType> maxs,
                                 py::array_t<int> coors, py::array_t<int> num_points_per_voxel,
                                 py::array_t<int> coor_to_voxelidx, std::vector<DType> voxel_size,
                                 std::vector<DType> coors_range, int max_points, int max_voxels) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto height_rw = height.template mutable_unchecked<2>();
  auto maxs_rw = maxs.template mutable_unchecked<2>();
  auto voxels_rw = voxels.template mutable_unchecked<3>();
  auto coors_rw = coors.mutable_unchecked<2>();
  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int voxelidx, num;
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) continue;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
        coors_rw(voxelidx, k) = coor[k];
      }
    }
    num = num_points_per_voxel_rw(voxelidx);
    if (num < max_points) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(voxelidx, num, k) = points_rw(i, k);
        height_rw(voxelidx, k) = std::min(points_rw(i, k), height_rw(voxelidx, k));
        maxs_rw(voxelidx, k) = std::max(points_rw(i, k), maxs_rw(voxelidx, k));
      }
      num_points_per_voxel_rw(voxelidx) += 1;
    }
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
    for (int k = 0; k < num_features; ++k) {
      height_rw(i, k) = maxs_rw(i, k) - height_rw(i, k);
    }
  }
  return voxel_num;
}

template <typename DType, int NDim>
int block_filtering(py::array_t<DType> points, py::array_t<int> mask, py::array_t<DType> height,
                    py::array_t<DType> maxs, py::array_t<int> coor_to_voxelidx,
                    std::vector<DType> voxel_size, std::vector<DType> coors_range, int max_voxels,
                    DType eps) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto height_rw = height.template mutable_unchecked<1>();
  auto maxs_rw = maxs.template mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int voxelidx, num;
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
    }
    height_rw(voxelidx) = std::min(points_rw(i, 2), height_rw(voxelidx));
    maxs_rw(voxelidx) = std::max(points_rw(i, 2), maxs_rw(voxelidx));
  }
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if ((maxs_rw(voxelidx) - height_rw(voxelidx, 2)) < eps) {
      mask(i) = 0;
    }
  }
}

template <typename DType, int NDim>
int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DType> voxels,
                                      py::array_t<int> voxel_mask, py::array_t<DType> mins,
                                      py::array_t<DType> maxs, py::array_t<int> coors,
                                      py::array_t<int> num_points_per_voxel,
                                      py::array_t<int> coor_to_voxelidx,
                                      std::vector<DType> voxel_size,
                                      std::vector<DType> coors_range, int max_points,
                                      int max_voxels, int block_factor, int block_size,
                                      DType height_threshold) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto mins_rw = mins.template mutable_unchecked<2>();
  auto maxs_rw = maxs.template mutable_unchecked<2>();
  auto voxels_rw = voxels.template mutable_unchecked<3>();
  auto voxel_mask_rw = voxel_mask.template mutable_unchecked<1>();
  auto coors_rw = coors.mutable_unchecked<2>();
  auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
  auto N = points_rw.shape(0);
  auto num_features = points_rw.shape(1);
  constexpr int ndim_minus_1 = NDim - 1;
  int voxel_num = 0;
  bool failed = false;
  int coor[NDim];
  int c;
  int grid_size[NDim];
  DType max_value, min_value;
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }
  int block_shape_H = grid_size[1] / block_factor;
  int block_shape_W = grid_size[0] / block_factor;
  int voxelidx, num;
  int block_coor[2];
  int startx, stopx, starty, stopy;
  for (int i = 0; i < N; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) continue;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
        coors_rw(voxelidx, k) = coor[k];
      }
    }
    num = num_points_per_voxel_rw(voxelidx);
    if (num < max_points) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(voxelidx, num, k) = points_rw(i, k);
      }
      block_coor[0] = coor[1] / block_factor;
      block_coor[1] = coor[2] / block_factor;
      mins_rw(block_coor[0], block_coor[1]) =
          std::min(points_rw(i, 2), mins_rw(block_coor[0], block_coor[1]));
      maxs_rw(block_coor[0], block_coor[1]) =
          std::max(points_rw(i, 2), maxs_rw(block_coor[0], block_coor[1]));
      num_points_per_voxel_rw(voxelidx) += 1;
    }
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor[1] = coors_rw(i, 1);
    coor[2] = coors_rw(i, 2);
    coor_to_voxelidx_rw(coors_rw(i, 0), coor[1], coor[2]) = -1;
    block_coor[0] = coor[1] / block_factor;
    block_coor[1] = coor[2] / block_factor;
    min_value = mins_rw(block_coor[0], block_coor[1]);
    max_value = maxs_rw(block_coor[0], block_coor[1]);
    startx = std::max(0, block_coor[0] - block_size / 2);
    stopx = std::min(block_shape_H, block_coor[0] + block_size - block_size / 2);
    starty = std::max(0, block_coor[1] - block_size / 2);
    stopy = std::min(block_shape_W, block_coor[1] + block_size - block_size / 2);
    for (int j = startx; j < stopx; ++j) {
      for (int k = starty; k < stopy; ++k) {
        min_value = std::min(min_value, mins_rw(j, k));
        max_value = std::max(max_value, maxs_rw(j, k));
      }
    }
    voxel_mask_rw(i) = (max_value - min_value) > height_threshold;
  }
  return voxel_num;
}
mmcv/ops/csrc/common/utils/spconv/spconv/reordering.h  (deleted, 100644 → 0)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_REORDERING_FUNCTOR_H_
#define SPARSE_REORDERING_FUNCTOR_H_
#include <utils/spconv/tensorview/tensorview.h>
namespace functor {
template <typename Device, typename scalar_t, typename Index>
struct SparseGatherFunctor {
  void operator()(const Device& d, tv::TensorView<scalar_t> buffer,
                  tv::TensorView<const scalar_t> features,
                  tv::TensorView<const Index> indices, int size);
};

template <typename Device, typename scalar_t, typename Index>
struct SparseScatterAddFunctor {
  void operator()(const Device& d, tv::TensorView<scalar_t> out_features,
                  tv::TensorView<const scalar_t> buffer,
                  tv::TensorView<const Index> indices, int size,
                  bool stable = false);
};
}  // namespace functor

#endif  // SPARSE_REORDERING_FUNCTOR_H_
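
Commentary (not part of the diff): these two functors are the reordering step of the sparse convolution: gather copies the rows of features selected by indices into a dense buffer so a GEMM can run on it, and scatter-add accumulates the result back into out_features. A CPU-only sketch of the same contract, using hypothetical plain-pointer signatures rather than the actual TensorView specializations:

#include <cstddef>

// buffer[i, :] = features[indices[i], :]
void sparse_gather(float* buffer, const float* features, const int* indices,
                   int size, int num_features) {
  for (int i = 0; i < size; ++i)
    for (int c = 0; c < num_features; ++c)
      buffer[i * num_features + c] = features[indices[i] * num_features + c];
}

// out_features[indices[i], :] += buffer[i, :]
void sparse_scatter_add(float* out_features, const float* buffer,
                        const int* indices, int size, int num_features) {
  for (int i = 0; i < size; ++i)
    for (int c = 0; c < num_features; ++c)
      out_features[indices[i] * num_features + c] +=
          buffer[i * num_features + c];
}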
mmcv/ops/csrc/common/utils/spconv/tensorview/helper_kernel.cuh (deleted, 100644 → 0)
#pragma once
namespace tv {
namespace detail {

template <typename scalar_t>
class KernelLoop {
  struct Iterator {
    __forceinline__ __device__ Iterator(scalar_t index, scalar_t delta)
        : index_(index), delta_(delta) {}
    __forceinline__ __device__ scalar_t operator*() const { return index_; }
    __forceinline__ __device__ Iterator& operator++() {
      index_ += delta_;
      return *this;
    }
    __forceinline__ __device__ bool operator!=(const Iterator& other) const {
      bool greater = index_ > other.index_;
      bool less = index_ < other.index_;
      if (!other.delta_) {
        return less;
      }
      if (!delta_) {
        return greater;
      }
      return less || greater;
    }

   private:
    scalar_t index_;
    const scalar_t delta_;
  };

 public:
  __forceinline__ __device__ KernelLoop(scalar_t begin, scalar_t delta,
                                        scalar_t end)
      : begin_(begin), delta_(delta), end_(end) {}

  __forceinline__ __device__ Iterator begin() const {
    return Iterator{begin_, delta_};
  }
  __forceinline__ __device__ Iterator end() const { return Iterator{end_, 0}; }

 private:
  scalar_t begin_;
  scalar_t delta_;
  scalar_t end_;
};

}  // namespace detail

template <typename scalar_t, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<scalar_t> KernelLoopX(
    scalar_t count) {
  return detail::KernelLoop<scalar_t>(blockIdx.x * blockDim.x + threadIdx.x,
                                      gridDim.x * blockDim.x * NumILP, count);
}

// Helper to visit indices in the range 0 <= i < count using the y-coordinate.
// Usage: for(int i : KernelLoopY(count)) { visit(i); }
template <typename scalar_t, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<scalar_t> KernelLoopY(
    scalar_t count) {
  return detail::KernelLoop<scalar_t>(blockIdx.y * blockDim.y + threadIdx.y,
                                      gridDim.y * blockDim.y * NumILP, count);
}

// Helper to visit indices in the range 0 <= i < count using the z-coordinate.
// Usage: for(int i : KernelLoopZ(count)) { visit(i); }
template <typename scalar_t, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<scalar_t> KernelLoopZ(
    scalar_t count) {
  return detail::KernelLoop<scalar_t>(blockIdx.z * blockDim.z + threadIdx.z,
                                      gridDim.z * blockDim.z * NumILP, count);
}

}  // namespace tv
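
Commentary (not part of the diff): KernelLoopX wraps the usual CUDA grid-stride pattern: each thread starts at blockIdx.x * blockDim.x + threadIdx.x and advances by gridDim.x * blockDim.x * NumILP until count is reached. A hedged device-code sketch of typical usage; the kernel name and launch configuration below are illustrative, not code from this commit:

// Illustrative kernel built on the helper above.
__global__ void scale_kernel(float* data, float alpha, int count) {
  for (int i : tv::KernelLoopX<int>(count)) {
    data[i] *= alpha;  // this thread handles i, i + stride, i + 2*stride, ...
  }
}
// launch (example): scale_kernel<<<num_blocks, 1024>>>(ptr, 2.f, count);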
mmcv/ops/csrc/common/utils/spconv/tensorview/helper_launch.h (deleted, 100644 → 0)
#pragma once
// from pytorch.aten
#include "tensorview.h"
namespace tv {
namespace launch {

template <typename T1, typename T2>
inline int DivUp(const T1 a, const T2 b) {
  return (a + b - 1) / b;
}

constexpr int CUDA_NUM_THREADS = 1024;

inline int getBlocks(const int N) {
  TV_ASSERT_RT_ERR(N > 0,
                   "CUDA kernel launch blocks must be positive, but got N=", N);
  return DivUp(N, CUDA_NUM_THREADS);
}

}  // namespace launch
}  // namespace tv
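
Commentary (not part of the diff): DivUp is a ceiling division and getBlocks turns an element count into a 1-D grid size for 1024-thread blocks; for example, 3000 elements need DivUp(3000, 1024) = 3 blocks. A small host-side check of that arithmetic, re-derived locally so it compiles on its own:

#include <cassert>

inline int div_up(int a, int b) { return (a + b - 1) / b; }  // same formula as tv::launch::DivUp

int main() {
  constexpr int kThreads = 1024;             // tv::launch::CUDA_NUM_THREADS
  assert(div_up(3000, kThreads) == 3);       // 3 blocks cover 3072 threads
  assert(div_up(1024, kThreads) == 1);       // exact multiples need no extra block
  return 0;
}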
mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h (deleted, 100644 → 0)
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
#include <type_traits>
#include <vector>
#include "pytorch_cpp_helper.hpp"
namespace
tv
{
#if defined(__NVCC__) || defined(__HIP__)
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__ __host__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#define TV_ASSERT(expr) assert(expr)
#elif defined(__CUDACC_RTC__)
#define TV_ASSERT(expr) assert(expr)
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#else
#define TV_ASSERT(x) assert(x)
#define TV_HOST_DEVICE_INLINE inline
#define TV_HOST_DEVICE
#endif
#define TV_REQUIRE(expr, ...) \
{ \
if (!(expr)) { \
printf(__VA_ARGS__); \
assert(expr); \
} \
}
#define TV_DEVICE_REQUIRE(expr, ...) \
{ \
if (!(expr) && threadIdx.x == 0) printf(__VA_ARGS__); \
assert(expr); \
}
template
<
class
SStream
,
class
T
>
void
sstream_print
(
SStream
&
ss
,
T
val
)
{
ss
<<
val
;
}
template
<
class
SStream
,
class
T
,
class
...
TArgs
>
void
sstream_print
(
SStream
&
ss
,
T
val
,
TArgs
...
args
)
{
ss
<<
val
<<
" "
;
sstream_print
(
ss
,
args
...);
}
#define TV_ASSERT_RT_ERR(expr, ...) \
{ \
if (!(expr)) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << #expr << " assert failed. "; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::runtime_error(__macro_s.str()); \
} \
}
#define TV_ASSERT_INVALID_ARG(expr, ...) \
{ \
if (!(expr)) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << #expr << " assert failed. "; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::invalid_argument(__macro_s.str()); \
} \
}
#define TV_CHECK_CUDA_ERR() \
{ \
auto err = cudaGetLastError(); \
if (err != cudaSuccess) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << "cuda execution failed with error " << err; \
throw std::runtime_error(__macro_s.str()); \
} \
}
struct
CPU
{};
#define TV_MAX_DIM 6
template
<
typename
scalar_t
,
size_t
MaxDim
=
TV_MAX_DIM
>
struct
SimpleVector
{
public:
TV_HOST_DEVICE_INLINE
SimpleVector
(){};
TV_HOST_DEVICE_INLINE
SimpleVector
(
std
::
initializer_list
<
scalar_t
>
q
)
{
TV_ASSERT
(
q
.
size
()
<=
MaxDim
);
mSize
=
0
;
for
(
scalar_t
s
:
q
)
{
mArray
[
mSize
++
]
=
s
;
}
mSize
=
q
.
size
();
}
SimpleVector
(
const
std
::
vector
<
scalar_t
>
&
arr
)
{
TV_ASSERT
(
arr
.
size
()
<=
MaxDim
);
for
(
size_t
i
=
0
;
i
<
arr
.
size
();
++
i
)
{
mArray
[
i
]
=
arr
[
i
];
}
mSize
=
arr
.
size
();
}
TV_HOST_DEVICE_INLINE
SimpleVector
(
const
SimpleVector
<
scalar_t
,
MaxDim
>
&
arr
)
{
TV_ASSERT
(
arr
.
size
()
<=
MaxDim
);
for
(
size_t
i
=
0
;
i
<
arr
.
size
();
++
i
)
{
mArray
[
i
]
=
arr
[
i
];
}
mSize
=
arr
.
size
();
}
TV_HOST_DEVICE_INLINE
scalar_t
&
operator
[](
int
idx
)
{
#ifdef TV_DEBUG
TV_ASSERT
(
idx
>=
0
&&
idx
<
mSize
);
#endif
return
mArray
[
idx
];
}
TV_HOST_DEVICE_INLINE
const
scalar_t
&
operator
[](
int
idx
)
const
{
#ifdef TV_DEBUG
TV_ASSERT
(
idx
>=
0
&&
idx
<
mSize
);
#endif
return
mArray
[
idx
];
}
TV_HOST_DEVICE_INLINE
void
push_back
(
scalar_t
s
)
{
#ifdef TV_DEBUG
TV_ASSERT
(
mSize
<
MaxDim
);
#endif
mArray
[
mSize
]
=
s
;
mSize
++
;
}
TV_HOST_DEVICE_INLINE
void
pop_back
()
{
#ifdef TV_DEBUG
TV_ASSERT
(
mSize
>
0
);
#endif
mSize
--
;
}
TV_HOST_DEVICE_INLINE
size_t
size
()
const
{
return
mSize
;
}
TV_HOST_DEVICE_INLINE
const
scalar_t
*
data
()
const
{
return
mArray
;
}
TV_HOST_DEVICE_INLINE
size_t
empty
()
const
{
return
mSize
==
0
;
}
typedef
size_t
size_type
;
class
iterator
{
public:
typedef
iterator
self_type
;
typedef
scalar_t
value_type
;
typedef
scalar_t
&
reference
;
typedef
scalar_t
*
pointer
;
typedef
std
::
forward_iterator_tag
iterator_category
;
typedef
std
::
ptrdiff_t
difference_type
;
TV_HOST_DEVICE_INLINE
iterator
(
pointer
ptr
)
:
ptr_
(
ptr
)
{}
TV_HOST_DEVICE_INLINE
self_type
operator
++
(
int
junk
)
{
self_type
i
=
*
this
;
ptr_
++
;
return
i
;
}
TV_HOST_DEVICE_INLINE
self_type
operator
++
()
{
ptr_
++
;
return
*
this
;
}
TV_HOST_DEVICE_INLINE
reference
operator
*
()
{
return
*
ptr_
;
}
TV_HOST_DEVICE_INLINE
pointer
operator
->
()
{
return
ptr_
;
}
TV_HOST_DEVICE_INLINE
bool
operator
==
(
const
self_type
&
rhs
)
{
return
ptr_
==
rhs
.
ptr_
;
}
TV_HOST_DEVICE_INLINE
bool
operator
!=
(
const
self_type
&
rhs
)
{
return
ptr_
!=
rhs
.
ptr_
;
}
private:
pointer
ptr_
;
};
class
const_iterator
{
public:
typedef
const_iterator
self_type
;
typedef
scalar_t
value_type
;
typedef
const
scalar_t
&
reference
;
typedef
const
scalar_t
*
pointer
;
typedef
std
::
ptrdiff_t
difference_type
;
typedef
std
::
forward_iterator_tag
iterator_category
;
TV_HOST_DEVICE_INLINE
const_iterator
(
pointer
ptr
)
:
ptr_
(
ptr
)
{}
TV_HOST_DEVICE_INLINE
self_type
operator
++
(
int
junk
)
{
self_type
i
=
*
this
;
ptr_
++
;
return
i
;
}
TV_HOST_DEVICE_INLINE
self_type
operator
++
()
{
ptr_
++
;
return
*
this
;
}
TV_HOST_DEVICE_INLINE
reference
operator
*
()
{
return
*
ptr_
;
}
TV_HOST_DEVICE_INLINE
pointer
operator
->
()
{
return
ptr_
;
}
TV_HOST_DEVICE_INLINE
bool
operator
==
(
const
self_type
&
rhs
)
{
return
ptr_
==
rhs
.
ptr_
;
}
TV_HOST_DEVICE_INLINE
bool
operator
!=
(
const
self_type
&
rhs
)
{
return
ptr_
!=
rhs
.
ptr_
;
}
private:
pointer
ptr_
;
};
TV_HOST_DEVICE_INLINE
iterator
begin
()
{
return
iterator
(
mArray
);
}
TV_HOST_DEVICE_INLINE
iterator
end
()
{
return
iterator
(
mArray
+
mSize
);
}
TV_HOST_DEVICE_INLINE
const_iterator
begin
()
const
{
return
const_iterator
(
mArray
);
}
TV_HOST_DEVICE_INLINE
const_iterator
end
()
const
{
return
const_iterator
(
mArray
+
mSize
);
}
TV_HOST_DEVICE_INLINE
const_iterator
cbegin
()
const
{
return
const_iterator
(
mArray
);
}
TV_HOST_DEVICE_INLINE
const_iterator
cend
()
const
{
return
const_iterator
(
mArray
+
mSize
);
}
protected:
scalar_t
mArray
[
MaxDim
];
size_t
mSize
=
0
;
};
template
<
typename
scalar_t
,
size_t
MaxDim
>
bool
operator
==
(
const
SimpleVector
<
scalar_t
,
MaxDim
>
&
lfs
,
const
SimpleVector
<
scalar_t
,
MaxDim
>
&
rfs
)
{
if
(
lfs
.
size
()
!=
rfs
.
size
())
return
false
;
for
(
size_t
i
=
0
;
i
<
lfs
.
size
();
++
i
)
{
if
(
lfs
[
i
]
!=
rfs
[
i
])
return
false
;
}
return
true
;
}
template
<
typename
scalar_t
,
size_t
MaxDim
>
bool
operator
!=
(
const
SimpleVector
<
scalar_t
,
MaxDim
>
&
lfs
,
const
SimpleVector
<
scalar_t
,
MaxDim
>
&
rfs
)
{
return
!
(
lfs
==
rfs
);
}
struct
Slice
{
template
<
class
...
Integers
>
TV_HOST_DEVICE_INLINE
Slice
(
Integers
...
ints
)
{
static_assert
(
sizeof
...(
ints
)
<=
3
,
"slice init must smaller than 3"
);
SimpleVector
<
int
,
3
>
slices
{
int
(
ints
)...};
mSlices
[
0
]
=
-
1
;
mSlices
[
1
]
=
-
1
;
mSlices
[
2
]
=
-
1
;
for
(
size_t
i
=
0
;
i
<
slices
.
size
();
++
i
)
{
mSlices
[
i
]
=
slices
[
i
];
}
}
TV_HOST_DEVICE_INLINE
Slice
()
{
mSlices
[
0
]
=
-
1
;
mSlices
[
1
]
=
-
1
;
mSlices
[
2
]
=
-
1
;
}
template
<
typename
scalar_t
>
TV_HOST_DEVICE_INLINE
Slice
(
std
::
initializer_list
<
scalar_t
>
slice
)
{
mSlices
[
0
]
=
-
1
;
mSlices
[
1
]
=
-
1
;
mSlices
[
2
]
=
-
1
;
TV_ASSERT
(
slice
.
size
()
<=
3
);
int
idx
=
0
;
for
(
scalar_t
s
:
slice
)
{
mSlices
[
idx
]
=
int
(
s
);
++
idx
;
}
}
TV_HOST_DEVICE_INLINE
int
&
operator
[](
int
idx
)
{
#ifdef TV_DEBUG
TV_ASSERT
(
idx
>=
0
&&
idx
<
3
);
#endif
return
mSlices
[
idx
];
}
TV_HOST_DEVICE_INLINE
const
int
&
operator
[](
int
idx
)
const
{
#ifdef TV_DEBUG
TV_ASSERT
(
idx
>=
0
&&
idx
<
3
);
#endif
return
mSlices
[
idx
];
}
protected:
int
mSlices
[
3
];
};
template
<
size_t
MaxDim
=
TV_MAX_DIM
>
struct
ShapeBase
:
public
SimpleVector
<
int
,
MaxDim
>
{
TV_HOST_DEVICE_INLINE
ShapeBase
()
:
SimpleVector
<
int
,
MaxDim
>
(){};
TV_HOST_DEVICE_INLINE
ShapeBase
(
std
::
initializer_list
<
int
>
shape
)
:
SimpleVector
<
int
,
MaxDim
>
(
shape
)
{}
template
<
typename
scalar_t
,
template
<
class
...
>
class
Container
>
ShapeBase
(
Container
<
scalar_t
>
shape
)
:
SimpleVector
<
int
,
MaxDim
>
(
shape
)
{}
TV_HOST_DEVICE_INLINE
ShapeBase
(
const
ShapeBase
<
MaxDim
>
&
shape
)
:
SimpleVector
<
int
,
MaxDim
>
(
shape
)
{}
ShapeBase
(
const
std
::
vector
<
int
>
&
arr
)
:
SimpleVector
<
int
,
MaxDim
>
(
arr
)
{}
ShapeBase
<
MaxDim
>
&
operator
=
(
const
ShapeBase
<
MaxDim
>
&
shape
)
=
default
;
TV_HOST_DEVICE_INLINE
ShapeBase
<
MaxDim
>
subshape
(
int
start
,
int
end
)
const
{
#ifdef TV_DEBUG
TV_ASSERT
(
start
>=
0
&&
end
<
this
->
mSize
&&
end
>
start
);
#endif
ShapeBase
<
MaxDim
>
shape
;
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
shape
.
push_back
(
this
->
mArray
[
i
]);
}
return
shape
;
}
TV_HOST_DEVICE_INLINE
ShapeBase
<
MaxDim
>
subshape
(
int
start
)
const
{
#ifdef TV_DEBUG
TV_ASSERT
(
start
>=
0
&&
start
<=
this
->
mSize
);
#endif
ShapeBase
<
MaxDim
>
shape
;
for
(
int
i
=
start
;
i
<
this
->
mSize
;
++
i
)
{
shape
.
push_back
(
this
->
mArray
[
i
]);
}
return
shape
;
}
TV_HOST_DEVICE_INLINE
size_t
size
()
const
{
if
(
this
->
mSize
==
0
)
return
0
;
size_t
s
=
1
;
for
(
int
i
=
0
;
i
<
int
(
this
->
mSize
);
++
i
)
{
s
*=
this
->
mArray
[
i
];
}
return
s
;
}
TV_HOST_DEVICE_INLINE
size_t
ndim
()
const
{
return
this
->
mSize
;
}
TV_HOST_DEVICE_INLINE
ShapeBase
<
MaxDim
>
squeeze
()
const
{
ShapeBase
<
MaxDim
>
shape
;
for
(
int
i
=
0
;
i
<
this
->
mSize
;
++
i
)
{
if
(
this
->
mArray
[
i
]
!=
1
)
shape
.
push_back
(
this
->
mArray
[
i
]);
}
return
shape
;
}
TV_HOST_DEVICE_INLINE
ShapeBase
<
MaxDim
>
squeeze
(
int
dim
)
const
{
ShapeBase
<
MaxDim
>
shape
;
for
(
int
i
=
0
;
i
<
this
->
mSize
;
++
i
)
{
if
(
i
!=
dim
||
this
->
mArray
[
i
]
!=
1
)
shape
.
push_back
(
this
->
mArray
[
i
]);
}
return
shape
;
}
};
using
Shape
=
ShapeBase
<
TV_MAX_DIM
>
;
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
unsigned
rowArrayIdx
(
std
::
vector
<
int
>
&
shape
,
Inds
...
indexes
)
{
unsigned
offset
=
0
;
unsigned
m
=
1
;
int
indexes_vec
[
sizeof
...(
indexes
)]
=
{
indexes
...};
#ifdef TV_DEBUG
TV_ASSERT
(
sizeof
...(
indexes
)
==
shape
.
size
());
#endif
#pragma unroll
for
(
int
i
=
sizeof
...(
indexes
)
-
1
;
i
>=
0
;
--
i
)
{
offset
+=
m
*
indexes_vec
[
i
];
m
*=
shape
[
i
];
}
return
offset
;
}
TV_HOST_DEVICE_INLINE
unsigned
rowArrayIdx
(
std
::
vector
<
int
>
&
shape
,
std
::
vector
<
int
>
&
indexes_vec
)
{
unsigned
offset
=
0
;
unsigned
m
=
1
;
for
(
int
i
=
shape
.
size
()
-
1
;
i
>=
0
;
--
i
)
{
offset
+=
m
*
indexes_vec
[
i
];
m
*=
shape
[
i
];
}
return
offset
;
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
unsigned
rowArrayIdx
(
const
Shape
&
shape
,
Inds
...
indexes
)
{
unsigned
offset
=
0
;
unsigned
m
=
1
;
int
indexes_vec
[
sizeof
...(
indexes
)]
=
{
indexes
...};
#pragma unroll
for
(
int
i
=
sizeof
...(
indexes
)
-
1
;
i
>=
0
;
--
i
)
{
offset
+=
m
*
indexes_vec
[
i
];
m
*=
shape
[
i
];
}
return
offset
;
}
TV_HOST_DEVICE_INLINE
unsigned
rowArrayIdx
(
const
Shape
&
shape
,
const
Shape
&
indexes_vec
)
{
unsigned
offset
=
0
;
unsigned
m
=
1
;
for
(
int
i
=
indexes_vec
.
ndim
()
-
1
;
i
>=
0
;
--
i
)
{
offset
+=
m
*
indexes_vec
[
i
];
m
*=
shape
[
i
];
}
return
offset
;
}
template
<
typename
Index
,
unsigned
NDim
>
TV_HOST_DEVICE_INLINE
unsigned
rowArrayIdx
(
const
Index
*
indexes
,
const
Index
*
shape
)
{
unsigned
offset
=
0
;
unsigned
m
=
1
;
#pragma unroll
for
(
int
i
=
NDim
-
1
;
i
>=
0
;
--
i
)
{
offset
+=
m
*
indexes
[
i
];
m
*=
shape
[
i
];
}
return
offset
;
}
template
<
typename
Index
,
unsigned
NDim
>
TV_HOST_DEVICE_INLINE
Index
rowArrayIdxInv
(
Index
index
,
Index
*
output
,
const
Index
*
shape
)
{
#pragma unroll
for
(
int
i
=
NDim
-
1
;
i
>=
0
;
--
i
)
{
output
[
i
]
=
index
%
shape
[
i
];
index
-=
output
[
i
];
index
/=
shape
[
i
];
}
return
index
;
}
template
<
int
N
>
struct
ArrayIndexRowMajor
{
TV_HOST_DEVICE_INLINE
static
unsigned
run
(
const
Shape
&
shape
,
const
Shape
&
indexes
)
{
return
indexes
[
N
-
1
]
+
shape
[
N
-
1
]
*
ArrayIndexRowMajor
<
N
-
1
>::
run
(
shape
,
indexes
);
}
};
template
<
>
struct
ArrayIndexRowMajor
<
0
>
{
TV_HOST_DEVICE_INLINE
static
unsigned
run
(
const
Shape
&
shape
,
const
Shape
&
indexes
)
{
return
0
;
}
};
namespace
detail
{
template
<
typename
scalar_t
>
constexpr
const
char
*
simpleTypeName
(
scalar_t
val
=
scalar_t
());
template
<
>
constexpr
const
char
*
simpleTypeName
(
float
val
)
{
return
"float32"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
double
val
)
{
return
"float64"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
int
val
)
{
return
"int32"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
unsigned
val
)
{
return
"uint32"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
long
val
)
{
return
"int64"
;
}
template
<
>
constexpr
const
char
*
simpleTypeName
(
unsigned
long
val
)
{
return
"uint64"
;
}
};
// namespace detail
template
<
typename
scalar_t
,
int
Rank
=
-
1
>
struct
TensorView
{
TV_HOST_DEVICE_INLINE
TensorView
()
{}
explicit
TV_HOST_DEVICE_INLINE
TensorView
(
scalar_t
*
ptr
,
Shape
shape
)
:
mPtr
(
ptr
),
mShape
(
shape
)
{}
template
<
class
...
Integers
>
explicit
TV_HOST_DEVICE_INLINE
TensorView
(
scalar_t
*
ptr
,
Integers
...
shapes
)
:
mPtr
(
ptr
)
{
mShape
=
{
int
(
shapes
)...};
}
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
&
assign
(
const
TensorView
<
scalar_t
,
Rank
>
&
tensor
)
{
TV_REQUIRE
(
tensor
.
shape
()
==
shape
(),
"you must provide same input size%s"
,
"
\n
"
);
scalar_t
*
ptr
=
mPtr
;
const
scalar_t
*
other_ptr
=
tensor
.
data
();
for
(
size_t
i
=
0
;
i
<
size
();
++
i
)
*
(
ptr
++
)
=
*
(
other_ptr
++
);
return
*
this
;
}
template
<
typename
T1
>
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
&
assign
(
std
::
initializer_list
<
T1
>
seq
)
{
TV_REQUIRE
(
seq
.
size
()
==
size
(),
"you must provide same input size%s"
,
"
\n
"
);
scalar_t
*
ptr
=
mPtr
;
for
(
const
T1
&
s
:
seq
)
*
(
ptr
++
)
=
scalar_t
(
s
);
return
*
this
;
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
scalar_t
&
operator
()(
Inds
...
inds
)
{
#ifdef TV_DEBUG
int
idxes
[
sizeof
...(
Inds
)]{
int
(
inds
)...};
TV_REQUIRE
(
sizeof
...(
inds
)
==
mShape
.
ndim
(),
"you provide %d indexes, but dim is %d
\n
"
,
sizeof
...(
inds
),
mShape
.
ndim
());
for
(
int
i
=
0
;
i
<
sizeof
...(
inds
);
++
i
)
{
TV_REQUIRE
(
idxes
[
i
]
>=
0
&&
idxes
[
i
]
<
mShape
[
i
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
i
,
idxes
[
i
],
mShape
[
i
]);
}
#endif
return
mPtr
[
rowArrayIdx
(
mShape
,
int
(
inds
)...)];
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
const
scalar_t
&
operator
()(
Inds
...
inds
)
const
{
#ifdef TV_DEBUG
int
idxes
[
sizeof
...(
Inds
)]{
int
(
inds
)...};
TV_REQUIRE
(
sizeof
...(
inds
)
==
mShape
.
ndim
(),
"you provide %d indexes, but dim is %d
\n
"
,
sizeof
...(
inds
),
mShape
.
ndim
());
for
(
int
i
=
0
;
i
<
sizeof
...(
inds
);
++
i
)
{
TV_REQUIRE
(
idxes
[
i
]
>=
0
&&
idxes
[
i
]
<
mShape
[
i
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
i
,
idxes
[
i
],
mShape
[
i
]);
}
#endif
return
mPtr
[
rowArrayIdx
(
mShape
,
int
(
inds
)...)];
}
TV_HOST_DEVICE_INLINE
scalar_t
&
operator
()()
{
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mPtr
!=
nullptr
,
"you want get value but the view is empty.%s"
,
"
\n
"
);
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
#else
TV_REQUIRE
(
mPtr
!=
nullptr
,
"you want get value but the view is empty.%s"
,
"
\n
"
);
TV_REQUIRE
(
mShape
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
#endif
#endif
return
mPtr
[
0
];
}
TV_HOST_DEVICE_INLINE
const
scalar_t
&
operator
()()
const
{
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mPtr
!=
nullptr
,
"you want get value but the view is empty.%s"
,
"
\n
"
);
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
#else
TV_REQUIRE
(
mPtr
!=
nullptr
,
"you want get value but the view is empty.%s"
,
"
\n
"
);
TV_REQUIRE
(
mShape
.
ndim
()
==
0
,
"you provide 0 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
#endif
#endif
return
mPtr
[
0
];
}
template
<
class
T1
>
TV_HOST_DEVICE_INLINE
scalar_t
&
operator
()(
T1
i1
)
{
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
i1
,
mShape
[
0
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
i1
,
mShape
[
0
]);
#endif
#endif
return
mPtr
[
i1
];
}
template
<
class
T1
,
class
T2
>
TV_HOST_DEVICE_INLINE
scalar_t
&
operator
()(
T1
i1
,
T2
i2
)
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
#endif
#endif
return
mPtr
[
i1
*
mShape
[
1
]
+
i2
];
}
template
<
class
T1
,
class
T2
,
class
T3
>
TV_HOST_DEVICE_INLINE
scalar_t
&
operator
()(
T1
i1
,
T2
i2
,
T3
i3
)
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_DEVICE_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
#endif
#endif
return
mPtr
[(
i1
*
mShape
[
1
]
+
i2
)
*
mShape
[
2
]
+
i3
];
}
template
<
class
T1
,
class
T2
,
class
T3
,
class
T4
>
TV_HOST_DEVICE_INLINE
scalar_t
&
operator
()(
T1
i1
,
T2
i2
,
T3
i3
,
T4
i4
)
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_DEVICE_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
TV_DEVICE_REQUIRE
(
i4
>=
0
&&
i4
<
mShape
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
mShape
[
3
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
TV_REQUIRE
(
i4
>=
0
&&
i4
<
mShape
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
mShape
[
3
]);
#endif
#endif
return
mPtr
[((
i1
*
mShape
[
1
]
+
i2
)
*
mShape
[
2
]
+
i3
)
*
mShape
[
3
]
+
i4
];
}
template
<
class
T1
>
TV_HOST_DEVICE_INLINE
const
scalar_t
&
operator
()(
T1
i1
)
const
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
1
,
"you provide 1 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
#endif
#endif
return
mPtr
[
i1
];
}
template
<
class
T1
,
class
T2
>
TV_HOST_DEVICE_INLINE
const
scalar_t
&
operator
()(
T1
i1
,
T2
i2
)
const
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
2
,
"you provide 2 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
#endif
#endif
return
mPtr
[
i1
*
mShape
[
1
]
+
i2
];
}
template
<
class
T1
,
class
T2
,
class
T3
>
TV_HOST_DEVICE_INLINE
const
scalar_t
&
operator
()(
T1
i1
,
T2
i2
,
T3
i3
)
const
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_DEVICE_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
3
,
"you provide 3 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
#endif
#endif
return
mPtr
[(
i1
*
mShape
[
1
]
+
i2
)
*
mShape
[
2
]
+
i3
];
}
template
<
class
T1
,
class
T2
,
class
T3
,
class
T4
>
TV_HOST_DEVICE_INLINE
const
scalar_t
&
operator
()(
T1
i1
,
T2
i2
,
T3
i3
,
T4
i4
)
const
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
mShape
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_DEVICE_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_DEVICE_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_DEVICE_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
TV_DEVICE_REQUIRE
(
i4
>=
0
&&
i4
<
mShape
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
mShape
[
3
]);
#else
TV_REQUIRE
(
mShape
.
ndim
()
==
4
,
"you provide 4 indexes, but dim is %ld
\n
"
,
mShape
.
ndim
());
TV_REQUIRE
(
i1
>=
0
&&
i1
<
mShape
[
0
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
0
,
int
(
i1
),
mShape
[
0
]);
TV_REQUIRE
(
i2
>=
0
&&
i2
<
mShape
[
1
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
1
,
int
(
i2
),
mShape
[
1
]);
TV_REQUIRE
(
i3
>=
0
&&
i3
<
mShape
[
2
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
2
,
int
(
i3
),
mShape
[
2
]);
TV_REQUIRE
(
i4
>=
0
&&
i4
<
mShape
[
3
],
"index-%d(%d) out-of-range: [0, %d)
\n
"
,
3
,
int
(
i4
),
mShape
[
3
]);
#endif
#endif
return
mPtr
[((
i1
*
mShape
[
1
]
+
i2
)
*
mShape
[
2
]
+
i3
)
*
mShape
[
3
]
+
i4
];
}
TV_HOST_DEVICE_INLINE
scalar_t
&
operator
[](
int
idx
)
{
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE
(
idx
>=
0
&&
idx
<
size
(),
"index(%d) out-of-range: [0, %ld)
\n
"
,
int
(
idx
),
size
());
#else
TV_REQUIRE
(
idx
>=
0
&&
idx
<
size
(),
"index(%d) out-of-range: [0, %ld)
\n
"
,
int
(
idx
),
size
());
#endif
#endif
return
mPtr
[
idx
];
}
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
operator
[](
SimpleVector
<
Slice
>
slice_vec
)
{
return
_subview
(
slice_vec
);
}
TV_HOST_DEVICE_INLINE
const
TensorView
<
scalar_t
,
Rank
>
operator
[](
SimpleVector
<
Slice
>
slice_vec
)
const
{
return
_subview
(
slice_vec
);
}
TV_HOST_DEVICE_INLINE
bool
empty
()
const
{
return
mPtr
==
nullptr
;
}
TV_HOST_DEVICE_INLINE
scalar_t
*
data
()
{
return
mPtr
;
}
TV_HOST_DEVICE_INLINE
const
scalar_t
*
data
()
const
{
return
mPtr
;
}
TV_HOST_DEVICE_INLINE
const
Shape
&
shape
()
const
{
return
mShape
;
}
TV_HOST_DEVICE_INLINE
int
dim
(
int
idx
)
const
{
return
mShape
[
idx
];
}
TV_HOST_DEVICE_INLINE
int
ndim
()
const
{
return
mShape
.
ndim
();
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
&
reshape
(
Inds
...
newShapes
)
{
Shape
shapes
{
int
(
newShapes
)...};
TV_ASSERT
(
shapes
.
size
()
==
size
());
mShape
=
shapes
;
return
*
this
;
}
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
&
reshape
(
Shape
shapes
)
{
TV_ASSERT
(
shapes
.
size
()
==
size
());
mShape
=
shapes
;
return
*
this
;
}
template
<
class
...
Inds
>
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
view
(
Inds
...
newShapes
)
const
{
Shape
shapes
{
int
(
newShapes
)...};
for
(
size_t
i
=
0
;
i
<
shapes
.
ndim
();
++
i
)
{
if
(
shapes
[
i
]
==
-
1
)
{
shapes
[
i
]
=
1
;
shapes
[
i
]
=
size
()
/
shapes
.
size
();
break
;
}
}
TV_ASSERT
(
shapes
.
size
()
==
size
());
return
TensorView
<
scalar_t
,
Rank
>
(
mPtr
,
shapes
);
}
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
view
(
Shape
shapes
)
const
{
TV_ASSERT
(
shapes
.
size
()
==
size
());
return
TensorView
<
scalar_t
,
Rank
>
(
mPtr
,
shapes
);
}
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
squeeze
()
const
{
return
TensorView
<
scalar_t
,
Rank
>
(
mPtr
,
mShape
.
squeeze
());
}
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
squeeze
(
int
dim
)
const
{
return
TensorView
<
scalar_t
,
Rank
>
(
mPtr
,
mShape
.
squeeze
(
dim
));
}
TV_HOST_DEVICE_INLINE
size_t
size
()
const
{
return
mShape
.
size
();
}
template
<
class
...
Slices
>
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
subview
(
Slice
slice
,
Slices
...
slices
)
const
{
return
subview
<
float
,
Slice
,
Slices
...
>
(
slice
,
slices
...);
}
template
<
class
T2
=
float
,
class
...
Slices
>
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
subview
(
Slices
...
slices
)
const
{
Slice
slice_vec
[
sizeof
...(
Slices
)]
=
{
to_slice
(
slices
)...};
Shape
new_shape
{
to_slice
(
slices
)[
0
]...};
Shape
start
{
to_slice
(
slices
)[
0
]...};
TV_ASSERT
(
new_shape
.
ndim
()
<=
mShape
.
ndim
());
TV_ASSERT
(
new_shape
.
ndim
()
!=
0
);
size_t
idxsize
=
new_shape
.
ndim
();
for
(
size_t
i
=
idxsize
;
i
<
mShape
.
ndim
();
++
i
)
{
new_shape
.
push_back
(
0
);
start
.
push_back
(
0
);
}
#pragma unroll
for
(
size_t
i
=
0
;
i
<
sizeof
...(
Slices
);
++
i
)
{
if
(
slice_vec
[
i
][
1
]
!=
-
1
)
{
new_shape
[
i
]
=
slice_vec
[
i
][
1
]
-
slice_vec
[
i
][
0
];
TV_ASSERT
(
new_shape
[
i
]
>=
0
);
}
else
{
new_shape
[
i
]
=
1
;
}
}
auto
offset
=
rowArrayIdx
(
mShape
,
start
);
#pragma unroll
for
(
size_t
i
=
sizeof
...(
Slices
);
i
<
mShape
.
ndim
();
++
i
)
{
new_shape
[
i
]
=
mShape
[
i
];
TV_ASSERT
(
new_shape
[
i
]
>=
0
);
}
Shape
reduced_shape
;
#pragma unroll
for
(
size_t
i
=
0
;
i
<
sizeof
...(
Slices
);
++
i
)
{
if
(
slice_vec
[
i
][
1
]
!=
-
1
)
{
reduced_shape
.
push_back
(
new_shape
[
i
]);
}
}
#pragma unroll
for
(
size_t
i
=
sizeof
...(
Slices
);
i
<
mShape
.
ndim
();
++
i
)
{
reduced_shape
.
push_back
(
new_shape
[
i
]);
}
return
TensorView
<
scalar_t
,
Rank
>
(
mPtr
+
offset
,
reduced_shape
);
}
template
<
class
...
Integers
>
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
subview
(
int
id
,
Integers
...
ints
)
{
Shape
start
=
{
id
,
ints
...};
for
(
int
i
=
1
+
sizeof
...(
ints
);
i
<
ndim
();
++
i
)
{
start
.
push_back
(
0
);
}
return
TensorView
<
scalar_t
,
Rank
>
(
mPtr
+
rowArrayIdx
(
mShape
,
start
),
mShape
.
subshape
(
sizeof
...(
ints
)
+
1
));
}
std
::
string
repr
()
const
{
std
::
ostringstream
ss
;
if
(
empty
())
return
""
;
if
(
mShape
.
ndim
()
==
0
)
{
ss
<<
*
mPtr
;
ss
<<
"Tensor: dtype="
<<
detail
::
simpleTypeName
<
scalar_t
>
();
return
ss
.
str
();
}
Shape
counter
=
mShape
;
auto
tensor_flat
=
this
->
view
(
-
1
);
for
(
int
i
=
0
;
i
<
counter
.
ndim
();
++
i
)
{
counter
[
i
]
=
0
;
ss
<<
"["
;
}
for
(
size_t
i
=
0
;
i
<
this
->
size
();
++
i
)
{
ss
<<
tensor_flat
(
rowArrayIdx
(
mShape
,
counter
));
counter
[
counter
.
ndim
()
-
1
]
+=
1
;
int
inc_count
=
0
;
bool
print_comma
=
true
;
for
(
int
c
=
counter
.
ndim
()
-
1
;
c
>=
0
;
--
c
)
{
if
(
counter
[
c
]
==
this
->
dim
(
c
)
&&
c
>
0
)
{
++
inc_count
;
counter
[
c
-
1
]
+=
1
;
counter
[
c
]
=
0
;
print_comma
=
false
;
}
}
if
(
print_comma
&&
i
!=
this
->
size
()
-
1
)
ss
<<
", "
;
for
(
int
j
=
0
;
j
<
inc_count
;
++
j
)
{
ss
<<
"]"
;
}
if
(
i
!=
this
->
size
()
-
1
)
{
if
(
inc_count
!=
0
)
ss
<<
"
\n
"
;
for
(
int
j
=
0
;
j
<
inc_count
;
++
j
)
{
ss
<<
"["
;
}
}
}
ss
<<
"]"
;
ss
<<
"Tensor: dtype="
<<
detail
::
simpleTypeName
<
scalar_t
>
();
return
ss
.
str
();
}
protected:
// TODO: make this function public.
// currently this function is called unexpectedly when using subview({0, 0}).
TV_HOST_DEVICE_INLINE
TensorView
<
scalar_t
,
Rank
>
_subview
(
SimpleVector
<
Slice
>
slice_vec
)
{
Shape
new_shape
;
for
(
int
i
=
0
;
i
<
slice_vec
.
size
();
++
i
)
{
new_shape
.
push_back
(
slice_vec
[
i
][
0
]);
}
Shape
start
=
new_shape
;
TV_ASSERT
(
new_shape
.
ndim
()
<=
mShape
.
ndim
());
TV_ASSERT
(
new_shape
.
ndim
()
!=
0
);
size_t
idxsize
=
new_shape
.
ndim
();
for
(
size_t
i
=
idxsize
;
i
<
mShape
.
ndim
();
++
i
)
{
new_shape
.
push_back
(
0
);
start
.
push_back
(
0
);
}
for
(
size_t
i
=
0
;
i
<
slice_vec
.
size
();
++
i
)
{
if
(
slice_vec
[
i
][
1
]
!=
-
1
)
{
new_shape
[
i
]
=
slice_vec
[
i
][
1
]
-
slice_vec
[
i
][
0
];
TV_ASSERT
(
new_shape
[
i
]
>=
0
);
}
else
{
new_shape
[
i
]
=
1
;
// reduce dim
}
}
auto
offset
=
rowArrayIdx
(
mShape
,
start
);
for
(
size_t
i
=
slice_vec
.
size
();
i
<
mShape
.
ndim
();
++
i
)
{
new_shape
[
i
]
=
mShape
[
i
];
TV_ASSERT
(
new_shape
[
i
]
>=
0
);
}
Shape
reduced_shape
;
for
(
size_t
i
=
0
;
i
<
slice_vec
.
size
();
++
i
)
{
if
(
slice_vec
[
i
][
1
]
!=
-
1
)
{
reduced_shape
.
push_back
(
new_shape
[
i
]);
}
}
for
(
size_t
i
=
slice_vec
.
size
();
i
<
mShape
.
ndim
();
++
i
)
{
reduced_shape
.
push_back
(
new_shape
[
i
]);
}
return
TensorView
<
scalar_t
,
Rank
>
(
mPtr
+
offset
,
reduced_shape
);
}
template
<
typename
T1
>
TV_HOST_DEVICE_INLINE
Slice
to_slice
(
T1
s
)
const
{
return
Slice
{
int
(
s
),
-
1
,
-
1
};
}
TV_HOST_DEVICE_INLINE
Slice
to_slice
(
Slice
s
)
const
{
return
Slice
(
s
);
}
scalar_t
*
mPtr
=
nullptr
;
Shape
mShape
;
};
template
<
typename
Os
,
typename
scalar_t
,
int
Rank
>
Os
&
operator
<<
(
Os
&
os
,
const
TensorView
<
scalar_t
,
Rank
>
&
dt
)
{
os
<<
dt
.
repr
();
return
os
;
}
template
<
typename
Os
,
typename
scalar_t
,
int
Rank
>
Os
&
operator
<<
(
Os
&
os
,
const
TensorView
<
const
scalar_t
,
Rank
>
&
dt
)
{
os
<<
dt
.
repr
();
return
os
;
}
namespace
detail
{
template
<
typename
scalar_t
>
constexpr
const
char
*
printfTypeFormat
(
scalar_t
val
=
scalar_t
());
template
<
>
constexpr
const
char
*
printfTypeFormat
(
float
val
)
{
return
"%.2f"
;
}
template
<
>
constexpr
const
char
*
printfTypeFormat
(
double
val
)
{
return
"%.2f"
;
}
template
<
>
constexpr
const
char
*
printfTypeFormat
(
int
val
)
{
return
"%d"
;
}
template
<
>
constexpr
const
char
*
printfTypeFormat
(
unsigned
val
)
{
return
"%u"
;
}
template
<
>
constexpr
const
char
*
printfTypeFormat
(
long
val
)
{
return
"%ld"
;
}
template
<
>
constexpr
const
char
*
printfTypeFormat
(
unsigned
long
val
)
{
return
"%lu"
;
}
};
// namespace detail
template
<
typename
scalar_t
>
TV_HOST_DEVICE
void
printTensorView
(
const
TensorView
<
scalar_t
>
tensor
,
const
char
*
format
)
{
if
(
tensor
.
empty
())
return
;
if
(
tensor
.
ndim
()
==
0
)
{
printf
(
format
,
tensor
());
printf
(
"
\n
"
);
return
;
}
Shape
counter
=
tensor
.
shape
();
auto
tensor_flat
=
tensor
.
view
(
-
1
);
for
(
int
i
=
0
;
i
<
counter
.
ndim
();
++
i
)
{
counter
[
i
]
=
0
;
printf
(
"["
);
}
for
(
size_t
i
=
0
;
i
<
tensor
.
size
();
++
i
)
{
printf
(
format
,
tensor_flat
(
rowArrayIdx
(
tensor
.
shape
(),
counter
)));
counter
[
counter
.
ndim
()
-
1
]
+=
1
;
int
inc_count
=
0
;
bool
print_comma
=
true
;
for
(
int
c
=
counter
.
ndim
()
-
1
;
c
>=
0
;
--
c
)
{
if
(
counter
[
c
]
==
tensor
.
dim
(
c
)
&&
c
>
0
)
{
++
inc_count
;
counter
[
c
-
1
]
+=
1
;
counter
[
c
]
=
0
;
print_comma
=
false
;
}
}
if
(
print_comma
&&
i
!=
tensor
.
size
()
-
1
)
printf
(
", "
);
for
(
int
j
=
0
;
j
<
inc_count
;
++
j
)
{
printf
(
"]"
);
}
if
(
i
!=
tensor
.
size
()
-
1
)
{
if
(
inc_count
!=
0
)
printf
(
"
\n
"
);
for
(
int
j
=
0
;
j
<
inc_count
;
++
j
)
{
printf
(
"["
);
}
}
}
printf
(
"]
\n
"
);
}
template
<
typename
scalar_t
>
TV_HOST_DEVICE
void
printTensorView
(
TensorView
<
scalar_t
>
tensor
)
{
using
Traw
=
typename
std
::
remove_const
<
scalar_t
>::
type
;
return
printTensorView
(
tensor
,
detail
::
printfTypeFormat
<
Traw
>
());
}
template
<
typename
scalar_t
>
TV_HOST_DEVICE
void
printTensorView
(
const
scalar_t
*
ptr
,
Shape
shape
)
{
using
Traw
=
typename
std
::
remove_const
<
scalar_t
>::
type
;
return
printTensorView
(
TensorView
<
const
scalar_t
>
(
ptr
,
shape
),
detail
::
printfTypeFormat
<
Traw
>
());
}
template
<
typename
scalar_t
>
TV_HOST_DEVICE
void
printTensorView
(
const
scalar_t
*
ptr
,
Shape
shape
,
const
char
*
format
)
{
return
printTensorView
(
TensorView
<
const
scalar_t
>
(
ptr
,
shape
),
format
);
}
}
// namespace tv
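
Commentary (not part of the diff): tensorview.h, removed in this commit, provides tv::TensorView, a non-owning view over a raw pointer plus a tv::Shape, with row-major operator() indexing, reshape/view, and subview helpers usable from both host and device. A minimal host-side usage sketch, based on my reading of the declarations above; values and the demo function name are illustrative:

#include <vector>
// #include "tensorview.h"  // include path as used inside the deleted spconv sources

void tensorview_demo() {
  std::vector<float> storage(2 * 3, 0.f);
  tv::TensorView<float> view(storage.data(), 2, 3);  // 2 x 3 row-major view
  view(1, 2) = 5.f;            // writes storage[1 * 3 + 2]
  auto row = view.subview(1);  // 1-D view of the second row
  float x = row(2);            // reads back 5.f
  (void)x;
}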
mmcv/ops/csrc/onnxruntime/corner_pool.h (new file, 0 → 100644)
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_CORNER_POOL_H
#define ONNXRUNTIME_CORNER_POOL_H
#include <assert.h>
#include <onnxruntime_cxx_api.h>
struct MMCVCornerPoolKernel {
 public:
  MMCVCornerPoolKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    mode_ = ort_.KernelInfoGetAttribute<int64_t>(info, "mode");
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;
  int64_t mode_;
};

struct MMCVCornerPoolCustomOp
    : Ort::CustomOpBase<MMCVCornerPoolCustomOp, MMCVCornerPoolKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVCornerPoolKernel(api, info);
  }

  const char *GetName() const { return "MMCVCornerPool"; }

  size_t GetInputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};

#endif  // ONNXRUNTIME_CORNER_POOL_H
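
Commentary (not part of the diff): MMCVCornerPoolCustomOp follows the standard ONNX Runtime custom-op pattern: one float input, one float output, CPU execution provider, and a "mode" attribute selecting the pooling direction. Registration would typically look like the sketch below; the "mmcv" domain name and the helper function are assumptions for illustration, not code from this commit:

#include <onnxruntime_cxx_api.h>
// #include "corner_pool.h"

// Hedged sketch: attach the custom op to a session's options before creating the session.
void register_corner_pool(Ort::SessionOptions& session_options) {
  static MMCVCornerPoolCustomOp corner_pool_op;  // must outlive the session
  Ort::CustomOpDomain domain("mmcv");            // assumed domain name
  domain.Add(&corner_pool_op);
  session_options.Add(domain);
}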
mmcv/ops/csrc/onnxruntime/cpu/corner_pool.cpp (new file, 0 → 100644)
// Copyright (c) OpenMMLab. All rights reserved
#include "corner_pool.h"
#include "../ort_mmcv_utils.h"
void TopPoolForwardCPU(const float *input, float *output, const int batch_size,
                       const int channels, const int height, const int width) {
  for (int n = 0; n < batch_size; n++) {
    int index_n = n * channels * width * height;
    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * width * height;
      for (int w = 0; w < width; w++) {
        // directly copy the most bottom value from input to output
        output[index_n_c + (height - 1) * width + w] =
            input[index_n_c + (height - 1) * width + w];
        // do top_pool
        for (int h = height - 2; h >= 0; h--) {
          output[index_n_c + h * width + w] =
              std::max(output[index_n_c + (h + 1) * width + w],
                       input[index_n_c + h * width + w]);
        }  // for h
      }    // for w
    }      // for c
  }        // for n
}

void BottomPoolForwardCPU(const float *input, float *output,
                          const int batch_size, const int channels,
                          const int height, const int width) {
  for (int n = 0; n < batch_size; n++) {
    int index_n = n * channels * width * height;
    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * width * height;
      for (int w = 0; w < width; w++) {
        // directly copy the most top value from input to output
        output[index_n_c + w] = input[index_n_c + w];
        // do bottom_pool
        for (int h = 1; h < height; h++) {
          output[index_n_c + h * width + w] =
              std::max(output[index_n_c + (h - 1) * width + w],
                       input[index_n_c + h * width + w]);
        }  // for h
      }    // for w
    }      // for c
  }        // for n
}

void LeftPoolForwardCPU(const float *input, float *output,
                        const int batch_size, const int channels,
                        const int height, const int width) {
  for (int n = 0; n < batch_size; n++) {
    int index_n = n * channels * width * height;
    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * width * height;
      for (int h = 0; h < height; h++) {
        // directly copy the most right value from input to output
        output[index_n_c + h * width + width - 1] =
            input[index_n_c + h * width + width - 1];
        // do left_pool
        for (int w = width - 2; w >= 0; w--) {
          output[index_n_c + h * width + w] =
              std::max(output[index_n_c + h * width + w + 1],
                       input[index_n_c + h * width + w]);
        }  // for w
      }    // for h
    }      // for c
  }        // for n
}

void RightPoolForwardCPU(const float *input, float *output,
                         const int batch_size, const int channels,
                         const int height, const int width) {
  for (int n = 0; n < batch_size; n++) {
    int index_n = n * channels * width * height;
    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * width * height;
      for (int h = 0; h < height; h++) {
        // directly copy the most left value from input to output
        output[index_n_c + h * width] = input[index_n_c + h * width];
        // do right_pool
        for (int w = 1; w < width; w++) {
          output[index_n_c + h * width + w] =
              std::max(output[index_n_c + h * width + w - 1],
                       input[index_n_c + h * width + w]);
        }  // for w
      }    // for h
    }      // for c
  }        // for n
}

void MMCVCornerPoolKernel::Compute(OrtKernelContext *context) {
  const int mode = int(mode_);
  typedef float T;
  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const T *input_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<T>(input));

  // get output memory
  OrtTensorDimensions out_dimensions(ort_, input);
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  T *output_data = ort_.GetTensorMutableData<T>(output);

  // 'top': 0, 'bottom': 1, 'left': 2, 'right':3
  assert(mode == 0 || mode == 1 || mode == 2 || mode == 3);

  // do corner_pool
  int batch_size = out_dimensions.data()[0];
  int input_channels = out_dimensions.data()[1];
  int input_height = out_dimensions.data()[2];
  int input_width = out_dimensions.data()[3];
  if (mode == 0)
    TopPoolForwardCPU(input_data, output_data, batch_size, input_channels,
                      input_height, input_width);
  else if (mode == 1)
    BottomPoolForwardCPU(input_data, output_data, batch_size, input_channels,
                         input_height, input_width);
  else if (mode == 2)
    LeftPoolForwardCPU(input_data, output_data, batch_size, input_channels,
                       input_height, input_width);
  else
    RightPoolForwardCPU(input_data, output_data, batch_size, input_channels,
                        input_height, input_width);
}
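
Commentary (not part of the diff): each direction is a running maximum along one spatial axis; TopPool, for instance, scans every column from the bottom row upward, so each output pixel is the maximum of the input at and below it. A tiny worked example of that behavior, using the function defined above (the demo wrapper is illustrative):

#include <cassert>

void top_pool_demo() {
  // One 3x1 column, top to bottom: [1, 4, 2].
  const float input[3] = {1.f, 4.f, 2.f};
  float output[3];
  TopPoolForwardCPU(input, output, /*batch_size=*/1, /*channels=*/1,
                    /*height=*/3, /*width=*/1);
  // Running max taken from the bottom up gives [4, 4, 2].
  assert(output[0] == 4.f && output[1] == 4.f && output[2] == 2.f);
}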
mmcv/ops/csrc/onnxruntime/cpu/deform_conv.cpp (new file, 0 → 100644)
// Copyright (c) OpenMMLab. All rights reserved
#include "deform_conv.h"
#include <cmath>
#include <vector>
#include "../ort_mmcv_utils.h"
void
gemm_ref_fp32_deform
(
const
float
*
A
,
const
float
*
B
,
const
float
*
V
,
const
float
*
H
,
const
int32_t
trans_A
,
const
int32_t
trans_B
,
const
int32_t
M
,
const
int32_t
N
,
const
int32_t
K
,
const
float
alpha
,
const
float
beta
,
float
*
Y
)
{
if
(
!
trans_A
&&
!
trans_B
)
{
// MK, KN; NN
for
(
int64_t
m
=
0
;
m
<
M
;
++
m
)
{
for
(
int64_t
n
=
0
;
n
<
N
;
++
n
)
{
float
y
=
0.0
f
;
for
(
int64_t
k
=
0
;
k
<
K
;
++
k
)
{
y
+=
A
[
m
*
K
+
k
]
*
B
[
k
*
N
+
n
];
}
y
*=
alpha
;
if
(
V
)
y
+=
beta
*
V
[
n
];
if
(
H
)
y
+=
beta
*
H
[
m
*
N
+
n
];
Y
[
m
*
N
+
n
]
=
y
;
}
}
}
if
(
trans_A
&&
!
trans_B
)
{
// KM, KN; TN
for
(
int64_t
m
=
0
;
m
<
M
;
++
m
)
{
for
(
int64_t
n
=
0
;
n
<
N
;
++
n
)
{
float
y
=
0.0
f
;
for
(
int64_t
k
=
0
;
k
<
K
;
++
k
)
{
y
+=
A
[
k
*
M
+
m
]
*
B
[
k
*
N
+
n
];
}
y
*=
alpha
;
if
(
V
)
y
+=
beta
*
V
[
n
];
if
(
H
)
y
+=
beta
*
H
[
m
*
N
+
n
];
Y
[
m
*
N
+
n
]
=
y
;
}
}
}
if
(
trans_A
&&
trans_B
)
{
// KM, NK; TT
for
(
int64_t
m
=
0
;
m
<
M
;
++
m
)
{
for
(
int64_t
n
=
0
;
n
<
N
;
++
n
)
{
float
y
=
0.0
f
;
for
(
int64_t
k
=
0
;
k
<
K
;
++
k
)
{
y
+=
A
[
k
*
M
+
m
]
*
B
[
n
*
K
+
k
];
}
y
*=
alpha
;
if
(
V
)
y
+=
beta
*
V
[
n
];
if
(
H
)
y
+=
beta
*
H
[
m
*
N
+
n
];
Y
[
m
*
N
+
n
]
=
y
;
}
}
}
if
(
!
trans_A
&&
trans_B
)
{
// MK, NK; NT
for
(
int64_t
m
=
0
;
m
<
M
;
++
m
)
{
for
(
int64_t
n
=
0
;
n
<
N
;
++
n
)
{
float
y
=
0.0
f
;
for
(
int64_t
k
=
0
;
k
<
K
;
++
k
)
{
y
+=
A
[
m
*
K
+
k
]
*
B
[
n
*
K
+
k
];
}
y
*=
alpha
;
if
(
V
)
y
+=
beta
*
V
[
n
];
if
(
H
)
y
+=
beta
*
H
[
m
*
N
+
n
];
Y
[
m
*
N
+
n
]
=
y
;
}
}
}
}
float
bilinear_interpolate
(
const
float
*
src
,
const
int64_t
src_h
,
const
int64_t
src_w
,
const
float
h
,
const
float
w
)
{
if
(
h
<=
-
1
||
src_h
<=
h
||
w
<=
-
1
||
src_w
<=
w
)
{
return
0
;
}
int64_t
h_low
=
floor
(
h
);
int64_t
w_low
=
floor
(
w
);
int64_t
h_high
=
h_low
+
1
;
int64_t
w_high
=
w_low
+
1
;
float
lh
=
h
-
h_low
;
float
lw
=
w
-
w_low
;
float
hh
=
1
-
lh
;
float
hw
=
1
-
lw
;
float
v1
=
0
;
if
(
h_low
>=
0
&&
w_low
>=
0
)
v1
=
src
[
h_low
*
src_w
+
w_low
];
float
v2
=
0
;
if
(
h_low
>=
0
&&
w_high
<=
src_w
-
1
)
v2
=
src
[
h_low
*
src_w
+
w_high
];
float
v3
=
0
;
if
(
h_high
<=
src_h
-
1
&&
w_low
>=
0
)
v3
=
src
[
h_high
*
src_w
+
w_low
];
float
v4
=
0
;
if
(
h_high
<=
src_h
-
1
&&
w_high
<=
src_w
-
1
)
v4
=
src
[
h_high
*
src_w
+
w_high
];
float
w1
=
hh
*
hw
,
w2
=
hh
*
lw
,
w3
=
lh
*
hw
,
w4
=
lh
*
lw
;
float
val
=
(
w1
*
v1
+
w2
*
v2
+
w3
*
v3
+
w4
*
v4
);
return
val
;
}
void
deformable_im2col
(
const
float
*
input
,
const
float
*
offset
,
const
int64_t
src_h
,
const
int64_t
src_w
,
const
int64_t
kernel_h
,
const
int64_t
kernel_w
,
const
int64_t
pad_h
,
const
int64_t
pad_w
,
const
int64_t
stride_h
,
const
int64_t
stride_w
,
const
int64_t
dilation_h
,
const
int64_t
dilation_w
,
const
int64_t
channels
,
const
int64_t
offset_groups
,
const
int64_t
dst_h
,
const
int64_t
dst_w
,
float
*
columns
)
{
const
int64_t
indices
=
channels
*
dst_h
*
dst_w
;
for
(
int64_t
index
=
0
;
index
!=
indices
;
++
index
)
{
const
int64_t
w_col
=
index
%
dst_w
;
const
int64_t
h_col
=
(
index
/
dst_w
)
%
dst_h
;
const
int64_t
c_im
=
index
/
(
dst_w
*
dst_h
);
const
int64_t
c_col
=
c_im
*
kernel_h
*
kernel_w
;
int64_t
c_per_offset_grp
=
channels
/
offset_groups
;
const
int64_t
grp_idx
=
c_im
/
c_per_offset_grp
;
auto
columns_ptr
=
columns
+
(
c_col
*
(
dst_h
*
dst_w
)
+
h_col
*
dst_w
+
w_col
);
auto
input_ptr
=
input
+
c_im
*
(
src_h
*
src_w
);
auto
offset_ptr
=
offset
+
grp_idx
*
2
*
kernel_h
*
kernel_w
*
dst_h
*
dst_w
;
for
(
int64_t
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int64_t
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
const
int
data_offset_h_ptr
=
((
2
*
(
kh
*
kernel_w
+
kw
))
*
dst_h
+
h_col
)
*
dst_w
+
w_col
;
const
int
data_offset_w_ptr
=
((
2
*
(
kh
*
kernel_w
+
kw
)
+
1
)
*
dst_h
+
h_col
)
*
dst_w
+
w_col
;
const
float
offset_h
=
offset_ptr
[
data_offset_h_ptr
];
const
float
offset_w
=
offset_ptr
[
data_offset_w_ptr
];
const
float
ih
=
(
h_col
*
stride_h
-
pad_h
)
+
kh
*
dilation_h
+
offset_h
;
const
float
iw
=
(
w_col
*
stride_w
-
pad_w
)
+
kw
*
dilation_w
+
offset_w
;
*
columns_ptr
=
bilinear_interpolate
(
input_ptr
,
src_h
,
src_w
,
ih
,
iw
);
columns_ptr
+=
dst_h
*
dst_w
;
}
}
}
}
void
deformable_conv_forward
(
const
float
*
src
,
const
float
*
offset
,
const
float
*
filter
,
const
int64_t
batch
,
const
int64_t
src_c
,
const
int64_t
src_h
,
const
int64_t
src_w
,
const
int64_t
dst_c
,
const
int64_t
dst_h
,
const
int64_t
dst_w
,
const
int64_t
group
,
const
int64_t
offset_group
,
const
int64_t
channels
,
const
int64_t
num_output
,
const
int64_t
kernel_h
,
const
int64_t
kernel_w
,
const
int64_t
stride_h
,
const
int64_t
stride_w
,
const
int64_t
pad_h
,
const
int64_t
pad_w
,
const
int64_t
dilation_h
,
const
int64_t
dilation_w
,
float
*
columns
,
float
*
dst
)
{
const
int64_t
ic_per_gp
=
channels
/
group
;
const
int64_t
oc_per_gp
=
num_output
/
group
;
for
(
int64_t
b
=
0
;
b
<
batch
;
++
b
)
{
for
(
int64_t
g
=
0
;
g
<
group
;
++
g
)
{
deformable_im2col
(
src
+
b
*
src_c
*
src_h
*
src_w
+
g
*
ic_per_gp
*
src_h
*
src_w
,
offset
+
b
*
offset_group
*
2
*
kernel_h
*
kernel_w
*
dst_h
*
dst_w
,
src_h
,
src_w
,
kernel_h
,
kernel_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
ic_per_gp
,
offset_group
,
dst_h
,
dst_w
,
columns
);
float
*
dst_ptr
=
dst
+
b
*
dst_c
*
dst_h
*
dst_w
+
g
*
oc_per_gp
*
dst_h
*
dst_w
;
memset
(
dst_ptr
,
0.0
f
,
sizeof
(
float
)
*
oc_per_gp
*
dst_h
*
dst_w
);
gemm_ref_fp32_deform
(
filter
+
g
*
oc_per_gp
*
ic_per_gp
*
kernel_h
*
kernel_w
,
columns
,
nullptr
,
dst_ptr
,
0
,
0
,
oc_per_gp
,
dst_h
*
dst_w
,
ic_per_gp
*
kernel_h
*
kernel_w
,
1.0
f
,
1.0
f
,
dst_ptr
);
}
}
}
MMCVDeformConvKernel
::
MMCVDeformConvKernel
(
OrtApi
api
,
const
OrtKernelInfo
*
info
)
:
api_
(
api
),
ort_
(
api_
),
info_
(
info
)
{
std
::
vector
<
int64_t
>
stride
=
ort_
.
KernelInfoGetAttribute
<
std
::
vector
<
int64_t
>>
(
info
,
"stride"
);
stride_height_
=
stride
[
0
];
stride_width_
=
stride
[
1
];
std
::
vector
<
int64_t
>
padding
=
ort_
.
KernelInfoGetAttribute
<
std
::
vector
<
int64_t
>>
(
info
,
"padding"
);
padding_height_
=
padding
[
0
];
padding_width_
=
padding
[
1
];
std
::
vector
<
int64_t
>
dilation
=
ort_
.
KernelInfoGetAttribute
<
std
::
vector
<
int64_t
>>
(
info
,
"dilation"
);
dilation_height_
=
dilation
[
0
];
dilation_width_
=
dilation
[
1
];
deformable_group_
=
ort_
.
KernelInfoGetAttribute
<
int64_t
>
(
info
,
"deform_groups"
);
group_
=
ort_
.
KernelInfoGetAttribute
<
int64_t
>
(
info
,
"groups"
);
// create allocator
allocator_
=
Ort
::
AllocatorWithDefaultOptions
();
}
void
MMCVDeformConvKernel
::
Compute
(
OrtKernelContext
*
context
)
{
const
int64_t
stride_height
=
stride_height_
;
const
int64_t
stride_width
=
stride_width_
;
const
int64_t
padding_height
=
padding_height_
;
const
int64_t
padding_width
=
padding_width_
;
const
int64_t
dilation_height
=
dilation_height_
;
const
int64_t
dilation_width
=
dilation_width_
;
const
int64_t
deformable_group
=
deformable_group_
;
const
int64_t
group
=
group_
;
const
OrtValue
*
input
=
ort_
.
KernelContext_GetInput
(
context
,
0
);
const
float
*
input_data
=
reinterpret_cast
<
const
float
*>
(
ort_
.
GetTensorData
<
float
>
(
input
));
const
OrtValue
*
offset
=
ort_
.
KernelContext_GetInput
(
context
,
1
);
const
float
*
offset_data
=
reinterpret_cast
<
const
float
*>
(
ort_
.
GetTensorData
<
float
>
(
offset
));
const
OrtValue
*
filter
=
ort_
.
KernelContext_GetInput
(
context
,
2
);
const
float
*
filter_data
=
reinterpret_cast
<
const
float
*>
(
ort_
.
GetTensorData
<
float
>
(
filter
));
OrtTensorDimensions
input_dims
(
ort_
,
input
);
OrtTensorDimensions
filter_dims
(
ort_
,
filter
);
int64_t
batch_size
=
input_dims
[
0
];
int64_t
in_channels
=
input_dims
[
1
];
int64_t
in_height
=
input_dims
[
2
];
int64_t
in_width
=
input_dims
[
3
];
int64_t
out_channels
=
filter_dims
[
0
];
int64_t
kernel_height
=
filter_dims
[
2
];
int64_t
kernel_width
=
filter_dims
[
3
];
// get output memory
int64_t
out_height
=
floor
((
in_height
+
2
*
padding_height
-
dilation_height
*
(
kernel_height
-
1
)
-
1
)
/
stride_height
+
1
);
int64_t
out_width
=
floor
(
(
in_width
+
2
*
padding_width
-
dilation_width
*
(
kernel_width
-
1
)
-
1
)
/
stride_width
+
1
);
std
::
vector
<
int64_t
>
output_dims
=
{
batch_size
,
out_channels
,
out_height
,
out_width
};
OrtValue
*
output
=
ort_
.
KernelContext_GetOutput
(
context
,
0
,
output_dims
.
data
(),
output_dims
.
size
());
float
*
out_ptr
=
ort_
.
GetTensorMutableData
<
float
>
(
output
);
// allocate tmp memory
int64_t
column_len
=
(
in_channels
/
group
)
*
kernel_height
*
kernel_width
*
out_height
*
out_width
;
float
*
columns
=
(
float
*
)
allocator_
.
Alloc
(
sizeof
(
float
)
*
column_len
);
deformable_conv_forward
(
input_data
,
offset_data
,
filter_data
,
batch_size
,
in_channels
,
in_height
,
in_width
,
out_channels
,
out_height
,
out_width
,
group
,
deformable_group
,
in_channels
,
out_channels
,
kernel_height
,
kernel_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
dilation_height
,
dilation_width
,
columns
,
out_ptr
);
}
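
Commentary (not part of the diff): the output spatial size computed in MMCVDeformConvKernel::Compute follows the usual convolution arithmetic, out = floor((in + 2*pad - dilation*(kernel - 1) - 1) / stride + 1), applied independently to height and width; the deformable offsets only change where deformable_im2col samples, not the output shape. A small self-contained check of that formula (values are illustrative):

#include <cassert>
#include <cstdint>

inline int64_t conv_out_size(int64_t in, int64_t pad, int64_t dilation,
                             int64_t kernel, int64_t stride) {
  return (in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1;
}

int main() {
  // 3x3 kernel, stride 1, pad 1, dilation 1 keeps the spatial size.
  assert(conv_out_size(56, 1, 1, 3, 1) == 56);
  // Stride 2 halves it (rounding down).
  assert(conv_out_size(56, 1, 1, 3, 2) == 28);
  return 0;
}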
mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp (new file, 0 → 100644)
// Copyright (c) OpenMMLab. All rights reserved
#include <cmath>
#include "../ort_mmcv_utils.h"
#include "grid_sample.h"
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) < (b)) ? (b) : (a))
#define CLIP_COORDINATES(in, out, clip_limit) \
out = MIN((clip_limit - 1), MAX(in, 0))
// modified from
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/GridSampler.cpp
GridSampleKernel
::
GridSampleKernel
(
OrtApi
api
,
const
OrtKernelInfo
*
info
)
:
api_
(
api
),
ort_
(
api_
),
info_
(
info
)
{
align_corners_
=
ort_
.
KernelInfoGetAttribute
<
int64_t
>
(
info
,
"align_corners"
);
interpolation_mode_
=
ort_
.
KernelInfoGetAttribute
<
int64_t
>
(
info
,
"interpolation_mode"
);
padding_mode_
=
ort_
.
KernelInfoGetAttribute
<
int64_t
>
(
info
,
"padding_mode"
);
allocator_
=
Ort
::
AllocatorWithDefaultOptions
();
}
enum
GridSamplerInterpolation
{
Bilinear
=
0
,
Nearest
=
1
,
Bicubic
=
2
};
enum
GridSamplerPadding
{
Zeros
=
0
,
Border
=
1
,
Reflection
=
2
};
template
<
typename
scalar_t
>
static
inline
scalar_t
grid_sampler_unnormalize
(
scalar_t
coord
,
int64_t
size
,
bool
align_corners
)
{
if
(
align_corners
)
{
return
((
coord
+
1
)
/
2
)
*
(
size
-
1
);
}
else
{
return
((
coord
+
1
)
*
size
-
1
)
/
2
;
}
}
// Clips coordinates to between 0 and clip_limit - 1
template
<
typename
scalar_t
>
static
inline
scalar_t
clip_coordinates
(
scalar_t
in
,
int64_t
clip_limit
)
{
return
std
::
min
(
static_cast
<
scalar_t
>
(
clip_limit
-
1
),
std
::
max
(
in
,
static_cast
<
scalar_t
>
(
0
)));
}
// Reflects coordinates until they fall between low and high (inclusive).
// The bounds are passed as twice their value so that half-integer values
// can be represented as ints.
template
<
typename
scalar_t
>
static
inline
scalar_t
reflect_coordinates
(
scalar_t
in
,
int64_t
twice_low
,
int64_t
twice_high
)
{
if
(
twice_low
==
twice_high
)
{
return
static_cast
<
scalar_t
>
(
0
);
}
scalar_t
min
=
static_cast
<
scalar_t
>
(
twice_low
)
/
2
;
scalar_t
span
=
static_cast
<
scalar_t
>
(
twice_high
-
twice_low
)
/
2
;
in
=
std
::
fabs
(
in
-
min
);
// `fmod` returns same sign as `in`, which is positive after the `fabs` above.
scalar_t
extra
=
std
::
fmod
(
in
,
span
);
int
flips
=
static_cast
<
int
>
(
std
::
floor
(
in
/
span
));
if
(
flips
%
2
==
0
)
{
return
extra
+
min
;
}
else
{
return
span
-
extra
+
min
;
}
}
template <typename scalar_t>
static inline scalar_t compute_coordinates(scalar_t coord, int64_t size,
                                           int64_t padding_mode,
                                           bool align_corners) {
  if (padding_mode == GridSamplerPadding::Border) {
    coord = clip_coordinates(coord, size);
  } else if (padding_mode == GridSamplerPadding::Reflection) {
    if (align_corners) {
      coord = reflect_coordinates(coord, 0, 2 * (size - 1));
    } else {
      coord = reflect_coordinates(coord, -1, 2 * size - 1);
    }
    coord = clip_coordinates(coord, size);
  }
  return coord;
}

// Computes the pixel source index value for a grid coordinate
template <typename scalar_t>
static inline scalar_t grid_sampler_compute_source_index(scalar_t coord,
                                                         int64_t size,
                                                         int64_t padding_mode,
                                                         bool align_corners) {
  coord = grid_sampler_unnormalize(coord, size, align_corners);
  coord = compute_coordinates(coord, size, padding_mode, align_corners);
  return coord;
}

static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H,
                                    int64_t W) {
  return h >= 0 && h < H && w >= 0 && w < W;
}

template <typename scalar_t>
static inline scalar_t get_value_bounded(const scalar_t *data, scalar_t x,
                                         scalar_t y, int64_t W, int64_t H,
                                         int64_t sW, int64_t sH,
                                         int64_t padding_mode,
                                         bool align_corners) {
  x = compute_coordinates(x, W, padding_mode, align_corners);
  y = compute_coordinates(y, H, padding_mode, align_corners);

  int64_t ix = static_cast<int64_t>(x);
  int64_t iy = static_cast<int64_t>(y);

  if (within_bounds_2d(iy, ix, H, W)) {
    return data[iy * sH + ix * sW];
  }
  return static_cast<scalar_t>(0);
}
template <typename scalar_t>
static inline scalar_t cubic_convolution1(scalar_t x, scalar_t A) {
  return ((A + 2) * x - (A + 3)) * x * x + 1;
}

template <typename scalar_t>
static inline scalar_t cubic_convolution2(scalar_t x, scalar_t A) {
  return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A;
}

template <typename scalar_t>
static inline void get_cubic_upsample_coefficients(scalar_t coeffs[4],
                                                   scalar_t t) {
  scalar_t A = -0.75;

  scalar_t x1 = t;
  coeffs[0] = cubic_convolution2<scalar_t>(x1 + 1.0, A);
  coeffs[1] = cubic_convolution1<scalar_t>(x1, A);

  // opposite coefficients
  scalar_t x2 = 1.0 - t;
  coeffs[2] = cubic_convolution1<scalar_t>(x2, A);
  coeffs[3] = cubic_convolution2<scalar_t>(x2 + 1.0, A);
}

template <typename scalar_t>
static inline scalar_t cubic_interp1d(scalar_t x0, scalar_t x1, scalar_t x2,
                                      scalar_t x3, scalar_t t) {
  scalar_t coeffs[4];
  get_cubic_upsample_coefficients<scalar_t>(coeffs, t);

  return x0 * coeffs[0] + x1 * coeffs[1] + x2 * coeffs[2] + x3 * coeffs[3];
}
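cubic_convolution1 and cubic_convolution2 are the two pieces of the Keys bicubic kernel with A = -0.75, and for any fractional offset t the four weights built by get_cubic_upsample_coefficients sum to 1, so interpolating a constant image reproduces the constant. The standalone check below (not part of the file) verifies that numerically.

#include <cstdio>

// The two Keys-kernel pieces from above, with A = -0.75, for illustration only.
static double conv1(double x, double A) {
  return ((A + 2) * x - (A + 3)) * x * x + 1;
}
static double conv2(double x, double A) {
  return ((A * x - 5 * A) * x + 8 * A) * x - 4 * A;
}

int main() {
  const double A = -0.75;
  for (double t : {0.0, 0.25, 0.5, 0.9}) {
    double w0 = conv2(t + 1.0, A);  // neighbour at offset -1
    double w1 = conv1(t, A);        // neighbour at offset  0
    double w2 = conv1(1.0 - t, A);  // neighbour at offset +1
    double w3 = conv2(2.0 - t, A);  // neighbour at offset +2
    std::printf("t=%.2f  sum of weights = %.6f\n", t, w0 + w1 + w2 + w3);
  }
  // Every line prints 1.000000, so bicubic interpolation preserves constants.
  return 0;
}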
void GridSampleKernel::Compute(OrtKernelContext *context) {
  const bool align_corners = align_corners_;
  const int64_t padding_mode = padding_mode_;
  const int64_t interpolation_mode = interpolation_mode_;

  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));

  const OrtValue *grid = ort_.KernelContext_GetInput(context, 1);
  const float *grid_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(grid));

  OrtTensorDimensions input_dims(ort_, input);
  OrtTensorDimensions grid_dims(ort_, grid);
  int64_t N = input_dims[0];
  int64_t C = input_dims[1];
  int64_t inp_H = input_dims[2];
  int64_t inp_W = input_dims[3];
  int64_t out_H = grid_dims[1];
  int64_t out_W = grid_dims[2];

  std::vector<int64_t> output_dims = {N, C, out_H, out_W};
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, output_dims.data(), output_dims.size());
  float *out_ptr = ort_.GetTensorMutableData<float>(output);

  int64_t inp_sN = input_dims[1] * input_dims[2] * input_dims[3];
  int64_t inp_sC = input_dims[2] * input_dims[3];
  int64_t inp_sH = input_dims[3];
  int64_t inp_sW = 1;
  int64_t grid_sN = grid_dims[1] * grid_dims[2] * grid_dims[3];
  int64_t grid_sH = grid_dims[2] * grid_dims[3];
  int64_t grid_sW = grid_dims[3];
  int64_t grid_sCoor = 1;
  int64_t out_sN = output_dims[1] * output_dims[2] * output_dims[3];
  int64_t out_sC = output_dims[2] * output_dims[3];
  int64_t out_sH = output_dims[3];
  int64_t out_sW = 1;

  // loop over each output pixel
  for (int64_t n = 0; n < N; ++n) {
    const float *grid_ptr_N = grid_data + n * grid_sN;
    const float *inp_ptr_N = input_data + n * inp_sN;
    for (int64_t h = 0; h < out_H; ++h) {
      for (int64_t w = 0; w < out_W; ++w) {
        const float *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW;
        float x = *grid_ptr_NHW;
        float y = grid_ptr_NHW[grid_sCoor];

        float ix = grid_sampler_compute_source_index(x, inp_W, padding_mode,
                                                     align_corners);
        float iy = grid_sampler_compute_source_index(y, inp_H, padding_mode,
                                                     align_corners);

        if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
          // get corner pixel values from (x, y)
          // for 4d, we use north-east-south-west
          int64_t ix_nw = static_cast<int64_t>(std::floor(ix));
          int64_t iy_nw = static_cast<int64_t>(std::floor(iy));

          int64_t ix_ne = ix_nw + 1;
          int64_t iy_ne = iy_nw;

          int64_t ix_sw = ix_nw;
          int64_t iy_sw = iy_nw + 1;

          int64_t ix_se = ix_nw + 1;
          int64_t iy_se = iy_nw + 1;

          // get surfaces to each neighbor:
          float nw = (ix_se - ix) * (iy_se - iy);
          float ne = (ix - ix_sw) * (iy_sw - iy);
          float sw = (ix_ne - ix) * (iy - iy_ne);
          float se = (ix - ix_nw) * (iy - iy_nw);

          // calculate bilinear weighted pixel value and set output pixel
          const float *inp_ptr_NC = inp_ptr_N;
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          for (int64_t c = 0; c < C;
               ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            auto res = static_cast<float>(0);
            if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw;
            }
            if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne;
            }
            if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw;
            }
            if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) {
              res += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se;
            }
            *out_ptr_NCHW = res;
          }
        } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
          int64_t ix_nearest = static_cast<int64_t>(std::nearbyint(ix));
          int64_t iy_nearest = static_cast<int64_t>(std::nearbyint(iy));

          // assign nearest neighbor pixel value to output pixel
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          const float *inp_ptr_NC = inp_ptr_N;
          for (int64_t c = 0; c < C;
               ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) {
              *out_ptr_NCHW =
                  inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW];
            } else {
              *out_ptr_NCHW = static_cast<float>(0);
            }
          }
        } else if (interpolation_mode == GridSamplerInterpolation::Bicubic) {
          // grid_sampler_compute_source_index will "clip the value" of idx
          // depends on the padding, which would cause calculation to be wrong,
          // for example x = -0.1 -> ix = 0 for zero padding, but in bicubic
          // ix = floor(x) = -1
          // There would be more problem in reflection padding, since the -1
          // and +1 direction is not fixed in boundary condition
          ix = grid_sampler_unnormalize(x, inp_W, align_corners);
          iy = grid_sampler_unnormalize(y, inp_H, align_corners);

          float ix_nw = std::floor(ix);
          float iy_nw = std::floor(iy);

          const float tx = ix - ix_nw;
          const float ty = iy - iy_nw;

          const float *inp_ptr_NC = inp_ptr_N;
          float *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW;
          for (int64_t c = 0; c < C;
               ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) {
            float coefficients[4];

            // Interpolate 4 values in the x direction
            for (int64_t i = 0; i < 4; ++i) {
              coefficients[i] = cubic_interp1d<float>(
                  get_value_bounded<float>(inp_ptr_NC, ix_nw - 1,
                                           iy_nw - 1 + i, inp_W, inp_H, inp_sW,
                                           inp_sH, padding_mode, align_corners),
                  get_value_bounded<float>(inp_ptr_NC, ix_nw + 0,
                                           iy_nw - 1 + i, inp_W, inp_H, inp_sW,
                                           inp_sH, padding_mode, align_corners),
                  get_value_bounded<float>(inp_ptr_NC, ix_nw + 1,
                                           iy_nw - 1 + i, inp_W, inp_H, inp_sW,
                                           inp_sH, padding_mode, align_corners),
                  get_value_bounded<float>(inp_ptr_NC, ix_nw + 2,
                                           iy_nw - 1 + i, inp_W, inp_H, inp_sW,
                                           inp_sH, padding_mode, align_corners),
                  tx);
            }

            // Interpolate in the y direction
            *out_ptr_NCHW =
                cubic_interp1d<float>(coefficients[0], coefficients[1],
                                      coefficients[2], coefficients[3], ty);
          }
        }
      }
    }
  }
}
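Compute above walks the input, grid and output buffers with hand-computed strides because the custom op receives plain contiguous NCHW float tensors from ONNX Runtime. A minimal sketch of the same stride arithmetic, with hypothetical dimensions and outside the committed file:

#include <cstdio>

int main() {
  // Hypothetical contiguous NCHW tensor: N=2, C=3, H=4, W=5.
  long dims[4] = {2, 3, 4, 5};
  long sN = dims[1] * dims[2] * dims[3];  // 60: jump to the next image
  long sC = dims[2] * dims[3];            // 20: jump to the next channel
  long sH = dims[3];                      //  5: jump to the next row
  long sW = 1;                            //  1: jump to the next column
  // Flat offset of element (n=1, c=2, h=3, w=4), the very last value:
  long offset = 1 * sN + 2 * sC + 3 * sH + 4 * sW;
  std::printf("offset = %ld of %ld elements\n", offset,
              dims[0] * dims[1] * dims[2] * dims[3]);  // 119 of 120
  return 0;
}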
mmcv/ops/csrc/onnxruntime/cpu/modulated_deform_conv.cpp
0 → 100644
View file @ 6f3c5f1c
// Copyright (c) OpenMMLab. All rights reserved
#include "modulated_deform_conv.h"
#include <cmath>
#include <cstring>  // memset
#include <vector>
#include "../ort_mmcv_utils.h"
float bilinear_interpolate_2d(const float *src, const int64_t src_h,
                              const int64_t src_w, const float h,
                              const float w) {
  if (h <= -1 || src_h <= h || w <= -1 || src_w <= w) {
    return 0;
  }

  int64_t h_low = floor(h);
  int64_t w_low = floor(w);
  int64_t h_high = h_low + 1;
  int64_t w_high = w_low + 1;

  float lh = h - h_low;
  float lw = w - w_low;
  float hh = 1 - lh;
  float hw = 1 - lw;

  float v1 = 0;
  if (h_low >= 0 && w_low >= 0) v1 = src[h_low * src_w + w_low];
  float v2 = 0;
  if (h_low >= 0 && w_high <= src_w - 1) v2 = src[h_low * src_w + w_high];
  float v3 = 0;
  if (h_high <= src_h - 1 && w_low >= 0) v3 = src[h_high * src_w + w_low];
  float v4 = 0;
  if (h_high <= src_h - 1 && w_high <= src_w - 1)
    v4 = src[h_high * src_w + w_high];

  float w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw;

  float val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
  return val;
}
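bilinear_interpolate_2d blends the four integer neighbours of a fractional (h, w) sample position and treats out-of-range neighbours as zero. The trimmed standalone copy below (illustration only, in-bounds case, not part of the file) shows the weighting on a 2x2 patch sampled at its centre.

#include <cmath>
#include <cstdio>

// Trimmed copy of the weighting in bilinear_interpolate_2d (in-bounds case only;
// the function above additionally zero-fills neighbours that fall off the map).
static float bilinear(const float *src, long src_w, float h, float w) {
  long h_low = static_cast<long>(std::floor(h));
  long w_low = static_cast<long>(std::floor(w));
  float lh = h - h_low, lw = w - w_low, hh = 1 - lh, hw = 1 - lw;
  float v1 = src[h_low * src_w + w_low];
  float v2 = src[h_low * src_w + w_low + 1];
  float v3 = src[(h_low + 1) * src_w + w_low];
  float v4 = src[(h_low + 1) * src_w + w_low + 1];
  return hh * hw * v1 + hh * lw * v2 + lh * hw * v3 + lh * lw * v4;
}

int main() {
  const float patch[4] = {0.f, 1.f, 2.f, 3.f};  // a 2x2 feature map
  // Sampling the exact centre (0.5, 0.5) weights all four values by 0.25.
  std::printf("%.2f\n", bilinear(patch, 2, 0.5f, 0.5f));  // prints 1.50
  return 0;
}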
// output: (channels * kernel_h * kernel_w, dst_h * dst_w)
void deformable_im2col_2d(const float *input, const float *offset,
                          const float *mask, const int64_t src_h,
                          const int64_t src_w, const int64_t kernel_h,
                          const int64_t kernel_w, const int64_t pad_h,
                          const int64_t pad_w, const int64_t stride_h,
                          const int64_t stride_w, const int64_t dilation_h,
                          const int64_t dilation_w, const int64_t channels,
                          const int64_t offset_groups, const int64_t dst_h,
                          const int64_t dst_w, const bool use_mask,
                          float *columns) {
  const int64_t workload = channels * dst_h * dst_w;
  for (int64_t index = 0; index != workload; ++index) {
    const int64_t ow = index % dst_w;
    const int64_t oh = (index / dst_w) % dst_h;
    const int64_t ic = index / (dst_w * dst_h);
    const int64_t oc = ic * kernel_h * kernel_w;

    int64_t c_per_offset_grp = channels / offset_groups;
    const int64_t grp_idx = ic / c_per_offset_grp;

    auto columns_ptr = columns + (oc * (dst_h * dst_w) + oh * dst_w + ow);
    auto input_ptr = input + ic * (src_h * src_w);
    auto offset_ptr =
        offset + grp_idx * 2 * kernel_h * kernel_w * dst_h * dst_w;
    auto mask_ptr = mask;
    if (use_mask) {
      mask_ptr += grp_idx * kernel_h * kernel_w * dst_h * dst_w;
    }

    for (int64_t kh = 0; kh < kernel_h; ++kh) {
      for (int64_t kw = 0; kw < kernel_w; ++kw) {
        const int64_t mask_idx = kh * kernel_w + kw;
        const int64_t offset_idx = 2 * mask_idx;

        float mask_value = 1;
        if (use_mask) {
          mask_value = mask_ptr[mask_idx * (dst_h * dst_w) + oh * dst_w + ow];
        }

        const float offset_h =
            offset_ptr[offset_idx * (dst_h * dst_w) + oh * dst_w + ow];
        const float offset_w =
            offset_ptr[(offset_idx + 1) * (dst_h * dst_w) + oh * dst_w + ow];
        const float ih = (oh * stride_h - pad_h) + kh * dilation_h + offset_h;
        const float iw = (ow * stride_w - pad_w) + kw * dilation_w + offset_w;
        *columns_ptr = mask_value * bilinear_interpolate_2d(input_ptr, src_h,
                                                            src_w, ih, iw);
        columns_ptr += dst_h * dst_w;
      }
    }
  }
}
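As the comment above says, deformable_im2col_2d lays the sampled values out as a (channels * kernel_h * kernel_w) x (dst_h * dst_w) matrix: one row per (channel, kernel position) pair, one column per output pixel, which is exactly the column_len buffer that Compute allocates per group. A back-of-the-envelope size check with hypothetical layer dimensions (not part of the file):

#include <cstdio>

int main() {
  // Hypothetical layer: 64 input channels per group, 3x3 kernel, 32x32 output.
  long channels = 64, kernel_h = 3, kernel_w = 3, dst_h = 32, dst_w = 32;
  long rows = channels * kernel_h * kernel_w;  // one row per (channel, kh, kw)
  long cols = dst_h * dst_w;                   // one column per output pixel
  // This product is the column_len that Compute allocates for each group.
  std::printf("columns buffer: %ld x %ld = %ld floats (%.2f MiB)\n", rows, cols,
              rows * cols, rows * cols * sizeof(float) / (1024.0 * 1024.0));
  return 0;
}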
void gemm_ref_fp32(const float *A, const float *B, const float *V,
                   const float *H, const int32_t trans_A, const int32_t trans_B,
                   const int32_t M, const int32_t N, const int32_t K,
                   const float alpha, const float beta, float *Y) {
  if (!trans_A && !trans_B) {
    // MK, KN; NN
    for (int64_t m = 0; m < M; ++m) {
      for (int64_t n = 0; n < N; ++n) {
        float y = 0.0f;
        for (int64_t k = 0; k < K; ++k) {
          y += A[m * K + k] * B[k * N + n];
        }
        y *= alpha;
        if (V) y += beta * V[n];
        if (H) y += beta * H[m * N + n];
        Y[m * N + n] = y;
      }
    }
  }
  if (trans_A && !trans_B) {
    // KM, KN; TN
    for (int64_t m = 0; m < M; ++m) {
      for (int64_t n = 0; n < N; ++n) {
        float y = 0.0f;
        for (int64_t k = 0; k < K; ++k) {
          y += A[k * M + m] * B[k * N + n];
        }
        y *= alpha;
        if (V) y += beta * V[n];
        if (H) y += beta * H[m * N + n];
        Y[m * N + n] = y;
      }
    }
  }
  if (trans_A && trans_B) {
    // KM, NK; TT
    for (int64_t m = 0; m < M; ++m) {
      for (int64_t n = 0; n < N; ++n) {
        float y = 0.0f;
        for (int64_t k = 0; k < K; ++k) {
          y += A[k * M + m] * B[n * K + k];
        }
        y *= alpha;
        if (V) y += beta * V[n];
        if (H) y += beta * H[m * N + n];
        Y[m * N + n] = y;
      }
    }
  }
  if (!trans_A && trans_B) {
    // MK, NK; NT
    for (int64_t m = 0; m < M; ++m) {
      for (int64_t n = 0; n < N; ++n) {
        float y = 0.0f;
        for (int64_t k = 0; k < K; ++k) {
          y += A[m * K + k] * B[n * K + k];
        }
        y *= alpha;
        if (V) y += beta * V[n];
        if (H) y += beta * H[m * N + n];
        Y[m * N + n] = y;
      }
    }
  }
}
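gemm_ref_fp32 is a naive reference GEMM, Y = alpha * op(A) * op(B) + beta * V (a broadcast row vector) + beta * H, with the four trans_A/trans_B combinations unrolled. deformable_conv2d_ref_fp32 below calls the non-transposed branch with alpha = beta = 1, V = nullptr and H = dst_ptr, so whatever bias was pre-written into dst_ptr is folded into the result. A tiny standalone rendition of that NN branch with made-up numbers (not part of the file):

#include <cstdio>

// Minimal rendition of the NN branch of gemm_ref_fp32:
//   Y[m][n] = alpha * sum_k A[m][k] * B[k][n] + beta * H[m][n]
int main() {
  const int M = 2, N = 2, K = 3;
  const float alpha = 1.0f, beta = 1.0f;
  float A[M * K] = {1, 2, 3, 4, 5, 6};
  float B[K * N] = {1, 0, 0, 1, 1, 1};
  float H[M * N] = {10, 10, 10, 10};  // plays the role of dst_ptr holding the bias
  float Y[M * N];
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      float y = 0.0f;
      for (int k = 0; k < K; ++k) y += A[m * K + k] * B[k * N + n];
      Y[m * N + n] = alpha * y + beta * H[m * N + n];
    }
  }
  std::printf("%.0f %.0f / %.0f %.0f\n", Y[0], Y[1], Y[2], Y[3]);  // 14 15 / 20 21
  return 0;
}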
void deformable_conv2d_ref_fp32(
    const float *src, const float *offset, const float *mask,
    const float *filter, const float *bias, const int64_t batch,
    const int64_t src_c, const int64_t src_h, const int64_t src_w,
    const int64_t dst_c, const int64_t dst_h, const int64_t dst_w,
    const int64_t group, const int64_t offset_group, const int64_t channels,
    const int64_t num_output, const int64_t kernel_h, const int64_t kernel_w,
    const int64_t stride_h, const int64_t stride_w, const int64_t pad_h,
    const int64_t pad_w, const int64_t dilation_h, const int64_t dilation_w,
    float *columns, float *dst) {
  const int64_t ic_per_gp = channels / group;
  const int64_t oc_per_gp = num_output / group;

  for (int64_t b = 0; b < batch; ++b) {
    for (int64_t g = 0; g < group; ++g) {
      deformable_im2col_2d(
          src + b * src_c * src_h * src_w + g * ic_per_gp * src_h * src_w,
          offset + b * offset_group * 2 * kernel_h * kernel_w * dst_h * dst_w,
          mask + b * offset_group * kernel_h * kernel_w * dst_h * dst_w, src_h,
          src_w, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w,
          dilation_h, dilation_w, ic_per_gp, offset_group, dst_h, dst_w,
          mask != nullptr, columns);

      float *dst_ptr =
          dst + b * dst_c * dst_h * dst_w + g * oc_per_gp * dst_h * dst_w;

      if (bias != nullptr) {
        const float *bias_ptr = bias + g * oc_per_gp;
        for (int64_t oc = 0; oc < oc_per_gp; ++oc) {
          for (int64_t hw = 0; hw < dst_h * dst_w; ++hw) {
            dst_ptr[oc * dst_h * dst_w + hw] = bias_ptr[oc];
          }
        }
      } else {
        memset(dst_ptr, 0.0f, sizeof(float) * oc_per_gp * dst_h * dst_w);
      }

      gemm_ref_fp32(filter + g * oc_per_gp * ic_per_gp * kernel_h * kernel_w,
                    columns, nullptr, dst_ptr, 0, 0, oc_per_gp, dst_h * dst_w,
                    ic_per_gp * kernel_h * kernel_w, 1.0f, 1.0f, dst_ptr);
    }
  }
}
MMCVModulatedDeformConvKernel::MMCVModulatedDeformConvKernel(
    OrtApi api, const OrtKernelInfo *info)
    : api_(api), ort_(api_), info_(info) {
  std::vector<int64_t> stride =
      ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "stride");
  stride_height_ = stride[0];
  stride_width_ = stride[1];
  std::vector<int64_t> padding =
      ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "padding");
  padding_height_ = padding[0];
  padding_width_ = padding[1];
  std::vector<int64_t> dilation =
      ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "dilation");
  dilation_height_ = dilation[0];
  dilation_width_ = dilation[1];
  deformable_group_ =
      ort_.KernelInfoGetAttribute<int64_t>(info, "deform_groups");
  group_ = ort_.KernelInfoGetAttribute<int64_t>(info, "groups");

  // create allocator
  allocator_ = Ort::AllocatorWithDefaultOptions();
}
void MMCVModulatedDeformConvKernel::Compute(OrtKernelContext *context) {
  const int64_t stride_height = stride_height_;
  const int64_t stride_width = stride_width_;
  const int64_t padding_height = padding_height_;
  const int64_t padding_width = padding_width_;
  const int64_t dilation_height = dilation_height_;
  const int64_t dilation_width = dilation_width_;
  const int64_t deformable_group = deformable_group_;
  const int64_t group = group_;

  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));

  const OrtValue *offset = ort_.KernelContext_GetInput(context, 1);
  const float *offset_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(offset));

  const OrtValue *mask = ort_.KernelContext_GetInput(context, 2);
  const float *mask_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(mask));

  const OrtValue *filter = ort_.KernelContext_GetInput(context, 3);
  const float *filter_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(filter));

  const OrtValue *bias = ort_.KernelContext_GetInput(context, 4);
  const float *bias_data =
      (bias != nullptr)
          ? reinterpret_cast<const float *>(ort_.GetTensorData<float>(bias))
          : nullptr;
  // const float *bias_data = nullptr;

  OrtTensorDimensions input_dims(ort_, input);
  OrtTensorDimensions filter_dims(ort_, filter);

  int64_t batch = input_dims[0];
  int64_t channels = input_dims[1];
  int64_t in_height = input_dims[2];
  int64_t in_width = input_dims[3];
  int64_t num_output = filter_dims[0];
  int64_t kernel_height = filter_dims[2];
  int64_t kernel_width = filter_dims[3];

  // get output memory
  int64_t out_height = floor((in_height + 2 * padding_height -
                              dilation_height * (kernel_height - 1) - 1) /
                                 stride_height +
                             1);
  int64_t out_width = floor((in_width + 2 * padding_width -
                             dilation_width * (kernel_width - 1) - 1) /
                                stride_width +
                            1);

  std::vector<int64_t> output_dims = {batch, num_output, out_height, out_width};

  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, output_dims.data(), output_dims.size());
  float *out_ptr = ort_.GetTensorMutableData<float>(output);

  // allocate tmp memory
  int64_t column_len = (channels / group) * kernel_height * kernel_width *
                       out_height * out_width;
  float *columns = (float *)allocator_.Alloc(sizeof(float) * column_len);

  deformable_conv2d_ref_fp32(
      input_data, offset_data, mask_data, filter_data, bias_data, batch,
      channels, in_height, in_width, num_output, out_height, out_width, group,
      deformable_group, channels, num_output, kernel_height, kernel_width,
      stride_height, stride_width, padding_height, padding_width,
      dilation_height, dilation_width, columns, out_ptr);
}
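The out_height/out_width computation in Compute is the standard convolution output-size formula, out = floor((in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1). A worked example with hypothetical layer parameters (not part of the file):

#include <cstdio>

// Same output-size arithmetic as in Compute above.
static long conv_out(long in, long pad, long dilation, long kernel, long stride) {
  return (in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1;
}

int main() {
  // Hypothetical layer: 224x224 input, 3x3 kernel, stride 2, padding 1, dilation 1.
  long out = conv_out(224, 1, 1, 3, 2);
  std::printf("output: %ld x %ld\n", out, out);
  // (224 + 2 - 2 - 1) = 223; 223 / 2 + 1 = 112, so the output is 112 x 112.
  return 0;
}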
mmcv/ops/csrc/onnxruntime/cpu/nms.cpp
0 → 100644
View file @ 6f3c5f1c
// Copyright (c) OpenMMLab. All rights reserved
#include "nms.h"
#include <assert.h>
#include <algorithm>
#include <cmath>
#include <cstring>  // memcpy
#include <iostream>
#include <iterator>
#include <numeric> // std::iota
#include <vector>
#include "../ort_mmcv_utils.h"
NmsKernel::NmsKernel(OrtApi api, const OrtKernelInfo *info)
    : api_(api), ort_(api_), info_(info) {
  iou_threshold_ = ort_.KernelInfoGetAttribute<float>(info, "iou_threshold");
  offset_ = ort_.KernelInfoGetAttribute<int64_t>(info, "offset");

  // create allocator
  allocator_ = Ort::AllocatorWithDefaultOptions();
}
void NmsKernel::Compute(OrtKernelContext *context) {
  const float iou_threshold = iou_threshold_;
  const int64_t offset = offset_;

  const OrtValue *boxes = ort_.KernelContext_GetInput(context, 0);
  const float *boxes_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(boxes));
  const OrtValue *scores = ort_.KernelContext_GetInput(context, 1);
  const float *scores_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(scores));

  OrtTensorDimensions boxes_dim(ort_, boxes);
  OrtTensorDimensions scores_dim(ort_, scores);

  int64_t nboxes = boxes_dim[0];
  assert(boxes_dim[1] == 4);

  // allocate tmp memory
  float *tmp_boxes = (float *)allocator_.Alloc(sizeof(float) * nboxes * 4);
  float *sc = (float *)allocator_.Alloc(sizeof(float) * nboxes);
  float *areas = (float *)allocator_.Alloc(sizeof(float) * nboxes);
  bool *select = (bool *)allocator_.Alloc(sizeof(bool) * nboxes);
  for (int64_t i = 0; i < nboxes; i++) {
    select[i] = true;
  }

  memcpy(tmp_boxes, boxes_data, sizeof(float) * nboxes * 4);
  memcpy(sc, scores_data, sizeof(float) * nboxes);

  // sort scores
  std::vector<float> tmp_sc;
  for (int i = 0; i < nboxes; i++) {
    tmp_sc.push_back(sc[i]);
  }
  std::vector<int64_t> order(tmp_sc.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(), [&tmp_sc](int64_t id1, int64_t id2) {
    return tmp_sc[id1] > tmp_sc[id2];
  });

  // area = (x2 - x1 + offset) * (y2 - y1 + offset)
  for (int64_t i = 0; i < nboxes; i++) {
    areas[i] = (tmp_boxes[i * 4 + 2] - tmp_boxes[i * 4 + 0] + offset) *
               (tmp_boxes[i * 4 + 3] - tmp_boxes[i * 4 + 1] + offset);
  }

  for (int64_t _i = 0; _i < nboxes; _i++) {
    if (select[_i] == false) continue;
    auto i = order[_i];
    auto ix1 = tmp_boxes[i * 4 + 0];
    auto iy1 = tmp_boxes[i * 4 + 1];
    auto ix2 = tmp_boxes[i * 4 + 2];
    auto iy2 = tmp_boxes[i * 4 + 3];
    auto iarea = areas[i];

    for (int64_t _j = _i + 1; _j < nboxes; _j++) {
      if (select[_j] == false) continue;
      auto j = order[_j];
      auto xx1 = std::max(ix1, tmp_boxes[j * 4 + 0]);
      auto yy1 = std::max(iy1, tmp_boxes[j * 4 + 1]);
      auto xx2 = std::min(ix2, tmp_boxes[j * 4 + 2]);
      auto yy2 = std::min(iy2, tmp_boxes[j * 4 + 3]);

      auto w = std::max(0.f, xx2 - xx1 + offset);
      auto h = std::max(0.f, yy2 - yy1 + offset);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr > iou_threshold) select[_j] = false;
    }
  }

  std::vector<int64_t> res_order;
  for (int i = 0; i < nboxes; i++) {
    if (select[i]) {
      res_order.push_back(order[i]);
    }
  }

  std::vector<int64_t> inds_dims({res_order.size()});

  OrtValue *res = ort_.KernelContext_GetOutput(context, 0, inds_dims.data(),
                                               inds_dims.size());
  int64_t *res_data = ort_.GetTensorMutableData<int64_t>(res);

  memcpy(res_data, res_order.data(), sizeof(int64_t) * res_order.size());
}
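The Compute method above runs plain greedy NMS: boxes are visited in descending score order and any remaining box whose IoU with an already-kept box exceeds iou_threshold is dropped, with the offset attribute (typically 0 or 1) added to widths and heights. A standalone worked IoU example for two overlapping boxes (hypothetical numbers, not part of the file):

#include <algorithm>
#include <cstdio>

int main() {
  // Two boxes in (x1, y1, x2, y2) form, offset = 0 (continuous coordinates).
  float a[4] = {0, 0, 10, 10}, b[4] = {5, 0, 15, 10};
  float offset = 0.f;
  float area_a = (a[2] - a[0] + offset) * (a[3] - a[1] + offset);  // 100
  float area_b = (b[2] - b[0] + offset) * (b[3] - b[1] + offset);  // 100
  float w = std::max(0.f, std::min(a[2], b[2]) - std::max(a[0], b[0]) + offset);
  float h = std::max(0.f, std::min(a[3], b[3]) - std::max(a[1], b[1]) + offset);
  float inter = w * h;                            // 5 * 10 = 50
  float iou = inter / (area_a + area_b - inter);  // 50 / 150
  std::printf("IoU = %.3f\n", iou);               // 0.333
  // At iou_threshold = 0.5 the lower-scoring box survives; at 0.3 it is suppressed.
  return 0;
}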