gaoqiong / composable_kernel_ROCM · Commits

Commit 6368be50, authored Jan 12, 2024 by Jun Liu

    Merge branch 'amd-develop' into amd-master

Parents: 32806d5f, 71d6ede7

Changes: 56 · Showing 16 changed files with 462 additions and 173 deletions (+462 −173)
library/src/tensor_operation_instance/gpu/CMakeLists.txt  +0 −1
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp  +36 −37
library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt  +1 −3
library/src/utility/CMakeLists.txt  +6 −4
profiler/include/profiler/profile_transpose_impl.hpp  +4 −8
profiler/src/CMakeLists.txt  +3 −1
profiler/src/profile_transpose.cpp  +112 −0
test/CMakeLists.txt  +5 −6
test/transpose/test_transpose.cpp  +21 −13
test/transpose/test_transpose_ut_cases.inc  +0 −28
test/transpose/test_transpose_util.hpp  +0 −54
test/wrapper/CMakeLists.txt  +4 −0
test/wrapper/test_copy.cpp  +129 −0
test/wrapper/test_layout.cpp  +2 −9
test/wrapper/test_partition.cpp  +119 −0
test/wrapper/test_tensor.cpp  +20 −9
library/src/tensor_operation_instance/gpu/CMakeLists.txt

@@ -152,7 +152,6 @@ ENDFOREACH()
if(CK_DEVICE_OTHER_INSTANCES)
    add_library(device_other_operations STATIC ${CK_DEVICE_OTHER_INSTANCES})
    add_library(composablekernels::device_other_operations ALIAS device_other_operations)
    target_compile_features(device_other_operations PUBLIC)
    set_target_properties(device_other_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
    target_include_directories(device_other_operations PUBLIC
                               $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck>
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp

@@ -9,43 +9,42 @@ namespace tensor_operation {
namespace device {
namespace instance {

// TODO: Workaround for https://ontrack-internal.amd.com/browse/SWDEV-435347
// void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
// std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
// NDHWGC,
// GKZYXC,
// ck::Tuple<>,
// NDHWGK,
// ck::Tuple<BF16, BF16>,
// ck::Tuple<BF16, BF16>,
// ck::Tuple<>,
// BF16,
// ScaleAdd,
// ScaleAdd,
// PassThrough>>>& instances)
// {
// add_device_operation_instances(
// instances,
// device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
// NDHWGC,
// GKZYXC,
// NDHWGK,
// ConvFwdDefault>{});
// add_device_operation_instances(
// instances,
// device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
// NDHWGC,
// GKZYXC,
// NDHWGK,
// ConvFwd1x1P0>{});
// add_device_operation_instances(
// instances,
// device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
// NDHWGC,
// GKZYXC,
// NDHWGK,
// ConvFwd1x1S1P0>{});
// }
void add_device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                NDHWGC,
                                                                GKZYXC,
                                                                ck::Tuple<>,
                                                                NDHWGK,
                                                                ck::Tuple<BF16, BF16>,
                                                                ck::Tuple<BF16, BF16>,
                                                                ck::Tuple<>,
                                                                BF16,
                                                                ScaleAdd,
                                                                ScaleAdd,
                                                                PassThrough>>>& instances)
{
    add_device_operation_instances(instances,
                                   device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
                                                                                          NDHWGC,
                                                                                          GKZYXC,
                                                                                          NDHWGK,
                                                                                          ConvFwdDefault>{});
    add_device_operation_instances(instances,
                                   device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
                                                                                          NDHWGC,
                                                                                          GKZYXC,
                                                                                          NDHWGK,
                                                                                          ConvFwd1x1P0>{});
    add_device_operation_instances(instances,
                                   device_grouped_conv_fwd_xdl_scaleadd_ab_bf16_instances<3,
                                                                                          NDHWGC,
                                                                                          GKZYXC,
                                                                                          NDHWGK,
                                                                                          ConvFwd1x1S1P0>{});
}

} // namespace instance
} // namespace device
...
library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt

set(DEVICE_SOFTMAX_INSTANCES)
list(APPEND DEVICE_SOFTMAX_INSTANCES
add_instance_library(device_softmax_instance
    device_softmax_f16_f16_instance_rank3_reduce1.cpp
    device_softmax_f16_f16_instance_rank3_reduce2.cpp
    device_softmax_f16_f16_instance_rank3_reduce3.cpp
...
@@ -14,4 +13,3 @@ list(APPEND DEVICE_SOFTMAX_INSTANCES
    device_softmax_f32_f32_instance_rank4_reduce2.cpp
    device_softmax_f32_f32_instance_rank4_reduce3.cpp
    device_softmax_f32_f32_instance_rank4_reduce4.cpp
)
add_instance_library(device_softmax_instance ${DEVICE_SOFTMAX_INSTANCES})
library/src/utility/CMakeLists.txt

## utility
set(UTILITY_SOURCE
add_library(utility STATIC
    device_memory.cpp
    host_tensor.cpp
    convolution_parameter.cpp
)
add_library(utility STATIC ${UTILITY_SOURCE})
add_library(composable_kernel::utility ALIAS utility)
set_target_properties(utility PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_options(utility PRIVATE ${CMAKE_COMPILER_WARNINGS})
target_include_directories(utility PUBLIC
    "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck>"
    "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/utility>")
if(WIN32)
    target_compile_definitions(utility PUBLIC NOMINMAX)
endif()
rocm_install(TARGETS utility
...
profiler/include/profiler/profile_transpose_impl.hpp

@@ -25,7 +25,7 @@ namespace ck {
namespace profiler {

template <typename HostTensorA, typename HostTensorB, typename Functor>
void host_elementwise4D(HostTensorB& B_nchwd, const HostTensorA& A_ncdhw, Functor functor)
void host_elementwise4D(HostTensorB& B_ndhwc, const HostTensorA& A_ncdhw, Functor functor)
{
    for(std::size_t n = 0; n < A_ncdhw.mDesc.GetLengths()[0]; ++n)
        for(std::size_t c = 0; c < A_ncdhw.mDesc.GetLengths()[1]; ++c)
...
@@ -34,7 +34,7 @@ void host_elementwise4D(HostTensorB& B_nchwd, const HostTensorA& A_ncdhw, Functo
                for(std::size_t w = 0; w < A_ncdhw.mDesc.GetLengths()[4]; ++w)
                {
                    auto a_val = A_ncdhw(n, c, d, h, w);
                    functor(B_nchwd(n, c, h, w, d), a_val);
                    functor(B_ndhwc(n, d, h, w, c), a_val);
                }
        }
...
@@ -77,8 +77,6 @@ bool profile_transpose_impl(int do_verification,
    using ElementOp = ck::tensor_operation::element_wise::PassThrough;
    // const auto element_op = ElementOp{};

    DeviceMem a_device_buf(sizeof(ADataType) * a.mDesc.GetElementSpaceSize());
    DeviceMem b_device_buf(sizeof(BDataType) * b.mDesc.GetElementSpaceSize());
...
@@ -118,6 +116,7 @@ bool profile_transpose_impl(int do_verification,
        // re-init C to zero before profiling next kernel
        b_device_buf.SetZero();

        // run for verification
        invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, false});

        if(do_verification)
...
@@ -136,6 +135,7 @@ bool profile_transpose_impl(int do_verification,
        std::string op_name = op_ptr->GetTypeString();

        // run for timing purposes
        float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
...
@@ -153,10 +153,6 @@ bool profile_transpose_impl(int do_verification,
        std::cout << "Perf: " << std::setw(10) << ave_time << " ms, " << tflops << " TFlops, "
                  << gb_per_sec << " GB/s, " << op_name << std::endl;

        // pass = pass & ck::utils::check_err(b_device_result, b_host_result);
        pass &= ck::utils::check_err(
            b.mData, host_b.mData, "Error: Incorrect results b", 1e-3, 1e-3);

        if(tflops > best_tflops)
        {
            best_op_name = op_name;
...
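Editor's note: the hunks above change the host reference so that an NCDHW input now produces an NDHWC output (parameter B_nchwd renamed to B_ndhwc, and the functor is called with indices (n, d, h, w, c)). As a standalone illustration of that index mapping only — this sketch is mine, not part of the commit, and the flat-index helpers stand in for HostTensor:

// Standalone sketch of the NCDHW -> NDHWC mapping used by host_elementwise4D above.
// The flat-index lambdas are illustrative only; the real code indexes HostTensor objects.
#include <cassert>
#include <cstddef>
#include <numeric>
#include <vector>

int main()
{
    const std::size_t N = 2, C = 3, D = 2, H = 2, W = 2;

    // Input laid out as NCDHW, filled with 0, 1, 2, ...
    std::vector<int> a(N * C * D * H * W);
    std::iota(a.begin(), a.end(), 0);

    // Output laid out as NDHWC.
    std::vector<int> b(a.size());

    auto a_idx = [&](std::size_t n, std::size_t c, std::size_t d, std::size_t h, std::size_t w) {
        return (((n * C + c) * D + d) * H + h) * W + w;
    };
    auto b_idx = [&](std::size_t n, std::size_t d, std::size_t h, std::size_t w, std::size_t c) {
        return (((n * D + d) * H + h) * W + w) * C + c;
    };

    // Same traversal as the reference loop: copy a(n, c, d, h, w) into b(n, d, h, w, c).
    for(std::size_t n = 0; n < N; ++n)
        for(std::size_t c = 0; c < C; ++c)
            for(std::size_t d = 0; d < D; ++d)
                for(std::size_t h = 0; h < H; ++h)
                    for(std::size_t w = 0; w < W; ++w)
                        b[b_idx(n, d, h, w, c)] = a[a_idx(n, c, d, h, w)];

    // Spot-check one element of the mapping.
    assert(b[b_idx(1, 1, 0, 1, 2)] == a[a_idx(1, 2, 1, 0, 1)]);
    return 0;
}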
profiler/src/CMakeLists.txt

@@ -29,6 +29,7 @@ set(PROFILER_SOURCES
    profile_batchnorm_infer.cpp
    profile_grouped_conv_bwd_data.cpp
    profile_conv_tensor_rearrange.cpp
    profile_transpose.cpp
)
if(DL_KERNELS)
...
@@ -58,7 +59,7 @@ set(PROFILER_EXECUTABLE ckProfiler)
add_executable(${PROFILER_EXECUTABLE} ${PROFILER_SOURCES})
target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility getopt::getopt)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_splitk_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_multiply_instance)
...
@@ -91,6 +92,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_d
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_data_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_image_to_column_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_column_to_image_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_transpose_instance)
if(DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64" OR NOT DEFINED DTYPES)
    target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_contraction_bilinear_instance)
...
profiler/src/profile_transpose.cpp (new file, 0 → 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.

#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>

#include "profiler/profile_transpose_impl.hpp"
#include "profiler_operation_registry.hpp"

enum struct DataType
{
    F32_F32_F32_F32_F32, // 0
    F16_F16_F16_F16_F16, // 1
};

#define OP_NAME "transpose"
#define OP_DESC "Transpose"

struct TransposeArgParser
{
    std::unordered_map<std::string, std::vector<int>> long_opts = {{"lengths", {}}};

    bool parse_opt(const int argc, char* argv[], const std::string& key, int i)
    {
        if(std::string("--") + key == argv[i])
        {
            const int pos = i;
            while(++i < argc && argv[i][0] != '-') {}
            int end = i;
            for(int j = pos + 1; j < end; j++)
            {
                long_opts[key].push_back(std::stoi(argv[j]));
            }
            return true;
        }
        return false;
    }

    void operator()(int argc, char* argv[])
    {
        for(auto& kv : long_opts)
        {
            for(int i = 1; i < argc; i++)
            {
                if(parse_opt(argc, argv, kv.first, i))
                    break;
            }
        }
    }
};

static void print_helper_msg()
{
    printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
    printf("arg2: data type (0: fp32; 1: fp16)\n");
    printf("arg3: verification (0: no; 1: yes)\n");
    printf("arg4: initialization (0: no init; 1: integer value; 2: decimal value)\n");
    printf("arg5: print tensor value (0: no; 1: yes)\n");
    printf("arg6: time kernel (0=no, 1=yes)\n");
    printf("arg7: --lengths: N, C, D, H, W\n");
}

int profile_transpose(int argc, char* argv[])
{
    if(argc != 7)
    {
        print_helper_msg();
        exit(1);
    }

    TransposeArgParser arg_parser;

    const auto data_type       = static_cast<DataType>(std::stoi(argv[2]));
    const bool do_verification = std::stoi(argv[3]);
    const int init_method      = std::stoi(argv[4]);
    const bool do_log          = std::stoi(argv[5]);
    const bool time_kernel     = std::stoi(argv[6]);
    arg_parser(argc, argv);
    const std::vector<ck::index_t> lengths = arg_parser.long_opts["lengths"];

    using F32 = float;
    using F16 = ck::half_t;

    auto profile = [&](auto a_type, auto b_type) {
        using ADataType = decltype(a_type);
        using BDataType = decltype(b_type);

        constexpr ck::index_t NumDim = 5;

        bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType, NumDim>(
            do_verification, init_method, do_log, time_kernel, lengths);

        return pass ? 0 : 1;
    };

    if(data_type == DataType::F32_F32_F32_F32_F32)
    {
        return profile(F32{}, F32{});
    }
    else if(data_type == DataType::F16_F16_F16_F16_F16)
    {
        return profile(F16{}, F16{});
    }
    else
    {
        std::cout << "this data_type & layout is not implemented" << std::endl;
        return 1;
    }
}

REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_transpose);
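Editor's note: the --lengths values are collected by TransposeArgParser::parse_opt above: integers following the flag are consumed until the next token that starts with '-'. Below is a standalone sketch of just that collection step, not part of the commit; the argv array is illustrative (the binary and operation names are taken from PROFILER_EXECUTABLE and OP_NAME in this diff):

// Standalone sketch of the "--lengths" collection logic used by TransposeArgParser above.
#include <cassert>
#include <string>
#include <vector>

int main()
{
    const char* argv[] = {"ckProfiler", "transpose", "--lengths", "4", "16", "16", "32", "5"};
    const int argc     = 8;

    const std::string key = "lengths";
    std::vector<int> values;

    for(int i = 1; i < argc; i++)
    {
        // Match "--lengths", then consume integers until the next token starting with '-'.
        if(std::string("--") + key == argv[i])
        {
            int j = i;
            while(++j < argc && argv[j][0] != '-')
            {
                values.push_back(std::stoi(argv[j]));
            }
            break;
        }
    }

    assert((values == std::vector<int>{4, 16, 16, 32, 5}));
    return 0;
}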
test/CMakeLists.txt

@@ -3,7 +3,7 @@ include_directories(BEFORE
    ${PROJECT_SOURCE_DIR}/profiler/include
)
include(googletest)
include(gtest)
add_custom_target(tests)
...
@@ -50,6 +50,7 @@ function(add_test_executable TEST_NAME)
    #only continue if there are some source files left on the list
    if(ARGN)
        add_executable(${TEST_NAME} ${ARGN})
        target_link_libraries(${TEST_NAME} PRIVATE getopt::getopt)
        add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>)
        add_dependencies(tests ${TEST_NAME})
        add_dependencies(check ${TEST_NAME})
...
@@ -58,9 +59,7 @@ function(add_test_executable TEST_NAME)
    endif()
    #message("add_test returns ${result}")
    set(result ${result} PARENT_SCOPE)
endfunction(add_test_executable TEST_NAME)
include(GoogleTest)
endfunction()

function(add_gtest_executable TEST_NAME)
    message("adding gtest ${TEST_NAME}")
...
@@ -109,14 +108,14 @@ function(add_gtest_executable TEST_NAME)
        # suppress gtest warnings
        target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef)
        target_link_libraries(${TEST_NAME} PRIVATE gtest_main)
        target_link_libraries(${TEST_NAME} PRIVATE gtest_main getopt::getopt)
        add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>)
        rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
        set(result 0)
    endif()
    #message("add_gtest returns ${result}")
    set(result ${result} PARENT_SCOPE)
endfunction(add_gtest_executable TEST_NAME)
endfunction()

add_subdirectory(magic_number_division)
add_subdirectory(space_filling_curve)
...
test/transpose/test_transpose.cpp

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include <tuple>

#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_transpose_util.hpp"
#include "profiler/profile_transpose_impl.hpp"

using F16 = ck::half_t;
using F32 = float;
using ck::index_t;

template <typename Tuple>
class TestTranspose : public ::testing::Test
{
    protected:
    using ADataType = std::tuple_element_t<0, Tuple>;
    using BDataType = std::tuple_element_t<1, Tuple>;

    void Run()
    {
        std::vector<std::vector<ck::index_t>> lengths = {
            {4, 16, 16, 32, 5}, {8, 16, 16, 32, 8} /**{32, 16, 16, 32, 8},**/};

        for(auto length : lengths)
        {
            bool success = ck::profiler::profile_transpose_impl<ADataType, BDataType, 5>(
                true, 2, false, false, length);
            EXPECT_TRUE(success);
        }
    }
};

// clang-format off
using KernelTypes = ::testing::Types<std::tuple<F16, F16>, std::tuple<F32, F32> >;
// clang-format on
using KernelTypes = ::testing::Types<std::tuple<F16, F16>, std::tuple<F32, F32>>;

TYPED_TEST_SUITE(TestTranspose, KernelTypes);

//#include "test_transpose_ut_cases.inc"
TYPED_TEST(TestTranspose, Test_FP16) { this->Run(); }
TYPED_TEST(TestTranspose, Test_FP32) { this->Run(); }
test/transpose/test_transpose_ut_cases.inc (deleted, 100644 → 0)

#pragma once

TYPED_TEST(TestTranspose, Test1)
{
    // for 16, 8, 16, 32, 8
    std::vector<int> Ms{1, 2, 3, 4, 5, 6};
    std::vector<index_t> lengths{16, 8, 16, 32, 8};
    /**constexpr int N = 16;
    constexpr int C = 8;
    constexpr int D = 16;
    constexpr int H = 32;
    constexpr int W = 8;**/
    this->Run();
}

TYPED_TEST(TestTranpose, Test2)
{
    std::vector<int> Ms{127, 255, 312, 799, 1573};
    std::vector<index_t> lengths{16, 8, 16, 32, 16};
    /**constexpr int N = 16;
    constexpr int C = 8;
    constexpr int D = 16;
    constexpr int H = 32;
    constexpr int W = 8;**/
    this->Run();
}
test/transpose/test_transpose_util.hpp (deleted, 100644 → 0)

// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

#pragma once

#include <string>
#include <sstream>
#include <tuple>
#include <vector>

#include <gtest/gtest.h>

#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "include/ck/utility/data_type.hpp"
#include "profiler/profile_transpose_impl.hpp"

namespace ck {
namespace test {

template <typename Tuple>
class TestTranspose : public testing::Test
{
    using F32 = float;

    protected:
    using ADataType = std::tuple_element_t<0, Tuple>;
    using BDataType = std::tuple_element_t<1, Tuple>;

    public:
    static constexpr bool verify_     = true;
    static constexpr int init_method_ = 1; // decimal value initialization
    static constexpr bool log_        = false;
    static constexpr bool bench_      = false; // measure kernel performance

    std::vector<std::vector<index_t>> lengths_ = {{16, 32, 16, 32, 16}, {16, 8, 16, 32, 8}};

    void Run()
    {
        for(auto length : this->lengths_)
        {
            this->RunSingle(length);
        }
    }

    void RunSingle()
    {
        bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType, 5>(
            verify_, init_method_, log_, bench_, lengths_);
        EXPECT_TRUE(pass);
    }
};

} // namespace test
} // namespace ck
test/wrapper/CMakeLists.txt

@@ -2,3 +2,7 @@ add_gtest_executable(test_layout test_layout.cpp)
target_link_libraries(test_layout PRIVATE utility)
add_gtest_executable(test_tensor test_tensor.cpp)
target_link_libraries(test_tensor PRIVATE utility)
add_gtest_executable(test_copy test_copy.cpp)
target_link_libraries(test_copy PRIVATE utility)
add_gtest_executable(test_partition test_partition.cpp)
target_link_libraries(test_partition PRIVATE utility)
test/wrapper/test_copy.cpp (new file, 0 → 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include <numeric>
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>

#include <gtest/gtest.h>

#include "ck/host_utility/kernel_launch.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/utility/common_header.hpp"

#include "ck/wrapper/layout.hpp"
#include "ck/wrapper/tensor.hpp"
#include "ck/wrapper/operations/copy.hpp"

// Test copy from Global to Global through LDS and VGPR
template <typename InputTensor,
          typename OutputTensor,
          typename BlockShape,
          typename ThreadLayoutShape,
          typename LocalTileSteps,
          typename LocalPartitionSteps>
__global__ void TestCopyDevice(const InputTensor input_tensor,
                               OutputTensor output_tensor,
                               const BlockShape tile_shape,
                               const ThreadLayoutShape thread_layout,
                               const LocalTileSteps block_steps,
                               const LocalPartitionSteps thread_steps)
{
    __shared__ ck::index_t p_shared[ck::wrapper::size(tile_shape)];
    auto tensor_lds = ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Lds>(
        p_shared, ck::wrapper::make_layout(tile_shape));

    const auto block_idxs = ck::make_tuple(ck::make_tuple(0, 0), blockIdx.x);
    // Get local tiles for global memory
    const auto input_local_tile =
        ck::wrapper::make_local_tile(input_tensor, tile_shape, block_idxs, block_steps);
    const auto output_local_tile =
        ck::wrapper::make_local_tile(output_tensor, tile_shape, block_idxs, block_steps);
    // Get partition per thread
    const auto input_local_partition = ck::wrapper::make_local_partition(
        input_local_tile, thread_layout, threadIdx.x, thread_steps);
    auto lds_local_partition =
        ck::wrapper::make_local_partition(tensor_lds, thread_layout, threadIdx.x, thread_steps);
    auto output_local_partition = ck::wrapper::make_local_partition(
        output_local_tile, thread_layout, threadIdx.x, thread_steps);
    // Allocate VGPR
    constexpr ck::index_t scalar_per_vector = 1;
    constexpr ck::index_t vgpr_size         = ck::wrapper::size(lds_local_partition);
    auto tensor_vgpr = ck::wrapper::make_register_tensor<ck::wrapper::MemoryTypeEnum::Vgpr,
                                                         vgpr_size,
                                                         scalar_per_vector,
                                                         ck::index_t>();
    // Perform copy
    ck::wrapper::copy(input_local_partition, lds_local_partition);
    ck::wrapper::copy(lds_local_partition, tensor_vgpr);
    ck::wrapper::copy(tensor_vgpr, output_local_partition);
}

void PerformCopyGlobalToGlobalViaLDS()
{
    const auto shape =
        ck::make_tuple(ck::make_tuple(ck::Number<2>{}, ck::Number<2>{}), ck::Number<256>{});
    const auto strides =
        ck::make_tuple(ck::make_tuple(ck::Number<1>{}, ck::Number<2>{}), ck::Number<4>{});
    const auto layout = ck::wrapper::make_layout(shape, strides);

    // 0, 1, 2, ..., size(shape) - 1
    std::vector<ck::index_t> input_data(ck::wrapper::size(shape));
    std::iota(input_data.begin(), input_data.end(), 0);
    // Global memory buffers
    DeviceMem in_buf(ck::wrapper::size(layout) * sizeof(ck::index_t));
    DeviceMem out_buf(ck::wrapper::size(layout) * sizeof(ck::index_t));

    in_buf.ToDevice(input_data.data());
    out_buf.SetZero();

    // Create tensors for global memory
    const auto input_tensor_global = ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Global>(
        static_cast<const ck::index_t*>(in_buf.GetDeviceBuffer()), layout);
    auto output_tensor_global = ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Global>(
        static_cast<ck::index_t*>(out_buf.GetDeviceBuffer()), layout);

    const auto thread_layout =
        ck::make_tuple(ck::make_tuple(ck::Number<1>{}, ck::Number<1>{}), ck::Number<32>{});
    const auto tile_shape =
        ck::make_tuple(ck::make_tuple(ck::Number<2>{}, ck::Number<2>{}), ck::Number<64>{});
    const auto thread_steps =
        ck::make_tuple(ck::make_tuple(ck::Number<1>{}, ck::Number<1>{}), ck::Number<2>{});
    const auto block_steps =
        ck::make_tuple(ck::make_tuple(ck::Number<1>{}, ck::Number<1>{}), ck::Number<64>{});

    const ck::index_t grid_size = ck::math::integer_divide_ceil(
        ck::wrapper::size(input_tensor_global), ck::wrapper::size(tile_shape));
    const auto kernel = TestCopyDevice<decltype(input_tensor_global),
                                       decltype(output_tensor_global),
                                       decltype(tile_shape),
                                       decltype(thread_layout),
                                       decltype(block_steps),
                                       decltype(thread_steps)>;
    launch_and_time_kernel(StreamConfig{},
                           kernel,
                           dim3(grid_size),
                           dim3(ck::wrapper::size(thread_layout)),
                           0,
                           input_tensor_global,
                           output_tensor_global,
                           tile_shape,
                           thread_layout,
                           block_steps,
                           thread_steps);

    // Verify results
    std::vector<ck::index_t> output_data(ck::wrapper::size(shape));
    out_buf.FromDevice(output_data.data());

    EXPECT_TRUE(ck::utils::check_err(output_data, input_data));
}

TEST(TestCopy, CopyGlobalToGlobalViaLDS) { PerformCopyGlobalToGlobalViaLDS(); }
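Editor's note: for the concrete shapes in this test, the launch configuration works out to 4 blocks of 32 threads. A standalone arithmetic check of that claim (my own sketch, not part of the commit):

// Launch-configuration arithmetic for PerformCopyGlobalToGlobalViaLDS above.
#include <cassert>

int main()
{
    const int tensor_size       = 2 * 2 * 256; // size of shape ((2, 2), 256)  = 1024
    const int tile_size         = 2 * 2 * 64;  // size of tile_shape ((2, 2), 64) = 256
    const int threads_per_block = 1 * 1 * 32;  // size of thread_layout ((1, 1), 32)

    // Same rounding-up division as ck::math::integer_divide_ceil in the test.
    const int grid_size = (tensor_size + tile_size - 1) / tile_size;

    assert(grid_size == 4);
    assert(threads_per_block == 32);
    return 0;
}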
test/wrapper/test_layout.cpp

@@ -84,7 +84,8 @@ TEST_F(TestWrapperLayout, 2d)
        ck::make_tuple(ck::Sequence<0>{}));
    const auto layout_runtime = ck::wrapper::make_layout(ck::make_tuple(d1, d0));
    const auto layout_compiletime =
        ck::wrapper::make_layout(ck::make_tuple(ck::Number<d1>{}, ck::Number<d0>{}));
        ck::wrapper::make_layout(ck::make_tuple(ck::Number<d1>{}, ck::Number<d0>{}),
                                 ck::make_tuple(ck::Number<s1>{}, ck::Number<s0>{}));

    std::vector<ck::Tuple<ck::index_t, ck::index_t>> idxs;
    for(ck::index_t h = 0; h < d1; h++)
...
@@ -435,19 +436,11 @@ TEST(TestLayoutHelpers, ShapeAndStrides)
    constexpr bool check_compiletime_shape =
        std::is_same_v<decltype(shape_compiletime),
                       std::remove_reference_t<decltype(shape(layout_compiletime))>>;
    constexpr bool check_compiletime_strides =
        std::is_same_v<decltype(strides_compiletime),
                       std::remove_reference_t<decltype(stride(layout_compiletime))>>;
    constexpr bool check_runtime_shape =
        std::is_same_v<decltype(shape_runtime),
                       std::remove_reference_t<decltype(shape(layout_runtime))>>;
    constexpr bool check_runtime_strides =
        std::is_same_v<decltype(strides_runtime),
                       std::remove_reference_t<decltype(stride(layout_runtime))>>;

    EXPECT_TRUE(check_compiletime_shape);
    EXPECT_TRUE(check_compiletime_strides);
    EXPECT_TRUE(check_runtime_shape);
    EXPECT_TRUE(check_runtime_strides);
}

TEST(TestLayoutHelpers, Hierarchical)
...
test/wrapper/test_partition.cpp (new file, 0 → 100644)

// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include <numeric>
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>

#include <gtest/gtest.h>

#include "ck/host_utility/kernel_launch.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/utility/common_header.hpp"

#include "ck/wrapper/layout.hpp"
#include "ck/wrapper/tensor.hpp"

TEST(TestPartition, LocalPartition)
{
    const auto shape =
        ck::make_tuple(ck::make_tuple(ck::Number<16>{}, ck::Number<4>{}), ck::Number<4>{});
    const auto strides =
        ck::make_tuple(ck::make_tuple(ck::Number<1>{}, ck::Number<16>{}), ck::Number<64>{});
    const auto layout = ck::wrapper::make_layout(shape, strides);

    std::vector<ck::index_t> data(ck::wrapper::size(layout));
    std::iota(data.begin(), data.end(), 0);
    const auto tensor =
        ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Generic>(data.data(), layout);

    const auto thread_steps =
        ck::make_tuple(ck::make_tuple(ck::Number<2>{}, ck::Number<1>{}), ck::Number<1>{});
    const auto thread_layout =
        ck::make_tuple(ck::make_tuple(ck::Number<8>{}, ck::Number<1>{}), ck::Number<1>{});

    for(ck::index_t thread_id = 0; thread_id < ck::wrapper::size(thread_layout); thread_id++)
    {
        const auto raked_partition =
            ck::wrapper::make_local_partition(tensor, thread_layout, thread_id);
        const auto expected_partition_size =
            ck::wrapper::size(tensor) / ck::wrapper::size(thread_layout);
        EXPECT_EQ(ck::wrapper::size(raked_partition), expected_partition_size);
        EXPECT_EQ(raked_partition(0), thread_id);
    }

    for(ck::index_t thread_id = 0; thread_id < ck::wrapper::size(thread_layout); thread_id++)
    {
        const auto packed_partition =
            ck::wrapper::make_local_partition(tensor, thread_layout, thread_id, thread_steps);
        const auto expected_partition_size =
            ck::wrapper::size(tensor) / ck::wrapper::size(thread_layout);
        const auto expected_partition_first_val =
            thread_id * ck::wrapper::size<0, 0>(thread_steps);
        EXPECT_EQ(ck::wrapper::size(packed_partition), expected_partition_size);
        EXPECT_EQ(packed_partition(0), expected_partition_first_val);
    }
}

TEST(TestPartition, LocalTile)
{
    const auto shape =
        ck::make_tuple(ck::make_tuple(ck::Number<16>{}, ck::Number<4>{}), ck::Number<4>{});
    const auto strides =
        ck::make_tuple(ck::make_tuple(ck::Number<1>{}, ck::Number<16>{}), ck::Number<64>{});
    const auto layout = ck::wrapper::make_layout(shape, strides);

    std::vector<ck::index_t> data(ck::wrapper::size(layout));
    std::iota(data.begin(), data.end(), 0);
    const auto tensor =
        ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Generic>(data.data(), layout);

    const auto block_steps =
        ck::make_tuple(ck::make_tuple(ck::Number<4>{}, ck::Number<2>{}), ck::Number<2>{});
    const auto block_shape =
        ck::make_tuple(ck::make_tuple(ck::Number<4>{}, ck::Number<2>{}), ck::Number<2>{});
    const auto block_layout =
        ck::make_tuple(ck::make_tuple(ck::Number<4>{}, ck::Number<2>{}), ck::Number<2>{});

    std::vector<ck::Tuple<ck::Tuple<ck::index_t, ck::index_t>, ck::index_t>> block_idxs;
    for(ck::index_t x = 0; x < ck::wrapper::size<0, 0>(block_layout); x++)
    {
        for(ck::index_t y = 0; y < ck::wrapper::size<0, 1>(block_layout); y++)
        {
            for(ck::index_t z = 0; z < ck::wrapper::size<1>(block_layout); z++)
            {
                block_idxs.emplace_back(ck::make_tuple(x, y), z);
            }
        }
    }

    for(const auto& block_idx : block_idxs)
    {
        const auto raked_tile = ck::wrapper::make_local_tile(tensor, block_shape, block_idx);
        const auto expected_tile_size = ck::wrapper::size(block_shape);
        EXPECT_EQ(ck::wrapper::size(raked_tile), expected_tile_size);
        EXPECT_EQ(raked_tile(0), layout(block_idx));
    }

    for(const auto& block_idx : block_idxs)
    {
        const auto packed_tile =
            ck::wrapper::make_local_tile(tensor, block_shape, block_idx, block_steps);
        const auto expected_tile_size = ck::wrapper::size(block_shape);
        const auto expected_tile_first_val =
            ck::wrapper::size<0, 0>(block_idx) * ck::wrapper::size<0, 0>(block_shape) *
                ck::wrapper::size<0, 0>(strides) +
            ck::wrapper::size<0, 1>(block_idx) * ck::wrapper::size<0, 1>(block_shape) *
                ck::wrapper::size<0, 1>(strides) +
            ck::wrapper::size<1>(block_idx) * ck::wrapper::size<1>(block_shape) *
                ck::wrapper::size<1>(strides);
        EXPECT_EQ(ck::wrapper::size(packed_tile), expected_tile_size);
        EXPECT_EQ(packed_tile(0), expected_tile_first_val);
    }
}
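Editor's note: to make expected_tile_first_val above concrete, take the test's strides ((1, 16), 64), block_shape ((4, 2), 2) and the block index ((1, 1), 1); the packed tile should then start at linear offset 1*4*1 + 1*2*16 + 1*2*64 = 164. A standalone arithmetic check (illustrative, not part of the commit):

// Worked example of the expected_tile_first_val formula from TestPartition.LocalTile above.
#include <cassert>

int main()
{
    const int block_idx_x = 1, block_idx_y = 1, block_idx_z = 1; // block_idx ((x, y), z)
    const int shape_x = 4, shape_y = 2, shape_z = 2;             // block_shape ((4, 2), 2)
    const int stride_x = 1, stride_y = 16, stride_z = 64;        // strides ((1, 16), 64)

    const int expected_first_val = block_idx_x * shape_x * stride_x +
                                   block_idx_y * shape_y * stride_y +
                                   block_idx_z * shape_z * stride_z;

    assert(expected_first_val == 164); // 4 + 32 + 128
    return 0;
}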
test/wrapper/test_tensor.cpp

@@ -108,7 +108,6 @@ __global__ void TestTensorReadWriteDevice(void* data, void* success)
    bool* casted_success_ptr = static_cast<bool*>(success);

    const auto layout = ck::wrapper::make_layout(ck::make_tuple(ck::make_tuple(2, 2), 2));
    constexpr auto register_layout = ck::wrapper::make_layout(ck::make_tuple(ck::Number<8>{}));
    auto tensor_global =
        ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Global>(casted_data_ptr, layout);
...
@@ -116,18 +115,18 @@ __global__ void TestTensorReadWriteDevice(void* data, void* success)
    auto tensor_vgpr = ck::wrapper::make_register_tensor<ck::wrapper::MemoryTypeEnum::Vgpr,
                                                         nelems,
                                                         scalar_per_vector,
                                                         ck::index_t>(register_layout);
                                                         ck::index_t>();
    auto tensor_sgpr = ck::wrapper::make_register_tensor<ck::wrapper::MemoryTypeEnum::Sgpr,
                                                         nelems,
                                                         scalar_per_vector,
                                                         ck::index_t>(register_layout);
                                                         ck::index_t>();

    InitTensor(tensor_global);
    InitTensor(tensor_lds);
    StaticInitTensor<nelems>(tensor_vgpr);
    StaticInitTensor<nelems>(tensor_sgpr);

    *casted_success_ptr &= TestTensorCheck1d(tensor_global);
    *casted_success_ptr = TestTensorCheck1d(tensor_global);
    *casted_success_ptr &= TestTensorCheck3d(tensor_global);
    *casted_success_ptr &= TestTensorCheck1d(tensor_lds);
...
@@ -151,7 +150,7 @@ TEST(TestTensor, ReadWriteGlobalLdsRegistersMemory)
                          TestTensorReadWriteDevice,
                          dim3(1),
                          dim3(1),
                          nelems * sizeof(ck::index_t),
                          0,
                          data_buf.GetDeviceBuffer(),
                          success_buf.GetDeviceBuffer());
...
@@ -173,33 +172,45 @@ TEST(TestTensor, Slicing)
    auto tensor2x2x2 = tensor(ck::make_tuple(ck::wrapper::slice(2), ck::wrapper::slice(2)),
                              ck::wrapper::slice(2));
    EXPECT_EQ(tensor2x2x2(0), layout(ck::make_tuple(ck::make_tuple(0, 0), 0)));
    EXPECT_EQ(ck::wrapper::rank(tensor2x2x2), 2);
    EXPECT_EQ(ck::wrapper::depth(tensor2x2x2), 2);
    EXPECT_EQ(ck::wrapper::size(tensor2x2x2), 8);
    EXPECT_TRUE(TestTensorCheck1d(tensor2x2x2));

    auto tensor2x2 = tensor(ck::make_tuple(1, ck::wrapper::slice(2)), ck::wrapper::slice(2));
    EXPECT_EQ(tensor2x2(0), layout(ck::make_tuple(ck::make_tuple(1, 0), 0)));
    EXPECT_EQ(ck::wrapper::rank(tensor2x2), 2);
    EXPECT_EQ(ck::wrapper::depth(tensor2x2), 2);
    EXPECT_EQ(ck::wrapper::size(tensor2x2), 4);
    EXPECT_TRUE(TestTensorCheck1d(tensor2x2, layout(ck::make_tuple(ck::make_tuple(1, 0), 0))));
    EXPECT_TRUE(TestTensorCheck1d(tensor2x2));

    auto tensor1x1 = tensor(ck::make_tuple(1, ck::wrapper::slice(1, 2)), ck::wrapper::slice(1, 2));
    EXPECT_EQ(tensor1x1(0), layout(ck::make_tuple(ck::make_tuple(1, 1), 1)));
    EXPECT_EQ(rank(tensor1x1), 2);
    EXPECT_EQ(depth(tensor1x1), 2);
    EXPECT_EQ(size(tensor1x1), 1);
    EXPECT_TRUE(TestTensorCheck1d(tensor1x1, layout(ck::make_tuple(ck::make_tuple(1, 1), 1))));
    EXPECT_TRUE(TestTensorCheck1d(tensor1x1));

    auto tensor2 = tensor(ck::make_tuple(1, 1), ck::wrapper::slice(0, 2));
    EXPECT_EQ(tensor2(0), layout(ck::make_tuple(ck::make_tuple(1, 1), 0)));
    EXPECT_EQ(ck::wrapper::rank(tensor2), 1);
    EXPECT_EQ(ck::wrapper::depth(tensor2), 1);
    EXPECT_EQ(ck::wrapper::size(tensor2), 2);
    EXPECT_TRUE(TestTensorCheck1d(tensor2, layout(ck::make_tuple(ck::make_tuple(1, 1), 0))));
    EXPECT_TRUE(TestTensorCheck1d(tensor2));

    auto tensor2_v2 = tensor(2, ck::wrapper::slice(0, 2));
    EXPECT_EQ(tensor2_v2(0), layout(ck::make_tuple(2, 0)));
    EXPECT_EQ(ck::wrapper::rank(tensor2_v2), 1);
    EXPECT_EQ(ck::wrapper::depth(tensor2_v2), 1);
    EXPECT_EQ(ck::wrapper::size(tensor2_v2), 2);
    EXPECT_TRUE(TestTensorCheck1d(tensor2_v2));

    // negative indexing
    auto tensor1x2 = tensor(ck::make_tuple(1, ck::wrapper::slice(0, -2)), ck::wrapper::slice());
    EXPECT_EQ(tensor1x2(0), layout(ck::make_tuple(ck::make_tuple(1, 0), 0)));
    EXPECT_EQ(rank(tensor1x2), 2);
    EXPECT_EQ(depth(tensor1x2), 2);
    EXPECT_EQ(size(tensor1x2), 2);
    EXPECT_TRUE(TestTensorCheck1d(tensor1x2, layout(ck::make_tuple(ck::make_tuple(1, 0), 0))));
    EXPECT_TRUE(TestTensorCheck1d(tensor1x2));
}