"git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "d918b57f9af767d550a203aca5dec3521149b4a7"
Commit 522b7aee authored by Adam Osewski

Merge remote-tracking branch 'origin/develop' into aosewski/ggemm_multi_d2

parents ff936fd6 84832fc4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_groupnorm_bwd_gamma_beta_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestgroupnormBwdGammaBeta : public ::testing::Test
{
protected:
using DYDataType = std::tuple_element_t<0, Tuple>;
using XDataType = std::tuple_element_t<1, Tuple>;
using MeanInvStdDataType = std::tuple_element_t<2, Tuple>;
using ComputeDataType = std::tuple_element_t<3, Tuple>;
using DGammaDataType = std::tuple_element_t<4, Tuple>;
using DBetaDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// Bwd data: [N, H, W, G, C], reduce H, W, C
std::vector<std::vector<ck::index_t>> lengths = {{1, 1, 1, 1, 1},
{1, 2, 3, 4, 5},
{256, 9, 9, 9, 9},
{1, 64, 64, 32, 10},
{1, 32, 32, 32, 20},
{1, 16, 16, 32, 40}};
for(auto length : lengths)
{
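// The four positional flags passed below are assumed to be (verify, init_method, log, time_kernel),
// matching the flags documented in the TestTranspose fixture later in this commit.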
bool success = ck::profiler::profile_groupnorm_bwd_gamma_beta_impl<DYDataType,
XDataType,
MeanInvStdDataType,
ComputeDataType,
DGammaDataType,
DBetaDataType>(
true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType
std::tuple<F32, F32, F32, F32, F32, F32>>;
TYPED_TEST_SUITE(TestgroupnormBwdGammaBeta, KernelTypes);
TYPED_TEST(TestgroupnormBwdGammaBeta, Test_FP32) { this->Run(); }
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_layernorm_bwd_gamma_beta_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestLayernorm2dBwdGammaBeta : public ::testing::Test
{
protected:
using DYDataType = std::tuple_element_t<0, Tuple>;
using XDataType = std::tuple_element_t<1, Tuple>;
using MeanInvStdDataType = std::tuple_element_t<2, Tuple>;
using ComputeDataType = std::tuple_element_t<3, Tuple>;
using DGammaDataType = std::tuple_element_t<4, Tuple>;
using DBetaDataType = std::tuple_element_t<5, Tuple>;
void Run()
{
// Bwd data: [N, D], reduce D
std::vector<std::vector<ck::index_t>> lengths = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
for(auto length : lengths)
{
bool success = ck::profiler::profile_layernorm_bwd_gamma_beta_impl<DYDataType,
XDataType,
MeanInvStdDataType,
ComputeDataType,
DGammaDataType,
DBetaDataType,
2>(
true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
using KernelTypes = ::testing::Types<
// DYDataType, XDataType, MeanInvStdDataType, ComputeDataType, DGammaDataType, DBetaDataType
std::tuple<F32, F32, F32, F32, F32, F32>>;
TYPED_TEST_SUITE(TestLayernorm2dBwdGammaBeta, KernelTypes);
TYPED_TEST(TestLayernorm2dBwdGammaBeta, Test_FP32) { this->Run(); }
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_transpose_util.hpp"
#include "profiler/profile_transpose_impl.hpp"
using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
template <typename Tuple>
class TestTranspose : public ::testing::Test
{
protected:
using ADataType = std::tuple_element_t<0, Tuple>;
using BDataType = std::tuple_element_t<1, Tuple>;
void Run()
{
std::vector<std::vector<ck::index_t>> lengths = {
{4, 16, 16, 32, 5}, {8, 16, 16, 32, 8} /**{32, 16, 16, 32, 8},**/};
for(auto length : lengths)
{
bool success = ck::profiler::profile_transpose_impl<ADataType, BDataType, 5>(
true, 2, false, false, length);
EXPECT_TRUE(success);
}
}
};
// clang-format off
using KernelTypes = ::testing::Types<
std::tuple< F16, F16>,
std::tuple< F32, F32>
>;
// clang-format on
TYPED_TEST_SUITE(TestTranspose, KernelTypes);
//#include "test_transpose_ut_cases.inc"
TYPED_TEST(TestTranspose, Test_FP16) { this->Run(); }
TYPED_TEST(TestTranspose, Test_FP32) { this->Run(); }
#pragma once
TYPED_TEST(TestTranspose, Test1)
{
// for 16, 8, 16, 32, 8
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
std::vector<index_t> lengths{16, 8, 16, 32, 8};
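// NOTE: Ms and lengths above appear to be unused; Run() iterates over the length sets defined in the fixture.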
/**constexpr int N = 16;
constexpr int C = 8;
constexpr int D = 16;
constexpr int H = 32;
constexpr int W = 8;**/
this->Run();
}
TYPED_TEST(TestTranspose, Test2)
{
std::vector<int> Ms{127, 255, 312, 799, 1573};
std::vector<index_t> lengths{16, 8, 16, 32, 16};
/**constexpr int N = 16;
constexpr int C = 8;
constexpr int D = 16;
constexpr int H = 32;
constexpr int W = 8;**/
this->Run();
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <string>
#include <sstream>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "include/ck/utility/data_type.hpp"
#include "profiler/profile_transpose_impl.hpp"
namespace ck {
namespace test {
template <typename Tuple>
class TestTranspose : public testing::Test
{
using F32 = float;
protected:
using ADataType = std::tuple_element_t<0, Tuple>;
using BDataType = std::tuple_element_t<1, Tuple>;
public:
static constexpr bool verify_ = true;
static constexpr int init_method_ = 1; // decimal value initialization
static constexpr bool log_ = false;
static constexpr bool bench_ = false; // measure kernel performance
std::vector<std::vector<index_t>> lengths_ = {{16, 32, 16, 32, 16}, {16, 8, 16, 32, 8}};
void Run()
{
for(auto length : this->lengths_)
{
this->RunSingle(length);
}
}
void RunSingle(const std::vector<index_t>& length)
{
bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType, 5>(
verify_, init_method_, log_, bench_, length);
EXPECT_TRUE(pass);
}
};
} // namespace test
} // namespace ck
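// Usage sketch (assumption, not part of this commit): a test .cpp including this header
// would typically register the fixture along these lines:
//   using ck::test::TestTranspose;
//   using Types = ::testing::Types<std::tuple<float, float>>;
//   TYPED_TEST_SUITE(TestTranspose, Types);
//   TYPED_TEST(TestTranspose, RunsAllLengths) { this->Run(); }
// which runs profile_transpose_impl once per entry in lengths_.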
@@ -2,3 +2,7 @@ add_gtest_executable(test_layout test_layout.cpp)
target_link_libraries(test_layout PRIVATE utility)
add_gtest_executable(test_tensor test_tensor.cpp)
target_link_libraries(test_tensor PRIVATE utility)
add_gtest_executable(test_copy test_copy.cpp)
target_link_libraries(test_copy PRIVATE utility)
add_gtest_executable(test_partition test_partition.cpp)
target_link_libraries(test_partition PRIVATE utility)
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <numeric>
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/wrapper/layout.hpp"
#include "ck/wrapper/tensor.hpp"
#include "ck/wrapper/operations/copy.hpp"
// Test copy from Global to Global through LDS and VGPR
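// Each block stages one tile: global input -> LDS (via per-thread partitions) -> VGPR -> global output.
// The __shared__ buffer inside the kernel holds exactly one tile (size(tile_shape) elements).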
template <typename InputTensor,
typename OutputTensor,
typename BlockShape,
typename ThreadLayoutShape,
bool UseOptimizedCopy>
__global__ void TestCopyDevice(const InputTensor input_tensor,
OutputTensor output_tensor,
const BlockShape tile_shape,
const ThreadLayoutShape thread_layout)
{
__shared__ ck::index_t p_shared[ck::wrapper::size(tile_shape)];
const auto tensor_lds = ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Lds>(
p_shared, ck::wrapper::make_layout(tile_shape));
const auto block_idx = static_cast<ck::index_t>(blockIdx.x);
// Get local tiles for global memory
const auto input_local_tile = ck::wrapper::make_local_tile(input_tensor, tile_shape, block_idx);
const auto output_local_tile =
ck::wrapper::make_local_tile(output_tensor, tile_shape, block_idx);
// Get partition per thread
const auto input_local_partition =
ck::wrapper::make_local_partition(input_local_tile, thread_layout, threadIdx.x);
auto lds_local_partition =
ck::wrapper::make_local_partition(tensor_lds, thread_layout, threadIdx.x);
auto output_local_partition =
ck::wrapper::make_local_partition(output_local_tile, thread_layout, threadIdx.x);
// Allocate VGPR
auto tensor_vgpr =
ck::wrapper::make_register_tensor<ck::wrapper::MemoryTypeEnum::Vgpr, ck::index_t>(
layout(lds_local_partition));
// Perform copy
if constexpr(UseOptimizedCopy)
{
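// Optimized path: request vectorized access along dimension 0, two scalars per vectorized transfer.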
using DimAccessOrder = ck::Tuple<ck::Number<1>, ck::Number<0>>;
constexpr ck::index_t vector_dim = 0;
constexpr ck::index_t scalar_per_vector = 2;
ck::wrapper::copy<DimAccessOrder, vector_dim, scalar_per_vector>(input_local_partition,
lds_local_partition);
// TODO: Enable optimized copy for static buffers
ck::wrapper::copy<DimAccessOrder, vector_dim, scalar_per_vector>(lds_local_partition,
tensor_vgpr);
ck::wrapper::copy<DimAccessOrder, vector_dim, scalar_per_vector>(tensor_vgpr,
output_local_partition);
}
else
{
ck::wrapper::copy(input_local_partition, lds_local_partition);
ck::wrapper::copy(lds_local_partition, tensor_vgpr);
ck::wrapper::copy(tensor_vgpr, output_local_partition);
}
}
template <bool UseOptimizedCopy>
void PerformCopyGlobalToGlobalViaLDS()
{
const auto shape =
ck::make_tuple(ck::make_tuple(ck::Number<2>{}, ck::Number<2>{}), ck::Number<256>{});
const auto strides =
ck::make_tuple(ck::make_tuple(ck::Number<1>{}, ck::Number<2>{}), ck::Number<4>{});
const auto layout = ck::wrapper::make_layout(shape, strides);
// 0, 1, 2, ..., size(shape) - 1
std::vector<ck::index_t> input_data(ck::wrapper::size(shape));
std::iota(input_data.begin(), input_data.end(), 0);
// Global memory buffers
DeviceMem in_buf(ck::wrapper::size(layout) * sizeof(ck::index_t));
DeviceMem out_buf(ck::wrapper::size(layout) * sizeof(ck::index_t));
in_buf.ToDevice(input_data.data());
out_buf.SetZero();
// Create tensors for global memory
const auto input_tensor_global = ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Global>(
static_cast<const ck::index_t*>(in_buf.GetDeviceBuffer()), layout);
auto output_tensor_global = ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Global>(
static_cast<ck::index_t*>(out_buf.GetDeviceBuffer()), layout);
const auto thread_layout = ck::make_tuple(ck::Number<1>{}, ck::Number<32>{});
const auto tile_shape = ck::make_tuple(ck::Number<4>{}, ck::Number<64>{});
const ck::index_t grid_size = ck::math::integer_divide_ceil(
ck::wrapper::size(input_tensor_global), ck::wrapper::size(tile_shape));
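// For the shapes above: size(layout) = 2 * 2 * 256 = 1024 elements and size(tile_shape) = 4 * 64 = 256,
// so grid_size resolves to 4 blocks, each with size(thread_layout) = 1 * 32 = 32 threads.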
const auto kernel = TestCopyDevice<decltype(input_tensor_global),
decltype(output_tensor_global),
decltype(tile_shape),
decltype(thread_layout),
UseOptimizedCopy>;
launch_and_time_kernel(StreamConfig{},
kernel,
dim3(grid_size),
dim3(ck::wrapper::size(thread_layout)),
0,
input_tensor_global,
output_tensor_global,
tile_shape,
thread_layout);
// Verify results
std::vector<ck::index_t> output_data(ck::wrapper::size(shape));
out_buf.FromDevice(output_data.data());
EXPECT_TRUE(ck::utils::check_err(output_data, input_data));
}
TEST(TestCopyGlobalToGlobalViaLDS, GenericCopy) { PerformCopyGlobalToGlobalViaLDS<false>(); }
TEST(TestCopyGlobalToGlobalViaLDS, OptimizedCopy) { PerformCopyGlobalToGlobalViaLDS<true>(); }
@@ -84,7 +84,8 @@ TEST_F(TestWrapperLayout, 2d)
ck::make_tuple(ck::Sequence<0>{}));
const auto layout_runtime = ck::wrapper::make_layout(ck::make_tuple(d1, d0));
const auto layout_compiletime =
ck::wrapper::make_layout(ck::make_tuple(ck::Number<d1>{}, ck::Number<d0>{}));
ck::wrapper::make_layout(ck::make_tuple(ck::Number<d1>{}, ck::Number<d0>{}),
ck::make_tuple(ck::Number<s1>{}, ck::Number<s0>{}));
std::vector<ck::Tuple<ck::index_t, ck::index_t>> idxs;
for(ck::index_t h = 0; h < d1; h++)
@@ -435,19 +436,11 @@ TEST(TestLayoutHelpers, ShapeAndStrides)
constexpr bool check_compiletime_shape =
std::is_same_v<decltype(shape_compiletime),
std::remove_reference_t<decltype(shape(layout_compiletime))>>;
constexpr bool check_compiletime_strides =
std::is_same_v<decltype(strides_compiletime),
std::remove_reference_t<decltype(stride(layout_compiletime))>>;
constexpr bool check_runtime_shape =
std::is_same_v<decltype(shape_runtime),
std::remove_reference_t<decltype(shape(layout_runtime))>>;
constexpr bool check_runtime_strides =
std::is_same_v<decltype(strides_runtime),
std::remove_reference_t<decltype(stride(layout_runtime))>>;
EXPECT_TRUE(check_compiletime_shape);
EXPECT_TRUE(check_compiletime_strides);
EXPECT_TRUE(check_runtime_shape);
EXPECT_TRUE(check_runtime_strides);
}
TEST(TestLayoutHelpers, Hierarchical)
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <numeric>
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/wrapper/layout.hpp"
#include "ck/wrapper/tensor.hpp"
TEST(TestPartition, LocalPartition)
{
const auto shape =
ck::make_tuple(ck::make_tuple(ck::Number<16>{}, ck::Number<4>{}), ck::Number<4>{});
const auto strides =
ck::make_tuple(ck::make_tuple(ck::Number<1>{}, ck::Number<16>{}), ck::Number<64>{});
const auto layout = ck::wrapper::make_layout(shape, strides);
std::vector<ck::index_t> data(ck::wrapper::size(layout));
std::iota(data.begin(), data.end(), 0);
const auto tensor =
ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Generic>(data.data(), layout);
const auto thread_steps = ck::make_tuple(ck::Number<8>{}, ck::Number<1>{});
const auto thread_layout = ck::make_tuple(ck::Number<8>{}, ck::Number<1>{});
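// With 8 threads over a 256-element tensor, each partition is expected to hold 256 / 8 = 32 elements,
// and thread i should start at offset i * 8 (thread_steps), as checked below.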
for(ck::index_t thread_id = 0; thread_id < ck::wrapper::size(thread_layout); thread_id++)
{
const auto packed_partition =
ck::wrapper::make_local_partition(tensor, thread_layout, thread_id);
const auto expected_partition_size =
ck::wrapper::size(tensor) / ck::wrapper::size(thread_layout);
const auto expected_partition_first_val = thread_id * ck::wrapper::size<0>(thread_steps);
const auto expected_partition_second_val = expected_partition_first_val + 1;
EXPECT_EQ(ck::wrapper::size(packed_partition), expected_partition_size);
EXPECT_EQ(packed_partition(0), expected_partition_first_val);
EXPECT_EQ(packed_partition(1), expected_partition_second_val);
}
}
TEST(TestPartition, LocalTile)
{
const auto shape = ck::make_tuple(ck::Number<16>{}, ck::Number<4>{}, ck::Number<4>{});
const auto strides = ck::make_tuple(ck::Number<1>{}, ck::Number<16>{}, ck::Number<64>{});
const auto layout = ck::wrapper::make_layout(shape, strides);
std::vector<ck::index_t> data(ck::wrapper::size(layout));
std::iota(data.begin(), data.end(), 0);
const auto tensor =
ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Generic>(data.data(), layout);
const auto block_shape = ck::make_tuple(ck::Number<2>{}, ck::Number<4>{}, ck::Number<2>{});
const auto num_blocks =
ck::make_tuple(ck::wrapper::size<0>(shape) / ck::wrapper::size<0>(block_shape),
ck::wrapper::size<1>(shape) / ck::wrapper::size<1>(block_shape),
ck::wrapper::size<2>(shape) / ck::wrapper::size<2>(block_shape));
std::vector<ck::index_t> block_idxs(ck::wrapper::size(num_blocks));
std::iota(block_idxs.begin(), block_idxs.end(), 0);
for(auto block_idx : block_idxs)
{
const auto packed_tile = ck::wrapper::make_local_tile(tensor, block_shape, block_idx);
const auto expected_tile_size = ck::wrapper::size(block_shape);
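// Example: with num_blocks = (8, 1, 2), block_idx = 1 maps to the second tile along the last dimension,
// so the expected first value is (1 % 2) * 2 * 64 = 128.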
auto expected_tile_first_val = (block_idx % ck::wrapper::size<2>(num_blocks)) *
ck::wrapper::size<2>(block_shape) *
ck::wrapper::size<2>(strides);
block_idx /= ck::wrapper::size<2>(num_blocks);
expected_tile_first_val += (block_idx % ck::wrapper::size<1>(num_blocks)) *
ck::wrapper::size<1>(block_shape) *
ck::wrapper::size<1>(strides);
block_idx /= ck::wrapper::size<1>(num_blocks);
expected_tile_first_val += (block_idx % ck::wrapper::size<0>(num_blocks)) *
ck::wrapper::size<0>(block_shape) *
ck::wrapper::size<0>(strides);
const auto expected_tile_second_val = expected_tile_first_val + 1;
EXPECT_EQ(ck::wrapper::size(packed_tile), expected_tile_size);
EXPECT_EQ(packed_tile(0), expected_tile_first_val);
EXPECT_EQ(packed_tile(1), expected_tile_second_val);
}
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
@@ -100,42 +100,34 @@ TEST(TestTensor, ReadWriteHostMemory)
__global__ void TestTensorReadWriteDevice(void* data, void* success)
{
constexpr ck::index_t nelems = 8;
constexpr ck::index_t scalar_per_vector = 1;
constexpr ck::index_t nelems = 8;
__shared__ ck::index_t p_shared[nelems];
ck::index_t* casted_data_ptr = static_cast<ck::index_t*>(data);
bool* casted_success_ptr = static_cast<bool*>(success);
const auto layout = ck::wrapper::make_layout(ck::make_tuple(ck::make_tuple(2, 2), 2));
constexpr auto register_layout = ck::wrapper::make_layout(ck::make_tuple(ck::Number<8>{}));
constexpr auto vgpr_layout =
ck::wrapper::make_layout(make_tuple(ck::Number<nelems>{}), make_tuple(ck::Number<1>{}));
auto tensor_global =
ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Global>(casted_data_ptr, layout);
auto tensor_lds = ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Lds>(p_shared, layout);
auto tensor_vgpr = ck::wrapper::make_register_tensor<ck::wrapper::MemoryTypeEnum::Vgpr,
nelems,
scalar_per_vector,
ck::index_t>(register_layout);
auto tensor_sgpr = ck::wrapper::make_register_tensor<ck::wrapper::MemoryTypeEnum::Sgpr,
nelems,
scalar_per_vector,
ck::index_t>(register_layout);
auto tensor_lds = ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Lds>(p_shared, layout);
auto tensor_vgpr =
ck::wrapper::make_register_tensor<ck::wrapper::MemoryTypeEnum::Vgpr, ck::index_t>(
vgpr_layout);
InitTensor(tensor_global);
InitTensor(tensor_lds);
StaticInitTensor<nelems>(tensor_vgpr);
StaticInitTensor<nelems>(tensor_sgpr);
*casted_success_ptr &= TestTensorCheck1d(tensor_global);
*casted_success_ptr = TestTensorCheck1d(tensor_global);
*casted_success_ptr &= TestTensorCheck3d(tensor_global);
*casted_success_ptr &= TestTensorCheck1d(tensor_lds);
*casted_success_ptr &= TestTensorCheck3d(tensor_lds);
*casted_success_ptr &= StaticTestTensorCheck1d<nelems>(tensor_vgpr);
*casted_success_ptr &= StaticTestTensorCheck1d<nelems>(tensor_sgpr);
}
TEST(TestTensor, ReadWriteGlobalLdsRegistersMemory)
@@ -151,7 +143,7 @@ TEST(TestTensor, ReadWriteGlobalLdsRegistersMemory)
TestTensorReadWriteDevice,
dim3(1),
dim3(1),
nelems * sizeof(ck::index_t),
0,
data_buf.GetDeviceBuffer(),
success_buf.GetDeviceBuffer());
@@ -173,33 +165,45 @@ TEST(TestTensor, Slicing)
auto tensor2x2x2 =
tensor(ck::make_tuple(ck::wrapper::slice(2), ck::wrapper::slice(2)), ck::wrapper::slice(2));
EXPECT_EQ(tensor2x2x2(0), layout(ck::make_tuple(ck::make_tuple(0, 0), 0)));
EXPECT_EQ(ck::wrapper::rank(tensor2x2x2), 2);
EXPECT_EQ(ck::wrapper::depth(tensor2x2x2), 2);
EXPECT_EQ(ck::wrapper::size(tensor2x2x2), 8);
EXPECT_TRUE(TestTensorCheck1d(tensor2x2x2));
auto tensor2x2 = tensor(ck::make_tuple(1, ck::wrapper::slice(2)), ck::wrapper::slice(2));
EXPECT_EQ(tensor2x2(0), layout(ck::make_tuple(ck::make_tuple(1, 0), 0)));
EXPECT_EQ(ck::wrapper::rank(tensor2x2), 2);
EXPECT_EQ(ck::wrapper::depth(tensor2x2), 2);
EXPECT_EQ(ck::wrapper::size(tensor2x2), 4);
EXPECT_TRUE(TestTensorCheck1d(tensor2x2, layout(ck::make_tuple(ck::make_tuple(1, 0), 0))));
EXPECT_TRUE(TestTensorCheck1d(tensor2x2));
auto tensor1x1 = tensor(ck::make_tuple(1, ck::wrapper::slice(1, 2)), ck::wrapper::slice(1, 2));
EXPECT_EQ(tensor1x1(0), layout(ck::make_tuple(ck::make_tuple(1, 1), 1)));
EXPECT_EQ(rank(tensor1x1), 2);
EXPECT_EQ(depth(tensor1x1), 2);
EXPECT_EQ(size(tensor1x1), 1);
EXPECT_TRUE(TestTensorCheck1d(tensor1x1, layout(ck::make_tuple(ck::make_tuple(1, 1), 1))));
EXPECT_TRUE(TestTensorCheck1d(tensor1x1));
auto tensor2 = tensor(ck::make_tuple(1, 1), ck::wrapper::slice(0, 2));
EXPECT_EQ(tensor2(0), layout(ck::make_tuple(ck::make_tuple(1, 1), 0)));
EXPECT_EQ(ck::wrapper::rank(tensor2), 1);
EXPECT_EQ(ck::wrapper::depth(tensor2), 1);
EXPECT_EQ(ck::wrapper::size(tensor2), 2);
EXPECT_TRUE(TestTensorCheck1d(tensor2, layout(ck::make_tuple(ck::make_tuple(1, 1), 0))));
EXPECT_TRUE(TestTensorCheck1d(tensor2));
auto tensor2_v2 = tensor(2, ck::wrapper::slice(0, 2));
EXPECT_EQ(tensor2_v2(0), layout(ck::make_tuple(2, 0)));
EXPECT_EQ(ck::wrapper::rank(tensor2_v2), 1);
EXPECT_EQ(ck::wrapper::depth(tensor2_v2), 1);
EXPECT_EQ(ck::wrapper::size(tensor2_v2), 2);
EXPECT_TRUE(TestTensorCheck1d(tensor2_v2));
// negative indexing
auto tensor1x2 = tensor(ck::make_tuple(1, ck::wrapper::slice(0, -2)), ck::wrapper::slice());
EXPECT_EQ(tensor1x2(0), layout(ck::make_tuple(ck::make_tuple(1, 0), 0)));
EXPECT_EQ(rank(tensor1x2), 2);
EXPECT_EQ(depth(tensor1x2), 2);
EXPECT_EQ(size(tensor1x2), 2);
EXPECT_TRUE(TestTensorCheck1d(tensor1x2, layout(ck::make_tuple(ck::make_tuple(1, 0), 0))));
EXPECT_TRUE(TestTensorCheck1d(tensor1x2));
}