// SPDX-License-Identifier: MIT
// Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.

#include <numeric>
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>

#include "ck/host_utility/kernel_launch.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/wrapper/layout.hpp"
#include "ck/wrapper/tensor.hpp"

// Checks ck::wrapper::make_local_partition when it is given a 3d thread layout
// together with a projection, on a tensor whose layout has two top-level
// dimensions (the first one nested).
TEST(TestPartition, LocalPartition)
{
    // Nested shape ((16, 4), 4) with strides ((1, 16), 64): each stride equals the
    // running product of the preceding extents (1, 16, 16 * 4).
    const auto shape =
        ck::make_tuple(ck::make_tuple(ck::Number<16>{}, ck::Number<4>{}), ck::Number<4>{});
    const auto strides =
        ck::make_tuple(ck::make_tuple(ck::Number<1>{}, ck::Number<16>{}), ck::Number<64>{});
    const auto layout = ck::wrapper::make_layout(shape, strides);

    // Fill the backing buffer with 0, 1, 2, ... so that every element's value is
    // its own index in the buffer; the expectations below rely on that.
    std::vector<ck::index_t> data(ck::wrapper::size(layout));
    std::iota(data.begin(), data.end(), 0);

    const auto tensor =
        ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Generic>(data.data(), layout);

    // Only the middle entry of thread_steps (8) is read, when computing the
    // expected first value of each partition.
    const auto thread_steps  = ck::make_tuple(ck::Number<1>{}, ck::Number<8>{}, ck::Number<1>{});
    const auto thread_layout = ck::make_tuple(ck::Number<4>{}, ck::Number<8>{}, ck::Number<1>{});
    // 3d partition on 2d shape (calculate partition on 3d thread layout, and then skip first dim)
    const auto thread_projection =
        ck::make_tuple(ck::wrapper::slice(4), ck::Number<1>{}, ck::Number<1>{});
    // Same value as the first thread_layout entry and the slice(4) argument above;
    // it is divided out of the thread count wherever that count is used below.
    constexpr ck::index_t projection_thread_length = ck::Number<4>{};

    // Visit every thread id that remains once the projected length is divided out.
    for(ck::index_t thread_id = 0;
        thread_id < ck::wrapper::size(thread_layout) / projection_thread_length;
        thread_id++)
    {
        const auto packed_partition =
            ck::wrapper::make_local_partition(tensor, thread_layout, thread_id, thread_projection);

        // The tensor is expected to be split evenly between the remaining threads.
        const auto expected_partition_size =
            ck::wrapper::size(tensor) /
            (ck::wrapper::size(thread_layout) / projection_thread_length);
        // Expected first element: thread_id scaled by the middle thread step.
        const auto expected_partition_first_val  = thread_id * ck::wrapper::size<1>(thread_steps);
        // The second element is expected to be the next value in the buffer.
        const auto expected_partition_second_val = expected_partition_first_val + 1;

        EXPECT_EQ(ck::wrapper::size(packed_partition), expected_partition_size);
        EXPECT_EQ(packed_partition(0), expected_partition_first_val);
        EXPECT_EQ(packed_partition(1), expected_partition_second_val);
    }
}

// Checks ck::wrapper::make_local_tile when it is given a 4d block shape together
// with a projection, on a tensor whose layout has three dimensions.
TEST(TestPartition, LocalTile)
{
    // Shape (16, 4, 4) with strides (1, 16, 64): each stride equals the running
    // product of the preceding extents (1, 16, 16 * 4).
    const auto shape   = ck::make_tuple(ck::Number<16>{}, ck::Number<4>{}, ck::Number<4>{});
    const auto strides = ck::make_tuple(ck::Number<1>{}, ck::Number<16>{}, ck::Number<64>{});
    const auto layout  = ck::wrapper::make_layout(shape, strides);

    // Fill the backing buffer with 0, 1, 2, ... so that every element's value is
    // its own index in the buffer; the expectations below rely on that.
    std::vector<ck::index_t> data(ck::wrapper::size(layout));
    std::iota(data.begin(), data.end(), 0);

    const auto tensor =
        ck::wrapper::make_tensor<ck::wrapper::MemoryTypeEnum::Generic>(data.data(), layout);

    // 4d tile partitioning on 3d shape (calculate tile on 4d tile layout, and then skip last dim)
    const auto block_shape =
        ck::make_tuple(ck::Number<2>{}, ck::Number<4>{}, ck::Number<2>{}, ck::Number<2>{});
    const auto block_projection =
        ck::make_tuple(ck::Number<1>{}, ck::Number<1>{}, ck::Number<1>{}, ck::wrapper::slice(2));
    // Same value as the last block_shape entry and the slice(2) argument above;
    // it is divided out of the block size when computing the expected tile size.
    constexpr ck::index_t projection_block_dim = ck::Number<2>{};

    // Number of blocks along each of the three tensor dimensions; the fourth
    // block_shape entry has no tensor dimension to pair with and is not used here.
    const auto num_blocks =
        ck::make_tuple(ck::wrapper::size<0>(shape) / ck::wrapper::size<0>(block_shape),
                       ck::wrapper::size<1>(shape) / ck::wrapper::size<1>(block_shape),
                       ck::wrapper::size<2>(shape) / ck::wrapper::size<2>(block_shape));
    // One flat block index per block: 0, 1, 2, ...
    std::vector<ck::index_t> block_idxs(ck::wrapper::size(num_blocks));
    std::iota(block_idxs.begin(), block_idxs.end(), 0);

    // block_idx is taken by value on purpose: it is consumed digit by digit below.
    for(auto block_idx : block_idxs)
    {
        const auto packed_tile =
            ck::wrapper::make_local_tile(tensor, block_shape, block_idx, block_projection);

        const auto expected_tile_size = ck::wrapper::size(block_shape) / projection_block_dim;

        // Decompose the flat block index into per-dimension block coordinates, with
        // dimension 2 varying fastest, and accumulate
        // coordinate * block extent * stride for each dimension.
        auto expected_tile_first_val = (block_idx % ck::wrapper::size<2>(num_blocks)) *
                                       ck::wrapper::size<2>(block_shape) *
                                       ck::wrapper::size<2>(strides);
        block_idx /= ck::wrapper::size<2>(num_blocks);
        expected_tile_first_val += (block_idx % ck::wrapper::size<1>(num_blocks)) *
                                   ck::wrapper::size<1>(block_shape) *
                                   ck::wrapper::size<1>(strides);
        block_idx /= ck::wrapper::size<1>(num_blocks);
        expected_tile_first_val += (block_idx % ck::wrapper::size<0>(num_blocks)) *
                                   ck::wrapper::size<0>(block_shape) *
                                   ck::wrapper::size<0>(strides);

        // The second element is expected to be the next value in the buffer
        // (dimension 0 has stride 1).
        const auto expected_tile_second_val = expected_tile_first_val + 1;

        EXPECT_EQ(ck::wrapper::size(packed_tile), expected_tile_size);
        EXPECT_EQ(packed_tile(0), expected_tile_first_val);
        EXPECT_EQ(packed_tile(1), expected_tile_second_val);
    }
}