Commit 036c5234 authored by Adam Osewski's avatar Adam Osewski
Browse files

Merge remote-tracking branch 'origin/develop' into aosewski/ggemm_multi_d2

parents 22995e9a 7843a8a7
......@@ -7,9 +7,10 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_util.hpp"
using F8 = ck::f8_t;
using F16 = ck::half_t;
using F32 = float;
using F8 = ck::f8_t;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using F32 = float;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
......@@ -44,7 +45,8 @@ using KernelTypes = ::testing::Types<
// ADataType, BDataType, CDataType
std::tuple< F16, F16, F16>,
std::tuple< F16, F8, F16>,
std::tuple< F8, F16, F16>
std::tuple< F8, F16, F16>,
std::tuple< BF16, BF16, BF16>
>;
// clang-format on
......
......@@ -264,5 +264,7 @@ TYPED_TEST(TestGroupedConvndBwdWeight3d, Test3D)
{3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->conv_params.push_back(
{3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->conv_params.push_back(
{3, 1, 1, 4, 4, {3, 3, 3}, {14, 28, 28}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->Run();
}
......@@ -55,14 +55,14 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
}
}
if(ck::is_navi3_supported())
if(ck::is_gfx11_supported())
{
// on navi3x only support for 3d is implemented
// on gfx11 only support for 3d is implemented
if constexpr(NDimSpatial{} != 3)
{
return true;
}
// on navi3x only support for i8 and fp16 is implemented
// on gfx11 only support for i8 and fp16 is implemented
if constexpr(!((std::is_same_v<InDataType, int8_t> &&
std::is_same_v<WeiDataType, int8_t> &&
std::is_same_v<OutDataType, int8_t>) ||
......@@ -80,7 +80,7 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
}
else
{
// support for i8 is only implemented on navi3x
// support for i8 is only implemented on gfx11
if constexpr(std::is_same_v<InDataType, int8_t> &&
std::is_same_v<WeiDataType, int8_t> && std::is_same_v<OutDataType, int8_t>)
{
......
add_custom_target(test_normalization_bwd_data)
add_gtest_executable(test_layernorm2d_bwd_data_fp32 test_layernorm2d_bwd_data_fp32.cpp)
target_link_libraries(test_layernorm2d_bwd_data_fp32 PRIVATE utility device_normalization_bwd_data_instance)
add_dependencies(test_normalization_bwd_data test_layernorm2d_bwd_data_fp32)
if (result EQUAL 0)
target_link_libraries(test_layernorm2d_bwd_data_fp32 PRIVATE utility device_normalization_bwd_data_instance)
add_dependencies(test_normalization_bwd_data test_layernorm2d_bwd_data_fp32)
endif()
add_gtest_executable(test_groupnorm_bwd_data_fp32 test_groupnorm_bwd_data_fp32.cpp)
target_link_libraries(test_groupnorm_bwd_data_fp32 PRIVATE utility device_normalization_bwd_data_instance)
add_dependencies(test_normalization_bwd_data test_groupnorm_bwd_data_fp32)
if (result EQUAL 0)
target_link_libraries(test_groupnorm_bwd_data_fp32 PRIVATE utility device_normalization_bwd_data_instance)
add_dependencies(test_normalization_bwd_data test_groupnorm_bwd_data_fp32)
endif()
add_custom_target(test_normalization_bwd_gamma_beta)
add_gtest_executable(test_layernorm2d_bwd_gamma_beta_fp32 test_layernorm2d_bwd_gamma_beta_fp32.cpp)
target_link_libraries(test_layernorm2d_bwd_gamma_beta_fp32 PRIVATE utility device_normalization_bwd_gamma_beta_instance)
add_dependencies(test_normalization_bwd_gamma_beta test_layernorm2d_bwd_gamma_beta_fp32)
if (result EQUAL 0)
target_link_libraries(test_layernorm2d_bwd_gamma_beta_fp32 PRIVATE utility device_normalization_bwd_gamma_beta_instance)
add_dependencies(test_normalization_bwd_gamma_beta test_layernorm2d_bwd_gamma_beta_fp32)
endif()
add_gtest_executable(test_groupnorm_bwd_gamma_beta_fp32 test_groupnorm_bwd_gamma_beta_fp32.cpp)
target_link_libraries(test_groupnorm_bwd_gamma_beta_fp32 PRIVATE utility device_normalization_bwd_gamma_beta_instance)
add_dependencies(test_normalization_bwd_gamma_beta test_groupnorm_bwd_gamma_beta_fp32)
if (result EQUAL 0)
target_link_libraries(test_groupnorm_bwd_gamma_beta_fp32 PRIVATE utility device_normalization_bwd_gamma_beta_instance)
add_dependencies(test_normalization_bwd_gamma_beta test_groupnorm_bwd_gamma_beta_fp32)
endif()
\ No newline at end of file
add_test_executable(test_position_embedding position_embedding.cpp)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include <iostream>
#include <numeric>
#include <cassert>
#include "ck_tile/core.hpp"
#include "ck_tile/ops/fmha.hpp"
#ifndef TEST_ALIBI_VERBOSE
#define TEST_ALIBI_VERBOSE 0
#endif
template <typename DataType>
struct attention_score
{
ck_tile::index_t rows, cols;
std::vector<DataType> pixels;
attention_score(ck_tile::index_t rows_,
ck_tile::index_t cols_,
DataType init_v_ = static_cast<DataType>(0))
: rows(rows_), cols(cols_), pixels(rows_ * cols_, init_v_)
{
}
auto& operator()(ck_tile::index_t i_row, ck_tile::index_t i_col)
{
return pixels[i_row * cols + i_col];
}
void print()
{
for(auto i_row = 0; i_row < rows; i_row++)
{
for(auto i_col = 0; i_col < cols; i_col++)
{
std::cout << pixels[i_row * cols + i_col] << " ";
}
std::cout << std::endl;
}
}
};
template <bool RowMajor, typename DataType>
void alibi_traverse_with_slope(attention_score<DataType>& score,
DataType slope,
ck_tile::AlibiMode mode = ck_tile::AlibiMode::VERTICAL)
{
using Alibi = ck_tile::Alibi<DataType, RowMajor>;
auto alibi = Alibi{slope, score.rows, score.cols, mode};
for(ck_tile::index_t i_row = 0; i_row < score.rows; i_row++)
{
for(ck_tile::index_t i_col = 0; i_col < score.cols; i_col++)
{
alibi.update(score(i_row, i_col), i_row, i_col);
}
}
}
std::string alibi_mode_to_str(ck_tile::AlibiMode mode)
{
if(mode == ck_tile::AlibiMode::VERTICAL)
return std::string("alibi_verti");
else if(mode == ck_tile::AlibiMode::FROM_TOP_LEFT)
return std::string("alibi_top-l");
else if(mode == ck_tile::AlibiMode::FROM_BOTTOM_RIGHT)
return std::string("alibi_bot-r");
return "";
}
template <bool RowMajor, typename DataType>
bool test_alibi_traverse_with_slope(ck_tile::index_t rows,
ck_tile::index_t cols,
DataType slope,
ck_tile::AlibiMode mode,
const std::vector<DataType>& expected)
{
attention_score<DataType> score{rows, cols};
alibi_traverse_with_slope<RowMajor, DataType>(score, slope, mode);
bool is_match = std::equal(score.pixels.begin(), score.pixels.end(), expected.begin());
#if TEST_ALIBI_VERBOSE
std::cout << "---------" << alibi_mode_to_str(mode) << ", " << rows << "x" << cols << "("
<< (RowMajor ? "row_major" : "col_major") << ")"
<< (is_match ? ", valie:y" : ", valid:n") << std::endl;
score.print();
#endif
return is_match;
}
template <typename DataType>
bool test_alibi_slope_generation(ck_tile::index_t nheads, const std::vector<DataType>& expected)
{
auto slopes = ck_tile::get_alibi_slopes<DataType>(nheads);
bool is_match = std::equal(slopes.begin(),
slopes.end(),
expected.begin(),
expected.end(),
[](const DataType& lhs, const DataType& rhs) {
constexpr float rtol = 1e-6;
auto error = std::abs(lhs - rhs);
return error < rtol * std::abs(rhs);
});
#if TEST_ALIBI_VERBOSE
std::cout << "-------------------- slopes " << nheads << ", " << (is_match ? "y" : "n")
<< std::endl;
for(ck_tile::index_t i = 0; i < nheads; i++)
{
std::cout << slopes[i] << " ";
}
std::cout << std::endl;
#endif
return is_match;
}
int main()
{
using dtype = int32_t;
dtype slope = static_cast<dtype>(1);
bool rtn = true;
// clang-format off
rtn &= test_alibi_traverse_with_slope<true, dtype>(4, 6, slope, ck_tile::AlibiMode::VERTICAL, {0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5});
rtn &= test_alibi_traverse_with_slope<true, dtype>(4, 6, slope, ck_tile::AlibiMode::FROM_TOP_LEFT, {0, 1, 2, 3, 4, 5,
1, 0, 1, 2, 3, 4,
2, 1, 0, 1, 2, 3,
3, 2, 1, 0, 1, 2});
rtn &= test_alibi_traverse_with_slope<true, dtype>(6, 4, slope, ck_tile::AlibiMode::FROM_TOP_LEFT, {0, 1, 2, 3,
1, 0, 1, 2,
2, 1, 0, 1,
3, 2, 1, 0,
4, 3, 2, 1,
5, 4, 3, 2});
rtn &= test_alibi_traverse_with_slope<true, dtype>(3, 3, slope, ck_tile::AlibiMode::FROM_TOP_LEFT, {0, 1, 2,
1, 0, 1,
2, 1, 0});
rtn &= test_alibi_traverse_with_slope<true, dtype>(4, 6, slope, ck_tile::AlibiMode::FROM_BOTTOM_RIGHT, {2, 1, 0, 1, 2, 3,
3, 2, 1, 0, 1, 2,
4, 3, 2, 1, 0, 1,
5, 4, 3, 2, 1, 0});
rtn &= test_alibi_traverse_with_slope<true, dtype>(6, 4, slope, ck_tile::AlibiMode::FROM_BOTTOM_RIGHT, {2, 3, 4, 5,
1, 2, 3, 4,
0, 1, 2, 3,
1, 0, 1, 2,
2, 1, 0, 1,
3, 2, 1, 0});
rtn &= test_alibi_traverse_with_slope<true, dtype>(3, 3, slope, ck_tile::AlibiMode::FROM_BOTTOM_RIGHT, {0, 1, 2,
1, 0, 1,
2, 1, 0});
rtn &= test_alibi_traverse_with_slope<false, dtype>(4, 6, slope, ck_tile::AlibiMode::VERTICAL, {0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5,
0, 1, 2, 3, 4, 5});
rtn &= test_alibi_traverse_with_slope<false, dtype>(4, 6, slope, ck_tile::AlibiMode::FROM_TOP_LEFT, {0, 1, 2, 3, 4, 5,
1, 0, 1, 2, 3, 4,
2, 1, 0, 1, 2, 3,
3, 2, 1, 0, 1, 2});
rtn &= test_alibi_traverse_with_slope<false, dtype>(6, 4, slope, ck_tile::AlibiMode::FROM_TOP_LEFT, {0, 1, 2, 3,
1, 0, 1, 2,
2, 1, 0, 1,
3, 2, 1, 0,
4, 3, 2, 1,
5, 4, 3, 2});
rtn &= test_alibi_traverse_with_slope<false, dtype>(3, 3, slope, ck_tile::AlibiMode::FROM_TOP_LEFT, {0, 1, 2,
1, 0, 1,
2, 1, 0});
rtn &= test_alibi_traverse_with_slope<false, dtype>(4, 6, slope, ck_tile::AlibiMode::FROM_BOTTOM_RIGHT, {2, 1, 0, 1, 2, 3,
3, 2, 1, 0, 1, 2,
4, 3, 2, 1, 0, 1,
5, 4, 3, 2, 1, 0});
rtn &= test_alibi_traverse_with_slope<false, dtype>(6, 4, slope, ck_tile::AlibiMode::FROM_BOTTOM_RIGHT, {2, 3, 4, 5,
1, 2, 3, 4,
0, 1, 2, 3,
1, 0, 1, 2,
2, 1, 0, 1,
3, 2, 1, 0});
rtn &= test_alibi_traverse_with_slope<false, dtype>(3, 3, slope, ck_tile::AlibiMode::FROM_BOTTOM_RIGHT, {0, 1, 2,
1, 0, 1,
2, 1, 0});
rtn &= test_alibi_slope_generation<float>(8, {0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625});
rtn &= test_alibi_slope_generation<float>(16, {0.7071067811865476, 0.5, 0.35355339059327384, 0.25000000000000006, 0.17677669529663692,
0.12500000000000006, 0.08838834764831849, 0.06250000000000004, 0.044194173824159244,
0.03125000000000002, 0.022097086912079626, 0.01562500000000001, 0.011048543456039816,
0.007812500000000007, 0.005524271728019908, 0.003906250000000004});
rtn &= test_alibi_slope_generation<float>(1, {0.00390625});
rtn &= test_alibi_slope_generation<float>(5, {0.25, 0.0625, 0.015625, 0.00390625, 0.5});
rtn &= test_alibi_slope_generation<float>(6, {0.25, 0.0625, 0.015625, 0.00390625, 0.5, 0.125});
rtn &= test_alibi_slope_generation<float>(7, {0.25, 0.0625, 0.015625, 0.00390625, 0.5, 0.125, 0.03125});
rtn &= test_alibi_slope_generation<float>(9, {0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625, 0.7071067811865476});
// clang-format on
return rtn ? 0 : -1;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment