Unverified Commit b4e77c8c authored by Bartlomiej Wroblewski, committed by GitHub

Merge branch 'develop' into bwroblew/contraction_mixed_dt

parents ac56377a 271ef645
# Change Log for Composable Kernel
# Changelog for Composable Kernel
Full documentation for Composable Kernel is not yet available.
## (Unreleased) CK for ROCm 6.0.0
### Fixed
### Fixes
- Fixed a hazard associated with inline v_dot (#808)
- Fixed two bugs in grouped convolution backward data without K padding (#848 #876)
### Optimizations
None
### Added
- Added image-to-column (#867) and column-to-image (#930) kernels.
### Additions
- Added an image-to-column kernel (#867)
- Added a column-to-image kernel (#930)
- Support for 3D grouped convolution forward on RDNA 3 GPUs (#935)
- Grouped convolution support for small K and C (#822 #879 #897)
- Support for NHWGC (2D and 3D) grouped convolution backward weight (#769 #804)
- Support for bf16/f32/f16 and NHWGC (2D and 3D) grouped convolution backward data (#757 #799)
- Support for Batched Gemm DL (#732)
### Changed
### Changes
- Changed the grouped convolution API to maintain consistency with other convolution kernels (#817)
## CK 0.2.0 for ROCm 5.7.0
## CK 0.2.0 for ROCm 5.5.0
### Fixed
- Fixed a bug in 6-dimensional kernels (#555).
- Fixed grouped ConvBwdWeight test case failure (#524).
### Fixes
- Fixed a bug in 6-dimensional kernels (#555)
- Fixed a test case failure with grouped convolution backward weight (#524)
### Optimizations
- Improved the performance of the normalization kernel
### Added
- Added new cmake flag "DL_KERNELS" must be set to "ON" in order to build the gemm_dl and batched_gemm_multi_d_dl instances.
- Added new cmake flag "DTYPES" which could be set to any subset of "fp64;fp32;fp16;fp8;bf16;int8" to build instance of select data types.
- Added new cmake flag "INSTANCES_ONLY" which will only build CK library and instances without the tests, examples, or profiler.
- Added new feature: if GPU_TARGETS is not set on cmake command line, CK will be built for all targets supported by compiler.
- Added support on MI300A/MI300X.
- Added support on NAVI3x.
- Added user tutorial (#563).
- Added more instances for irregular GEMM sizes (#560).
- Added inter-wave consumer-producer programming model for GEMM kernels (#310).
- Added multi-D GEMM client APIs (#534).
- Added multi-embeddings support (#542).
- Added Navi3x blockwise GEMM and real GEMM support (#541).
- Added Navi grouped ConvBwdWeight support (#505).
- Added MaxPool, AvgPool forward (#815).
- Added MaxPool backward (#750).
### Changed
- Improved the performance of the normalization kernel
### Additions
- New CMake flags:
  - "DL_KERNELS" -- Must be set to "ON" to build the gemm_dl and batched_gemm_multi_d_dl instances
  - "DTYPES" -- Can be set to any subset of "fp64;fp32;fp16;fp8;bf16;int8" to build instances of the selected data types
  - "INSTANCES_ONLY" -- Only builds the CK library and instances, without the tests, examples, or profiler
- New feature: if GPU_TARGETS is not set on the CMake command line, CK is built for all targets supported by the compiler
- Support for MI300A/MI300X
- Support for AMD RDNA 3
- New user tutorial (#563)
- Additional instances for irregular GEMM sizes (#560)
- New inter-wave consumer-producer programming model for GEMM kernels (#310)
- GEMM with support for multiple elementwise fusions (multi-D) (#534)
- Multi-embeddings support (#542)
- AMD RDNA 3 blockwise GEMM and real GEMM support (#541)
- AMD RDNA grouped convolution backward weight support (#505)
- MaxPool and AvgPool forward (#815); MaxPool backward (#750)
### Changes
None
@@ -34,6 +34,7 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
#endif
// warm up
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
hip_check_error(hipGetLastError());
const int nrepeat = 10;
#if DEBUG_LOG
@@ -50,6 +51,7 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
for(int i = 0; i < nrepeat; ++i)
{
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
hip_check_error(hipGetLastError());
}
hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
@@ -64,11 +66,13 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
else
{
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
hip_check_error(hipGetLastError());
return 0;
}
#else
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
hip_check_error(hipGetLastError());
return 0;
#endif
@@ -101,6 +105,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
// warm up
preprocess();
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
hip_check_error(hipGetLastError());
const int nrepeat = 10;
#if DEBUG_LOG
@@ -118,6 +123,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
{
preprocess();
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
hip_check_error(hipGetLastError());
}
hip_check_error(hipEventRecord(stop, stream_config.stream_id_));
@@ -133,11 +139,13 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
{
preprocess();
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
hip_check_error(hipGetLastError());
return 0;
}
#else
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
hip_check_error(hipGetLastError());
return 0;
#endif
......
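The hip_check_error(hipGetLastError()) calls added after each launch above surface launch-time failures (for example, an invalid launch configuration or a missing code object for the current GPU) that would otherwise only show up at the next synchronizing HIP call. A minimal standalone sketch of the same pattern follows; check_hip and dummy_kernel are hypothetical stand-ins for illustration, not CK code:

```cpp
#include <hip/hip_runtime.h>
#include <cstdio>
#include <cstdlib>

__global__ void dummy_kernel() {}

// Hypothetical stand-in for CK's hip_check_error utility.
static void check_hip(hipError_t err)
{
    if(err != hipSuccess)
    {
        std::fprintf(stderr, "HIP error: %s\n", hipGetErrorString(err));
        std::exit(EXIT_FAILURE);
    }
}

int main()
{
    dummy_kernel<<<dim3(1), dim3(64), 0, 0>>>();
    // hipGetLastError() reports launch failures immediately, without waiting
    // for the kernel to run.
    check_hip(hipGetLastError());
    // Execution-time errors are only observed at a synchronizing call.
    check_hip(hipDeviceSynchronize());
    return 0;
}
```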
@@ -9,8 +9,10 @@
namespace ck {
// Convert X to Y
template <typename Y, typename X>
// Convert X to Y, both X and Y are non-const data types.
template <typename Y,
typename X,
std::enable_if_t<!(std::is_const_v<Y> || std::is_const_v<X>), bool> = false>
__host__ __device__ constexpr Y type_convert(X x)
{
static_assert(!std::is_reference_v<Y> && !std::is_reference_v<X>);
@@ -18,6 +20,19 @@ __host__ __device__ constexpr Y type_convert(X x)
return static_cast<Y>(x);
}
// Convert X to Y, either X or Y is a const data type.
template <typename Y,
typename X,
std::enable_if_t<std::is_const_v<Y> || std::is_const_v<X>, bool> = false>
__host__ __device__ constexpr Y type_convert(X x)
{
static_assert(!std::is_reference_v<Y> && !std::is_reference_v<X>);
using NonConstY = std::remove_const_t<Y>;
using NonConstX = std::remove_const_t<X>;
return static_cast<Y>(type_convert<NonConstY, NonConstX>(x));
}
// convert bfp16 to fp32
template <>
inline __host__ __device__ constexpr float type_convert<float, bhalf_t>(bhalf_t x)
......
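The two type_convert overloads above are made mutually exclusive by their enable_if_t constraints: the const-qualified variant strips the qualifiers and forwards to the non-const variant, so the conversion logic (including per-type specializations like the bhalf_t one) stays in a single place. A small self-contained sketch of the same dispatch technique, using a hypothetical convert function rather than ck::type_convert, might look like this:

```cpp
#include <cassert>
#include <type_traits>

// Overload for non-const Y and X: performs the actual conversion.
template <typename Y,
          typename X,
          std::enable_if_t<!(std::is_const_v<Y> || std::is_const_v<X>), bool> = false>
constexpr Y convert(X x)
{
    return static_cast<Y>(x);
}

// Overload chosen when Y or X is const-qualified: strip the qualifiers and
// delegate, so only the non-const overload carries conversion logic.
template <typename Y,
          typename X,
          std::enable_if_t<std::is_const_v<Y> || std::is_const_v<X>, bool> = false>
constexpr Y convert(X x)
{
    using NonConstY = std::remove_const_t<Y>;
    using NonConstX = std::remove_const_t<X>;
    return static_cast<Y>(convert<NonConstY, NonConstX>(x));
}

int main()
{
    // Takes the const-qualified path, which forwards to the plain overload.
    const float a = convert<const float, const int>(3);
    // Takes the plain path directly; X is deduced as int.
    float b = convert<float>(3);
    assert(a == 3.0f && b == 3.0f);
    return 0;
}
```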
@@ -13,3 +13,5 @@ add_gtest_executable(test_bf8 bf8.cpp)
if(result EQUAL 0)
target_link_libraries(test_bf8 PRIVATE utility)
endif()
add_gtest_executable(test_type_convert_const type_convert_const.cpp)
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type_convert.hpp"
using ck::bhalf_t;
using ck::type_convert;
TEST(TypeConvertConst, ConvertToConst)
{
constexpr float bf16_epsilon = 0.0078125;
constexpr float rel_tol = 2 * bf16_epsilon;
const std::vector<float> cases = {0.0, -123.f, 3.981323f, 0.2429f};
for(float x : cases)
{
const float abs_tol = std::abs(rel_tol * x);
{
bhalf_t y = type_convert<bhalf_t>(x);
// Test non-const bhalf to const float.
const float y_float = type_convert<const float>(y);
ASSERT_NEAR(y_float, x, abs_tol);
}
{
// Test non-const float to const bhalf.
const bhalf_t y = type_convert<const bhalf_t>(x);
// Remove the constness with const_cast rather than another type_convert call,
// so a possible issue cannot hide behind two conversions.
bhalf_t& y_nonconst = const_cast<bhalf_t&>(y);
float y_float = type_convert<float>(y_nonconst);
ASSERT_NEAR(y_float, x, abs_tol);
}
}
}
TEST(TypeConvertConst, ConvertFromConst)
{
constexpr float bf16_epsilon = 0.0078125;
constexpr float rel_tol = 2 * bf16_epsilon;
const std::vector<float> cases = {0.0, -123.f, 3.981323f, 0.2429f};
for(const float x : cases)
{
const float abs_tol = std::abs(rel_tol * x);
{
// Test const float to const bhalf_t.
const bhalf_t y = type_convert<const bhalf_t>(x);
// Remove the constness with const_cast rather than another type_convert call,
// so a possible issue cannot hide behind two conversions.
bhalf_t& y_nonconst = const_cast<bhalf_t&>(y);
float y_float = type_convert<float>(y_nonconst);
ASSERT_NEAR(y_float, x, abs_tol);
}
{
// Test const float to non-const bhalf.
bhalf_t y = type_convert<bhalf_t>(x);
float y_float = type_convert<float>(y);
ASSERT_NEAR(y_float, x, abs_tol);
}
{
const bhalf_t y = type_convert<const bhalf_t>(x);
// Test const bhalf to non-const float.
float y_float = type_convert<float>(y);
ASSERT_NEAR(y_float, x, abs_tol);
}
// Tests that specify the source type X explicitly, so its constness is not dropped by template argument deduction.
{
// Test const float to const bhalf_t.
const bhalf_t y = type_convert<const bhalf_t, const float>(x);
// Remove the constness with const_cast rather than another type_convert call,
// so a possible issue cannot hide behind two conversions.
bhalf_t& y_nonconst = const_cast<bhalf_t&>(y);
float y_float = type_convert<float>(y_nonconst);
ASSERT_NEAR(y_float, x, abs_tol);
}
{
// Test const float to non-const bhalf.
bhalf_t y = type_convert<bhalf_t, const float>(x);
float y_float = type_convert<float>(y);
ASSERT_NEAR(y_float, x, abs_tol);
}
{
const bhalf_t y = type_convert<const bhalf_t, const float>(x);
// Test const bhalf to non-const float.
float y_float = type_convert<float, const bhalf_t>(y);
ASSERT_NEAR(y_float, x, abs_tol);
}
}
}
@@ -2,7 +2,7 @@
TYPED_TEST(TestGemmSplitK_MK_KN, SmallM)
{
std::vector<int> Ms{0, 1, 2, 3, 4, 5, 6};
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 512;
constexpr int K = 320;
@@ -16,7 +16,7 @@ TYPED_TEST(TestGemmSplitK_MK_KN, SmallM)
TYPED_TEST(TestGemmSplitK_MK_NK, SmallM)
{
std::vector<int> Ms{0, 1, 2, 3, 4, 5, 6};
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 512;
constexpr int K = 320;
@@ -30,7 +30,7 @@ TYPED_TEST(TestGemmSplitK_MK_NK, SmallM)
TYPED_TEST(TestGemmSplitK_KM_KN, SmallM)
{
std::vector<int> Ms{0, 1, 2, 3, 4, 5, 6};
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 512;
constexpr int K = 320;
@@ -43,7 +43,7 @@ TYPED_TEST(TestGemmSplitK_KM_KN, SmallM)
TYPED_TEST(TestGemmSplitK_KM_NK, SmallM)
{
std::vector<int> Ms{0, 1, 2, 3, 4, 5, 6};
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 512;
constexpr int K = 320;
......