Commit 2732d06c authored by rocking's avatar rocking
Browse files

Merge commit '75891161' into gemm_layernorm_welford

parents dd0255ba 75891161
...@@ -26,6 +26,7 @@ add_subdirectory(02_gemm_bilinear) ...@@ -26,6 +26,7 @@ add_subdirectory(02_gemm_bilinear)
add_subdirectory(03_gemm_bias_relu) add_subdirectory(03_gemm_bias_relu)
add_subdirectory(04_gemm_add_add_fastgelu) add_subdirectory(04_gemm_add_add_fastgelu)
add_subdirectory(09_convnd_fwd) add_subdirectory(09_convnd_fwd)
add_subdirectory(10_convnd_fwd_multiple_d_multiple_reduce)
add_subdirectory(12_reduce) add_subdirectory(12_reduce)
add_subdirectory(13_pool2d_fwd) add_subdirectory(13_pool2d_fwd)
add_subdirectory(14_gemm_xdl_requant_relu_requant) add_subdirectory(14_gemm_xdl_requant_relu_requant)
...@@ -50,3 +51,4 @@ add_subdirectory(32_batched_gemm_scale_softmax_gemm) ...@@ -50,3 +51,4 @@ add_subdirectory(32_batched_gemm_scale_softmax_gemm)
add_subdirectory(33_multiple_reduce) add_subdirectory(33_multiple_reduce)
add_subdirectory(34_batchnorm) add_subdirectory(34_batchnorm)
add_subdirectory(35_splitK_gemm) add_subdirectory(35_splitK_gemm)
add_subdirectory(41_grouped_conv_conv_fwd)
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp"
#include "ck/host_utility/device_prop.hpp" #include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp" #include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/io.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -464,6 +465,14 @@ struct DeviceBatchedGemmGemm_Xdl_CShuffle : public DeviceBatchedGemmGemm<ALayout ...@@ -464,6 +465,14 @@ struct DeviceBatchedGemmGemm_Xdl_CShuffle : public DeviceBatchedGemmGemm<ALayout
} }
} }
// Writes the A, B0, B1 and C grid descriptors stored in this object to std::cout.
// Each descriptor goes on its own line, prefixed with a label naming its dimensions,
// and every line is terminated with std::endl (so the stream is flushed per line).
void Print() const
{
    // Small local helper: emit "<label><descriptor>" followed by std::endl.
    const auto emit_line = [](const char* label, const auto& grid_desc) {
        std::cout << label << grid_desc << std::endl;
    };

    emit_line("A[AK0, M, AK1]: ", a_grid_desc_ak0_m_ak1_);
    emit_line("B0[BK0, N, BK1]: ", b_grid_desc_bk0_n_bk1_);
    emit_line("B1[BK0, N, BK1]: ", b1_grid_desc_bk0_n_bk1_);
    emit_line("C[M, N]: ", c_grid_desc_m_n_);
}
// private: // private:
const ADataType* p_a_grid_; const ADataType* p_a_grid_;
const BDataType* p_b_grid_; const BDataType* p_b_grid_;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp"
#include "ck/host_utility/device_prop.hpp" #include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp" #include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/io.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
...@@ -111,6 +111,15 @@ __global__ void ...@@ -111,6 +111,15 @@ __global__ void
// Computes C = A * B0 * B1 // Computes C = A * B0 * B1
// ^^^^^^ (Acc0) // ^^^^^^ (Acc0)
// ^^^^^^^^^^^ (Acc1) // ^^^^^^^^^^^ (Acc1)
// When using NPadding as the GemmSpecialization, AccElementwiseOperation should be set to
// ScaleAndResetNaNToMinusInfinity, i.e.:
//   if !isNan(AccElement)
//     AccElement *= scale
//   else
//     AccElement = -INFINITY
// Otherwise, the result may be wrong.
template <typename ALayout, template <typename ALayout,
typename BLayout, // B0Layout typename BLayout, // B0Layout
typename B1Layout, typename B1Layout,
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#pragma once #pragma once
#include <vector> #include <array>
#include "ck/tensor_operation/gpu/device/device_base.hpp" #include "ck/tensor_operation/gpu/device/device_base.hpp"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment