Commit 54f914c6 authored by Astha Rai's avatar Astha Rai
Browse files

added header guards/replicated functionality in device files

parent 7d9969ab
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
#pragma once #pragma once
#ifndef CK_CODE_GEN_RTC
#include <string> #include <string>
#endif
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
...@@ -18,6 +20,7 @@ enum struct ConvolutionForwardSpecialization ...@@ -18,6 +20,7 @@ enum struct ConvolutionForwardSpecialization
Filter3x3, Filter3x3,
}; };
#ifndef CK_CODE_GEN_RTC
inline std::string getConvForwardSpecializationString(const ConvolutionForwardSpecialization& s) inline std::string getConvForwardSpecializationString(const ConvolutionForwardSpecialization& s)
{ {
switch(s) switch(s)
...@@ -30,6 +33,7 @@ inline std::string getConvForwardSpecializationString(const ConvolutionForwardSp ...@@ -30,6 +33,7 @@ inline std::string getConvForwardSpecializationString(const ConvolutionForwardSp
default: return "Unrecognized specialization!"; default: return "Unrecognized specialization!";
} }
} }
#endif
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
......
...@@ -29,6 +29,7 @@ enum struct GemmSpecialization ...@@ -29,6 +29,7 @@ enum struct GemmSpecialization
MNKOPadding, MNKOPadding,
}; };
#ifndef CK_CODE_GEN_RTC
inline std::string getGemmSpecializationString(const GemmSpecialization& s) inline std::string getGemmSpecializationString(const GemmSpecialization& s)
{ {
switch(s) switch(s)
...@@ -52,6 +53,7 @@ inline std::string getGemmSpecializationString(const GemmSpecialization& s) ...@@ -52,6 +53,7 @@ inline std::string getGemmSpecializationString(const GemmSpecialization& s)
default: return "Unrecognized specialization!"; default: return "Unrecognized specialization!";
} }
} }
#endif
} // namespace device } // namespace device
} // namespace tensor_operation } // namespace tensor_operation
......
...@@ -20,13 +20,13 @@ ...@@ -20,13 +20,13 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp" #include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
#include "ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp" #include "ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_abd.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/matrix_padder.hpp" #include "ck/tensor_operation/gpu/device/matrix_padder.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_abd.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp" #include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp"
#include "ck/host_utility/io.hpp" //#include "ck/host_utility/io.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
...@@ -149,7 +149,7 @@ struct TransformConvFwdToGemm ...@@ -149,7 +149,7 @@ struct TransformConvFwdToGemm
template <typename ConvDimsType, template <typename ConvDimsType,
typename ConvSpatialDimsType, typename ConvSpatialDimsType,
index_t NDim = NDimSpatial, index_t NDim = NDimSpatial,
typename std::enable_if<NDim == 1, bool>::type = false> typename ck::enable_if<NDim == 1, bool>::type = false>
__host__ __device__ TransformConvFwdToGemm(const ConvDimsType& a_g_n_c_wis_lengths, __host__ __device__ TransformConvFwdToGemm(const ConvDimsType& a_g_n_c_wis_lengths,
const ConvDimsType& a_g_n_c_wis_strides, const ConvDimsType& a_g_n_c_wis_strides,
const ConvDimsType& b_g_k_c_xs_lengths, const ConvDimsType& b_g_k_c_xs_lengths,
...@@ -201,11 +201,15 @@ struct TransformConvFwdToGemm ...@@ -201,11 +201,15 @@ struct TransformConvFwdToGemm
InRightPadW_{input_right_pads[I0]}, InRightPadW_{input_right_pads[I0]},
ZYX_{X_} ZYX_{X_}
{ {
#ifdef CK_CODE_GEN_RTC
static_assert(is_same_v<ConvSpatialDimsType, ck::Array<IndexType, NDimSpatial>>);
static_assert(is_same_v<ConvDimsType, ck::Array<IndexType, NDimSpatial + I3>>);
#else
static_assert(is_same_v<ConvSpatialDimsType, std::array<IndexType, NDimSpatial>> || static_assert(is_same_v<ConvSpatialDimsType, std::array<IndexType, NDimSpatial>> ||
is_same_v<ConvSpatialDimsType, ck::Array<IndexType, NDimSpatial>>); is_same_v<ConvSpatialDimsType, ck::Array<IndexType, NDimSpatial>>);
static_assert(is_same_v<ConvDimsType, std::array<IndexType, NDimSpatial + I3>> || static_assert(is_same_v<ConvDimsType, std::array<IndexType, NDimSpatial + I3>> ||
is_same_v<ConvDimsType, ck::Array<IndexType, NDimSpatial + I3>>); is_same_v<ConvDimsType, ck::Array<IndexType, NDimSpatial + I3>>);
#endif
if constexpr(SplitN) if constexpr(SplitN)
{ {
N_ = GetSplitedNSize( N_ = GetSplitedNSize(
...@@ -220,7 +224,7 @@ struct TransformConvFwdToGemm ...@@ -220,7 +224,7 @@ struct TransformConvFwdToGemm
template <typename ConvDimsType, template <typename ConvDimsType,
typename ConvSpatialDimsType, typename ConvSpatialDimsType,
index_t NDim = NDimSpatial, index_t NDim = NDimSpatial,
typename std::enable_if<NDim == 2, bool>::type = false> typename ck::enable_if<NDim == 2, bool>::type = false>
__host__ __device__ TransformConvFwdToGemm(const ConvDimsType& a_g_n_c_wis_lengths, __host__ __device__ TransformConvFwdToGemm(const ConvDimsType& a_g_n_c_wis_lengths,
const ConvDimsType& a_g_n_c_wis_strides, const ConvDimsType& a_g_n_c_wis_strides,
const ConvDimsType& b_g_k_c_xs_lengths, const ConvDimsType& b_g_k_c_xs_lengths,
...@@ -272,11 +276,15 @@ struct TransformConvFwdToGemm ...@@ -272,11 +276,15 @@ struct TransformConvFwdToGemm
InRightPadW_{input_right_pads[I1]}, InRightPadW_{input_right_pads[I1]},
ZYX_{Y_ * X_} ZYX_{Y_ * X_}
{ {
#ifdef CK_CODE_GEN_RTC
static_assert(is_same_v<ConvSpatialDimsType, ck::Array<IndexType, NDimSpatial>>);
static_assert(is_same_v<ConvDimsType, ck::Array<IndexType, NDimSpatial + I3>>);
#else
static_assert(is_same_v<ConvSpatialDimsType, std::array<IndexType, NDimSpatial>> || static_assert(is_same_v<ConvSpatialDimsType, std::array<IndexType, NDimSpatial>> ||
is_same_v<ConvSpatialDimsType, ck::Array<IndexType, NDimSpatial>>); is_same_v<ConvSpatialDimsType, ck::Array<IndexType, NDimSpatial>>);
static_assert(is_same_v<ConvDimsType, std::array<IndexType, NDimSpatial + I3>> || static_assert(is_same_v<ConvDimsType, std::array<IndexType, NDimSpatial + I3>> ||
is_same_v<ConvDimsType, ck::Array<IndexType, NDimSpatial + I3>>); is_same_v<ConvDimsType, ck::Array<IndexType, NDimSpatial + I3>>);
#endif
if constexpr(SplitN) if constexpr(SplitN)
{ {
N_ = GetSplitedNSize( N_ = GetSplitedNSize(
...@@ -291,7 +299,7 @@ struct TransformConvFwdToGemm ...@@ -291,7 +299,7 @@ struct TransformConvFwdToGemm
template <typename ConvDimsType, template <typename ConvDimsType,
typename ConvSpatialDimsType, typename ConvSpatialDimsType,
index_t NDim = NDimSpatial, index_t NDim = NDimSpatial,
typename std::enable_if<NDim == 3, bool>::type = false> typename ck::enable_if<NDim == 3, bool>::type = false>
__host__ __device__ TransformConvFwdToGemm(const ConvDimsType& a_g_n_c_wis_lengths, __host__ __device__ TransformConvFwdToGemm(const ConvDimsType& a_g_n_c_wis_lengths,
const ConvDimsType& a_g_n_c_wis_strides, const ConvDimsType& a_g_n_c_wis_strides,
const ConvDimsType& b_g_k_c_xs_lengths, const ConvDimsType& b_g_k_c_xs_lengths,
...@@ -343,11 +351,15 @@ struct TransformConvFwdToGemm ...@@ -343,11 +351,15 @@ struct TransformConvFwdToGemm
InRightPadW_{input_right_pads[I2]}, InRightPadW_{input_right_pads[I2]},
ZYX_{Z_ * Y_ * X_} ZYX_{Z_ * Y_ * X_}
{ {
#ifdef CK_CODE_GEN_RTC
static_assert(is_same_v<ConvSpatialDimsType, ck::Array<IndexType, NDimSpatial>>);
static_assert(is_same_v<ConvDimsType, ck::Array<IndexType, NDimSpatial + I3>>);
#else
static_assert(is_same_v<ConvSpatialDimsType, std::array<IndexType, NDimSpatial>> || static_assert(is_same_v<ConvSpatialDimsType, std::array<IndexType, NDimSpatial>> ||
is_same_v<ConvSpatialDimsType, ck::Array<IndexType, NDimSpatial>>); is_same_v<ConvSpatialDimsType, ck::Array<IndexType, NDimSpatial>>);
static_assert(is_same_v<ConvDimsType, std::array<IndexType, NDimSpatial + I3>> || static_assert(is_same_v<ConvDimsType, std::array<IndexType, NDimSpatial + I3>> ||
is_same_v<ConvDimsType, ck::Array<IndexType, NDimSpatial + I3>>); is_same_v<ConvDimsType, ck::Array<IndexType, NDimSpatial + I3>>);
#endif
if constexpr(SplitN) if constexpr(SplitN)
{ {
N_ = GetSplitedNSize( N_ = GetSplitedNSize(
...@@ -478,7 +490,7 @@ struct TransformConvFwdToGemm ...@@ -478,7 +490,7 @@ struct TransformConvFwdToGemm
// TODO: implement ck::tensor_layout::convolution that describe packed/strided dimemsion as // TODO: implement ck::tensor_layout::convolution that describe packed/strided dimemsion as
// properties // properties
template <typename ALayout, template <typename ALayout,
typename std::enable_if<NDimSpatial == 1 && typename ck::enable_if<NDimSpatial == 1 &&
(is_same_v<ALayout, tensor_layout::convolution::G_NW_C> || (is_same_v<ALayout, tensor_layout::convolution::G_NW_C> ||
is_same_v<ALayout, tensor_layout::convolution::NWGC> || is_same_v<ALayout, tensor_layout::convolution::NWGC> ||
is_same_v<ALayout, tensor_layout::convolution::GNWC>), is_same_v<ALayout, tensor_layout::convolution::GNWC>),
...@@ -691,8 +703,8 @@ struct TransformConvFwdToGemm ...@@ -691,8 +703,8 @@ struct TransformConvFwdToGemm
} }
template <typename ALayout, template <typename ALayout,
typename std::enable_if< typename ck::enable_if<NDimSpatial == 2 &&
NDimSpatial == 2 && (is_same_v<ALayout, tensor_layout::convolution::G_NHW_C> || (is_same_v<ALayout, tensor_layout::convolution::G_NHW_C> ||
is_same_v<ALayout, tensor_layout::convolution::NHWGC> || is_same_v<ALayout, tensor_layout::convolution::NHWGC> ||
is_same_v<ALayout, tensor_layout::convolution::GNHWC>), is_same_v<ALayout, tensor_layout::convolution::GNHWC>),
bool>::type = false> bool>::type = false>
...@@ -932,7 +944,7 @@ struct TransformConvFwdToGemm ...@@ -932,7 +944,7 @@ struct TransformConvFwdToGemm
} }
template <typename ALayout, template <typename ALayout,
typename std::enable_if< typename ck::enable_if<
NDimSpatial == 3 && (is_same_v<ALayout, tensor_layout::convolution::G_NDHW_C> || NDimSpatial == 3 && (is_same_v<ALayout, tensor_layout::convolution::G_NDHW_C> ||
is_same_v<ALayout, tensor_layout::convolution::NDHWGC> || is_same_v<ALayout, tensor_layout::convolution::NDHWGC> ||
is_same_v<ALayout, tensor_layout::convolution::GNDHWC>), is_same_v<ALayout, tensor_layout::convolution::GNDHWC>),
...@@ -1242,7 +1254,7 @@ struct TransformConvFwdToGemm ...@@ -1242,7 +1254,7 @@ struct TransformConvFwdToGemm
} }
template <typename BLayout, template <typename BLayout,
typename std::enable_if<is_same_v<BLayout, tensor_layout::convolution::GKXC> || typename ck::enable_if<is_same_v<BLayout, tensor_layout::convolution::GKXC> ||
is_same_v<BLayout, tensor_layout::convolution::GKYXC> || is_same_v<BLayout, tensor_layout::convolution::GKYXC> ||
is_same_v<BLayout, tensor_layout::convolution::GKZYXC>, is_same_v<BLayout, tensor_layout::convolution::GKZYXC>,
bool>::type = false> bool>::type = false>
...@@ -1252,9 +1264,9 @@ struct TransformConvFwdToGemm ...@@ -1252,9 +1264,9 @@ struct TransformConvFwdToGemm
device::ConvolutionForwardSpecialization::Filter3x3) device::ConvolutionForwardSpecialization::Filter3x3)
{ {
using FilterSizeNumType = using FilterSizeNumType =
std::conditional_t<NDimSpatial == 1, ck::conditional_t<NDimSpatial == 1,
Number<3>, Number<3>,
std::conditional_t<NDimSpatial == 2, Number<9>, Number<27>>>; ck::conditional_t<NDimSpatial == 2, Number<9>, Number<27>>>;
if constexpr(NumGroupsToMerge == 1) if constexpr(NumGroupsToMerge == 1)
{ {
...@@ -1297,7 +1309,7 @@ struct TransformConvFwdToGemm ...@@ -1297,7 +1309,7 @@ struct TransformConvFwdToGemm
template < template <
typename BLayout, typename BLayout,
typename std::enable_if<is_same_v<BLayout, tensor_layout::convolution::G_K_X_C> || typename ck::enable_if<is_same_v<BLayout, tensor_layout::convolution::G_K_X_C> ||
is_same_v<BLayout, tensor_layout::convolution::G_K_YX_C> || is_same_v<BLayout, tensor_layout::convolution::G_K_YX_C> ||
is_same_v<BLayout, tensor_layout::convolution::G_K_ZYX_C> || is_same_v<BLayout, tensor_layout::convolution::G_K_ZYX_C> ||
is_same_v<BLayout, tensor_layout::convolution::KXGC> || is_same_v<BLayout, tensor_layout::convolution::KXGC> ||
...@@ -1318,11 +1330,11 @@ struct TransformConvFwdToGemm ...@@ -1318,11 +1330,11 @@ struct TransformConvFwdToGemm
return wei_gemmn_gemmk_desc; return wei_gemmn_gemmk_desc;
} }
template <typename CLayout, template <
typename CLayout,
index_t NDimSp = NDimSpatial, index_t NDimSp = NDimSpatial,
typename std::enable_if<NDimSp == 1 && typename ck::enable_if<NDimSp == 1 && (is_same_v<CLayout, tensor_layout::convolution::G_K>),
(is_same_v<CLayout, tensor_layout::convolution::G_K>),
bool>::type = false> bool>::type = false>
__host__ __device__ auto MakeCDescriptor_M_N() const __host__ __device__ auto MakeCDescriptor_M_N() const
{ {
...@@ -1330,11 +1342,11 @@ struct TransformConvFwdToGemm ...@@ -1330,11 +1342,11 @@ struct TransformConvFwdToGemm
make_tuple(I0, KStrideTensorC_)); make_tuple(I0, KStrideTensorC_));
} }
template <typename CLayout, template <
typename CLayout,
index_t NDimSp = NDimSpatial, index_t NDimSp = NDimSpatial,
typename std::enable_if<NDimSp == 2 && typename ck::enable_if<NDimSp == 2 && (is_same_v<CLayout, tensor_layout::convolution::G_K>),
(is_same_v<CLayout, tensor_layout::convolution::G_K>),
bool>::type = false> bool>::type = false>
__host__ __device__ auto MakeCDescriptor_M_N() const __host__ __device__ auto MakeCDescriptor_M_N() const
{ {
...@@ -1342,11 +1354,11 @@ struct TransformConvFwdToGemm ...@@ -1342,11 +1354,11 @@ struct TransformConvFwdToGemm
make_tuple(I0, KStrideTensorC_)); make_tuple(I0, KStrideTensorC_));
} }
template <typename CLayout, template <
typename CLayout,
index_t NDimSp = NDimSpatial, index_t NDimSp = NDimSpatial,
typename std::enable_if<NDimSp == 3 && typename ck::enable_if<NDimSp == 3 && (is_same_v<CLayout, tensor_layout::convolution::G_K>),
(is_same_v<CLayout, tensor_layout::convolution::G_K>),
bool>::type = false> bool>::type = false>
__host__ __device__ auto MakeCDescriptor_M_N() const __host__ __device__ auto MakeCDescriptor_M_N() const
{ {
...@@ -1356,7 +1368,7 @@ struct TransformConvFwdToGemm ...@@ -1356,7 +1368,7 @@ struct TransformConvFwdToGemm
template <typename CLayout, template <typename CLayout,
index_t NDimSp = NDimSpatial, index_t NDimSp = NDimSpatial,
typename std::enable_if<NDimSp == 1 && typename ck::enable_if<NDimSp == 1 &&
(is_same_v<CLayout, tensor_layout::convolution::G_NW_K> || (is_same_v<CLayout, tensor_layout::convolution::G_NW_K> ||
is_same_v<CLayout, tensor_layout::convolution::NWGK> || is_same_v<CLayout, tensor_layout::convolution::NWGK> ||
is_same_v<CLayout, tensor_layout::convolution::GNWK>), is_same_v<CLayout, tensor_layout::convolution::GNWK>),
...@@ -1410,8 +1422,8 @@ struct TransformConvFwdToGemm ...@@ -1410,8 +1422,8 @@ struct TransformConvFwdToGemm
template <typename CLayout, template <typename CLayout,
index_t NDimSp = NDimSpatial, index_t NDimSp = NDimSpatial,
typename std::enable_if< typename ck::enable_if<NDimSp == 2 &&
NDimSp == 2 && (is_same_v<CLayout, tensor_layout::convolution::G_NHW_K> || (is_same_v<CLayout, tensor_layout::convolution::G_NHW_K> ||
is_same_v<CLayout, tensor_layout::convolution::NHWGK> || is_same_v<CLayout, tensor_layout::convolution::NHWGK> ||
is_same_v<CLayout, tensor_layout::convolution::GNHWK>), is_same_v<CLayout, tensor_layout::convolution::GNHWK>),
bool>::type = false> bool>::type = false>
...@@ -1467,7 +1479,7 @@ struct TransformConvFwdToGemm ...@@ -1467,7 +1479,7 @@ struct TransformConvFwdToGemm
template <typename CLayout, template <typename CLayout,
index_t NDimSp = NDimSpatial, index_t NDimSp = NDimSpatial,
typename std::enable_if< typename ck::enable_if<
NDimSp == 3 && (is_same_v<CLayout, tensor_layout::convolution::G_NDHW_K> || NDimSp == 3 && (is_same_v<CLayout, tensor_layout::convolution::G_NDHW_K> ||
is_same_v<CLayout, tensor_layout::convolution::NDHWGK> || is_same_v<CLayout, tensor_layout::convolution::NDHWGK> ||
is_same_v<CLayout, tensor_layout::convolution::GNDHWK>), is_same_v<CLayout, tensor_layout::convolution::GNDHWK>),
......
...@@ -501,6 +501,7 @@ inline __host__ __device__ half_t type_convert<half_t, bf8_t>(bf8_t x) ...@@ -501,6 +501,7 @@ inline __host__ __device__ half_t type_convert<half_t, bf8_t>(bf8_t x)
#endif #endif
} }
#ifndef CK_CODE_GEN_RTC
template <typename Y, typename X, size_t NumElems> template <typename Y, typename X, size_t NumElems>
inline __host__ __device__ void array_convert(std::array<Y, NumElems>& y, inline __host__ __device__ void array_convert(std::array<Y, NumElems>& y,
const std::array<X, NumElems>& x) const std::array<X, NumElems>& x)
...@@ -510,6 +511,7 @@ inline __host__ __device__ void array_convert(std::array<Y, NumElems>& y, ...@@ -510,6 +511,7 @@ inline __host__ __device__ void array_convert(std::array<Y, NumElems>& y,
y[i] = type_convert<Y>(x[i]); y[i] = type_convert<Y>(x[i]);
} }
} }
#endif
template <typename Y, typename X, index_t NumElems> template <typename Y, typename X, index_t NumElems>
inline __host__ __device__ void array_convert(Array<Y, NumElems>& y, const Array<X, NumElems>& x) inline __host__ __device__ void array_convert(Array<Y, NumElems>& y, const Array<X, NumElems>& x)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment