Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
9bdf1489
Unverified
Commit
9bdf1489
authored
Jan 28, 2025
by
arai713
Committed by
GitHub
Jan 28, 2025
Browse files
Merge branch 'develop' into codegen_hiprtc
parents
d6eec413
d6a4605e
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
16 additions
and
11 deletions
+16
-11
include/ck/ck.hpp
include/ck/ck.hpp
+11
-6
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp
...tion/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp
...ration/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp
...gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp
...or_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
...k/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
+1
-1
No files found.
include/ck/ck.hpp
View file @
9bdf1489
...
...
@@ -235,13 +235,18 @@ CK_DECLARE_ENV_VAR_BOOL(CK_LOGGING)
// workaround: compiler issue on gfx908
#define CK_WORKAROUND_SWDEV_388832 1
// denorm test fix, required to work around dissue
#ifndef CK_WORKAROUND_DENORM_FIX
#define CK_WORKAROUND_DENORM_FIX 0
// denorm test fix, necessary for gfx90a
#ifndef CK_GFX90A_DENORM_WORKAROUND
#define CK_GFX90A_DENORM_WORKAROUND 0
#endif // CK_GFX90A_DENORM_WORKAROUND
// Enable only for gfx90a
#if defined(__gfx90a__)
#if CK_GFX90A_DENORM_WORKAROUND
#define CK_GFX90A_DENORM_WORKAROUND 1
#endif // CK_GFX90A_DENORM_WORKAROUND is set to 1
#else
// enable only for gfx90a
#define CK_WORKAROUND_DENORM_FIX = CK_WORKAROUND_DENORM_FIX && defined(__gfx90a__)
#endif // CK_WORKAROUND_DENORM_FIX
#define CK_GFX90A_DENORM_WORKAROUND 0
#endif // gfx90a
// set flag to 1 to build deprecated instances
#define CK_BUILD_DEPRECATED 1
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp
View file @
9bdf1489
...
...
@@ -101,7 +101,7 @@ struct GridwiseGemmMultipleABD_xdl_cshuffle
using
GridwiseGemmPipe
=
remove_cvref_t
<
decltype
(
GridwiseGemmPipeline_Selector
<
PipelineVer
,
NumGemmKPrefetchStage
,
LoopSched
>
())
>
;
#if CK_
WORKAROUND_DENORM_FIX
#if CK_
GFX90A_DENORM_WORKAROUND
using
AComputeDataType
=
conditional_t
<
is_same_v
<
AComputeDataType_
,
ck
::
half_t
>
,
ck
::
bhalf_t
,
AComputeDataType_
>
;
using
BComputeDataType
=
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp
View file @
9bdf1489
...
...
@@ -100,7 +100,7 @@ struct GridwiseGemmMultipleD_xdl_cshuffle
using
GridwiseGemmPipe
=
remove_cvref_t
<
decltype
(
GridwiseGemmPipeline_Selector
<
PipelineVer
,
NumGemmKPrefetchStage
,
LoopSched
>
())
>
;
#if CK_
WORKAROUND_DENORM_FIX
#if CK_
GFX90A_DENORM_WORKAROUND
using
AComputeDataType
=
conditional_t
<
is_same_v
<
AComputeDataType_
,
ck
::
half_t
>
,
ck
::
bhalf_t
,
AComputeDataType_
>
;
using
BComputeDataType
=
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp
View file @
9bdf1489
...
...
@@ -164,7 +164,7 @@ struct GridwiseGemmMultipleD_Xdl_CShuffle_LdsDirectLoad
using
GridwiseGemmPipe
=
remove_cvref_t
<
decltype
(
GridwiseGemmPipeline_Selector
<
PipelineVer
,
NumGemmKPrefetchStage
,
LoopSched
>
())
>
;
#if CK_
WORKAROUND_DENORM_FIX
#if CK_
GFX90A_DENORM_WORKAROUND
using
AComputeDataType
=
conditional_t
<
is_same_v
<
AComputeDataType_
,
ck
::
half_t
>
,
ck
::
bhalf_t
,
AComputeDataType_
>
;
#else
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp
View file @
9bdf1489
...
...
@@ -271,7 +271,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_bwd_weight
// when mfma if fixed, remove this section and update
// FloatAAdjusted -> ComputeTypeA, FloatBAdjusted -> ComputeTypeB,
// throughout this file
#if CK_
WORKAROUND_DENORM_FIX
#if CK_
GFX90A_DENORM_WORKAROUND
using
FloatAAdjusted
=
conditional_t
<
is_same_v
<
ComputeTypeA
,
ck
::
half_t
>
,
ck
::
bhalf_t
,
ComputeTypeA
>
;
using
FloatBAdjusted
=
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
View file @
9bdf1489
...
...
@@ -254,7 +254,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v2r3
// we convert fp16->fp32->bf16 and execute bf16 mfma instruction
// when mfma if fixed, remove this section and update
// FloatABAdjusted -> FloatAB throughout this file
#if CK_
WORKAROUND_DENORM_FIX
#if CK_
GFX90A_DENORM_WORKAROUND
using
FloatABAdjusted
=
conditional_t
<
is_same_v
<
FloatAB
,
ck
::
half_t
>
,
ck
::
bhalf_t
,
FloatAB
>
;
#else
using
FloatABAdjusted
=
FloatAB
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment