change the target condition

3b573bb9 · illsilin · 1ca54630 · 3b573bb9 · 3b573bb9 · 3b573bb9
Commit 3b573bb9 authored Apr 08, 2022 by illsilin
10 changed files
--- a/include/ck/tensor_operation/gpu/device/device_batched_gemm_reduce_xdl_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/device_batched_gemm_reduce_xdl_cshuffle.hpp
@@ -54,7 +54,7 @@ __global__ void
            const ComputeBasePrtOfBatch compute_base_ptr_of_batch_,
            const Block2CTileMap block_2_ctile_map)
 {
-#if (defined(__gfx908__) || defined(__gfx90a__))
+#if (!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    const index_t num_blocks_per_batch =
        __builtin_amdgcn_readfirstlane(get_grid_size() / batch_count);
    const index_t g_idx = __builtin_amdgcn_readfirstlane(get_block_1d_id() / num_blocks_per_batch);

--- a/include/ck/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
+++ b/include/ck/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
@@ -46,7 +46,7 @@ __global__ void
            const ComputeBasePrtOfBatch compute_base_ptr_of_batch_,
            const Block2CTileMap block_2_ctile_map)
 {
-#if (defined(!__HIP_DEVICE_COMPILE__  || __gfx908__) || defined(__gfx90a__))
+#if (defined(!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    const index_t num_blocks_per_batch =
        __builtin_amdgcn_readfirstlane(get_grid_size() / batch_count);
    const index_t g_idx = __builtin_amdgcn_readfirstlane(get_block_1d_id() / num_blocks_per_batch);

--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp
@@ -48,7 +48,7 @@ __global__ void
            const DGridDescriptor_MBlock_MPerBlock d_grid_desc_mblock_mperblock,
            const Block2CTileMap block_2_ctile_map)
 {
-#if (defined(__gfx908__) || defined(__gfx90a__))
+#if (!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    __shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];

    GridwiseGemm::template Run<HasMainK0BlockLoop>(p_a_grid,

--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp
@@ -38,7 +38,7 @@ __global__ void
                                        c_grid_desc_mblock_mperblock_nblock_nperblock,
                                    const Block2CTileMap block_2_ctile_map)
 {
-#if (defined(!__HIP_DEVICE_COMPILE__  || __gfx908__) || defined(__gfx90a__))
+#if (!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    __shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];

    GridwiseGemm::template Run<HasMainK0BlockLoop>(p_a_grid,

--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp
@@ -39,7 +39,7 @@ __global__ void
            const CElementwiseOperation c_element_op,
            const Block2CTileMap block_2_ctile_map)
 {
-#if (!__HIP_DEVICE_COMPILE__  || defined(__gfx908__) || defined(__gfx90a__))
+#if (!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    __shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];

    GridwiseGemm::template Run<HasMainK0BlockLoop>(p_a_grid,
@@ -76,7 +76,7 @@ __global__ void
            const BElementwiseOperation b_element_op,
            const CElementwiseOperation c_element_op)
 {
-#if (!__HIP_DEVICE_COMPILE__  || defined(__gfx908__) || defined(__gfx90a__))
+#if (!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    __shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];

    const index_t block_id = get_block_1d_id();

--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp
@@ -37,7 +37,7 @@ __global__ void
                                const CElementwiseOperation c_element_op,
                                const CBlockClusterAdaptor c_block_cluster_adaptor)
 {
-#if (defined(!__HIP_DEVICE_COMPILE__  || __gfx908__) || defined(__gfx90a__))
+#if (!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    constexpr index_t shared_block_size =
        GridwiseGemm::GetSharedMemoryNumberOfByte() / sizeof(FloatAB);


--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
@@ -39,7 +39,7 @@ __global__ void
                                  const CElementwiseOperation c_element_op,
                                  const CBlockClusterAdaptor c_block_cluster_adaptor)
 {
-#if (defined!__HIP_DEVICE_COMPILE__  || (__gfx908__) || defined(__gfx90a__))
+#if (!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    constexpr index_t shared_block_size =
        GridwiseGemm::GetSharedMemoryNumberOfByte() / sizeof(FloatAB);


--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp
@@ -42,7 +42,7 @@ __global__ void
            const CElementwiseOperation c_element_op,
            const Block2CTileMap block_2_ctile_map)
 {
-#if (defined(!__HIP_DEVICE_COMPILE__  || __gfx908__) || defined(__gfx90a__))
+#if (defined(!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    __shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];

    GridwiseGemm::template Run<HasMainK0BlockLoop>(

--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp
@@ -45,7 +45,7 @@ __global__ void
            const CElementwiseOperation c_element_op,
            const Block2CTileMap block_2_ctile_map)
 {
-#if (defined(!__HIP_DEVICE_COMPILE__  || __gfx908__) || defined(__gfx90a__))
+#if (defined(!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    __shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];

    GridwiseGemm::template Run<HasMainK0BlockLoop>(

--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp
@@ -49,7 +49,7 @@ __global__ void
            const CElementwiseOperation c_element_op,
            const Block2CTileMap block_2_ctile_map)
 {
-#if (defined(!__HIP_DEVICE_COMPILE__  || __gfx908__) || defined(__gfx90a__))
+#if (!defined(__HIP_DEVICE_COMPILE__)  || defined(__gfx908__) || defined(__gfx90a__))
    __shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];

    GridwiseGemm::template Run<HasMainK0BlockLoop>(