Commit 4e075420 authored by illsilin's avatar illsilin
Browse files

compile ck for all targets

parent 64687816
......@@ -20,7 +20,7 @@ mkdir build && cd build
cmake \
-D BUILD_DEV=OFF \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 \
-D CMAKE_CXX_FLAGS=" -O3 \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH=/opt/rocm \
..
......
......@@ -54,6 +54,7 @@ __global__ void
const ComputeBasePrtOfBatch compute_base_ptr_of_batch_,
const Block2CTileMap block_2_ctile_map)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
const index_t num_blocks_per_batch =
__builtin_amdgcn_readfirstlane(get_grid_size() / batch_count);
const index_t g_idx = __builtin_amdgcn_readfirstlane(get_block_1d_id() / num_blocks_per_batch);
......@@ -933,6 +934,7 @@ struct DeviceBatchedGemmReduce_Xdl_CShuffle : public DeviceGemmReduce<AElementwi
return str.str();
}
#endif //end of if defined (defined(__gfx908__) || defined(__gfx90a__))
};
} // namespace device
......
......@@ -46,6 +46,7 @@ __global__ void
const ComputeBasePrtOfBatch compute_base_ptr_of_batch_,
const Block2CTileMap block_2_ctile_map)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
const index_t num_blocks_per_batch =
__builtin_amdgcn_readfirstlane(get_grid_size() / batch_count);
const index_t g_idx = __builtin_amdgcn_readfirstlane(get_block_1d_id() / num_blocks_per_batch);
......@@ -609,6 +610,7 @@ struct DeviceBatchedGemmXdl
return str.str();
}
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
};
} // namespace device
......
......@@ -49,6 +49,7 @@ __global__ void
const CElementwiseOperation c_element_op,
const Block2CTileMap block_2_ctile_map)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
const index_t num_blocks_per_batch =
__builtin_amdgcn_readfirstlane(get_grid_size() / num_batches);
const index_t g_idx = __builtin_amdgcn_readfirstlane(get_block_1d_id() / num_blocks_per_batch);
......@@ -654,6 +655,7 @@ struct DeviceConv3dFwdXdl_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_Wo_
return str.str();
}
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
};
} // namespace device
......
......@@ -48,6 +48,7 @@ __global__ void
const DGridDescriptor_MBlock_MPerBlock d_grid_desc_mblock_mperblock,
const Block2CTileMap block_2_ctile_map)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
GridwiseGemm::template Run<HasMainK0BlockLoop>(p_a_grid,
......@@ -66,6 +67,7 @@ __global__ void
c_grid_desc_mblock_mperblock_nblock_nperblock,
d_grid_desc_mblock_mperblock,
block_2_ctile_map);
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
}
template <typename FloatAB,
......
......@@ -38,6 +38,7 @@ __global__ void
c_grid_desc_mblock_mperblock_nblock_nperblock,
const Block2CTileMap block_2_ctile_map)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
GridwiseGemm::template Run<HasMainK0BlockLoop>(p_a_grid,
......@@ -51,6 +52,7 @@ __global__ void
b_grid_desc_bk0_n_bk1,
c_grid_desc_mblock_mperblock_nblock_nperblock,
block_2_ctile_map);
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
}
template <typename FloatAB,
......
......@@ -39,6 +39,7 @@ __global__ void
const CElementwiseOperation c_element_op,
const Block2CTileMap block_2_ctile_map)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
GridwiseGemm::template Run<HasMainK0BlockLoop>(p_a_grid,
......@@ -52,6 +53,7 @@ __global__ void
b_element_op,
c_element_op,
block_2_ctile_map);
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
}
template <typename GridwiseGemm,
......@@ -74,6 +76,7 @@ __global__ void
const BElementwiseOperation b_element_op,
const CElementwiseOperation c_element_op)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
const index_t block_id = get_block_1d_id();
......@@ -126,6 +129,7 @@ __global__ void
gemm_desc_ptr[group_id].block_2_ctile_map_,
block_id_grp);
#endif
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
}
template <index_t BlockSize,
......
......@@ -37,6 +37,7 @@ __global__ void
const CElementwiseOperation c_element_op,
const CBlockClusterAdaptor c_block_cluster_adaptor)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
constexpr index_t shared_block_size =
GridwiseGemm::GetSharedMemoryNumberOfByte() / sizeof(FloatAB);
......@@ -53,6 +54,7 @@ __global__ void
b_element_op,
c_element_op,
c_block_cluster_adaptor);
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
}
template <index_t BlockSize,
......
......@@ -39,6 +39,7 @@ __global__ void
const CElementwiseOperation c_element_op,
const CBlockClusterAdaptor c_block_cluster_adaptor)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
constexpr index_t shared_block_size =
GridwiseGemm::GetSharedMemoryNumberOfByte() / sizeof(FloatAB);
......@@ -55,6 +56,7 @@ __global__ void
b_element_op,
c_element_op,
c_block_cluster_adaptor);
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
}
template <index_t BlockSize,
......
......@@ -42,6 +42,7 @@ __global__ void
const CElementwiseOperation c_element_op,
const Block2CTileMap block_2_ctile_map)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
GridwiseGemm::template Run<HasMainK0BlockLoop>(
......@@ -56,6 +57,7 @@ __global__ void
b_element_op,
c_element_op,
block_2_ctile_map);
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
}
template <
......
......@@ -45,6 +45,7 @@ __global__ void
const CElementwiseOperation c_element_op,
const Block2CTileMap block_2_ctile_map)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
GridwiseGemm::template Run<HasMainK0BlockLoop>(
......@@ -61,6 +62,7 @@ __global__ void
b_element_op,
c_element_op,
block_2_ctile_map);
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
}
template <
......
......@@ -49,6 +49,7 @@ __global__ void
const CElementwiseOperation c_element_op,
const Block2CTileMap block_2_ctile_map)
{
#if (defined(__gfx908__) || defined(__gfx90a__))
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
GridwiseGemm::template Run<HasMainK0BlockLoop>(
......@@ -67,6 +68,7 @@ __global__ void
b_element_op,
c_element_op,
block_2_ctile_map);
#endif //end of if (defined(__gfx908__) || defined(__gfx90a__))
}
template <
......
......@@ -36,6 +36,7 @@ __global__ void kernel_buffer_set_value(const Grid1dBufferDescType grid_1d_buffe
DataType value)
{
using PassThroughOp = tensor_operation::element_wise::UnaryIdentic<DataType, DataType>;
constexpr auto I0 = Number<0>{};
......
......@@ -10,7 +10,7 @@ cmake
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D BUILD_DEV=OFF \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
-D CMAKE_CXX_FLAGS=" -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment