Unverified commit cd167e49 authored by Chao Liu, committed by GitHub

Compile for gfx908 and gfx90a (#130)

* adding compilation for multiple targets

* fix build

* clean

* update Jenkinsfile

* update readme

* update Jenkins

* use ck::half_t instead of ushort for bf16

* rename enum classes

* clean

* rename

* clean
parent ecf337ba
@@ -85,26 +85,24 @@ int main(int argc, char* argv[])
{
return profile_reduce(argc, argv);
}
else
{
// clang-format off
printf("arg1: tensor operation (gemm: GEMM\n"
" gemm_bias_2d: GEMM+Bias(2D)\n"
" gemm_bias_relu: GEMM+Bias+ReLU\n"
" gemm_bias_relu_add: GEMM+Bias+ReLU+Add\n"
" gemm_reduce: GEMM+Reduce\n"
" grouped_gemm: Grouped Gemm\n"
" grouped_gemm: Grouped GEMM\n"
" conv_fwd: ForwardConvolution\n"
" conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
" conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
" conv_fwd_bias_relu_atomic_add: ForwardConvolution+Bias+ReLU+AtomicAdd\n"
" conv1d_bwd_data: BackwardConvolution data 1 dim\n"
" conv2d_bwd_data: BackwardConvolution data 2 dim\n"
" conv3d_bwd_data: BackwardConvolution data 3 dim\n"
" grouped_gemm: Grouped Gemm\n"
" reduce: REDUCE\n");
" conv1d_bwd_data: BackwardConvolution data 1d\n"
" conv2d_bwd_data: BackwardConvolution data 2d\n"
" conv3d_bwd_data: BackwardConvolution data 3d\n"
" grouped_gemm: Grouped GEMM\n"
" reduce: Reduce\n");
// clang-format on
return 0;
}
}
@@ -10,9 +10,11 @@ cmake
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D BUILD_DEV=OFF \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_FLAGS="-DCK_AMD_GPU_GFX908 --amdgpu-target=gfx908 -O3 -ftemplate-backtrace-limit=0 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only " \
-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
${MY_PROJECT_SOURCE}
+ #-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only -save-temps=$PWD" \
+ #-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
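
The flag change above replaces the single-target `--amdgpu-target=gfx908` build with a fat binary carrying code objects for both `gfx908` and `gfx90a`. As a quick runtime sanity check that the installed GPU matches one of the compiled offload architectures, a small standalone HIP program can query the device architecture string. This is a minimal sketch, not part of the commit; it assumes a working ROCm/HIP installation and uses the standard `hipGetDeviceProperties` API.

```cpp
// arch_check.cpp — hedged sketch, not part of this commit.
// Assumed build line: hipcc --offload-arch=gfx908 --offload-arch=gfx90a arch_check.cpp -o arch_check
#include <cstdio>
#include <cstring>
#include <hip/hip_runtime.h>

int main()
{
    hipDeviceProp_t prop{};
    if(hipGetDeviceProperties(&prop, 0) != hipSuccess)
    {
        std::fprintf(stderr, "no HIP device found\n");
        return 1;
    }

    // gcnArchName looks like "gfx908:sramecc+:xnack-" or "gfx90a:sramecc+:xnack-"
    std::printf("device 0 arch: %s\n", prop.gcnArchName);

    const bool supported = std::strstr(prop.gcnArchName, "gfx908") != nullptr ||
                           std::strstr(prop.gcnArchName, "gfx90a") != nullptr;
    std::printf("matches a compiled offload arch: %s\n", supported ? "yes" : "no");
    return supported ? 0 : 1;
}
```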
@@ -31,7 +31,7 @@ using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
static constexpr auto ConvFwdDefault =
- ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default;
+ ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
template <ck::index_t SpatialDims, typename InDataType, typename WeiDataType, typename OutDataType>
using DeviceConvNDFwdInstance = ck::tensor_operation::device::
......
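
The rename drops the `_t` suffix from the enum classes (here, `ConvolutionForwardSpecialization_t` becomes `ConvolutionForwardSpecialization`), so downstream code spelling the old name must be updated. One hedged option for easing such a migration, not something this commit does, is a temporary deprecated alias so old call sites keep compiling while emitting a warning. The enumerator list below is a simplified stand-in, not CK's authoritative definition.

```cpp
// rename_alias.cpp — hedged sketch of a migration aid; the alias is NOT part of this commit.
#include <cstdio>

namespace ck::tensor_operation::device {

// Simplified stand-in for the renamed, suffix-free enum class.
enum class ConvolutionForwardSpecialization
{
    Default,
    Filter1x1Pad0, // illustrative enumerator, not the full list
};

// Hypothetical compatibility alias for code still using the old _t spelling.
using ConvolutionForwardSpecialization_t [[deprecated("use ConvolutionForwardSpecialization")]] =
    ConvolutionForwardSpecialization;

} // namespace ck::tensor_operation::device

int main()
{
    constexpr auto spec = ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
    std::printf("spec = %d\n", static_cast<int>(spec));
    return 0;
}
```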
@@ -5,7 +5,7 @@
#include <stdlib.h>
#include <half.hpp>
#include "config.hpp"
#include "print.hpp"
#include "magic_division.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
......
@@ -51,10 +51,10 @@ struct type_mapping<ck::half_t>
constexpr int Rank = 4;
- constexpr ReduceTensorOp_t ReduceOpId = ReduceTensorOp_t::AVG;
- constexpr NanPropagation_t NanOpt = NanPropagation_t::PROPAGATE_NAN;
+ constexpr ReduceTensorOp ReduceOpId = ReduceTensorOp::AVG;
+ constexpr NanPropagation NanOpt = NanPropagation::PROPAGATE_NAN;
constexpr bool PropagateNan = false;
- constexpr ReduceTensorIndices_t IndicesOpt = ReduceTensorIndices_t::NO_INDICES;
+ constexpr ReduceTensorIndices IndicesOpt = ReduceTensorIndices::NO_INDICES;
constexpr bool NeedIndices = false;
template <typename InDataType,
......
@@ -51,10 +51,10 @@ struct type_mapping<ck::half_t>
constexpr int Rank = 4;
- constexpr ReduceTensorOp_t ReduceOpId = ReduceTensorOp_t::AMAX;
- constexpr NanPropagation_t NanOpt = NanPropagation_t::PROPAGATE_NAN;
+ constexpr ReduceTensorOp ReduceOpId = ReduceTensorOp::AMAX;
+ constexpr NanPropagation NanOpt = NanPropagation::PROPAGATE_NAN;
constexpr bool PropagateNan = false;
- constexpr ReduceTensorIndices_t IndicesOpt = ReduceTensorIndices_t::FLATTENED_INDICES;
+ constexpr ReduceTensorIndices IndicesOpt = ReduceTensorIndices::FLATTENED_INDICES;
constexpr bool NeedIndices = true;
template <typename InDataType,
......
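
Both reduce examples above switch to the same suffix-free enum names (`ReduceTensorOp`, `NanPropagation`, `ReduceTensorIndices`). The self-contained sketch below mirrors that compile-time configuration pattern using simplified stand-in enums so it builds on its own; the enumerator subsets and the derived `NeedIndices` expression are illustrative assumptions, not CK's headers.

```cpp
// reduce_config.cpp — hedged, self-contained sketch of the post-rename configuration style.
#include <cstdio>

// Simplified stand-ins for CK's renamed enum classes (illustrative subsets only).
enum class ReduceTensorOp { AVG, AMAX };
enum class NanPropagation { NOT_PROPAGATE_NAN, PROPAGATE_NAN };
enum class ReduceTensorIndices { NO_INDICES, FLATTENED_INDICES };

// Compile-time configuration, mirroring the pattern used in the reduce examples above.
constexpr int Rank                        = 4;
constexpr ReduceTensorOp ReduceOpId       = ReduceTensorOp::AMAX;
constexpr NanPropagation NanOpt           = NanPropagation::PROPAGATE_NAN;
constexpr ReduceTensorIndices IndicesOpt  = ReduceTensorIndices::FLATTENED_INDICES;
constexpr bool NeedIndices                = (IndicesOpt == ReduceTensorIndices::FLATTENED_INDICES);

int main()
{
    std::printf("Rank=%d ReduceOpId=%d NanOpt=%d IndicesOpt=%d NeedIndices=%d\n",
                Rank,
                static_cast<int>(ReduceOpId),
                static_cast<int>(NanOpt),
                static_cast<int>(IndicesOpt),
                static_cast<int>(NeedIndices));
    return 0;
}
```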