"megatron/git@developer.sourcefind.cn:OpenDAS/megatron-lm.git" did not exist on "83671bbfee43649a544d3fcf1aae76a5bee10b88"
Unverified commit cd167e49, authored by Chao Liu and committed by GitHub
Browse files

Compile for gfx908 and gfx90a (#130)

* adding compilation for multiple targets

* fix build

* clean

* update Jenkinsfile

* update readme

* update Jenkins

* use ck::half_t instead of ushort for bf16

* rename enum classes

* clean

* rename

* clean
parent ecf337ba
...@@ -85,26 +85,24 @@ int main(int argc, char* argv[]) ...@@ -85,26 +85,24 @@ int main(int argc, char* argv[])
{ {
return profile_reduce(argc, argv); return profile_reduce(argc, argv);
} }
else
{
// clang-format off
printf("arg1: tensor operation (gemm: GEMM\n"
" gemm_bias_2d: GEMM+Bias(2D)\n"
" gemm_bias_relu: GEMM+Bias+ReLU\n"
" gemm_bias_relu_add: GEMM+Bias+ReLU+Add\n"
" gemm_reduce: GEMM+Reduce\n"
" grouped_gemm: Grouped Gemm\n"
" conv_fwd: ForwardConvolution\n"
" conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
" conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
" conv_fwd_bias_relu_atomic_add: ForwardConvolution+Bias+ReLU+AtomicAdd\n"
" conv1d_bwd_data: BackwardConvolution data 1 dim\n"
" conv2d_bwd_data: BackwardConvolution data 2 dim\n"
" conv3d_bwd_data: BackwardConvolution data 3 dim\n"
" grouped_gemm: Grouped Gemm\n"
" reduce: REDUCE\n");
// clang-format on
return 0; // clang-format off
} printf("arg1: tensor operation (gemm: GEMM\n"
" gemm_bias_2d: GEMM+Bias(2D)\n"
" gemm_bias_relu: GEMM+Bias+ReLU\n"
" gemm_bias_relu_add: GEMM+Bias+ReLU+Add\n"
" gemm_reduce: GEMM+Reduce\n"
" grouped_gemm: Grouped GEMM\n"
" conv_fwd: ForwardConvolution\n"
" conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
" conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
" conv_fwd_bias_relu_atomic_add: ForwardConvolution+Bias+ReLU+AtomicAdd\n"
" conv1d_bwd_data: BackwardConvolution data 1d\n"
" conv2d_bwd_data: BackwardConvolution data 2d\n"
" conv3d_bwd_data: BackwardConvolution data 3d\n"
" grouped_gemm: Grouped GEMM\n"
" reduce: Reduce\n");
// clang-format on
return 0;
} }
...@@ -10,9 +10,11 @@ cmake ...@@ -10,9 +10,11 @@ cmake
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \ -D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D BUILD_DEV=OFF \ -D BUILD_DEV=OFF \
-D CMAKE_BUILD_TYPE=Release \ -D CMAKE_BUILD_TYPE=Release \
-D CMAKE_CXX_FLAGS="-DCK_AMD_GPU_GFX908 --amdgpu-target=gfx908 -O3 -ftemplate-backtrace-limit=0 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only " \ -D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \ -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH=/opt/rocm \ -D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
${MY_PROJECT_SOURCE} ${MY_PROJECT_SOURCE}
#-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -mllvm --amdgpu-spill-vgpr-to-agpr=0 -gline-tables-only -save-temps=$PWD" \
#-D CMAKE_CXX_FLAGS=" --offload-arch=gfx908 --offload-arch=gfx90a -O3 -ftemplate-backtrace-limit=0 -gline-tables-only -save-temps=$PWD" \
...@@ -31,7 +31,7 @@ using WeiElementOp = ck::tensor_operation::element_wise::PassThrough; ...@@ -31,7 +31,7 @@ using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough; using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
static constexpr auto ConvFwdDefault = static constexpr auto ConvFwdDefault =
ck::tensor_operation::device::ConvolutionForwardSpecialization_t::Default; ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
template <ck::index_t SpatialDims, typename InDataType, typename WeiDataType, typename OutDataType> template <ck::index_t SpatialDims, typename InDataType, typename WeiDataType, typename OutDataType>
using DeviceConvNDFwdInstance = ck::tensor_operation::device:: using DeviceConvNDFwdInstance = ck::tensor_operation::device::
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <half.hpp> #include <half.hpp>
#include "config.hpp" #include "config.hpp"
#include "print.hpp" #include "magic_division.hpp"
#include "device.hpp" #include "device.hpp"
#include "host_tensor.hpp" #include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "host_tensor_generator.hpp"
......
...@@ -51,11 +51,11 @@ struct type_mapping<ck::half_t> ...@@ -51,11 +51,11 @@ struct type_mapping<ck::half_t>
constexpr int Rank = 4; constexpr int Rank = 4;
constexpr ReduceTensorOp_t ReduceOpId = ReduceTensorOp_t::AVG; constexpr ReduceTensorOp ReduceOpId = ReduceTensorOp::AVG;
constexpr NanPropagation_t NanOpt = NanPropagation_t::PROPAGATE_NAN; constexpr NanPropagation NanOpt = NanPropagation::PROPAGATE_NAN;
constexpr bool PropagateNan = false; constexpr bool PropagateNan = false;
constexpr ReduceTensorIndices_t IndicesOpt = ReduceTensorIndices_t::NO_INDICES; constexpr ReduceTensorIndices IndicesOpt = ReduceTensorIndices::NO_INDICES;
constexpr bool NeedIndices = false; constexpr bool NeedIndices = false;
template <typename InDataType, template <typename InDataType,
typename AccDataType, typename AccDataType,
......
...@@ -51,11 +51,11 @@ struct type_mapping<ck::half_t> ...@@ -51,11 +51,11 @@ struct type_mapping<ck::half_t>
constexpr int Rank = 4; constexpr int Rank = 4;
constexpr ReduceTensorOp_t ReduceOpId = ReduceTensorOp_t::AMAX; constexpr ReduceTensorOp ReduceOpId = ReduceTensorOp::AMAX;
constexpr NanPropagation_t NanOpt = NanPropagation_t::PROPAGATE_NAN; constexpr NanPropagation NanOpt = NanPropagation::PROPAGATE_NAN;
constexpr bool PropagateNan = false; constexpr bool PropagateNan = false;
constexpr ReduceTensorIndices_t IndicesOpt = ReduceTensorIndices_t::FLATTENED_INDICES; constexpr ReduceTensorIndices IndicesOpt = ReduceTensorIndices::FLATTENED_INDICES;
constexpr bool NeedIndices = true; constexpr bool NeedIndices = true;
template <typename InDataType, template <typename InDataType,
typename AccDataType, typename AccDataType,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment