"...csrc/git@developer.sourcefind.cn:OpenDAS/vision.git" did not exist on "a075d62971aa140ec72626f25033cf905715530a"
Unverified commit c1ef7319 authored by Qianfeng, committed by GitHub

Use ck::half_t for Host Reduction (#195)

* Add math functions for host

* Change host reduction to use ck::math

* Remove the use of half_float::half and half.hpp from the reduction example, profiler, and ctest
parent 4221505d
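
In effect, the host reference reduction can now operate directly on ck::half_t buffers because ck::math gains host overloads of abs/isnan for that type, so the example/profiler/test code no longer needs to reinterpret_cast buffers to half_float::half. Below is a minimal self-contained sketch of the underlying pattern; the demo namespace, my_half type, and host_abs helper are hypothetical stand-ins and not the actual ck headers.

#include <cmath>
#include <cstdint>
#include <iostream>

namespace demo {

struct my_half { std::uint16_t bits; }; // stand-in for a 16-bit half type

namespace math {
// Host overloads gathered in one namespace, mirroring the ck::math approach.
inline float   abs(float x)   { return std::abs(x); }
inline double  abs(double x)  { return std::abs(x); }
inline my_half abs(my_half x) { x.bits &= 0x7FFFu; return x; } // clear the IEEE sign bit
} // namespace math

} // namespace demo

// Generic host helper: the using-declaration lets one template cover float,
// double, and the half stand-in without casting buffers between types.
template <typename T>
T host_abs(T x)
{
    using demo::math::abs;
    return abs(x);
}

int main()
{
    std::cout << host_abs(-1.5f) << '\n';              // prints 1.5
    demo::my_half h{0xBC00u};                          // -1.0 in IEEE binary16
    std::cout << std::hex << host_abs(h).bits << '\n'; // prints 3c00 (+1.0)
}

The same resolution mechanism is what the "using ck::math::abs;" and "using ck::math::isnan;" declarations in host_reduce_util.hpp below rely on.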
@@ -3,7 +3,6 @@
 #include <initializer_list>
 #include <cstdlib>
 #include <getopt.h>
-#include <half.hpp>
 #include "check_err.hpp"
 #include "config.hpp"
@@ -27,10 +26,6 @@
 using InDataType = ck::half_t;
 using OutDataType = ck::half_t;
 using AccDataType = float;
-using HostInDataType = half_float::half;
-using HostOutDataType = half_float::half;
-using HostAccDataType = float;
 constexpr int Rank = 4;
 constexpr int NumReduceDim = 3;
@@ -306,9 +301,9 @@ int main(int argc, char* argv[])
     if(args.do_verification)
     {
-        ReductionHost<HostInDataType,
-                      HostAccDataType,
-                      HostOutDataType,
+        ReductionHost<InDataType,
+                      AccDataType,
+                      OutDataType,
                       ReduceOpId,
                       Rank,
                       NumReduceDim,
@@ -316,11 +311,8 @@ int main(int argc, char* argv[])
                       NeedIndices>
             hostReduce(in.mDesc, out_ref.mDesc, invariantDims, reduceDims);
-        hostReduce.Run(alpha,
-                       reinterpret_cast<const HostInDataType*>(in.mData.data()),
-                       beta,
-                       reinterpret_cast<HostOutDataType*>(out_ref.mData.data()),
-                       out_indices_ref.mData.data());
+        hostReduce.Run(
+            alpha, in.mData.data(), beta, out_ref.mData.data(), out_indices_ref.mData.data());
     };
     const auto i_inLengths = to_int_vector(args.inLengths);
...
 #ifndef CK_MATH_V2_HPP
 #define CK_MATH_V2_HPP
+#include <cmath>
 #include "data_type.hpp"
+#include "half.hpp"
 namespace ck {
 namespace math {
-static inline __device__ half_t abs(half_t x) { return __habs(x); };
-static inline __device__ half_t sqrtf(half_t x) { return hsqrt(x); };
-static inline __device__ bool isnan(half_t x) { return __hisnan(x); };
+static inline __host__ float abs(float x) { return std::abs(x); };
+static inline __host__ double abs(double x) { return std::abs(x); };
+static inline __host__ int8_t abs(int8_t x)
+{
+    int8_t sgn = x >> (8 - 1);
+    return (x ^ sgn) - sgn;
+};
+static inline __host__ int32_t abs(int32_t x)
+{
+    int32_t sgn = x >> (32 - 1);
+    return (x ^ sgn) - sgn;
+};
+static inline __host__ half_t abs(half_t x)
+{
+    half_float::half xx = *reinterpret_cast<half_float::half*>(&x);
+    half_float::half abs_xx = half_float::abs(xx);
+    half_t abs_x = *reinterpret_cast<half_t*>(&abs_xx);
+    return abs_x;
+};
+static inline __host__ float isnan(float x) { return std::isnan(x); };
+static inline __host__ double isnan(double x) { return std::isnan(x); };
+static inline __host__ int8_t isnan(int8_t x)
+{
+    (void)x;
+    return false;
+};
+static inline __host__ int32_t isnan(int32_t x)
+{
+    (void)x;
+    return false;
+};
+static inline __host__ bool isnan(half_t x)
+{
+    half_float::half xx = *reinterpret_cast<half_float::half*>(&x);
+    return half_float::isnan(xx);
+};
 } // namespace math
 } // namespace ck
...
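
For reference, the integer overloads above use a branchless two's-complement trick: an arithmetic right shift by width-1 yields 0 for non-negative values and -1 for negatives, and (x ^ sgn) - sgn then conditionally negates. A small standalone check of that identity (plain C++, not the ck headers; like the overloads above it assumes arithmetic right shift of negative values and is not meaningful for the most negative representable value):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Branchless abs: sgn is 0 when x >= 0 and -1 (all bits set) when x < 0,
// so (x ^ sgn) - sgn returns x unchanged or ~x + 1 == -x respectively.
static inline std::int32_t branchless_abs(std::int32_t x)
{
    std::int32_t sgn = x >> (32 - 1);
    return (x ^ sgn) - sgn;
}

int main()
{
    for(std::int32_t v : {0, 1, -1, 42, -42, 2147483647, -2147483647})
        assert(branchless_abs(v) == (v < 0 ? -v : v));
    return 0;
}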
@@ -33,7 +33,7 @@ namespace ck {
 struct float_equal_one
 {
     template <class T>
-    __device__ inline bool operator()(T x)
+    __host__ __device__ inline bool operator()(T x)
     {
         return x <= static_cast<T>(1.0f) and x >= static_cast<T>(1.0f);
     };
@@ -42,7 +42,7 @@ struct float_equal_one
 struct float_equal_zero
 {
     template <class T>
-    __device__ inline bool operator()(T x)
+    __host__ __device__ inline bool operator()(T x)
     {
         return x <= static_cast<T>(0.0f) and x >= static_cast<T>(0.0f);
     };
...
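
Since float_equal_one and float_equal_zero are now function objects usable from both host and device code, call sites construct a temporary and invoke it, as in float_equal_zero{}(beta) further down in host_reduction.hpp. A host-only sketch of that functor style (the __host__ __device__ qualifiers are dropped here so the snippet compiles without HIP):

#include <iostream>

// Host-only stand-ins for the ck functors shown above.
struct float_equal_one
{
    template <class T>
    bool operator()(T x) const { return x <= static_cast<T>(1.0f) && x >= static_cast<T>(1.0f); }
};

struct float_equal_zero
{
    template <class T>
    bool operator()(T x) const { return x <= static_cast<T>(0.0f) && x >= static_cast<T>(0.0f); }
};

int main()
{
    float alpha = 1.0f;
    float beta  = 0.5f;

    if(!float_equal_one{}(alpha))
        std::cout << "scale by alpha\n"; // skipped: alpha == 1
    if(!float_equal_zero{}(beta))
        std::cout << "blend with beta\n"; // printed: beta != 0
}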
@@ -26,7 +26,6 @@
 #ifndef GUARD_HOST_REDUCE_UTIL_HPP
 #define GUARD_HOST_REDUCE_UTIL_HPP
-#include <half.hpp>
 #include <limits>
 #include <cmath>
 #include <cassert>
@@ -34,6 +33,8 @@
 #include <string>
 #include "reduction_enums.hpp"
+#include "data_type.hpp"
+#include "math_v2.hpp"
 namespace ck {
@@ -42,34 +43,10 @@ namespace host_reduce {
 using ck::NanPropagation;
 using ck::ReduceTensorOp;
-template <typename T>
-static inline bool float_equal_one(T);
-static inline bool float_equal_one(float x) { return x == 1.0f; };
-static inline bool float_equal_one(double x) { return x == 1.0; };
-static inline bool float_equal_one(half_float::half x)
-{
-    return x == static_cast<half_float::half>(1.0f);
-};
-template <typename T>
-static inline bool float_equal_zero(T x);
-static inline bool float_equal_zero(float x) { return x == 0.0f; };
-static inline bool float_equal_zero(double x) { return x == 0.0; };
-static inline bool float_equal_zero(half_float::half x)
-{
-    return x == static_cast<half_float::half>(0.0f);
-};
 template <typename AccDataType, ReduceTensorOp ReduceOpId>
 __host__ static inline std::function<void(AccDataType&)> PreUnaryOpFn(int)
 {
-    using std::abs;
+    using ck::math::abs;
     if constexpr(ReduceOpId == ReduceTensorOp::NORM1)
     {
@@ -196,11 +173,11 @@ __host__ static inline AccDataType ReduceOpZeroVal()
     }
     else if constexpr(ReduceOpId == ReduceTensorOp::MIN)
     {
-        return (std::numeric_limits<AccDataType>::max());
+        return (ck::NumericLimits<AccDataType>::Max());
     }
     else if constexpr(ReduceOpId == ReduceTensorOp::MAX)
     {
-        return (std::numeric_limits<AccDataType>::lowest());
+        return (ck::NumericLimits<AccDataType>::Lowest());
     }
     else if constexpr(ReduceOpId == ReduceTensorOp::AMAX)
     {
@@ -222,7 +199,7 @@ binop_with_nan_check(std::function<void(AccDataType&, AccDataType)> opReduce,
                      AccDataType& accuVal,
                      AccDataType currVal)
 {
-    using std::isnan;
+    using ck::math::isnan;
     if constexpr(!PropagateNan)
     {
@@ -245,7 +222,7 @@ binop_with_nan_check2(std::function<void(AccDataType&, AccDataType, bool&)> opReduce,
                       int& accuIndex,
                       int currIndex)
 {
-    using std::isnan;
+    using ck::math::isnan;
     if constexpr(!PropagateNan)
     {
...
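
Roughly, the binop_with_nan_check helpers exist so that MIN/MAX/AMAX reductions have well-defined NaN behaviour on the host: with PropagateNan enabled, a NaN input is written into the accumulator instead of being discarded by the comparison. A host-only sketch of that logic for a MAX-style reduction (the max_with_nan_check name is hypothetical, and plain float is used instead of ck::half_t):

#include <cmath>
#include <initializer_list>
#include <iostream>
#include <limits>

// Fold currVal into accuVal, optionally letting NaN poison the result.
template <bool PropagateNan>
void max_with_nan_check(float& accuVal, float currVal)
{
    if constexpr(!PropagateNan)
    {
        if(currVal > accuVal)
            accuVal = currVal;
    }
    else
    {
        if(std::isnan(currVal))
            accuVal = currVal; // propagate the NaN
        else if(currVal > accuVal)
            accuVal = currVal;
    }
}

int main()
{
    float acc = std::numeric_limits<float>::lowest();
    for(float v : {1.0f, std::nanf(""), 3.0f})
        max_with_nan_check<true>(acc, v);
    std::cout << std::boolalpha << std::isnan(acc) << '\n'; // true: NaN propagated
}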
@@ -32,6 +32,7 @@
 #include <functional>
 #include "reduction_enums.hpp"
+#include "reduction_common.hpp"
 #include "host_reduce_util.hpp"
 #include "host_tensor.hpp"
 #include "data_type.hpp"
@@ -196,10 +197,10 @@ struct ReductionHost
                  OutDataType* out_data,
                  IndexDataType* out_indices)
     {
+        using ck::float_equal_one;
+        using ck::float_equal_zero;
         using ck::type_convert;
         using ck::host_reduce::binop_with_nan_check2;
-        using ck::host_reduce::float_equal_one;
-        using ck::host_reduce::float_equal_zero;
         using ck::host_reduce::ReduceOpFn2;
         using ck::host_reduce::ReduceOpZeroVal;
@@ -227,10 +228,10 @@ struct ReductionHost
         posUnaryOp(accuVal);
-        if(!float_equal_one(alpha))
+        if(!float_equal_one{}(alpha))
             accuVal *= type_convert<AccDataType>(alpha);
-        if(!float_equal_zero(beta))
+        if(!float_equal_zero{}(beta))
             accuVal += type_convert<AccDataType>(out_data[0]) * type_convert<AccDataType>(beta);
         out_data[0] = type_convert<OutDataType>(accuVal);
@@ -263,13 +264,13 @@ struct ReductionHost
             posUnaryOp(accuVal);
-            if(!float_equal_one(alpha))
+            if(!float_equal_one{}(alpha))
                 accuVal *= type_convert<AccDataType>(alpha);
             auto dst_offset =
                 get_offset_from_index<NumInvariantDim>(outStrides, invariant_index);
-            if(!float_equal_zero(beta))
+            if(!float_equal_zero{}(beta))
                 accuVal += type_convert<AccDataType>(out_data[dst_offset]) *
                            type_convert<AccDataType>(beta);
@@ -303,10 +304,10 @@ struct ReductionHost
     void RunImpl_no_index(float alpha, const InDataType* in_data, float beta, OutDataType* out_data)
     {
+        using ck::float_equal_one;
+        using ck::float_equal_zero;
         using ck::type_convert;
         using ck::host_reduce::binop_with_nan_check;
-        using ck::host_reduce::float_equal_one;
-        using ck::host_reduce::float_equal_zero;
         using ck::host_reduce::ReduceOpFn;
         using ck::host_reduce::ReduceOpZeroVal;
@@ -330,10 +331,10 @@ struct ReductionHost
         posUnaryOp(accuVal);
-        if(!float_equal_one(alpha))
+        if(!float_equal_one{}(alpha))
             accuVal *= type_convert<AccDataType>(alpha);
-        if(!float_equal_zero(beta))
+        if(!float_equal_zero{}(beta))
             accuVal += type_convert<AccDataType>(out_data[0]) * type_convert<AccDataType>(beta);
         out_data[0] = type_convert<OutDataType>(accuVal);
@@ -361,13 +362,13 @@ struct ReductionHost
             posUnaryOp(accuVal);
-            if(!float_equal_one(alpha))
+            if(!float_equal_one{}(alpha))
                 accuVal *= type_convert<AccDataType>(alpha);
             auto dst_offset =
                 get_offset_from_index<NumInvariantDim>(outStrides, invariant_index);
-            if(!float_equal_zero(beta))
+            if(!float_equal_zero{}(beta))
                 accuVal += type_convert<AccDataType>(out_data[dst_offset]) *
                            type_convert<AccDataType>(beta);
...
@@ -380,13 +380,9 @@ void profile_reduce_impl_impl(bool do_verification,
     if(do_verification)
     {
-        using HostInDataType = typename type_mapping<InDataType>::OutType;
-        using HostOutDataType = typename type_mapping<OutDataType>::OutType;
-        using HostAccDataType = typename type_mapping<AccDataType>::OutType;
-        ReductionHost<HostInDataType,
-                      HostAccDataType,
-                      HostOutDataType,
+        ReductionHost<InDataType,
+                      AccDataType,
+                      OutDataType,
                       ReduceOpId,
                       Rank,
                       NumReduceDim,
@@ -394,11 +390,8 @@ void profile_reduce_impl_impl(bool do_verification,
                       NeedIndices>
             hostReduce(in.mDesc, out_ref.mDesc, invariantDims, reduceDims);
-        hostReduce.Run(alpha,
-                       reinterpret_cast<const HostInDataType*>(in.mData.data()),
-                       beta,
-                       reinterpret_cast<HostOutDataType*>(out_ref.mData.data()),
-                       out_indices_ref.mData.data());
+        hostReduce.Run(
+            alpha, in.mData.data(), beta, out_ref.mData.data(), out_indices_ref.mData.data());
     };
     const auto i_inLengths = to_int_vector(inLengths);
...
@@ -37,19 +37,6 @@ static inline std::vector<int> get_invariant_dims(const std::vector<int>& reduce
     return invariantDims;
 };
-// map the data type used by the GPU kernels to the corresponding type used by the host codes
-template <typename InType>
-struct type_mapping
-{
-    using OutType = InType;
-};
-template <>
-struct type_mapping<ck::half_t>
-{
-    using OutType = half_float::half;
-};
 constexpr int Rank = 4;
 constexpr ReduceTensorOp ReduceOpId = ReduceTensorOp::AVG;
@@ -226,13 +213,9 @@ bool test_reduce_no_index_impl(int init_method,
     bool result = true;
-    using HostInDataType = typename type_mapping<InDataType>::OutType;
-    using HostOutDataType = typename type_mapping<OutDataType>::OutType;
-    using HostAccDataType = typename type_mapping<AccDataType>::OutType;
-    ReductionHost<HostInDataType,
-                  HostAccDataType,
-                  HostOutDataType,
+    ReductionHost<InDataType,
+                  AccDataType,
+                  OutDataType,
                   ReduceOpId,
                   Rank,
                   NumReduceDim,
@@ -240,11 +223,7 @@ bool test_reduce_no_index_impl(int init_method,
                   NeedIndices>
         hostReduce(in.mDesc, out_ref.mDesc, invariantDims, reduceDims);
-    hostReduce.Run(alpha,
-                   reinterpret_cast<const HostInDataType*>(in.mData.data()),
-                   beta,
-                   reinterpret_cast<HostOutDataType*>(out_ref.mData.data()),
-                   nullptr);
+    hostReduce.Run(alpha, in.mData.data(), beta, out_ref.mData.data(), nullptr);
     const auto i_inLengths = to_int_vector(inLengths);
     const auto i_inStrides = to_int_vector(inStrides);
...
@@ -36,19 +36,6 @@ static inline std::vector<int> get_invariant_dims(const std::vector<int>& reduce
     return invariantDims;
 };
-// map the data type used by the GPU kernels to the corresponding type used by the host codes
-template <typename InType>
-struct type_mapping
-{
-    using OutType = InType;
-};
-template <>
-struct type_mapping<ck::half_t>
-{
-    using OutType = half_float::half;
-};
 constexpr int Rank = 4;
 constexpr ReduceTensorOp ReduceOpId = ReduceTensorOp::AMAX;
@@ -209,13 +196,9 @@ bool test_reduce_with_index_impl(int init_method,
     bool result = true;
-    using HostInDataType = typename type_mapping<InDataType>::OutType;
-    using HostOutDataType = typename type_mapping<OutDataType>::OutType;
-    using HostAccDataType = typename type_mapping<AccDataType>::OutType;
-    ReductionHost<HostInDataType,
-                  HostAccDataType,
-                  HostOutDataType,
+    ReductionHost<InDataType,
+                  AccDataType,
+                  OutDataType,
                   ReduceOpId,
                   Rank,
                   NumReduceDim,
@@ -223,11 +206,8 @@ bool test_reduce_with_index_impl(int init_method,
                   NeedIndices>
         hostReduce(in.mDesc, out_ref.mDesc, invariantDims, reduceDims);
-    hostReduce.Run(alpha,
-                   reinterpret_cast<const HostInDataType*>(in.mData.data()),
-                   beta,
-                   reinterpret_cast<HostOutDataType*>(out_ref.mData.data()),
-                   out_indices_ref.mData.data());
+    hostReduce.Run(
+        alpha, in.mData.data(), beta, out_ref.mData.data(), out_indices_ref.mData.data());
     const auto i_inLengths = to_int_vector(inLengths);
     const auto i_inStrides = to_int_vector(inStrides);
...