Commit 95a83c6e authored by Adam Osewski

Merge remote-tracking branch 'origin/develop' into wavelet_model

parents 5b7c2432 892a8d76
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_f32_f32_rank4_reduce2_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_f32_f32_rank4_reduce3_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_f32_f32_rank4_reduce4_instances(
std::vector<DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
template <index_t Rank, index_t Reduce>
using device_softmax_f32_f32_instances = std::tuple<
// clang-format off
// InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 1, 8, 1, 1, 1>, // fallback kernel
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 1, 8, 1, 4, 4>,
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 4, 64, 1, 8, 1, 4, 4>,
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1, 8, 1, 4, 4>,
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1, 16, 1, 4, 4>,
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1, 32, 1, 4, 4>,
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 1, 256, 1, 8, 1, 4, 4>,
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 1, 256, 1, 16, 1, 4, 4>,
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 1, 256, 1, 32, 1, 4, 4>,
// Reduction on middle dimensions
// InSrcVectorDim is 0 since we want to coalesce reads on M dimension
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 8, 4, 0, 1, 1>,
DeviceSoftmaxImpl< F32, F32, F32, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 8, 4, 0, 4, 4>
// clang-format on
>;
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
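For context, a sketch of how these instance lists are typically consumed: each add_device_softmax_*_instances call appends type-erased DeviceSoftmaxPtr objects to a vector, and the caller probes each instance for support before running it. The problem shape, strides, and the exact MakeArgumentPointer parameter list below are assumptions based on the 2022-era DeviceSoftmax interface, not the library's documented API.

#include <vector>

// These aliases mirror the ones used inside namespace instance above.
using F32         = float;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;

bool run_first_supported_softmax(const void* in_dev, void* out_dev)
{
    std::vector<ck::tensor_operation::device::
                    DeviceSoftmaxPtr<F32, F32, F32, PassThrough, PassThrough, 4>>
        instances;
    ck::tensor_operation::device::instance::add_device_softmax_f32_f32_rank4_reduce2_instances(
        instances);

    for(auto& op : instances)
    {
        // Hypothetical rank-4 problem: softmax over the last two dimensions.
        auto arg = op->MakeArgumentPointer({2, 8, 128, 128},        // input lengths
                                           {131072, 16384, 128, 1}, // packed input strides
                                           {2, 3},                  // dimensions to reduce
                                           1.0,                     // alpha
                                           0.0,                     // beta
                                           in_dev,
                                           out_dev,
                                           PassThrough{},
                                           PassThrough{});
        if(op->IsSupportedArgument(arg.get()))
        {
            op->MakeInvokerPointer()->Run(arg.get());
            return true; // real code would time every supported instance and keep the fastest
        }
    }
    return false;
}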
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// Note: the template arguments here must match the I8 per-reduce-dim declarations below;
// the original text listed F16 types for these i8 functions, which appears to be a
// copy-paste slip.
void add_device_softmax_i8_i8_rank3_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);
void add_device_softmax_i8_i8_rank4_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_i8_i8_rank3_reduce1_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_i8_i8_rank3_reduce2_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_i8_i8_rank3_reduce3_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 3>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_i8_i8_rank4_reduce1_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_i8_i8_rank4_reduce2_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_i8_i8_rank4_reduce3_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
void add_device_softmax_i8_i8_rank4_reduce4_instances(
std::vector<DeviceSoftmaxPtr<I8, F32, I8, PassThrough, PassThrough, 4>>& instances);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp"
#include "ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
template <index_t Rank, index_t Reduce>
using device_softmax_i8_i8_instances = std::tuple<
// clang-format off
// InDataType, AccDataType, OutDataType, InElementwiseOp, AccElementwiseOp, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
// fallback kernel
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 1, 16, 1, 1, 1>,
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 1, 16, 1, 16, 16>,
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 4, 64, 1, 16, 1, 16, 16>,
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1, 16, 1, 16, 16>,
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1, 32, 1, 16, 16>,
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 2, 128, 1, 64, 1, 16, 16>,
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 1, 256, 1, 16, 1, 16, 16>,
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 1, 256, 1, 32, 1, 16, 16>,
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 1, 256, 1, 64, 1, 16, 16>,
// Reduction on middle dimensions
// InSrcVectorDim is 0 since we want to coalesce reads on M dimension
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 8, 32, 8, 8, 0, 1, 1>,
DeviceSoftmaxImpl< I8, F32, I8, PassThrough, PassThrough, Rank, Reduce, 256, 32, 8, 32, 8, 0, 16, 8>
// clang-format on
>;
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
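One observation on the tuning values: the int8 list uses InSrcVectorSize/OutDstVectorSize of 16 where the f32 list above uses 4, so both presumably target the same 16-byte (128-bit) per-thread vector memory transactions, and KThreadSliceSize is scaled up by the same factor. This reading is inferred from the parameter column header above, not from separate documentation.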
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_i8_i8_instance.hpp"
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <algorithm>
#include <iterator>
#include <type_traits>
#include <utility>
namespace ck {
namespace ranges {
// Value type of a range, as used by check_err() and the host tensor utilities in this
// commit. The alias is reconstructed here from those uses so the header reads
// self-contained; the library's actual definition may live elsewhere or differ.
template <typename Range>
using range_value_t =
    std::remove_cv_t<std::remove_reference_t<decltype(*std::begin(std::declval<Range&>()))>>;
template <typename InputRange, typename OutputIterator>
auto copy(InputRange&& range, OutputIterator iter)
-> decltype(std::copy(std::begin(std::forward<InputRange>(range)),
std::end(std::forward<InputRange>(range)),
iter))
{
return std::copy(std::begin(std::forward<InputRange>(range)),
std::end(std::forward<InputRange>(range)),
iter);
}
template <typename T, typename OutputRange>
auto fill(OutputRange&& range, const T& init)
-> std::void_t<decltype(std::fill(std::begin(std::forward<OutputRange>(range)),
std::end(std::forward<OutputRange>(range)),
init))>
{
std::fill(std::begin(std::forward<OutputRange>(range)),
std::end(std::forward<OutputRange>(range)),
init);
}
template <typename InputRange, typename OutputIterator, typename UnaryOperation>
auto transform(InputRange&& range, OutputIterator iter, UnaryOperation unary_op)
-> decltype(std::transform(std::begin(range), std::end(range), iter, unary_op))
{
return std::transform(std::begin(range), std::end(range), iter, unary_op);
}
} // namespace ranges
} // namespace ck
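A quick illustration of the three helpers above; everything except the ck::ranges calls is arbitrary demo code:

#include <cassert>
#include <vector>

void ranges_demo()
{
    std::vector<float> src{1.f, 2.f, 3.f};
    std::vector<float> dst(src.size());

    ck::ranges::fill<float>(dst, 0.f);  // dst = {0, 0, 0}
    ck::ranges::copy(src, dst.begin()); // dst = {1, 2, 3}
    ck::ranges::transform(
        src, dst.begin(), [](float x) { return 2.f * x; }); // dst = {2, 4, 6}

    assert(dst.back() == 6.f);
}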
@@ -15,18 +15,22 @@
 #include "ck/ck.hpp"
 #include "ck/utility/data_type.hpp"
-#include "ck/utility/span.hpp"
 #include "ck/utility/type.hpp"
 #include "ck/host_utility/io.hpp"
+#include "ck/library/utility/ranges.hpp"
 
 namespace ck {
 namespace utils {
 
-template <typename T>
-typename std::enable_if<std::is_floating_point<T>::value && !std::is_same<T, half_t>::value,
-                        bool>::type
-check_err(const std::vector<T>& out,
-          const std::vector<T>& ref,
+template <typename Range, typename RefRange>
+typename std::enable_if<
+    std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
+        std::is_floating_point_v<ranges::range_value_t<Range>> &&
+        !std::is_same_v<ranges::range_value_t<Range>, half_t>,
+    bool>::type
+check_err(const Range& out,
+          const RefRange& ref,
           const std::string& msg = "Error: Incorrect results!",
           double rtol = 1e-5,
           double atol = 3e-6)
@@ -44,15 +48,17 @@ check_err(const std::vector<T>& out,
     double max_err = std::numeric_limits<double>::min();
     for(std::size_t i = 0; i < ref.size(); ++i)
     {
-        err = std::abs(out[i] - ref[i]);
-        if(err > atol + rtol * std::abs(ref[i]) || !std::isfinite(out[i]) || !std::isfinite(ref[i]))
+        const double o = *std::next(std::begin(out), i);
+        const double r = *std::next(std::begin(ref), i);
+        err = std::abs(o - r);
+        if(err > atol + rtol * std::abs(r) || !std::isfinite(o) || !std::isfinite(r))
         {
             max_err = err > max_err ? err : max_err;
             err_count++;
             if(err_count < 5)
             {
                 std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i
-                          << "] != ref[" << i << "]: " << out[i] << " != " << ref[i] << std::endl;
+                          << "] != ref[" << i << "]: " << o << " != " << r << std::endl;
             }
             res = false;
         }
@@ -64,10 +70,13 @@ check_err(const std::vector<T>& out,
     return res;
 }
 
-template <typename T>
-typename std::enable_if<std::is_same<T, bhalf_t>::value, bool>::type
-check_err(const std::vector<T>& out,
-          const std::vector<T>& ref,
+template <typename Range, typename RefRange>
+typename std::enable_if<
+    std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
+        std::is_same_v<ranges::range_value_t<Range>, bhalf_t>,
+    bool>::type
+check_err(const Range& out,
+          const RefRange& ref,
           const std::string& msg = "Error: Incorrect results!",
           double rtol = 1e-3,
          double atol = 1e-3)
@@ -86,9 +95,9 @@ check_err(const std::vector<T>& out,
     double max_err = std::numeric_limits<float>::min();
     for(std::size_t i = 0; i < ref.size(); ++i)
     {
-        double o = type_convert<float>(out[i]);
-        double r = type_convert<float>(ref[i]);
+        const double o = type_convert<float>(*std::next(std::begin(out), i));
+        const double r = type_convert<float>(*std::next(std::begin(ref), i));
         err = std::abs(o - r);
         if(err > atol + rtol * std::abs(r) || !std::isfinite(o) || !std::isfinite(r))
         {
             max_err = err > max_err ? err : max_err;
@@ -108,10 +117,13 @@ check_err(const std::vector<T>& out,
     return res;
 }
 
-template <typename T>
-typename std::enable_if<std::is_same_v<T, half_t>, bool>::type
-check_err(span<const T> out,
-          span<const T> ref,
+template <typename Range, typename RefRange>
+typename std::enable_if<
+    std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
+        std::is_same_v<ranges::range_value_t<Range>, half_t>,
+    bool>::type
+check_err(const Range& out,
+          const RefRange& ref,
          const std::string& msg = "Error: Incorrect results!",
          double rtol = 1e-3,
          double atol = 1e-3)
@@ -126,12 +138,12 @@ check_err(span<const T> out,
     bool res{true};
     int err_count = 0;
     double err = 0;
-    double max_err = std::numeric_limits<T>::min();
+    double max_err = std::numeric_limits<ranges::range_value_t<Range>>::min();
     for(std::size_t i = 0; i < ref.size(); ++i)
     {
-        double o = type_convert<float>(out[i]);
-        double r = type_convert<float>(ref[i]);
+        const double o = type_convert<float>(*std::next(std::begin(out), i));
+        const double r = type_convert<float>(*std::next(std::begin(ref), i));
         err = std::abs(o - r);
         if(err > atol + rtol * std::abs(r) || !std::isfinite(o) || !std::isfinite(r))
         {
             max_err = err > max_err ? err : max_err;
@@ -151,26 +163,17 @@ check_err(span<const T> out,
     return res;
 }
 
-template <typename T>
-typename std::enable_if<std::is_same<T, half_t>::value, bool>::type
-check_err(const std::vector<T>& out,
-          const std::vector<T>& ref,
-          const std::string& msg = "Error: Incorrect results!",
-          double rtol = 1e-3,
-          double atol = 1e-3)
-{
-    return check_err(span<const T>{out}, span<const T>{ref}, msg, rtol, atol);
-}
-
-template <typename T>
-std::enable_if_t<(std::is_integral_v<T> && !std::is_same_v<T, bhalf_t>)
+template <typename Range, typename RefRange>
+std::enable_if_t<(std::is_same_v<ranges::range_value_t<Range>, ranges::range_value_t<RefRange>> &&
+                  std::is_integral_v<ranges::range_value_t<Range>> &&
+                  !std::is_same_v<ranges::range_value_t<Range>, bhalf_t>)
 #ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
-                     || std::is_same_v<T, int4_t>
+                     || std::is_same_v<ranges::range_value_t<Range>, int4_t>
 #endif
                      ,
                  bool>
-check_err(const std::vector<T>& out,
-          const std::vector<T>& ref,
+check_err(const Range& out,
+          const RefRange& ref,
           const std::string& msg = "Error: Incorrect results!",
           double = 0,
           double atol = 0)
@@ -188,9 +191,9 @@ check_err(const std::vector<T>& out,
     int64_t max_err = std::numeric_limits<int64_t>::min();
     for(std::size_t i = 0; i < ref.size(); ++i)
    {
-        int64_t o = out[i];
-        int64_t r = ref[i];
+        const int64_t o = *std::next(std::begin(out), i);
+        const int64_t r = *std::next(std::begin(ref), i);
         err = std::abs(o - r);
         if(err > atol)
         {
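The practical effect of the check_err rewrite above: the overloads no longer require two std::vector<T> (or span) arguments of identical container type; any pair of ranges with the same value type is accepted. A small sketch, with made-up data:

#include <array>
#include <vector>

bool check_demo()
{
    std::vector<float> out{1.0f, 2.0f, 3.0f};        // e.g. device results copied to host
    std::array<float, 3> ref{1.0f, 2.0f, 3.000001f}; // host reference

    // Floating-point overload: an element fails when
    // |out[i] - ref[i]| > atol + rtol * |ref[i]|, or when either value is non-finite.
    return ck::utils::check_err(out, ref, "Error: Incorrect results!", 1e-5, 3e-6);
}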
@@ -10,6 +10,8 @@
 #include "ck/ck.hpp"
 
+#include "ck/library/utility/numeric.hpp"
+
 namespace ck {
 namespace utils {
 namespace conv {
@@ -55,10 +57,8 @@ struct ConvParam
         // sizeof(InDataType) * (G * N * C * <input spatial lengths product>) +
         return sizeof(InDataType) *
                (G_ * N_ * C_ *
-                std::accumulate(std::begin(input_spatial_lengths_),
-                                std::begin(input_spatial_lengths_) + num_dim_spatial_,
-                                static_cast<std::size_t>(1),
-                                std::multiplies<std::size_t>()));
+                ck::accumulate_n<std::size_t>(
+                    std::begin(input_spatial_lengths_), num_dim_spatial_, 1, std::multiplies<>()));
     }
 
     template <typename WeiDataType>
@@ -67,10 +67,8 @@ struct ConvParam
         // sizeof(WeiDataType) * (G * K * C * <filter spatial lengths product>) +
         return sizeof(WeiDataType) *
                (G_ * K_ * C_ *
-                std::accumulate(std::begin(filter_spatial_lengths_),
-                                std::begin(filter_spatial_lengths_) + num_dim_spatial_,
-                                static_cast<std::size_t>(1),
-                                std::multiplies<std::size_t>()));
+                ck::accumulate_n<std::size_t>(
+                    std::begin(filter_spatial_lengths_), num_dim_spatial_, 1, std::multiplies<>()));
     }
 
     template <typename OutDataType>
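ck::accumulate_n, pulled in via the new ck/library/utility/numeric.hpp include, folds the first n elements of a range, which is exactly what the replaced std::accumulate calls spelled out with an explicit first + num_dim_spatial_ end iterator. A minimal sketch of such a helper, assuming that is all it does (the library's actual definition may differ):

#include <iterator>
#include <numeric>

namespace ck {
// Fold the first n elements starting at `first`; a thin wrapper over std::accumulate.
// T is spelled first so callers can write accumulate_n<std::size_t>(...) as in the diff above.
template <typename T, typename ForwardIterator, typename Size, typename BinaryOperation>
auto accumulate_n(ForwardIterator first, Size n, T init, BinaryOperation op)
{
    return std::accumulate(first, std::next(first, n), init, op);
}
} // namespace ck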
@@ -30,9 +30,10 @@ struct FillUniformDistribution
     }
 
     template <typename ForwardRange>
-    auto operator()(ForwardRange&& range) -> std::void_t<decltype(
-        std::declval<FillUniformDistribution>()(std::begin(std::forward<ForwardRange>(range)),
-                                                std::end(std::forward<ForwardRange>(range))))>
+    auto operator()(ForwardRange&& range) const
+        -> std::void_t<decltype(std::declval<const FillUniformDistribution&>()(
+            std::begin(std::forward<ForwardRange>(range)),
+            std::end(std::forward<ForwardRange>(range))))>
     {
         (*this)(std::begin(std::forward<ForwardRange>(range)),
                 std::end(std::forward<ForwardRange>(range)));
@@ -72,6 +73,16 @@ struct FillUniformDistributionIntegerValue
         std::generate(
             first, last, [&dis, &gen]() { return ck::type_convert<T>(std::round(dis(gen))); });
     }
+
+    template <typename ForwardRange>
+    auto operator()(ForwardRange&& range) const
+        -> std::void_t<decltype(std::declval<const FillUniformDistributionIntegerValue&>()(
+            std::begin(std::forward<ForwardRange>(range)),
+            std::end(std::forward<ForwardRange>(range))))>
+    {
+        (*this)(std::begin(std::forward<ForwardRange>(range)),
+                std::end(std::forward<ForwardRange>(range)));
+    }
 };
 
 template <typename T>
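With the const-qualified range overloads above, a filler object can be applied to a whole container in one call. A usage sketch; the template parameter and the constructor taking the distribution's lower/upper bounds are assumptions about fill.hpp, not something shown in this diff:

#include <vector>

void fill_demo()
{
    std::vector<float> buf(1024);

    // Assumed shape of the filler: FillUniformDistribution<T> carrying the
    // distribution's [a, b] bounds as members; check fill.hpp for the real layout.
    ck::utils::FillUniformDistribution<float>{-1.f, 1.f}(buf);
}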
@@ -96,10 +96,9 @@ struct ReductionHost
     static constexpr int NumInvariantDim = Rank - NumReduceDim;
 
     std::vector<size_t> outStrides;
-    std::vector<int> invariantDims;
-    std::vector<int> reduceDims;
 
     IndexDataType divider;
+
     std::array<size_t, NumReduceDim> reduceLengths;
     std::array<size_t, NumReduceDim> reduceStrides;
     std::array<size_t, NumInvariantDim> invariantLengths;
@@ -110,15 +109,12 @@ struct ReductionHost
     ReductionHost(HostTensorDescriptor& inDesc,
                   HostTensorDescriptor& outDesc,
-                  const std::vector<int>& invariantDims_,
-                  const std::vector<int>& reduceDims_)
+                  const std::array<int, NumInvariantDim> invariantDims,
+                  const std::array<int, NumReduceDim> reduceDims)
     {
         // this->outLengths = to_int_vector(outDesc.GetLengths());
         this->outStrides = outDesc.GetStrides();
 
-        this->invariantDims = invariantDims_;
-        this->reduceDims    = reduceDims_;
-
         int product = 1;
 
         for(int i = 0; i < NumReduceDim; i++)
@@ -14,6 +14,9 @@
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/span.hpp"
 
+#include "ck/library/utility/algorithm.hpp"
+#include "ck/library/utility/ranges.hpp"
+
 template <typename Range>
 std::ostream& LogRange(std::ostream& os, Range&& range, std::string delim)
 {
@@ -84,10 +87,10 @@ struct HostTensorDescriptor
         this->CalculateStrides();
     }
 
-    template <typename Range,
+    template <typename Lengths,
               typename = std::enable_if_t<
-                  std::is_convertible_v<decltype(*std::begin(std::declval<Range>())), std::size_t>>>
-    HostTensorDescriptor(const Range& lens) : mLens(lens.begin(), lens.end())
+                  std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t>>>
+    HostTensorDescriptor(const Lengths& lens) : mLens(lens.begin(), lens.end())
     {
         this->CalculateStrides();
     }
@@ -102,13 +105,12 @@ struct HostTensorDescriptor
     {
     }
 
-    template <
-        typename Range1,
-        typename Range2,
-        typename = std::enable_if_t<
-            std::is_convertible_v<decltype(*std::begin(std::declval<Range1>())), std::size_t> &&
-            std::is_convertible_v<decltype(*std::begin(std::declval<Range2>())), std::size_t>>>
-    HostTensorDescriptor(const Range1& lens, const Range2& strides)
+    template <typename Lengths,
+              typename Strides,
+              typename = std::enable_if_t<
+                  std::is_convertible_v<ck::ranges::range_value_t<Lengths>, std::size_t> &&
+                  std::is_convertible_v<ck::ranges::range_value_t<Strides>, std::size_t>>>
+    HostTensorDescriptor(const Lengths& lens, const Strides& strides)
         : mLens(lens.begin(), lens.end()), mStrides(strides.begin(), strides.end())
     {
     }
@@ -244,14 +246,20 @@ struct Tensor
     {
     }
 
-    template <typename X>
-    Tensor(std::vector<X> lens) : mDesc(lens), mData(mDesc.GetElementSpaceSize())
+    template <typename X, typename Y>
+    Tensor(std::initializer_list<X> lens, std::initializer_list<Y> strides)
+        : mDesc(lens, strides), mData(mDesc.GetElementSpaceSize())
     {
     }
 
-    template <typename X, typename Y>
-    Tensor(std::vector<X> lens, std::vector<Y> strides)
-        : mDesc(lens, strides), mData(mDesc.GetElementSpaceSize())
+    template <typename Lengths>
+    Tensor(const Lengths& lens) : mDesc(lens), mData(mDesc.GetElementSpaceSize())
+    {
+    }
+
+    template <typename Lengths, typename Strides>
+    Tensor(const Lengths& lens, const Strides& strides)
+        : mDesc(lens, strides), mData(GetElementSpaceSize())
     {
     }
@@ -261,10 +269,10 @@ struct Tensor
     Tensor<OutT> CopyAsType() const
     {
         Tensor<OutT> ret(mDesc);
-        for(size_t i = 0; i < mData.size(); i++)
-        {
-            ret.mData[i] = ck::type_convert<OutT>(mData[i]);
-        }
+
+        ck::ranges::transform(
+            mData, ret.mData.begin(), [](auto value) { return ck::type_convert<OutT>(value); });
+
         return ret;
     }
@@ -294,13 +302,7 @@ struct Tensor
 
     std::size_t GetElementSpaceSizeInBytes() const { return sizeof(T) * GetElementSpaceSize(); }
 
-    void SetZero()
-    {
-        for(auto& v : mData)
-        {
-            v = T{0};
-        }
-    }
+    void SetZero() { ck::ranges::fill<T>(mData, 0); }
 
     template <typename F>
     void ForEach_impl(F&& f, std::vector<size_t>& idx, size_t rank)
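The Tensor constructor changes above allow lengths and strides to come from initializer lists or from any range whose values convert to std::size_t, rather than only std::vector. A brief sketch (shapes arbitrary):

#include <array>

void tensor_demo()
{
    // New initializer_list constructor: lengths and strides inline.
    Tensor<float> a({2, 3, 4}, {12, 4, 1});

    // Lengths from any suitable range.
    std::array<std::size_t, 3> lens{2, 3, 4};
    Tensor<float> b(lens);

    b.SetZero();                     // ck::ranges::fill under the hood
    auto c = b.CopyAsType<double>(); // elementwise ck::type_convert via ck::ranges::transform
}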