"vscode:/vscode.git/clone" did not exist on "2fa3fcfda5bb1aa1e8efc4d9cf90951e8055375e"
Commit 06d2c7b1 authored by Jing Zhang's avatar Jing Zhang Committed by root
Browse files

clean

parents b27909a0 3b18f1e3
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "host_tensor.hpp"
#include "conv_common.hpp"
template <typename TIn,
typename TWei,
typename TOut,
typename ConvStrides,
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
void host_conv_nchw_kcyx_nkhw(const Tensor<TIn>& in,
const Tensor<TWei>& wei,
Tensor<TOut>& out,
const ConvStrides& conv_strides,
const ConvDilations& conv_dilations,
const InLeftPads& in_left_pads,
const InRightPads&)
{
constexpr auto I0 = ck::Number<0>{};
constexpr auto I1 = ck::Number<1>{};
auto f_nchw = [&](auto n, auto k, auto ho, auto wo) {
float v = 0;
for(int c = 0; c < wei.mDesc.GetLengths()[1]; ++c)
{
for(int y = 0; y < wei.mDesc.GetLengths()[2]; ++y)
{
int hi = ho * conv_strides[I0] + y * conv_dilations[I0] - in_left_pads[I0];
for(int x = 0; x < wei.mDesc.GetLengths()[3]; ++x)
{
int wi = wo * conv_strides[I1] + x * conv_dilations[I1] - in_left_pads[I1];
if(hi >= 0 && hi < in.mDesc.GetLengths()[2] && wi >= 0 &&
wi < in.mDesc.GetLengths()[3])
{
v += ck::type_convert<float>(in(n, c, hi, wi)) *
ck::type_convert<float>(wei(k, c, y, x));
}
}
}
}
out(n, k, ho, wo) = ck::type_convert<TOut>(v);
};
make_ParallelTensorFunctor(f_nchw,
out.mDesc.GetLengths()[0],
out.mDesc.GetLengths()[1],
out.mDesc.GetLengths()[2],
out.mDesc.GetLengths()[3])(std::thread::hardware_concurrency());
}
template <typename TIn,
typename TWei,
typename TOut,
typename ConvStrides,
typename ConvDilations,
typename InLeftPads,
typename InRightPads>
void host_conv3d_ndhwc_kzyxc_ndhwk(const Tensor<TIn>& in,
const Tensor<TWei>& wei,
Tensor<TOut>& out,
const ConvStrides& conv_strides,
const ConvDilations& conv_dilations,
const InLeftPads& in_left_pads,
const InRightPads&)
{
using namespace ck;
constexpr auto I0 = Number<0>{};
constexpr auto I1 = Number<1>{};
constexpr auto I2 = Number<2>{};
const auto Di = in.mDesc.GetLengths()[1];
const auto Hi = in.mDesc.GetLengths()[2];
const auto Wi = in.mDesc.GetLengths()[3];
const auto Z = wei.mDesc.GetLengths()[1];
const auto Y = wei.mDesc.GetLengths()[2];
const auto X = wei.mDesc.GetLengths()[3];
const auto C = wei.mDesc.GetLengths()[4];
auto f_ndhwc = [&](auto n, auto do_tmp, auto ho_tmp, auto wo_tmp, auto k) {
// do__ must be converted to signed integer, otherwise zmin might be wrong in cases
// negative values.
const int do_ = static_cast<int>(do_tmp);
const int ho = static_cast<int>(ho_tmp);
const int wo = static_cast<int>(wo_tmp);
const int zmin =
std::max(0,
(in_left_pads[I0] - do_ * conv_strides[I0] + conv_dilations[I0] - 1) /
conv_dilations[I0]);
const int ymin =
std::max(0,
(in_left_pads[I1] - ho * conv_strides[I1] + conv_dilations[I1] - 1) /
conv_dilations[I1]);
const int xmin =
std::max(0,
(in_left_pads[I2] - wo * conv_strides[I2] + conv_dilations[I2] - 1) /
conv_dilations[I2]);
const int zmax =
std::min(Z, (in_left_pads[I0] - do_ * conv_strides[I0] + Di) / conv_dilations[I0]);
const int ymax =
std::min(Y, (in_left_pads[I1] - ho * conv_strides[I1] + Hi) / conv_dilations[I1]);
const int xmax =
std::min(X, (in_left_pads[I2] - wo * conv_strides[I2] + Wi) / conv_dilations[I2]);
const int di_min = do_ * conv_strides[I0] + zmin * conv_dilations[I0] - in_left_pads[I0];
const int hi_min = ho * conv_strides[I1] + ymin * conv_dilations[I1] - in_left_pads[I1];
const int wi_min = wo * conv_strides[I2] + xmin * conv_dilations[I2] - in_left_pads[I2];
double v = 0;
const TIn* in_n = in.mData.data() + n * Di * Hi * Wi * C;
const TWei* wei_k = wei.mData.data() + k * Z * Y * X * C;
int di = di_min;
for(int z = zmin; z < zmax; ++z, di += conv_dilations[I0])
{
const TIn* in_n_di = in_n + di * Hi * Wi * C;
const TWei* wei_k_z = wei_k + z * Y * X * C;
int hi = hi_min;
for(int y = ymin; y < ymax; ++y, hi += conv_dilations[I1])
{
const TIn* in_n_di_hi = in_n_di + hi * Wi * C;
const TWei* wei_k_z_y = wei_k_z + y * X * C;
int wi = wi_min;
for(int x = xmin; x < xmax; ++x, wi += conv_dilations[I2])
{
const TIn* in_n_di_hi_wi = in_n_di_hi + wi * C;
const TWei* wei_k_z_y_x = wei_k_z_y + x * C;
for(int c = 0; c < C; ++c)
{
v += static_cast<const double>(in_n_di_hi_wi[c]) *
static_cast<const double>(wei_k_z_y_x[c]);
}
}
}
}
out(n, do_, ho, wo, k) = v;
};
make_ParallelTensorFunctor(f_ndhwc,
out.mDesc.GetLengths()[0],
out.mDesc.GetLengths()[1],
out.mDesc.GetLengths()[2],
out.mDesc.GetLengths()[3],
out.mDesc.GetLengths()[4])(std::thread::hardware_concurrency() - 4);
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......@@ -13,6 +13,7 @@
#include "ck/utility/data_type.hpp"
#include "ck/utility/span.hpp"
#include "ck/utility/type_convert.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/ranges.hpp"
......@@ -411,6 +412,12 @@ struct Tensor
}
}
template <typename... Is>
std::size_t GetOffsetFromMultiIndex(Is... is) const
{
return mDesc.GetOffsetFromMultiIndex(is...);
}
template <typename... Is>
T& operator()(Is... is)
{
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <cstdlib>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept>
#include <tuple>
#include <utility>
#include <vector>
#include "ck/utility/functional2.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
namespace ck {
namespace utils {
struct ProfileBestConfig
{
std::string best_op_name;
float best_avg_time = std::numeric_limits<float>::max();
float best_tflops = std::numeric_limits<float>::max();
float best_gb_per_sec = std::numeric_limits<float>::max();
};
/**
* @brief This class describes an operation instance(s).
*
* Op instance defines a particular specializations of operator
* template. Thanks to this specific input/output data types, data
* layouts and modifying elementwise operations it is able to create
* it's input/output tensors, provide pointers to instances which
* can execute it and all operation specific parameters.
*/
template <typename OutDataType, typename... InArgTypes>
class OpInstance
{
public:
template <typename T>
using TensorPtr = std::unique_ptr<Tensor<T>>;
using InTensorsTuple = std::tuple<TensorPtr<InArgTypes>...>;
using DeviceMemPtr = std::unique_ptr<DeviceMem>;
using DeviceBuffers = std::vector<DeviceMemPtr>;
OpInstance() = default;
OpInstance(const OpInstance&) = default;
OpInstance& operator=(const OpInstance&) = default;
virtual ~OpInstance(){};
virtual InTensorsTuple GetInputTensors() const = 0;
virtual TensorPtr<OutDataType> GetOutputTensor() const = 0;
virtual std::unique_ptr<tensor_operation::device::BaseInvoker>
MakeInvokerPointer(tensor_operation::device::BaseOperator*) const = 0;
virtual std::unique_ptr<tensor_operation::device::BaseArgument>
MakeArgumentPointer(tensor_operation::device::BaseOperator*,
const DeviceBuffers&,
const DeviceMemPtr&) const = 0;
virtual std::size_t GetFlops() const = 0;
virtual std::size_t GetBtype() const = 0;
};
/**
* @brief A generic operation instance run engine.
*/
template <typename OutDataType, typename... InArgTypes>
class OpInstanceRunEngine
{
public:
using OpInstanceT = OpInstance<InArgTypes..., OutDataType>;
template <typename T>
using TensorPtr = std::unique_ptr<Tensor<T>>;
using DeviceMemPtr = std::unique_ptr<DeviceMem>;
using InTensorsTuple = std::tuple<TensorPtr<InArgTypes>...>;
using DeviceBuffers = std::vector<DeviceMemPtr>;
using InArgsTypesTuple = std::tuple<InArgTypes...>;
OpInstanceRunEngine() = delete;
template <typename ReferenceOp = std::function<void()>>
OpInstanceRunEngine(const OpInstanceT& op_instance,
const ReferenceOp& reference_op = ReferenceOp{},
bool do_verification = true)
: op_instance_{op_instance}
{
in_tensors_ = op_instance_.GetInputTensors();
out_tensor_ = op_instance_.GetOutputTensor();
if constexpr(std::is_invocable_v<ReferenceOp,
const Tensor<InArgTypes>&...,
Tensor<OutDataType>&>)
{
if(do_verification)
{
ref_output_ = op_instance_.GetOutputTensor();
CallRefOpUnpackArgs(reference_op, std::make_index_sequence<kNInArgs_>{});
}
}
AllocateDeviceInputTensors(std::make_index_sequence<kNInArgs_>{});
out_device_buffer_ = std::make_unique<DeviceMem>(sizeof(OutDataType) *
out_tensor_->mDesc.GetElementSpaceSize());
out_device_buffer_->SetZero();
}
virtual ~OpInstanceRunEngine(){};
template <typename OpInstancePtr>
bool Test(const std::vector<OpInstancePtr>& op_ptrs)
{
bool res{true};
for(auto& op_ptr : op_ptrs)
{
auto invoker = op_instance_.MakeInvokerPointer(op_ptr.get());
auto argument = op_instance_.MakeArgumentPointer(
op_ptr.get(), in_device_buffers_, out_device_buffer_);
if(op_ptr->IsSupportedArgument(argument.get()))
{
std::cout << "Testing instance: " << op_ptr->GetTypeString() << std::endl;
invoker->Run(argument.get());
out_device_buffer_->FromDevice(out_tensor_->mData.data());
if(!ref_output_)
{
throw std::runtime_error(
"OpInstanceRunEngine::Test: Reference value not availabe."
" You have to provide reference function.");
}
// TODO: enable flexible use of custom check_error functions
bool inst_res = CheckErr(out_tensor_->mData, ref_output_->mData);
std::cout << (inst_res ? "SUCCESS" : "FAILURE") << std::endl;
res = res && inst_res;
out_device_buffer_->SetZero();
}
else
{
std::cout << "Given conv problem is not supported by instance: \n\t>>>>"
<< op_ptr->GetTypeString() << std::endl;
}
}
return res;
}
template <typename OpInstancePtr>
ProfileBestConfig Profile(const std::vector<OpInstancePtr>& op_ptrs,
bool time_kernel = false,
bool do_verification = false,
bool do_log = false)
{
ProfileBestConfig best_config;
for(auto& op_ptr : op_ptrs)
{
auto invoker = op_instance_.MakeInvokerPointer(op_ptr.get());
auto argument = op_instance_.MakeArgumentPointer(
op_ptr.get(), in_device_buffers_, out_device_buffer_);
if(op_ptr->IsSupportedArgument(argument.get()))
{
std::string op_name = op_ptr->GetTypeString();
float avg_time = invoker->Run(argument.get(), StreamConfig{nullptr, time_kernel});
std::size_t flops = op_instance_.GetFlops();
std::size_t num_btype = op_instance_.GetBtype();
float tflops = static_cast<float>(flops) / 1.E9 / avg_time;
float gb_per_sec = num_btype / 1.E6 / avg_time;
std::cout << "Perf: " << avg_time << " ms, " << tflops << " TFlops, " << gb_per_sec
<< " GB/s, " << op_name << std::endl;
if(avg_time < best_config.best_avg_time)
{
best_config.best_op_name = op_name;
best_config.best_tflops = tflops;
best_config.best_gb_per_sec = gb_per_sec;
best_config.best_avg_time = avg_time;
}
if(do_verification)
{
out_device_buffer_->FromDevice(out_tensor_->mData.data());
if(!ref_output_)
{
throw std::runtime_error(
"OpInstanceRunEngine::Profile: Reference value not availabe."
" You have to provide reference function.");
}
// TODO: enable flexible use of custom check_error functions
CheckErr(out_tensor_->mData, ref_output_->mData);
if(do_log) {}
}
out_device_buffer_->SetZero();
}
}
return best_config;
}
void SetAtol(double a) { atol_ = a; }
void SetRtol(double r) { rtol_ = r; }
private:
template <typename F, std::size_t... Is>
void CallRefOpUnpackArgs(const F& f, std::index_sequence<Is...>) const
{
f(*std::get<Is>(in_tensors_)..., *ref_output_);
}
template <std::size_t... Is>
void AllocateDeviceInputTensors(std::index_sequence<Is...>)
{
(AllocateDeviceInputTensorsImpl<Is>(), ...);
}
template <std::size_t Index>
void AllocateDeviceInputTensorsImpl()
{
const auto& ts = std::get<Index>(in_tensors_);
in_device_buffers_
.emplace_back(
std::make_unique<DeviceMem>(sizeof(std::tuple_element_t<Index, InArgsTypesTuple>) *
ts->mDesc.GetElementSpaceSize()))
->ToDevice(ts->mData.data());
}
static constexpr std::size_t kNInArgs_ = std::tuple_size_v<InTensorsTuple>;
const OpInstanceT& op_instance_;
double rtol_{1e-5};
double atol_{1e-8};
InTensorsTuple in_tensors_;
TensorPtr<OutDataType> out_tensor_;
TensorPtr<OutDataType> ref_output_;
DeviceBuffers in_device_buffers_;
DeviceMemPtr out_device_buffer_;
template <typename T>
bool CheckErr(const std::vector<T>& dev_out, const std::vector<T>& ref_out) const
{
return ck::utils::check_err(dev_out, ref_out, "Error: incorrect results!", rtol_, atol_);
}
};
} // namespace utils
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment