Commit d3405258 authored by Chao Liu

prototype dynamic descriptor

parent 834eb24c
@@ -96,31 +96,26 @@ struct DummyStaticTransform
         auto coord = typename TensorCoordinate<decltype(in_gemmk_gemmn_global_desc)>::type(k0, n0);

-        if(get_block_1d_id() < coord.GetOffset())
+#pragma unroll 1
+        for(index_t k = 0; k < 100; ++k)
         {
-            for(index_t k = 0; k < 1; ++k)
-            {
-                for(index_t n = 0; n < 4; ++n)
-                {
-                    auto tmp = coord + Array<index_t, 2>{k, n};
-
-                    Float value = 1;
-
-                    transfer_data<Float,
-                                  1,
-                                  AddressSpace::Vgpr,
-                                  AddressSpace::Global,
-                                  InMemoryDataOperation::Set,
-                                  1,
-                                  1>(&value,
-                                     0,
-                                     true,
-                                     1,
-                                     p_in_global,
-                                     tmp.GetOffset(),
-                                     tmp.IsOffsetValidAssumingUpperIndexIsValid(),
-                                     in_gemmk_gemmn_global_desc.GetElementSpace());
-                }
-            }
+            coord += Array<index_t, 2>{8, 0};
+
+            Float value = 1;
+
+            transfer_data<Float,
+                          1,
+                          AddressSpace::Vgpr,
+                          AddressSpace::Global,
+                          InMemoryDataOperation::Set,
+                          1,
+                          1>(&value,
+                             0,
+                             true,
+                             1,
+                             p_in_global,
+                             coord.GetOffset(),
+                             coord.IsOffsetValidAssumingUpperIndexIsValid(),
+                             in_gemmk_gemmn_global_desc.GetElementSpace());
         }
     }
 };
#ifndef CK_DYNAMIC_MULTI_INDEX_TRANSFORM_HPP
#define CK_DYNAMIC_MULTI_INDEX_TRANSFORM_HPP
#include "common_header.hpp"
namespace ck {
struct DynamicPassThrough
{
using LowerIndex = MultiIndex<1>;
using UpperIndex = MultiIndex<1>;
index_t low_length_;
__host__ __device__ constexpr DynamicPassThrough(index_t low_length) : low_length_(low_length)
{
}
__host__ __device__ static constexpr auto GetNumOfLowerDimension() { return Number<1>{}; }
__host__ __device__ static constexpr auto GetNumOfUpperDimension() { return Number<1>{}; }
__host__ __device__ constexpr auto GetUpperLengths() const { return Array<index_t, 1>{low_length_}; }
__host__ __device__ static constexpr auto CalculateLowerIndex(const UpperIndex& idx_up)
{
return idx_up;
}
__host__ __device__ static constexpr auto
CalculateLowerIndexDiff(const UpperIndex& idx_up_diff,
const UpperIndex& /* idx_up_old */,
const LowerIndex& /* idx_low_old */)
{
return idx_up_diff;
}
__host__ __device__ static constexpr bool IsLinearTransform() { return true; }
__host__ __device__ static constexpr bool IsValidUpperIndexAlwaysMappedToValidLowerIndex()
{
return true;
}
};
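// Illustrative usage sketch (not part of this commit): unlike the static PassThrough, the
// length is a run-time value, e.g.
//
//   const auto pass    = DynamicPassThrough{16};                       // length known only at run-time
//   const auto idx_low = pass.CalculateLowerIndex(MultiIndex<1>{5});   // pass-through: idx_low == {5}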
template <index_t NDimLow>
struct DynamicMerge
{
static constexpr index_t ndim_low_ = NDimLow;
static constexpr index_t ndim_up_  = 1;
using LowerIndex = MultiIndex<ndim_low_>;
using UpperIndex = MultiIndex<ndim_up_>;
Array<index_t, NDimLow> low_lengths_;
index_t up_length_;
__host__ __device__ static constexpr auto GetNumOfLowerDimension()
{
return Number<ndim_low_>{};
}
__host__ __device__ static constexpr auto GetNumOfUpperDimension()
{
return Number<ndim_up_>{};
}
__host__ __device__ constexpr auto GetUpperLengths() const
{
return Array<index_t, 1>{up_length_};
}
// emulate constexpr lambda
struct lambda_CalculateLowerIndex
{
const Array<index_t, ndim_low_>& pseudo_low_strides;
index_t& itmp;
LowerIndex& idx_low;
__host__ __device__ explicit constexpr lambda_CalculateLowerIndex(
const Array<index_t, ndim_low_>& pseudo_low_strides_, index_t& itmp_, LowerIndex& idx_low_)
: pseudo_low_strides(pseudo_low_strides_), itmp(itmp_), idx_low(idx_low_)
{
}
template <typename IDim>
__host__ __device__ constexpr void operator()(IDim idim) const
{
const index_t stride = pseudo_low_strides[idim];
idx_low(idim) = itmp / stride;
itmp -= idx_low[idim] * stride;
}
};
__host__ __device__ constexpr auto CalculateLowerIndex(const UpperIndex& idx_up) const
{
LowerIndex idx_low;
index_t itmp = idx_up[0];
// strides of the lower dimensions are computed at run-time from low_lengths_
Array<index_t, ndim_low_> pseudo_low_strides;
pseudo_low_strides(ndim_low_ - 1) = 1;
for(index_t i = ndim_low_ - 1; i > 0; --i)
{
pseudo_low_strides(i - 1) = pseudo_low_strides[i] * low_lengths_[i];
}
static_for<0, ndim_low_ - 1, 1>{}(
lambda_CalculateLowerIndex(pseudo_low_strides, itmp, idx_low));
idx_low(ndim_low_ - 1) = itmp / pseudo_low_strides[ndim_low_ - 1];
return idx_low;
}
// idx_low_diff depends on idx_low_old, so idx_low needs to be up-to-date.
// If idx_up_diff is known at compile-time, many of these calculations can be optimized
// away by the compiler.
// This function assumes idx_low_old is not out-of-bound.
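// Worked example (illustrative, not from this commit): for low_lengths_ = {4, 8},
// idx_low_old = {1, 6} (upper index 14) and idx_up_diff = {3}:
//   idx_low_diff_tmp = CalculateLowerIndex({3}) = {0, 3}
//   idx_low_new      = {1, 9}  -> carry pass: 9 >= 8, so {2, 1}
//   returned diff    = idx_low_new - idx_low_old = {1, -5}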
__host__ __device__ constexpr auto
CalculateLowerIndexDiff(const UpperIndex& idx_up_diff,
const UpperIndex& /* idx_up_old */,
const LowerIndex& idx_low_old) const
{
if(idx_up_diff[0] == 0)
{
return make_zero_array<index_t, ndim_low_>();
}
else
{
// CalculateLowerIndex(idx_up_diff) requires multiple integer divisions.
// If idx_up_diff is known at compile-time, the divisions can be done at
// compile-time; if it is only known at run-time, they have to be done at
// run-time, which can be very expensive.
LowerIndex idx_low_diff_tmp = CalculateLowerIndex(idx_up_diff);
// find out the last low dimension that changed
index_t last_changed_low_dim = 0;
static_for<0, ndim_low_, 1>{}([&](auto i) {
if(idx_low_diff_tmp[i] != 0)
{
last_changed_low_dim = i;
}
});
LowerIndex idx_low_new = idx_low_old + idx_low_diff_tmp;
if(idx_up_diff[0] > 0)
{
// do carry check on each low dimension in reversed order
// starting from the first digit that changed
// don't check the highest dimension
bool carry = false;
static_for<ndim_low_ - 1, 0, -1>{}([&](auto i) {
if(i <= last_changed_low_dim)
{
if(carry)
{
++idx_low_new(i);
}
carry = false;
if(idx_low_new[i] >= low_lengths_[i])
{
idx_low_new(i) -= low_lengths_[i];
carry = true;
}
}
});
// highest dimension, no out-of-bound check
if(carry)
{
++idx_low_new(0);
}
}
else
{
// do borrow check on each low dimension in reversed order
// starting from the first digit that changed
// don't check the highest dimension
bool borrow = false;
static_for<ndim_low_ - 1, 0, -1>{}([&](auto i) {
if(i <= last_changed_low_dim)
{
if(borrow)
{
--idx_low_new(i);
}
borrow = false;
if(idx_low_new[i] < 0)
{
idx_low_new(i) += low_lengths_[i];
borrow = true;
}
}
});
// highest dimension, no out-of-bound check
if(borrow)
{
--idx_low_new(0);
}
}
return idx_low_new - idx_low_old;
}
}
__host__ __device__ static constexpr bool IsLinearTransform() { return false; }
__host__ __device__ static constexpr bool IsValidUpperIndexAlwaysMappedToValidLowerIndex()
{
return true;
}
};
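// Minimal usage sketch (illustrative; assumes aggregate initialization of the members, since
// this prototype defines no constructor):
//
//   const DynamicMerge<2> merge{{4, 8}, 32};                             // merge a 4x8 lower space
//   const auto idx_low = merge.CalculateLowerIndex(MultiIndex<1>{17});   // {2, 1}, since 17 == 2*8 + 1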
} // namespace ck
#endif
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_HPP
#include "common_header.hpp"
namespace ck {
template <index_t NDim>
struct DynamicNativeTensorDescriptor
{
using Index = MultiIndex<NDim>;
Array<index_t, NDim> lengths_;
Array<index_t, NDim> strides_;
index_t element_size_;
index_t element_space_;
template <typename Lengths, typename Strides>
__host__ __device__ constexpr DynamicNativeTensorDescriptor(const Lengths& lengths,
const Strides& strides)
: lengths_(lengths), strides_(strides)
{
element_size_ = 1;
for(index_t i = 0; i < NDim; ++i)
{
element_size_ *= lengths_[i];
}
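// element_space_ counts the elements spanned in memory:
// 1 + sum_i (lengths_[i] - 1) * strides_[i], which equals element_size_ for a packed tensor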
element_space_ = 1;
for(index_t i = 0; i < NDim; ++i)
{
element_space_ += (lengths_[i] - 1) * strides_[i];
}
}
__host__ __device__ static constexpr auto GetNumOfDimension() { return NDim; }
__host__ __device__ constexpr auto GetLength(const index_t& i) const { return lengths_[i]; }
__host__ __device__ constexpr auto GetStride(const index_t& i) const { return strides_[i]; }
__host__ __device__ constexpr auto GetLengths() const { return lengths_; }
__host__ __device__ constexpr auto GetStrides() const { return strides_; }
__host__ __device__ constexpr auto GetElementSize() const { return element_size_; }
__host__ __device__ constexpr auto GetElementSpace() const { return element_space_; }
__host__ __device__ constexpr auto CalculateOffset(const Index& idx) const
{
index_t offset = 0;
#pragma unroll
for(index_t i = 0; i < NDim; ++i)
{
offset += idx[i] * strides_[i];
}
return offset;
}
__host__ __device__ constexpr auto CalculateOffsetDiff(const Index& idx_diff) const
{
index_t offset_diff = 0;
#pragma unroll
for(index_t i = 0; i < NDim; ++i)
{
offset_diff += idx_diff[i] * strides_[i];
}
return offset_diff;
}
__host__ __device__ constexpr bool IsUpperIndexValid(const Index& idx) const
{
bool flag = true;
#pragma unroll
for(index_t i = 0; i < NDim; ++i)
{
flag = flag && idx[i] >= 0 && idx[i] < lengths_[i];
}
return flag;
}
};
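// Usage sketch (illustrative only): a packed 4d descriptor with run-time lengths and strides
//
//   const auto desc = DynamicNativeTensorDescriptor<4>(Array<index_t, 4>{2, 3, 4, 5},
//                                                      Array<index_t, 4>{60, 20, 5, 1});
//   // desc.GetElementSize()                               == 2*3*4*5                 == 120
//   // desc.GetElementSpace()                              == 1 + 60 + 2*20 + 3*5 + 4 == 120 (packed)
//   // desc.CalculateOffset(Array<index_t, 4>{1, 2, 3, 4}) == 60 + 40 + 15 + 4        == 119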
#if 0
// Tensor descriptor for "transformed tensor"
template <typename LowTensorDescriptor,
typename Transforms, // Tuple<DynamicMultIndexTransforms,...>
typename LowDimensions, // Tuple<Sequence<...>,...>
typename UpDimensions> // Tuple<Sequence<...>,...>
struct DynamicTransformedTensorDescriptor
{
using Type = DynamicTransformedTensorDescriptor;
__host__ __device__ static constexpr auto GetNumOfLowerDimension()
{
// Here, we assume all lower dimensions are active
// TODO: sanity-check that all lower dimensions are indeed active
using duplicated_low_active_dims =
decltype(unpack(lambda_merge_sequences{}, LowDimensions{}));
using low_active_dims = typename sequence_unique_sort<duplicated_low_active_dims,
math::less<index_t>,
math::equal<index_t>>::type;
return low_active_dims::Size();
}
__host__ __device__ static constexpr auto GetNumOfUpperDimension()
{
using duplicated_up_active_dims =
decltype(unpack(lambda_merge_sequences{}, UpDimensions{}));
using up_active_dims = typename sequence_unique_sort<duplicated_up_active_dims,
math::less<index_t>,
math::equal<index_t>>::type;
return up_active_dims::Size();
}
static constexpr index_t ndim_up_ = GetNumOfUpperDimension();
static constexpr index_t ndim_low_ = GetNumOfLowerDimension();
static constexpr index_t num_transform_ = Transforms::Size();
using UpperIndex = MultiIndex<ndim_up_>;
using LowerIndex = MultiIndex<ndim_low_>;
const LowTensorDescriptor low_tensor_desc_;
const Transforms transforms_;
const LowDimensions low_dims_;
const UpDimensions up_dims_;
__host__ __device__ constexpr DynamicTransformedTensorDescriptor(const LowTensorDescriptor& low_tensor_desc,
const Transforms& transforms)
: low_tensor_desc_(low_tensor_desc),
transforms_(transforms)
{
}
__host__ __device__ static constexpr auto GetNumOfDimension()
{
return GetNumOfUpperDimension();
}
__host__ __device__ constexpr auto GetLowerTensorDescriptor() const
{
return low_tensor_desc_;
}
__host__ __device__ constexpr auto GetUpperLengths() const
{
// TODO: derive the upper lengths from the transforms; not yet implemented in this prototype
}
__host__ __device__ constexpr auto GetLengths() const { return GetUpperLengths(); }
__host__ __device__ constexpr auto GetLength(index_t i) const
{
return GetLengths()[i];
}
__host__ __device__ constexpr auto GetElementSize() const
{
index_t element_size = 1;
for(index_t i = 0; i < ndim_up_; ++i)
{
element_size *= GetLength(i);
}
return element_size;
}
__host__ __device__ constexpr auto GetElementSpace() const
{
return low_tensor_desc_.GetElementSpace();
}
// TODO: right now the return value is not constexpr because of the use of a non-constexpr lambda
__host__ __device__ constexpr LowerIndex CalculateLowerIndex(const UpperIndex& idx_up) const
{
LowerIndex idx_low;
static_for<0, num_transform_, 1>{}([&](auto itran) {
const auto& tran = transforms_.At(itran);
const auto idx_up_part = pick_array_element(idx_up, UpDimensions{}.At(itran));
auto idx_low_part = pick_array_element(idx_low, LowDimensions{}.At(itran));
// this assumes each lower (single) index is associated with only one transformation,
// which is required for index transformation, and has been checked in the constructor
// of DynamicTransformedTensorDescriptor
idx_low_part = tran.CalculateLowerIndex(to_array(idx_up_part));
});
return idx_low;
}
// TODO: right now the return value is not constexpr because of the use of a non-constexpr lambda
__host__ __device__ constexpr LowerIndex CalculateLowerIndexDiff(
const UpperIndex& idx_up_diff, const UpperIndex& idx_up_old, const LowerIndex& idx_low_old) const
{
LowerIndex idx_low_diff;
static_for<0, num_transform_, 1>{}([&](auto itran) {
const auto& tran = transforms_.At(itran);
const auto idx_up_diff_part =
pick_array_element(idx_up_diff, UpDimensions{}.At(itran));
const auto idx_up_old_part = pick_array_element(idx_up_old, UpDimensions{}.At(itran));
const auto idx_low_old_part =
pick_array_element(idx_low_old, LowDimensions{}.At(itran));
auto idx_low_diff_part = pick_array_element(idx_low_diff, LowDimensions{}.At(itran));
// this assumes each lower (single) index is associated with only one transformation,
// which is required for index transformation, and has been checked in the constructor
// of DynamicTransformedTensorDescriptor
idx_low_diff_part = tran.CalculateLowerIndexDiff(
to_array(idx_up_diff_part), to_array(idx_up_old_part), to_array(idx_low_old_part));
});
return idx_low_diff;
}
__host__ __device__ constexpr index_t CalculateOffset(const UpperIndex& idx_up) const
{
return GetLowerTensorDescriptor().CalculateOffset(CalculateLowerIndex(idx_up));
}
};
#endif
} // namespace ck
#endif
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
namespace ck {
template <typename Lengths, typename Strides>
__host__ __device__ constexpr auto make_dynamic_native_tensor_descriptor(const Lengths& lengths,
const Strides& strides)
{
static_assert(Lengths::GetSize() == Strides::GetSize(), "wrong! Size not the same");
return DynamicNativeTensorDescriptor<Lengths::GetSize()>(lengths, strides);
}
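// e.g. (illustrative), mirroring how device_dummy_dynamic_transform builds its descriptors:
//
//   const auto desc = make_dynamic_native_tensor_descriptor(to_array(Sequence<8, 4>{}),
//                                                           to_array(Sequence<4, 1>{}));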
template <typename LowTensorDescriptor,
typename Transforms,
typename LowDimensions,
typename UpDimensions>
__host__ __device__ constexpr auto
transform_dynamic_tensor_descriptor(const LowTensorDescriptor& low_tensor_desc,
const Transforms& transforms,
LowDimensions,
UpDimensions)
{
return DynamicTransformedTensorDescriptor<LowTensorDescriptor,
Transforms,
LowDimensions,
UpDimensions>(low_tensor_desc, transforms);
}
} // namespace ck
#endif
#ifndef CK_AMD_LLVM_INTRINSIC_HPP
#define CK_AMD_LLVM_INTRINSIC_HPP
#include "float_type.hpp"
namespace ck {
__device__ int32_t __llvm_amdgcn_readfirstlane_i32(int32_t i) __asm("llvm.amdgcn.readfirstlane");
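// llvm.amdgcn.readfirstlane returns the value held by the first active lane as a scalar (SGPR)
// value that is uniform across the wavefront; e.g. (illustrative):
//
//   const index_t uniform_offset = __llvm_amdgcn_readfirstlane_i32(offset);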
} // namespace ck
#endif
@@ -20,6 +20,7 @@
 #if CK_USE_AMD_INLINE_ASM
 #include "amd_inline_asm.hpp"
+#include "amd_llvm_intrinsic.hpp"
 #endif

 #if CK_USE_AMD_XDLOPS
@@ -172,7 +172,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
     constexpr index_t WeiBlockCopySrcDataPerRead_E  = 4;
     constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
-#elif 0
+#elif 1
     // cdata = 64, BlockSize = 256, 128x128x16
     constexpr index_t BlockSize = 256;
@@ -290,7 +290,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
     constexpr index_t WeiBlockCopySrcDataPerRead_E  = 2;
     constexpr index_t WeiBlockCopyDstDataPerWrite_K = 1;
-#elif 1
+#elif 0
     // cdata = 64, BlockSize = 128, 64x128x8
     constexpr index_t BlockSize = 128;
#include <unistd.h>
#include "device.hpp"
#include "host_tensor.hpp"
#include "gridwise_operation_wrapper.hpp"
#include "dummy_dynamic_transform.hpp"
template <class T,
class InDesc,
class WeiDesc,
class OutDesc,
class ConvStrides,
class ConvDilations,
class InLeftPads,
class InRightPads>
void device_dummy_dynamic_transform(InDesc,
const Tensor<T>& in_nchw,
WeiDesc,
const Tensor<T>& wei_kcyx,
OutDesc,
Tensor<T>& out_nkhw,
ConvStrides,
ConvDilations,
InLeftPads,
InRightPads,
ck::index_t nrepeat)
{
using namespace ck;
using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type;
const auto in_nchw_desc = make_dynamic_native_tensor_descriptor(to_array(InDesc::GetLengths()),
to_array(InDesc::GetStrides()));
const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor(
to_array(WeiDesc::GetLengths()), to_array(WeiDesc::GetStrides()));
const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor(
to_array(OutDesc::GetLengths()), to_array(OutDesc::GetStrides()));
const auto conv_strides = to_array(ConvStrides{});
const auto conv_dilations = to_array(ConvDilations{});
const auto in_left_pads = to_array(InLeftPads{});
const auto in_right_pads = to_array(InRightPads{});
std::size_t data_sz = sizeof(T);
DeviceMem in_nchw_device_buf(data_sz * in_nchw.mDesc.GetElementSpace());
DeviceMem wei_kcyx_device_buf(data_sz * wei_kcyx.mDesc.GetElementSpace());
DeviceMem out_nkhw_device_buf(data_sz * out_nkhw.mDesc.GetElementSpace());
in_nchw_device_buf.ToDevice(in_nchw.mData.data());
wei_kcyx_device_buf.ToDevice(wei_kcyx.mData.data());
out_nkhw_device_buf.ToDevice(out_nkhw.mData.data());
constexpr index_t BlockSize = 256;
constexpr index_t GridSize = 1;
printf("%s: BlockSize %u, GridSize %u \n", __func__, BlockSize, GridSize);
using dummy_transform = DummyDynamicTransform<BlockSize>;
for(index_t i = 0; i < 5; ++i)
{
std::cout << "Start running " << nrepeat << " times..." << std::endl;
KernelTimer timer;
timer.Start();
for(index_t j = 0; j < nrepeat; ++j)
{
launch_kernel(run_gridwise_operation<dummy_transform,
index_t* const,
index_t* const,
float* const,
const DynamicNativeTensorDescriptor<4>,
const DynamicNativeTensorDescriptor<4>,
const DynamicNativeTensorDescriptor<4>,
const Array<index_t, 2>,
const Array<index_t, 2>,
const Array<index_t, 2>,
const Array<index_t, 2>,
index_t,
index_t,
index_t,
index_t>,
dim3(GridSize),
dim3(BlockSize),
0,
0,
static_cast<index_t*>(in_nchw_device_buf.GetDeviceBuffer()),
static_cast<index_t*>(wei_kcyx_device_buf.GetDeviceBuffer()),
static_cast<float*>(out_nkhw_device_buf.GetDeviceBuffer()),
wei_kcyx_desc,
in_nchw_desc,
out_nkhw_desc,
conv_strides,
conv_dilations,
in_left_pads,
in_right_pads,
10,
10,
10,
10);
}
}
out_nkhw_device_buf.FromDevice(out_nkhw.mData.data());
}
@@ -12,17 +12,17 @@ template <class T,
           class ConvDilations,
           class InLeftPads,
           class InRightPads>
-void device_dummy_transform(InDesc,
-                            const Tensor<T>& in_nchw,
-                            WeiDesc,
-                            const Tensor<T>& wei_kcyx,
-                            OutDesc,
-                            Tensor<T>& out_nkhw,
-                            ConvStrides,
-                            ConvDilations,
-                            InLeftPads,
-                            InRightPads,
-                            ck::index_t nrepeat)
+void device_dummy_static_transform(InDesc,
+                                   const Tensor<T>& in_nchw,
+                                   WeiDesc,
+                                   const Tensor<T>& wei_kcyx,
+                                   OutDesc,
+                                   Tensor<T>& out_nkhw,
+                                   ConvStrides,
+                                   ConvDilations,
+                                   InLeftPads,
+                                   InRightPads,
+                                   ck::index_t nrepeat)
 {
     using namespace ck;
@@ -14,7 +14,8 @@
 #include "device_tensor.hpp"
 #include "device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp"
 #include "device_convolution_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp"
-#include "device_dummy_transform.hpp"
+#include "device_dummy_static_transform.hpp"
+#include "device_dummy_dynamic_transform.hpp"

 int main(int argc, char* argv[])
 {
@@ -200,7 +201,7 @@ int main(int argc, char* argv[])
     using LeftPads  = Sequence<0, 0>;
     using RightPads = Sequence<0, 0>;
-#elif 0
+#elif 1
     // 3x3, 35x35, stride 2
     constexpr index_t N = 128;
     constexpr index_t C = 288;
@@ -572,18 +573,30 @@ int main(int argc, char* argv[])
                        LeftPads{},
                        RightPads{},
                        nrepeat);
+#elif 0
+    device_dummy_static_transform(in_nchw_desc,
+                                  in_nchw,
+                                  wei_kcyx_desc,
+                                  wei_kcyx,
+                                  out_nkhw_desc,
+                                  out_nkhw_device,
+                                  ConvStrides{},
+                                  ConvDilations{},
+                                  LeftPads{},
+                                  RightPads{},
+                                  nrepeat);
 #elif 1
-    device_dummy_transform(in_nchw_desc,
-                           in_nchw,
-                           wei_kcyx_desc,
-                           wei_kcyx,
-                           out_nkhw_desc,
-                           out_nkhw_device,
-                           ConvStrides{},
-                           ConvDilations{},
-                           LeftPads{},
-                           RightPads{},
-                           nrepeat);
+    device_dummy_dynamic_transform(in_nchw_desc,
+                                   in_nchw,
+                                   wei_kcyx_desc,
+                                   wei_kcyx,
+                                   out_nkhw_desc,
+                                   out_nkhw_device,
+                                   ConvStrides{},
+                                   ConvDilations{},
+                                   LeftPads{},
+                                   RightPads{},
+                                   nrepeat);
 #endif
     if(do_verification)