Commit cab8f2e5 authored by Jing Zhang's avatar Jing Zhang
Browse files

clean

parents c20aabc3 9a17e7fb
......@@ -9,54 +9,52 @@ namespace tensor_operation {
namespace device {
namespace device_reduce_instance {
// Compile-time descriptor of one reduction configuration. Every template
// argument is re-exported as a static member (trailing underscore) so that it
// can be read back from the type itself.
// NOTE(review): this span carries two template headers and repeats several
// members (a ReduceDims-typed form and a NumReduceDim-counted form). It looks
// like two revisions of the struct interleaved -- confirm which one is intended.
template <int Rank, typename ReduceDims, int ReduceOpId, int NanOpt, int IndicesOpt>
template <int Rank, int NumReduceDim, int ReduceOpId, int NanOpt, int IndicesOpt>
struct ReduceDescription
{
// Members of the ReduceDims-typed form.
static constexpr int Rank_ = Rank;
static constexpr int ReduceOpId_ = ReduceOpId;
static constexpr int NanOpt_ = NanOpt;
static constexpr int IndicesOpt_ = IndicesOpt;
// Compile-time list of the dimensions to reduce.
using ReduceDims_ = ReduceDims;
// Members of the NumReduceDim-counted form.
static constexpr int Rank_ = Rank;
// Only the number of reduced dimensions is kept here, not their indices.
static constexpr int NumReduceDim_ = NumReduceDim;
static constexpr int ReduceOpId_ = ReduceOpId;
static constexpr int NanOpt_ = NanOpt;
static constexpr int IndicesOpt_ = IndicesOpt;
};
// List of reduction configurations in the Sequence-based form. Template
// arguments are <Rank, ReduceDims, ReduceOpId, NanOpt, IndicesOpt>; each
// operation appears with three (Rank, ReduceDims) shapes.
// NOTE(review): an alias with the same name (NumReduceDim-based) follows in this
// text -- looks like two revisions side by side; confirm which one is live.
using reduce_description_instances =
std::tuple<ReduceDescription<4, Sequence<0, 1, 2>, 0, 0, 0>, // for ADD
ReduceDescription<4, Sequence<0>, 0, 0, 0>,
ReduceDescription<2, Sequence<1>, 0, 0, 0>,
ReduceDescription<4, Sequence<0, 1, 2>, 5, 0, 0>, // for AVG
ReduceDescription<4, Sequence<0>, 5, 0, 0>,
ReduceDescription<2, Sequence<1>, 5, 0, 0>,
ReduceDescription<4, Sequence<0, 1, 2>, 7, 0, 0>, // for NORM2
ReduceDescription<4, Sequence<0>, 7, 0, 0>,
ReduceDescription<2, Sequence<1>, 7, 0, 0>,
ReduceDescription<4, Sequence<0, 1, 2>, 2, 0, 0>, // for MIN
ReduceDescription<4, Sequence<0>, 2, 0, 0>,
ReduceDescription<2, Sequence<1>, 2, 0, 0>,
ReduceDescription<4, Sequence<0, 1, 2>, 3, 0, 0>, // for MAX
ReduceDescription<4, Sequence<0>, 3, 0, 0>,
ReduceDescription<2, Sequence<1>, 3, 0, 0>,
ReduceDescription<4, Sequence<0, 1, 2>, 4, 0, 0>, // for AMAX
ReduceDescription<4, Sequence<0>, 4, 0, 0>,
ReduceDescription<2, Sequence<1>, 4, 0, 0>,
// The remaining entries repeat MIN/MAX/AMAX with IndicesOpt = 1.
ReduceDescription<4, Sequence<0, 1, 2>, 2, 0, 1>, // for MIN (IndicesOpt = 1)
ReduceDescription<4, Sequence<0>, 2, 0, 1>,
ReduceDescription<2, Sequence<1>, 2, 0, 1>,
ReduceDescription<4, Sequence<0, 1, 2>, 3, 0, 1>, // for MAX (IndicesOpt = 1)
ReduceDescription<4, Sequence<0>, 3, 0, 1>,
ReduceDescription<2, Sequence<1>, 3, 0, 1>,
ReduceDescription<4, Sequence<0, 1, 2>, 4, 0, 1>, // for AMAX (IndicesOpt = 1)
ReduceDescription<4, Sequence<0>, 4, 0, 1>,
ReduceDescription<2, Sequence<1>, 4, 0, 1>>;
// List of reduction configurations in the count-based form. Template arguments
// are <Rank, NumReduceDim, ReduceOpId, NanOpt, IndicesOpt>; each operation
// appears with three (Rank, NumReduceDim) shapes: (4, 3), (4, 1) and (2, 1).
// NOTE(review): an alias with the same name (Sequence-based) precedes this one
// in this text -- looks like two revisions side by side; confirm which is live.
using reduce_description_instances = std::tuple<ReduceDescription<4, 3, 0, 0, 0>, // for ADD
ReduceDescription<4, 1, 0, 0, 0>,
ReduceDescription<2, 1, 0, 0, 0>,
ReduceDescription<4, 3, 5, 0, 0>, // for AVG
ReduceDescription<4, 1, 5, 0, 0>,
ReduceDescription<2, 1, 5, 0, 0>,
ReduceDescription<4, 3, 7, 0, 0>, // for NORM2
ReduceDescription<4, 1, 7, 0, 0>,
ReduceDescription<2, 1, 7, 0, 0>,
ReduceDescription<4, 3, 2, 0, 0>, // for MIN
ReduceDescription<4, 1, 2, 0, 0>,
ReduceDescription<2, 1, 2, 0, 0>,
ReduceDescription<4, 3, 3, 0, 0>, // for MAX
ReduceDescription<4, 1, 3, 0, 0>,
ReduceDescription<2, 1, 3, 0, 0>,
ReduceDescription<4, 3, 4, 0, 0>, // for AMAX
ReduceDescription<4, 1, 4, 0, 0>,
ReduceDescription<2, 1, 4, 0, 0>,
// The remaining entries repeat MIN/MAX/AMAX with IndicesOpt = 1.
ReduceDescription<4, 3, 2, 0, 1>, // for MIN (IndicesOpt = 1)
ReduceDescription<4, 1, 2, 0, 1>,
ReduceDescription<2, 1, 2, 0, 1>,
ReduceDescription<4, 3, 3, 0, 1>, // for MAX (IndicesOpt = 1)
ReduceDescription<4, 1, 3, 0, 1>,
ReduceDescription<2, 1, 3, 0, 1>,
ReduceDescription<4, 3, 4, 0, 1>, // for AMAX (IndicesOpt = 1)
ReduceDescription<4, 1, 4, 0, 1>,
ReduceDescription<2, 1, 4, 0, 1>>;
template <typename DescriptionType>
bool description_match(const DescriptionType& description,
int Rank,
const std::vector<int>& ReduceDims,
const std::vector<int>& reduceDims,
ReduceTensorOp_t ReduceOpId,
NanPropagation_t NanOpt,
ReduceTensorIndices_t IndicesOpt)
......@@ -66,16 +64,11 @@ bool description_match(const DescriptionType& description,
description.IndicesOpt_ != static_cast<int>(IndicesOpt))
return (false);
if(DescriptionType::ReduceDims_::Size() != ReduceDims.size())
if(DescriptionType::NumReduceDim_ != reduceDims.size())
return (false);
bool result = true;
static_for<0, DescriptionType::ReduceDims_::Size(), 1>{}([&](auto i) {
if(DescriptionType::ReduceDims_::At(i) != ReduceDims[i])
result = false;
});
return (result);
};
......@@ -87,33 +80,29 @@ bool description_match(const DescriptionType& description,
namespace ck {
namespace profiler {
// Converts the compile-time index list ReduceDims into a runtime vector,
// keeping the elements in their original order. Rank is not used by the body.
template <int Rank, typename ReduceDims>
static std::vector<int> get_reduce_dims()
{
    std::vector<int> dims;

    // static_for visits positions 0 .. ReduceDims::Size() - 1 one by one.
    static_for<0, ReduceDims::Size(), 1>{}([&dims](auto pos) {
        dims.push_back(ReduceDims::At(pos));
    });

    return dims;
};
// Returns the dimension indices in [0, Rank) that are NOT reduce dimensions, in
// ascending order. A bitmask with one bit per dimension marks the reduced ones.
// NOTE(review): this span contains two function headers and two bodies sharing
// the same braces (a ReduceDims-typed form and a form taking reduceDims at run
// time). It looks like two revisions interleaved -- confirm which one is live.
template <int Rank, typename ReduceDims>
static std::vector<int> get_invariant_dims()
template <index_t Rank, index_t NumReduceDim>
static inline std::vector<int> get_invariant_dims(const std::vector<int>& reduceDims)
{
std::vector<int> resDims;
unsigned int incFlag = 0;
// NOTE(review): compares the template value NumReduceDim with a size_t; if
// index_t is signed this is a mixed-sign comparison -- confirm.
assert(NumReduceDim == reduceDims.size());
// Compile-time form: set one bit in incFlag per entry of ReduceDims.
static_for<0, ReduceDims::Size(), 1>{}(
[&](auto i) { incFlag = incFlag | (0x1 << ReduceDims::At(i)); });
int reduceFlag = 0;
for(int dim = 0; dim < Rank; dim++)
// flag the bits for the reduceDims
for(int i = 0; i < NumReduceDim; i++)
{
// Dimensions whose bit is set in incFlag are reduced, so they are skipped.
if(incFlag & (0x1 << dim))
continue;
resDims.push_back(dim);
// The shift assumes each reduce dimension index is smaller than the bit
// width of int.
reduceFlag |= 1 << reduceDims[i];
};
return (resDims);
std::vector<int> invariantDims;
// collect invariant dimensions
for(int i = 0; i < Rank; i++)
if((reduceFlag & (1 << i)) == 0)
{
invariantDims.push_back(i);
};
return invariantDims;
};
template <typename T>
......@@ -149,7 +138,7 @@ template <typename InDataType,
typename AccDataType,
typename OutDataType,
int Rank,
typename ReduceDims_,
int NumReduceDim,
ReduceTensorOp_t ReduceOpId,
NanPropagation_t NanOpt,
ReduceTensorIndices_t IndicesOpt>
......@@ -159,6 +148,7 @@ void profile_reduce_impl_impl(bool do_verification,
bool do_dumpout,
int nrepeat,
const std::vector<size_t>& inLengths,
const std::vector<int>& reduceDims,
float alpha,
float beta)
{
......@@ -203,15 +193,14 @@ void profile_reduce_impl_impl(bool do_verification,
{
Tensor<InDataType> in(inLengths);
const std::vector<int> OuterDims = get_invariant_dims<Rank, ReduceDims_>();
const std::vector<int> ReduceDims = get_reduce_dims<Rank, ReduceDims_>();
std::vector<size_t> outLengths;
if(OuterDims.empty())
const auto invariantDims = get_invariant_dims<Rank, NumReduceDim>(reduceDims);
if(reduceDims.size() == Rank)
outLengths.push_back(1);
else
for(auto dim : OuterDims)
for(auto dim : invariantDims)
outLengths.push_back(inLengths[dim]);
Tensor<OutDataType> out_ref(outLengths);
......@@ -302,7 +291,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType,
OutDataType,
Rank,
ReduceDims_,
NumReduceDim,
ReduceOpId,
NanOpt,
IndicesOpt>(reduce0_ptrs);
......@@ -311,7 +300,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType,
OutDataType,
Rank,
ReduceDims_,
NumReduceDim,
ReduceOpId,
NanOpt,
IndicesOpt>(reduce0_ptrs);
......@@ -321,7 +310,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType,
OutDataType,
Rank,
ReduceDims_,
NumReduceDim,
ReduceOpId,
NanOpt,
IndicesOpt>(reduce0_ptrs);
......@@ -330,7 +319,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType,
OutDataType,
Rank,
ReduceDims_,
NumReduceDim,
ReduceOpId,
NanOpt,
IndicesOpt>(reduce1_ptrs);
......@@ -341,7 +330,7 @@ void profile_reduce_impl_impl(bool do_verification,
AccDataType,
OutDataType,
Rank,
ReduceDims_,
NumReduceDim,
ReduceOpId,
NanOpt,
IndicesOpt>(reduce2_ptrs);
......@@ -358,7 +347,7 @@ void profile_reduce_impl_impl(bool do_verification,
using hCompType = typename type_mapping<AccDataType>::outDataType;
ReductionHost<hInType, hCompType, hOutType, ReduceOpId, PropagateNan, NeedIndices>
hostReduce(in.mDesc, out_ref.mDesc, OuterDims, ReduceDims);
hostReduce(in.mDesc, out_ref.mDesc, invariantDims, reduceDims);
hostReduce.Run(alpha,
reinterpret_cast<const hInType*>(in.mData.data()),
......@@ -383,6 +372,7 @@ void profile_reduce_impl_impl(bool do_verification,
i_inStrides,
i_outLengths,
i_outStrides,
reduceDims,
alpha,
beta,
in_dev.GetDeviceBuffer(),
......@@ -464,6 +454,7 @@ void profile_reduce_impl_impl(bool do_verification,
i_inStrides,
i_outLengths,
i_outStrides,
reduceDims,
alpha,
beta,
in_dev.GetDeviceBuffer(),
......@@ -496,6 +487,7 @@ void profile_reduce_impl_impl(bool do_verification,
inStrides2,
i_outLengths,
i_outStrides,
reduceDims,
alpha,
beta,
ws_dev.GetDeviceBuffer(),
......@@ -584,7 +576,7 @@ void profile_reduce_impl(bool do_verification,
bool do_dumpout,
int nrepeat,
const std::vector<size_t>& inLengths,
const std::vector<int>& ReduceDims,
const std::vector<int>& reduceDims,
ReduceTensorOp_t ReduceOpId,
NanPropagation_t NanOpt,
ReduceTensorIndices_t IndicesOpt,
......@@ -605,18 +597,26 @@ void profile_reduce_impl(bool do_verification,
using descType = remove_cvref_t<decltype(std::get<i>(tuple_object))>;
if(!description_match(
descType{}, inLengths.size(), ReduceDims, ReduceOpId, NanOpt, IndicesOpt))
descType{}, inLengths.size(), reduceDims, ReduceOpId, NanOpt, IndicesOpt))
return;
profile_reduce_impl_impl<InDataType,
AccDataType,
OutDataType,
descType::Rank_,
typename descType::ReduceDims_,
descType::NumReduceDim_,
static_cast<ReduceTensorOp_t>(descType::ReduceOpId_),
static_cast<NanPropagation_t>(descType::NanOpt_),
static_cast<ReduceTensorIndices_t>(descType::IndicesOpt_)>(
do_verification, init_method, do_log, do_dumpout, nrepeat, inLengths, alpha, beta);
do_verification,
init_method,
do_log,
do_dumpout,
nrepeat,
inLengths,
reduceDims,
alpha,
beta);
matched = true;
});
......
......@@ -25,7 +25,7 @@ using ck::ReduceTensorIndices_t;
using ck::ReduceTensorOp_t;
static struct option long_options[] = {{"inLengths", required_argument, nullptr, 'D'},
{"toReduceDims", required_argument, nullptr, 'R'},
{"reduceDims", required_argument, nullptr, 'R'},
{"reduceOp", required_argument, nullptr, 'O'},
{"compType", required_argument, nullptr, 'C'},
{"outType", required_argument, nullptr, 'W'},
......@@ -93,9 +93,9 @@ typedef enum
appDouble = 6,
} appDataType_t;
static void check_reduce_dims(const int rank, const std::vector<int>& toReduceDims)
static void check_reduce_dims(const int rank, const std::vector<int>& reduceDims)
{
for(auto dim : toReduceDims)
for(auto dim : reduceDims)
{
if(dim < 0 || dim >= rank)
throw std::runtime_error("Invalid dimension index specified for Reducing");
......@@ -103,7 +103,7 @@ static void check_reduce_dims(const int rank, const std::vector<int>& toReduceDi
unsigned int flag = 0;
for(auto dim : toReduceDims)
for(auto dim : reduceDims)
{
if(flag & (0x1 << dim))
throw std::runtime_error("All toReduce dimensions should be different!");
......@@ -122,7 +122,7 @@ class AppArgs
std::vector<size_t> inLengths;
std::vector<size_t> outLengths;
std::vector<int> toReduceDims;
std::vector<int> reduceDims;
std::vector<float> scales;
......@@ -152,7 +152,7 @@ class AppArgs
std::cout << "Usage of " << cmd << std::endl;
std::cout << "--inLengths or -D, comma separated list of input tensor dimension lengths"
<< std::endl;
std::cout << "--toReduceDims or -R, comma separated list of to-reduce dimensions"
std::cout << "--reduceDims or -R, comma separated list of to-reduce dimensions"
<< std::endl;
std::cout << "--reduceOp or -O, enum value indicating the reduction operations"
<< std::endl;
......@@ -201,7 +201,7 @@ class AppArgs
if(!optarg)
throw std::runtime_error("Invalid option format!");
toReduceDims = getTypeValuesFromString<int>(optarg);
reduceDims = getTypeValuesFromString<int>(optarg);
break;
case 'O':
if(!optarg)
......@@ -321,7 +321,7 @@ int profile_reduce(int argc, char* argv[])
int rank = args.inLengths.size();
check_reduce_dims(rank, args.toReduceDims);
check_reduce_dims(rank, args.reduceDims);
if(args.reduceOp == ReduceTensorOp_t::MUL || args.reduceOp == ReduceTensorOp_t::NORM1)
throw std::runtime_error("MUL and NORM1 are not supported by composable kernel!");
......@@ -345,7 +345,7 @@ int profile_reduce(int argc, char* argv[])
args.do_dumpout,
args.nrepeat,
args.inLengths,
args.toReduceDims,
args.reduceDims,
args.reduceOp,
args.nanOpt,
args.indicesOpt,
......@@ -360,7 +360,7 @@ int profile_reduce(int argc, char* argv[])
args.do_dumpout,
args.nrepeat,
args.inLengths,
args.toReduceDims,
args.reduceDims,
args.reduceOp,
args.nanOpt,
args.indicesOpt,
......@@ -378,7 +378,7 @@ int profile_reduce(int argc, char* argv[])
args.do_dumpout,
args.nrepeat,
args.inLengths,
args.toReduceDims,
args.reduceDims,
args.reduceOp,
args.nanOpt,
args.indicesOpt,
......@@ -395,7 +395,7 @@ int profile_reduce(int argc, char* argv[])
args.do_dumpout,
args.nrepeat,
args.inLengths,
args.toReduceDims,
args.reduceDims,
args.reduceOp,
args.nanOpt,
args.indicesOpt,
......@@ -410,7 +410,7 @@ int profile_reduce(int argc, char* argv[])
args.do_dumpout,
args.nrepeat,
args.inLengths,
args.toReduceDims,
args.reduceDims,
args.reduceOp,
args.nanOpt,
args.indicesOpt,
......
#!/bin/bash
# File to scan. The first positional argument is required: without it the greps
# below would have no file operand and would read standard input instead.
FILE=${1:?usage: $0 <file>}

# For every name v0 .. v255 print "<name> <count>", where <count> is the number
# of lines of $FILE that contain the name as a whole word (grep -w).
for num in {0..255}
do
    name="v${num}"
    # "$FILE" is quoted so paths containing spaces are passed as one argument.
    count=$(grep -c -w -- "$name" "$FILE")
    # grep prints no count when it cannot read the file; report that as 0.
    echo "$name ${count:-0}"
done
# For each index 0 .. 255, count the lines of $FILE that mention the s/v/a name
# with that index -- written directly (e.g. "v7" or "v[7") or as the upper bound
# of a bracketed range (e.g. "v[4:7]") -- and print the three counts on one line
# as "vN <count>, sN <count>, aN <count>". A trailing " *" marks indices with at
# least one match.
for num in {0..255}
do
    # Tail shared by the three patterns; the one-letter prefix is added below.
    base_pattern="(\[?${num}\b|\[\d*:${num}\])"

    # Patterns and $FILE are quoted: the regex contains '[', '?' and '*', which
    # an unquoted expansion would expose to pathname expansion and word splitting.
    scount=$(grep -c -P -- "s${base_pattern}" "$FILE")
    vcount=$(grep -c -P -- "v${base_pattern}" "$FILE")
    acount=$(grep -c -P -- "a${base_pattern}" "$FILE")

    # grep prints no count when it cannot read the file; treat that as 0.
    scount=${scount:-0}
    vcount=${vcount:-0}
    acount=${acount:-0}

    printf 'v%s %s, s%s %s, a%s %s' "$num" "$vcount" "$num" "$scount" "$num" "$acount"
    if [[ $scount -ne 0 || $vcount -ne 0 || $acount -ne 0 ]]; then
        printf ' *'
    fi
    printf '\n'
done
#!/bin/bash
PRECISION= ##--half
PRECISION=
##PRECISION=--half
##PRECISION=--double
# Pass "-C 1" (the profiler's compType option) only when half precision is
# selected; otherwise leave both variables empty.
# The string comparison alone is sufficient. An unquoted `test -n $PRECISION`
# collapses to the one-argument form `test -n` when the variable is empty, which
# is always true, so it never guarded anything.
if [ "$PRECISION" = "--half" ]; then
    CTYPE="-C 1"
    ACCTYPE="-C 1"
else
    CTYPE=""
    ACCTYPE=""
fi
WTYPE=
driver="./bin/ckProfiler"
VERIFY="-v $1"
INIT=$2
NREPEAT=$3
# Repeat count: the first positional argument when one was passed (even if it is
# empty), otherwise 1.
NREPEAT=${1-1}
Operation=7
#### 0 - ADD, 5 - AVG, 7 - NORM2
Operations="0 5 7"
## for generic validation
for op in $Operation; do
for op in $Operations; do
set -x
./bin/ckProfiler reduce $PRECISION -D 64,4,280,82 -R 0 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 4,64,280,82 -R 0 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 280,4,64,82 -R 0 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 64,4,280,82 -R 0,1,2 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 4,64,280,82 -R 0,1,2 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 64,280,82,4 -R 0,1,2 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 700,8192 -R 1 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 700,1024 -R 1 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 700,4 -R 1 -O $op $CTYPE -v 1 1 $NREPEAT
####### datatype layout reduce dims op acctype verify init repeats
$driver reduce $PRECISION -D 64,4,280,82 -R 0 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 1 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 1,2,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 0,2,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,3 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,22960 -R 0 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,22960 -R 1 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 4,1469440 -R 0 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 4,1469440 -R 1 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
set +x
done
Operation=5
#### 0 - ADD, 5 - AVG, 7 - NORM2
Operations=5
## for performance evaluation (resnet50 NHWC => C)
for op in $Operation; do
for op in $Operations; do
set -x
./bin/ckProfiler reduce $PRECISION -D 256,14,14,1024 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,28,28,128 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,58,58,128 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,7,7,2048 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,14,14,256 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,30,30,256 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,56,56,256 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,16,16,512 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,28,28,512 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,7,7,512 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,56,56,64 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 256,230,230,3 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,14,14,1024 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,28,28,128 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,58,58,128 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,7,7,2048 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,14,14,256 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,30,30,256 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,56,56,256 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,16,16,512 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,28,28,512 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,7,7,512 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 128,56,56,64 -R 0,1,2 -O $op $CTYPE $WTYPE -v 1 1 $NREPEAT
####### datatype layout reduce dims op acctype verify init repeats
$driver reduce $PRECISION -D 256,14,14,1024 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,28,28,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,58,58,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,7,7,2048 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,14,14,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,30,30,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,56,56,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,16,16,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,28,28,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,7,7,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,56,56,64 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,230,230,3 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,14,14,1024 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,28,28,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,58,58,128 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,7,7,2048 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,14,14,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,30,30,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,56,56,256 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,16,16,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,28,28,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,7,7,512 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 128,56,56,64 -R 0,1,2 -O $op $ACCTYPE $VERIFY $INIT $NREPEAT
set +x
done
#!/bin/bash
PRECISION= ##--half
PRECISION=
##PRECISION=--half
##PRECISION=--double
# Repeat count: the first positional argument when one was passed (even if it is
# empty), otherwise 1.
NREPEAT=${1-1}
driver="./bin/ckProfiler"
Operation=4
VERIFY="-v $1"
INIT=$2
NREPEAT=$3
LENGTHS=64,4,280,82
#### 2 - MIN, 3 - MAX, 4 - AMAX
Operations="2 4"
## for generic validation
for op in $Operation; do
for op in $Operations; do
for use_idx in 0 1; do
set -x
./bin/ckProfiler reduce $PRECISION -D 64,4,280,82 -R 0 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 4,64,280,82 -R 0 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 280,4,64,82 -R 0 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 64,4,280,82 -R 0,1,2 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 4,64,280,82 -R 0,1,2 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 64,280,82,4 -R 0,1,2 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 700,8192 -R 1 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 700,1024 -R 1 -O $op $CTYPE -v 1 1 $NREPEAT
./bin/ckProfiler reduce $PRECISION -D 700,4 -R 1 -O $op $CTYPE -v 1 1 $NREPEAT
####### datatype layout reduce dims op use index verify init repeats
$driver reduce $PRECISION -D 64,4,280,82 -R 0 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 1 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 1,2,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 0,2,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 64,4,280,82 -R 0,1,3 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,22960 -R 0 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 256,22960 -R 1 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 4,1469440 -R 0 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
$driver reduce $PRECISION -D 4,1469440 -R 1 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
set +x
done
done
# Restrict the performance sweep to a single reduce operation id.
Operations=2

## for performance evaluation (resnet50 NHWC => C)
# Relies on $driver, $PRECISION, $VERIFY, $INIT and $NREPEAT set earlier in the script.
for op in $Operations; do
    for use_idx in 0 1; do
        # Echo each driver command while this batch runs.
        set -x
        ####### datatype layout reduce dims op use index verify init repeats
        $driver reduce $PRECISION -D 256,14,14,1024 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,28,28,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,58,58,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,7,7,2048 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,14,14,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,30,30,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,56,56,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,16,16,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,28,28,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,7,7,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,56,56,64 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 256,230,230,3 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,14,14,1024 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,28,28,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,58,58,128 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,7,7,2048 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,14,14,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,30,30,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,56,56,256 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,16,16,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,28,28,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,7,7,512 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        $driver reduce $PRECISION -D 128,56,56,64 -R 0,1,2 -O $op -I $use_idx $VERIFY $INIT $NREPEAT
        set +x
    done
done
......
......@@ -95,13 +95,13 @@ void traverse_using_space_filling_curve()
make_tuple(12, 2, 6),
make_tuple(12, 0, 6));
constexpr index_t num_accesses = SpaceFillingCurve::GetNumOfAccess();
constexpr index_t num_access = SpaceFillingCurve::GetNumOfAccess();
static_assert(num_accesses == reduce_on_sequence(TensorLengths{} / ScalarsPerAccess{},
math::multiplies{},
Number<1>{}));
static_assert(num_access == reduce_on_sequence(TensorLengths{} / ScalarsPerAccess{},
math::multiplies{},
Number<1>{}));
static_for<1, num_accesses, 1>{}([&](auto i) {
static_for<1, num_access, 1>{}([&](auto i) {
constexpr auto idx_curr = SpaceFillingCurve::GetIndex(i);
static_assert(idx_curr[I0] == expected[i][I0]);
......@@ -115,7 +115,7 @@ void traverse_using_space_filling_curve()
static_assert(backward_step[I2] == expected_step[I2]);
});
static_for<0, num_accesses - 1, 1>{}([&](auto i) {
static_for<0, num_access - 1, 1>{}([&](auto i) {
constexpr auto idx_curr = SpaceFillingCurve::GetIndex(i);
static_assert(idx_curr[I0] == expected[i][I0]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment