"python/vscode:/vscode.git/clone" did not exist on "e61d13acdf3193606c3bc57fb59f0de33eab7490"
threadwise_generic_tensor_op.hpp 537 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#ifndef CK_THREADWISE_GENERIC_TENSOR_OP_HPP
#define CK_THREADWISE_GENERIC_TENSOR_OP_HPP

#include "ConstantTensorDescriptor.hpp"
#include "ConstantMergedTensorDescriptor.hpp"

namespace ck {

// Zero-fill of a tensor buffer, performed in full by each calling thread
// (no thread or block index is consulted here).
//
// Template parameters:
//   Float - element type of the buffer; the fill value is static_cast<Float>(0).
//   TDesc - tensor descriptor type. Only its static interface is used
//           (GetLengths() and GetOffsetFromMultiIndex()), which is why the
//           descriptor argument is left unnamed.
//
// Parameters:
//   (unnamed TDesc) - tag object used solely to deduce TDesc.
//   p               - base pointer of the buffer; elements are written at the
//                     offsets the descriptor reports for each multi-index.
//
// NOTE(review): static_ford is declared elsewhere; presumably it invokes the
// functor once for every multi-index within the given lengths - confirm
// against its definition.
template <class Float, class TDesc>
__device__ void threadwise_generic_tensor_set_zero(TDesc, Float* __restrict__ p)
{
    using Lengths = decltype(TDesc::GetLengths());

    const Float zero = static_cast<Float>(0);

    auto clear_element = [&](auto idx) {
        // Declared constexpr, so the linear offset must be resolved at compile time.
        constexpr index_t linear_offset = TDesc::GetOffsetFromMultiIndex(idx);

        p[linear_offset] = zero;
    };

    static_ford<Lengths>{}(clear_element);
}

} // namespace ck
#endif