"utils/vscode:/vscode.git/clone" did not exist on "76b9024b282206200ee82b441dbe08c2d9299f2f"
Commit 4cf9a393 authored by Jing Zhang
Browse files

fixed threadwise_copy

parent 405a15ec
...@@ -16,26 +16,9 @@ using ALayout = Row; ...@@ -16,26 +16,9 @@ using ALayout = Row;
using BLayout = Col; using BLayout = Col;
using CLayout = Row; using CLayout = Row;
// Element-wise functor; this example binds it as CElementOp (the output element operation).
// The generic overload stores x into y by plain assignment, while the
// <ck::bhalf_t, float> specialization routes through ck::bf16_convert_rtn.
struct ConvertBF16RTN_
{
// convert to bf16 using round to nearest (rtn)
template <typename Y, typename X>
__host__ __device__ void operator()(Y& y, const X& x) const
{
// generic case: whatever conversion the assignment from X to Y performs
y = x;
}
// float source, bhalf_t destination: use the explicit rtn conversion helper
// instead of the plain assignment above
template <>
__host__ __device__ void operator()<ck::bhalf_t, float>(ck::bhalf_t& y, const float& x) const
{
y = ck::bf16_convert_rtn<ck::bhalf_t, float>(x);
}
};
using AElementOp = PassThrough; using AElementOp = PassThrough;
using BElementOp = PassThrough; using BElementOp = PassThrough;
using CElementOp = ConvertBF16RTN_; using CElementOp = ck::tensor_operation::element_wise::ConvertBF16RTN;
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default; static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
......
...@@ -39,6 +39,12 @@ struct PassThrough ...@@ -39,6 +39,12 @@ struct PassThrough
y = x; y = x;
} }
// float source, half_t destination: convert explicitly through type_convert<half_t>
// rather than relying on a plain assignment
template <>
__host__ __device__ void operator()<half_t, float>(half_t& y, const float& x) const
{
y = type_convert<half_t>(x);
}
template <> template <>
__host__ __device__ void operator()<bhalf_t, bhalf_t>(bhalf_t& y, const bhalf_t& x) const __host__ __device__ void operator()<bhalf_t, bhalf_t>(bhalf_t& y, const bhalf_t& x) const
{ {
......
...@@ -104,13 +104,13 @@ struct ThreadwiseTensorSliceTransfer_v6r1 ...@@ -104,13 +104,13 @@ struct ThreadwiseTensorSliceTransfer_v6r1
// apply pointwise operation // apply pointwise operation
static_for<0, ScalarPerVector, 1>{}([&](auto i) { static_for<0, ScalarPerVector, 1>{}([&](auto i) {
SrcData v; DstData v;
// apply element-wise operation // apply element-wise operation
element_op_(v, src_vector_container.template AsType<SrcData>()[i]); element_op_(v, src_vector_container.template AsType<SrcData>()[i]);
// apply type convert // apply type convert
dst_vector_container.template AsType<DstData>()(i) = type_convert<DstData>(v); dst_vector_container.template AsType<DstData>()(i) = v;
}); });
const bool is_dst_valid = const bool is_dst_valid =
......
...@@ -92,11 +92,11 @@ struct ReferenceGemm : public device::BaseOperator ...@@ -92,11 +92,11 @@ struct ReferenceGemm : public device::BaseOperator
ck::type_convert<AccDataType>(v_a) * ck::type_convert<AccDataType>(v_b); ck::type_convert<AccDataType>(v_a) * ck::type_convert<AccDataType>(v_b);
} }
AccDataType v_c; CDataType v_c;
arg.c_element_op_(v_c, v_acc); arg.c_element_op_(v_c, v_acc);
arg.c_m_n_(m, n) = ck::type_convert<CDataType>(v_c); arg.c_m_n_(m, n) = v_c;
}; };
make_ParallelTensorFunctor( make_ParallelTensorFunctor(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment