"src/targets/gpu/device/sqdiff.cpp" did not exist on "dc0c481026f4e6b14e0f3d40116a592dcc4503cd"
Commit 6c33a53f authored by Paul
Browse files

Use recip

parent d36f72c5
...@@ -51,7 +51,7 @@ __device__ void generic_binary_layernorm( ...@@ -51,7 +51,7 @@ __device__ void generic_binary_layernorm(
constexpr auto relements = r.template elements<Input1>(); constexpr auto relements = r.template elements<Input1>();
auto means = r.reduce(op::sum{}, make_array<vec_type<value_type>>(0, 0), [&](auto x1, auto x2) { auto means = r.reduce(op::sum{}, make_array<vec_type<value_type>>(0, 0), [&](auto x1, auto x2) {
auto x = op(x1, x2); auto x = op(x1, x2);
return make_array(x, x * x) / vec_type<value_type>{relements}; return make_array(x, x * x) * vec_type<value_type>{1.0 / relements};
})(input1, input2); })(input1, input2);
auto mean_x = means[0]; auto mean_x = means[0];
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment