Commit cffbd40a authored by Khalique Ahmed
Browse files

testing diff eps value

parent 66d50268
......@@ -51,7 +51,8 @@ struct layernorm_matcher
return f("div")(arg(0)(x_minus_mean()),
arg(1)(skip_broadcasts(f("sqrt")(
arg(0)(f("add")(either_arg(0, 1)(variance(), has_value(1e-12f))))))));
arg(0)(f("add")(either_arg(0, 1)(variance(), is_constant()))))))); // 71.7596/sec
// arg(0)(f("add")(either_arg(0, 1)(variance(), has_value(1e-12f)))))))); // 70.8157/sec
}
// Returns the matcher for this struct by forwarding to layernorm_onnx().
auto matcher() const { return layernorm_onnx(); }
......
......@@ -61,8 +61,8 @@ __device__ void generic_binary_layernorm(
r.inner([&](auto& y, auto x1, auto x2, auto... xs) {
auto x = op(x1, x2);
auto m = x - mean_x;
// m * rsqrt(mean(m ^ 2) + 1e-12)
y = compute(m * rsqrt(mean_x2 - mean_x + value_type{1e-12}), xs...);
// m * rsqrt(mean(m ^ 2) + 1.00136e-05)
y = compute(m * rsqrt(mean_x2 - mean_x + value_type{1.00136e-05}), xs...);
})(output, input1, input2, inputs...);
});
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment