Unverified commit a0a89a8e, authored by Oleg Goncharov, committed by GitHub
Browse files

[Common] Disabled the tuned NVFP4 kernels (#2615)



* Disabled the tuned NVFP4 kernels
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>

* Disabled fast math in cpp tests
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>

---------
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>
parent 52ee5ea0
......@@ -677,11 +677,6 @@ std::vector<ActivationType> Activation_types = {
ActivationType::Identity
};
std::vector<bool> use_fast_nvfp4_scaling_vec = {
false,
true
};
} // namespace
class FusedCastTransposeNVFP4TestSuite : public ::testing::TestWithParam
......@@ -743,7 +738,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn(Activation_types),
::testing::ValuesIn(tensor_dims),
::testing::Values(DType::kBFloat16),
::testing::ValuesIn(use_fast_nvfp4_scaling_vec)),
::testing::Values(false)),
[](const testing::TestParamInfo<FusedCastTransposeNVFP4TestSuite::ParamType>& info) {
std::string name = to_string(std::get<0>(info.param));
const auto& shape = std::get<1>(info.param);
......
......@@ -1168,10 +1168,10 @@ void quantize_transpose(const Tensor &input, const Tensor *noop, Tensor *output,
// TODO(Frank): Is there a better way to do this?
bool return_transpose = output->has_columnwise_data();
if (!use_2d_quantization && (input.dtype() == DType::kBFloat16)) {
quantize_transpose_tuned_1D(input, noop, output, quant_config, stream);
return;
}
// if (!use_2d_quantization && (input.dtype() == DType::kBFloat16)) {
// quantize_transpose_tuned_1D(input, noop, output, quant_config, stream);
// return;
// }
constexpr bool COMPUTE_ACTIVATIONS = false;
using ParamOP = Empty;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.