Commit b7c23756 authored by Oleg Goncharov, committed by wenjh
Browse files

[Common] Disabled the tuned NVFP4 kernels (#2615)



* Disabled the tuned NVFP4 kernels
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>

* Disabled fast math in cpp tests
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>

---------
Signed-off-by: Oleg Goncharov <ogoncharov@nvidia.com>
parent 1fabbb27
...@@ -677,11 +677,6 @@ std::vector<ActivationType> Activation_types = { ...@@ -677,11 +677,6 @@ std::vector<ActivationType> Activation_types = {
ActivationType::Identity ActivationType::Identity
}; };
std::vector<bool> use_fast_nvfp4_scaling_vec = {
false,
true
};
} // namespace } // namespace
class FusedCastTransposeNVFP4TestSuite : public ::testing::TestWithParam class FusedCastTransposeNVFP4TestSuite : public ::testing::TestWithParam
...@@ -743,7 +738,7 @@ INSTANTIATE_TEST_SUITE_P( ...@@ -743,7 +738,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn(Activation_types), ::testing::ValuesIn(Activation_types),
::testing::ValuesIn(tensor_dims), ::testing::ValuesIn(tensor_dims),
::testing::Values(DType::kBFloat16), ::testing::Values(DType::kBFloat16),
::testing::ValuesIn(use_fast_nvfp4_scaling_vec)), ::testing::Values(false)),
[](const testing::TestParamInfo<FusedCastTransposeNVFP4TestSuite::ParamType>& info) { [](const testing::TestParamInfo<FusedCastTransposeNVFP4TestSuite::ParamType>& info) {
std::string name = to_string(std::get<0>(info.param)); std::string name = to_string(std::get<0>(info.param));
const auto& shape = std::get<1>(info.param); const auto& shape = std::get<1>(info.param);
......
...@@ -1172,10 +1172,10 @@ void quantize_transpose(const Tensor &input, const Tensor *noop, Tensor *output, ...@@ -1172,10 +1172,10 @@ void quantize_transpose(const Tensor &input, const Tensor *noop, Tensor *output,
// TODO(Frank): Is there a better way to do this? // TODO(Frank): Is there a better way to do this?
bool return_transpose = output->has_columnwise_data(); bool return_transpose = output->has_columnwise_data();
if (!use_2d_quantization && (input.dtype() == DType::kBFloat16)) { // if (!use_2d_quantization && (input.dtype() == DType::kBFloat16)) {
quantize_transpose_tuned_1D(input, noop, output, quant_config, stream); // quantize_transpose_tuned_1D(input, noop, output, quant_config, stream);
return; // return;
} // }
constexpr bool COMPUTE_ACTIVATIONS = false; constexpr bool COMPUTE_ACTIVATIONS = false;
using ParamOP = Empty; using ParamOP = Empty;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment