Modified from NVIDIA FasterTransformer: https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/cutlass_extensions/include/cutlass_extensions/interleaved_numeric_conversion.h
* Modified from NVIDIA [TRT-LLM](https://github.com/NVIDIA/TensorRT-LLM/tree/d37b507f41a87457fe9f10f7459d08f5db235745/cpp/tensorrt_llm/kernels/weightOnlyBatchedGemv)
@@ -53,17 +53,16 @@ inline auto dispatch(Tensor::ScalarType scalarType, F &&func) {
...
@@ -53,17 +53,16 @@ inline auto dispatch(Tensor::ScalarType scalarType, F &&func) {
}
}
#pragma nv_diagnostic push
#pragma nv_diagnostic push
// warning #445-D: template parameter "scalar_t" is not used in declaring the parameter types of function template "lambda []()->auto::operator auto (*)()"
// warning #445-D: template parameter "scalar_t" is not used in declaring the parameter types of function template