skip fp8

103f3110 · zhuwenwen · f48954a4 · 103f3110 · 103f3110
Commit 103f3110 authored Jun 12, 2024 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 7 additions and 8 deletions

CMakeLists.txt +0 -1

csrc/torch_bindings.cpp +7 -7

No files found.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,7 +8,6 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
 message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")

 include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
-include_directories(/opt/rh/devtoolset-7/root/usr/include/c++/7)

 #
 # Supported python versions.  These versions will be searched in order, the

--- a/csrc/torch_bindings.cpp
+++ b/csrc/torch_bindings.cpp
@@ -157,15 +157,15 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
  ops.impl("squeezellm_gemm", torch::kCUDA, &squeezellm_gemm);

  // Compute FP8 quantized tensor for given scaling factor.
-  ops.def(
-      "static_scaled_fp8_quant(Tensor! out, Tensor input, Tensor scale) -> ()");
-  ops.impl("static_scaled_fp8_quant", torch::kCUDA, &static_scaled_fp8_quant);
+//   ops.def(
+//       "static_scaled_fp8_quant(Tensor! out, Tensor input, Tensor scale) -> ()");
+//   ops.impl("static_scaled_fp8_quant", torch::kCUDA, &static_scaled_fp8_quant);

  // Compute FP8 quantized tensor and scaling factor.
-  ops.def(
-      "dynamic_scaled_fp8_quant(Tensor! out, Tensor input, Tensor! scale) -> "
-      "()");
-  ops.impl("dynamic_scaled_fp8_quant", torch::kCUDA, &dynamic_scaled_fp8_quant);
+//   ops.def(
+//       "dynamic_scaled_fp8_quant(Tensor! out, Tensor input, Tensor! scale) -> "
+//       "()");
+//   ops.impl("dynamic_scaled_fp8_quant", torch::kCUDA, &dynamic_scaled_fp8_quant);

  // Aligning the number of tokens to be processed by each expert such
  // that it is divisible by the block size.