"docs/vscode:/vscode.git/clone" did not exist on "fa4e0fb028460cf5f4eb9cc90e206d0d6f35b026"
Commit 103f3110 authored by zhuwenwen's avatar zhuwenwen
Browse files

skip fp8

parent f48954a4
......@@ -8,7 +8,6 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
include_directories(/opt/rh/devtoolset-7/root/usr/include/c++/7)
#
# Supported python versions. These versions will be searched in order, the
......
......@@ -157,15 +157,15 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
ops.impl("squeezellm_gemm", torch::kCUDA, &squeezellm_gemm);
// Compute FP8 quantized tensor for given scaling factor.
ops.def(
"static_scaled_fp8_quant(Tensor! out, Tensor input, Tensor scale) -> ()");
ops.impl("static_scaled_fp8_quant", torch::kCUDA, &static_scaled_fp8_quant);
// ops.def(
// "static_scaled_fp8_quant(Tensor! out, Tensor input, Tensor scale) -> ()");
// ops.impl("static_scaled_fp8_quant", torch::kCUDA, &static_scaled_fp8_quant);
// Compute FP8 quantized tensor and scaling factor.
ops.def(
"dynamic_scaled_fp8_quant(Tensor! out, Tensor input, Tensor! scale) -> "
"()");
ops.impl("dynamic_scaled_fp8_quant", torch::kCUDA, &dynamic_scaled_fp8_quant);
// ops.def(
// "dynamic_scaled_fp8_quant(Tensor! out, Tensor input, Tensor! scale) -> "
// "()");
// ops.impl("dynamic_scaled_fp8_quant", torch::kCUDA, &dynamic_scaled_fp8_quant);
// Aligning the number of tokens to be processed by each expert such
// that it is divisible by the block size.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment