Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
103f3110
Commit
103f3110
authored
Jun 12, 2024
by
zhuwenwen
Browse files
skip fp8
parent
f48954a4
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
8 deletions
+7
-8
CMakeLists.txt
CMakeLists.txt
+0
-1
csrc/torch_bindings.cpp
csrc/torch_bindings.cpp
+7
-7
No files found.
CMakeLists.txt
View file @
103f3110
...
@@ -8,7 +8,6 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
...
@@ -8,7 +8,6 @@ message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message
(
STATUS
"Target device:
${
VLLM_TARGET_DEVICE
}
"
)
message
(
STATUS
"Target device:
${
VLLM_TARGET_DEVICE
}
"
)
include
(
${
CMAKE_CURRENT_LIST_DIR
}
/cmake/utils.cmake
)
include
(
${
CMAKE_CURRENT_LIST_DIR
}
/cmake/utils.cmake
)
include_directories
(
/opt/rh/devtoolset-7/root/usr/include/c++/7
)
#
#
# Supported python versions. These versions will be searched in order, the
# Supported python versions. These versions will be searched in order, the
...
...
csrc/torch_bindings.cpp
View file @
103f3110
...
@@ -157,15 +157,15 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
...
@@ -157,15 +157,15 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
ops
.
impl
(
"squeezellm_gemm"
,
torch
::
kCUDA
,
&
squeezellm_gemm
);
ops
.
impl
(
"squeezellm_gemm"
,
torch
::
kCUDA
,
&
squeezellm_gemm
);
// Compute FP8 quantized tensor for given scaling factor.
// Compute FP8 quantized tensor for given scaling factor.
ops
.
def
(
//
ops.def(
"static_scaled_fp8_quant(Tensor! out, Tensor input, Tensor scale) -> ()"
);
//
"static_scaled_fp8_quant(Tensor! out, Tensor input, Tensor scale) -> ()");
ops
.
impl
(
"static_scaled_fp8_quant"
,
torch
::
kCUDA
,
&
static_scaled_fp8_quant
);
//
ops.impl("static_scaled_fp8_quant", torch::kCUDA, &static_scaled_fp8_quant);
// Compute FP8 quantized tensor and scaling factor.
// Compute FP8 quantized tensor and scaling factor.
ops
.
def
(
//
ops.def(
"dynamic_scaled_fp8_quant(Tensor! out, Tensor input, Tensor! scale) -> "
//
"dynamic_scaled_fp8_quant(Tensor! out, Tensor input, Tensor! scale) -> "
"()"
);
//
"()");
ops
.
impl
(
"dynamic_scaled_fp8_quant"
,
torch
::
kCUDA
,
&
dynamic_scaled_fp8_quant
);
//
ops.impl("dynamic_scaled_fp8_quant", torch::kCUDA, &dynamic_scaled_fp8_quant);
// Aligning the number of tokens to be processed by each expert such
// Aligning the number of tokens to be processed by each expert such
// that it is divisible by the block size.
// that it is divisible by the block size.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment