Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
55d63b12
Unverified
Commit
55d63b12
authored
Aug 22, 2024
by
Lucas Wilkinson
Committed by
GitHub
Aug 22, 2024
Browse files
[Bugfix] Don't build machete on cuda <12.0 (#7757)
parent
4f419c00
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
47 additions
and
27 deletions
+47
-27
CMakeLists.txt
CMakeLists.txt
+35
-27
csrc/quantization/machete/machete_pytorch.cu
csrc/quantization/machete/machete_pytorch.cu
+12
-0
No files found.
CMakeLists.txt
View file @
55d63b12
...
...
@@ -10,6 +10,9 @@ message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")
include
(
${
CMAKE_CURRENT_LIST_DIR
}
/cmake/utils.cmake
)
# Suppress potential warnings about unused manually-specified variables
set
(
ignoreMe
"
${
VLLM_PYTHON_PATH
}
"
)
#
# Supported python versions. These versions will be searched in order, the
# first match will be selected. These should be kept in sync with setup.py.
...
...
@@ -228,35 +231,38 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
endif
()
#
# For the Machete kernels we automatically generate sources for various
# preselected input type pairs and schedules.
# Generate sources:
execute_process
(
COMMAND
${
CMAKE_COMMAND
}
-E env
PYTHONPATH=
${
CMAKE_CURRENT_SOURCE_DIR
}
/csrc/cutlass_extensions/:
${
CUTLASS_DIR
}
/python/:
${
VLLM_PYTHON_PATH
}
:$PYTHONPATH
${
Python_EXECUTABLE
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/csrc/quantization/machete/generate.py
RESULT_VARIABLE machete_generation_result
OUTPUT_VARIABLE machete_generation_output
OUTPUT_FILE
${
CMAKE_CURRENT_BINARY_DIR
}
/machete_generation.log
ERROR_FILE
${
CMAKE_CURRENT_BINARY_DIR
}
/machete_generation.log
)
# Machete kernels
if
(
NOT machete_generation_result EQUAL 0
)
message
(
FATAL_ERROR
"Machete generation failed."
" Result:
\"
${
machete_generation_result
}
\"
"
"
\n
Check the log for details: "
"
${
CMAKE_CURRENT_BINARY_DIR
}
/machete_generation.log"
)
else
()
message
(
STATUS
"Machete generation completed successfully."
)
endif
()
# The machete kernels only work on hopper and require CUDA 12.0 or later.
if
(
${
CMAKE_CUDA_COMPILER_VERSION
}
VERSION_GREATER 12.0
)
#
# For the Machete kernels we automatically generate sources for various
# preselected input type pairs and schedules.
# Generate sources:
execute_process
(
COMMAND
${
CMAKE_COMMAND
}
-E env
PYTHONPATH=
${
CMAKE_CURRENT_SOURCE_DIR
}
/csrc/cutlass_extensions/:
${
CUTLASS_DIR
}
/python/:
${
VLLM_PYTHON_PATH
}
:$PYTHONPATH
${
Python_EXECUTABLE
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/csrc/quantization/machete/generate.py
RESULT_VARIABLE machete_generation_result
OUTPUT_VARIABLE machete_generation_output
OUTPUT_FILE
${
CMAKE_CURRENT_BINARY_DIR
}
/machete_generation.log
ERROR_FILE
${
CMAKE_CURRENT_BINARY_DIR
}
/machete_generation.log
)
if
(
NOT machete_generation_result EQUAL 0
)
message
(
FATAL_ERROR
"Machete generation failed."
" Result:
\"
${
machete_generation_result
}
\"
"
"
\n
Check the log for details: "
"
${
CMAKE_CURRENT_BINARY_DIR
}
/machete_generation.log"
)
else
()
message
(
STATUS
"Machete generation completed successfully."
)
endif
()
# Add machete generated sources
file
(
GLOB MACHETE_GEN_SOURCES
"csrc/quantization/machete/generated/*.cu"
)
list
(
APPEND VLLM_EXT_SRC
${
MACHETE_GEN_SOURCES
}
)
message
(
STATUS
"Machete generated sources:
${
MACHETE_GEN_SOURCES
}
"
)
# Add machete generated sources
file
(
GLOB MACHETE_GEN_SOURCES
"csrc/quantization/machete/generated/*.cu"
)
list
(
APPEND VLLM_EXT_SRC
${
MACHETE_GEN_SOURCES
}
)
message
(
STATUS
"Machete generated sources:
${
MACHETE_GEN_SOURCES
}
"
)
# See comment above for scaled_mm_c3x (same if condition)
if
(
${
CMAKE_CUDA_COMPILER_VERSION
}
VERSION_GREATER 12.0
)
set_source_files_properties
(
${
MACHETE_GEN_SOURCES
}
PROPERTIES
...
...
@@ -264,7 +270,9 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
"-gencode arch=compute_90a,code=sm_90a"
)
endif
()
# Add pytorch binding
# Add pytorch binding for machete (added even when CUDA < 12.0 so that we can
# raise an error telling the user that this was built with an incompatible
# CUDA version)
list
(
APPEND VLLM_EXT_SRC
csrc/quantization/machete/machete_pytorch.cu
)
endif
()
...
...
csrc/quantization/machete/machete_pytorch.cu
View file @
55d63b12
...
...
@@ -37,9 +37,13 @@ static auto scalar_type_dispatch(ScalarType const& type, Fn fn) {
//
// Returns the schedule names available for the B element type `btype`.
//
// When `__CUDACC_VER_MAJOR__` is defined and >= 12, the call dispatches on
// `*btype` through scalar_type_dispatch and forwards to
// GemmDispatcher<half_t, B>::supported_schedules() for the selected B type.
// In any other build the TORCH_CHECK below fails unconditionally with a
// message stating the CUDA requirement.
std::vector<std::string> supported_schedules(ScalarTypeTorchPtr const& btype) {
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 12
  // Invoked by scalar_type_dispatch with a value whose type identifies B.
  auto const query_schedules = [&](auto b_type_tag) {
    using Dispatcher = GemmDispatcher<half_t, decltype(b_type_tag)>;
    return Dispatcher::supported_schedules();
  };
  return scalar_type_dispatch(*btype, query_schedules);
#else
  TORCH_CHECK(false, "Machete requires CUDA 12.0 or later");
#endif
}
torch
::
Tensor
gemm
(
torch
::
Tensor
const
&
A
,
torch
::
Tensor
const
&
B
,
...
...
@@ -50,6 +54,7 @@ torch::Tensor gemm(torch::Tensor const& A, torch::Tensor const& B,
c10
::
optional
<
torch
::
Tensor
>
const
&
C
,
c10
::
optional
<
double
>
alpha
,
c10
::
optional
<
double
>
beta
,
c10
::
optional
<
std
::
string
>
schedule
)
{
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 12
auto
args
=
PyTorchArguments
{.
A
=
A
,
.
B
=
B
,
.
scales
=
scales
,
...
...
@@ -67,13 +72,20 @@ torch::Tensor gemm(torch::Tensor const& A, torch::Tensor const& B,
return
GemmDispatcher
<
ComputeType
,
decltype
(
BType
)
>::
dispatch
(
args
);
});
});
#else
TORCH_CHECK
(
false
,
"Machete requires CUDA 12.0 or later"
);
#endif
}
// Prepacks the tensor `B` for the B element type `btype`.
//
// When `__CUDACC_VER_MAJOR__` is defined and >= 12, the call dispatches on
// `*btype` through scalar_type_dispatch and returns the result of
// PrepackBDispatcher<half_t, B, half_t>::dispatch(B) for the selected B type.
// In any other build the TORCH_CHECK below fails unconditionally with a
// message stating the CUDA requirement.
torch::Tensor prepack_B(torch::Tensor const& B,
                        ScalarTypeTorchPtr const& btype) {
#if defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 12
  // Invoked by scalar_type_dispatch with a value whose type identifies B.
  auto const run_prepack = [&](auto b_type_tag) {
    using Dispatcher =
        PrepackBDispatcher<half_t, decltype(b_type_tag), half_t>;
    return Dispatcher::dispatch(B);
  };
  return scalar_type_dispatch(*btype, run_prepack);
#else
  TORCH_CHECK(false, "Machete requires CUDA 12.0 or later");
#endif
}
};
// namespace machete
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment