Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9fdf3de3
Unverified
Commit
9fdf3de3
authored
Mar 18, 2024
by
bnellnm
Committed by
GitHub
Mar 18, 2024
Browse files
Cmake based build system (#2830)
parent
c0c17d48
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
868 additions
and
302 deletions
+868
-302
CMakeLists.txt
CMakeLists.txt
+279
-0
Dockerfile
Dockerfile
+2
-0
MANIFEST.in
MANIFEST.in
+2
-0
cmake/hipify.py
cmake/hipify.py
+73
-0
cmake/utils.cmake
cmake/utils.cmake
+334
-0
pyproject.toml
pyproject.toml
+1
-0
requirements-build.txt
requirements-build.txt
+2
-1
requirements-rocm.txt
requirements-rocm.txt
+1
-0
requirements.txt
requirements.txt
+1
-0
setup.py
setup.py
+173
-301
No files found.
CMakeLists.txt
0 → 100644
View file @
9fdf3de3
cmake_minimum_required(VERSION 3.21)

project(vllm_extensions LANGUAGES CXX)

message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")

# Helper macros/functions used throughout this file.
include("${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake")

#
# Python versions accepted by the build. Keep in sync with setup.py.
#
set(PYTHON_SUPPORTED_VERSIONS "3.8;3.9;3.10;3.11")

# NVIDIA architectures (compute capabilities) accepted by the build.
set(CUDA_SUPPORTED_ARCHS "7.0" "7.5" "8.0" "8.6" "8.9" "9.0")

# AMD GPU architectures accepted by the build.
set(HIP_SUPPORTED_ARCHS "gfx908" "gfx90a" "gfx942" "gfx1100")

#
# Expected torch versions for CUDA/ROCm.
#
# A mismatching pytorch version currently produces a warning, not an error.
#
# Note: the CUDA torch version is derived from pyproject.toml and the various
# requirements.txt files and should be kept consistent with them. The ROCm
# torch versions are derived from Dockerfile.rocm.
#
set(TORCH_SUPPORTED_VERSION_CUDA    "2.1.2")
set(TORCH_SUPPORTED_VERSION_ROCM_5X "2.0.1")
set(TORCH_SUPPORTED_VERSION_ROCM_6X "2.1.1")
#
# `VLLM_PYTHON_EXECUTABLE` is mandatory: bail out early when it is missing,
# otherwise locate the python package that matches it exactly and check that
# it is one of the supported versions.
#
if(NOT VLLM_PYTHON_EXECUTABLE)
  message(FATAL_ERROR
    "Please set VLLM_PYTHON_EXECUTABLE to the path of the desired python version"
    " before running cmake configure.")
endif()

find_python_from_executable(${VLLM_PYTHON_EXECUTABLE} "${PYTHON_SUPPORTED_VERSIONS}")

#
# Extend cmake's `CMAKE_PREFIX_PATH` with the torch location.
#
append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path")

#
# Import the torch cmake configuration.
# Torch also imports the CUDA (and partially HIP) languages with some
# customizations, so check_language/enable_language are not needed here.
#
find_package(Torch REQUIRED)

#
# `torch.utils.cpp_extension.CUDAExtension` would normally add
# `libtorch_python.so` when linking an extension. Torch's cmake configuration
# does not include that library (presumably because the cmake config targets
# standalone C++ binaries that link against torch). `libtorch_python.so`
# provides some of the pybind glue between torch and python that the VLLM
# extensions need, so add it manually with `append_torchlib_if_found` from
# torch's cmake setup.
#
append_torchlib_if_found(torch_python)
#
# Select the GPU language (`VLLM_GPU_LANG`) from what torch's cmake config
# found, and warn (not fail) when the torch version differs from the one
# expected for that platform. Fails if neither CUDA nor HIP was found.
#
if (NOT HIP_FOUND AND CUDA_FOUND)
  set(VLLM_GPU_LANG "CUDA")

  if (NOT Torch_VERSION VERSION_EQUAL ${TORCH_SUPPORTED_VERSION_CUDA})
    message(WARNING "Pytorch version ${TORCH_SUPPORTED_VERSION_CUDA} "
      "expected for CUDA build, saw ${Torch_VERSION} instead.")
  endif()
elseif(HIP_FOUND)
  set(VLLM_GPU_LANG "HIP")

  # Importing torch recognizes and sets up some HIP/ROCm configuration but does
  # not let cmake recognize .hip files. In order to get cmake to understand the
  # .hip extension automatically, HIP must be enabled explicitly.
  enable_language(HIP)

  # ROCm 5.x
  if (ROCM_VERSION_DEV_MAJOR EQUAL 5 AND
      NOT Torch_VERSION VERSION_EQUAL ${TORCH_SUPPORTED_VERSION_ROCM_5X})
    message(WARNING "Pytorch version ${TORCH_SUPPORTED_VERSION_ROCM_5X} "
      "expected for ROCm 5.x build, saw ${Torch_VERSION} instead.")
  endif()

  # ROCm 6.x
  if (ROCM_VERSION_DEV_MAJOR EQUAL 6 AND
      NOT Torch_VERSION VERSION_EQUAL ${TORCH_SUPPORTED_VERSION_ROCM_6X})
    message(WARNING "Pytorch version ${TORCH_SUPPORTED_VERSION_ROCM_6X} "
      "expected for ROCm 6.x build, saw ${Torch_VERSION} instead.")
  endif()
else()
  message(FATAL_ERROR "Can't find CUDA or HIP installation.")
endif()
#
# Replace the GPU architectures detected by cmake/torch with the subset that
# is supported for the current language. The result is stored in
# `VLLM_GPU_ARCHES`.
#
override_gpu_arches(VLLM_GPU_ARCHES
  ${VLLM_GPU_LANG}
  "${${VLLM_GPU_LANG}_SUPPORTED_ARCHS}")

#
# Ask torch for the additional GPU compilation flags for `VLLM_GPU_LANG`.
# The resulting flags are stored in `VLLM_GPU_FLAGS`.
#
get_torch_gpu_compiler_flags(VLLM_GPU_FLAGS ${VLLM_GPU_LANG})

#
# Set nvcc parallelism (CUDA builds only, and only when `NVCC_THREADS` is set).
#
if(VLLM_GPU_LANG STREQUAL "CUDA" AND NVCC_THREADS)
  list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}")
endif()
#
# Extension targets: source lists first, then the target definitions.
#

# Sources of the `_C` extension that are built for every GPU language.
set(VLLM_EXT_SRC
  csrc/cache_kernels.cu
  csrc/attention/attention_kernels.cu
  csrc/pos_encoding_kernels.cu
  csrc/activation_kernels.cu
  csrc/layernorm_kernels.cu
  csrc/quantization/squeezellm/quant_cuda_kernel.cu
  csrc/quantization/gptq/q_gemm.cu
  csrc/cuda_utils_kernels.cu
  csrc/moe_align_block_size_kernels.cu
  csrc/pybind.cpp)

# Additional `_C` sources that are only added for CUDA builds.
if(VLLM_GPU_LANG STREQUAL "CUDA")
  list(APPEND VLLM_EXT_SRC
    csrc/quantization/awq/gemm_kernels.cu
    csrc/quantization/marlin/marlin_cuda_kernel.cu
    csrc/custom_all_reduce.cu)
endif()

# Sources of the `_moe_C` extension.
set(VLLM_MOE_EXT_SRC
  csrc/moe/moe_ops.cpp
  csrc/moe/topk_softmax_kernels.cu)

#
# _C extension
#
define_gpu_extension_target(
  _C
  LANGUAGE ${VLLM_GPU_LANG}
  DESTINATION vllm
  SOURCES ${VLLM_EXT_SRC}
  COMPILE_FLAGS ${VLLM_GPU_FLAGS}
  ARCHITECTURES ${VLLM_GPU_ARCHES}
  WITH_SOABI)

#
# _moe_C extension
#
define_gpu_extension_target(
  _moe_C
  LANGUAGE ${VLLM_GPU_LANG}
  DESTINATION vllm
  SOURCES ${VLLM_MOE_EXT_SRC}
  COMPILE_FLAGS ${VLLM_GPU_FLAGS}
  ARCHITECTURES ${VLLM_GPU_ARCHES}
  WITH_SOABI)
#
# _punica_C extension
#

set(VLLM_PUNICA_EXT_SRC
  "csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu"
  "csrc/punica/bgmv/bgmv_bf16_bf16_fp16.cu"
  "csrc/punica/bgmv/bgmv_bf16_fp16_bf16.cu"
  "csrc/punica/bgmv/bgmv_bf16_fp16_fp16.cu"
  "csrc/punica/bgmv/bgmv_bf16_fp32_bf16.cu"
  "csrc/punica/bgmv/bgmv_bf16_fp32_fp16.cu"
  "csrc/punica/bgmv/bgmv_fp16_bf16_bf16.cu"
  "csrc/punica/bgmv/bgmv_fp16_bf16_fp16.cu"
  "csrc/punica/bgmv/bgmv_fp16_fp16_bf16.cu"
  "csrc/punica/bgmv/bgmv_fp16_fp16_fp16.cu"
  "csrc/punica/bgmv/bgmv_fp16_fp32_bf16.cu"
  "csrc/punica/bgmv/bgmv_fp16_fp32_fp16.cu"
  "csrc/punica/bgmv/bgmv_fp32_bf16_bf16.cu"
  "csrc/punica/bgmv/bgmv_fp32_bf16_fp16.cu"
  "csrc/punica/bgmv/bgmv_fp32_fp16_bf16.cu"
  "csrc/punica/bgmv/bgmv_fp32_fp16_fp16.cu"
  "csrc/punica/bgmv/bgmv_fp32_fp32_bf16.cu"
  "csrc/punica/bgmv/bgmv_fp32_fp32_fp16.cu"
  "csrc/punica/punica_ops.cc")

#
# Copy GPU compilation flags+update for punica: the flags that disable the
# half/bfloat16 operators and conversions are removed from the copy.
#
set(VLLM_PUNICA_GPU_FLAGS ${VLLM_GPU_FLAGS})
list(REMOVE_ITEM VLLM_PUNICA_GPU_FLAGS
  "-D__CUDA_NO_HALF_OPERATORS__"
  "-D__CUDA_NO_HALF_CONVERSIONS__"
  "-D__CUDA_NO_BFLOAT16_CONVERSIONS__"
  "-D__CUDA_NO_HALF2_OPERATORS__")

#
# Filter out CUDA architectures < 8.0 for punica.
#
if (VLLM_GPU_LANG STREQUAL "CUDA")
  set(VLLM_PUNICA_GPU_ARCHES)
  foreach(ARCH ${VLLM_GPU_ARCHES})
    string_to_ver(CODE_VER ${ARCH})
    # Entries may carry a non-numeric suffix (e.g. `80-virtual` becomes
    # `8.0-virtual`). A numeric `GREATER_EQUAL` is false for any string that is
    # not a valid number, which would silently drop such entries, so compare
    # as versions instead (the non-integer tail is ignored).
    if (CODE_VER VERSION_GREATER_EQUAL 8.0)
      list(APPEND VLLM_PUNICA_GPU_ARCHES ${ARCH})
    endif()
  endforeach()
  message(STATUS "Punica target arches: ${VLLM_PUNICA_GPU_ARCHES}")
endif()

# The target is only defined when at least one architecture survived the
# filter above; otherwise a warning is emitted and `_punica_C` does not exist.
if (VLLM_PUNICA_GPU_ARCHES)
  define_gpu_extension_target(
    _punica_C
    DESTINATION vllm
    LANGUAGE ${VLLM_GPU_LANG}
    SOURCES ${VLLM_PUNICA_EXT_SRC}
    COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS}
    ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES}
    WITH_SOABI)
else()
  message(WARNING
    "Unable to create _punica_C target because none of the "
    "requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. >= 8.0")
endif()
#
# Add the `default` target which detects which extensions should be
# built based on platform/architecture. This is the same logic that
# setup.py uses to select which extensions should be built and should
# be kept in sync.
#
# The `default` target makes direct use of cmake easier since knowledge
# of which extensions are supported has been factored in, e.g.
#
# mkdir build && cd build
# cmake -G Ninja -DVLLM_PYTHON_EXECUTABLE=`which python3` -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=../vllm ..
# cmake --build . --target default
#
add_custom_target(default)

if(VLLM_GPU_LANG STREQUAL "CUDA" OR VLLM_GPU_LANG STREQUAL "HIP")
  message(STATUS "Enabling C extension.")
  add_dependencies(default _C)
endif()

if(VLLM_GPU_LANG STREQUAL "CUDA")
  message(STATUS "Enabling moe extension.")
  add_dependencies(default _moe_C)

  # Enable punica if -DVLLM_INSTALL_PUNICA_KERNELS=ON or
  # VLLM_INSTALL_PUNICA_KERNELS is set in the environment and
  # there are supported target arches.
  #
  # The environment variable must be read with `$ENV{...}`: a bare
  # `ENV{...}` inside if() is not an environment lookup and always evaluates
  # to false. The quoted expansion is true only for cmake true constants
  # (1, ON, YES, TRUE, Y or a non-zero number).
  if (VLLM_PUNICA_GPU_ARCHES AND
      ("$ENV{VLLM_INSTALL_PUNICA_KERNELS}" OR VLLM_INSTALL_PUNICA_KERNELS))
    message(STATUS "Enabling punica extension.")
    add_dependencies(default _punica_C)
  endif()
endif()
Dockerfile
View file @
9fdf3de3
...
...
@@ -38,6 +38,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \
# copy input files
COPY
csrc csrc
COPY
setup.py setup.py
COPY
cmake cmake
COPY
CMakeLists.txt CMakeLists.txt
COPY
requirements.txt requirements.txt
COPY
pyproject.toml pyproject.toml
COPY
vllm/__init__.py vllm/__init__.py
...
...
MANIFEST.in
View file @
9fdf3de3
include LICENSE
include requirements.txt
include CMakeLists.txt
recursive-include cmake *
recursive-include csrc *
cmake/hipify.py
0 → 100755
View file @
9fdf3de3
#!/usr/bin/env python3
#
# A command line tool for running pytorch's hipify preprocessor on CUDA
# source files.
#
# See https://github.com/ROCm/hipify_torch
# and <torch install dir>/utils/hipify/hipify_python.py
#
import
argparse
import
shutil
import
os
from
torch.utils.hipify.hipify_python
import
hipify
if __name__ == '__main__':
    # Command line driver: copies the project directory into the output
    # directory, runs torch's `hipify` on the requested source files and
    # prints one resulting source path per line on stdout.
    parser = argparse.ArgumentParser()

    # Project directory where all the source + include files live.
    # Required: it is used unconditionally below, so omitting it would
    # otherwise fail later with an opaque TypeError.
    parser.add_argument(
        "-p",
        "--project_dir",
        required=True,
        help="The project directory.",
    )

    # Directory where hipified files are written (required, see above).
    parser.add_argument(
        "-o",
        "--output_dir",
        required=True,
        help="The output directory.",
    )

    # Source files to convert.
    parser.add_argument("sources",
                        help="Source files to hipify.",
                        nargs="*",
                        default=[])

    args = parser.parse_args()

    # Limit include scope to project_dir only
    includes = [os.path.join(args.project_dir, '*')]

    # Get absolute path for all source files.
    extra_files = [os.path.abspath(s) for s in args.sources]

    # Copy sources from project directory to output directory.
    # The directory might already exist to hold object files so we ignore that.
    shutil.copytree(args.project_dir, args.output_dir, dirs_exist_ok=True)

    hipify_result = hipify(project_directory=args.project_dir,
                           output_directory=args.output_dir,
                           header_include_dirs=[],
                           includes=includes,
                           extra_files=extra_files,
                           show_detailed=True,
                           is_pytorch_extension=True,
                           hipify_extra_files_only=True)

    # Map every requested source to its hipified path; a source with no entry
    # in the result (or an entry without a hipified path) is passed through
    # unchanged as its absolute path.
    hipified_sources = []
    for source in args.sources:
        s_abs = os.path.abspath(source)
        result = hipify_result[s_abs] if s_abs in hipify_result else None
        if result is not None and result.hipified_path is not None:
            hipified_sources.append(result.hipified_path)
        else:
            hipified_sources.append(s_abs)

    assert (len(hipified_sources) == len(args.sources))

    # Print hipified source files.
    print("\n".join(hipified_sources))
cmake/utils.cmake
0 → 100644
View file @
9fdf3de3
#
# Attempt to find the python package that uses the same python executable as
# `EXECUTABLE` and is one of the `SUPPORTED_VERSIONS`.
#
# Arguments:
#   EXECUTABLE         - Path of the python interpreter to use.
#   SUPPORTED_VERSIONS - List of accepted "<major>.<minor>" versions; any
#                        extra arguments (ARGN) are appended to this list.
#
# Issues a FATAL_ERROR when no python is found or when its version is not in
# the supported list.
#
# This is a macro, so `Python_EXECUTABLE`, `_VER`, `_SUPPORTED_VERSIONS_LIST`
# and the results of `find_package(Python)` are all set in the caller's scope.
#
macro (find_python_from_executable EXECUTABLE SUPPORTED_VERSIONS)
  # NOTE(review): macro parameters are substituted textually, so the
  # `${EXECUTABLE}` references below presumably still expand to the caller's
  # original argument rather than to the real path stored here -- confirm
  # whether the resolved path was meant to be used.
  file(REAL_PATH ${EXECUTABLE} EXECUTABLE)
  # Pin FindPython to the requested interpreter.
  set(Python_EXECUTABLE ${EXECUTABLE})
  find_package(Python COMPONENTS Interpreter Development.Module)
  if (NOT Python_FOUND)
    message(FATAL_ERROR "Unable to find python matching: ${EXECUTABLE}.")
  endif()
  # Compare only "<major>.<minor>" against the supported list.
  set(_VER "${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}")
  set(_SUPPORTED_VERSIONS_LIST ${SUPPORTED_VERSIONS} ${ARGN})
  if (NOT _VER IN_LIST _SUPPORTED_VERSIONS_LIST)
    message(FATAL_ERROR
      "Python version (${_VER}) is not one of the supported versions: "
      "${_SUPPORTED_VERSIONS_LIST}.")
  endif()
  message(STATUS "Found python matching: ${EXECUTABLE}.")
endmacro()
#
# Evaluate the python snippet `EXPR` with `Python_EXECUTABLE -c`.
#
#   OUT     - Name of the variable (set in the caller's scope) that receives
#             python's standard output with trailing whitespace stripped.
#   EXPR    - Python code to run.
#   ERR_MSG - Prefix of the fatal error raised, together with python's
#             standard error, when the process does not exit with status 0.
#
function(run_python OUT EXPR ERR_MSG)
  execute_process(
    COMMAND "${Python_EXECUTABLE}" "-c" "${EXPR}"
    RESULT_VARIABLE _run_status
    OUTPUT_VARIABLE _run_stdout
    ERROR_VARIABLE _run_stderr
    OUTPUT_STRIP_TRAILING_WHITESPACE)

  if(_run_status EQUAL 0)
    # Success: hand the captured output back to the caller.
    set(${OUT} ${_run_stdout} PARENT_SCOPE)
  else()
    message(FATAL_ERROR "${ERR_MSG}: ${_run_stderr}")
  endif()
endfunction()
# Run `EXPR` in python after importing `PKG`. Use the result of this to extend
# `CMAKE_PREFIX_PATH` so the torch cmake configuration can be imported.
#
# Arguments:
#   PKG  - Name of the python package to import.
#   EXPR - Python expression whose printed value is appended to
#          `CMAKE_PREFIX_PATH`.
#
# This is a macro, so `CMAKE_PREFIX_PATH` and the helper variable
# `_PREFIX_PATH` are modified in the caller's scope. A failure of the python
# call is fatal (see `run_python`).
macro (append_cmake_prefix_path PKG EXPR)
  run_python(_PREFIX_PATH
    "import ${PKG}; print(${EXPR})" "Failed to locate ${PKG} path")
  list(APPEND CMAKE_PREFIX_PATH ${_PREFIX_PATH})
endmacro()
#
# Add a target named `hipify${NAME}` that runs the hipify preprocessor on a set
# of CUDA source files. The names of the corresponding "hipified" sources are
# stored in `OUT_SRCS`.
#
# Arguments:
#   OUT_SRCS  - Name of the variable (set in the caller's scope) receiving the
#               hipified source paths followed by the untouched C++ sources.
#   NAME      - Suffix of the generated `hipify${NAME}` target.
#   ORIG_SRCS - List of extension sources (CUDA and C++).
#
function (hipify_sources_target OUT_SRCS NAME ORIG_SRCS)
  #
  # Split into C++ and non-C++ (i.e. CUDA) sources.
  #
  # The pattern only matches a literal `.cc` or `.cpp` extension at the end of
  # the name. The dot is escaped twice because cmake consumes one backslash
  # when it parses the quoted argument.
  #
  set(SRCS ${ORIG_SRCS})
  set(CXX_SRCS ${ORIG_SRCS})
  list(FILTER SRCS EXCLUDE REGEX "\\.(cc|cpp)$")
  list(FILTER CXX_SRCS INCLUDE REGEX "\\.(cc|cpp)$")

  #
  # Generate ROCm/HIP source file names from CUDA file names.
  # Since HIP files are generated code, they will appear in the build area
  # `CMAKE_CURRENT_BINARY_DIR` directory rather than the original csrc dir.
  #
  set(HIP_SRCS)
  foreach (SRC ${SRCS})
    # `foo.cu` -> `foo.hip`, then every `cuda` in the path becomes `hip`.
    string(REGEX REPLACE "\\.cu$" ".hip" SRC ${SRC})
    string(REGEX REPLACE "cuda" "hip" SRC ${SRC})
    list(APPEND HIP_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${SRC}")
  endforeach()

  set(CSRC_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/csrc)
  # Run the script with the interpreter selected for the build (the same one
  # `run_python` uses) instead of relying on the script's shebang.
  add_custom_target(
    hipify${NAME}
    COMMAND "${Python_EXECUTABLE}" ${CMAKE_SOURCE_DIR}/cmake/hipify.py -p ${CMAKE_SOURCE_DIR}/csrc -o ${CSRC_BUILD_DIR} ${SRCS}
    DEPENDS ${CMAKE_SOURCE_DIR}/cmake/hipify.py ${SRCS}
    BYPRODUCTS ${HIP_SRCS}
    COMMENT "Running hipify on ${NAME} extension source files."
    VERBATIM)

  # Swap out original extension sources with hipified sources.
  list(APPEND HIP_SRCS ${CXX_SRCS})
  set(${OUT_SRCS} ${HIP_SRCS} PARENT_SCOPE)
endfunction()
#
# Get additional GPU compiler flags from torch.
#
# Arguments:
#   OUT_GPU_FLAGS - Name of the variable (set in the caller's scope) that
#                   receives the list of flags.
#   GPU_LANG      - "CUDA" or "HIP". For any other value the output is empty.
#
function (get_torch_gpu_compiler_flags OUT_GPU_FLAGS GPU_LANG)
  # Start from an empty list so a `GPU_FLAGS` variable inherited from the
  # calling scope cannot leak into the result.
  set(GPU_FLAGS)

  if (GPU_LANG STREQUAL "CUDA")
    #
    # Get common NVCC flags from torch.
    #
    run_python(GPU_FLAGS
      "from torch.utils.cpp_extension import COMMON_NVCC_FLAGS; print(';'.join(COMMON_NVCC_FLAGS))"
      "Failed to determine torch nvcc compiler flags")

    if (CUDA_VERSION VERSION_GREATER_EQUAL 11.8)
      list(APPEND GPU_FLAGS "-DENABLE_FP8_E5M2")
    endif()

  elseif(GPU_LANG STREQUAL "HIP")
    #
    # Get common HIP/HIPCC flags from torch.
    #
    run_python(GPU_FLAGS
      "import torch.utils.cpp_extension as t; print(';'.join(t.COMMON_HIP_FLAGS + t.COMMON_HIPCC_FLAGS))"
      "Failed to determine torch hip compiler flags")

    list(APPEND GPU_FLAGS
      "-DUSE_ROCM"
      "-U__HIP_NO_HALF_CONVERSIONS__"
      "-U__HIP_NO_HALF_OPERATORS__"
      "-fno-gpu-rdc")

  endif()
  set(${OUT_GPU_FLAGS} ${GPU_FLAGS} PARENT_SCOPE)
endfunction()
# Convert a `gencode` style version number into a dotted cmake version by
# inserting a `.` before the last digit of each digit run, e.g. `80` -> `8.0`.
# The result is stored in the variable named by `OUT_VER`.
macro(string_to_ver OUT_VER IN_STR)
  string(REGEX REPLACE "([0-9]+)([0-9])" "\\1.\\2" ${OUT_VER} ${IN_STR})
endmacro()
#
# Override the GPU architectures detected by cmake/torch and filter them by
# `GPU_SUPPORTED_ARCHES`. Sets the final set of architectures in
# `GPU_ARCHES`.
#
# Arguments:
#   GPU_ARCHES           - Name of the output variable.
#   GPU_LANG             - "HIP" or "CUDA".
#   GPU_SUPPORTED_ARCHES - List of supported architectures; extra arguments
#                          (ARGN) are appended to it.
#
# Note: this is defined as a macro since it updates `CMAKE_CUDA_FLAGS`.
#
macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
  set(_GPU_SUPPORTED_ARCHES_LIST ${GPU_SUPPORTED_ARCHES} ${ARGN})
  message(STATUS "${GPU_LANG} supported arches: ${_GPU_SUPPORTED_ARCHES_LIST}")

  if ("${GPU_LANG}" STREQUAL "HIP")
    #
    # `GPU_ARCHES` controls the `--offload-arch` flags.
    # `CMAKE_HIP_ARCHITECTURES` is set up by torch and can be controlled
    # via the `PYTORCH_ROCM_ARCH` env variable.
    #

    #
    # Find the intersection of the supported + detected architectures to
    # set the module architecture flags.
    #
    set(${GPU_ARCHES})
    foreach (_ARCH ${CMAKE_HIP_ARCHITECTURES})
      if (_ARCH IN_LIST _GPU_SUPPORTED_ARCHES_LIST)
        list(APPEND ${GPU_ARCHES} ${_ARCH})
      endif()
    endforeach()

    if(NOT ${GPU_ARCHES})
      message(FATAL_ERROR
        "None of the detected ROCm architectures: ${CMAKE_HIP_ARCHITECTURES} is"
        " supported. Supported ROCm architectures are: ${_GPU_SUPPORTED_ARCHES_LIST}.")
    endif()

  elseif("${GPU_LANG}" STREQUAL "CUDA")
    #
    # Setup/process CUDA arch flags.
    #
    # The torch cmake setup hardcodes the detected architecture flags in
    # `CMAKE_CUDA_FLAGS`. Since `CMAKE_CUDA_FLAGS` is a "global" variable, it
    # can't be modified on a per-target basis, e.g. for the `punica` extension.
    # So, all the `-gencode` flags need to be extracted and removed from
    # `CMAKE_CUDA_FLAGS` for processing so they can be passed by another method.
    # Since it's not possible to use `target_compile_options` for adding target
    # specific `-gencode` arguments, the target's `CUDA_ARCHITECTURES` property
    # must be used instead. This requires repackaging the architecture flags
    # into a format that cmake expects for `CUDA_ARCHITECTURES`.
    #
    # This is a bit fragile in that it depends on torch using `-gencode` as opposed
    # to one of the other nvcc options to specify architectures.
    #
    # Note: torch uses the `TORCH_CUDA_ARCH_LIST` environment variable to override
    # detected architectures.
    #
    message(DEBUG "initial CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")

    # Extract all `-gencode` flags from `CMAKE_CUDA_FLAGS`.
    # The input is quoted so that an empty `CMAKE_CUDA_FLAGS` reaches the
    # explicit error below instead of failing inside `string()`.
    string(REGEX MATCHALL "-gencode arch=[^ ]+" _CUDA_ARCH_FLAGS
      "${CMAKE_CUDA_FLAGS}")

    # Remove all `-gencode` flags from `CMAKE_CUDA_FLAGS` since they will be modified
    # and passed back via the `CUDA_ARCHITECTURES` property.
    string(REGEX REPLACE "-gencode arch=[^ ]+ *" "" CMAKE_CUDA_FLAGS
      "${CMAKE_CUDA_FLAGS}")

    # If this error is triggered, it might mean that torch has changed how it sets
    # up nvcc architecture code generation flags.
    if (NOT _CUDA_ARCH_FLAGS)
      message(FATAL_ERROR
        "Could not find any architecture related code generation flags in "
        "CMAKE_CUDA_FLAGS. (${CMAKE_CUDA_FLAGS})")
    endif()

    message(DEBUG "final CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
    message(DEBUG "arch flags: ${_CUDA_ARCH_FLAGS}")

    # Initialize the architecture lists to empty.
    set(${GPU_ARCHES})

    # Process each `gencode` flag.
    foreach(_ARCH ${_CUDA_ARCH_FLAGS})
      # For each flag, extract the version number and whether it refers to PTX
      # or native code.
      # Note: if a regex matches then `CMAKE_MATCH_1` holds the binding
      # for that match.

      string(REGEX MATCH "arch=compute_\([0-9]+a?\)" _COMPUTE "${_ARCH}")
      if (_COMPUTE)
        set(_COMPUTE ${CMAKE_MATCH_1})
      endif()

      string(REGEX MATCH "code=sm_\([0-9]+a?\)" _SM "${_ARCH}")
      if (_SM)
        set(_SM ${CMAKE_MATCH_1})
      endif()

      string(REGEX MATCH "code=compute_\([0-9]+a?\)" _CODE "${_ARCH}")
      if (_CODE)
        set(_CODE ${CMAKE_MATCH_1})
      endif()

      # Make sure the virtual architecture can be matched.
      if (NOT _COMPUTE)
        message(FATAL_ERROR
          "Could not determine virtual architecture from: ${_ARCH}.")
      endif()

      # One of sm_ or compute_ must exist.
      if ((NOT _SM) AND (NOT _CODE))
        message(FATAL_ERROR
          "Could not determine a codegen architecture from: ${_ARCH}.")
      endif()

      # Native code (`sm_`) takes precedence; otherwise the entry is PTX and
      # gets the `-virtual` suffix.
      if (_SM)
        set(_VIRT "")
        set(_CODE_ARCH ${_SM})
      else()
        set(_VIRT "-virtual")
        set(_CODE_ARCH ${_CODE})
      endif()

      # Check if the current version is in the supported arch list.
      string_to_ver(_CODE_VER ${_CODE_ARCH})
      if (NOT _CODE_VER IN_LIST _GPU_SUPPORTED_ARCHES_LIST)
        # Report the architecture being dropped (`_CODE_VER`), not `_VER`,
        # which this macro never sets.
        message(STATUS "discarding unsupported CUDA arch ${_CODE_VER}.")
        continue()
      endif()

      # Add it to the arch list.
      list(APPEND ${GPU_ARCHES} "${_CODE_ARCH}${_VIRT}")
    endforeach()
  endif()
  message(STATUS "${GPU_LANG} target arches: ${${GPU_ARCHES}}")
endmacro()
#
# Define a target named `GPU_MOD_NAME` for a single extension. The
# arguments are:
#
# DESTINATION <dest>         - Module destination directory.
# LANGUAGE <lang>            - The GPU language for this module, e.g CUDA, HIP,
#                              etc.
# SOURCES <sources>          - List of source files relative to CMakeLists.txt
#                              directory.
#
# Optional arguments:
#
# ARCHITECTURES <arches>     - A list of target GPU architectures in cmake
#                              format.
#                              Refer `CMAKE_CUDA_ARCHITECTURES` documentation
#                              and `CMAKE_HIP_ARCHITECTURES` for more info.
#                              ARCHITECTURES will use cmake's defaults if
#                              not provided.
# COMPILE_FLAGS <flags>      - Extra compiler flags passed to NVCC/hip.
# INCLUDE_DIRECTORIES <dirs> - Extra include directories.
# LINK_LIBRARIES <libraries> - Extra link libraries. `LIBRARIES` is accepted
#                              as an equivalent spelling.
# WITH_SOABI                 - Generate library with python SOABI suffix name.
#
# Note: optimization level/debug info is set via cmake build type.
#
function (define_gpu_extension_target GPU_MOD_NAME)
  # Both `LINK_LIBRARIES` (the documented keyword) and `LIBRARIES` (the
  # keyword originally parsed here) are accepted so existing callers of either
  # spelling keep working.
  cmake_parse_arguments(PARSE_ARGV 1
    GPU
    "WITH_SOABI"
    "DESTINATION;LANGUAGE"
    "SOURCES;ARCHITECTURES;COMPILE_FLAGS;INCLUDE_DIRECTORIES;LIBRARIES;LINK_LIBRARIES")

  # Add hipify preprocessing step when building with HIP/ROCm.
  if (GPU_LANGUAGE STREQUAL "HIP")
    hipify_sources_target(GPU_SOURCES ${GPU_MOD_NAME} "${GPU_SOURCES}")
  endif()

  # Turn the boolean option into the keyword expected by Python_add_library
  # (or into nothing when the option was not given).
  if (GPU_WITH_SOABI)
    set(GPU_WITH_SOABI WITH_SOABI)
  else()
    set(GPU_WITH_SOABI)
  endif()

  Python_add_library(${GPU_MOD_NAME} MODULE "${GPU_SOURCES}" ${GPU_WITH_SOABI})

  if (GPU_LANGUAGE STREQUAL "HIP")
    # Make this target dependent on the hipify preprocessor step.
    add_dependencies(${GPU_MOD_NAME} hipify${GPU_MOD_NAME})
  endif()

  # Without ARCHITECTURES the target keeps cmake's default architectures.
  if (GPU_ARCHITECTURES)
    set_target_properties(${GPU_MOD_NAME} PROPERTIES
      ${GPU_LANGUAGE}_ARCHITECTURES "${GPU_ARCHITECTURES}")
  endif()

  set_property(TARGET ${GPU_MOD_NAME} PROPERTY CXX_STANDARD 17)

  # The extra flags only apply to sources compiled as `GPU_LANGUAGE`.
  target_compile_options(${GPU_MOD_NAME} PRIVATE
    $<$<COMPILE_LANGUAGE:${GPU_LANGUAGE}>:${GPU_COMPILE_FLAGS}>)

  target_compile_definitions(${GPU_MOD_NAME} PRIVATE
    "-DTORCH_EXTENSION_NAME=${GPU_MOD_NAME}")

  target_include_directories(${GPU_MOD_NAME} PRIVATE csrc
    ${GPU_INCLUDE_DIRECTORIES})

  target_link_libraries(${GPU_MOD_NAME} PRIVATE ${TORCH_LIBRARIES}
    ${GPU_LIBRARIES} ${GPU_LINK_LIBRARIES})

  install(TARGETS ${GPU_MOD_NAME} LIBRARY DESTINATION ${GPU_DESTINATION})
endfunction()
pyproject.toml
View file @
9fdf3de3
[build-system]
# Should be mirrored in requirements-build.txt
requires
=
[
"cmake>=3.21"
,
"ninja"
,
"packaging"
,
"setuptools >= 49.4.0"
,
...
...
requirements-build.txt
View file @
9fdf3de3
# Should be mirrored in pyproject.toml
cmake>=3.21
ninja
packaging
setuptools>=49.4.0
torch==2.1.2
wheel
\ No newline at end of file
wheel
requirements-rocm.txt
View file @
9fdf3de3
cmake>=3.21
ninja # For faster builds.
typing-extensions>=4.8.0
starlette
...
...
requirements.txt
View file @
9fdf3de3
cmake
>=3.21
ninja
# For faster builds.
psutil
ray
>= 2.9
...
...
setup.py
View file @
9fdf3de3
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment