# deepgemm.cmake
include(FetchContent)

# Optional local checkout: when DEEPGEMM_SRC_DIR is provided, DeepGEMM is
# taken from that directory rather than being downloaded.
# The value can be supplied through the environment or as a cmake argument;
# when the environment variable is defined, its value is the one used.
if(DEFINED ENV{DEEPGEMM_SRC_DIR})
  set(DEEPGEMM_SRC_DIR $ENV{DEEPGEMM_SRC_DIR})
endif()

# Declare where the DeepGEMM sources come from: a local directory when
# DEEPGEMM_SRC_DIR is set, otherwise a pinned commit of the upstream repo.
# NOTE(review): the FetchContent documentation lists CONFIGURE_COMMAND and
# BUILD_COMMAND as options that do not apply to FetchContent_Declare; they are
# kept here unchanged - confirm whether they can be dropped.
if(DEEPGEMM_SRC_DIR)
  FetchContent_Declare(
    deepgemm
    # Quoted so a path containing ';' is still passed as a single argument.
    SOURCE_DIR "${DEEPGEMM_SRC_DIR}"
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
  )
else()
  # This ref should be kept in sync with tools/install_deepgemm.sh
  FetchContent_Declare(
    deepgemm
    GIT_REPOSITORY https://github.com/deepseek-ai/DeepGEMM.git
    GIT_TAG 891d57b4db1071624b5c8fa0d1e51cb317fa709f
    # Only these two submodules are checked out; their include directories
    # are the ones referenced later in this file.
    GIT_SUBMODULES "third-party/cutlass" "third-party/fmt"
    GIT_PROGRESS TRUE
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
  )
endif()

# Use FetchContent_Populate (not MakeAvailable) to avoid processing
# DeepGEMM's own CMakeLists.txt which has incompatible find_package calls.
# FetchContent_GetProperties loads deepgemm_POPULATED and deepgemm_SOURCE_DIR
# into this scope; the populate step is skipped when deepgemm_POPULATED is
# already true.
FetchContent_GetProperties(deepgemm)
if(NOT deepgemm_POPULATED)
  FetchContent_Populate(deepgemm)
endif()
# deepgemm_SOURCE_DIR is the root used for every source, header and Python
# path referenced in the rest of this file.
message(STATUS "DeepGEMM is available at ${deepgemm_SOURCE_DIR}")

# Architectures enabled per CUDA compiler version:
#   >= 12.3 : 9.0a  (SM90)
#   >= 12.8 : 10.0a (SM100)
#   >= 12.9 : 10.0f (SM100, used instead of 10.0a)
#
# The version is expanded inside quotes so that an empty or undefined
# CMAKE_CUDA_COMPILER_VERSION yields a false comparison instead of a
# malformed if() expression.
set(DEEPGEMM_SUPPORT_ARCHS)
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.3)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "9.0a")
endif()
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.9)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0f")
elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.8)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0a")
endif()

# DEEPGEMM_ARCHS receives the result of matching the supported list against
# the requested CUDA_ARCHS; an empty result disables the DeepGEMM build below.
cuda_archs_loose_intersection(DEEPGEMM_ARCHS
  "${DEEPGEMM_SUPPORT_ARCHS}" "${CUDA_ARCHS}")

if(DEEPGEMM_ARCHS)
  message(STATUS "DeepGEMM CUDA architectures: ${DEEPGEMM_ARCHS}")

  # External dependencies of the extension module.
  find_package(CUDAToolkit REQUIRED)

  # torch_python is needed in addition to the regular torch libraries:
  # DeepGEMM relies on pybind11 type casters for at::Tensor (via
  # PYBIND11_MODULE), whereas vLLM's own extensions use torch::Library
  # custom ops.
  find_library(TORCH_PYTHON_LIBRARY torch_python
    PATHS "${TORCH_INSTALL_PREFIX}/lib"
    REQUIRED)

  #
  # _deep_gemm_C: the pybind11 extension built from DeepGEMM's C++ source.
  # Only C++ is compiled here; the CUDA kernels are JIT-compiled at runtime.
  #
  Python_add_library(_deep_gemm_C MODULE WITH_SOABI
    "${deepgemm_SOURCE_DIR}/csrc/python_api.cpp")

  # DeepGEMM's Python code imports the module as _C, so both the output file
  # name and the extension name macro use that name.
  set_property(TARGET _deep_gemm_C PROPERTY OUTPUT_NAME "_C")
  target_compile_definitions(_deep_gemm_C PRIVATE
    TORCH_EXTENSION_NAME=_C)

  target_include_directories(_deep_gemm_C PRIVATE
    "${deepgemm_SOURCE_DIR}/csrc"
    "${deepgemm_SOURCE_DIR}/deep_gemm/include"
    "${deepgemm_SOURCE_DIR}/third-party/cutlass/include"
    "${deepgemm_SOURCE_DIR}/third-party/cutlass/tools/util/include"
    "${deepgemm_SOURCE_DIR}/third-party/fmt/include")

  # All flags apply to C++ translation units only.
  target_compile_options(_deep_gemm_C PRIVATE
    "$<$<COMPILE_LANGUAGE:CXX>:-std=c++17;-O3;-Wno-psabi;-Wno-deprecated-declarations>")

  target_link_libraries(_deep_gemm_C PRIVATE
    torch
    ${TORCH_LIBRARIES}
    "${TORCH_PYTHON_LIBRARY}"
    CUDA::cudart
    CUDA::nvrtc)

  # The built module is placed in the vendored package directory.
  install(TARGETS _deep_gemm_C
    LIBRARY DESTINATION vllm/third_party/deep_gemm
    COMPONENT _deep_gemm_C)

  #
  # Vendor DeepGEMM Python package files
  #
  install(FILES
    "${deepgemm_SOURCE_DIR}/deep_gemm/__init__.py"
    DESTINATION vllm/third_party/deep_gemm
    COMPONENT _deep_gemm_C)

  # Each listed sub-package is copied to the same relative location under the
  # vendored package; FILES_MATCHING restricts the copy to *.py files.
  foreach(deepgemm_subpkg IN ITEMS utils testing legacy mega)
    install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/${deepgemm_subpkg}/"
      DESTINATION "vllm/third_party/deep_gemm/${deepgemm_subpkg}"
      COMPONENT _deep_gemm_C
      FILES_MATCHING PATTERN "*.py")
  endforeach()

  # Generate envs.py (normally generated by DeepGEMM's setup.py build step).
  # The file is written into the build tree at configure time and renamed to
  # envs.py when installed.
  file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
    "# Pre-installed environment variables\npersistent_envs = dict()\n")
  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
    DESTINATION vllm/third_party/deep_gemm
    RENAME envs.py
    COMPONENT _deep_gemm_C)

  #
  # Headers required by the runtime JIT compiler, which looks them up
  # relative to the package directory. Two header trees are copied into the
  # same include/ destination, in this order:
  #   1. DeepGEMM's own CUDA headers
  #   2. CUTLASS and CuTe headers (vendored for JIT, separate from vLLM's
  #      CUTLASS)
  #
  foreach(deepgemm_header_root IN ITEMS
      "${deepgemm_SOURCE_DIR}/deep_gemm/include/"
      "${deepgemm_SOURCE_DIR}/third-party/cutlass/include/")
    install(DIRECTORY "${deepgemm_header_root}"
      DESTINATION vllm/third_party/deep_gemm/include
      COMPONENT _deep_gemm_C)
  endforeach()

else()
  # No requested architecture matched the supported list: report it and
  # define a no-op target so setup.py still finds _deep_gemm_C on
  # unsupported systems.
  message(STATUS
    "DeepGEMM will not compile: unsupported CUDA architecture ${CUDA_ARCHS}")
  add_custom_target(_deep_gemm_C)
endif()