"docs/vscode:/vscode.git/clone" did not exist on "1244948885e364b3103aa3b515ce66fa4d9955fa"
deepgemm.cmake 5.14 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
include(FetchContent)

# Optional local checkout: when DEEPGEMM_SRC_DIR is set, DeepGEMM is built
# from that directory and nothing is downloaded.
# The value can be supplied either as a cmake argument or through the
# environment. The environment variable takes precedence.
if(DEFINED ENV{DEEPGEMM_SRC_DIR})
  set(DEEPGEMM_SRC_DIR $ENV{DEEPGEMM_SRC_DIR})
endif()

# Declare where the DeepGEMM sources come from: the pinned upstream commit by
# default, or the user-provided checkout when DEEPGEMM_SRC_DIR is set.
if(NOT DEEPGEMM_SRC_DIR)
  # This ref should be kept in sync with tools/install_deepgemm.sh
  # Only the cutlass and fmt submodules are requested.
  FetchContent_Declare(
    deepgemm
    GIT_REPOSITORY https://github.com/deepseek-ai/DeepGEMM.git
    GIT_TAG 477618cd51baffca09c4b0b87e97c03fe827ef03
    GIT_SUBMODULES "third-party/cutlass" "third-party/fmt"
    GIT_PROGRESS TRUE
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
  )
else()
  # Local checkout: use the directory as-is, no download step.
  FetchContent_Declare(
    deepgemm
    SOURCE_DIR ${DEEPGEMM_SRC_DIR}
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
  )
endif()

# Use FetchContent_Populate (not MakeAvailable) to avoid processing
# DeepGEMM's own CMakeLists.txt which has incompatible find_package calls.
# The POPULATED guard skips the populate call when the content has already
# been populated earlier in this configure run.
# NOTE(review): the single-argument FetchContent_Populate() form is
# deprecated starting with CMake 3.30 (policy CMP0169) — confirm against the
# CMake versions this project supports.
FetchContent_GetProperties(deepgemm)
if(NOT deepgemm_POPULATED)
  FetchContent_Populate(deepgemm)
endif()
# deepgemm_SOURCE_DIR is used below to locate sources, headers and Python
# files of the fetched (or user-provided) DeepGEMM tree.
message(STATUS "DeepGEMM is available at ${deepgemm_SOURCE_DIR}")

# Architectures DeepGEMM is enabled for, gated on the CUDA compiler version:
#   - CUDA 12.3+          : SM90  ("9.0a")
#   - CUDA 12.8 (< 12.9)  : SM100 ("10.0a")
#   - CUDA 12.9+          : SM100 ("10.0f")
# The version is expanded inside quotes so that an empty or undefined
# CMAKE_CUDA_COMPILER_VERSION simply fails every check (no supported
# architectures) instead of producing a malformed if() expression and
# aborting the configure step.
set(DEEPGEMM_SUPPORT_ARCHS)
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.3)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "9.0a")
endif()
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.9)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0f")
elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.8)
  list(APPEND DEEPGEMM_SUPPORT_ARCHS "10.0a")
endif()

# Combine the supported architectures with the requested CUDA_ARCHS; the
# result decides below whether DeepGEMM is built at all.
# NOTE(review): cuda_archs_loose_intersection is defined outside this file —
# its exact matching rules should be checked there.
cuda_archs_loose_intersection(DEEPGEMM_ARCHS
  "${DEEPGEMM_SUPPORT_ARCHS}" "${CUDA_ARCHS}")

# Build and vendor DeepGEMM only when at least one architecture survived the
# intersection above; otherwise register an empty placeholder target with the
# same name.
if(DEEPGEMM_ARCHS)
  message(STATUS "DeepGEMM CUDA architectures: ${DEEPGEMM_ARCHS}")

  # Provides the CUDA::cudart and CUDA::nvrtc imported targets linked below.
  find_package(CUDAToolkit REQUIRED)

  #
  # Build the _C pybind11 extension from DeepGEMM's C++ source.
  # This is a CXX-only module — CUDA kernels are JIT-compiled at runtime.
  #
  Python_add_library(_deep_gemm_C MODULE WITH_SOABI
    "${deepgemm_SOURCE_DIR}/csrc/python_api.cpp")

  # The pybind11 module name must be _C to match DeepGEMM's Python imports.
  set_target_properties(_deep_gemm_C PROPERTIES OUTPUT_NAME "_C")

  # Definitions are given without a "-D" prefix; CMake adds the
  # compiler-specific define flag itself.
  target_compile_definitions(_deep_gemm_C PRIVATE
    TORCH_EXTENSION_NAME=_C)

  # Header search paths: DeepGEMM's own sources plus the cutlass and fmt
  # trees under third-party/.
  target_include_directories(_deep_gemm_C PRIVATE
    "${deepgemm_SOURCE_DIR}/csrc"
    "${deepgemm_SOURCE_DIR}/deep_gemm/include"
    "${deepgemm_SOURCE_DIR}/third-party/cutlass/include"
    "${deepgemm_SOURCE_DIR}/third-party/cutlass/tools/util/include"
    "${deepgemm_SOURCE_DIR}/third-party/fmt/include")

  # Flags are restricted to C++ sources via the COMPILE_LANGUAGE generator
  # expression.
  target_compile_options(_deep_gemm_C PRIVATE
    $<$<COMPILE_LANGUAGE:CXX>:-std=c++17>
    $<$<COMPILE_LANGUAGE:CXX>:-O3>
    $<$<COMPILE_LANGUAGE:CXX>:-Wno-psabi>
    $<$<COMPILE_LANGUAGE:CXX>:-Wno-deprecated-declarations>)

  # torch_python is required because DeepGEMM uses pybind11 type casters
  # for at::Tensor (via PYBIND11_MODULE), unlike vLLM's own extensions which
  # use torch::Library custom ops.
  # HINTS (rather than PATHS) makes the Torch installation's lib directory be
  # searched before the default system locations, so a torch_python library
  # installed system-wide cannot shadow the one belonging to the Torch that
  # is actually being built against.
  find_library(TORCH_PYTHON_LIBRARY torch_python
    HINTS "${TORCH_INSTALL_PREFIX}/lib"
    REQUIRED)

  target_link_libraries(_deep_gemm_C PRIVATE
    torch ${TORCH_LIBRARIES} "${TORCH_PYTHON_LIBRARY}"
    CUDA::cudart CUDA::nvrtc)

  # Install the shared library into the vendored package directory
  install(TARGETS _deep_gemm_C
    LIBRARY DESTINATION vllm/third_party/deep_gemm
    COMPONENT _deep_gemm_C)

  #
  # Vendor DeepGEMM Python package files
  # (only *.py files are copied from the utils, testing and legacy
  # sub-packages)
  #
  install(FILES
    "${deepgemm_SOURCE_DIR}/deep_gemm/__init__.py"
    DESTINATION vllm/third_party/deep_gemm
    COMPONENT _deep_gemm_C)

  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/utils/"
    DESTINATION vllm/third_party/deep_gemm/utils
    COMPONENT _deep_gemm_C
    FILES_MATCHING PATTERN "*.py")

  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/testing/"
    DESTINATION vllm/third_party/deep_gemm/testing
    COMPONENT _deep_gemm_C
    FILES_MATCHING PATTERN "*.py")

  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/legacy/"
    DESTINATION vllm/third_party/deep_gemm/legacy
    COMPONENT _deep_gemm_C
    FILES_MATCHING PATTERN "*.py")

  # Generate envs.py (normally generated by DeepGEMM's setup.py build step)
  # The file is written into the binary directory at configure time and
  # renamed to envs.py on install.
  file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
    "# Pre-installed environment variables\npersistent_envs = dict()\n")
  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/deep_gemm_envs.py"
    DESTINATION vllm/third_party/deep_gemm
    RENAME envs.py
    COMPONENT _deep_gemm_C)

  #
  # Install include files needed for JIT compilation at runtime.
  # The JIT compiler finds these relative to the package directory.
  #

  # DeepGEMM's own CUDA headers
  install(DIRECTORY "${deepgemm_SOURCE_DIR}/deep_gemm/include/"
    DESTINATION vllm/third_party/deep_gemm/include
    COMPONENT _deep_gemm_C)

  # CUTLASS and CuTe headers (vendored for JIT, separate from vLLM's CUTLASS)
  # These land in the same include directory as DeepGEMM's own headers.
  install(DIRECTORY "${deepgemm_SOURCE_DIR}/third-party/cutlass/include/"
    DESTINATION vllm/third_party/deep_gemm/include
    COMPONENT _deep_gemm_C)

else()
  message(STATUS "DeepGEMM will not compile: "
    "unsupported CUDA architecture ${CUDA_ARCHS}")
  # Create empty target so setup.py doesn't fail on unsupported systems
  add_custom_target(_deep_gemm_C)
endif()