Unverified Commit 8add942d authored by tpoisonooo, committed by GitHub

improvement(build): enable ninja and gold linker (#767)

* feat(build): enable ninja and lld

* fix(.github): add ninja installation

* fix(CI): remove dimsize=256

* fix(CI): add option for generate.sh

* fix(docs): update
parent 8c672a7b
@@ -52,5 +52,5 @@ jobs:
 source /opt/conda/bin/activate
 conda activate py38
 mkdir build && cd build
-bash ../generate.sh
+bash ../generate.sh make
 make -j$(nproc) && make install
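Reviewer note: the `make` argument added here matches the rewritten `generate.sh` further down, which now emits Ninja build files by default and falls back to Makefiles only when `make` is passed. A sketch of the two invocation modes this commit introduces, run from a fresh `build/` directory:

```shell
# Default path: generate.sh passes -G Ninja to cmake, so build with ninja
bash ../generate.sh
ninja -j"$(nproc)" && ninja install

# CI path (this hunk): the "make" argument clears -G Ninja, keeping GNU Make
bash ../generate.sh make
make -j"$(nproc)" && make install
```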
@@ -103,7 +103,9 @@ if(USE_TRITONSERVER_DATATYPE)
 endif()
 set(CXX_STD "17" CACHE STRING "C++ standard")
+# enable gold linker for binary and .so
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold")
+set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold")
 set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
 set(TF_PATH "" CACHE STRING "TensorFlow path")
......
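For context on the new linker flags: `-fuse-ld=gold` tells the GCC/Clang driver to link with GNU gold instead of the default BFD `ld`, which usually shortens link times for large C++ binaries and shared objects. A minimal sketch to confirm gold is installed and actually being used, assuming GNU binutils and a `cc` driver are available (`probe.c` is a hypothetical scratch file):

```shell
# gold ships with GNU binutils; confirm it is present
ld.gold --version | head -n 1

# Link a trivial program with the same flag the CMake change adds
echo 'int main(void) { return 0; }' > probe.c
cc -fuse-ld=gold probe.c -o probe

# Binaries linked by gold carry a .note.gnu.gold-version section
readelf --string-dump=.note.gnu.gold-version probe
```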
@@ -12,7 +12,7 @@ conda activate $PYTHON_VERSION
 cd lmdeploy
 mkdir -p build && cd build && rm -rf *
-bash ../generate.sh
+bash ../generate.sh make
 make -j$(nproc) && make install
 if [ $? != 0 ]; then
     echo "build failed"
......
@@ -67,10 +67,11 @@ Then, follow the steps below to set up the compilation environment:
 ```
 - build and install lmdeploy libraries:
 ```shell
+apt install ninja-build # install ninja
 cd lmdeploy # the home folder of lmdeploy
 mkdir build && cd build
 sh ../generate.sh
-make -j$(nproc) && make install
+ninja -j$(nproc) && ninja install
 ```
 - install lmdeploy python package:
 ```shell
......
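A small note on the new build command: unlike `make`, `ninja` already defaults to roughly one job per CPU core, so the explicit `-j$(nproc)` is optional; keeping it mirrors the old `make -j$(nproc)` habit and is harmless. Either of the following is equivalent in practice (the Chinese doc hunk below ends up adding both forms back to back, which looks like an accidental duplicate worth a follow-up):

```shell
ninja && ninja install                 # Ninja picks a parallel job count itself
ninja -j"$(nproc)" && ninja install    # explicit job count, same effect on most machines
```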
@@ -67,10 +67,12 @@ wheel files are stored under `builder/manywheel/cuda11.8_dist`.
 ```
 - build and install lmdeploy:
 ```shell
+apt install ninja-build # install the faster Ninja
 cd lmdeploy # the root directory of the lmdeploy source
 mkdir build && cd build
 sh ../generate.sh
-make -j$(nproc) && make install
+ninja && ninja install
+ninja -j$(nproc) && ninja install
 ```
 - install the lmdeploy python package:
 ```shell
......
 #!/bin/sh
-cmake .. \
+builder="-G Ninja"
+if [ "$1" == "make" ]; then
+    builder=""
+fi
+cmake ${builder} .. \
 -DCMAKE_BUILD_TYPE=RelWithDebInfo \
 -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
 -DCMAKE_INSTALL_PREFIX=./install \
......
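One portability aside on the rewritten `generate.sh`: the script declares `#!/bin/sh` but uses `==` inside `[ ]`, which is a bashism; strictly POSIX shells such as dash accept only a single `=` for string comparison. A hedged POSIX-safe sketch of the same selection logic, where the unquoted `${builder}` expansion is deliberate so that `-G Ninja` splits into two cmake arguments:

```shell
#!/bin/sh
# POSIX-safe variant: single '=' works in every sh, '==' only in bash/ksh
builder="-G Ninja"
if [ "$1" = "make" ]; then
    builder=""
fi
# ${builder} left unquoted on purpose: it must expand to zero or two words
cmake ${builder} .. -DCMAKE_BUILD_TYPE=RelWithDebInfo
```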
@@ -4,10 +4,10 @@ project(flash_attention2)
 add_library(${PROJECT_NAME} STATIC
     flash_api.cpp
-    flash_fwd_hdim32_fp16_sm80.cu
-    flash_fwd_hdim64_fp16_sm80.cu
+    # flash_fwd_hdim32_fp16_sm80.cu
+    # flash_fwd_hdim64_fp16_sm80.cu
     flash_fwd_hdim128_fp16_sm80.cu
-    flash_fwd_hdim256_fp16_sm80.cu
+    # flash_fwd_hdim256_fp16_sm80.cu
 )
 target_include_directories(${PROJECT_NAME} PRIVATE ${CUTLASS_DIR}/include)
 target_link_libraries(${PROJECT_NAME} PRIVATE nvidia::cutlass::cutlass)
......
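The three commented-out sources leave only the head-dim 128 flash-attention kernel in the build, which is what the commit message's "remove dimsize=256" bullet refers to and what the `#if 0` guards below enforce on the C++ side. A quick hedged check that no other head-dim kernels get compiled, assuming the default `build/` directory layout:

```shell
# After a rebuild, only hdim128 kernel artifacts should remain
find build -name 'flash_fwd_hdim*_fp16_sm80*' -type f
```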
@@ -63,6 +63,7 @@ void run_flash_fwd(Flash_fwd_params& params, cudaStream_t stream)
 });
 }
+#if 0
 template<typename T>
 void run_mha_fwd_hdim32(Flash_fwd_params& params, cudaStream_t stream)
 {
@@ -100,6 +101,7 @@ void run_mha_fwd_hdim64(Flash_fwd_params& params, cudaStream_t stream)
 }
 });
 }
+#endif
 template<typename T>
 void run_mha_fwd_hdim128(Flash_fwd_params& params, cudaStream_t stream)
@@ -145,6 +147,7 @@ void run_mha_fwd_hdim128(Flash_fwd_params& params, cudaStream_t stream)
 });
 }
+#if 0
 template<typename T>
 void run_mha_fwd_hdim256(Flash_fwd_params& params, cudaStream_t stream)
 {
@@ -174,3 +177,4 @@ void run_mha_fwd_hdim256(Flash_fwd_params& params, cudaStream_t stream)
 // Is_causal>(params, stream);
 });
 }
+#endif
@@ -38,6 +38,7 @@
 } \
 }()
+#if 0
 #define FWD_HEADDIM_SWITCH(HEADDIM, ...) \
 [&] { \
 if (HEADDIM <= 32) { \
@@ -57,3 +58,10 @@
 return __VA_ARGS__(); \
 } \
 }()
+#else
+#define FWD_HEADDIM_SWITCH(HEADDIM, ...) \
+    [&] { \
+        constexpr static int kHeadDim = 128; \
+        return __VA_ARGS__(); \
+    }()
+#endif
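Worth flagging for review: the replacement `FWD_HEADDIM_SWITCH` no longer inspects its `HEADDIM` argument at all; it unconditionally binds `kHeadDim = 128`, so any caller with a different head dimension would silently dispatch to the 128 instantiation. A minimal sketch showing the new expansion via the preprocessor (`sw.cc` is a hypothetical scratch file):

```shell
cat > sw.cc <<'EOF'
#define FWD_HEADDIM_SWITCH(HEADDIM, ...) \
    [&] { \
        constexpr static int kHeadDim = 128; \
        return __VA_ARGS__(); \
    }()
// HEADDIM (here `d`) is ignored; the lambda body always sees kHeadDim == 128
int dispatch(int d) { return FWD_HEADDIM_SWITCH(d, [&] { return kHeadDim; }); }
EOF
c++ -E sw.cc | tail -n 3
```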