Commit 624873b5 authored by fengzch
Browse files

update readme and setup.py

parent 349853d0
...@@ -4,5 +4,6 @@ source /usr/local/bin/fastpt -T ...@@ -4,5 +4,6 @@ source /usr/local/bin/fastpt -T
export CPLUS_INCLUDE_PATH=/opt/dtk/roctracer/include:$CPLUS_INCLUDE_PATH export CPLUS_INCLUDE_PATH=/opt/dtk/roctracer/include:$CPLUS_INCLUDE_PATH
export AMDGPU_TARGETS="gfx906;gfx926;gfx928;gfx936" export AMDGPU_TARGETS="gfx906;gfx926;gfx928;gfx936"
export FASTPT_USE_ASM=1
CXX=hipcc CC=hipcc python setup.py bdist_wheel CXX=hipcc CC=hipcc python setup.py bdist_wheel
...@@ -94,6 +94,13 @@ if __name__ == "__main__": ...@@ -94,6 +94,13 @@ if __name__ == "__main__":
ROOT_DIR = os.path.dirname(__file__) ROOT_DIR = os.path.dirname(__file__)
ignores = [
"third_party/cutlass/*",
"third_party/json/*",
"third_party/mio/*",
"third_party/spdlog/*",
]
INCLUDE_DIRS = [ INCLUDE_DIRS = [
"src", "src",
"third_party/cutlass/include", "third_party/cutlass/include",
...@@ -124,9 +131,10 @@ if __name__ == "__main__": ...@@ -124,9 +131,10 @@ if __name__ == "__main__":
#assert len(sm_targets) > 0, "No SM targets found" #assert len(sm_targets) > 0, "No SM targets found"
GCC_FLAGS = ["-DENABLE_BF16=1", "-DBUILD_NUNCHAKU=1", "-fvisibility=hidden", "-g", "-std=c++2a", "-UNDEBUG", "-Og"] GCC_FLAGS = ["-w", "-DENABLE_BF16=1", "-DBUILD_NUNCHAKU=1", "-fvisibility=hidden", "-g", "-std=c++2a", "-UNDEBUG", "-Og"]
MSVC_FLAGS = ["/DENABLE_BF16=1", "/DBUILD_NUNCHAKU=1", "/std:c++2a", "/UNDEBUG", "/Zc:__cplusplus", "/FS"] MSVC_FLAGS = ["-w", "/DENABLE_BF16=1", "/DBUILD_NUNCHAKU=1", "/std:c++2a", "/UNDEBUG", "/Zc:__cplusplus", "/FS"]
NVCC_FLAGS = [ NVCC_FLAGS = [
"-w",
"-DDCU_ASM", "-DDCU_ASM",
"-DENABLE_BF16=1", "-DENABLE_BF16=1",
"-DBUILD_NUNCHAKU=1", "-DBUILD_NUNCHAKU=1",
...@@ -173,40 +181,41 @@ if __name__ == "__main__": ...@@ -173,40 +181,41 @@ if __name__ == "__main__":
*ncond("src/SanaModel.cpp"), *ncond("src/SanaModel.cpp"),
"src/Serialization.cpp", "src/Serialization.cpp",
"src/Module.cpp", "src/Module.cpp",
# *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_hdim64_fp16_sm80.cu"), *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_hdim64_fp16_sm80.cu"),
# *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_hdim64_bf16_sm80.cu"), *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_hdim64_bf16_sm80.cu"),
# *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_hdim128_fp16_sm80.cu"), *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_hdim128_fp16_sm80.cu"),
# *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_hdim128_bf16_sm80.cu"), *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_hdim128_bf16_sm80.cu"),
# *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_block_hdim64_fp16_sm80.cu"), *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_block_hdim64_fp16_sm80.cu"),
# *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_block_hdim64_bf16_sm80.cu"), *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_block_hdim64_bf16_sm80.cu"),
# *ncond( *ncond(
# "third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_block_hdim128_fp16_sm80.cu" "third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_block_hdim128_fp16_sm80.cu"
# ), ),
# *ncond( *ncond(
# "third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_block_hdim128_bf16_sm80.cu" "third_party/Block-Sparse-Attention/csrc/block_sparse_attn/src/flash_fwd_block_hdim128_bf16_sm80.cu"
# ), ),
# "src/kernels/activation_kernels.cu", "src/kernels/activation_kernels.cu",
# "src/kernels/layernorm_kernels.cu", "src/kernels/layernorm_kernels.cu",
# "src/kernels/misc_kernels.cu", "src/kernels/misc_kernels.cu",
# "src/kernels/zgemm/gemm_w4a4.cu", "src/kernels/zgemm/gemm_w4a4.cu",
# "src/kernels/zgemm/gemm_w4a4_test.cu", "src/kernels/zgemm/gemm_w4a4_test.cu",
# "src/kernels/zgemm/gemm_w4a4_launch_fp16_int4.cu", "src/kernels/zgemm/gemm_w4a4_launch_fp16_int4.cu",
# "src/kernels/zgemm/gemm_w4a4_launch_fp16_int4_fasteri2f.cu", "src/kernels/zgemm/gemm_w4a4_launch_fp16_int4_fasteri2f.cu",
# "src/kernels/zgemm/gemm_w4a4_launch_fp16_fp4.cu", "src/kernels/zgemm/gemm_w4a4_launch_fp16_fp4.cu",
# "src/kernels/zgemm/gemm_w4a4_launch_bf16_int4.cu", "src/kernels/zgemm/gemm_w4a4_launch_bf16_int4.cu",
# "src/kernels/zgemm/gemm_w4a4_launch_bf16_fp4.cu", "src/kernels/zgemm/gemm_w4a4_launch_bf16_fp4.cu",
# "src/kernels/zgemm/gemm_w8a8.cu", "src/kernels/zgemm/gemm_w8a8.cu",
# "src/kernels/zgemm/attention.cu", "src/kernels/zgemm/attention.cu",
# "src/kernels/dwconv.cu", "src/kernels/dwconv.cu",
# "src/kernels/gemm_batched.cu", "src/kernels/gemm_batched.cu",
# "src/kernels/gemm_f16.cu", "src/kernels/gemm_f16.cu",
# "src/kernels/awq/gemm_awq.cu", "src/kernels/awq/gemm_awq.cu",
"src/kernels/awq/gemv_awq.cu", "src/kernels/awq/gemv_awq.cu",
*ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/flash_api.cpp"), *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/flash_api.cpp"),
*ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/flash_api_adapter.cpp"), *ncond("third_party/Block-Sparse-Attention/csrc/block_sparse_attn/flash_api_adapter.cpp"),
], ],
extra_compile_args={"gcc": GCC_FLAGS, "msvc": MSVC_FLAGS, "nvcc": NVCC_FLAGS, "nvcc_msvc": NVCC_MSVC_FLAGS}, extra_compile_args={"gcc": GCC_FLAGS, "msvc": MSVC_FLAGS, "nvcc": NVCC_FLAGS, "nvcc_msvc": NVCC_MSVC_FLAGS},
include_dirs=INCLUDE_DIRS, include_dirs=INCLUDE_DIRS,
ignores=ignores,
) )
setuptools.setup( setuptools.setup(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment