Commit 203a74a3 authored by huangwb's avatar huangwb
Browse files

fix kernel build bug

parent 70056d1e
...@@ -2,7 +2,19 @@ from setuptools import setup ...@@ -2,7 +2,19 @@ from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension from torch.utils.cpp_extension import BuildExtension, CUDAExtension
import torch import torch
extra_compile_args = ["-std=c++17"] # Compiler flags.
# Compiler flags.
CXX_FLAGS = ["-g", "-O2", "-std=c++17"]
# TODO(woosuk): Should we use -O3?
# NOTE(review): --gpu-max-threads-per-block is a hipcc (ROCm) option; it is
# presumably ignored or rejected by plain nvcc — confirm on a CUDA build.
NVCC_FLAGS = ["-O2", "-std=c++17", "--gpu-max-threads-per-block=1024"]

# Compile the extension with the same C++ ABI setting as the installed
# torch binary, otherwise the extension fails to link/load.
ABI = 1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0
CXX_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
NVCC_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]

if not torch.version.hip:
    # BUG FIX: extra_compile_args is now a dict, so the old
    # `extra_compile_args.append(...)` raised AttributeError on CUDA builds.
    # Append the arch flag to the nvcc flag list instead.
    NVCC_FLAGS.append("-arch=compute_80")

extra_compile_args = {
    "cxx": CXX_FLAGS,
    "nvcc": NVCC_FLAGS,
}
......
from setuptools import setup from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension from torch.utils.cpp_extension import BuildExtension, CUDAExtension
import torch

# Host (cxx) and device (nvcc) compiler flags.
CXX_FLAGS = ["-g", "-O2", "-std=c++17"]
# TODO(woosuk): Should we use -O3?
NVCC_FLAGS = ["-O2", "-std=c++17", "--gpu-max-threads-per-block=1024"]

# Build with the same C++ ABI as the installed torch binary so the
# extension can link against it.
ABI = 1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0
CXX_FLAGS.append(f"-D_GLIBCXX_USE_CXX11_ABI={ABI}")
NVCC_FLAGS.append(f"-D_GLIBCXX_USE_CXX11_ABI={ABI}")

extra_compile_args = {
    "cxx": CXX_FLAGS,
    "nvcc": NVCC_FLAGS,
}
setup( setup(
name="exllama_kernels", name="exllama_kernels",
...@@ -13,6 +27,7 @@ setup( ...@@ -13,6 +27,7 @@ setup(
"exllama_kernels/cuda_func/q4_matmul.cu", "exllama_kernels/cuda_func/q4_matmul.cu",
"exllama_kernels/cuda_func/q4_matrix.cu", "exllama_kernels/cuda_func/q4_matrix.cu",
], ],
extra_compile_args=extra_compile_args,
) )
], ],
cmdclass={"build_ext": BuildExtension}, cmdclass={"build_ext": BuildExtension},
......
...@@ -2,14 +2,18 @@ from setuptools import setup ...@@ -2,14 +2,18 @@ from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension from torch.utils.cpp_extension import BuildExtension, CUDAExtension
import torch import torch
extra_cuda_cflags = ["-lineinfo", "-O3"] # Compiler flags.
CXX_FLAGS = ["-g", "-O2", "-std=c++17"]
# TODO(woosuk): Should we use -O3?
NVCC_FLAGS = ["-O2", "-std=c++17","--gpu-max-threads-per-block=1024"]
if torch.version.hip: ABI = 1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0
extra_cuda_cflags += ["-DHIPBLAS_USE_HIP_HALF"] CXX_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
extra_cuda_cflags += ["-DUSE_ROCM"] NVCC_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
extra_compile_args = { extra_compile_args={
"nvcc": extra_cuda_cflags, "cxx": CXX_FLAGS,
"nvcc": NVCC_FLAGS,
} }
setup( setup(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment