OpenDAS / text-generation-inference · Commits

Commit 203a74a3, authored May 29, 2024 by huangwb

    fix kernel build bug

Parent: 70056d1e
Showing 3 changed files with 38 additions and 7 deletions:

- server/custom_kernels/setup.py (+13, -1)
- server/exllama_kernels/setup.py (+15, -0)
- server/exllamav2_kernels/setup.py (+10, -6)
server/custom_kernels/setup.py

```diff
@@ -2,7 +2,19 @@ from setuptools import setup
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 import torch
 
-extra_compile_args = ["-std=c++17"]
+# Compiler flags.
+CXX_FLAGS = ["-g", "-O2", "-std=c++17"]
+# TODO(woosuk): Should we use -O3?
+NVCC_FLAGS = ["-O2", "-std=c++17", "--gpu-max-threads-per-block=1024"]
+
+ABI = 1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0
+CXX_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
+NVCC_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
+
+extra_compile_args = {
+    "cxx": CXX_FLAGS,
+    "nvcc": NVCC_FLAGS,
+}
 
 if not torch.version.hip:
     extra_compile_args.append("-arch=compute_80")
```
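The pattern is the same in all three files: a single ad-hoc flag list is replaced by separate host ("cxx") and device ("nvcc") flag lists, both pinned to the `_GLIBCXX_USE_CXX11_ABI` value that the installed PyTorch was compiled with. An ABI mismatch between an extension and libtorch typically surfaces as undefined-symbol errors at import time, plausibly the kind of build bug the commit message refers to. A minimal sketch for inspecting that ABI bit (the helper name is mine, not part of the commit):

```python
import torch

def torch_cxx11_abi() -> int:
    """Return the _GLIBCXX_USE_CXX11_ABI value the installed torch was built with."""
    return 1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0

if __name__ == "__main__":
    # Extensions must be compiled with the same -D_GLIBCXX_USE_CXX11_ABI value,
    # which is what the setup.py changes in this commit arrange.
    print(f"torch {torch.__version__}: _GLIBCXX_USE_CXX11_ABI={torch_cxx11_abi()}")
```

One caveat visible in the hunk above: the unchanged `if not torch.version.hip:` branch still calls `.append()` on `extra_compile_args`, which is now a dict rather than a list, so that branch would raise `AttributeError` on a CUDA build; it only stays harmless on the HIP/ROCm toolchain this fork appears to target.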
server/exllama_kernels/setup.py

```diff
 from setuptools import setup
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+import torch
+
+# Compiler flags.
+CXX_FLAGS = ["-g", "-O2", "-std=c++17"]
+# TODO(woosuk): Should we use -O3?
+NVCC_FLAGS = ["-O2", "-std=c++17", "--gpu-max-threads-per-block=1024"]
+
+ABI = 1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0
+CXX_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
+NVCC_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
+
+extra_compile_args = {
+    "cxx": CXX_FLAGS,
+    "nvcc": NVCC_FLAGS,
+}
 
 setup(
     name="exllama_kernels",
@@ -13,6 +27,7 @@ setup(
                 "exllama_kernels/cuda_func/q4_matmul.cu",
                 "exllama_kernels/cuda_func/q4_matrix.cu",
             ],
+            extra_compile_args=extra_compile_args,
         )
     ],
     cmdclass={"build_ext": BuildExtension},
```
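Before this commit the file passed no compile flags at all (+15, -0); the fix both defines the flag sets and, in the second hunk, actually hands them to `CUDAExtension`. A self-contained sketch of the shape all three setup.py files now share; the module and source file names below are placeholders, not taken from the commit:

```python
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
import torch

# Host (g++) and device (nvcc/hipcc) flags are kept separate.
CXX_FLAGS = ["-g", "-O2", "-std=c++17"]
NVCC_FLAGS = ["-O2", "-std=c++17"]

# Compile the extension with the same libstdc++ ABI as torch itself.
ABI = 1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0
CXX_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
NVCC_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]

setup(
    name="my_kernels",  # placeholder, not from the commit
    ext_modules=[
        CUDAExtension(
            name="my_kernels",
            sources=["my_kernels/ext.cpp", "my_kernels/kernel.cu"],  # placeholders
            # A dict keyed by "cxx"/"nvcc" routes each list to the right compiler.
            extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
        )
    ],
    cmdclass={"build_ext": BuildExtension},
)
```

Built the usual way (`pip install .` or `python setup.py build_ext --inplace`), the flags now take effect during compilation.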
server/exllamav2_kernels/setup.py

```diff
@@ -2,14 +2,18 @@ from setuptools import setup
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 import torch
 
-extra_cuda_cflags = ["-lineinfo", "-O3"]
+# Compiler flags.
+CXX_FLAGS = ["-g", "-O2", "-std=c++17"]
+# TODO(woosuk): Should we use -O3?
+NVCC_FLAGS = ["-O2", "-std=c++17", "--gpu-max-threads-per-block=1024"]
 
-if torch.version.hip:
-    extra_cuda_cflags += ["-DHIPBLAS_USE_HIP_HALF"]
-    extra_cuda_cflags += ["-DUSE_ROCM"]
+ABI = 1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0
+CXX_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
+NVCC_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"]
 
-extra_compile_args = {
-    "nvcc": extra_cuda_cflags,
+extra_compile_args = {
+    "cxx": CXX_FLAGS,
+    "nvcc": NVCC_FLAGS,
 }
 
 setup(
```
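Besides adopting the shared flag block, this hunk appears to drop the `torch.version.hip` special case along with the `-DHIPBLAS_USE_HIP_HALF` / `-DUSE_ROCM` defines; since `--gpu-max-threads-per-block` is itself a HIP compiler option, the new flag set looks tailored to a ROCm/HIP-only build. A quick, commit-independent way to check which backend a given torch build targets:

```python
import torch

# torch.version.cuda is a version string on CUDA builds and None otherwise;
# torch.version.hip is the analogous string on ROCm/HIP builds.
if torch.version.hip:
    print(f"ROCm/HIP build: {torch.version.hip}")
elif torch.version.cuda:
    print(f"CUDA build: {torch.version.cuda}")
else:
    print("CPU-only build")
```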