Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6897826e
Commit
6897826e
authored
Sep 10, 2024
by
zhuwenwen
Browse files
update version and requirements-rocm.txt
parent
5a9db327
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
27 additions
and
13 deletions
+27
-13
CMakeLists.txt
CMakeLists.txt
+1
-1
csrc/ops.h
csrc/ops.h
+5
-5
csrc/torch_bindings.cpp
csrc/torch_bindings.cpp
+4
-4
requirements-rocm.txt
requirements-rocm.txt
+7
-1
setup.py
setup.py
+10
-2
No files found.
CMakeLists.txt
View file @
6897826e
...
...
@@ -191,7 +191,7 @@ set(VLLM_EXT_SRC
"csrc/attention/attention_kernels_opt.cu"
"csrc/opt/layernorm_kernels_opt.cu"
"csrc/quantization/squeezellm/quant_cuda_kernel.cu"
"csrc/quantization/gptq/q_gemm.cu"
#
"csrc/quantization/gptq/q_gemm.cu"
"csrc/quantization/compressed_tensors/int8_quant_kernels.cu"
# "csrc/quantization/fp8/common.cu"
"csrc/cuda_utils_kernels.cu"
...
...
csrc/ops.h
View file @
6897826e
...
...
@@ -215,12 +215,12 @@ void dynamic_scaled_int8_quant(torch::Tensor& out, torch::Tensor const& input,
void
squeezellm_gemm
(
torch
::
Tensor
vec
,
torch
::
Tensor
mat
,
torch
::
Tensor
mul
,
torch
::
Tensor
lookup_table
);
torch
::
Tensor
gptq_gemm
(
torch
::
Tensor
a
,
torch
::
Tensor
b_q_weight
,
torch
::
Tensor
b_gptq_qzeros
,
torch
::
Tensor
b_gptq_scales
,
torch
::
Tensor
b_g_idx
,
bool
use_exllama
,
int64_t
bit
);
//
torch::Tensor gptq_gemm(torch::Tensor a, torch::Tensor b_q_weight,
//
torch::Tensor b_gptq_qzeros,
//
torch::Tensor b_gptq_scales, torch::Tensor b_g_idx,
//
bool use_exllama, int64_t bit);
void
gptq_shuffle
(
torch
::
Tensor
q_weight
,
torch
::
Tensor
q_perm
,
int64_t
bit
);
//
void gptq_shuffle(torch::Tensor q_weight, torch::Tensor q_perm, int64_t bit);
// void static_scaled_fp8_quant(torch::Tensor& out, torch::Tensor const& input,
// torch::Tensor const& scale);
...
...
csrc/torch_bindings.cpp
View file @
6897826e
...
...
@@ -295,12 +295,12 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
#endif
// Quantized GEMM for GPTQ.
ops
.
def
(
"gptq_gemm"
,
&
gptq_gemm
);
ops
.
impl
(
"gptq_gemm"
,
torch
::
kCUDA
,
&
gptq_gemm
);
//
ops.def("gptq_gemm", &gptq_gemm);
//
ops.impl("gptq_gemm", torch::kCUDA, &gptq_gemm);
// Post processing for GPTQ.
ops
.
def
(
"gptq_shuffle(Tensor! q_weight, Tensor q_perm, int bit) -> ()"
);
ops
.
impl
(
"gptq_shuffle"
,
torch
::
kCUDA
,
&
gptq_shuffle
);
//
ops.def("gptq_shuffle(Tensor! q_weight, Tensor q_perm, int bit) -> ()");
//
ops.impl("gptq_shuffle", torch::kCUDA, &gptq_shuffle);
// Quantized GEMM for SqueezeLLM.
ops
.
def
(
...
...
requirements-rocm.txt
View file @
6897826e
...
...
@@ -9,3 +9,9 @@ ray >= 2.10.0
peft
pytest-asyncio
tensorizer>=2.9.0
torch == 2.1.0
triton == 2.1.0
flash_attn == 2.0.4
xformers == 0.0.25
lmslim == 0.1.0
\ No newline at end of file
setup.py
View file @
6897826e
...
...
@@ -375,13 +375,21 @@ def get_sha(root: Union[str, Path]) -> str:
def
get_version_add
(
sha
:
Optional
[
str
]
=
None
)
->
str
:
vllm_root
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
add_version_path
=
os
.
path
.
join
(
os
.
path
.
join
(
vllm_root
,
"vllm"
),
"version.py"
)
major
,
minor
,
*
res
=
torch
.
__version__
.
split
(
'.'
)
if
add_git_version
:
if
sha
!=
'Unknown'
:
if
sha
is
None
:
sha
=
get_sha
(
vllm_root
)
version
=
'das.opt1'
+
sha
[:
7
]
if
(
major
,
minor
)
==
(
'2'
,
'1'
):
version
=
'das.opt1.'
+
sha
[:
7
]
if
(
major
,
minor
)
==
(
'2'
,
'3'
):
version
=
'das.opt2.'
+
sha
[:
7
]
else
:
if
(
major
,
minor
)
==
(
'2'
,
'1'
):
version
=
'das.opt1'
if
(
major
,
minor
)
==
(
'2'
,
'3'
):
version
=
'das.opt2'
# dtk version
if
os
.
getenv
(
"ROCM_PATH"
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment