Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
26f2fb51
Unverified
Commit
26f2fb51
authored
Apr 30, 2024
by
Kunshang Ji
Committed by
GitHub
Apr 30, 2024
Browse files
[Core]Refactor gptq_marlin ops (#4466)
parent
fa322078
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
1 deletion
+17
-1
vllm/_custom_ops.py
vllm/_custom_ops.py
+16
-0
vllm/model_executor/layers/quantization/gptq_marlin.py
vllm/model_executor/layers/quantization/gptq_marlin.py
+1
-1
No files found.
vllm/_custom_ops.py
View file @
26f2fb51
...
...
@@ -167,6 +167,22 @@ def aqlm_dequant(codes: torch.Tensor, codebooks: torch.Tensor,
return
vllm_ops
.
aqlm_dequant
(
codes
,
codebooks
,
codebook_partition_sizes
)
# gptq_marlin
def gptq_marlin_repack(b_q_weight: torch.Tensor, perm: torch.Tensor,
                       size_k: int, size_n: int) -> torch.Tensor:
    """Forward to the compiled ``vllm_ops.gptq_marlin_repack`` op.

    All four arguments are passed through positionally and unchanged; the
    value produced by the underlying op is returned as-is.

    Args:
        b_q_weight: Tensor handed to the op as its first argument.
            NOTE(review): presumably the GPTQ-packed quantized weight --
            confirm against the kernel.
        perm: Tensor handed to the op as its second argument.
            NOTE(review): presumably a permutation index -- confirm.
        size_k: Integer forwarded as the op's third argument.
        size_n: Integer forwarded as the op's fourth argument.

    Returns:
        Whatever ``vllm_ops.gptq_marlin_repack`` returns (annotated as a
        ``torch.Tensor``).
    """
    repacked = vllm_ops.gptq_marlin_repack(b_q_weight, perm, size_k, size_n)
    return repacked
def gptq_marlin_gemm(a: torch.Tensor, b_q_weight: torch.Tensor,
                     b_scales: torch.Tensor, g_idx: torch.Tensor,
                     perm: torch.Tensor, workspace: torch.Tensor, size_m: int,
                     size_n: int, size_k: int,
                     is_k_full: bool) -> torch.Tensor:
    """Forward to the compiled ``vllm_ops.gptq_marlin_gemm`` op.

    Every argument is passed through positionally, in declaration order and
    unchanged; the value produced by the underlying op is returned as-is.

    Args:
        a: Tensor forwarded as the op's first argument.
            NOTE(review): presumably the activation matrix -- confirm.
        b_q_weight: Tensor forwarded as the second argument.
            NOTE(review): presumably the repacked quantized weight -- confirm.
        b_scales: Tensor forwarded as the third argument.
        g_idx: Tensor forwarded as the fourth argument.
        perm: Tensor forwarded as the fifth argument.
        workspace: Tensor forwarded as the sixth argument.
        size_m: Integer forwarded as the seventh argument.
        size_n: Integer forwarded as the eighth argument.
        size_k: Integer forwarded as the ninth argument.
        is_k_full: Boolean flag forwarded as the tenth argument.

    Returns:
        Whatever ``vllm_ops.gptq_marlin_gemm`` returns (annotated as a
        ``torch.Tensor``).
    """
    # Note the (size_m, size_n, size_k) ordering expected by the op.
    output = vllm_ops.gptq_marlin_gemm(
        a,
        b_q_weight,
        b_scales,
        g_idx,
        perm,
        workspace,
        size_m,
        size_n,
        size_k,
        is_k_full,
    )
    return output
# fp8
def
scaled_fp8_quant
(
input
:
torch
.
Tensor
,
...
...
vllm/model_executor/layers/quantization/gptq_marlin.py
View file @
26f2fb51
...
...
@@ -6,7 +6,7 @@ import numpy
import
torch
from
torch.nn.parameter
import
Parameter
from
vllm
._C
import
ops
from
vllm
import
_custom_ops
as
ops
from
vllm.model_executor.layers.linear
import
(
LinearBase
,
LinearMethodBase
,
set_weight_attrs
)
from
vllm.model_executor.layers.quantization.base_config
import
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment