Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4561f139
Unverified
Commit
4561f139
authored
Jan 23, 2026
by
Michael Goin
Committed by
GitHub
Jan 23, 2026
Browse files
[Refactor] Rename `gptq_marlin` to `marlin` to match MoE (#32952)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
6cc6d92b
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
8 additions
and
8 deletions
+8
-8
vllm/_custom_ops.py
vllm/_custom_ops.py
+4
-4
vllm/model_executor/layers/quantization/utils/marlin_utils.py
.../model_executor/layers/quantization/utils/marlin_utils.py
+2
-2
vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py
...el_executor/layers/quantization/utils/marlin_utils_fp4.py
+1
-1
vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
...el_executor/layers/quantization/utils/marlin_utils_fp8.py
+1
-1
No files found.
vllm/_custom_ops.py
View file @
4561f139
...
...
@@ -591,8 +591,8 @@ if hasattr(torch.ops._C, "gptq_marlin_24_gemm"):
)
->
torch
.
Tensor
:
return
torch
.
empty
((
size_m
,
size_n
),
device
=
a
.
device
,
dtype
=
a
.
dtype
)
@
register_fake
(
"_C::
gptq_
marlin_gemm"
)
def
_gptq
_marlin_gemm_fake
(
@
register_fake
(
"_C::marlin_gemm"
)
def
_marlin_gemm_fake
(
a
:
torch
.
Tensor
,
c
:
torch
.
Tensor
|
None
,
b_q_weight
:
torch
.
Tensor
,
...
...
@@ -1312,7 +1312,7 @@ def marlin_int4_fp8_preprocess(
return
torch
.
ops
.
_C
.
marlin_int4_fp8_preprocess
(
qweight
,
qzeros_or_none
,
inplace
)
def
gptq_
marlin_gemm
(
def
marlin_gemm
(
a
:
torch
.
Tensor
,
c
:
torch
.
Tensor
|
None
,
b_q_weight
:
torch
.
Tensor
,
...
...
@@ -1333,7 +1333,7 @@ def gptq_marlin_gemm(
use_fp32_reduce
:
bool
=
False
,
is_zp_float
:
bool
=
False
,
)
->
torch
.
Tensor
:
return
torch
.
ops
.
_C
.
gptq_
marlin_gemm
(
return
torch
.
ops
.
_C
.
marlin_gemm
(
a
,
c
,
b_q_weight
,
...
...
vllm/model_executor/layers/quantization/utils/marlin_utils.py
View file @
4561f139
...
...
@@ -563,7 +563,7 @@ def apply_gptq_marlin_linear(
reshaped_x
,
a_scales
=
marlin_quant_input
(
reshaped_x
,
input_dtype
)
output
=
ops
.
gptq_
marlin_gemm
(
output
=
ops
.
marlin_gemm
(
reshaped_x
,
None
,
weight
,
...
...
@@ -628,7 +628,7 @@ def apply_awq_marlin_linear(
)
reshaped_x
,
a_scales
=
marlin_quant_input
(
reshaped_x
,
input_dtype
)
output
=
ops
.
gptq_
marlin_gemm
(
output
=
ops
.
marlin_gemm
(
reshaped_x
,
None
,
weight
,
...
...
vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py
View file @
4561f139
...
...
@@ -121,7 +121,7 @@ def apply_fp4_marlin_linear(
inputs
,
a_scales
=
marlin_quant_input
(
inputs
,
torch
.
float8_e4m3fn
)
output
=
ops
.
gptq_
marlin_gemm
(
output
=
ops
.
marlin_gemm
(
a
=
inputs
,
c
=
None
,
b_q_weight
=
weight
,
...
...
vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
View file @
4561f139
...
...
@@ -66,7 +66,7 @@ def apply_fp8_marlin_linear(
# inputs, a_scales = marlin_quant_input(inputs, torch.float8_e4m3fn)
raise
RuntimeError
(
"Marlin W8A8 is not supported."
)
output
=
ops
.
gptq_
marlin_gemm
(
output
=
ops
.
marlin_gemm
(
a
=
inputs
,
c
=
None
,
b_q_weight
=
weight
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment