Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e73b7dfd
Unverified
Commit
e73b7dfd
authored
May 17, 2025
by
Jinzhen Lin
Committed by
GitHub
May 16, 2025
Browse files
[Bugfix] fix `an illegal memory access was encountered` of marlin kernel + act_order (#18245)
parent
7fdfa015
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
29 additions
and
22 deletions
+29
-22
csrc/moe/marlin_moe_wna16/marlin_template.h
csrc/moe/marlin_moe_wna16/marlin_template.h
+14
-11
csrc/quantization/gptq_marlin/marlin_template.h
csrc/quantization/gptq_marlin/marlin_template.h
+14
-10
tests/weight_loading/models.txt
tests/weight_loading/models.txt
+1
-1
No files found.
csrc/moe/marlin_moe_wna16/marlin_template.h
View file @
e73b7dfd
...
...
@@ -1767,6 +1767,8 @@ __global__ void Marlin(
if
constexpr
(
has_act_order
)
{
slice_k_start
+=
tb_k
*
stages
;
if
(
slice_k_start
<
prob_k
)
{
slice_k_start_shared_fetch
+=
tb_k
*
stages
;
int
first_group_id
=
g_idx
[
slice_k_start
];
int
last_g_idx
=
slice_k_start
+
stages
*
tb_k
*
2
;
...
...
@@ -1780,6 +1782,7 @@ __global__ void Marlin(
__syncthreads
();
}
}
}
if
(
slice_iters
==
0
)
{
break
;
}
...
...
csrc/quantization/gptq_marlin/marlin_template.h
View file @
e73b7dfd
...
...
@@ -1588,6 +1588,8 @@ __global__ void Marlin(
if
constexpr
(
has_act_order
)
{
slice_k_start
+=
tb_k
*
stages
;
if
(
slice_k_start
<
prob_k
)
{
slice_k_start_shared_fetch
+=
tb_k
*
stages
;
int
first_group_id
=
g_idx
[
slice_k_start
];
int
last_g_idx
=
slice_k_start
+
stages
*
tb_k
*
2
;
...
...
@@ -1596,10 +1598,12 @@ __global__ void Marlin(
}
int
last_group_id
=
g_idx
[
last_g_idx
];
if
(
last_group_id
>=
sh_first_group_id
+
sh_num_groups
)
{
fetch_act_order_scales_to_shared
(
false
,
first_group_id
,
last_group_id
);
fetch_act_order_scales_to_shared
(
false
,
first_group_id
,
last_group_id
);
__syncthreads
();
}
}
}
// Process results and, if necessary, proceed to the next column slice.
// While this pattern may not be the most readable, other ways of writing
...
...
tests/weight_loading/models.txt
View file @
e73b7dfd
...
...
@@ -2,7 +2,7 @@ gptq_marlin, robertgshaw2/zephyr-7b-beta-channelwise-gptq, main
gptq_marlin, TheBloke/Llama-2-7B-GPTQ, main
gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, main
gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit--1g-actorder_True
#
gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit-32g-actorder_True
gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit-32g-actorder_True
gptq_marlin, TechxGenus/gemma-1.1-2b-it-GPTQ, main
gptq, robertgshaw2/zephyr-7b-beta-channelwise-gptq, main
gptq, TheBloke/Llama-2-7B-GPTQ, main
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment