Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a64e7b94
"vscode:/vscode.git/clone" did not exist on "6adaed42f49ff683f80521b73daa3a3bde413baa"
Unverified
Commit
a64e7b94
authored
Oct 10, 2024
by
Lucas Wilkinson
Committed by
GitHub
Oct 10, 2024
Browse files
[Bugfix] Machete garbage results for some models (large K dim) (#9212)
parent
ce00231a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
12 deletions
+16
-12
csrc/quantization/machete/machete_mainloop.cuh
csrc/quantization/machete/machete_mainloop.cuh
+13
-10
tests/kernels/test_machete_gemm.py
tests/kernels/test_machete_gemm.py
+3
-2
No files found.
csrc/quantization/machete/machete_mainloop.cuh
View file @
a64e7b94
...
...
@@ -591,24 +591,27 @@ struct MacheteCollectiveMma {
tma_load_b
=
make_tma_copy_B
(
make_logical_tensor
(
ptr_B
,
make_shape
(
N
,
K
,
L
),
args
.
dB
));
int32_t
scale_k
=
(
ModeHasScales
)
?
(
K
+
args
.
group_size
-
1
)
/
args
.
group_size
:
0
;
int32_t
group_size
=
(
ModeHasScales
)
?
args
.
group_size
:
0
;
if
constexpr
(
ModeHasScales
)
{
tma_load_scale
=
make_tma_copy_scale
(
make_logical_tensor
(
args
.
ptr_S
,
make_shape
(
M
,
args
.
group_size
,
L
),
args
.
dS
));
tma_load_scale
=
make_tma_copy_scale
(
make_logical_tensor
(
args
.
ptr_S
,
make_shape
(
M
,
scale_k
,
L
),
args
.
dS
));
}
if
constexpr
(
KernelConversionMode
==
ConversionMode
::
ConvertAndScaleWithZero
)
{
tma_load_zero
=
make_tma_copy_zero
(
make_logical_tensor
(
args
.
ptr_Z
,
make_shape
(
M
,
args
.
group_size
,
L
),
args
.
dS
));
tma_load_zero
=
make_tma_copy_zero
(
make_logical_tensor
(
args
.
ptr_Z
,
make_shape
(
M
,
scale_k
,
L
),
args
.
dS
));
}
if
constexpr
(
KernelConversionMode
==
ConversionMode
::
DirectConvert
)
{
return
{
tma_load_a
,
tma_load_b
,
tma_load_scale
,
tma_load_zero
,
0
,
0
};
}
else
if
constexpr
(
ModeHasScales
)
{
auto
scale_k
=
(
K
+
args
.
group_size
-
1
)
/
args
.
group_size
;
if
constexpr
(
KernelConversionMode
==
ConversionMode
::
DirectConvert
||
KernelConversionMode
==
ConversionMode
::
ConvertAndScale
||
KernelConversionMode
==
ConversionMode
::
ConvertAndScaleWithZero
)
{
return
{
tma_load_a
,
tma_load_b
,
tma_load_scale
,
tma_load_zero
,
scale_k
,
args
.
group_size
};
tma_load_zero
,
scale_k
,
group_size
};
}
else
{
static_assert
(
cutlass
::
detail
::
dependent_false
<
KernelSchedule
>
,
"Conversion mode not handled in to_underlying_arguments."
);
...
...
tests/kernels/test_machete_gemm.py
View file @
a64e7b94
...
...
@@ -24,13 +24,14 @@ MNK_SHAPES = [
(
1
,
128
,
128
),
(
1
,
512
,
1024
),
(
1
,
4096
,
4096
),
(
1
,
8192
,
28672
),
(
13
,
8192
,
4096
),
(
26
,
4096
,
8192
),
(
1
,
4096
,
4096
),
(
64
,
4096
,
4096
),
(
64
,
8192
,
28672
),
(
257
,
128
,
4096
),
(
257
,
4224
,
4160
),
(
257
,
4096
,
4096
),
(
64
,
4096
,
4096
),
(
1024
,
4096
,
8192
),
(
1024
,
8192
,
4096
),
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment