Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
bb10822b
Commit
bb10822b
authored
Oct 25, 2024
by
mtgu0705
Browse files
Updated the int4 per-group dequant. Encountered a functional bug.
parent
624c6d3e
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
11 additions
and
8 deletions
+11
-8
example/65_gemm_multiply_multiply/CMakeLists.txt
example/65_gemm_multiply_multiply/CMakeLists.txt
+1
-0
example/65_gemm_multiply_multiply/gemm_fp16int8_b_scale.cpp
example/65_gemm_multiply_multiply/gemm_fp16int8_b_scale.cpp
+3
-2
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_b_scale_selector.hpp
...block/blockwise_gemm_pipeline_xdlops_b_scale_selector.hpp
+0
-1
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3_b_scale.hpp
...n/gpu/block/blockwise_gemm_pipeline_xdlops_v3_b_scale.hpp
+1
-1
include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3_b_scale.hpp
...e/impl/device_gemm_multiple_d_xdl_cshuffle_v3_b_scale.hpp
+2
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_scale.hpp
...pu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_scale.hpp
+3
-3
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+1
-0
No files found.
example/65_gemm_multiply_multiply/CMakeLists.txt
View file @
bb10822b
add_example_executable
(
example_gemm_multiply_multiply_xdl_fp8 gemm_multiply_multiply_xdl_fp8.cpp
)
add_example_executable
(
example_gemm_multiply_multiply_xdl_fp8_ab_scale gemm_multiply_multiply_xdl_fp8_ab_scale.cpp
)
add_example_executable
(
example_gemm_fp16int8_b_scale gemm_fp16int8_b_scale.cpp
)
add_example_executable
(
example_gemm_fp16int4_b_scale gemm_fp16int4_b_scale.cpp
)
add_example_executable
(
example_gemm_add_add_xdl_fp16 gemm_add_add_xdl_fp16.cpp
)
example/65_gemm_multiply_multiply/gemm_fp16int8_b_scale.cpp
View file @
bb10822b
...
...
@@ -61,7 +61,7 @@ using CDEElementOp = PassThrough;
static
constexpr
auto
GemmSpec
=
ck
::
tensor_operation
::
device
::
GemmSpecialization
::
Default
;
// static constexpr ck::index_t Scale_Block_M = 128;
static
constexpr
ck
::
index_t
Scale_Block_N
=
1
28
;
static
constexpr
ck
::
index_t
Scale_Block_N
=
1
;
static
constexpr
ck
::
index_t
Scale_Block_K
=
128
;
using
DeviceOpInstance
=
ck
::
tensor_operation
::
device
::
DeviceGemmMultiD_BScale_Xdl_CShuffle_V3
...
...
@@ -217,7 +217,8 @@ int main(int argc, char* argv[])
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_1
<
A0DataType
>
{});
quant_b0_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
QuantDataType
>
{});
// a1_m_k.GenerateTensorValue(GeneratorTensor_1<A1DataType>{});
b1_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
B1DataType
>
{});
// b1_k_n.GenerateTensorValue(GeneratorTensor_1<B1DataType>{});
b1_k_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
B1DataType
>
{
0
,
1.0
});
break
;
case
3
:
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_2
<
A0DataType
>
{
-
2
,
2
});
...
...
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_b_scale_selector.hpp
View file @
bb10822b
...
...
@@ -39,7 +39,6 @@ template <BlockGemmPipelineVersion BlkGemmPipelineVer,
index_t
KPack
>
constexpr
auto
BlockGemmBScalePipeline_Selector
()
{
printf
(
"I'm Here
\n
"
);
return
BlockwiseGemmXdlops_pipeline_v3_b_scale
<
BlkGemmPipeSche
,
BlockSize
,
ADataType
,
...
...
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3_b_scale.hpp
View file @
bb10822b
...
...
@@ -440,7 +440,7 @@ struct BlockwiseGemmXdlops_pipeline_v3_b_scale<BlockGemmPipelineScheduler::Intra
c_thread_buf
(
Number
<
c_offset
>
{})
+=
c_thread_buf_per_scale
[
Number
<
t
>
{}]
*
// type_convert<AccDataType>(a_scale_thread_buf[I0]) *
type_convert
<
AccDataType
>
(
b_scale_thread_buf
[
I
0
]);
type_convert
<
AccDataType
>
(
b_scale_thread_buf
[
n
0
]);
});
});
});
...
...
include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3_b_scale.hpp
View file @
bb10822b
...
...
@@ -360,7 +360,8 @@ struct DeviceGemmMultiD_BScale_Xdl_CShuffle_V3
return
false
;
}
if
(
ScaleBlockN
%
NPerBlock
!=
0
||
ScaleBlockK
!=
KPerBlock
)
// if(ScaleBlockN % NPerBlock != 0 || ScaleBlockK != KPerBlock)
if
(
ScaleBlockK
!=
KPerBlock
)
{
printf
(
"Return 1
\n
"
);
return
false
;
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v3_multi_d_b_scale.hpp
View file @
bb10822b
...
...
@@ -1359,15 +1359,15 @@ struct GridwiseGemmMultiD_BScale_xdl_cshuffle_v3
(
a_grid_desc_ak0_m_ak1
.
GetLength
(
I0
)
*
a_grid_desc_ak0_m_ak1
.
GetLength
(
I2
))
/
KPerBlock
);
const
index_t
ScaleSliceSizeM
=
1
;
const
index_t
ScaleSliceSizeN
=
1
;
//
const index_t ScaleSliceSizeM = 1;
const
index_t
ScaleSliceSizeN
=
NXdlPerWave
;
const
index_t
ScaleSliceSizeK
=
1
;
// constexpr auto a_scale_thread_desc = make_naive_tensor_descriptor_packed(
// make_tuple(Number<ScaleSliceSizeM>{}, Number<ScaleSliceSizeK>{}));
constexpr
auto
b_scale_thread_desc
=
make_naive_tensor_descriptor_packed
(
make_tuple
(
Number
<
ScaleSliceSize
M
>
{},
Number
<
ScaleSliceSizeK
>
{}));
make_tuple
(
Number
<
ScaleSliceSize
N
>
{},
Number
<
ScaleSliceSizeK
>
{}));
// auto a_scale_thread_copy =
// ThreadwiseTensorSliceTransfer_v2<AScaleType,
...
...
include/ck/utility/data_type.hpp
View file @
bb10822b
...
...
@@ -12,6 +12,7 @@ using half_t = _Float16;
using
int4_t
=
_BitInt
(
4
);
using
f8_t
=
_BitInt
(
8
);
using
bf8_t
=
unsigned
_BitInt
(
8
);
using
pk_i4_t
=
unsigned
char
;
inline
constexpr
auto
next_pow2
(
uint32_t
x
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment