gaoqiong / composable_kernel_ROCM / Commits

Commit 966f9051
authored Feb 18, 2025 by mtgu0705

    fixed merge issue. fp8xint4 and fp8xint4_bpreshuffle function pass.

parent 49bac8ce

Showing 3 changed files with 33 additions and 122 deletions (+33, -122)
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_b_preshuffle_selector.hpp   +0   -9
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_base.hpp                    +0   -11
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp                      +33  -102
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_b_preshuffle_selector.hpp

@@ -9,15 +9,6 @@
 namespace ck {
 
-enum struct BlockGemmPipelineVersion
-{
-    v1, // Naive
-    v2, // Mem
-    v3, // Comp
-    v4, // Comp, double lds buffer
-    v5, // Comp, double global prefetch register buffer
-};
-
 template <BlockGemmPipelineVersion BlkGemmPipelineVer,
           BlockGemmPipelineScheduler BlkGemmPipeSche,
           index_t BlockSize,
           ...
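For context, a selector header like the one above maps a compile-time BlockGemmPipelineVersion choice onto a concrete pipeline type. Below is a minimal standalone sketch of that dispatch pattern; the names PipelineVersion, PipelineV1/V2/V3 and select_pipeline are hypothetical and this is not CK's actual selector.

// Standalone sketch (not CK code): compile-time dispatch on a pipeline-version enum.
#include <iostream>

enum struct PipelineVersion
{
    v1, // naive
    v2, // memory-bound oriented
    v3, // compute-bound oriented
};

struct PipelineV1 { static constexpr const char* name = "v1 (naive)"; };
struct PipelineV2 { static constexpr const char* name = "v2 (mem)"; };
struct PipelineV3 { static constexpr const char* name = "v3 (comp)"; };

// The enum template parameter picks the pipeline type, so the choice costs
// nothing at run time; only the selected branch is instantiated.
template <PipelineVersion Ver>
constexpr auto select_pipeline()
{
    if constexpr(Ver == PipelineVersion::v1) { return PipelineV1{}; }
    else if constexpr(Ver == PipelineVersion::v2) { return PipelineV2{}; }
    else { return PipelineV3{}; }
}

int main()
{
    auto pipe = select_pipeline<PipelineVersion::v2>();
    std::cout << decltype(pipe)::name << '\n'; // prints "v2 (mem)"
}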
include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_base.hpp

@@ -113,17 +113,6 @@ struct BlockwiseGemmXdlops_pipeline_base
         return make_tuple(0, waveId_m, xdlops_a_idx[I1], KPerThread * xdlops_a_idx[I0]);
     }
 
-    __device__ static auto CalculateAThreadOriginDataIndex6D()
-    {
-        const auto wave_idx = GetWaveIdx();
-
-        const auto waveId_m = wave_idx[I0];
-
-        const auto xdlops_a_idx = xdlops_gemm.CalculateAThreadOriginDataIndex();
-
-        return make_tuple(0, waveId_m, xdlops_a_idx[I1], 0, xdlops_a_idx[I0], 0);
-    }
-
     __device__ static auto CalculateAThreadOriginDataIndex6D()
     {
     ...
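The hunk above deletes a second, merge-introduced in-class definition of CalculateAThreadOriginDataIndex6D. C++ rejects two definitions of the same member function inside one class as a redefinition, so the duplicate has to be removed rather than merged. A minimal illustration with a hypothetical Example struct:

// Standalone sketch (not CK code): duplicated in-class member definitions do not compile.
struct Example
{
    static auto Origin() { return 0; }
    // static auto Origin() { return 0; }   // error: redefinition of 'Origin'
};

int main() { return Example::Origin(); }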
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp

@@ -286,118 +286,49 @@ struct ThreadwiseTensorSliceTransfer_v2
         // loop over tensor and copy
         constexpr auto num_access = SpaceFillingCurve::GetNumOfAccess();
 
-#if 0
-        if constexpr(is_same<remove_cvref_t<SrcData>, pk_i4_t>::value)
-        {
-            static_for<0, num_access, 1>{}([&](auto idx_1d) {
-                typename vector_type_maker<SrcData, SrcScalarPerVector / PackedSize>::type
-                    src_tmp_vector;
-
-                using src_vector_t = typename decltype(src_tmp_vector)::type;
-                constexpr auto src_data_idx = SpaceFillingCurve::GetIndex(idx_1d);
-
-                const bool is_src_valid =
-                    coordinate_has_valid_offset_assuming_visible_index_is_valid(src_desc,
-                                                                                src_coord_);
-
-                // copy data from src_buf into src_tmp_vector
-                src_tmp_vector.template AsType<src_vector_t>()(Number<0>{}) =
-                    src_buf.template Get<src_vector_t>(src_coord_.GetOffset() / PackedSize,
-                                                       is_src_valid);
-
-                // copy data from src_tmp_vector to dst_tmp_vector (data cast data from SrcData to
-                // DstData)
-                vector_type_maker_t<DstData, SrcScalarPerVector> dst_tmp_vector;
-
-                constexpr index_t pack_size = 8;
-                static_assert(SrcScalarPerVector % pack_size == 0, "");
-
-                using src_v_t = typename vector_type_maker_t<SrcData, pack_size / PackedSize>::type;
-                using dst_v_t = typename vector_type_maker_t<DstData, pack_size>::type;
-                static_for<0, SrcScalarPerVector / pack_size, 1>{}([&](auto i) {
-                    ck::tensor_operation::element_wise::PassThroughPack8{}(
-                        dst_tmp_vector.template AsType<dst_v_t>()(i),
-                        src_tmp_vector.template AsType<src_v_t>()[i]);
-                });
-
-                // copy data from dst_tmp_vector into dst_buf
-                static_for<0, SrcScalarPerVector, 1>{}([&](auto i) {
-                    constexpr index_t dst_offset =
-                        dst_desc.CalculateOffset(to_multi_index(dst_slice_origin_idx) +
-                                                 src_data_idx + i * src_scalar_step_in_vector);
-
-                    if constexpr(InvalidElementAsNaN)
-                    {
-                        dst_buf(Number<dst_offset>{}) =
-                            is_src_valid
-                                ? dst_tmp_vector.template AsType<DstData>()[i]
-                                : NumericLimits<DstData>::QuietNaN();
-                    }
-                    else
-                    {
-                        dst_buf(Number<dst_offset>{}) =
-                            dst_tmp_vector.template AsType<DstData>()[i];
-                        // type_convert<DstData>(src_vector.template AsType<SrcData>()[i]);
-                    }
-                });
-
-                if constexpr(idx_1d.value != num_access - 1)
-                {
-                    constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
-                    move_tensor_coordinate(
-                        src_desc, src_coord_, make_tensor_coordinate_step(src_desc, forward_step));
-                }
-            });
-        }
-        else
-#endif
-        {
-            static_for<0, num_access, 1>{}([&](auto idx_1d) {
-                typename vector_type_maker<SrcData, SrcScalarPerVector / PackedSize>::type
-                    src_vector;
-
-                using src_vector_t =
-                    typename vector_type_maker<SrcData, SrcScalarPerVector / PackedSize>::type::type;
-                constexpr auto src_data_idx = SpaceFillingCurve::GetIndex(idx_1d);
-
-                const bool is_src_valid =
-                    coordinate_has_valid_offset_assuming_visible_index_is_valid(src_desc,
-                                                                                src_coord_);
-
-                // copy data from src_buf into src_vector
-                src_vector.template AsType<src_vector_t>()(Number<0>{}) =
-                    src_buf.template Get<src_vector_t>(src_coord_.GetOffset() / PackedSize,
-                                                       is_src_valid);
-
-                // copy data from src_vector into dst_buf
-                static_for<0, SrcScalarPerVector / PackedSize, 1>{}([&](auto i) {
-                    constexpr index_t dst_offset =
-                        dst_desc.CalculateOffset(to_multi_index(dst_slice_origin_idx) +
-                                                 src_data_idx + i * src_scalar_step_in_vector);
-
-                    if constexpr(InvalidElementAsNaN)
-                    {
-                        dst_buf(Number<dst_offset>{}) =
-                            is_src_valid
-                                ? type_convert<DstData>(src_vector.template AsType<SrcData>()[i])
-                                : NumericLimits<DstData>::QuietNaN();
-                    }
-                    else
-                    {
-                        dst_buf(Number<dst_offset>{}) =
-                            type_convert<DstData>(src_vector.template AsType<SrcData>()[i]);
-                    }
-                });
-
-                if constexpr(idx_1d.value != num_access - 1)
-                {
-                    constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
-                    move_tensor_coordinate(
-                        src_desc, src_coord_, make_tensor_coordinate_step(src_desc, forward_step));
-                }
-            });
-        }
+        static_for<0, num_access, 1>{}([&](auto idx_1d) {
+            typename vector_type_maker<SrcData, SrcScalarPerVector / PackedSize>::type src_vector;
+
+            using src_vector_t =
+                typename vector_type_maker<SrcData, SrcScalarPerVector / PackedSize>::type::type;
+            constexpr auto src_data_idx = SpaceFillingCurve::GetIndex(idx_1d);
+
+            const bool is_src_valid =
+                coordinate_has_valid_offset_assuming_visible_index_is_valid(src_desc, src_coord_);
+
+            // copy data from src_buf into src_vector
+            src_vector.template AsType<src_vector_t>()(Number<0>{}) =
+                src_buf.template Get<src_vector_t>(src_coord_.GetOffset() / PackedSize,
+                                                   is_src_valid);
+
+            // copy data from src_vector into dst_buf
+            static_for<0, SrcScalarPerVector / PackedSize, 1>{}([&](auto i) {
+                constexpr index_t dst_offset =
+                    dst_desc.CalculateOffset(to_multi_index(dst_slice_origin_idx) + src_data_idx +
+                                             i * src_scalar_step_in_vector);
+
+                if constexpr(InvalidElementAsNaN)
+                {
+                    dst_buf(Number<dst_offset>{}) =
+                        is_src_valid
+                            ? type_convert<DstData>(src_vector.template AsType<SrcData>()[i])
+                            : NumericLimits<DstData>::QuietNaN();
+                }
+                else
+                {
+                    dst_buf(Number<dst_offset>{}) =
+                        type_convert<DstData>(src_vector.template AsType<SrcData>()[i]);
+                }
+            });
+
+            if constexpr(idx_1d.value != num_access - 1)
+            {
+                constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
+                move_tensor_coordinate(
+                    src_desc, src_coord_, make_tensor_coordinate_step(src_desc, forward_step));
+            }
+        });
 
         // move src coordinate back to slice origin (or not)
         if constexpr(SrcResetCoordinateAfterRun)
         ...
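The code path kept above walks a compile-time space-filling curve: for each of num_access accesses it loads one small vector, converts element by element into the destination, and advances the source coordinate after every access except the last. Below is a minimal standalone sketch of that loop shape in plain C++; the names copy_slice, NumAccess and ScalarPerVector are hypothetical and this is not CK's API.

// Standalone sketch (not CK code): per-access vector copy with element-wise conversion.
#include <array>
#include <cstddef>
#include <cstdio>

template <int NumAccess, int ScalarPerVector, typename SrcData, typename DstData, std::size_t N>
void copy_slice(const std::array<SrcData, N>& src, std::array<DstData, N>& dst)
{
    static_assert(NumAccess * ScalarPerVector <= static_cast<int>(N), "slice exceeds buffers");

    int src_offset = 0; // stands in for the tensor coordinate
    for(int idx_1d = 0; idx_1d < NumAccess; ++idx_1d)
    {
        // per-element type conversion, analogous to type_convert<DstData>(...)
        for(int i = 0; i < ScalarPerVector; ++i)
        {
            dst[src_offset + i] = static_cast<DstData>(src[src_offset + i]);
        }

        // advance the coordinate, but not after the final access
        if(idx_1d != NumAccess - 1)
        {
            src_offset += ScalarPerVector;
        }
    }
}

int main()
{
    std::array<float, 8> src{0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f};
    std::array<int, 8> dst{};
    copy_slice<4, 2>(src, dst);
    for(int v : dst) std::printf("%d ", v); // 0 1 2 3 4 5 6 7
}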
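The / PackedSize divisions in the kept path account for sub-byte source types. Assuming pk_i4_t packs two 4-bit values per byte (PackedSize == 2), a vector of N logical elements occupies N / 2 bytes, and byte offsets are element offsets divided by 2. The sketch below illustrates that addressing only; it is not CK's pk_i4_t implementation, and the unpack here is unsigned for simplicity.

// Standalone sketch (not CK code): addressing two packed 4-bit values per byte.
#include <cstdint>
#include <cstdio>

constexpr int unpack_lo(std::uint8_t byte) { return static_cast<int>(byte & 0x0F); }        // element 2k
constexpr int unpack_hi(std::uint8_t byte) { return static_cast<int>((byte >> 4) & 0x0F); } // element 2k+1

int main()
{
    constexpr int PackedSize = 2; // two 4-bit elements per byte
    const std::uint8_t packed[4] = {0x21, 0x43, 0x65, 0x87}; // logical elements 1..8

    constexpr int num_elements = 8;
    for(int i = 0; i < num_elements; ++i)
    {
        const std::uint8_t byte = packed[i / PackedSize]; // offset / PackedSize, as in the diff
        const int value = (i % PackedSize == 0) ? unpack_lo(byte) : unpack_hi(byte);
        std::printf("%d ", value); // prints 1 2 3 4 5 6 7 8
    }
}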