gaoqiong / composable_kernel · commit 0a7174ad (unverified)
Authored Nov 21, 2023 by Chao Liu; committed by GitHub on Nov 21, 2023
Merge with (not the latest) upstream CK (#32)

* fix build for old ck examples
* fix build for old ck
Parent: 496be40e
Changes: 31 files in the commit; this page shows 11 changed files, with 81 additions and 118 deletions (+81 / -118).
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp      +12 -26
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp        +15 -36
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp        +20 -48
include/ck/tile_program/tile/tile_window_impl_static_distribution.hpp                    +1  -0
include/ck/utility/array_multi_index.hpp                                                +16  -0
include/ck/utility/buffer_view_impl_global.hpp                                           +1  -1
include/ck/utility/container_helper.hpp                                                  +1  -1
include/ck/utility/data_type.hpp                                                         +8  -2
include/ck/utility/magic_division.hpp                                                    +5  -4
library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp   +1  -0
test/block_to_ctile_map/test_block_to_ctile_map.cpp                                      +1  -0
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp
@@ -94,7 +94,7 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
         using dst_vector_t = typename dst_vector_type::type;

         const bool is_src_valid =
-            coordinate_has_valid_offset_assuming_visible_index_is_valid(src_desc, src_coord_);
+            coordinate_has_valid_offset_assuming_top_index_is_valid(src_desc, src_coord_);

         // copy data from src_buf into src_vector_container
         auto src_vector_container = src_vector_type{
@@ -114,7 +114,7 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
             });

         const bool is_dst_valid =
-            coordinate_has_valid_offset_assuming_visible_index_is_valid(dst_desc, dst_coord_);
+            coordinate_has_valid_offset_assuming_top_index_is_valid(dst_desc, dst_coord_);

         // copy data from dst_vector into dst_buf
         dst_buf.template Update<DstInMemOp, dst_vector_t>(
@@ -126,28 +126,20 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
             if constexpr(idx_1d.value != num_access - 1)
             {
                 constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);

-                move_tensor_coordinate(
-                    src_desc, src_coord_, make_tensor_coordinate_step(src_desc, forward_step));
-                move_tensor_coordinate(
-                    dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
+                move_tensor_coordinate(src_desc, src_coord_, forward_step);
+                move_tensor_coordinate(dst_desc, dst_coord_, forward_step);
             }
         });

         // move coordinate back to slice origin (or not)
         if constexpr(SrcResetCoordinateAfterRun)
         {
-            const auto src_reset_step =
-                make_tensor_coordinate_step(src_desc, GetCoordinateResetStep());
-
-            move_tensor_coordinate(src_desc, src_coord_, src_reset_step);
+            move_tensor_coordinate(src_desc, src_coord_, GetCoordinateResetStep());
         }

         if constexpr(DstResetCoordinateAfterRun)
         {
-            const auto dst_reset_step =
-                make_tensor_coordinate_step(dst_desc, GetCoordinateResetStep());
-
-            move_tensor_coordinate(dst_desc, dst_coord_, dst_reset_step);
+            move_tensor_coordinate(dst_desc, dst_coord_, GetCoordinateResetStep());
         }
     }
@@ -179,12 +171,9 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
                                            const Index& src_slice_origin_step_idx)
     {
         // if src coord was not reset by RunRead(), then need to adjust the step here
-        const auto adjusted_step_idx = SrcResetCoordinateAfterRun
-                                           ? src_slice_origin_step_idx
-                                           : src_slice_origin_step_idx + GetCoordinateResetStep();
-
-        // is it OK to construct a new step every time?
-        const auto adjusted_step = make_tensor_coordinate_step(src_desc, adjusted_step_idx);
+        const auto adjusted_step = SrcResetCoordinateAfterRun
+                                       ? src_slice_origin_step_idx
+                                       : src_slice_origin_step_idx + GetCoordinateResetStep();

         move_tensor_coordinate(src_desc, src_coord_, adjusted_step);
     }
@@ -194,12 +183,9 @@ struct ThreadwiseTensorSliceTransfer_v6r1r2
                                            const Index& dst_slice_origin_step_idx)
     {
         // if dst coord was not reset by Run(), then need to adjust the step here
-        const auto adjusted_step_idx = DstResetCoordinateAfterRun
-                                           ? dst_slice_origin_step_idx
-                                           : dst_slice_origin_step_idx + GetCoordinateResetStep();
-
-        // is it OK to construct a new step every time?
-        const auto adjusted_step = make_tensor_coordinate_step(dst_desc, adjusted_step_idx);
+        const auto adjusted_step = DstResetCoordinateAfterRun
+                                       ? dst_slice_origin_step_idx
+                                       : dst_slice_origin_step_idx + GetCoordinateResetStep();

         move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step);
     }
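A pattern repeated throughout this file (and in the v6r2/v6r3 transfers below) is that move_tensor_coordinate is now handed the raw step index (forward_step, GetCoordinateResetStep(), adjusted_step) directly, where the old code first wrapped it in make_tensor_coordinate_step. A minimal standalone sketch of that call-site simplification, using hypothetical toy types (ToyCoord, ToyStep, toy_move) rather than the CK descriptors and coordinates:

// Standalone sketch (not CK code): a mover that accepts either a prebuilt
// step object or a raw index, mirroring the call-site change in the diff above.
#include <array>
#include <cstddef>

using ToyIndex = std::array<int, 2>; // hypothetical 2-D multi-index

struct ToyStep // stands in for what make_tensor_coordinate_step would build
{
    ToyIndex idx;
};

struct ToyCoord
{
    ToyIndex idx{};
};

// old-style overload: caller builds the step object first
inline void toy_move(ToyCoord& c, const ToyStep& s)
{
    for(std::size_t i = 0; i < c.idx.size(); ++i) { c.idx[i] += s.idx[i]; }
}

// new-style overload: caller passes the raw index; the step is built internally
inline void toy_move(ToyCoord& c, const ToyIndex& step_idx)
{
    toy_move(c, ToyStep{step_idx});
}

int main()
{
    ToyCoord c;
    toy_move(c, ToyStep{{1, 2}}); // before: explicit step construction at the call site
    toy_move(c, ToyIndex{3, 4});  // after: pass the index directly
}

In the sketch the raw-index overload simply builds the step itself, so the caller-side wrapping disappears without changing what the move does.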
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp
@@ -147,38 +147,26 @@ struct ThreadwiseTensorSliceTransfer_v6r2
             if constexpr(idx_1d.value != num_access - 1)
             {
                 constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);

-                move_tensor_coordinate(
-                    src0_desc, src0_coord_, make_tensor_coordinate_step(src0_desc, forward_step));
-                move_tensor_coordinate(
-                    src1_desc, src1_coord_, make_tensor_coordinate_step(src1_desc, forward_step));
-                move_tensor_coordinate(
-                    dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
+                move_tensor_coordinate(src0_desc, src0_coord_, forward_step);
+                move_tensor_coordinate(src1_desc, src1_coord_, forward_step);
+                move_tensor_coordinate(dst_desc, dst_coord_, forward_step);
             }
         });

         // move coordinate back to slice origin (or not)
         if constexpr(Src0ResetCoordinateAfterRun)
         {
-            const auto src0_reset_step =
-                make_tensor_coordinate_step(src0_desc, GetCoordinateResetStep());
-
-            move_tensor_coordinate(src0_desc, src0_coord_, src0_reset_step);
+            move_tensor_coordinate(src0_desc, src0_coord_, GetCoordinateResetStep());
         }

         if constexpr(Src1ResetCoordinateAfterRun)
         {
-            const auto src1_reset_step =
-                make_tensor_coordinate_step(src1_desc, GetCoordinateResetStep());
-
-            move_tensor_coordinate(src1_desc, src1_coord_, src1_reset_step);
+            move_tensor_coordinate(src1_desc, src1_coord_, GetCoordinateResetStep());
         }

         if constexpr(DstResetCoordinateAfterRun)
         {
-            const auto dst_reset_step =
-                make_tensor_coordinate_step(dst_desc, GetCoordinateResetStep());
-
-            move_tensor_coordinate(dst_desc, dst_coord_, dst_reset_step);
+            move_tensor_coordinate(dst_desc, dst_coord_, GetCoordinateResetStep());
         }
     }
@@ -210,12 +198,9 @@ struct ThreadwiseTensorSliceTransfer_v6r2
                                            const Index& src0_slice_origin_step_idx)
     {
         // if src coord was not reset by RunRead(), then need to adjust the step here
-        const auto adjusted_step_idx = Src0ResetCoordinateAfterRun
-                                           ? src0_slice_origin_step_idx
-                                           : src0_slice_origin_step_idx + GetCoordinateResetStep();
-
-        // is it OK to construct a new step every time?
-        const auto adjusted_step = make_tensor_coordinate_step(src0_desc, adjusted_step_idx);
+        const auto adjusted_step = Src0ResetCoordinateAfterRun
+                                       ? src0_slice_origin_step_idx
+                                       : src0_slice_origin_step_idx + GetCoordinateResetStep();

         move_tensor_coordinate(src0_desc, src0_coord_, adjusted_step);
     }
@@ -225,12 +210,9 @@ struct ThreadwiseTensorSliceTransfer_v6r2
                                            const Index& src1_slice_origin_step_idx)
     {
         // if src coord was not reset by RunRead(), then need to adjust the step here
-        const auto adjusted_step_idx = Src1ResetCoordinateAfterRun
-                                           ? src1_slice_origin_step_idx
-                                           : src1_slice_origin_step_idx + GetCoordinateResetStep();
-
-        // is it OK to construct a new step every time?
-        const auto adjusted_step = make_tensor_coordinate_step(src1_desc, adjusted_step_idx);
+        const auto adjusted_step = Src1ResetCoordinateAfterRun
+                                       ? src1_slice_origin_step_idx
+                                       : src1_slice_origin_step_idx + GetCoordinateResetStep();

         move_tensor_coordinate(src1_desc, src1_coord_, adjusted_step);
     }
@@ -240,12 +222,9 @@ struct ThreadwiseTensorSliceTransfer_v6r2
                                            const Index& dst_slice_origin_step_idx)
     {
         // if dst coord was not reset by Run(), then need to adjust the step here
-        const auto adjusted_step_idx = DstResetCoordinateAfterRun
-                                           ? dst_slice_origin_step_idx
-                                           : dst_slice_origin_step_idx + GetCoordinateResetStep();
-
-        // is it OK to construct a new step every time?
-        const auto adjusted_step = make_tensor_coordinate_step(dst_desc, adjusted_step_idx);
+        const auto adjusted_step = DstResetCoordinateAfterRun
+                                       ? dst_slice_origin_step_idx
+                                       : dst_slice_origin_step_idx + GetCoordinateResetStep();

         move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step);
     }
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp
@@ -171,48 +171,32 @@ struct ThreadwiseTensorSliceTransfer_v6r3
             if constexpr(idx_1d.value != num_access - 1)
             {
                 constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);

-                move_tensor_coordinate(
-                    src0_desc, src0_coord_, make_tensor_coordinate_step(src0_desc, forward_step));
-                move_tensor_coordinate(
-                    src1_desc, src1_coord_, make_tensor_coordinate_step(src1_desc, forward_step));
-                move_tensor_coordinate(
-                    src2_desc, src2_coord_, make_tensor_coordinate_step(src2_desc, forward_step));
-                move_tensor_coordinate(
-                    dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
+                move_tensor_coordinate(src0_desc, src0_coord_, forward_step);
+                move_tensor_coordinate(src1_desc, src1_coord_, forward_step);
+                move_tensor_coordinate(src2_desc, src2_coord_, forward_step);
+                move_tensor_coordinate(dst_desc, dst_coord_, forward_step);
             }
         });

         // move coordinate back to slice origin (or not)
         if constexpr(Src0ResetCoordinateAfterRun)
         {
-            const auto src0_reset_step =
-                make_tensor_coordinate_step(src0_desc, GetCoordinateResetStep());
-
-            move_tensor_coordinate(src0_desc, src0_coord_, src0_reset_step);
+            move_tensor_coordinate(src0_desc, src0_coord_, GetCoordinateResetStep());
         }

         if constexpr(Src1ResetCoordinateAfterRun)
         {
-            const auto src1_reset_step =
-                make_tensor_coordinate_step(src1_desc, GetCoordinateResetStep());
-
-            move_tensor_coordinate(src1_desc, src1_coord_, src1_reset_step);
+            move_tensor_coordinate(src1_desc, src1_coord_, GetCoordinateResetStep());
         }

         if constexpr(Src2ResetCoordinateAfterRun)
         {
-            const auto src2_reset_step =
-                make_tensor_coordinate_step(src2_desc, GetCoordinateResetStep());
-
-            move_tensor_coordinate(src2_desc, src2_coord_, src2_reset_step);
+            move_tensor_coordinate(src2_desc, src2_coord_, GetCoordinateResetStep());
         }

         if constexpr(DstResetCoordinateAfterRun)
         {
-            const auto dst_reset_step =
-                make_tensor_coordinate_step(dst_desc, GetCoordinateResetStep());
-
-            move_tensor_coordinate(dst_desc, dst_coord_, dst_reset_step);
+            move_tensor_coordinate(dst_desc, dst_coord_, GetCoordinateResetStep());
         }
     }
@@ -244,12 +228,9 @@ struct ThreadwiseTensorSliceTransfer_v6r3
                                            const Index& src0_slice_origin_step_idx)
     {
         // if src coord was not reset by RunRead(), then need to adjust the step here
-        const auto adjusted_step_idx = Src0ResetCoordinateAfterRun
-                                           ? src0_slice_origin_step_idx
-                                           : src0_slice_origin_step_idx + GetCoordinateResetStep();
-
-        // is it OK to construct a new step every time?
-        const auto adjusted_step = make_tensor_coordinate_step(src0_desc, adjusted_step_idx);
+        const auto adjusted_step = Src0ResetCoordinateAfterRun
+                                       ? src0_slice_origin_step_idx
+                                       : src0_slice_origin_step_idx + GetCoordinateResetStep();

         move_tensor_coordinate(src0_desc, src0_coord_, adjusted_step);
     }
@@ -259,12 +240,9 @@ struct ThreadwiseTensorSliceTransfer_v6r3
                                            const Index& src1_slice_origin_step_idx)
     {
         // if src coord was not reset by RunRead(), then need to adjust the step here
-        const auto adjusted_step_idx = Src1ResetCoordinateAfterRun
-                                           ? src1_slice_origin_step_idx
-                                           : src1_slice_origin_step_idx + GetCoordinateResetStep();
-
-        // is it OK to construct a new step every time?
-        const auto adjusted_step = make_tensor_coordinate_step(src1_desc, adjusted_step_idx);
+        const auto adjusted_step = Src1ResetCoordinateAfterRun
+                                       ? src1_slice_origin_step_idx
+                                       : src1_slice_origin_step_idx + GetCoordinateResetStep();

         move_tensor_coordinate(src1_desc, src1_coord_, adjusted_step);
     }
@@ -274,12 +252,9 @@ struct ThreadwiseTensorSliceTransfer_v6r3
                                            const Index& src2_slice_origin_step_idx)
     {
         // if src coord was not reset by RunRead(), then need to adjust the step here
-        const auto adjusted_step_idx = Src2ResetCoordinateAfterRun
-                                           ? src2_slice_origin_step_idx
-                                           : src2_slice_origin_step_idx + GetCoordinateResetStep();
-
-        // is it OK to construct a new step every time?
-        const auto adjusted_step = make_tensor_coordinate_step(src2_desc, adjusted_step_idx);
+        const auto adjusted_step = Src2ResetCoordinateAfterRun
+                                       ? src2_slice_origin_step_idx
+                                       : src2_slice_origin_step_idx + GetCoordinateResetStep();

         move_tensor_coordinate(src2_desc, src2_coord_, adjusted_step);
     }
@@ -289,12 +264,9 @@ struct ThreadwiseTensorSliceTransfer_v6r3
                                            const Index& dst_slice_origin_step_idx)
     {
         // if dst coord was not reset by Run(), then need to adjust the step here
-        const auto adjusted_step_idx = DstResetCoordinateAfterRun
-                                           ? dst_slice_origin_step_idx
-                                           : dst_slice_origin_step_idx + GetCoordinateResetStep();
-
-        // is it OK to construct a new step every time?
-        const auto adjusted_step = make_tensor_coordinate_step(dst_desc, adjusted_step_idx);
+        const auto adjusted_step = DstResetCoordinateAfterRun
+                                       ? dst_slice_origin_step_idx
+                                       : dst_slice_origin_step_idx + GetCoordinateResetStep();

         move_tensor_coordinate(dst_desc, dst_coord_, adjusted_step);
     }
include/ck/tile_program/tile/tile_window_impl_static_distribution.hpp
@@ -6,6 +6,7 @@
 #include "ck/utility/common_header.hpp"
 #include "ck/tensor_description/tensor_adaptor.hpp"
 #include "ck/tensor_description/tensor_adaptor_coordinate.hpp"
+#include "ck/tensor_description/tensor_space_filling_curve.hpp"
 #include "ck/tile_program/tile/tile_distribution.hpp"
 #include "ck/tile_program/tile/static_tile_distribution_helper.hpp"
include/ck/utility/array_multi_index.hpp
@@ -75,4 +75,20 @@ __host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& a, const T
     return r;
 }

+// MultiIndex = index_t * MultiIndex
+template <index_t NSize>
+__host__ __device__ constexpr auto operator*(index_t a, const MultiIndex<NSize>& x)
+{
+    MultiIndex<NSize> r;
+    static_for<0, NSize, 1>{}([&](auto i) { r(i) = a * x[i]; });
+    return r;
+}
+
+// MultiIndex = MultiIndex * index_t
+template <index_t NSize>
+__host__ __device__ constexpr auto operator*(const MultiIndex<NSize>& x, index_t a)
+{
+    return a * x;
+}
+
 } // namespace ck
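For reference, a standalone analogue of the two overloads added above, written against a tiny hypothetical ToyMultiIndex with a plain loop instead of ck::MultiIndex and ck::static_for; as in the CK change, the second overload just forwards to the first:

#include <cstddef>

// Standalone sketch (not CK code): scalar * index and index * scalar for a
// small fixed-size index type.
template <std::size_t NSize>
struct ToyMultiIndex
{
    int data[NSize];
    constexpr int operator[](std::size_t i) const { return data[i]; }
};

// ToyMultiIndex = scalar * ToyMultiIndex
template <std::size_t NSize>
constexpr ToyMultiIndex<NSize> operator*(int a, const ToyMultiIndex<NSize>& x)
{
    ToyMultiIndex<NSize> r{};
    for(std::size_t i = 0; i < NSize; ++i) { r.data[i] = a * x[i]; }
    return r;
}

// ToyMultiIndex = ToyMultiIndex * scalar: forwards to the overload above,
// exactly like the second new CK overload.
template <std::size_t NSize>
constexpr ToyMultiIndex<NSize> operator*(const ToyMultiIndex<NSize>& x, int a)
{
    return a * x;
}

static_assert((2 * ToyMultiIndex<3>{{1, 2, 3}})[1] == 4, "scalar * index");
static_assert((ToyMultiIndex<3>{{1, 2, 3}} * 2)[2] == 6, "index * scalar");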
include/ck/utility/buffer_view_impl_global.hpp
@@ -239,7 +239,7 @@ struct BufferView<AddressSpaceEnum::Global,
         {
             constexpr index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;

-            amd_buffer_atomic_add<remove_cvref_t<T>, t_per_x, Coherence>(
+            amd_buffer_atomic_add<remove_cvref_t<T>, t_per_x>(
                 x, p_data_, i, is_valid_element, buffer_size_);
         }
         else
include/ck/utility/container_helper.hpp
@@ -471,7 +471,7 @@ __host__ __device__ constexpr auto sequence_to_tuple_of_number(Sequence<Is...>)
             constexpr index_t tmp = Seq::At(i);
             return Number<tmp>{};
         },
-        Seq::Size());
+        Number<Seq::Size()>{});
 }

 } // namespace ck
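The one-line change above replaces the plain Seq::Size() argument with Number<Seq::Size()>{}, presumably so the element count reaches the tuple-generating helper as a compile-time constant type rather than a runtime value. A standalone sketch of why that distinction matters, using std::integral_constant and a hypothetical toy_generate in place of the CK utilities:

#include <cstddef>
#include <tuple>
#include <type_traits>
#include <utility>

// Standalone sketch (not CK code): the count arrives as a type, so the callee
// can expand an index sequence at compile time; a runtime std::size_t could not
// drive this expansion.
template <class F, std::size_t... Is>
constexpr auto toy_generate_impl(F f, std::index_sequence<Is...>)
{
    return std::make_tuple(f(std::integral_constant<std::size_t, Is>{})...);
}

template <class F, std::size_t N>
constexpr auto toy_generate(F f, std::integral_constant<std::size_t, N>)
{
    return toy_generate_impl(f, std::make_index_sequence<N>{});
}

// usage: the count argument is analogous to Number<Seq::Size()>{}
constexpr auto t =
    toy_generate([](auto i) { return i() * 10; }, std::integral_constant<std::size_t, 3>{});
static_assert(std::get<2>(t) == 20, "element 2 is 2 * 10");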
include/ck/utility/data_type.hpp
@@ -9,6 +9,7 @@
 namespace ck {

+using int64_t = long;
 using bhalf_t = ushort;
 using half_t  = _Float16;

 #ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
@@ -122,6 +123,13 @@ struct scalar_type<bhalf_t>
     static constexpr index_t vector_size = 1;
 };

+template <>
+struct scalar_type<int64_t>
+{
+    using type                            = int64_t;
+    static constexpr index_t vector_size = 1;
+};
+
 template <>
 struct scalar_type<int32_t>
 {
@@ -908,8 +916,6 @@ struct vector_type<T, 256>
     }
 };

-using int64_t = long;
-
 // fp64
 using double2_t = typename vector_type<double, 2>::type;
 using double4_t = typename vector_type<double, 4>::type;
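Taken together, the three hunks above hoist the int64_t alias to the top of namespace ck, add a scalar_type<int64_t> specialization that uses it, and drop the later duplicate alias; the alias must already be visible where the specialization names it. A minimal standalone sketch of that ordering constraint, with hypothetical names:

// Standalone sketch (not CK code): the alias has to precede any specialization
// that refers to it, which is why the commit moves it up and removes the
// duplicate definition further down.
namespace toy
{
using int64_t = long; // hoisted alias (width is platform-dependent, as in CK)

template <typename T>
struct toy_scalar_type;

template <>
struct toy_scalar_type<int64_t>
{
    using type                       = int64_t;
    static constexpr int vector_size = 1;
};
} // namespace toy

static_assert(toy::toy_scalar_type<toy::int64_t>::vector_size == 1, "specialization is usable");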
include/ck/utility/magic_division.hpp
@@ -4,10 +4,11 @@
 #pragma once

 #include "ck/ck.hpp"

-#include "integral_constant.hpp"
-#include "number.hpp"
-#include "type.hpp"
-#include "tuple.hpp"
+#include "ck/utility/integral_constant.hpp"
+#include "ck/utility/number.hpp"
+#include "ck/utility/type.hpp"
+#include "ck/utility/tuple.hpp"
+#include "ck/utility/bit_cast.hpp"

 namespace ck {
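The include changes above switch to fully qualified ck/utility/ paths and pull in a new bit_cast header. For readers unfamiliar with the idiom, a standalone illustration of what a bit_cast does, using C++20's std::bit_cast rather than CK's own utility:

#include <bit>
#include <cstdint>

// Standalone illustration (not CK code): bit_cast reinterprets the object
// representation of a value as another trivially copyable type of equal size.
static_assert(sizeof(float) == sizeof(std::uint32_t), "sizes must match");

// 1.0f is encoded as 0x3f800000 in IEEE-754 single precision.
constexpr std::uint32_t bits_of_one = std::bit_cast<std::uint32_t>(1.0f);
static_assert(bits_of_one == 0x3f800000u, "IEEE-754 encoding of 1.0f");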
library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp
@@ -7,6 +7,7 @@
 #include <type_traits>

 #include "ck/utility/functional2.hpp"
+#include "ck/utility/remove_cvref.hpp"

 namespace ck {
 namespace tensor_operation {
test/block_to_ctile_map/test_block_to_ctile_map.cpp
@@ -6,6 +6,7 @@
 #include <gtest/gtest.h>

 #include "ck/ck.hpp"
 #include "ck/tensor_description/tensor_descriptor_helper.hpp"
+#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"

 using namespace ck;