Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
c1e24c09
"...composable_kernel_rocm.git" did not exist on "7a7fe160866b7b2893be698d77b70cc8cf754fb5"
Commit
c1e24c09
authored
Dec 11, 2020
by
Jing Zhang
Browse files
clean code
parent
7abc0752
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
35 additions
and
95 deletions
+35
-95
composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy_v2.hpp
...nsor_operation/blockwise_generic_tensor_slice_copy_v2.hpp
+13
-15
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_v2.hpp
...sor_operation/threadwise_generic_tensor_slice_copy_v2.hpp
+22
-80
No files found.
composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy_v2.hpp
View file @
c1e24c09
...
@@ -85,7 +85,7 @@ struct BlockwiseGenericTensorSliceCopy_v5
...
@@ -85,7 +85,7 @@ struct BlockwiseGenericTensorSliceCopy_v5
if
(
BlockSize
==
mThreadClusterDesc
.
GetElementSize
()
or
if
(
BlockSize
==
mThreadClusterDesc
.
GetElementSize
()
or
get_thread_local_1d_id
()
<
mThreadClusterDesc
.
GetElementSize
())
get_thread_local_1d_id
()
<
mThreadClusterDesc
.
GetElementSize
())
{
{
mThreadwiseCopy
.
Load
(
p_block_src
,
p_thread_buffer
);
mThreadwiseCopy
.
Load
(
p_block_src
);
}
}
}
}
...
@@ -96,7 +96,7 @@ struct BlockwiseGenericTensorSliceCopy_v5
...
@@ -96,7 +96,7 @@ struct BlockwiseGenericTensorSliceCopy_v5
if
(
BlockSize
==
mThreadClusterDesc
.
GetElementSize
()
or
if
(
BlockSize
==
mThreadClusterDesc
.
GetElementSize
()
or
get_thread_local_1d_id
()
<
mThreadClusterDesc
.
GetElementSize
())
get_thread_local_1d_id
()
<
mThreadClusterDesc
.
GetElementSize
())
{
{
mThreadwiseCopy
.
Store
(
p_thread_buffer
,
p_block_dst
);
mThreadwiseCopy
.
Store
(
p_block_dst
);
}
}
}
}
...
@@ -115,8 +115,6 @@ struct BlockwiseGenericTensorSliceCopy_v5
...
@@ -115,8 +115,6 @@ struct BlockwiseGenericTensorSliceCopy_v5
get_thread_local_1d_id
()
<
mThreadClusterDesc
.
GetElementSize
())
get_thread_local_1d_id
()
<
mThreadClusterDesc
.
GetElementSize
())
{
{
RunLoadThreadBuffer
(
p_block_src
,
p_thread_buffer
);
RunLoadThreadBuffer
(
p_block_src
,
p_thread_buffer
);
// if there is type conversion, it's done during store
RunStoreThreadBuffer
(
p_thread_buffer
,
p_block_dst
);
RunStoreThreadBuffer
(
p_thread_buffer
,
p_block_dst
);
}
}
}
}
...
...
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_v2.hpp
View file @
c1e24c09
...
@@ -75,8 +75,8 @@ struct ThreadwiseGenericTensorSliceCopy_v5
...
@@ -75,8 +75,8 @@ struct ThreadwiseGenericTensorSliceCopy_v5
mDstSliceOrigin
=
dst_slice_origin
;
mDstSliceOrigin
=
dst_slice_origin
;
}
}
template
<
typename
SrcData
,
typename
DstData
>
template
<
typename
SrcData
>
__device__
void
Load
(
const
SrcData
*
p_src
,
DstData
*
p_dst
)
__device__
void
Load
(
const
SrcData
*
p_src
)
{
{
constexpr
auto
vector_access_dim
=
Number
<
SrcDstVectorReadWriteDim
>
{};
constexpr
auto
vector_access_dim
=
Number
<
SrcDstVectorReadWriteDim
>
{};
...
@@ -92,18 +92,17 @@ struct ThreadwiseGenericTensorSliceCopy_v5
...
@@ -92,18 +92,17 @@ struct ThreadwiseGenericTensorSliceCopy_v5
ford
<
decltype
(
long_vector_access_lengths
),
SrcDstDimAccessOrder
>
{}(
ford
<
decltype
(
long_vector_access_lengths
),
SrcDstDimAccessOrder
>
{}(
[
&
](
auto
long_vector_access_id
)
{
[
&
](
auto
long_vector_access_id
)
{
// data id w.r.t slicing-window
// data id w.r.t slicing-window
auto
long_vector_data_begin_id
=
long_vector_access_id
;
auto
long_vector_data_begin_id
=
long_vector_access_id
;
long_vector_data_begin_id
(
vector_access_dim
)
=
long_vector_data_begin_id
(
vector_access_dim
)
=
long_vector_size
*
long_vector_access_id
[
vector_access_dim
];
long_vector_size
*
long_vector_access_id
[
vector_access_dim
];
// buffer to hold a src long-vector
// buffer to hold a src long-vector
SrcData
p_src_
long_vector
[
long_vector_size
];
SrcData
long_vector
[
long_vector_size
];
#if 1
#if 1
// zero out buffer
// zero out buffer
static_for
<
0
,
long_vector_size
,
1
>
{}([
&
](
auto
i
)
{
p_src_
long_vector
[
i
]
=
0
;
});
static_for
<
0
,
long_vector_size
,
1
>
{}([
&
](
auto
i
)
{
long_vector
[
i
]
=
0
;
});
#endif
#endif
// load data from src to the long-vector buffer
// load data from src to the long-vector buffer
...
@@ -130,19 +129,12 @@ struct ThreadwiseGenericTensorSliceCopy_v5
...
@@ -130,19 +129,12 @@ struct ThreadwiseGenericTensorSliceCopy_v5
src_coord
.
GetOffset
(),
src_coord
.
GetOffset
(),
src_coord
.
IsOffsetValidAssumingUpperIndexIsValid
(),
src_coord
.
IsOffsetValidAssumingUpperIndexIsValid
(),
SrcDesc
::
GetElementSpace
(),
SrcDesc
::
GetElementSpace
(),
p_src_
long_vector
,
long_vector
,
buffer_offset
,
buffer_offset
,
true
,
true
,
long_vector_size
);
long_vector_size
);
});
});
// SrcData to DstData conversion
DstData
p_dst_long_vector
[
long_vector_size
];
static_for
<
0
,
long_vector_size
,
1
>
{}([
&
](
auto
i
)
{
p_dst_long_vector
[
i
]
=
type_convert
<
DstData
>
{}(
p_src_long_vector
[
i
]);
});
// store data from the long-vector buffer to dst
// store data from the long-vector buffer to dst
static_for
<
0
,
long_vector_size
/
dst_data_per_access
,
1
>
{}([
&
](
auto
i
)
{
static_for
<
0
,
long_vector_size
/
dst_data_per_access
,
1
>
{}([
&
](
auto
i
)
{
auto
scalar_id
=
make_zero_multi_index
<
nDim
>
();
auto
scalar_id
=
make_zero_multi_index
<
nDim
>
();
...
@@ -153,34 +145,15 @@ struct ThreadwiseGenericTensorSliceCopy_v5
...
@@ -153,34 +145,15 @@ struct ThreadwiseGenericTensorSliceCopy_v5
const
auto
dst_coord
=
const
auto
dst_coord
=
mDstSliceOrigin
+
(
long_vector_data_begin_id
+
scalar_id
);
mDstSliceOrigin
+
(
long_vector_data_begin_id
+
scalar_id
);
auto
buff_off
=
auto
buff_off
=
ThreadBufferDesc
::
CalculateOffset
(
long_vector_data_begin_id
+
scalar_id
);
ThreadBufferDesc
::
CalculateOffset
(
long_vector_data_begin_id
+
scalar_id
);
thread_buff
[
buff_off
]
=
p_dst_long_vector
[
buffer_offset
];
thread_buff
[
buff_off
]
=
long_vector
[
buffer_offset
];
// Check dst data's valid mapping situation, only check the first data in this
// dst
// vector. It's user's responsiblity to make sure all data in the dst vector
// has the valid/invalid mapping situation
//transfer_data<DstData,
//DstDataPerWrite,
//AddressSpace::Vgpr,
//DstAddressSpace,
//DstInMemOp,
//1,
//DstDataStride>(p_dst_long_vector,
//buffer_offset,
//true,
//long_vector_size,
//thread_buff,
//dst_coord.GetOffset(),
//dst_coord.IsOffsetValidAssumingUpperIndexIsValid(),
//DstDesc::GetElementSpace());
});
});
});
});
}
}
template
<
typename
SrcData
,
typename
DstData
>
template
<
typename
DstData
>
__device__
void
Store
(
const
SrcData
*
p_src
,
DstData
*
p_dst
)
__device__
void
Store
(
DstData
*
p_dst
)
{
{
constexpr
auto
vector_access_dim
=
Number
<
SrcDstVectorReadWriteDim
>
{};
constexpr
auto
vector_access_dim
=
Number
<
SrcDstVectorReadWriteDim
>
{};
...
@@ -196,18 +169,17 @@ struct ThreadwiseGenericTensorSliceCopy_v5
...
@@ -196,18 +169,17 @@ struct ThreadwiseGenericTensorSliceCopy_v5
ford
<
decltype
(
long_vector_access_lengths
),
SrcDstDimAccessOrder
>
{}(
ford
<
decltype
(
long_vector_access_lengths
),
SrcDstDimAccessOrder
>
{}(
[
&
](
auto
long_vector_access_id
)
{
[
&
](
auto
long_vector_access_id
)
{
// data id w.r.t slicing-window
// data id w.r.t slicing-window
auto
long_vector_data_begin_id
=
long_vector_access_id
;
auto
long_vector_data_begin_id
=
long_vector_access_id
;
long_vector_data_begin_id
(
vector_access_dim
)
=
long_vector_data_begin_id
(
vector_access_dim
)
=
long_vector_size
*
long_vector_access_id
[
vector_access_dim
];
long_vector_size
*
long_vector_access_id
[
vector_access_dim
];
// buffer to hold a src long-vector
// buffer to hold a src long-vector
Src
Data
p_src_
long_vector
[
long_vector_size
];
Dst
Data
long_vector
[
long_vector_size
];
#if 1
#if 1
// zero out buffer
// zero out buffer
static_for
<
0
,
long_vector_size
,
1
>
{}([
&
](
auto
i
)
{
p_src_
long_vector
[
i
]
=
0
;
});
static_for
<
0
,
long_vector_size
,
1
>
{}([
&
](
auto
i
)
{
long_vector
[
i
]
=
0
;
});
#endif
#endif
// load data from src to the long-vector buffer
// load data from src to the long-vector buffer
...
@@ -217,40 +189,10 @@ struct ThreadwiseGenericTensorSliceCopy_v5
...
@@ -217,40 +189,10 @@ struct ThreadwiseGenericTensorSliceCopy_v5
const
index_t
buffer_offset
=
i
*
src_data_per_access
;
const
index_t
buffer_offset
=
i
*
src_data_per_access
;
auto
buff_off
=
ThreadBufferDesc
::
CalculateOffset
(
long_vector_data_begin_id
+
scalar_id
);
auto
buff_off
=
ThreadBufferDesc
::
CalculateOffset
(
long_vector_data_begin_id
+
scalar_id
);
p_src_long_vector
[
buffer_offset
]
=
thread_buff
[
buff_off
];
//const auto src_coord =
//mSrcSliceOrigin + (long_vector_data_begin_id + scalar_id);
// Check src data's valid mapping situation, only check the first data in this
// src
// vector. It's user's responsiblity to make sure all data in the src vector
// has the valid/invalid mapping situation
//transfer_data<SrcData,
//SrcDataPerRead,
//SrcAddressSpace,
//AddressSpace::Vgpr,
//InMemoryDataOperation::Set,
//SrcDataStride,
//1>(thread_buff,
//src_coord.GetOffset(),
//src_coord.IsOffsetValidAssumingUpperIndexIsValid(),
//SrcDesc::GetElementSpace(),
//p_src_long_vector,
//buffer_offset,
//true,
//long_vector_size);
});
// SrcData to DstData conversion
DstData
p_dst_long_vector
[
long_vector_size
];
static_for
<
0
,
long_vector_size
,
1
>
{}([
&
](
auto
i
)
{
long_vector
[
buffer_offset
]
=
thread_buff
[
buff_off
];
p_dst_long_vector
[
i
]
=
type_convert
<
DstData
>
{}(
p_src_long_vector
[
i
]);
});
});
// store data from the long-vector buffer to dst
// store data from the long-vector buffer to dst
...
@@ -273,7 +215,7 @@ struct ThreadwiseGenericTensorSliceCopy_v5
...
@@ -273,7 +215,7 @@ struct ThreadwiseGenericTensorSliceCopy_v5
DstAddressSpace
,
DstAddressSpace
,
DstInMemOp
,
DstInMemOp
,
1
,
1
,
DstDataStride
>
(
p_dst_
long_vector
,
DstDataStride
>
(
long_vector
,
buffer_offset
,
buffer_offset
,
true
,
true
,
long_vector_size
,
long_vector_size
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment