Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
80120f0a
"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "f5a1343f97bf315b94a4f8a00663bd0f106155cf"
Commit
80120f0a
authored
Aug 09, 2021
by
Chao Liu
Browse files
tidy
parent
c3efeb5e
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
54 additions
and
37 deletions
+54
-37
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_dlops_v1r2.hpp
...ude/tensor_operation/gridwise_dynamic_gemm_dlops_v1r2.hpp
+6
-6
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_dlops_v1r3.hpp
...ude/tensor_operation/gridwise_dynamic_gemm_dlops_v1r3.hpp
+6
-6
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp
...de/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp
+8
-8
composable_kernel/include/utility/amd_address_space.hpp
composable_kernel/include/utility/amd_address_space.hpp
+25
-0
composable_kernel/include/utility/amd_buffer_addressing.hpp
composable_kernel/include/utility/amd_buffer_addressing.hpp
+4
-4
composable_kernel/include/utility/common_header.hpp
composable_kernel/include/utility/common_header.hpp
+3
-2
composable_kernel/include/utility/config.hpp
composable_kernel/include/utility/config.hpp
+1
-10
composable_kernel/include/utility/dynamic_buffer.hpp
composable_kernel/include/utility/dynamic_buffer.hpp
+1
-1
No files found.
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_dlops_v1r2.hpp
View file @
80120f0a
...
...
@@ -80,16 +80,16 @@ __global__ void
// first cast void CONSTANT* to void*
// second cast void* to Desc*
// the copy constructor of tensor descriptor doesn't take address_space(4)
const
auto
a_k_m0_m1_grid_desc
=
*
reinterpret_cast
<
const
AKM0M1GridDesc
*>
((
const
void
*
)
p_a_k_m0_m1_grid_desc
);
const
auto
b_k_n0_n1_grid_desc
=
*
reinterpret_cast
<
const
BKN0N1GridDesc
*>
((
const
void
*
)
p_b_k_n0_n1_grid_desc
);
const
auto
a_k_m0_m1_grid_desc
=
*
reinterpret_cast
<
const
AKM0M1GridDesc
*>
(
cast_pointer_to_generic_address_space
(
p_a_k_m0_m1_grid_desc
)
)
;
const
auto
b_k_n0_n1_grid_desc
=
*
reinterpret_cast
<
const
BKN0N1GridDesc
*>
(
cast_pointer_to_generic_address_space
(
p_b_k_n0_n1_grid_desc
)
)
;
const
auto
c_m0_m10_m11_n0_n10_n11_grid_desc
=
*
reinterpret_cast
<
const
CM0M10M11N0N10N11GridDesc
*>
(
(
const
void
*
)
p_c_m0_m10_m11_n0_n10_n11_grid_desc
);
cast_pointer_to_generic_address_space
(
p_c_m0_m10_m11_n0_n10_n11_grid_desc
)
)
;
const
auto
c_blockid_to_m0_n0_block_cluster_adaptor
=
*
reinterpret_cast
<
const
CBlockIdToM0N0BlockClusterAdaptor
*>
(
(
const
void
*
)
p_c_blockid_to_m0_n0_block_cluster_adaptor
);
cast_pointer_to_generic_address_space
(
p_c_blockid_to_m0_n0_block_cluster_adaptor
)
)
;
constexpr
index_t
shared_block_size
=
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()
/
sizeof
(
FloatAB
);
...
...
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_dlops_v1r3.hpp
View file @
80120f0a
...
...
@@ -80,16 +80,16 @@ __global__ void
// first cast void CONSTANT* to void*
// second cast void* to Desc*
// the copy constructor of tensor descriptor doesn't take address_space(4)
const
auto
a_k0_m0_m1_k1_grid_desc
=
*
reinterpret_cast
<
const
AK0M0M1K1GridDesc
*>
((
const
void
*
)
p_a_k0_m0_m1_k1_grid_desc
);
const
auto
b_k0_n0_n1_k1_grid_desc
=
*
reinterpret_cast
<
const
BK0N0N1K1GridDesc
*>
((
const
void
*
)
p_b_k0_n0_n1_k1_grid_desc
);
const
auto
a_k0_m0_m1_k1_grid_desc
=
*
reinterpret_cast
<
const
AK0M0M1K1GridDesc
*>
(
cast_pointer_to_generic_address_space
(
p_a_k0_m0_m1_k1_grid_desc
)
)
;
const
auto
b_k0_n0_n1_k1_grid_desc
=
*
reinterpret_cast
<
const
BK0N0N1K1GridDesc
*>
(
cast_pointer_to_generic_address_space
(
p_b_k0_n0_n1_k1_grid_desc
)
)
;
const
auto
c_m0_m10_m11_n0_n10_n11_grid_desc
=
*
reinterpret_cast
<
const
CM0M10M11N0N10N11GridDesc
*>
(
(
const
void
*
)
p_c_m0_m10_m11_n0_n10_n11_grid_desc
);
cast_pointer_to_generic_address_space
(
p_c_m0_m10_m11_n0_n10_n11_grid_desc
)
)
;
const
auto
c_blockid_to_m0_n0_block_cluster_adaptor
=
*
reinterpret_cast
<
const
CBlockIdToM0N0BlockClusterAdaptor
*>
(
(
const
void
*
)
p_c_blockid_to_m0_n0_block_cluster_adaptor
);
cast_pointer_to_generic_address_space
(
p_c_blockid_to_m0_n0_block_cluster_adaptor
)
)
;
constexpr
index_t
shared_block_size
=
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()
/
sizeof
(
FloatAB
);
...
...
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_xdlops_v2r3.hpp
View file @
80120f0a
...
...
@@ -69,14 +69,14 @@ __global__ void
constexpr
index_t
shared_block_size
=
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()
/
sizeof
(
FloatAB
);
const
auto
a_k0_m_k1_grid_desc
=
*
reinterpret_cast
<
const
AK0MK1GridDesc
*>
((
const
void
*
)
p_a_k0_m_k1_grid_desc
);
const
auto
b_k0_n_k1_grid_desc
=
*
reinterpret_cast
<
const
BK0NK1GridDesc
*>
((
const
void
*
)
p_b_k0_n_k1_grid_desc
);
const
auto
c_m0_m1_m2_n_grid_desc
=
*
reinterpret_cast
<
const
CM0M1M2NGridDesc
*>
((
const
void
*
)
p_c_m0_m1_m2_n_grid_desc
);
const
auto
c_block_cluster_adaptor
=
*
reinterpret_cast
<
const
CBlockClusterAdaptor
*>
((
const
void
*
)
p_c_block_cluster_adaptor
);
const
auto
a_k0_m_k1_grid_desc
=
*
reinterpret_cast
<
const
AK0MK1GridDesc
*>
(
cast_pointer_to_generic_address_space
(
p_a_k0_m_k1_grid_desc
)
)
;
const
auto
b_k0_n_k1_grid_desc
=
*
reinterpret_cast
<
const
BK0NK1GridDesc
*>
(
cast_pointer_to_generic_address_space
(
p_b_k0_n_k1_grid_desc
)
)
;
const
auto
c_m0_m1_m2_n_grid_desc
=
*
reinterpret_cast
<
const
CM0M1M2NGridDesc
*>
(
cast_pointer_to_generic_address_space
(
p_c_m0_m1_m2_n_grid_desc
)
)
;
const
auto
c_block_cluster_adaptor
=
*
reinterpret_cast
<
const
CBlockClusterAdaptor
*>
(
cast_pointer_to_generic_address_space
(
p_c_block_cluster_adaptor
)
)
;
__shared__
FloatAB
p_shared_block
[
shared_block_size
];
...
...
composable_kernel/include/utility/amd_address_space.hpp
0 → 100644
View file @
80120f0a
#ifndef CK_AMD_ADDRESS_SPACE_HPP
#define CK_AMD_ADDRESS_SPACE_HPP
#include "config.hpp"
namespace
ck
{
// Tags naming the address spaces that ck code can select between.
// The numeric values are spelled out explicitly; they are the same 0..4
// values the enumerators receive implicitly from their declaration order.
// NOTE(review): presumably Lds/Sgpr/Vgpr denote the AMD GPU local data share
// and scalar/vector register files -- confirm against the users of this enum.
enum AddressSpaceEnum_t
{
    Generic = 0,
    Global  = 1,
    Lds     = 2,
    Sgpr    = 3,
    Vgpr    = 4
};
// Converts a pointer whose pointee is qualified with CONSTANT into a plain
// T* carrying no address-space qualifier.
//
// CONSTANT comes from config.hpp, where it expands to
// __attribute__((address_space(4))).
//
// @tparam T  pointee type; deduced from the argument, cv-qualifiers included.
// @param  p  pointer into the CONSTANT address space.
// @return    the same pointer value, typed as an unqualified T*.
template <typename T>
__device__ T* cast_pointer_to_generic_address_space(T CONSTANT* p)
{
    // NOTE(review): a C-style cast is kept on purpose; presumably the named
    // C++ casts cannot change the address space of the pointee -- confirm
    // before replacing it.
    T* p_generic = (T*)p;

    return p_generic;
}
}
// namespace ck
#endif
composable_kernel/include/utility/amd_buffer_addressing
_v2
.hpp
→
composable_kernel/include/utility/amd_buffer_addressing.hpp
View file @
80120f0a
#ifndef CK_AMD_BUFFER_ADDRESSING_
V2_
HPP
#define CK_AMD_BUFFER_ADDRESSING_
V2_
HPP
#ifndef CK_AMD_BUFFER_ADDRESSING_HPP
#define CK_AMD_BUFFER_ADDRESSING_HPP
#include "data_type.hpp"
namespace
ck
{
template
<
typename
T
>
union
BufferResource
_v2
union
BufferResource
{
// 128 bit SGPRs to supply buffer resource in buffer instructions
// https://rocm-documentation.readthedocs.io/en/latest/GCN_ISA_Manuals/testdocbook.html#vector-memory-buffer-instructions
...
...
@@ -19,7 +19,7 @@ union BufferResource_v2
template
<
typename
T
>
__device__
int32x4_t
make_wave_buffer_resource
(
T
*
p_wave
,
index_t
data_space_size
)
{
BufferResource
_v2
<
T
>
wave_buffer_resource
;
BufferResource
<
T
>
wave_buffer_resource
;
// wavewise base address (64 bit)
wave_buffer_resource
.
address
(
Number
<
0
>
{})
=
const_cast
<
remove_cv_t
<
T
>*>
(
p_wave
);
...
...
composable_kernel/include/utility/common_header.hpp
View file @
80120f0a
...
...
@@ -23,9 +23,10 @@
#include "tuple.hpp"
#include "tuple_helper.hpp"
#include "type.hpp"
#include "utility.hpp"
#include "magic_division.hpp"
#include "amd_buffer_addressing_v2.hpp"
#include "utility.hpp"
#include "amd_address_space.hpp"
#include "amd_buffer_addressing.hpp"
#include "static_buffer.hpp"
#include "dynamic_buffer.hpp"
...
...
composable_kernel/include/utility/config.hpp
View file @
80120f0a
...
...
@@ -7,7 +7,7 @@
#endif
#include "bfloat16_dev.hpp"
// address space for kernel parameter
//
"Constant"
address space for kernel parameter
#define CONSTANT __attribute__((address_space(4)))
// GPU target
...
...
@@ -120,15 +120,6 @@
namespace
ck
{
enum
AddressSpaceEnum_t
{
Generic
,
Global
,
Lds
,
Sgpr
,
Vgpr
};
enum
InMemoryDataOperationEnum_t
{
Set
,
...
...
composable_kernel/include/utility/dynamic_buffer.hpp
View file @
80120f0a
...
...
@@ -3,7 +3,7 @@
namespace
ck
{
#include "amd_buffer_addressing
_v2
.hpp"
#include "amd_buffer_addressing.hpp"
template
<
AddressSpaceEnum_t
BufferAddressSpace
,
typename
T
,
typename
ElementSpaceSize
>
struct
DynamicBuffer
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment