Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
6787ca76
Commit
6787ca76
authored
Dec 03, 2024
by
Ville Pietilä
Browse files
Use pinned host memory for std::vector memory allocations.
parent
2db781e9
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
82 additions
and
13 deletions
+82
-13
_deps/gtest-src
_deps/gtest-src
+1
-0
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
...ion/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
+10
-8
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp
...grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp
+2
-1
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp
...sor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp
+4
-3
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp
...u/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp
+2
-1
include/ck/utility/host_memory_allocator.hpp
include/ck/utility/host_memory_allocator.hpp
+63
-0
No files found.
gtest-src
@
f8d7d77c
Subproject commit f8d7d77c06936315286eb55f8de22cd23c188571
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp
View file @
6787ca76
...
@@ -8,6 +8,7 @@
...
@@ -8,6 +8,7 @@
#include <sstream>
#include <sstream>
#include "ck/utility/common_header.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/utility/host_memory_allocator.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
...
@@ -408,13 +409,14 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
...
@@ -408,13 +409,14 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
};
};
// Argument
// Argument
struct
Argument
:
public
BaseArgument
struct
Argument
:
public
BaseArgument
{
{
Argument
(
std
::
vector
<
const
void
*>&
p_As
,
Argument
(
std
::
vector
<
const
void
*
,
Allocator
>&
p_As
,
std
::
vector
<
const
void
*>&
p_Bs
,
std
::
vector
<
const
void
*
,
Allocator
>&
p_Bs
,
std
::
vector
<
std
::
array
<
const
void
*
,
NumDTensor
>>&
p_Ds
,
std
::
vector
<
std
::
array
<
const
void
*
,
NumDTensor
>
,
Allocator
>&
p_Ds
,
std
::
vector
<
void
*>&
p_Es
,
std
::
vector
<
void
*
,
Allocator
>&
p_Es
,
std
::
vector
<
GemmDesc
>&
gemm_descs
,
std
::
vector
<
GemmDesc
,
Allocator
>&
gemm_descs
,
AElementwiseOperation
a_element_op
,
AElementwiseOperation
a_element_op
,
BElementwiseOperation
b_element_op
,
BElementwiseOperation
b_element_op
,
CDEElementwiseOperation
cde_element_op
)
CDEElementwiseOperation
cde_element_op
)
...
@@ -533,9 +535,9 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
...
@@ -533,9 +535,9 @@ struct DeviceGroupedGemmMultipleD_Dl : public DeviceGroupedGemm<ALayout,
BElementwiseOperation
b_element_op_
;
BElementwiseOperation
b_element_op_
;
CDEElementwiseOperation
cde_element_op_
;
CDEElementwiseOperation
cde_element_op_
;
std
::
vector
<
GemmKernelArg
>
gemm_desc_kernel_arg_
;
std
::
vector
<
GemmKernelArg
,
ck
::
memory
::
PinnedHostMemoryAllocator
<
GemmKernelArg
>
>
gemm_desc_kernel_arg_
;
std
::
vector
<
Tuple
<
index_t
,
index_t
>>
a_mtx_mraw_kraw_
;
std
::
vector
<
Tuple
<
index_t
,
index_t
>
,
ck
::
memory
::
PinnedHostMemoryAllocator
<
Tuple
<
index_t
,
index_t
>>
>
a_mtx_mraw_kraw_
;
std
::
vector
<
Tuple
<
index_t
,
index_t
>>
b_mtx_nraw_kraw_
;
std
::
vector
<
Tuple
<
index_t
,
index_t
>
,
ck
::
memory
::
PinnedHostMemoryAllocator
<
Tuple
<
index_t
,
index_t
>>
>
b_mtx_nraw_kraw_
;
index_t
grid_size_
;
index_t
grid_size_
;
};
};
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp
View file @
6787ca76
...
@@ -11,6 +11,7 @@
...
@@ -11,6 +11,7 @@
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/hip_check_error.hpp"
#include "ck/host_utility/hip_check_error.hpp"
#include "ck/utility/host_memory_allocator.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/utility/common_header.hpp"
#include <ck/utility/loop_scheduler.hpp>
#include <ck/utility/loop_scheduler.hpp>
#include "ck/utility/tuple.hpp"
#include "ck/utility/tuple.hpp"
...
@@ -537,7 +538,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
...
@@ -537,7 +538,7 @@ struct DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage
std
::
vector
<
std
::
array
<
const
void
*
,
NumDTensor
>>&
p_Ds_
;
std
::
vector
<
std
::
array
<
const
void
*
,
NumDTensor
>>&
p_Ds_
;
std
::
vector
<
std
::
array
<
index_t
,
NumDTensor
>>
stride_Ds_
;
std
::
vector
<
std
::
array
<
index_t
,
NumDTensor
>>
stride_Ds_
;
std
::
vector
<
GemmTransKernelArg
>
gemm_kernel_args_
;
std
::
vector
<
GemmTransKernelArg
,
ck
::
memory
::
PinnedHostMemoryAllocator
<
GemmTransKernelArg
>
>
gemm_kernel_args_
;
std
::
vector
<
index_t
>
group_grid_size_
;
std
::
vector
<
index_t
>
group_grid_size_
;
std
::
vector
<
CGridDesc_M_N
>
elementwise_c_grid_descs_m_n_
;
std
::
vector
<
CGridDesc_M_N
>
elementwise_c_grid_descs_m_n_
;
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp
View file @
6787ca76
...
@@ -8,6 +8,7 @@
...
@@ -8,6 +8,7 @@
#include <sstream>
#include <sstream>
#include "ck/utility/common_header.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/utility/host_memory_allocator.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
...
@@ -496,9 +497,9 @@ struct DeviceGroupedGemm_Xdl : public DeviceGroupedGemm<ALayout,
...
@@ -496,9 +497,9 @@ struct DeviceGroupedGemm_Xdl : public DeviceGroupedGemm<ALayout,
BElementwiseOperation
b_element_op_
;
BElementwiseOperation
b_element_op_
;
CDEElementwiseOperation
c_element_op_
;
CDEElementwiseOperation
c_element_op_
;
std
::
vector
<
GemmBiasTransKernelArg
>
gemm_desc_kernel_arg_
;
std
::
vector
<
GemmBiasTransKernelArg
,
ck
::
memory
::
PinnedHostMemoryAllocator
<
GemmBiasTransKernelArg
>
>
gemm_desc_kernel_arg_
;
std
::
vector
<
Tuple
<
index_t
,
index_t
>>
a_mtx_mraw_kraw_
;
std
::
vector
<
Tuple
<
index_t
,
index_t
>
,
ck
::
memory
::
PinnedHostMemoryAllocator
<
Tuple
<
index_t
,
index_t
>>
>
a_mtx_mraw_kraw_
;
std
::
vector
<
Tuple
<
index_t
,
index_t
>>
b_mtx_nraw_kraw_
;
std
::
vector
<
Tuple
<
index_t
,
index_t
>
,
ck
::
memory
::
PinnedHostMemoryAllocator
<
Tuple
<
index_t
,
index_t
>>
>
b_mtx_nraw_kraw_
;
index_t
grid_size_
;
index_t
grid_size_
;
};
};
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp
View file @
6787ca76
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
#include "ck/host_utility/hip_check_error.hpp"
#include "ck/host_utility/hip_check_error.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/utility/tuple.hpp"
#include "ck/utility/tuple.hpp"
#include "ck/utility/host_memory_allocator.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
...
@@ -365,7 +366,7 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
...
@@ -365,7 +366,7 @@ struct DeviceGroupedGemmXdlSplitKCShuffle : public DeviceGroupedGemmSplitK<ALayo
index_t
group_count_
;
index_t
group_count_
;
index_t
skipped_group_count_
;
index_t
skipped_group_count_
;
std
::
vector
<
GemmTransKernelArg
>
gemm_kernel_args_
;
std
::
vector
<
GemmTransKernelArg
,
ck
::
memory
::
PinnedHostMemoryAllocator
<
GemmTransKernelArg
>
>
gemm_kernel_args_
;
index_t
grid_size_
;
index_t
grid_size_
;
};
};
...
...
include/ck/utility/host_memory_allocator.hpp
0 → 100644
View file @
6787ca76
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <hip/hip_runtime.h>
#include "ck/host_utility/hip_check_error.hpp"
namespace
ck
{
namespace
memory
{
template
<
typename
T
>
struct
PinnedHostMemoryAllocator
{
public:
using
value_type
=
T
;
using
pointer
=
T
*
;
using
const_pointer
=
const
T
*
;
using
void_pointer
=
void
*
;
using
const_void_pointer
=
const
void
*
;
using
size_type
=
std
::
size_t
;
using
difference_type
=
std
::
ptrdiff_t
;
template
<
typename
U
>
struct
rebind
{
using
other
=
PinnedHostMemoryAllocator
<
U
>
;
};
PinnedHostMemoryAllocator
()
=
default
;
template
<
typename
U
>
PinnedHostMemoryAllocator
(
const
PinnedHostMemoryAllocator
<
U
>&
other
)
:
std
::
allocator
<
T
>
(
other
)
{}
T
*
allocate
(
std
::
size_t
n
)
{
T
*
p
;
hip_check_error
(
hipHostMalloc
(
&
p
,
n
*
sizeof
(
T
)));
return
p
;
}
void
deallocate
(
T
*
p
,
std
::
size_t
)
{
hip_check_error
(
hipHostFree
(
p
));
}
template
<
typename
U
,
typename
...
Args
>
void
construct
(
U
*
p
,
Args
&&
...
args
)
{
new
(
p
)
U
(
std
::
forward
<
Args
>
(
args
)...);
}
template
<
typename
U
>
void
destroy
(
U
*
p
)
noexcept
{
p
->~
U
();
}
};
template
<
typename
T
,
typename
U
>
bool
operator
==
(
const
PinnedHostMemoryAllocator
<
T
>&
,
const
PinnedHostMemoryAllocator
<
U
>&
)
{
return
true
;
}
template
<
typename
T
,
typename
U
>
bool
operator
!=
(
const
PinnedHostMemoryAllocator
<
T
>&
,
const
PinnedHostMemoryAllocator
<
U
>&
)
{
return
false
;
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment