Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
a93d07c7
Unverified
Commit
a93d07c7
authored
Aug 06, 2024
by
Illia Silin
Committed by
GitHub
Aug 06, 2024
Browse files
Merge branch 'develop' into ck_codegen_build
parents
9d9ad510
afbf6350
Changes
52
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
452 additions
and
162 deletions
+452
-162
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
...wd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
+39
-0
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
...wd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
+39
-0
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
+55
-0
library/src/utility/convolution_parameter.cpp
library/src/utility/convolution_parameter.cpp
+78
-20
profiler/include/profiler/profile_conv_bwd_data_impl.hpp
profiler/include/profiler/profile_conv_bwd_data_impl.hpp
+34
-11
profiler/include/profiler/profile_conv_fwd_impl.hpp
profiler/include/profiler/profile_conv_fwd_impl.hpp
+34
-11
profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
+12
-11
profiler/src/CMakeLists.txt
profiler/src/CMakeLists.txt
+5
-0
profiler/src/profile_grouped_conv_fwd.cpp
profiler/src/profile_grouped_conv_fwd.cpp
+58
-25
test/conv_util/conv_util.cpp
test/conv_util/conv_util.cpp
+29
-27
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
+33
-25
test/smfmac_op/smfmac_op_xdl.cpp
test/smfmac_op/smfmac_op_xdl.cpp
+36
-32
No files found.
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp
0 → 100644
View file @
a93d07c7
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_large_tensor_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Hands the F16 large-tensor XDL instance list (3 spatial dimensions,
// NDHWGC input / GKZYXC weight / NDHWGK output layouts, no extra D tensors,
// ConvFwdDefault specialization) to add_device_operation_instances together
// with the caller's container.
//
// instances: container of DeviceGroupedConvFwdMultipleABD pointers that is
//            forwarded, by reference, to add_device_operation_instances.
//
// NOTE(review): presumably add_device_operation_instances appends one
// operation per entry of the instance list -- confirm against the helper.
void add_device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                NDHWGC,
                                                                GKZYXC,
                                                                Empty_Tuple,
                                                                NDHWGK,
                                                                F16,
                                                                F16,
                                                                Empty_Tuple,
                                                                F16,
                                                                PassThrough,
                                                                PassThrough,
                                                                PassThrough>>>& instances)
{
    // Name the instance list once so the registration call stays readable.
    using LargeTensorF16Instances =
        device_grouped_conv_fwd_xdl_large_tensor_f16_instances<3,
                                                               NDHWGC,
                                                               GKZYXC,
                                                               Empty_Tuple,
                                                               NDHWGK,
                                                               ConvFwdDefault>;

    add_device_operation_instances(instances, LargeTensorF16Instances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp
0 → 100644
View file @
a93d07c7
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_large_tensor_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// Hands the F32 large-tensor XDL instance list (3 spatial dimensions,
// NDHWGC input / GKZYXC weight / NDHWGK output layouts, no extra D tensors,
// ConvFwdDefault specialization) to add_device_operation_instances together
// with the caller's container.
//
// instances: container of DeviceGroupedConvFwdMultipleABD pointers that is
//            forwarded, by reference, to add_device_operation_instances.
//
// NOTE(review): presumably add_device_operation_instances appends one
// operation per entry of the instance list -- confirm against the helper.
void add_device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleABD<3,
                                                                NDHWGC,
                                                                GKZYXC,
                                                                Empty_Tuple,
                                                                NDHWGK,
                                                                F32,
                                                                F32,
                                                                Empty_Tuple,
                                                                F32,
                                                                PassThrough,
                                                                PassThrough,
                                                                PassThrough>>>& instances)
{
    // Name the instance list once so the registration call stays readable.
    using LargeTensorF32Instances =
        device_grouped_conv_fwd_xdl_large_tensor_f32_instances<3,
                                                               NDHWGC,
                                                               GKZYXC,
                                                               Empty_Tuple,
                                                               NDHWGK,
                                                               ConvFwdDefault>;

    add_device_operation_instances(instances, LargeTensorF32Instances{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
0 → 100644
View file @
a93d07c7
# Builds the device_mha_instance library from the sources that
# example/ck_tile/01_fmha/generate.py writes into the current binary directory.
set(FMHA_CPP_FOLDER ${CMAKE_CURRENT_BINARY_DIR})
set(FMHA_SRC_FOLDER ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/)
set(CK_TILE_SRC_FOLDER ${CMAKE_SOURCE_DIR}/include/ck_tile/)

# python stuff
find_package(PythonInterp 3 REQUIRED)

# Install the ck_tile headers and the three FMHA example headers.
rocm_install(DIRECTORY ${CK_TILE_SRC_FOLDER} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck_tile)

rocm_install(FILES
    "${FMHA_SRC_FOLDER}/fmha_fwd.hpp"
    "${FMHA_SRC_FOLDER}/bias.hpp"
    "${FMHA_SRC_FOLDER}/mask.hpp"
    DESTINATION include/ck_tile/ops)

# header for building lib
file(COPY ${FMHA_SRC_FOLDER}/fmha_fwd.hpp DESTINATION ${FMHA_CPP_FOLDER})
file(COPY ${FMHA_SRC_FOLDER}/bias.hpp DESTINATION ${FMHA_CPP_FOLDER})
file(COPY ${FMHA_SRC_FOLDER}/mask.hpp DESTINATION ${FMHA_CPP_FOLDER})

# generate a list of kernels, but not actually emit files at config stage
execute_process(
    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
            --list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt
    RESULT_VARIABLE FMHA_LIST_BLOBS_RESULT
)

# Stop here if the listing step failed; otherwise a blob_list.txt left over
# from an earlier configure run would be read below as if it were current.
if(NOT FMHA_LIST_BLOBS_RESULT EQUAL 0)
    message(FATAL_ERROR
        "generate.py --list_blobs failed (result: ${FMHA_LIST_BLOBS_RESULT}); "
        "cannot determine the list of mha kernel files")
endif()

file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_FWD_GEN_BLOBS)

# actually generate the cpp files
add_custom_command(
    OUTPUT ${FMHA_FWD_GEN_BLOBS}
    COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
            --output_dir ${FMHA_CPP_FOLDER}
    COMMENT "Generating mha kernel (cpp) files now ..."
    VERBATIM
)

# This is done to remove path info and just
# have filename. Since, it was causing the cmake
# to throw "File name too long"
set(device_files)
foreach(filepath IN LISTS FMHA_FWD_GEN_BLOBS)
    get_filename_component(filename ${filepath} NAME)
    # Append the filename to the device_files list
    list(APPEND device_files ${filename})
endforeach()

# Target that forces the generated sources to exist before the library builds.
add_custom_target(generate_cpp_files DEPENDS ${FMHA_FWD_GEN_BLOBS})

add_instance_library(device_mha_instance ${device_files})

# add_instance_library may not have created the target, so only wire the
# dependency when it exists.
if(TARGET device_mha_instance)
    add_dependencies(device_mha_instance generate_cpp_files)
endif()
library/src/utility/convolution_parameter.cpp
View file @
a93d07c7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/host_utility/io.hpp"
...
...
@@ -20,6 +20,63 @@ ConvParam::ConvParam(ck::index_t n_dim,
const
std
::
vector
<
ck
::
index_t
>&
dilations
,
const
std
::
vector
<
ck
::
index_t
>&
left_pads
,
const
std
::
vector
<
ck
::
index_t
>&
right_pads
)
:
num_dim_spatial_
(
static_cast
<
ck
::
long_index_t
>
(
n_dim
)),
G_
(
static_cast
<
ck
::
long_index_t
>
(
group_count
)),
N_
(
static_cast
<
ck
::
long_index_t
>
(
n_batch
)),
K_
(
static_cast
<
ck
::
long_index_t
>
(
n_out_channels
)),
C_
(
static_cast
<
ck
::
long_index_t
>
(
n_in_channels
)),
filter_spatial_lengths_
(
num_dim_spatial_
),
input_spatial_lengths_
(
num_dim_spatial_
),
output_spatial_lengths_
(
num_dim_spatial_
),
conv_filter_strides_
(
num_dim_spatial_
),
conv_filter_dilations_
(
num_dim_spatial_
),
input_left_pads_
(
num_dim_spatial_
),
input_right_pads_
(
num_dim_spatial_
)
{
if
(
static_cast
<
ck
::
index_t
>
(
filter_spatial_lengths_
.
size
())
!=
num_dim_spatial_
||
static_cast
<
ck
::
index_t
>
(
input_spatial_lengths_
.
size
())
!=
num_dim_spatial_
||
static_cast
<
ck
::
index_t
>
(
conv_filter_strides_
.
size
())
!=
num_dim_spatial_
||
static_cast
<
ck
::
index_t
>
(
conv_filter_dilations_
.
size
())
!=
num_dim_spatial_
||
static_cast
<
ck
::
index_t
>
(
input_left_pads_
.
size
())
!=
num_dim_spatial_
||
static_cast
<
ck
::
index_t
>
(
input_right_pads_
.
size
())
!=
num_dim_spatial_
)
{
throw
(
std
::
runtime_error
(
"ConvParam::ConvParam: "
"parameter size is different from number of declared dimensions!"
));
}
for
(
ck
::
index_t
i
=
0
;
i
<
num_dim_spatial_
;
++
i
)
{
filter_spatial_lengths_
[
i
]
=
static_cast
<
ck
::
long_index_t
>
(
filters_len
[
i
]);
input_spatial_lengths_
[
i
]
=
static_cast
<
ck
::
long_index_t
>
(
input_len
[
i
]);
conv_filter_strides_
[
i
]
=
static_cast
<
ck
::
long_index_t
>
(
strides
[
i
]);
conv_filter_dilations_
[
i
]
=
static_cast
<
ck
::
long_index_t
>
(
dilations
[
i
]);
input_left_pads_
[
i
]
=
static_cast
<
ck
::
long_index_t
>
(
left_pads
[
i
]);
input_right_pads_
[
i
]
=
static_cast
<
ck
::
long_index_t
>
(
right_pads
[
i
]);
// XEff = (X - 1) * conv_dilation_w + 1;
// Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
const
ck
::
long_index_t
x_eff
=
(
filter_spatial_lengths_
[
i
]
-
1
)
*
conv_filter_dilations_
[
i
]
+
1
;
output_spatial_lengths_
[
i
]
=
(
input_spatial_lengths_
[
i
]
+
input_left_pads_
[
i
]
+
input_right_pads_
[
i
]
-
x_eff
)
/
conv_filter_strides_
[
i
]
+
1
;
}
}
ConvParam
::
ConvParam
(
ck
::
long_index_t
n_dim
,
ck
::
long_index_t
group_count
,
ck
::
long_index_t
n_batch
,
ck
::
long_index_t
n_out_channels
,
ck
::
long_index_t
n_in_channels
,
const
std
::
vector
<
ck
::
long_index_t
>&
filters_len
,
const
std
::
vector
<
ck
::
long_index_t
>&
input_len
,
const
std
::
vector
<
ck
::
long_index_t
>&
strides
,
const
std
::
vector
<
ck
::
long_index_t
>&
dilations
,
const
std
::
vector
<
ck
::
long_index_t
>&
left_pads
,
const
std
::
vector
<
ck
::
long_index_t
>&
right_pads
)
:
num_dim_spatial_
(
n_dim
),
G_
(
group_count
),
N_
(
n_batch
),
...
...
@@ -49,7 +106,8 @@ ConvParam::ConvParam(ck::index_t n_dim,
{
// XEff = (X - 1) * conv_dilation_w + 1;
// Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
const
ck
::
index_t
x_eff
=
(
filter_spatial_lengths_
[
i
]
-
1
)
*
conv_filter_dilations_
[
i
]
+
1
;
const
ck
::
long_index_t
x_eff
=
(
filter_spatial_lengths_
[
i
]
-
1
)
*
conv_filter_dilations_
[
i
]
+
1
;
output_spatial_lengths_
[
i
]
=
(
input_spatial_lengths_
[
i
]
+
input_left_pads_
[
i
]
+
input_right_pads_
[
i
]
-
x_eff
)
/
...
...
@@ -63,7 +121,7 @@ ConvParam::ConvParam()
{
}
std
::
vector
<
ck
::
index_t
>
ConvParam
::
GetOutputSpatialLengths
()
const
std
::
vector
<
ck
::
long_
index_t
>
ConvParam
::
GetOutputSpatialLengths
()
const
{
return
output_spatial_lengths_
;
}
...
...
@@ -97,46 +155,46 @@ std::string get_conv_param_parser_helper_msg()
ck
::
utils
::
conv
::
ConvParam
parse_conv_param
(
int
num_dim_spatial
,
int
arg_idx
,
char
*
const
argv
[])
{
const
ck
::
index_t
G
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
const
ck
::
index_t
N
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
const
ck
::
index_t
K
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
const
ck
::
index_t
C
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths
(
num_dim_spatial
);
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths
(
num_dim_spatial
);
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
(
num_dim_spatial
);
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
(
num_dim_spatial
);
std
::
vector
<
ck
::
index_t
>
input_left_pads
(
num_dim_spatial
);
std
::
vector
<
ck
::
index_t
>
input_right_pads
(
num_dim_spatial
);
const
ck
::
long_
index_t
G
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
const
ck
::
long_
index_t
N
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
const
ck
::
long_
index_t
K
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
const
ck
::
long_
index_t
C
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
std
::
vector
<
ck
::
long_
index_t
>
filter_spatial_lengths
(
num_dim_spatial
);
std
::
vector
<
ck
::
long_
index_t
>
input_spatial_lengths
(
num_dim_spatial
);
std
::
vector
<
ck
::
long_
index_t
>
conv_filter_strides
(
num_dim_spatial
);
std
::
vector
<
ck
::
long_
index_t
>
conv_filter_dilations
(
num_dim_spatial
);
std
::
vector
<
ck
::
long_
index_t
>
input_left_pads
(
num_dim_spatial
);
std
::
vector
<
ck
::
long_
index_t
>
input_right_pads
(
num_dim_spatial
);
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
filter_spatial_lengths
[
i
]
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
filter_spatial_lengths
[
i
]
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
}
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
input_spatial_lengths
[
i
]
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
input_spatial_lengths
[
i
]
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
}
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
conv_filter_strides
[
i
]
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
conv_filter_strides
[
i
]
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
}
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
conv_filter_dilations
[
i
]
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
conv_filter_dilations
[
i
]
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
}
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
input_left_pads
[
i
]
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
input_left_pads
[
i
]
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
}
for
(
int
i
=
0
;
i
<
num_dim_spatial
;
++
i
)
{
input_right_pads
[
i
]
=
std
::
sto
i
(
argv
[
arg_idx
++
]);
input_right_pads
[
i
]
=
std
::
sto
l
(
argv
[
arg_idx
++
]);
}
return
ck
::
utils
::
conv
::
ConvParam
{
num_dim_spatial
,
...
...
profiler/include/profiler/profile_conv_bwd_data_impl.hpp
View file @
a93d07c7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -82,6 +82,29 @@ bool profile_conv_bwd_data_impl(int do_verification,
Tensor
<
WeiDataType
>
weight
(
wei_g_k_c_xs_desc
);
Tensor
<
OutDataType
>
output
(
out_g_n_k_wos_desc
);
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
output_spatial_lengths_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
conv_filter_strides_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
input_left_pads_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
input_right_pads_i32
(
NDimSpatial
);
for
(
ck
::
index_t
d
=
0
;
d
<
NDimSpatial
;
d
++
)
{
input_spatial_lengths_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
input_spatial_lengths_
[
d
]);
filter_spatial_lengths_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
filter_spatial_lengths_
[
d
]);
output_spatial_lengths_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
GetOutputSpatialLengths
()[
d
]);
conv_filter_strides_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
conv_filter_strides_
[
d
]);
conv_filter_dilations_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
conv_filter_dilations_
[
d
]);
input_left_pads_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
input_left_pads_
[
d
]);
input_right_pads_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
input_right_pads_
[
d
]);
}
std
::
cout
<<
"input: "
<<
input_host_result
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"weight: "
<<
weight
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"output: "
<<
output
.
mDesc
<<
std
::
endl
;
...
...
@@ -161,16 +184,16 @@ bool profile_conv_bwd_data_impl(int do_verification,
op_ptr
->
MakeArgumentPointer
(
static_cast
<
InDataType
*>
(
in_device_buf
.
GetDeviceBuffer
()),
static_cast
<
WeiDataType
*>
(
wei_device_buf
.
GetDeviceBuffer
()),
static_cast
<
OutDataType
*>
(
out_device_buf
.
GetDeviceBuffer
()),
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
C_
,
conv_param
.
input_spatial_lengths_
,
conv_param
.
filter_spatial_lengths_
,
conv_param
.
output_spatial_lengths_
,
conv_param
.
conv_filter_strides_
,
conv_param
.
conv_filter_dilations_
,
conv_param
.
input_left_pads_
,
conv_param
.
input_right_pads_
,
static_cast
<
ck
::
index_t
>
(
conv_param
.
N_
)
,
static_cast
<
ck
::
index_t
>
(
conv_param
.
K_
)
,
static_cast
<
ck
::
index_t
>
(
conv_param
.
C_
)
,
input_spatial_lengths_
i32
,
filter_spatial_lengths_
i32
,
output_spatial_lengths_
i32
,
conv_filter_strides_
i32
,
conv_filter_dilations_
i32
,
input_left_pads_
i32
,
input_right_pads_
i32
,
in_element_op
,
wei_element_op
,
out_element_op
);
...
...
profiler/include/profiler/profile_conv_fwd_impl.hpp
View file @
a93d07c7
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -60,6 +60,29 @@ bool profile_conv_fwd_impl(int do_verification,
Tensor
<
OutDataType
>
host_output
(
out_g_n_k_wos_desc
);
Tensor
<
OutDataType
>
device_output
(
out_g_n_k_wos_desc
);
std
::
vector
<
ck
::
index_t
>
input_spatial_lengths_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
filter_spatial_lengths_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
output_spatial_lengths_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
conv_filter_strides_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
input_left_pads_i32
(
NDimSpatial
);
std
::
vector
<
ck
::
index_t
>
input_right_pads_i32
(
NDimSpatial
);
for
(
ck
::
index_t
d
=
0
;
d
<
NDimSpatial
;
d
++
)
{
input_spatial_lengths_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
input_spatial_lengths_
[
d
]);
filter_spatial_lengths_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
filter_spatial_lengths_
[
d
]);
output_spatial_lengths_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
GetOutputSpatialLengths
()[
d
]);
conv_filter_strides_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
conv_filter_strides_
[
d
]);
conv_filter_dilations_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
conv_filter_dilations_
[
d
]);
input_left_pads_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
input_left_pads_
[
d
]);
input_right_pads_i32
[
d
]
=
static_cast
<
ck
::
index_t
>
(
conv_param
.
input_right_pads_
[
d
]);
}
std
::
cout
<<
"input: "
<<
input
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"weight: "
<<
weight
.
mDesc
<<
std
::
endl
;
std
::
cout
<<
"output: "
<<
host_output
.
mDesc
<<
std
::
endl
;
...
...
@@ -143,16 +166,16 @@ bool profile_conv_fwd_impl(int do_verification,
op_ptr
->
MakeArgumentPointer
(
static_cast
<
InDataType
*>
(
in_device_buf
.
GetDeviceBuffer
()),
static_cast
<
WeiDataType
*>
(
wei_device_buf
.
GetDeviceBuffer
()),
static_cast
<
OutDataType
*>
(
out_device_buf
.
GetDeviceBuffer
()),
conv_param
.
N_
,
conv_param
.
K_
,
conv_param
.
C_
,
conv_param
.
input_spatial_lengths_
,
conv_param
.
filter_spatial_lengths_
,
conv_param
.
GetO
utput
S
patial
L
engths
()
,
conv_param
.
conv_filter_strides_
,
conv_param
.
conv_filter_dilations_
,
conv_param
.
input_left_pads_
,
conv_param
.
input_right_pads_
,
static_cast
<
ck
::
index_t
>
(
conv_param
.
N_
)
,
static_cast
<
ck
::
index_t
>
(
conv_param
.
K_
)
,
static_cast
<
ck
::
index_t
>
(
conv_param
.
C_
)
,
input_spatial_lengths_
i32
,
filter_spatial_lengths_
i32
,
o
utput
_s
patial
_l
engths
_i32
,
conv_filter_strides_
i32
,
conv_filter_dilations_
i32
,
input_left_pads_
i32
,
input_right_pads_
i32
,
in_element_op
,
wei_element_op
,
out_element_op
);
...
...
profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp
View file @
a93d07c7
...
...
@@ -33,7 +33,8 @@ template <ck::index_t NDimSpatial,
typename
WeiDataType
,
typename
OutDataType
,
typename
AComputeType
=
InDataType
,
typename
BComputeType
=
AComputeType
>
typename
BComputeType
=
AComputeType
,
typename
IndexType
=
ck
::
index_t
>
bool
profile_grouped_conv_fwd_impl
(
int
do_verification
,
int
init_method
,
bool
do_log
,
...
...
@@ -57,16 +58,16 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
const
auto
out_g_n_k_wos_desc
=
ck
::
utils
::
conv
::
make_output_host_tensor_descriptor_g_n_k_wos_packed
<
OutLayout
>
(
conv_param
);
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
a_g_n_c_wis_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
a_g_n_c_wis_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
b_g_k_c_xs_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
b_g_k_c_xs_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
e_g_n_k_wos_lengths
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
+
3
>
e_g_n_k_wos_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_strides
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
conv_filter_dilations
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_left_pads
{};
std
::
array
<
ck
::
index_t
,
NDimSpatial
>
input_right_pads
{};
std
::
array
<
IndexType
,
NDimSpatial
+
3
>
a_g_n_c_wis_lengths
{};
std
::
array
<
IndexType
,
NDimSpatial
+
3
>
a_g_n_c_wis_strides
{};
std
::
array
<
IndexType
,
NDimSpatial
+
3
>
b_g_k_c_xs_lengths
{};
std
::
array
<
IndexType
,
NDimSpatial
+
3
>
b_g_k_c_xs_strides
{};
std
::
array
<
IndexType
,
NDimSpatial
+
3
>
e_g_n_k_wos_lengths
{};
std
::
array
<
IndexType
,
NDimSpatial
+
3
>
e_g_n_k_wos_strides
{};
std
::
array
<
IndexType
,
NDimSpatial
>
conv_filter_strides
{};
std
::
array
<
IndexType
,
NDimSpatial
>
conv_filter_dilations
{};
std
::
array
<
IndexType
,
NDimSpatial
>
input_left_pads
{};
std
::
array
<
IndexType
,
NDimSpatial
>
input_right_pads
{};
auto
copy
=
[](
const
auto
&
x
,
auto
&
y
)
{
ck
::
ranges
::
copy
(
x
,
y
.
begin
());
};
...
...
profiler/src/CMakeLists.txt
View file @
a93d07c7
...
...
@@ -82,6 +82,11 @@ set(PROFILER_EXECUTABLE ckProfiler)
# Profiler executable built from the collected profiler sources.
add_executable(${PROFILER_EXECUTABLE} ${PROFILER_SOURCES})
target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors)

# flags to compress the library
# hip_VERSION_FLAT is passed by name so that if() dereferences it itself: an
# undefined or empty variable then makes the comparison false instead of
# leaving a malformed condition that aborts configuration.
if(NOT WIN32 AND hip_VERSION_FLAT GREATER 600241132)
    message("Adding --offload-compress flag for ${PROFILER_EXECUTABLE}")
    target_compile_options(${PROFILER_EXECUTABLE} PRIVATE --offload-compress)
endif()

target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility getopt::getopt)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance)
...
...
profiler/src/profile_grouped_conv_fwd.cpp
View file @
a93d07c7
This diff is collapsed.
Click to expand it.
test/conv_util/conv_util.cpp
View file @
a93d07c7
This diff is collapsed.
Click to expand it.
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
View file @
a93d07c7
This diff is collapsed.
Click to expand it.
test/smfmac_op/smfmac_op_xdl.cpp
View file @
a93d07c7
This diff is collapsed.
Click to expand it.
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment