Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
d670c5a6
"git@developer.sourcefind.cn:OpenDAS/megatron-lm.git" did not exist on "4ac91436792116c516b75e5608e2083dbaf56a13"
Commit
d670c5a6
authored
Sep 20, 2023
by
Jing Zhang
Browse files
fixed
parent
963bc7a3
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
37 additions
and
29 deletions
+37
-29
client_example/16_convnd_fwd/CMakeLists.txt
client_example/16_convnd_fwd/CMakeLists.txt
+3
-0
client_example/16_convnd_fwd/common.hpp
client_example/16_convnd_fwd/common.hpp
+1
-1
client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp
client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp
+3
-1
library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp
...or_operation_instance/gpu/grouped_convolution_forward.hpp
+5
-4
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_f8_instance.cpp
...v3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_f8_instance.cpp
+24
-21
script/cmake-ck-dev.sh
script/cmake-ck-dev.sh
+1
-2
No files found.
client_example/16_convnd_fwd/CMakeLists.txt
View file @
d670c5a6
...
@@ -3,3 +3,6 @@ add_executable(client_conv3d_fwd_fp32 conv3d_fwd_fp32.cpp)
...
@@ -3,3 +3,6 @@ add_executable(client_conv3d_fwd_fp32 conv3d_fwd_fp32.cpp)
target_link_libraries
(
client_conv3d_fwd_fp16 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_conv3d_fwd_fp16 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_operations
)
target_link_libraries
(
client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_operations
)
add_executable
(
client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp
)
target_link_libraries
(
client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_operations
)
client_example/16_convnd_fwd/common.hpp
View file @
d670c5a6
...
@@ -95,7 +95,7 @@ template <ck::index_t NumDimSpatial,
...
@@ -95,7 +95,7 @@ template <ck::index_t NumDimSpatial,
typename
WeiLayout
,
typename
WeiLayout
,
typename
OutLayout
,
typename
OutLayout
,
ck
::
index_t
NumNonSpatialDim
=
3
,
ck
::
index_t
NumNonSpatialDim
=
3
,
typename
ComputeType
=
InDataType
>
typename
ComputeType
=
InDataType
>
bool
run_grouped_conv_fwd
(
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
in_lengths
,
bool
run_grouped_conv_fwd
(
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
in_lengths
,
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
wei_lengths
,
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
wei_lengths
,
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
out_lengths
)
std
::
array
<
ck
::
index_t
,
NumDimSpatial
+
NumNonSpatialDim
>
out_lengths
)
...
...
client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp
View file @
d670c5a6
...
@@ -37,7 +37,9 @@ int main()
...
@@ -37,7 +37,9 @@ int main()
OutDataType
,
OutDataType
,
InLayout
,
InLayout
,
WeiLayout
,
WeiLayout
,
OutLayout
>
(
OutLayout
,
3
,
ck
::
f8_t
>
(
{
N
,
Di
,
Hi
,
Wi
,
G
,
C
},
{
G
,
K
,
Z
,
Y
,
X
,
C
},
{
N
,
Do
,
Ho
,
Wo
,
G
,
K
})
{
N
,
Di
,
Hi
,
Wi
,
G
,
C
},
{
G
,
K
,
Z
,
Y
,
X
,
C
},
{
N
,
Do
,
Ho
,
Wo
,
G
,
K
})
?
EXIT_SUCCESS
?
EXIT_SUCCESS
:
EXIT_FAILURE
;
:
EXIT_FAILURE
;
...
...
library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp
View file @
d670c5a6
...
@@ -378,7 +378,7 @@ template <ck::index_t NumDimSpatial,
...
@@ -378,7 +378,7 @@ template <ck::index_t NumDimSpatial,
typename
InDataType
,
typename
InDataType
,
typename
WeiDataType
,
typename
WeiDataType
,
typename
OutDataType
,
typename
OutDataType
,
typename
ComputeType
=
InDataType
>
typename
ComputeType
>
struct
DeviceOperationInstanceFactory
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
struct
DeviceOperationInstanceFactory
<
ck
::
tensor_operation
::
device
::
DeviceGroupedConvFwdMultipleD
<
NumDimSpatial
,
NumDimSpatial
,
InLayout
,
InLayout
,
...
@@ -521,14 +521,15 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
...
@@ -521,14 +521,15 @@ struct DeviceOperationInstanceFactory<ck::tensor_operation::device::DeviceGroupe
add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances
(
op_ptrs
);
add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
WeiDataType
,
half_t
>
&&
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
WeiDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
&&
is_same_v
<
ComputeType
,
half_t
>
>
)
is_same_v
<
OutDataType
,
half_t
>
&&
is_same_v
<
ComputeType
,
half_t
>
)
{
{
add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances
(
op_ptrs
);
add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
WeiDataType
,
half_t
>
&&
else
if
constexpr
(
is_same_v
<
InDataType
,
half_t
>
&&
is_same_v
<
WeiDataType
,
half_t
>
&&
is_same_v
<
OutDataType
,
half_t
&&
is_same_v
<
ComputeType
,
ck
::
f8_t
>
>
)
is_same_v
<
OutDataType
,
half_t
>
&&
is_same_v
<
ComputeType
,
ck
::
f8_t
>
)
{
{
add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_f8_instances
(
op_ptrs
);
add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_f8_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
else
if
constexpr
(
is_same_v
<
InDataType
,
ck
::
bhalf_t
>
&&
is_same_v
<
WeiDataType
,
ck
::
bhalf_t
>
&&
is_same_v
<
WeiDataType
,
ck
::
bhalf_t
>
&&
...
...
library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_f8_instance.cpp
View file @
d670c5a6
...
@@ -24,27 +24,30 @@ void add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_f8_instance
...
@@ -24,27 +24,30 @@ void add_device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_f8_instance
PassThrough
,
PassThrough
,
F8
>>>&
instances
)
F8
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
device_grouped_conv_fwd_xdl_f16_comp_f8_instances
<
3
,
instances
,
NDHWGC
,
device_grouped_conv_fwd_xdl_f16_comp_f8_instances
<
3
,
GKZYXC
,
NDHWGC
,
Empty_Tuple
,
GKZYXC
,
NDHWGK
,
Empty_Tuple
,
ConvFwdDefault
>
{});
NDHWGK
,
add_device_operation_instances
(
instances
,
ConvFwdDefault
>
{});
device_grouped_conv_fwd_xdl_f16_comp_f8_instances
<
3
,
add_device_operation_instances
(
NDHWGC
,
instances
,
GKZYXC
,
device_grouped_conv_fwd_xdl_f16_comp_f8_instances
<
3
,
Empty_Tuple
,
NDHWGC
,
NDHWGK
,
GKZYXC
,
ConvFwd1x1P0
>
{});
Empty_Tuple
,
add_device_operation_instances
(
instances
,
NDHWGK
,
device_grouped_conv_fwd_xdl_f16_comp_f8_instances
<
3
,
ConvFwd1x1P0
>
{});
NDHWGC
,
add_device_operation_instances
(
GKZYXC
,
instances
,
Empty_Tuple
,
device_grouped_conv_fwd_xdl_f16_comp_f8_instances
<
3
,
NDHWGK
,
NDHWGC
,
ConvFwd1x1S1P0
>
{});
GKZYXC
,
Empty_Tuple
,
NDHWGK
,
ConvFwd1x1S1P0
>
{});
}
}
}
// namespace instance
}
// namespace instance
...
...
script/cmake-ck-dev.sh
View file @
d670c5a6
...
@@ -6,14 +6,13 @@ rm -rf CMakeFiles
...
@@ -6,14 +6,13 @@ rm -rf CMakeFiles
MY_PROJECT_SOURCE
=
$1
MY_PROJECT_SOURCE
=
$1
cmake
\
cmake
\
-D
INSTANCES_ONLY
=
ON
\
-D
CMAKE_PREFIX_PATH
=
/opt/rocm
\
-D
CMAKE_PREFIX_PATH
=
/opt/rocm
\
-D
CMAKE_CXX_COMPILER
=
/opt/rocm/bin/hipcc
\
-D
CMAKE_CXX_COMPILER
=
/opt/rocm/bin/hipcc
\
-D
CMAKE_CXX_FLAGS
=
"-std=c++17 -O3 -ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker
\
-D
CMAKE_CXX_FLAGS
=
"-std=c++17 -O3 -ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker
\
-save-temps=
$PWD
"
\
-save-temps=
$PWD
"
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
BUILD_DEV
=
ON
\
-D
BUILD_DEV
=
ON
\
-D
GPU_TARGETS
=
"gfx908"
\
-D
GPU_TARGETS
=
"gfx908
;gfx90a;gfx940
"
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
-D
USE_BITINT_EXTENSION_INT4
=
OFF
\
-D
USE_BITINT_EXTENSION_INT4
=
OFF
\
${
MY_PROJECT_SOURCE
}
${
MY_PROJECT_SOURCE
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment