Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
59328f3a
"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "67533c798ca95172d7b9999dd28d9d6e57a67432"
Commit
59328f3a
authored
Apr 02, 2021
by
Jing Zhang
Browse files
merge master
parents
063342ca
fcbb9788
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
41 deletions
+6
-41
driver/include/device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
...convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
+1
-1
driver/src/conv_driver.cpp
driver/src/conv_driver.cpp
+4
-39
script/cmake-rocm3.7.sh
script/cmake-rocm3.7.sh
+1
-1
No files found.
driver/include/device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
View file @
59328f3a
...
@@ -164,7 +164,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(
...
@@ -164,7 +164,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(
#endif
#endif
constexpr
auto
conv_driver
=
constexpr
auto
conv_driver
=
//DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad<
//
DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad<
DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
<
DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
<
BlockSize
,
BlockSize
,
typename
vector_type
<
TInWei
,
InWeiVectorSize
>::
type
,
typename
vector_type
<
TInWei
,
InWeiVectorSize
>::
type
,
...
...
driver/src/conv_driver.cpp
View file @
59328f3a
...
@@ -50,7 +50,7 @@ int main(int argc, char* argv[])
...
@@ -50,7 +50,7 @@ int main(int argc, char* argv[])
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
#elif
1
#elif
0
constexpr
index_t
N
=
1
;
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
C
=
16
;
constexpr
index_t
HI
=
270
;
constexpr
index_t
HI
=
270
;
...
@@ -64,21 +64,7 @@ int main(int argc, char* argv[])
...
@@ -64,21 +64,7 @@ int main(int argc, char* argv[])
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
#elif 0
#elif 1
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
HI
=
2048
;
constexpr
index_t
WI
=
2048
;
constexpr
index_t
K
=
16
;
constexpr
index_t
Y
=
3
;
constexpr
index_t
X
=
3
;
using
ConvStrides
=
Sequence
<
1
,
1
>
;
using
ConvDilations
=
Sequence
<
1
,
1
>
;
using
LeftPads
=
Sequence
<
1
,
1
>
;
using
RightPads
=
Sequence
<
1
,
1
>
;
#elif 0
constexpr
index_t
N
=
1
;
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
C
=
16
;
constexpr
index_t
HI
=
1080
;
constexpr
index_t
HI
=
1080
;
...
@@ -118,20 +104,6 @@ int main(int argc, char* argv[])
...
@@ -118,20 +104,6 @@ int main(int argc, char* argv[])
using
ConvStrides
=
Sequence
<
1
,
1
>
;
using
ConvStrides
=
Sequence
<
1
,
1
>
;
using
ConvDilations
=
Sequence
<
1
,
1
>
;
using
ConvDilations
=
Sequence
<
1
,
1
>
;
using
LeftPads
=
Sequence
<
1
,
1
>
;
using
RightPads
=
Sequence
<
1
,
1
>
;
#elif 0
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
HI
=
135
;
constexpr
index_t
WI
=
240
;
constexpr
index_t
K
=
16
;
constexpr
index_t
Y
=
3
;
constexpr
index_t
X
=
3
;
using
ConvStrides
=
Sequence
<
1
,
1
>
;
using
ConvDilations
=
Sequence
<
1
,
1
>
;
using
LeftPads
=
Sequence
<
1
,
1
>
;
using
LeftPads
=
Sequence
<
1
,
1
>
;
using
RightPads
=
Sequence
<
1
,
1
>
;
using
RightPads
=
Sequence
<
1
,
1
>
;
#elif 0
#elif 0
...
@@ -663,17 +635,12 @@ int main(int argc, char* argv[])
...
@@ -663,17 +635,12 @@ int main(int argc, char* argv[])
constexpr index_t in_vector_size = 1;
constexpr index_t in_vector_size = 1;
using acc_data_t = float;
using acc_data_t = float;
using out_data_t = float;
using out_data_t = float;
#elif
1
using
in_data_t
=
half_t
;
constexpr
index_t
in_vector_size
=
16
;
using
acc_data_t
=
float
;
using
out_data_t
=
half_t
;
#elif
0
#elif
0
using
in_data_t
=
float
;
using
in_data_t
=
float
;
constexpr
index_t
in_vector_size
=
1
;
constexpr
index_t
in_vector_size
=
1
;
using
acc_data_t
=
float
;
using
acc_data_t
=
float
;
using
out_data_t
=
int8_t
;
using
out_data_t
=
int8_t
;
#elif
0
#elif
1
using
in_data_t
=
int8_t
;
using
in_data_t
=
int8_t
;
constexpr
index_t
in_vector_size
=
16
;
constexpr
index_t
in_vector_size
=
16
;
using
acc_data_t
=
int32_t
;
using
acc_data_t
=
int32_t
;
...
@@ -774,7 +741,7 @@ int main(int argc, char* argv[])
...
@@ -774,7 +741,7 @@ int main(int argc, char* argv[])
LeftPads
{},
LeftPads
{},
RightPads
{},
RightPads
{},
nrepeat
);
nrepeat
);
#elif
1
#elif
0
device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk
<
in_data_t
,
device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk
<
in_data_t
,
in_vector_size
,
in_vector_size
,
acc_data_t
,
acc_data_t
,
...
@@ -821,7 +788,6 @@ int main(int argc, char* argv[])
...
@@ -821,7 +788,6 @@ int main(int argc, char* argv[])
check_error
(
out_nkhw_host
,
out_nkhw_device
);
check_error
(
out_nkhw_host
,
out_nkhw_device
);
#if 0
if
(
do_log
)
if
(
do_log
)
{
{
LogRange
(
std
::
cout
<<
"in_nchw : "
,
in_nchw
.
mData
,
","
)
<<
std
::
endl
;
LogRange
(
std
::
cout
<<
"in_nchw : "
,
in_nchw
.
mData
,
","
)
<<
std
::
endl
;
...
@@ -829,6 +795,5 @@ int main(int argc, char* argv[])
...
@@ -829,6 +795,5 @@ int main(int argc, char* argv[])
LogRange
(
std
::
cout
<<
"out_nkhw_host : "
,
out_nkhw_host
.
mData
,
","
)
<<
std
::
endl
;
LogRange
(
std
::
cout
<<
"out_nkhw_host : "
,
out_nkhw_host
.
mData
,
","
)
<<
std
::
endl
;
LogRange
(
std
::
cout
<<
"out_nkhw_device: "
,
out_nkhw_device
.
mData
,
","
)
<<
std
::
endl
;
LogRange
(
std
::
cout
<<
"out_nkhw_device: "
,
out_nkhw_device
.
mData
,
","
)
<<
std
::
endl
;
}
}
#endif
}
}
}
}
script/cmake-rocm3.7.sh
View file @
59328f3a
...
@@ -10,7 +10,7 @@ cmake
...
@@ -10,7 +10,7 @@ cmake
-D
CMAKE_INSTALL_PREFIX
=
${
MY_PROJECT_INSTALL
}
\
-D
CMAKE_INSTALL_PREFIX
=
${
MY_PROJECT_INSTALL
}
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
DEVICE_BACKEND
=
"AMD"
\
-D
DEVICE_BACKEND
=
"AMD"
\
-D
CMAKE_CXX_FLAGS
=
"-O3 --amdgpu-target=gfx
906
-gline-tables-only -save-temps=
$CWD
-ftemplate-backtrace-limit=0"
\
-D
CMAKE_CXX_FLAGS
=
"-O3 --amdgpu-target=gfx
1030
-gline-tables-only -save-temps=
$CWD
-ftemplate-backtrace-limit=0"
\
-D
CMAKE_CXX_COMPILER
=
/opt/rocm/bin/hipcc
\
-D
CMAKE_CXX_COMPILER
=
/opt/rocm/bin/hipcc
\
-D
CMAKE_PREFIX_PATH
=
"/opt/rocm"
\
-D
CMAKE_PREFIX_PATH
=
"/opt/rocm"
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment