Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
cba8f7f2
Commit
cba8f7f2
authored
Jun 26, 2022
by
Anthony Chang
Browse files
Merge remote-tracking branch 'upstream/develop' into gemm-layernorm-4
parents
cc50b687
b653c5eb
Changes
583
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
130 additions
and
210 deletions
+130
-210
profiler/include/profile_grouped_gemm_impl.hpp
profiler/include/profile_grouped_gemm_impl.hpp
+15
-11
profiler/include/profile_reduce_impl.hpp
profiler/include/profile_reduce_impl.hpp
+12
-7
profiler/src/profile_batched_gemm.cpp
profiler/src/profile_batched_gemm.cpp
+5
-12
profiler/src/profile_batched_gemm_reduce.cpp
profiler/src/profile_batched_gemm_reduce.cpp
+4
-3
profiler/src/profile_conv_bwd_weight.cpp
profiler/src/profile_conv_bwd_weight.cpp
+5
-3
profiler/src/profile_conv_fwd_bias_relu.cpp
profiler/src/profile_conv_fwd_bias_relu.cpp
+5
-3
profiler/src/profile_conv_fwd_bias_relu_add.cpp
profiler/src/profile_conv_fwd_bias_relu_add.cpp
+5
-3
profiler/src/profile_conv_fwd_bias_relu_atomic_add.cpp
profiler/src/profile_conv_fwd_bias_relu_atomic_add.cpp
+0
-116
profiler/src/profile_convnd_bwd_data.cpp
profiler/src/profile_convnd_bwd_data.cpp
+4
-3
profiler/src/profile_convnd_fwd.cpp
profiler/src/profile_convnd_fwd.cpp
+24
-15
profiler/src/profile_gemm.cpp
profiler/src/profile_gemm.cpp
+5
-3
profiler/src/profile_gemm_add_add_fastgelu.cpp
profiler/src/profile_gemm_add_add_fastgelu.cpp
+4
-2
profiler/src/profile_gemm_bias_2d.cpp
profiler/src/profile_gemm_bias_2d.cpp
+5
-3
profiler/src/profile_gemm_bias_add_reduce.cpp
profiler/src/profile_gemm_bias_add_reduce.cpp
+5
-3
profiler/src/profile_gemm_bias_relu.cpp
profiler/src/profile_gemm_bias_relu.cpp
+5
-3
profiler/src/profile_gemm_bias_relu_add.cpp
profiler/src/profile_gemm_bias_relu_add.cpp
+5
-3
profiler/src/profile_gemm_reduce.cpp
profiler/src/profile_gemm_reduce.cpp
+5
-3
profiler/src/profile_grouped_gemm.cpp
profiler/src/profile_grouped_gemm.cpp
+5
-3
profiler/src/profile_reduce.cpp
profiler/src/profile_reduce.cpp
+8
-4
profiler/src/profiler.cpp
profiler/src/profiler.cpp
+4
-7
No files found.
profiler/include/profile_grouped_gemm_impl.hpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iomanip>
#include "c
heck_err
.hpp"
#include "c
onfig
.hpp"
#include "
device
.hpp"
#include "
host_tensor
.hpp"
#include "host_tensor_generator.hpp"
#include "
host_conv
.hpp"
#include "
tensor_layout
.hpp"
#include "device_
tens
or.hpp"
#include "
element_wise_operation
.hpp"
#include "
device_gemm
.hpp"
#include "reference_gemm.hpp"
#include "c
k/ck
.hpp"
#include "c
k/tensor_operation/gpu/device/tensor_layout
.hpp"
#include "
ck/tensor_operation/gpu/device/device_gemm
.hpp"
#include "
ck/tensor_operation/gpu/element/element_wise_operation
.hpp"
#include "
ck/library/utility/check_err
.hpp"
#include "
ck/library/utility/conv_util
.hpp"
#include "
ck/library/host_tensor/
device_
mem
or
y
.hpp"
#include "
ck/library/host_tensor/host_tensor
.hpp"
#include "
ck/library/host_tensor/host_tensor_generator
.hpp"
#include "
ck/library/reference_tensor_operation/cpu/
reference_gemm.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
profiler/include/profile_reduce_impl.hpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "check_err.hpp"
#include "device_reduce.hpp"
#include "device_reduce_instance.hpp"
#include "reduction_enums.hpp"
#include "host_reduction.hpp"
#include "host_common_util.hpp"
#include "host_tensor_generator.hpp"
#include "ck/utility/reduction_enums.hpp"
#include "ck/tensor_operation/gpu/device/device_reduce.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_reduction.hpp"
#include "ck/library/host_tensor/host_common_util.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
profiler/src/profile_batched_gemm.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdint>
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "host_gemm.hpp"
#include "device_tensor.hpp"
#include "device_base.hpp"
#include "device_batched_gemm_xdl.hpp"
#include "profile_batched_gemm_impl.hpp"
#include "profiler/include/profile_batched_gemm_impl.hpp"
enum
struct
GemmMatrixLayout
{
...
...
profiler/src/profile_batched_gemm_reduce.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_batched_gemm_reduce_impl.hpp"
#include "
profiler/include/
profile_batched_gemm_reduce_impl.hpp"
int
profile_batched_gemm_reduce
(
int
argc
,
char
*
argv
[])
{
...
...
profiler/src/profile_conv_bwd_weight.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_conv_bwd_weight_impl.hpp"
#include "profiler/include/profile_conv_bwd_weight_impl.hpp"
enum
struct
ConvDataType
{
...
...
profiler/src/profile_conv_fwd_bias_relu.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_conv_fwd_bias_relu_impl.hpp"
#include "profiler/include/profile_conv_fwd_bias_relu_impl.hpp"
enum
struct
ConvDataType
{
...
...
profiler/src/profile_conv_fwd_bias_relu_add.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_conv_fwd_bias_relu_add_impl.hpp"
#include "profiler/include/profile_conv_fwd_bias_relu_add_impl.hpp"
enum
struct
ConvDataType
{
...
...
profiler/src/profile_conv_fwd_bias_relu_atomic_add.cpp
deleted
100644 → 0
View file @
cc50b687
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_conv_fwd_bias_relu_atomic_add_impl.hpp"
// Element-type combination selected by command-line argument 2.
// The numeric values are the integers accepted on the command line.
enum struct ConvDataType
{
    F32_F32_F32 = 0,
    F16_F16_F16 = 1,
};
// Input tensor layout selected by command-line argument 3.
// The numeric values are the integers accepted on the command line.
enum struct ConvInputLayout
{
    NCHW = 0,
    NHWC = 1,
};
// Weight tensor layout selected by command-line argument 4.
// The numeric values are the integers accepted on the command line.
enum struct ConvWeightLayout
{
    KCYX = 0,
    KYXC = 1,
};
// Output tensor layout selected by command-line argument 5.
// The numeric values are the integers accepted on the command line.
enum struct ConvOutputLayout
{
    NKHW = 0,
    NHWK = 1,
};
int
profile_conv_fwd_bias_relu_atomic_add
(
int
argc
,
char
*
argv
[])
{
if
(
argc
!=
25
)
{
printf
(
"arg1: tensor operation (conv_fwd_bias_relu_atomic_add: "
"ForwardConvolution+Bias+ReLu+AtomicAdd)
\n
"
);
printf
(
"arg2: data type (0: fp32; 1: fp16)
\n
"
);
printf
(
"arg3: input tensor layout (0: NCHW; 1: NHWC)
\n
"
);
printf
(
"arg4: weight tensor layout (0: KCYX; 1: KYXC)
\n
"
);
printf
(
"arg5: output tensor layout (0: NKHW; 1: NHWK)
\n
"
);
printf
(
"arg6: verification (0: no; 1: yes)
\n
"
);
printf
(
"arg7: initialization (0: no init; 1: integer value; 2: decimal value)
\n
"
);
printf
(
"arg8: print tensor value (0: no; 1: yes)
\n
"
);
printf
(
"arg9: time kernel (0=n0, 1=yes)
\n
"
);
printf
(
"arg10 to 24: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx
\n
"
);
exit
(
1
);
}
const
auto
data_type
=
static_cast
<
ConvDataType
>
(
std
::
stoi
(
argv
[
2
]));
const
auto
in_layout
=
static_cast
<
ConvInputLayout
>
(
std
::
stoi
(
argv
[
3
]));
const
auto
wei_layout
=
static_cast
<
ConvWeightLayout
>
(
std
::
stoi
(
argv
[
4
]));
const
auto
out_layout
=
static_cast
<
ConvOutputLayout
>
(
std
::
stoi
(
argv
[
5
]));
const
bool
do_verification
=
std
::
stoi
(
argv
[
6
]);
const
int
init_method
=
std
::
stoi
(
argv
[
7
]);
const
bool
do_log
=
std
::
stoi
(
argv
[
8
]);
const
bool
time_kernel
=
std
::
stoi
(
argv
[
9
]);
const
ck
::
index_t
N
=
std
::
stoi
(
argv
[
10
]);
const
ck
::
index_t
K
=
std
::
stoi
(
argv
[
11
]);
const
ck
::
index_t
C
=
std
::
stoi
(
argv
[
12
]);
const
ck
::
index_t
Y
=
std
::
stoi
(
argv
[
13
]);
const
ck
::
index_t
X
=
std
::
stoi
(
argv
[
14
]);
const
ck
::
index_t
Hi
=
std
::
stoi
(
argv
[
15
]);
const
ck
::
index_t
Wi
=
std
::
stoi
(
argv
[
16
]);
const
ck
::
index_t
conv_stride_h
=
std
::
stoi
(
argv
[
17
]);
const
ck
::
index_t
conv_stride_w
=
std
::
stoi
(
argv
[
18
]);
const
ck
::
index_t
conv_dilation_h
=
std
::
stoi
(
argv
[
19
]);
const
ck
::
index_t
conv_dilation_w
=
std
::
stoi
(
argv
[
20
]);
const
ck
::
index_t
in_left_pad_h
=
std
::
stoi
(
argv
[
21
]);
const
ck
::
index_t
in_left_pad_w
=
std
::
stoi
(
argv
[
22
]);
const
ck
::
index_t
in_right_pad_h
=
std
::
stoi
(
argv
[
23
]);
const
ck
::
index_t
in_right_pad_w
=
std
::
stoi
(
argv
[
24
]);
const
ck
::
index_t
YEff
=
(
Y
-
1
)
*
conv_dilation_h
+
1
;
const
ck
::
index_t
XEff
=
(
X
-
1
)
*
conv_dilation_w
+
1
;
const
ck
::
index_t
Ho
=
(
Hi
+
in_left_pad_h
+
in_right_pad_h
-
YEff
)
/
conv_stride_h
+
1
;
const
ck
::
index_t
Wo
=
(
Wi
+
in_left_pad_w
+
in_right_pad_w
-
XEff
)
/
conv_stride_w
+
1
;
if
(
data_type
==
ConvDataType
::
F16_F16_F16
&&
in_layout
==
ConvInputLayout
::
NHWC
&&
wei_layout
==
ConvWeightLayout
::
KYXC
&&
out_layout
==
ConvOutputLayout
::
NHWK
)
{
ck
::
profiler
::
profile_conv_fwd_bias_relu_atomic_add_impl
<
2
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
N
,
K
,
C
,
std
::
vector
<
ck
::
index_t
>
{
Hi
,
Wi
},
std
::
vector
<
ck
::
index_t
>
{
Y
,
X
},
std
::
vector
<
ck
::
index_t
>
{
Ho
,
Wo
},
std
::
vector
<
ck
::
index_t
>
{
conv_stride_h
,
conv_stride_w
},
std
::
vector
<
ck
::
index_t
>
{
conv_dilation_h
,
conv_dilation_w
},
std
::
vector
<
ck
::
index_t
>
{
in_left_pad_h
,
in_left_pad_w
},
std
::
vector
<
ck
::
index_t
>
{
in_right_pad_h
,
in_right_pad_w
});
}
else
{
throw
std
::
runtime_error
(
"wrong! data_type & layout for this operator is not implemented"
);
}
return
0
;
}
profiler/src/profile_convnd_bwd_data.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_convnd_bwd_data_impl.hpp"
#include "
profiler/include/
profile_convnd_bwd_data_impl.hpp"
namespace
{
...
...
profiler/src/profile_convnd_fwd.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <half.hpp>
#include "conv_util.hpp"
#include "element_wise_operation.hpp"
#include "fill.hpp"
#include "profile_convnd_fwd.hpp"
#include "tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/utility/fill.hpp"
#include "profiler/include/profile_convnd_fwd.hpp"
namespace
{
...
...
@@ -150,9 +154,12 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
utils
::
FillUniform
<
int
>
,
ck
::
utils
::
FillUniform
<
int
>>>
(
params
,
true
,
ck
::
utils
::
FillUniform
<
int
>
{},
ck
::
utils
::
FillUniform
<
int
>
{});
ck
::
utils
::
FillUniformDistributionIntegerValue
<
int
>
,
ck
::
utils
::
FillUniformDistributionIntegerValue
<
int
>>>
(
params
,
true
,
ck
::
utils
::
FillUniformDistributionIntegerValue
<
int
>
{},
ck
::
utils
::
FillUniformDistributionIntegerValue
<
int
>
{});
break
;
case
2
:
conv_instance
=
std
::
make_unique
<
...
...
@@ -165,12 +172,12 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
tensor_operation
::
element_wise
::
PassThrough
,
ck
::
utils
::
FillUniform
<
InDataType
>
,
ck
::
utils
::
FillUniform
<
WeiDataType
>>>
(
ck
::
utils
::
FillUniform
Distribution
<
InDataType
>
,
ck
::
utils
::
FillUniform
Distribution
<
WeiDataType
>>>
(
params
,
true
,
ck
::
utils
::
FillUniform
<
InDataType
>
{},
ck
::
utils
::
FillUniform
<
WeiDataType
>
{});
ck
::
utils
::
FillUniform
Distribution
<
InDataType
>
{},
ck
::
utils
::
FillUniform
Distribution
<
WeiDataType
>
{});
break
;
default:
throw
std
::
runtime_error
(
"Unsupported init method!"
);
}
...
...
@@ -181,8 +188,10 @@ void profile_convnd_instances_impl(const ck::utils::conv::ConvParams& params,
_1
,
_2
,
_3
);
OpInstanceRunEngine
<
InDataType
,
WeiDataType
,
OutDataType
>
run_engine
(
*
conv_instance
,
reference_conv_fwd_fun
);
OpInstanceRunEngine
<
InDataType
,
WeiDataType
,
OutDataType
>
run_engine
(
*
conv_instance
,
reference_conv_fwd_fun
,
do_verification
);
auto
best_conf
=
run_engine
.
Profile
(
conv
::
ConvolutionFwdInstances
<
InDataType
,
WeiDataType
,
OutDataType
>::
template
Get
<
NDim
>(),
time_kernel
,
...
...
profiler/src/profile_gemm.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_gemm_impl.hpp"
#include "profiler/include/profile_gemm_impl.hpp"
enum
struct
GemmMatrixLayout
{
...
...
profiler/src/profile_gemm_add_add_fastgelu.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include "profile_gemm_add_add_fastgelu_impl.hpp"
#include "
profiler/include/
profile_gemm_add_add_fastgelu_impl.hpp"
int
profile_gemm_add_add_fastgelu
(
int
argc
,
char
*
argv
[])
{
...
...
profiler/src/profile_gemm_bias_2d.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_gemm_bias_2d_impl.hpp"
#include "profiler/include/profile_gemm_bias_2d_impl.hpp"
enum
struct
GemmMatrixLayout
{
...
...
profiler/src/profile_gemm_bias_add_reduce.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_gemm_bias_add_reduce_impl.hpp"
#include "profiler/include/profile_gemm_bias_add_reduce_impl.hpp"
int
profile_gemm_bias_add_reduce
(
int
argc
,
char
*
argv
[])
{
...
...
profiler/src/profile_gemm_bias_relu.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_gemm_bias_relu_impl.hpp"
#include "profiler/include/profile_gemm_bias_relu_impl.hpp"
enum
struct
GemmMatrixLayout
{
...
...
profiler/src/profile_gemm_bias_relu_add.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_gemm_bias_relu_add_impl.hpp"
#include "profiler/include/profile_gemm_bias_relu_add_impl.hpp"
enum
struct
GemmMatrixLayout
{
...
...
profiler/src/profile_gemm_reduce.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_gemm_reduce_impl.hpp"
#include "profiler/include/profile_gemm_reduce_impl.hpp"
int
profile_gemm_reduce
(
int
argc
,
char
*
argv
[])
{
...
...
profiler/src/profile_grouped_gemm.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "profile_grouped_gemm_impl.hpp"
#include "profiler/include/profile_grouped_gemm_impl.hpp"
enum
struct
GemmMatrixLayout
{
...
...
profiler/src/profile_reduce.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <fstream>
#include <cstdlib>
...
...
@@ -6,11 +9,12 @@
#include <sstream>
#include <getopt.h>
#include "data_type_enum.hpp"
#include "reduction_enums.hpp"
#include "ck/utility/reduction_enums.hpp"
#include "ck/library/host_tensor/host_common_util.hpp"
#include "
host_common_uti
l.hpp"
#include "profile
_reduce_impl
.hpp"
#include "
profiler/include/profile_reduce_imp
l.hpp"
#include "profile
r/include/data_type_enum
.hpp"
using
namespace
std
;
...
...
profiler/src/profiler.cpp
View file @
cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <cstring>
#include "profile_convnd_fwd.hpp"
#include "
profiler/include/
profile_convnd_fwd.hpp"
int
profile_gemm
(
int
,
char
*
[]);
int
profile_gemm_bias_2d
(
int
,
char
*
[]);
...
...
@@ -17,7 +20,6 @@ int profile_grouped_gemm(int, char*[]);
int
profile_conv_fwd
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu_add
(
int
,
char
*
[]);
int
profile_conv_fwd_bias_relu_atomic_add
(
int
,
char
*
[]);
int
profile_convnd_bwd_data
(
int
,
char
*
[],
int
);
int
profile_reduce
(
int
,
char
*
[]);
int
profile_conv_bwd_weight
(
int
,
char
*
[]);
...
...
@@ -36,7 +38,6 @@ static void print_helper_message()
" conv_fwd: ForwardConvolution
\n
"
" conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU
\n
"
" conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add
\n
"
" conv_fwd_bias_relu_atomic_add: ForwardConvolution+Bias+ReLU+AtomicAdd
\n
"
" conv1d_bwd_data: BackwardConvolution data 1 dim
\n
"
" conv2d_bwd_data: BackwardConvolution data 2 dim
\n
"
" conv3d_bwd_data: BackwardConvolution data 3 dim
\n
"
...
...
@@ -103,10 +104,6 @@ int main(int argc, char* argv[])
{
return
profile_conv_fwd_bias_relu_add
(
argc
,
argv
);
}
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd_bias_relu_atomic_add"
)
==
0
)
{
return
profile_conv_fwd_bias_relu_atomic_add
(
argc
,
argv
);
}
else
if
(
strcmp
(
argv
[
1
],
"conv1d_bwd_data"
)
==
0
)
{
return
profile_convnd_bwd_data
(
argc
,
argv
,
1
);
...
...
Prev
1
…
23
24
25
26
27
28
29
30
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment