Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
3289a5c9
Commit
3289a5c9
authored
Nov 21, 2024
by
Andriy Roshchenko
Browse files
Narrowing the scope of PR to OCP FP8 enablement only
parent
dbfb222d
Changes
63
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
36 additions
and
166 deletions
+36
-166
example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp
example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp
+0
-1
example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp
example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp
+1
-2
example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp
example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp
+1
-2
example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp
example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp
+1
-2
example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp
...e/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp
+0
-1
example/38_grouped_conv_bwd_data_multiple_d/common.hpp
example/38_grouped_conv_bwd_data_multiple_d/common.hpp
+1
-1
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp
...v_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp
+1
-2
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp
...nv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp
+1
-2
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16_comp_bf8_fp8.cpp
...ultiple_d/grouped_conv_bwd_data_xdl_fp16_comp_bf8_fp8.cpp
+1
-2
example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc
...bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc
+3
-88
example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_bf16_i8.cpp
..._ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_bf16_i8.cpp
+3
-3
example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_fp16.cpp
...lti_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_fp16.cpp
+3
-3
example/62_convnd_activ/convinvscale/run_convnd_fwd_convinvscale_example.inc
...ctiv/convinvscale/run_convnd_fwd_convinvscale_example.inc
+1
-1
example/62_convnd_activ/convscale/run_convnd_fwd_convscale_example.inc
...nvnd_activ/convscale/run_convnd_fwd_convscale_example.inc
+1
-1
example/62_convnd_activ/convscale_add/run_convnd_fwd_convscale_add_example.inc
...iv/convscale_add/run_convnd_fwd_convscale_add_example.inc
+1
-1
example/62_convnd_activ/convscale_reduce/run_convnd_fwd_example.inc
..._convnd_activ/convscale_reduce/run_convnd_fwd_example.inc
+1
-1
example/62_convnd_activ/convscale_relu/run_convnd_fwd_convscale_relu_example.inc
.../convscale_relu/run_convnd_fwd_convscale_relu_example.inc
+1
-1
example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8.cpp
...gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8.cpp
+1
-1
example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp
...iply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp
+13
-50
include/ck/utility/amd_buffer_addressing.hpp
include/ck/utility/amd_buffer_addressing.hpp
+1
-1
No files found.
example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp
View file @
3289a5c9
...
@@ -16,7 +16,6 @@
...
@@ -16,7 +16,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/utility/literals.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
...
...
example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp
View file @
3289a5c9
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
...
@@ -16,7 +16,6 @@
...
@@ -16,7 +16,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/utility/literals.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
...
...
example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp
View file @
3289a5c9
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
...
@@ -16,7 +16,6 @@
...
@@ -16,7 +16,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/utility/literals.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
...
...
example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp
View file @
3289a5c9
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
...
@@ -16,7 +16,6 @@
...
@@ -16,7 +16,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/utility/literals.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
...
...
example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp
View file @
3289a5c9
...
@@ -24,7 +24,6 @@
...
@@ -24,7 +24,6 @@
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/reference_tensor_operation/gpu/reference_gemm.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/utility/literals.hpp"
template
<
ck
::
index_t
...
Is
>
template
<
ck
::
index_t
...
Is
>
...
...
example/38_grouped_conv_bwd_data_multiple_d/common.hpp
View file @
3289a5c9
...
@@ -40,7 +40,7 @@ using BF8 = ck::bf8_t;
...
@@ -40,7 +40,7 @@ using BF8 = ck::bf8_t;
struct
ExecutionConfig
final
struct
ExecutionConfig
final
{
{
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
2
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
};
};
...
...
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp
View file @
3289a5c9
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2023
-2024
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp"
#include "common.hpp"
#include "common.hpp"
...
@@ -11,7 +11,6 @@ using AccDataType = FP32;
...
@@ -11,7 +11,6 @@ using AccDataType = FP32;
using
CShuffleDataType
=
FP16
;
using
CShuffleDataType
=
FP16
;
using
DsDataType
=
ck
::
Tuple
<>
;
using
DsDataType
=
ck
::
Tuple
<>
;
using
InDataType
=
FP16
;
using
InDataType
=
FP16
;
using
VerifyDataType
=
FP16
;
// is used for selection of check tolerances
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
...
...
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp
View file @
3289a5c9
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
#include "common.hpp"
#include "common.hpp"
...
@@ -10,7 +10,6 @@ using AccDataType = FP32;
...
@@ -10,7 +10,6 @@ using AccDataType = FP32;
using
CShuffleDataType
=
FP16
;
using
CShuffleDataType
=
FP16
;
using
DsDataType
=
ck
::
Tuple
<>
;
using
DsDataType
=
ck
::
Tuple
<>
;
using
InDataType
=
FP16
;
using
InDataType
=
FP16
;
using
VerifyDataType
=
FP16
;
// is used for selection of check tolerances
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
...
...
example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16_comp_bf8_fp8.cpp
View file @
3289a5c9
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp"
#include "common.hpp"
#include "common.hpp"
...
@@ -12,7 +12,6 @@ using DsDataType = ck::Tuple<>;
...
@@ -12,7 +12,6 @@ using DsDataType = ck::Tuple<>;
using
InDataType
=
FP16
;
using
InDataType
=
FP16
;
using
AComputeType
=
BF8
;
using
AComputeType
=
BF8
;
using
BComputeType
=
FP8
;
using
BComputeType
=
FP8
;
using
VerifyDataType
=
BF8
;
// is used for selection of check tolerances
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
OutLayout
=
ck
::
tensor_layout
::
convolution
::
GNHWK
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
using
WeiLayout
=
ck
::
tensor_layout
::
convolution
::
GKYXC
;
...
...
example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc
View file @
3289a5c9
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
template
<
typename
DataType
>
inline
__host__
__device__
constexpr
double
get_rtol
()
{
if
constexpr
(
std
::
is_same_v
<
DataType
,
float
>
)
{
return
1
e
-
3
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
double
>
)
{
return
1
e
-
6
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
ck
::
half_t
>
)
{
return
1
e
-
3
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
ck
::
bhalf_t
>
)
{
return
5
e
-
2
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
int32_t
>
)
{
return
1
e
-
1
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
int8_t
>
)
{
return
1
e
-
1
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
ck
::
f8_t
>
)
{
return
1
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
ck
::
bf8_t
>
)
{
return
1
;
}
else
{
return
1
e
-
3
;
}
}
template
<
typename
DataType
>
inline
__host__
__device__
constexpr
double
get_atol
()
{
if
constexpr
(
std
::
is_same_v
<
DataType
,
float
>
)
{
return
1
e
-
3
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
double
>
)
{
return
1
e
-
6
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
ck
::
half_t
>
)
{
return
1
e
-
3
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
ck
::
bhalf_t
>
)
{
return
5
e
-
2
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
int32_t
>
)
{
return
1
e
-
1
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
int8_t
>
)
{
return
1
e
-
1
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
ck
::
f8_t
>
)
{
return
1
;
}
else
if
constexpr
(
std
::
is_same_v
<
DataType
,
ck
::
bf8_t
>
)
{
return
1
;
}
else
{
return
1
e
-
3
;
}
}
bool
run_conv_bwd_data
(
const
ExecutionConfig
&
config
,
bool
run_conv_bwd_data
(
const
ExecutionConfig
&
config
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_params
,
const
ck
::
utils
::
conv
::
ConvParam
&
conv_params
,
...
@@ -108,7 +27,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config,
...
@@ -108,7 +27,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config,
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
WeiDataType
>
{
-
5
,
5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_2
<
WeiDataType
>
{
-
5
,
5
});
break
;
break
;
default
:
default
:
out
.
GenerateTensorValue
(
GeneratorTensor_3
<
OutDataType
>
{
-
0.
5
,
0.5
});
out
.
GenerateTensorValue
(
GeneratorTensor_3
<
OutDataType
>
{
0.
0
,
1.0
});
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
WeiDataType
>
{
-
0.5
,
0.5
});
wei
.
GenerateTensorValue
(
GeneratorTensor_3
<
WeiDataType
>
{
-
0.5
,
0.5
});
}
}
...
@@ -219,11 +138,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config,
...
@@ -219,11 +138,7 @@ bool run_conv_bwd_data(const ExecutionConfig& config,
in_device_buf
.
FromDevice
(
in_device
.
mData
.
data
());
in_device_buf
.
FromDevice
(
in_device
.
mData
.
data
());
return
ck
::
utils
::
check_err
(
in_device
.
mData
,
return
ck
::
utils
::
check_err
(
in_device
.
mData
,
in_host
.
mData
);
in_host
.
mData
,
"Error: Incorrect results!"
,
get_rtol
<
VerifyDataType
>
(),
get_atol
<
VerifyDataType
>
());
}
}
return
true
;
return
true
;
...
...
example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_bf16_i8.cpp
View file @
3289a5c9
...
@@ -194,9 +194,9 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
...
@@ -194,9 +194,9 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
b1_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_3
<
B1DataType
>
{
-
0.5
,
0.5
});
b1_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_3
<
B1DataType
>
{
-
0.5
,
0.5
});
break
;
break
;
default:
default:
a0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
1
<
A0DataType
>
{
1
});
a0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
Sequential
<
A0DataType
,
0
>
{});
b0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
1
<
B0DataType
>
{
1
});
b0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
Sequential
<
B0DataType
,
1
>
{});
b1_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
1
<
B1DataType
>
{
1
});
b1_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
Sequential
<
B1DataType
,
1
>
{});
}
}
d0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_3
<
D0DataType
>
{
-
0.5
,
0.5
});
d0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_3
<
D0DataType
>
{
-
0.5
,
0.5
});
...
...
example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_fp16.cpp
View file @
3289a5c9
...
@@ -184,9 +184,9 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
...
@@ -184,9 +184,9 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
b_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_3
<
B0DataType
>
{
-
0.5
,
0.5
});
b_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_3
<
B0DataType
>
{
-
0.5
,
0.5
});
break
;
break
;
default:
default:
a0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
1
<
A0DataType
>
{
1
});
a0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
Sequential
<
A0DataType
,
0
>
{});
a1_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
1
<
A1DataType
>
{
1
});
a1_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
Sequential
<
A1DataType
,
0
>
{});
b_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
1
<
B0DataType
>
{
-
1
});
b_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_
Sequential
<
B0DataType
,
1
>
{});
}
}
d0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_3
<
D0DataType
>
{
-
0.5
,
0.5
});
d0_tensors
[
i
].
GenerateTensorValue
(
GeneratorTensor_3
<
D0DataType
>
{
-
0.5
,
0.5
});
...
...
example/62_convnd_activ/convinvscale/run_convnd_fwd_convinvscale_example.inc
View file @
3289a5c9
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg
();
print_helper_msg
();
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
2
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
ck
::
utils
::
conv
::
ConvParam
conv_param
{
ck
::
utils
::
conv
::
ConvParam
conv_param
{
...
...
example/62_convnd_activ/convscale/run_convnd_fwd_convscale_example.inc
View file @
3289a5c9
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg
();
print_helper_msg
();
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
2
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
ck
::
utils
::
conv
::
ConvParam
conv_param
{
ck
::
utils
::
conv
::
ConvParam
conv_param
{
...
...
example/62_convnd_activ/convscale_add/run_convnd_fwd_convscale_add_example.inc
View file @
3289a5c9
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg
();
print_helper_msg
();
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
2
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
ck
::
utils
::
conv
::
ConvParam
conv_param
{
ck
::
utils
::
conv
::
ConvParam
conv_param
{
...
...
example/62_convnd_activ/convscale_reduce/run_convnd_fwd_example.inc
View file @
3289a5c9
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg
();
print_helper_msg
();
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
2
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
ck
::
utils
::
conv
::
ConvParam
conv_param
{
ck
::
utils
::
conv
::
ConvParam
conv_param
{
...
...
example/62_convnd_activ/convscale_relu/run_convnd_fwd_convscale_relu_example.inc
View file @
3289a5c9
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
...
@@ -8,7 +8,7 @@ bool run_convnd_fwd_example(int argc, char* argv[])
print_helper_msg
();
print_helper_msg
();
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
2
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
ck
::
utils
::
conv
::
ConvParam
conv_param
{
ck
::
utils
::
conv
::
ConvParam
conv_param
{
...
...
example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8.cpp
View file @
3289a5c9
...
@@ -86,7 +86,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultiD_Xdl_CShu
...
@@ -86,7 +86,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultiD_Xdl_CShu
int
main
(
int
argc
,
char
*
argv
[])
int
main
(
int
argc
,
char
*
argv
[])
{
{
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
2
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
// GEMM shape
// GEMM shape
...
...
example/65_gemm_multiply_multiply/gemm_multiply_multiply_xdl_fp8_ab_scale.cpp
View file @
3289a5c9
...
@@ -78,7 +78,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultiD_ABScale_
...
@@ -78,7 +78,7 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMultiD_ABScale_
int
main
(
int
argc
,
char
*
argv
[])
int
main
(
int
argc
,
char
*
argv
[])
{
{
bool
do_verification
=
true
;
bool
do_verification
=
true
;
int
init_method
=
5
;
int
init_method
=
1
;
bool
time_kernel
=
false
;
bool
time_kernel
=
false
;
// GEMM shape
// GEMM shape
...
@@ -186,20 +186,6 @@ int main(int argc, char* argv[])
...
@@ -186,20 +186,6 @@ int main(int argc, char* argv[])
b0_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
B0DataType
>
{});
b0_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
B0DataType
>
{});
a1_m_k
.
GenerateTensorValue
(
GeneratorTensor_3
<
A1DataType
>
{
0
,
1.0
});
a1_m_k
.
GenerateTensorValue
(
GeneratorTensor_3
<
A1DataType
>
{
0
,
1.0
});
b1_k_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
B1DataType
>
{
0
,
1.0
});
b1_k_n
.
GenerateTensorValue
(
GeneratorTensor_3
<
B1DataType
>
{
0
,
1.0
});
break
;
case
6
:
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_PI
<
A0DataType
>
{});
b0_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
B0DataType
>
{
0.5
f
});
a1_m_k
.
GenerateTensorValue
(
GeneratorTensor_1
<
A1DataType
>
{
0.5
});
b1_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
B1DataType
>
{
4
});
break
;
case
7
:
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_PI_A
<
A0DataType
>
{});
b0_k_n
.
GenerateTensorValue
(
GeneratorTensor_PI_B
<
B0DataType
>
{});
a1_m_k
.
GenerateTensorValue
(
GeneratorTensor_1
<
A1DataType
>
{
2
});
b1_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
B1DataType
>
{
0.5
});
break
;
break
;
default:
default:
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_3
<
A0DataType
>
{
-
0.5
,
0.5
});
a0_m_k
.
GenerateTensorValue
(
GeneratorTensor_3
<
A0DataType
>
{
-
0.5
,
0.5
});
...
@@ -254,28 +240,23 @@ int main(int argc, char* argv[])
...
@@ -254,28 +240,23 @@ int main(int argc, char* argv[])
"not support this GEMM problem"
);
"not support this GEMM problem"
);
}
}
std
::
cout
<<
"Compute GEMM on device...
\n
"
;
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
,
20
,
50
});
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
time_kernel
,
20
,
50
});
std
::
cout
<<
"DONE!"
<<
std
::
endl
;
if
(
time_kernel
)
{
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
std
::
size_t
num_btype
=
sizeof
(
A0DataType
)
*
M
*
K
+
sizeof
(
B0DataType
)
*
K
*
N
+
sizeof
(
EDataType
)
*
M
*
N
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E9
/
ave_time
;
std
::
size_t
flop
=
std
::
size_t
(
2
)
*
M
*
N
*
K
;
std
::
size_t
num_btype
=
sizeof
(
A0DataType
)
*
M
*
K
+
sizeof
(
B0DataType
)
*
K
*
N
+
sizeof
(
EDataType
)
*
M
*
N
;
float
gb_per_sec
=
num_btype
/
1.E
6
/
ave_time
;
float
tflops
=
static_cast
<
float
>
(
flop
)
/
1.E
9
/
ave_time
;
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
float
gb_per_sec
=
num_btype
/
1.E6
/
ave_time
;
<<
" GB/s"
<<
std
::
endl
;
}
std
::
cout
<<
"Perf: "
<<
ave_time
<<
" ms, "
<<
tflops
<<
" TFlops, "
<<
gb_per_sec
<<
" GB/s"
<<
std
::
endl
;
e_device_buf
.
FromDevice
(
e_m_n_device_result
.
mData
.
data
());
e_device_buf
.
FromDevice
(
e_m_n_device_result
.
mData
.
data
());
if
(
do_verification
)
if
(
do_verification
)
{
{
std
::
cout
<<
"Running verification on CPU."
<<
std
::
endl
;
Tensor
<
AccDataType
>
c_m_n
({
M
,
N
});
Tensor
<
AccDataType
>
c_m_n
({
M
,
N
});
Tensor
<
float
>
a_m_k
({
M
,
K
});
Tensor
<
float
>
a_m_k
({
M
,
K
});
Tensor
<
float
>
b_k_n
({
K
,
N
});
Tensor
<
float
>
b_k_n
({
K
,
N
});
...
@@ -325,28 +306,10 @@ int main(int argc, char* argv[])
...
@@ -325,28 +306,10 @@ int main(int argc, char* argv[])
e_device_buf
.
FromDevice
(
e_m_n_device_result
.
mData
.
data
());
e_device_buf
.
FromDevice
(
e_m_n_device_result
.
mData
.
data
());
if
(
init_method
==
6
||
init_method
==
7
)
return
ck
::
utils
::
check_err
(
{
e_m_n_device_result
,
e_m_n_host_result
,
"Error: Incorrect results!"
,
5e-2
,
5e-2
)
std
::
cout
<<
std
::
fixed
<<
std
::
setprecision
(
16
);
?
0
:
1
;
float
d
=
ck
::
type_convert
<
float
>
(
e_m_n_device_result
(
0
,
10
));
float
h
=
ck
::
type_convert
<
float
>
(
e_m_n_host_result
(
10
,
0
));
std
::
cout
<<
"device result: "
<<
d
<<
std
::
endl
;
std
::
cout
<<
"host result: "
<<
h
<<
std
::
endl
;
std
::
cout
<<
"expected result: "
<<
M_PI
<<
std
::
endl
;
std
::
cout
<<
"device - host: "
<<
std
::
abs
(
d
-
h
)
<<
std
::
endl
;
std
::
cout
<<
"device - expected: "
<<
std
::
abs
(
d
-
M_PI
)
<<
std
::
endl
;
std
::
cout
<<
"atol: "
<<
5e-2
<<
std
::
endl
;
}
if
(
ck
::
utils
::
check_err
(
e_m_n_device_result
,
e_m_n_host_result
,
"Error: Incorrect results!"
,
5e-2
,
5e-2
))
{
std
::
cout
<<
"Verification on CPU: PASS"
<<
std
::
endl
;
return
0
;
}
else
return
1
;
}
}
return
0
;
return
0
;
...
...
include/ck/utility/amd_buffer_addressing.hpp
View file @
3289a5c9
...
@@ -847,7 +847,7 @@ amd_buffer_load_invalid_element_return_zero(const T* p_src_wave,
...
@@ -847,7 +847,7 @@ amd_buffer_load_invalid_element_return_zero(const T* p_src_wave,
vector_t
tmp
{
amd_buffer_load_impl
<
scalar_t
,
vector_size
,
coherence
>
(
vector_t
tmp
{
amd_buffer_load_impl
<
scalar_t
,
vector_size
,
coherence
>
(
src_wave_buffer_resource
,
src_thread_addr_offset
,
0
)};
src_wave_buffer_resource
,
src_thread_addr_offset
,
0
)};
return
src_thread_element_valid
?
tmp
:
vector_t
{
0
}
;
return
src_thread_element_valid
?
tmp
:
vector_t
(
0
)
;
#endif
#endif
}
}
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment