gaoqiong / composable_kernel_ROCM

Commit b206fb26
Authored Oct 29, 2024 by Andriy Roshchenko
Parent 24771ab7

Extend GeneratorTensor_Sequential to produce values of prescribed data types.
Showing 17 changed files with 64 additions and 38 deletions (+64 −38)

example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp  +4 −4
example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp  +4 −4
example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp  +3 −3
example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp  +2 −2
example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16_fp8.cpp  +7 −3
example/15_grouped_gemm/run_grouped_gemm_example.inc  +6 −3
example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp  +2 −2
example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc  +1 −1
example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc  +1 −1
example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc  +1 −1
example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc  +1 −1
example/35_splitK_gemm/run_splitK_gemm_example.inc  +2 −2
example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp  +1 −1
example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute_xdl.cpp  +1 −1
example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_bf16_i8.cpp  +3 −3
example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_fp16.cpp  +3 −3
library/include/ck/library/utility/host_tensor_generator.hpp  +22 −3
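Most of the example-file changes below repeat one pattern: call sites that previously instantiated GeneratorTensor_Sequential with only a dimension index now also pass the destination element type, and a few call sites switch to constant-fill generators instead. The fragment below is not new code, just the recurring before/after call excerpted from the diffs (ADataType stands in for whichever element type a given example defines):

```cpp
// Before this commit: parameterized only by the dimension to walk;
// the generator yielded float values regardless of the tensor's element type.
a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});

// After this commit: the element type is prescribed as the first template
// argument, so the generated index values are converted to ADataType up front.
a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<ADataType, 0>{});
```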
example/15_grouped_gemm/grouped_gemm_multiple_d_splitk_xdl_fp16.cpp

```diff
@@ -186,15 +186,15 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             b_tensors[i].GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
             for(int j = 0; j < NumDMatrices; ++j)
             {
-                d_tensors[i][j].GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0});
+                d_tensors[i][j].GenerateTensorValue(GeneratorTensor_3<DDataType>{0.0, 1.0});
             }
             break;
         default:
-            a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+            a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<ADataType, 0>{});
+            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<BDataType, 1>{});
             for(int j = 0; j < NumDMatrices; ++j)
             {
-                d_tensors[i][j].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
+                d_tensors[i][j].GenerateTensorValue(GeneratorTensor_Sequential<DDataType, 0>{});
             }
         }
     }
```
example/15_grouped_gemm/grouped_gemm_multiple_d_xdl_fp16.cpp

```diff
@@ -190,15 +190,15 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             b_tensors[i].GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
             for(int j = 0; j < NumDs; ++j)
             {
-                d_tensors[i][j].GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0});
+                d_tensors[i][j].GenerateTensorValue(GeneratorTensor_3<DDataType>{0.0, 1.0});
             }
             break;
         default:
-            a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+            a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<ADataType, 0>{});
+            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<BDataType, 1>{});
             for(int j = 0; j < NumDs; ++j)
             {
-                d_tensors[i][j].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
+                d_tensors[i][j].GenerateTensorValue(GeneratorTensor_Sequential<DDataType, 0>{});
             }
         }
     }
```
example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp

```diff
@@ -167,11 +167,11 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             b_tensors[i].GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
             break;
         default:
-            a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+            a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<ADataType, 0>{});
+            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<BDataType, 1>{});
         }
-        d0_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+        d0_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<D0DataType, 1>{});
     }

     using GroupedGemmKernelArgument = ck::tensor_operation::device::GroupedGemmKernelArgument<1>;
```
example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp

```diff
@@ -157,8 +157,8 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             b_tensors[i].GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
             break;
         default:
-            a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+            a_tensors[i].GenerateTensorValue(GeneratorTensor_1<ADataType>{1.0});
+            b_tensors[i].GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0});
         }
     }
```
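In this file and in several of the files that follow, the default initializers switch from the sequential generator to GeneratorTensor_1, which, as used in these examples, fills a tensor with a single constant value. One generic reason constant fills are convenient for smoke tests is that the expected GEMM output is trivial to predict: with A and B filled with ones, every output element equals the reduction length K. The sketch below is an illustration of that arithmetic only, independent of the composable_kernel API:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Naive reference GEMM: C[m][n] = sum_k A[m][k] * B[k][n].
// With A and B filled with the constant 1, every C element equals K,
// which makes a device result easy to eyeball-check.
int main()
{
    const std::size_t M = 4, N = 3, K = 8;
    std::vector<float> a(M * K, 1.0f), b(K * N, 1.0f), c(M * N, 0.0f);

    for(std::size_t m = 0; m < M; ++m)
        for(std::size_t n = 0; n < N; ++n)
            for(std::size_t k = 0; k < K; ++k)
                c[m * N + n] += a[m * K + k] * b[k * N + n];

    std::cout << "C[0][0] = " << c[0] << " (expected " << K << ")\n";
}
```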
example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16_fp8.cpp

```diff
@@ -154,12 +154,12 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             b_tensors[i].GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5});
             break;
         case 2:
-            a_tensors[i].GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0});
+            a_tensors[i].GenerateTensorValue(GeneratorTensor_3<ADataType>{-1.0, 1.0});
             b_tensors[i].GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
             break;
         default:
-            a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+            a_tensors[i].GenerateTensorValue(GeneratorTensor_1<ADataType>{1.0});
+            b_tensors[i].GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0});
         }
     }
@@ -266,6 +266,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
                                                                           BElementOp,
                                                                           CDEElementOp>;
+        std::cout << "Running verification on CPU." << std::endl;
         for(std::size_t i = 0; i < gemm_descs.size(); i++)
         {
             c_tensors_device[i]->FromDevice(c_device_tensors[i].mData.data(),
@@ -285,6 +286,9 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             pass &= ck::utils::check_err(c_device_tensors[i], c_host_tensors[i]);
         }
+
+        if(pass)
+            std::cout << "Verification on CPU: PASS" << std::endl;
     }

     return pass;
```
example/15_grouped_gemm/run_grouped_gemm_example.inc

```diff
@@ -120,12 +120,12 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             b_tensors[i].GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 5});
             break;
         case 2:
-            a_tensors[i].GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0});
+            a_tensors[i].GenerateTensorValue(GeneratorTensor_3<ADataType>{-1.0, 1.0});
             b_tensors[i].GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
             break;
         default:
-            a_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+            a_tensors[i].GenerateTensorValue(GeneratorTensor_1<ADataType>{1});
+            b_tensors[i].GenerateTensorValue(GeneratorTensor_1<BDataType>{1});
         }
     }
@@ -184,6 +184,7 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
     bool pass = true;
     if(config.do_verification)
     {
+        std::cout << "Running verification on CPU." << std::endl;
         using ReferenceGemmInstance = ck::tensor_operation::host::ReferenceGemm<ADataType,
                                                                                 BDataType,
                                                                                 EDataType,
@@ -215,6 +216,8 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             pass &= ck::utils::check_err(c_device_tensors[i], c_host_tensors[i]);
 #endif
         }
+        if(pass)
+            std::cout << "Verification on CPU: PASS" << std::endl;
     }

     if(config.time_kernel)
```
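The verification hunks above only add progress logging around the existing per-group check loop. Below is a minimal, self-contained sketch of the resulting control flow; verify_group() is a hypothetical placeholder standing in for running the reference GEMM and comparing it against the device result with ck::utils::check_err:

```cpp
#include <cstddef>
#include <iostream>

// Hypothetical per-group check: in the real example this runs ReferenceGemm on the
// host and compares the device output with ck::utils::check_err.
bool verify_group(std::size_t /*group*/) { return true; }

int main()
{
    const std::size_t group_count = 4;
    bool pass = true;

    std::cout << "Running verification on CPU." << std::endl;
    for(std::size_t i = 0; i < group_count; i++)
        pass &= verify_group(i); // accumulate the result across all groups

    if(pass)
        std::cout << "Verification on CPU: PASS" << std::endl;

    return pass ? 0 : 1;
}
```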
example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp

```diff
@@ -175,8 +175,8 @@ int main(int argc, char* argv[])
         b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
         break;
     default:
-        a_m_k.GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-        b_k_n.GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+        a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1});
+        b_k_n.GenerateTensorValue(GeneratorTensor_1<BDataType>{1});
     }

     c0_n_bias.GenerateTensorValue(GeneratorTensor_2<C0DataType>{-5, 5});
```
example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc

```diff
@@ -150,7 +150,7 @@ bool run_batched_gemm_gemm_example(int argc, char* argv[])
         break;
     default:
         a_g_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1});
-        b0_g_k_n.GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+        b0_g_k_n.GenerateTensorValue(GeneratorTensor_Sequential<B0DataType, 1>{});
         b1_g_n_o.GenerateTensorValue(GeneratorTensor_Diagonal<B1DataType>{});
     }
```
example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc

```diff
@@ -157,7 +157,7 @@ int run(int argc, char* argv[])
         break;
     default:
         a_g_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1});
-        b0_g_k_n.GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+        b0_g_k_n.GenerateTensorValue(GeneratorTensor_Sequential<B0DataType, 1>{});
         b1_g_n_o.GenerateTensorValue(GeneratorTensor_Diagonal<B1DataType>{});
     }
```
example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc

```diff
@@ -118,7 +118,7 @@ int run(int argc, char* argv[])
         b1_gs_os_ns.GenerateTensorValue(GeneratorTensor_Diagonal<B1DataType>{});
         break;
     default:
-        a_gs_ms_ks.GenerateTensorValue(GeneratorTensor_Sequential<2>{});
+        a_gs_ms_ks.GenerateTensorValue(GeneratorTensor_Sequential<ADataType, 2>{});
         b0_gs_ns_ks.GenerateTensorValue(GeneratorTensor_Diagonal<B0DataType>{});
         b1_gs_os_ns.GenerateTensorValue(GeneratorTensor_Diagonal<B1DataType>{});
     }
```
example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc

```diff
@@ -152,7 +152,7 @@ int run(int argc, char* argv[])
         break;
     default:
         a_gs_ms_ks.GenerateTensorValue(GeneratorTensor_1<ADataType>{1});
-        b0_gs_ns_ks.GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+        b0_gs_ns_ks.GenerateTensorValue(GeneratorTensor_Sequential<B0DataType, 1>{});
         b1_gs_os_ns.GenerateTensorValue(GeneratorTensor_Diagonal<B1DataType>{});
     }
```
example/35_splitK_gemm/run_splitK_gemm_example.inc

```diff
@@ -66,8 +66,8 @@ bool run_splitK_gemm(const ProblemSize& problem_size, const ExecutionConfig& con
         b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
         break;
     default:
-        a_m_k.GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-        b_k_n.GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+        a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1});
+        b_k_n.GenerateTensorValue(GeneratorTensor_1<BDataType>{1});
     }

     DeviceMem a_m_k_device_buf(sizeof(ADataType) * a_m_k.mDesc.GetElementSpaceSize());
```
example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp

```diff
@@ -377,7 +377,7 @@ int main(int argc, char* argv[])
         break;
     default:
         a0_g_m_k.GenerateTensorValue(GeneratorTensor_1<A0DataType>{1});
-        b0_g_k_n.GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+        b0_g_k_n.GenerateTensorValue(GeneratorTensor_Sequential<B0DataType, 1>{});
         d00_g_m_n.GenerateTensorValue(GeneratorTensor_1<D00DataType>{1});
         d01_g_m_n.GenerateTensorValue(GeneratorTensor_1<D01DataType>{1});
         b1_g_n_o.GenerateTensorValue(GeneratorTensor_Diagonal<B1DataType>{});
```
example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute_xdl.cpp

```diff
@@ -248,7 +248,7 @@ int main(int argc, char* argv[])
         d0_gs_ms_ns.GenerateTensorValue(GeneratorTensor_1<D0DataType>{1});
         break;
     default:
-        a_gs_ms_ks.GenerateTensorValue(GeneratorTensor_Sequential<2>{});
+        a_gs_ms_ks.GenerateTensorValue(GeneratorTensor_Sequential<ADataType, 2>{});
         b0_gs_ns_ks.GenerateTensorValue(GeneratorTensor_Diagonal<B0DataType>{});
         b1_gs_os_ns.GenerateTensorValue(GeneratorTensor_Diagonal<B1DataType>{});
         d0_gs_ms_ns.GenerateTensorValue(GeneratorTensor_1<D0DataType>{1});
```
example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_bf16_i8.cpp

```diff
@@ -194,9 +194,9 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             b1_tensors[i].GenerateTensorValue(GeneratorTensor_3<B1DataType>{-0.5, 0.5});
             break;
         default:
-            a0_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-            b0_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
-            b1_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+            a0_tensors[i].GenerateTensorValue(GeneratorTensor_1<A0DataType>{1});
+            b0_tensors[i].GenerateTensorValue(GeneratorTensor_1<B0DataType>{1});
+            b1_tensors[i].GenerateTensorValue(GeneratorTensor_1<B1DataType>{1});
         }

         d0_tensors[i].GenerateTensorValue(GeneratorTensor_3<D0DataType>{-0.5, 0.5});
```
example/59_grouped_gemm_multi_ABD/grouped_gemm_multi_abd_xdl_fixed_nk_bias_fp16.cpp

```diff
@@ -184,9 +184,9 @@ bool run_grouped_gemm(const ProblemSize& problem_size, const ExecutionConfig& co
             b_tensors[i].GenerateTensorValue(GeneratorTensor_3<B0DataType>{-0.5, 0.5});
             break;
         default:
-            a0_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-            a1_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<0>{});
-            b_tensors[i].GenerateTensorValue(GeneratorTensor_Sequential<1>{});
+            a0_tensors[i].GenerateTensorValue(GeneratorTensor_1<A0DataType>{1});
+            a1_tensors[i].GenerateTensorValue(GeneratorTensor_1<A1DataType>{1});
+            b_tensors[i].GenerateTensorValue(GeneratorTensor_1<B0DataType>{-1});
         }

         d0_tensors[i].GenerateTensorValue(GeneratorTensor_3<D0DataType>{-0.5, 0.5});
```
library/include/ck/library/utility/host_tensor_generator.hpp

```diff
@@ -256,14 +256,33 @@ struct GeneratorTensor_Checkboard
     }
 };

-template <ck::index_t Dim>
+/**
+ * @brief Is used to generate sequential values based on the specified dimension.
+ *
+ * @tparam T The type of the tensor values.
+ * @tparam Dim The specific dimension used for generation.
+ *
+ * GeneratorTensor_Sequential<1>{} will generate the following values for a 3x3 tensor:
+ *
+ *   0 1 2
+ *   0 1 2
+ *   0 1 2
+ *
+ * Essentially, the values generated are logical coordinates of the generated element that
+ * correspond to dimension Dim. E.g. for 2-dimensional tensor and Dim=1, the values are the column
+ * indices.
+ *
+ */
+template <typename T, ck::index_t Dim>
 struct GeneratorTensor_Sequential
 {
     template <typename... Ts>
-    float operator()(Ts... Xs) const
+    T operator()(Ts... Xs) const
     {
         std::array<ck::index_t, sizeof...(Ts)> dims = {{static_cast<ck::index_t>(Xs)...}};
-        return dims[Dim];
+        float tmp = dims[Dim];
+        return ck::type_convert<T>(tmp);
     }
 };
```
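For readers who want to see the new behaviour in isolation, below is a small, self-contained approximation of the extended generator. It is not the library header: ck::index_t and ck::type_convert are replaced with int and static_cast so the snippet compiles on its own, but the call shape and the dimension-walking logic mirror the struct above.

```cpp
#include <array>
#include <cstdint>
#include <iostream>

// Simplified stand-in for GeneratorTensor_Sequential<T, Dim>: returns the logical
// coordinate along dimension Dim, converted to the prescribed element type T.
// (The real header uses ck::index_t and ck::type_convert<T>.)
template <typename T, int Dim>
struct SequentialGenerator
{
    template <typename... Ts>
    T operator()(Ts... xs) const
    {
        std::array<int, sizeof...(Ts)> dims = {{static_cast<int>(xs)...}};
        return static_cast<T>(dims[Dim]);
    }
};

int main()
{
    // Reproduce the 3x3 example from the new doc comment: walking Dim = 1 yields
    // the column index of each element, here already converted to int8_t.
    SequentialGenerator<std::int8_t, 1> gen;

    for(int m = 0; m < 3; ++m)
    {
        for(int n = 0; n < 3; ++n)
            std::cout << static_cast<int>(gen(m, n)) << ' ';
        std::cout << '\n';
    }
    // Prints:
    // 0 1 2
    // 0 1 2
    // 0 1 2
}
```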