Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
1fb3bb8d
Commit
1fb3bb8d
authored
Nov 07, 2024
by
Andriy Roshchenko
Browse files
Introduce two new tensor generators
parent
3dea7cc8
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
139 additions
and
1 deletion
+139
-1
CMakePresets.json
CMakePresets.json
+1
-1
example/01_gemm/run_gemm_example.inc
example/01_gemm/run_gemm_example.inc
+34
-0
library/include/ck/library/utility/host_tensor_generator.hpp
library/include/ck/library/utility/host_tensor_generator.hpp
+104
-0
No files found.
CMakePresets.json
View file @
1fb3bb8d
...
@@ -11,7 +11,7 @@
...
@@ -11,7 +11,7 @@
"environment"
:
{
"environment"
:
{
"MY_ENVIRONMENT_VARIABLE"
:
"NONE"
,
"MY_ENVIRONMENT_VARIABLE"
:
"NONE"
,
"PATH"
:
"/usr/local/.cargo/bin:$penv{PATH}"
,
"PATH"
:
"/usr/local/.cargo/bin:$penv{PATH}"
,
"SCCACHE_IDLE_TIMEOUT"
:
"
72
00"
"SCCACHE_IDLE_TIMEOUT"
:
"
110
00"
},
},
"cacheVariables"
:
{
"cacheVariables"
:
{
"CMAKE_BUILD_TYPE"
:
"Debug"
,
"CMAKE_BUILD_TYPE"
:
"Debug"
,
...
...
example/01_gemm/run_gemm_example.inc
View file @
1fb3bb8d
...
@@ -166,6 +166,14 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -166,6 +166,14 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
ck
::
utils
::
FillUniformDistributionIntegerValue
<
ADataType
>
{
-
2.
f
,
2.
f
}(
a_m_k
);
ck
::
utils
::
FillUniformDistributionIntegerValue
<
ADataType
>
{
-
2.
f
,
2.
f
}(
a_m_k
);
ck
::
utils
::
FillUniformDistributionIntegerValue
<
BDataType
>
{
-
2.
f
,
2.
f
}(
b_k_n
);
ck
::
utils
::
FillUniformDistributionIntegerValue
<
BDataType
>
{
-
2.
f
,
2.
f
}(
b_k_n
);
break
;
break
;
case
6
:
a_m_k
.
GenerateTensorValue
(
GeneratorTensor_PI
<
ADataType
>
{});
b_k_n
.
GenerateTensorValue
(
GeneratorTensor_1
<
BDataType
>
{
1
});
break
;
case
7
:
a_m_k
.
GenerateTensorValue
(
GeneratorTensor_PI_A
<
ADataType
>
{});
b_k_n
.
GenerateTensorValue
(
GeneratorTensor_PI_B
<
BDataType
>
{});
break
;
default
:
default
:
ck
::
utils
::
FillUniformDistribution
<
ADataType
>
{
-
0.1
f
,
0.1
f
}(
a_m_k
);
ck
::
utils
::
FillUniformDistribution
<
ADataType
>
{
-
0.1
f
,
0.1
f
}(
a_m_k
);
ck
::
utils
::
FillUniformDistribution
<
BDataType
>
{
-
0.1
f
,
0.1
f
}(
b_k_n
);
ck
::
utils
::
FillUniformDistribution
<
BDataType
>
{
-
0.1
f
,
0.1
f
}(
b_k_n
);
...
@@ -368,6 +376,32 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -368,6 +376,32 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#endif
#endif
if
(
pass
)
if
(
pass
)
std
::
cout
<<
"Verification on CPU: PASS"
<<
std
::
endl
;
std
::
cout
<<
"Verification on CPU: PASS"
<<
std
::
endl
;
if
(
config
.
init_method
==
6
||
config
.
init_method
==
7
)
{
std
::
cout
<<
std
::
fixed
<<
std
::
setprecision
(
16
);
// AccDataType a = ck::type_convert<AccDataType>(a_m_k(0, 10));
// AccDataType b = ck::type_convert<AccDataType>(b_k_n(0, 10));
// std::cout << "a(0,10): " << a << std::endl;
// std::cout << "b(0,10): " << b << std::endl;
// std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 0)) << std::endl;
// std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 1)) << std::endl;
// std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 2)) << std::endl;
// std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(0, 0)) << std::endl;
// std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(1, 0)) << std::endl;
// std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(2, 0)) << std::endl;
AccDataType
d
=
ck
::
type_convert
<
AccDataType
>
(
c_m_n_device_result
(
0
,
10
));
AccDataType
h
=
ck
::
type_convert
<
AccDataType
>
(
c_m_n_host_result
(
10
,
0
));
std
::
cout
<<
"device result: "
<<
d
<<
std
::
endl
;
std
::
cout
<<
"host result: "
<<
h
<<
std
::
endl
;
std
::
cout
<<
"expected result: "
<<
M_PI
<<
std
::
endl
;
std
::
cout
<<
"device - host: "
<<
std
::
abs
(
d
-
h
)
<<
std
::
endl
;
std
::
cout
<<
"device - expected: "
<<
std
::
abs
(
d
-
M_PI
)
<<
std
::
endl
;
std
::
cout
<<
"atol: "
<<
get_atol
<
CDataType
>
()
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
<<
std
::
endl
;
}
}
}
if
((
config
.
do_verification
==
2
)
||
(
config
.
do_verification
==
3
))
if
((
config
.
do_verification
==
2
)
||
(
config
.
do_verification
==
3
))
...
...
library/include/ck/library/utility/host_tensor_generator.hpp
View file @
1fb3bb8d
...
@@ -304,3 +304,107 @@ struct GeneratorTensor_Diagonal
...
@@ -304,3 +304,107 @@ struct GeneratorTensor_Diagonal
return
pred
?
value
:
T
{
0
};
return
pred
?
value
:
T
{
0
};
}
}
};
};
/**
* @brief Used to generate tensor entries from coefficients of Leibniz formula for Pi.
*
* @tparam T The type of the tensor values.
*
* Usage: For verification of GEMM
* a_m_k.GenerateTensorValue(GeneratorTensor_PI<ADataType>{});
* b_k_n.GenerateTensorValue(GeneratorTensor_1<BDataType>{1});
*
* c = a * b;
*
* We expect that |c[i][j]-M_PI| <= truncation_error(K)
*/
template
<
typename
T
>
struct
GeneratorTensor_PI
{
template
<
typename
...
Ts
>
T
operator
()(
Ts
...
Xs
)
const
{
static
constexpr
double
pi
=
3.14159265358979323846
;
std
::
array
<
ck
::
index_t
,
sizeof
...(
Ts
)
>
dims
=
{{
static_cast
<
ck
::
index_t
>
(
Xs
)...}};
if
constexpr
(
dims
.
size
()
>
0
)
{
constexpr
auto
last_dim
=
dims
.
size
()
-
1
;
size_t
i
=
dims
[
last_dim
];
float
fi
=
i
;
float
tmp
=
(
i
%
2
==
0
)
?
4.0
:
-
4.0
;
tmp
/=
(
2.0
*
fi
+
1.0
);
return
ck
::
type_convert
<
T
>
(
tmp
);
}
else
{
return
ck
::
type_convert
<
T
>
(
pi
);
}
}
static
double
truncation_error
(
size_t
N
)
{
return
4.0
/
(
2.0
*
N
+
1.0
);
}
};
/**
* @brief Used to generate tensor entries from coefficients of non-alternating version of Leibniz
* formula for Pi.
*
* @tparam T The type of the tensor values.
*
* Usage: For verification of GEMM
* a_m_k.GenerateTensorValue(GeneratorTensor_PI_A<ADataType>{});
* b_k_n.GenerateTensorValue(GeneratorTensor_PI_B<BDataType>{});
*
* c = a * b;
*
* We expect that |c[i][j]-M_PI| <= 0.00013 for K >= 4096 and a,b,c are float.
*/
template
<
typename
T
>
struct
GeneratorTensor_PI_A
{
static
constexpr
double
pi
=
3.14159265358979323846
;
template
<
typename
...
Ts
>
T
operator
()(
Ts
...
Xs
)
const
{
std
::
array
<
ck
::
index_t
,
sizeof
...(
Ts
)
>
dims
=
{{
static_cast
<
ck
::
index_t
>
(
Xs
)...}};
if
constexpr
(
dims
.
size
()
>
0
)
{
constexpr
auto
last_dim
=
dims
.
size
()
-
1
;
size_t
i
=
dims
[
last_dim
];
float
fi
=
i
;
float
tmp
=
2.0
/
(
4.0
*
fi
+
1.0
);
return
ck
::
type_convert
<
T
>
(
tmp
);
}
else
{
return
ck
::
type_convert
<
T
>
(
pi
/
2.0
);
}
}
};
template
<
typename
T
>
struct
GeneratorTensor_PI_B
{
static
constexpr
double
pi
=
3.14159265358979323846
;
template
<
typename
...
Ts
>
T
operator
()(
Ts
...
Xs
)
const
{
std
::
array
<
ck
::
index_t
,
sizeof
...(
Ts
)
>
dims
=
{{
static_cast
<
ck
::
index_t
>
(
Xs
)...}};
if
constexpr
(
dims
.
size
()
>
0
)
{
size_t
i
=
dims
[
0
];
float
fi
=
i
;
float
tmp
=
4.0
/
(
4.0
*
fi
+
3.0
);
return
ck
::
type_convert
<
T
>
(
tmp
);
}
else
{
return
ck
::
type_convert
<
T
>
(
2.0
);
}
}
};
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment