Commit 1fb3bb8d authored by Andriy Roshchenko
Browse files

Introduce two new tensor generators

parent 3dea7cc8
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
"environment": { "environment": {
"MY_ENVIRONMENT_VARIABLE": "NONE", "MY_ENVIRONMENT_VARIABLE": "NONE",
"PATH": "/usr/local/.cargo/bin:$penv{PATH}", "PATH": "/usr/local/.cargo/bin:$penv{PATH}",
"SCCACHE_IDLE_TIMEOUT": "7200" "SCCACHE_IDLE_TIMEOUT": "11000"
}, },
"cacheVariables": { "cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug", "CMAKE_BUILD_TYPE": "Debug",
......
...@@ -166,6 +166,14 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -166,6 +166,14 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
ck::utils::FillUniformDistributionIntegerValue<ADataType>{-2.f, 2.f}(a_m_k); ck::utils::FillUniformDistributionIntegerValue<ADataType>{-2.f, 2.f}(a_m_k);
ck::utils::FillUniformDistributionIntegerValue<BDataType>{-2.f, 2.f}(b_k_n); ck::utils::FillUniformDistributionIntegerValue<BDataType>{-2.f, 2.f}(b_k_n);
break; break;
case 6:
a_m_k.GenerateTensorValue(GeneratorTensor_PI<ADataType>{});
b_k_n.GenerateTensorValue(GeneratorTensor_1<BDataType>{1});
break;
case 7:
a_m_k.GenerateTensorValue(GeneratorTensor_PI_A<ADataType>{});
b_k_n.GenerateTensorValue(GeneratorTensor_PI_B<BDataType>{});
break;
default: default:
ck::utils::FillUniformDistribution<ADataType>{-0.1f, 0.1f}(a_m_k); ck::utils::FillUniformDistribution<ADataType>{-0.1f, 0.1f}(a_m_k);
ck::utils::FillUniformDistribution<BDataType>{-0.1f, 0.1f}(b_k_n); ck::utils::FillUniformDistribution<BDataType>{-0.1f, 0.1f}(b_k_n);
...@@ -368,6 +376,32 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config) ...@@ -368,6 +376,32 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
#endif #endif
if(pass) if(pass)
std::cout << "Verification on CPU: PASS" << std::endl; std::cout << "Verification on CPU: PASS" << std::endl;
if(config.init_method == 6 || config.init_method == 7)
{
std::cout << std::fixed << std::setprecision(16);
// AccDataType a = ck::type_convert<AccDataType>(a_m_k(0, 10));
// AccDataType b = ck::type_convert<AccDataType>(b_k_n(0, 10));
// std::cout << "a(0,10): " << a << std::endl;
// std::cout << "b(0,10): " << b << std::endl;
// std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 0)) << std::endl;
// std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 1)) << std::endl;
// std::cout << "a: " << ck::type_convert<AccDataType>(a_m_k(0, 2)) << std::endl;
// std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(0, 0)) << std::endl;
// std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(1, 0)) << std::endl;
// std::cout << "b: " << ck::type_convert<AccDataType>(b_k_n(2, 0)) << std::endl;
AccDataType d = ck::type_convert<AccDataType>(c_m_n_device_result(0, 10));
AccDataType h = ck::type_convert<AccDataType>(c_m_n_host_result(10, 0));
std::cout << "device result: " << d << std::endl;
std::cout << "host result: " << h << std::endl;
std::cout << "expected result: " << M_PI << std::endl;
std::cout << "device - host: " << std::abs(d - h) << std::endl;
std::cout << "device - expected: " << std::abs(d - M_PI) << std::endl;
std::cout << "atol: " << get_atol<CDataType>() << std::endl;
std::cout << std::endl << std::endl;
}
} }
if((config.do_verification == 2) || (config.do_verification == 3)) if((config.do_verification == 2) || (config.do_verification == 3))
......
...@@ -304,3 +304,107 @@ struct GeneratorTensor_Diagonal ...@@ -304,3 +304,107 @@ struct GeneratorTensor_Diagonal
return pred ? value : T{0}; return pred ? value : T{0};
} }
}; };
/**
* @brief Used to generate tensor entries from coefficients of Leibniz formula for Pi.
*
* @tparam T The type of the tensor values.
*
* Usage: For verification of GEMM
* a_m_k.GenerateTensorValue(GeneratorTensor_PI<ADataType>{});
* b_k_n.GenerateTensorValue(GeneratorTensor_1<BDataType>{1});
*
* c = a * b;
*
* We expect that |c[i][j]-M_PI| <= truncation_error(K)
*/
template <typename T>
struct GeneratorTensor_PI
{
template <typename... Ts>
T operator()(Ts... Xs) const
{
static constexpr double pi = 3.14159265358979323846;
std::array<ck::index_t, sizeof...(Ts)> dims = {{static_cast<ck::index_t>(Xs)...}};
if constexpr(dims.size() > 0)
{
constexpr auto last_dim = dims.size() - 1;
size_t i = dims[last_dim];
float fi = i;
float tmp = (i % 2 == 0) ? 4.0 : -4.0;
tmp /= (2.0 * fi + 1.0);
return ck::type_convert<T>(tmp);
}
else
{
return ck::type_convert<T>(pi);
}
}
static double truncation_error(size_t N) { return 4.0 / (2.0 * N + 1.0); }
};
/**
* @brief Used to generate tensor entries from coefficients of non-alternating version of Leibniz
* formula for Pi.
*
* @tparam T The type of the tensor values.
*
* Usage: For verification of GEMM
* a_m_k.GenerateTensorValue(GeneratorTensor_PI_A<ADataType>{});
* b_k_n.GenerateTensorValue(GeneratorTensor_PI_B<BDataType>{});
*
* c = a * b;
*
* We expect that |c[i][j]-M_PI| <= 0.00013 for K >= 4096 and a,b,c are float.
*/
template <typename T>
struct GeneratorTensor_PI_A
{
static constexpr double pi = 3.14159265358979323846;
template <typename... Ts>
T operator()(Ts... Xs) const
{
std::array<ck::index_t, sizeof...(Ts)> dims = {{static_cast<ck::index_t>(Xs)...}};
if constexpr(dims.size() > 0)
{
constexpr auto last_dim = dims.size() - 1;
size_t i = dims[last_dim];
float fi = i;
float tmp = 2.0 / (4.0 * fi + 1.0);
return ck::type_convert<T>(tmp);
}
else
{
return ck::type_convert<T>(pi / 2.0);
}
}
};
template <typename T>
struct GeneratorTensor_PI_B
{
static constexpr double pi = 3.14159265358979323846;
template <typename... Ts>
T operator()(Ts... Xs) const
{
std::array<ck::index_t, sizeof...(Ts)> dims = {{static_cast<ck::index_t>(Xs)...}};
if constexpr(dims.size() > 0)
{
size_t i = dims[0];
float fi = i;
float tmp = 4.0 / (4.0 * fi + 3.0);
return ck::type_convert<T>(tmp);
}
else
{
return ck::type_convert<T>(2.0);
}
}
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment