Commit f1f36a61 authored by Andriy Roshchenko

WIP: Debug some test cases

parent 9f8e26f6
@@ -112,14 +112,14 @@ bool run_mxmfma_test(ck::index_t init)
 TEST(MXMFMA, MXFP8MFMA16x16x128)
 {
-    auto AB_init = 1;
-    auto pass = run_mxmfma_test<f8_t, f8_t, float, ck::MFMA_F8F6F4::SCALE_F32_16x16x128>(AB_init);
+    auto AB_init = 2;
+    auto pass = run_mxmfma_test<f8_t, f8_t, half_t, ck::MFMA_F8F6F4::SCALE_F32_16x16x128>(AB_init);
     EXPECT_TRUE(pass);
 }
 
 TEST(MXMFMA, MXFP8MFMA32x32x64)
 {
-    auto AB_init = 1;
-    auto pass = run_mxmfma_test<f8_t, f8_t, half_t, ck::MFMA_F8F6F4::SCALE_F32_32x32x64>(AB_init);
+    auto AB_init = 2;
+    auto pass = run_mxmfma_test<f8_t, f8_t, float, ck::MFMA_F8F6F4::SCALE_F32_32x32x64>(AB_init);
     EXPECT_TRUE(pass);
 }
@@ -738,8 +738,15 @@ void RunHostGEMM(const Tensor<ADataType>& A,
                  Tensor<CDataType>& C)
 {
     using PassThrough = ck::tensor_operation::element_wise::PassThrough;
-    using GemmInstance = ck::tensor_operation::host::
-        ReferenceGemm<float, float, CDataType, float, PassThrough, PassThrough, PassThrough>;
+    using GemmInstance = ck::tensor_operation::host::ReferenceGemm<float,
+                                                                   float,
+                                                                   CDataType,
+                                                                   float,
+                                                                   PassThrough,
+                                                                   PassThrough,
+                                                                   PassThrough,
+                                                                   float,
+                                                                   float>;
 
     Tensor<float> a_m_k(A.mDesc);
     Tensor<float> b_k_n(B.mDesc);
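The two extra float arguments line up with ReferenceGemm's trailing compute-type parameters, which control the type each operand is cast to before the multiply; that reading is an assumption, and the sketch below is illustrative rather than CK's exact declaration (the three elementwise PassThrough operators are elided, and all names outside the diff are made up):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Illustrative stand-in for ck::tensor_operation::host::ReferenceGemm:
    // operands are cast to ComputeTypeA/ComputeTypeB, accumulated in
    // AccDataType, and the result is cast to CDataType.
    template <typename ADataType,
              typename BDataType,
              typename CDataType,
              typename AccDataType,
              typename ComputeTypeA = CDataType,
              typename ComputeTypeB = ComputeTypeA>
    void reference_gemm(const std::vector<ADataType>& a, // M x K, row-major
                        const std::vector<BDataType>& b, // K x N, row-major
                        std::vector<CDataType>& c,       // M x N, row-major
                        std::size_t M, std::size_t N, std::size_t K)
    {
        for(std::size_t m = 0; m < M; ++m)
            for(std::size_t n = 0; n < N; ++n)
            {
                AccDataType acc{0};
                for(std::size_t k = 0; k < K; ++k)
                    acc += static_cast<AccDataType>(static_cast<ComputeTypeA>(a[m * K + k])) *
                           static_cast<AccDataType>(static_cast<ComputeTypeB>(b[k * N + n]));
                c[m * N + n] = static_cast<CDataType>(acc);
            }
    }

    int main()
    {
        const std::size_t M = 2, N = 2, K = 4;
        std::vector<float> a(M * K, 1.0f), b(K * N, 1.0f), c(M * N, 0.0f);
        reference_gemm<float, float, float, float>(a, b, c, M, N, K);
        // With all-ones inputs every C element equals K.
        std::printf("c[0] = %g\n", c[0]);
        return 0;
    }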
@@ -753,8 +760,8 @@ void RunHostGEMM(const Tensor<ADataType>& A,
     {
         for(size_t k = 0; k < K; k++)
         {
-            a_m_k(m, k) =
-                type_convert<float>(A(m, k)) * type_convert<float>(a_scales(m, k / BLOCK_X));
+            a_m_k(m, k) = type_convert<float>(type_convert<ADataType>(
+                type_convert<float>(A(m, k)) * type_convert<float>(a_scales(m, k / BLOCK_X))));
         }
     }
@@ -762,8 +769,8 @@ void RunHostGEMM(const Tensor<ADataType>& A,
     {
         for(size_t k = 0; k < K; k++)
         {
-            b_k_n(k, n) =
-                type_convert<float>(B(k, n)) * type_convert<float>(b_scales(k / BLOCK_X, n));
+            b_k_n(k, n) = type_convert<float>(type_convert<BDataType>(
+                type_convert<float>(B(k, n)) * type_convert<float>(b_scales(k / BLOCK_X, n))));
         }
     }
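These two hunks change what the host model sees: the scaled product is now squeezed back through the narrow ADataType/BDataType before entering the reference GEMM, so the host picks up the same quantization error as an f8 datapath would, instead of carrying the exact float product. A minimal sketch of the effect, assuming an E4M3-like 3-bit mantissa (round_to_mantissa_bits is a hypothetical helper, not a CK function; overflow and NaN handling are omitted):

    #include <cmath>
    #include <cstdio>

    // Round x to 'mant_bits' explicit mantissa bits, mimicking the
    // precision loss of a narrow float type such as FP8 E4M3.
    static float round_to_mantissa_bits(float x, int mant_bits)
    {
        if(x == 0.0f || !std::isfinite(x))
            return x;
        int exp;
        float frac = std::frexp(x, &exp); // x = frac * 2^exp, 0.5 <= |frac| < 1
        const float step = std::ldexp(1.0f, mant_bits + 1); // keep mant_bits + 1 significant bits
        frac = std::nearbyint(frac * step) / step;
        return std::ldexp(frac, exp);
    }

    int main()
    {
        // Mirrors the new host code: the scaled operand is converted to the
        // narrow type and back before the reference GEMM consumes it.
        const float a = 1.9f, a_scale = 0.5f; // 1.9f matches the commented-out debug generator
        const float exact     = a * a_scale;                      // 0.95
        const float quantized = round_to_mantissa_bits(exact, 3); // 0.9375 with 3 mantissa bits
        std::printf("exact = %g, quantized = %g\n", exact, quantized);
        return 0;
    }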
@@ -862,18 +869,23 @@ struct TestMXMFMA
         case 1:
             // results in C = {K}
             a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1.0f});
-            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{512.0f}});
             b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
-            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f / 512}});
             break;
         case 2:
             // expect small round off errors
-            a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{-5, 5});
-            a_scales.GenerateTensorValue(
-                GeneratorTensor_2<ScaleType>{126, 129}); // scales: {0.5, 1, 2}
-            b_n_k.GenerateTensorValue(GeneratorTensor_3<BDataType>{-5, 5});
-            b_scales.GenerateTensorValue(
-                GeneratorTensor_2<ScaleType>{126, 129}); // scales: {0.5, 1, 2}
+            a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{-0.5, 0.5});
+            // a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1.9f});
+            a_scales.GenerateTensorValue(
+                GeneratorTensor_2<ScaleType>{127, 129}); // scales: {1, 2}
+            // a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            // b_n_k.GenerateTensorValue(GeneratorTensor_3<BDataType>{-5, 5});
+            // b_scales.GenerateTensorValue(
+            //     GeneratorTensor_2<ScaleType>{125, 128}); // scales: {0.25, 0.5, 1}
+            b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
             break;
         case 3:
             // expect small round off errors
@@ -884,12 +896,33 @@ struct TestMXMFMA
             b_scales.GenerateTensorValue(
                 GeneratorTensor_2<ScaleType>{126, 129}); // scales: {0.5, 1, 2}
             break;
+        case 4:
+            a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1.0f});
+            a_scales.GenerateTensorValue(GeneratorTensor_Sequential<ScaleType, 0>{-9});
+            b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            break;
+        case 5:
+            a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{1.0f});
+            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
+            b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_Sequential<ScaleType, 1>{-9});
+            break;
+        case 6:
+            a_m_k.GenerateTensorValue(GeneratorTensor_1<ADataType>{0.00195312f});
+            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f / 16}});
+            b_n_k.GenerateTensorValue(GeneratorTensor_1<BDataType>{1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_Sequential<ScaleType, 1>{-9});
+            break;
         default:
             // all initial values are representable in FP8, BF8
             a_m_k.GenerateTensorValue(GeneratorTensor_2<ADataType>{-5, 6});
-            a_scales.GenerateTensorValue(GeneratorTensor_3<ScaleType>{1.0f / 32.0f, 1.0f});
+            // a_scales.GenerateTensorValue(GeneratorTensor_3<ScaleType>{1.0f / 32.0f, 1.0f});
+            a_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
             b_n_k.GenerateTensorValue(GeneratorTensor_2<BDataType>{-5, 6});
-            b_scales.GenerateTensorValue(GeneratorTensor_3<ScaleType>{1.0f / 32.0f, 1.0f});
+            // b_scales.GenerateTensorValue(GeneratorTensor_3<ScaleType>{1.0f / 32.0f, 1.0f});
+            b_scales.GenerateTensorValue(GeneratorTensor_1<ScaleType>{ScaleType{1.0f}});
             break;
         }
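The "// scales: {...}" comments in this hunk imply that ScaleType is an E8M0-style biased exponent: a stored value v decodes to 2^(v - 127), so GeneratorTensor_2{126, 129} draws from {0.5, 1, 2}. Case 1 then exercises the scale datapath with a reciprocal pair (512 and 1/512) that cancels exactly, and the new cases 4-6 appear to walk the scales sequentially along one dimension (dim 0 for A, dim 1 for B) to localize a misbehaving scale lane. A small standalone check of both the decoding and the case-1 expectation, assuming the bias-127 interpretation:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        // E8M0-style decoding assumed by the generator comments:
        // a stored byte v represents the power of two 2^(v - 127).
        for(int v = 125; v <= 128; ++v)
            std::printf("v = %d -> scale = %g\n", v, std::ldexp(1.0, v - 127));

        // Case 1 expectation ("results in C = {K}"): A == B == 1,
        // a_scale = 512, b_scale = 1/512, so every element of C is
        // sum over k of (512 * 1) * ((1/512) * 1) = K.
        const int K = 128; // K of the 16x16x128 instruction
        double c   = 0.0;
        for(int k = 0; k < K; ++k)
            c += (512.0 * 1.0) * ((1.0 / 512.0) * 1.0);
        std::printf("expected C element = %g (== K = %d)\n", c, K);
        return 0;
    }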
@@ -946,12 +979,79 @@ struct TestMXMFMA
         RunDeviceGEMM(mfma_kernel, a, a_scales, b, b_scales, c_device);
 
+#if 1
+#if 1
+        std::cout << "a:" << std::endl;
+        for(size_t i = 0; i < BLOCK_M; i++)
+        {
+            for(size_t j = 0; j < BLOCK_K; j++)
+            {
+                std::cout << type_convert<float>(a(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << "b:" << std::endl;
+        for(size_t i = 0; i < BLOCK_K; i++)
+        {
+            for(size_t j = 0; j < BLOCK_N; j++)
+            {
+                std::cout << type_convert<float>(b(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+#endif
+#if 1
+        std::cout << "a_scale:" << std::endl;
+        for(size_t i = 0; i < BLOCK_M; i++)
+        {
+            for(size_t j = 0; j < BLOCK_K / BLOCK_X; j++)
+            {
+                std::cout << type_convert<float>(a_scales(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << "b_scale:" << std::endl;
+        for(size_t i = 0; i < BLOCK_K / BLOCK_X; i++)
+        {
+            for(size_t j = 0; j < BLOCK_N; j++)
+            {
+                std::cout << type_convert<float>(b_scales(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+#endif
+        std::cout << "c_device:" << std::endl;
+        for(size_t i = 0; i < BLOCK_M; i++)
+        {
+            for(size_t j = 0; j < BLOCK_N; j++)
+            {
+                std::cout << type_convert<float>(c_device(i, j)) << " ";
+            }
+            std::cout << std::endl;
+        }
+#endif
+
         bool res = false;
         if constexpr(std::is_same<CDataType, float>::value ||
                      std::is_same<CDataType, half_t>::value)
         {
             res = ck::utils::check_err(c_device.mData, c_host.mData);
             std::cout << (res ? "SUCCESS" : "FAILURE") << std::endl;
+            if(!res)
+            {
+                std::cout << "c_host:" << std::endl;
+                for(size_t i = 0; i < BLOCK_M; i++)
+                {
+                    for(size_t j = 0; j < BLOCK_N; j++)
+                    {
+                        std::cout << type_convert<float>(c_host(i, j)) << " ";
+                    }
+                    std::cout << std::endl;
+                }
+            }
         }
         else
         {
...