Unverified Commit 93c99f3d authored by Anthony Chang's avatar Anthony Chang Committed by GitHub
Browse files

Standalone sweep once softmax kernel w/ ckProfiler (#295)

* use 'sweep once' softmax kernel where applicable

* threadwise copy's dst buffer can specify invalid element value

* add int8 in/out float compute softmax support

Allow a little leeway in the integer absolute tolerance, since a single data point across all test cases shows an off-by-one error.

* format

* softmax inherits DeviceNormalization

* softmax profiler stub

* tighten up reference softmax interface

* example prints tensor dimension

* add fp32 to softmax profiler

* rename header

* hook with ckProfiler

* format

* resolve merge conflict

* resolve merge conflicts

* update normalization profiler help string

* resolve conflict

* typo

* remove residual

* softmax profiler: address feedback

* test for mixed precision input/output

* fully qualify ck::math::isnan

* add comment for device normalization interface

* revise wording

* constness for alpha/beta scaler pointer
parent eccf8773
...@@ -15,14 +15,19 @@ class TestSoftmaxFP16 : public ck::TestSoftmax<Tuple> ...@@ -15,14 +15,19 @@ class TestSoftmaxFP16 : public ck::TestSoftmax<Tuple>
// clang-format off // clang-format off
using KernelTypes = ::testing::Types< using KernelTypes = ::testing::Types<
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize> // InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
std::tuple<ck::half_t, float, float, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<4>>, // mixed precision
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>>, std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>>, std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>>, std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>>, std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>>, std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<8>, I<32>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>>, std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<4>, I<64>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>>, std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>> std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<8>, I<8>>,
std::tuple<ck::half_t, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<8>, I<8>>
>; >;
// clang-format on // clang-format on
TYPED_TEST_SUITE(TestSoftmaxFP16, KernelTypes); TYPED_TEST_SUITE(TestSoftmaxFP16, KernelTypes);
......
...@@ -15,14 +15,19 @@ class TestSoftmaxFP32 : public ck::TestSoftmax<Tuple> ...@@ -15,14 +15,19 @@ class TestSoftmaxFP32 : public ck::TestSoftmax<Tuple>
// clang-format off // clang-format off
using KernelTypes = ::testing::Types< using KernelTypes = ::testing::Types<
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize> // InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
std::tuple<float, float, ck::half_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<8>>, // mixed precision
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<4>, I<1>, I<4>, I<4>>, std::tuple<float, float, float, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<4>, I<64>, I<1>, I<4>, I<1>, I<4>, I<4>>, std::tuple<float, float, float, I<3>, I<1>, I<256>, I<4>, I<64>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<4>, I<1>, I<4>, I<4>>, std::tuple<float, float, float, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<4>, I<1>, I<4>, I<4>>, std::tuple<float, float, float, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<8>, I<32>, I<1>, I<4>, I<1>, I<4>, I<4>>, std::tuple<float, float, float, I<3>, I<2>, I<256>, I<8>, I<32>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<4>, I<64>, I<1>, I<4>, I<1>, I<4>, I<4>>, std::tuple<float, float, float, I<3>, I<2>, I<256>, I<4>, I<64>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<4>, I<1>, I<4>, I<4>>, std::tuple<float, float, float, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<4>, I<1>, I<4>, I<4>> std::tuple<float, float, float, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<4>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<8>, I<1>, I<4>, I<4>>,
std::tuple<float, float, float, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<4>, I<4>>
>; >;
// clang-format on // clang-format on
TYPED_TEST_SUITE(TestSoftmaxFP32, KernelTypes); TYPED_TEST_SUITE(TestSoftmaxFP32, KernelTypes);
......
#include "gtest/gtest.h"
#include "test_softmax_util.hpp"
template <ck::index_t N>
using I = ck::Number<N>;
// Fixture for the INT8 softmax typed tests. It adds no members of its own;
// everything comes from ck::TestSoftmax instantiated with the same parameter.
template <typename KernelConfig>
class TestSoftmaxINT8 : public ck::TestSoftmax<KernelConfig>
{
};
// clang-format off
using KernelTypes = ::testing::Types<
// InDataType, AccDataType, OutDataType, Rank, NumReduceDim, BlockSize, MThreadClusterSize, KThreadClusterSize, MThreadSliceSize, KThreadSliceSize, InSrcVectorDim, InSrcVectorSize, OutDstVectorSize>
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<8>, I<32>, I<1>, I<16>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<4>, I<64>, I<1>, I<16>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<2>, I<128>, I<1>, I<16>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<1>, I<256>, I<1>, I<256>, I<1>, I<64>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<8>, I<32>, I<1>, I<16>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<4>, I<64>, I<1>, I<16>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<2>, I<128>, I<1>, I<16>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<16>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<32>, I<1>, I<16>, I<16>>,
std::tuple<int8_t, float, int8_t, I<3>, I<2>, I<256>, I<1>, I<256>, I<1>, I<64>, I<1>, I<16>, I<16>>
>;
// clang-format on
// Register the fixture as a typed test suite over every entry of KernelTypes.
TYPED_TEST_SUITE(TestSoftmaxINT8, KernelTypes);
// Single test body: calls the fixture's Run() for the current kernel type.
TYPED_TEST(TestSoftmaxINT8, Test_INT8) { this->Run(); }
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include <gtest/gtest.h> #include <gtest/gtest.h>
...@@ -16,6 +18,18 @@ ...@@ -16,6 +18,18 @@
namespace ck { namespace ck {
/// Formats the elements of a range as a single ", "-separated string.
///
/// Each element is written with operator<< into a string stream, so the text
/// for an element is whatever its stream insertion produces.
///
/// @param range  Any type usable in a range-based for loop whose elements
///               can be streamed with operator<<.
/// @return       The elements joined by ", " (e.g. "1, 2, 3"); an empty
///               string when the range has no elements.
template <typename Range>
std::string serialize_range(const Range& range)
{
    std::stringstream ss;
    // Emit the separator *before* every element except the first. Trimming a
    // trailing ", " afterwards would step an iterator before begin() when the
    // range is empty.
    const char* separator = "";
    for(const auto& element : range)
    {
        ss << separator << element;
        separator = ", ";
    }
    return ss.str();
}
template <typename Tuple> template <typename Tuple>
class TestSoftmax : public ::testing::Test class TestSoftmax : public ::testing::Test
{ {
...@@ -80,23 +94,43 @@ class TestSoftmax : public ::testing::Test ...@@ -80,23 +94,43 @@ class TestSoftmax : public ::testing::Test
auto argument_ptr = device_instance.MakeArgumentPointer(i_in_lengths, auto argument_ptr = device_instance.MakeArgumentPointer(i_in_lengths,
i_in_strides, i_in_strides,
reduce_dims, reduce_dims,
alpha, &alpha,
beta, &beta,
in_dev.GetDeviceBuffer(), in_dev.GetDeviceBuffer(),
out_dev.GetDeviceBuffer()); out_dev.GetDeviceBuffer());
if(!device_instance.IsSupportedArgument(argument_ptr.get())) if(!device_instance.IsSupportedArgument(argument_ptr.get()))
{ {
FAIL() << "Unsupported argument"; // std::cout << "Skipped due to unsupported argument: "
// << "input lengths = [" << serialize_range(in_length) << "], "
// << "scaler = [" << alpha << ", " << beta << "]." << std::endl;
return;
} }
auto invoker_ptr = device_instance.MakeInvokerPointer(); auto invoker_ptr = device_instance.MakeInvokerPointer();
invoker_ptr->Run(argument_ptr.get()); invoker_ptr->Run(argument_ptr.get());
ref_instance_invoker_.Run({in, out_ref, alpha, beta, Rank, reduce_dims}); ref_instance_invoker_.Run({in, out_ref, alpha, beta, reduce_dims});
out_dev.FromDevice(out.mData.data()); out_dev.FromDevice(out.mData.data());
EXPECT_TRUE(ck::utils::check_err(out.mData, out_ref.mData));
bool pass;
if(std::is_same<InDataType, int8_t>::value)
{
EXPECT_TRUE(pass = ck::utils::check_err(
out.mData, out_ref.mData, "Error: Incorrect results!", 0, 1));
}
else
{
EXPECT_TRUE(pass = ck::utils::check_err(out.mData, out_ref.mData));
}
if(!pass)
{
FAIL() << "Failure in input lengths = [" << serialize_range(in_length) << "], "
<< "scaler = [" << alpha << ", " << beta << "].";
}
} }
void Run() void Run()
...@@ -105,13 +139,14 @@ class TestSoftmax : public ::testing::Test ...@@ -105,13 +139,14 @@ class TestSoftmax : public ::testing::Test
{ {
for(auto scale : this->scales_) for(auto scale : this->scales_)
{ {
this->RunSingle(in_length, std::get<0>(scale), std::get<1>(scale)); this->RunSingle(in_length, scale[0], scale[1]);
} }
} }
} }
std::vector<std::vector<index_t>> in_lengths_ = {{1, 8, 128}, {2, 128, 1024}, {3, 9, 1032}}; std::vector<std::vector<index_t>> in_lengths_ = {
std::vector<std::tuple<AccDataType, AccDataType>> scales_ = {{1, 0}, {2, 2}, {0, 1}}; {1, 8, 128}, {2, 128, 1024}, {3, 9, 1032}, {4, 4, 2048}, {8, 1, 8192}};
std::vector<std::vector<AccDataType>> scales_ = {{1, 0}, {1, 1}, {0, 1}, {2, 2}};
typename ReferenceInstance::Invoker ref_instance_invoker_; typename ReferenceInstance::Invoker ref_instance_invoker_;
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment