// BnTraining.cpp

#include <cstdint>
#include <iostream>
#include <memory>
#include <tuple>
#include <unordered_map>

#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_frontend.hpp>

#include "hipdnn_data_sdk/utilities/Workspace.hpp"
#include "utils.hpp"

int main()
{
    using InputType = hipdnn_data_sdk::types::half;

    const int64_t n = 16; // Batch size
    // Input
    const int64_t c = 16; // Number of channels
    const int64_t h = 16; // Height
    const int64_t w = 16; // Width

    auto buildBnTrainingGraph = [=](hipdnnHandle_t handle) {
        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
        graph->set_name("bn_training_graph")
            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);

        auto x = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("x")
                .set_dim({n, c, h, w})
                .set_stride({c * h * w, 1, c * w, c}));

        auto scale = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("scale")
                .set_dim({1, c, 1, 1})
                .set_stride({c, 1, c, c}));

        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("bias")
                .set_dim({1, c, 1, 1})
                .set_stride({c, 1, c, c}));

        auto prevRunningMean = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("prev_running_mean")
                .set_dim({1, c, 1, 1})
                .set_stride({c, 1, c, c}));

        auto prevRunningVar = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("prev_running_variance")
                .set_dim({1, c, 1, 1})
                .set_stride({c, 1, c, c}));

        auto momentum = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("momentum")
                .set_dim({1, 1, 1, 1})
                .set_stride({1, 1, 1, 1}));

        auto epsilon = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("epsilon")
                .set_dim({1, 1, 1, 1})
                .set_stride({1, 1, 1, 1}));

        epsilon->set_value(1e-5);
        momentum->set_value(0.1);

        auto bnTrainingAttributes
            = hipdnn_frontend::graph::BatchnormAttributes()
                  .set_name("bn_training_node")
                  .set_epsilon(epsilon)
                  .set_previous_running_stats(prevRunningMean, prevRunningVar, momentum);

        auto [y, savedMean, savedInvVariance, nextRunningMean, nextRunningVar]
            = graph->batchnorm(x, scale, bias, bnTrainingAttributes);
        y->set_output(true);
        nextRunningMean->set_output(true);
        nextRunningVar->set_output(true);
        savedMean->set_output(true);
        savedInvVariance->set_output(true);

        // build graph
        HIPDNN_FE_CHECK(graph->build(handle));

        return std::make_tuple(graph,
                               x,
                               scale,
                               bias,
                               prevRunningMean,
                               prevRunningVar,
                               momentum,
                               epsilon,
                               y,
                               savedMean,
                               savedInvVariance,
                               nextRunningMean,
                               nextRunningVar);
    };

    auto backend = hipdnn_frontend::detail::hipdnnBackend();
    if(!backend)
    {
        std::cout << "Creat backend failed. \n";
        return 1;
    }

    hipdnnHandle_t handle;
    HIPDNN_CHECK(backend->create(&handle));

    auto [graph,
          x,
          scale,
          bias,
          prevRunningMean,
          prevRunningVar,
          momentum,
          epsilon,
          y,
          savedMean,
          savedInvVariance,
          nextRunningMean,
          nextRunningVar]
        = buildBnTrainingGraph(handle);

    hipdnn_data_sdk::utilities::Tensor<InputType> xTensor(x->get_dim(), x->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> scaleTensor(scale->get_dim());
    hipdnn_data_sdk::utilities::Tensor<InputType> biasTensor(bias->get_dim());
    hipdnn_data_sdk::utilities::Tensor<InputType> prevMeanTensor(prevRunningMean->get_dim());
    hipdnn_data_sdk::utilities::Tensor<InputType> prevVarTensor(prevRunningVar->get_dim());
    hipdnn_data_sdk::utilities::Tensor<InputType> momentumTensor(momentum->get_dim());
    hipdnn_data_sdk::utilities::Tensor<InputType> epsilonTensor(epsilon->get_dim());
    hipdnn_data_sdk::utilities::Tensor<InputType> yTensor(y->get_dim(), y->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> savedMeanTensor(savedMean->get_dim());
    hipdnn_data_sdk::utilities::Tensor<InputType> savedInvVarTensor(savedInvVariance->get_dim());

    std::unordered_map<int64_t, void*> variantPack;
    variantPack[x->get_uid()] = xTensor.memory().deviceData();
    variantPack[scale->get_uid()] = scaleTensor.memory().deviceData();
    variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
    variantPack[prevRunningMean->get_uid()] = prevMeanTensor.memory().deviceData();
    variantPack[prevRunningVar->get_uid()] = prevVarTensor.memory().deviceData();
    variantPack[momentum->get_uid()] = momentumTensor.memory().deviceData();
    variantPack[epsilon->get_uid()] = epsilonTensor.memory().deviceData();
    variantPack[y->get_uid()] = yTensor.memory().deviceData();

    // hipDNN uses two separate memory blocks to store the statistics before and after updates,
    // whereas MIOpen only uses one memory block to store them.
    // To accommodate this difference, both the prev and next statistics in the hipDNN interface are pointed to the same memory address here,
    // and the plugin layer passes this address to MIOpen.
    variantPack[nextRunningMean->get_uid()] = prevMeanTensor.memory().deviceData();
    variantPack[nextRunningVar->get_uid()] = prevVarTensor.memory().deviceData();
    variantPack[savedMean->get_uid()] = savedMeanTensor.memory().deviceData();
    variantPack[savedInvVariance->get_uid()] = savedInvVarTensor.memory().deviceData();

    int64_t workspaceSize = 0;
    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));

    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));

    std::cout << "Batch normalization training graph execution complete. ";

    HIPDNN_CHECK(backend->destroy(handle));
    return 0;
}