// DeformAttnBackward.cpp
// Sample: builds and executes a deformable-attention backward graph with the hipDNN frontend.

#include <iostream>
#include <random>
#include <tuple>
#include <unordered_map>
#include <vector>

#include "utils.hpp"

#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>

/// Sample entry point: builds a single-node deformable-attention backward
/// (dgrad) graph with the hipDNN frontend, binds freshly generated inputs to it
/// and executes it once.
///
/// @return 0 after the graph has executed and the handle has been destroyed,
///         1 if the hipDNN backend could not be obtained. What happens when one
///         of the HIPDNN_CHECK / HIPDNN_FE_CHECK macros fails is defined in
///         utils.hpp and is not visible from this file.
int main()
{
    using InputType = float;
    using ComputeType = float;
    using ShapeIndexType = int64_t;

    const int64_t n = 2; // Batch size
    const int64_t nHeads = 2; // Number of attention heads
    const int64_t embedDimsPerHead = 32; // Embedding dimensions per attention head
    const int64_t nLevels = 2; // Number of feature levels
    const int64_t nPoints = 2; // Number of sampling points per attention head
    const int64_t nQueries = 32; // Number of queries

    // Randomly generate the per-level spatial shapes and derive the key layout.
    //   nKeys               : sum over levels of height * width
    //   spatialShapesData   : nLevels pairs stored flat as [h0, w0, h1, w1, ...]
    //   levelStartIndexData : for each level, the number of keys in all preceding levels
    // Heights and widths are drawn uniformly from [1, 128]; the generator is seeded from
    // std::random_device, so the shapes differ from run to run.
    auto [nKeys, spatialShapesData, levelStartIndexData] = [=]() {
        int64_t nKeysLocal = 0;
        std::vector<ShapeIndexType> spatialShapesLocal(static_cast<size_t>(nLevels * 2));
        std::vector<ShapeIndexType> levelStartIndexLocal(static_cast<size_t>(nLevels));

        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int64_t> dist(1, 128);
        levelStartIndexLocal[0] = 0;
        for(size_t i = 0; i < spatialShapesLocal.size(); i += 2)
        {
            spatialShapesLocal[i] = dist(gen); // height
            spatialShapesLocal[i + 1] = dist(gen); // width

            // Level i / 2 starts where the previously accumulated keys end.
            if(i > 0)
            {
                levelStartIndexLocal[i / 2] = nKeysLocal;
            }
            nKeysLocal += spatialShapesLocal[i] * spatialShapesLocal[i + 1];
        }

        return std::make_tuple(nKeysLocal, spatialShapesLocal, levelStartIndexLocal);
    }();

    // Describes every tensor of the backward pass, adds the dgrad node, marks its three
    // results as graph outputs and builds the graph against `handle`.
    // nKeys is a structured binding, so it is passed in through an init-capture.
    // Returns the graph together with all input and output tensor attributes.
    auto buildDeformableAttnBwdGraph = [=, nKeysLocal = nKeys](hipdnnHandle_t handle) {
        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();

        graph->set_name("deformable_attention_backward_graph")
            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<ComputeType>())
            .set_compute_data_type(hipdnn_frontend::getDataTypeEnumFromType<ComputeType>());

        // All inputs below use fully packed (contiguous) strides.

        // value: (n, nKeys, nHeads, embedDimsPerHead)
        auto value = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("value")
                .set_dim({n, nKeysLocal, nHeads, embedDimsPerHead})
                .set_stride({nKeysLocal * nHeads * embedDimsPerHead,
                             nHeads * embedDimsPerHead,
                             embedDimsPerHead,
                             1}));
        // spatial_shapes: (nLevels, 2), integer typed
        auto spatialShapes = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("spatial_shapes")
                .set_dim({nLevels, 2})
                .set_stride({2, 1})
                .set_data_type(hipdnn_frontend::getDataTypeEnumFromType<ShapeIndexType>()));
        // level_start_index: (nLevels,), integer typed
        auto levelStartIndex = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("level_start_index")
                .set_dim({nLevels})
                .set_stride({1})
                .set_data_type(hipdnn_frontend::getDataTypeEnumFromType<ShapeIndexType>()));
        // sampling_locations: (n, nQueries, nHeads, nLevels, nPoints, 2)
        auto samplingLocations = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("sampling_locations")
                .set_dim({n, nQueries, nHeads, nLevels, nPoints, 2})
                .set_stride({nQueries * nHeads * nLevels * nPoints * 2,
                             nHeads * nLevels * nPoints * 2,
                             nLevels * nPoints * 2,
                             nPoints * 2,
                             2,
                             1}));
        // attention_weights: (n, nQueries, nHeads, nLevels, nPoints)
        auto attentionWeights = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("attention_weights")
                .set_dim({n, nQueries, nHeads, nLevels, nPoints})
                .set_stride({nQueries * nHeads * nLevels * nPoints,
                             nHeads * nLevels * nPoints,
                             nLevels * nPoints,
                             nPoints,
                             1}));

        // grad_output: (n, nQueries, nHeads * embedDimsPerHead)
        auto gradOutput = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("grad_output")
                .set_dim({n, nQueries, nHeads * embedDimsPerHead})
                .set_stride({nQueries * nHeads * embedDimsPerHead, nHeads * embedDimsPerHead, 1}));

        auto deformAttnDgradAttributes
            = hipdnn_frontend::graph::DeformAttnDgradAttributes().set_name(
                "deform_attn_backward_node");
        auto [gradValue, gradSamplingLoc, gradAttnWeight]
            = graph->deform_attn_dgrad(value,
                                       spatialShapes,
                                       levelStartIndex,
                                       samplingLocations,
                                       attentionWeights,
                                       gradOutput,
                                       deformAttnDgradAttributes);
        // The three gradients are the results this sample reads back from the graph.
        gradValue->set_output(true);
        gradSamplingLoc->set_output(true);
        gradAttnWeight->set_output(true);

        // build graph
        HIPDNN_FE_CHECK(graph->build(handle));

        return std::make_tuple(graph,
                               value,
                               spatialShapes,
                               levelStartIndex,
                               samplingLocations,
                               attentionWeights,
                               gradOutput,
                               gradValue,
                               gradSamplingLoc,
                               gradAttnWeight);
    };

    auto backend = hipdnn_frontend::detail::hipdnnBackend();
    if(!backend)
    {
        // Diagnostics belong on stderr because the process exits with a failure status.
        std::cerr << "Create backend failed.\n";
        return 1;
    }

    hipdnnHandle_t handle;
    HIPDNN_CHECK(backend->create(&handle));

    auto [graph,
          value,
          spatialShapes,
          levelStartIndex,
          samplingLocations,
          attentionWeights,
          gradOutput,
          gradValue,
          gradSamplingLoc,
          gradAttnWeight]
        = buildDeformableAttnBwdGraph(handle);

    // Allocate one buffer per graph tensor, sized from the dims/strides the attributes report.
    hipdnn_data_sdk::utilities::Tensor<InputType> valueTensor(value->get_dim(),
                                                              value->get_stride());
    hipdnn_data_sdk::utilities::Tensor<ShapeIndexType> spatialShapesTensor(
        spatialShapes->get_dim(), spatialShapes->get_stride());
    hipdnn_data_sdk::utilities::Tensor<ShapeIndexType> levelStartIndexTensor(
        levelStartIndex->get_dim(), levelStartIndex->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> samplingLocationsTensor(
        samplingLocations->get_dim(), samplingLocations->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> attentionWeightsTensor(
        attentionWeights->get_dim(), attentionWeights->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> gradOutputTensor(gradOutput->get_dim(),
                                                                   gradOutput->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> gradValueTensor(gradValue->get_dim(),
                                                                  gradValue->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> gradSamplingLocTensor(
        gradSamplingLoc->get_dim(), gradSamplingLoc->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> gradAttnWeightTensor(
        gradAttnWeight->get_dim(), gradAttnWeight->get_stride());

    // Fill tensors with data
    spatialShapesTensor.fillWithData(spatialShapesData.data(),
                                     spatialShapesData.size() * sizeof(ShapeIndexType));
    levelStartIndexTensor.fillWithData(levelStartIndexData.data(),
                                       levelStartIndexData.size() * sizeof(ShapeIndexType));

    // The floating-point inputs must also carry defined data before the graph runs.
    // Each one is packed (see the strides above), so its element count is the product of
    // its dims; a host staging vector of that size is filled with uniform random values in
    // [low, high) and handed to the tensor with the same fillWithData call used above.
    // NOTE(review): assumes fillWithData copies from the pointer before returning — confirm.
    std::mt19937 dataGen(std::random_device{}());
    auto fillUniform
        = [&dataGen](auto& tensor, const auto& dims, InputType low, InputType high) {
              size_t elementCount = 1;
              for(const auto extent : dims)
              {
                  elementCount *= static_cast<size_t>(extent);
              }

              std::vector<InputType> hostData(elementCount);
              std::uniform_real_distribution<InputType> dist(low, high);
              for(auto& element : hostData)
              {
                  element = dist(dataGen);
              }

              tensor.fillWithData(hostData.data(), hostData.size() * sizeof(InputType));
          };

    fillUniform(valueTensor, value->get_dim(), InputType(-1), InputType(1));
    fillUniform(gradOutputTensor, gradOutput->get_dim(), InputType(-1), InputType(1));
    // Assumes sampling locations are normalized coordinates in [0, 1) and attention weights
    // are non-negative — TODO confirm the ranges this backend expects.
    fillUniform(samplingLocationsTensor, samplingLocations->get_dim(), InputType(0), InputType(1));
    fillUniform(attentionWeightsTensor, attentionWeights->get_dim(), InputType(0), InputType(1));

    // Bind every tensor uid of the graph to the pointer returned by memory().deviceData().
    std::unordered_map<int64_t, void*> variantPack;
    variantPack[value->get_uid()] = valueTensor.memory().deviceData();
    variantPack[spatialShapes->get_uid()] = spatialShapesTensor.memory().deviceData();
    variantPack[levelStartIndex->get_uid()] = levelStartIndexTensor.memory().deviceData();
    variantPack[samplingLocations->get_uid()] = samplingLocationsTensor.memory().deviceData();
    variantPack[attentionWeights->get_uid()] = attentionWeightsTensor.memory().deviceData();
    variantPack[gradOutput->get_uid()] = gradOutputTensor.memory().deviceData();
    variantPack[gradValue->get_uid()] = gradValueTensor.memory().deviceData();
    variantPack[gradSamplingLoc->get_uid()] = gradSamplingLocTensor.memory().deviceData();
    variantPack[gradAttnWeight->get_uid()] = gradAttnWeightTensor.memory().deviceData();

    // Ask the built graph how much scratch space it needs and allocate exactly that.
    int64_t workspaceSize = 0;
    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));

    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));

    std::cout << "Deformable_attention_backward graph execution complete. \n";

    HIPDNN_CHECK(backend->destroy(handle));
    return 0;
}