#include #include #include #include #include #include "utils.hpp" #include #include #include int main() { using InputType = float; using ComputeType = float; using ShapeIndexType = int64_t; const int64_t n = 2; // Batch size const int64_t nHeads = 2; // Number of attention heads const int64_t embedDimsPerHead = 32; // Embedding dimensions per attention head const int64_t nLevels = 2; // Number of feature levels const int64_t nPoints = 2; // Number of sampling points per attention head const int64_t nQueries = 32; // Number of queries // Randomly generate spatial shapes and level start index // nKeys: total number of keys across all feature levels // spatialShapesData: (nLevels, 2) tensor containing height and width of each feature level // levelStartIndexData: (nLevels,) tensor containing starting index of each feature level auto [nKeys, spatialShapesData, levelStartIndexData] = [=]() { int64_t nKeysLocal = 0; std::vector spatialShapesLocal(static_cast(nLevels * 2)); std::vector levelStartIndexLocal(static_cast(nLevels)); std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution dist(1, 128); levelStartIndexLocal[0] = 0; for(size_t i = 0; i < spatialShapesLocal.size(); i += 2) { spatialShapesLocal[i] = dist(gen); // height spatialShapesLocal[i + 1] = dist(gen); // width if(i > 0) { levelStartIndexLocal[i / 2] = nKeysLocal; } nKeysLocal += spatialShapesLocal[i] * spatialShapesLocal[i + 1]; } return std::make_tuple(nKeysLocal, spatialShapesLocal, levelStartIndexLocal); }(); auto buildDeformableAttnBwdGraph = [=, nKeysLocal = nKeys](hipdnnHandle_t handle) { auto graph = std::make_shared(); graph->set_name("deformable_attention_backward_graph") .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType()) .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType()) .set_compute_data_type(hipdnn_frontend::getDataTypeEnumFromType()); auto value = std::make_shared( hipdnn_frontend::graph::Tensor_attributes() .set_name("value") .set_dim({n, nKeysLocal, nHeads, embedDimsPerHead}) .set_stride({nKeysLocal * nHeads * embedDimsPerHead, nHeads * embedDimsPerHead, embedDimsPerHead, 1})); auto spatialShapes = std::make_shared( hipdnn_frontend::graph::Tensor_attributes() .set_name("spatial_shapes") .set_dim({nLevels, 2}) .set_stride({2, 1}) .set_data_type(hipdnn_frontend::getDataTypeEnumFromType())); auto levelStartIndex = std::make_shared( hipdnn_frontend::graph::Tensor_attributes() .set_name("level_start_index") .set_dim({nLevels}) .set_stride({1}) .set_data_type(hipdnn_frontend::getDataTypeEnumFromType())); auto samplingLocations = std::make_shared( hipdnn_frontend::graph::Tensor_attributes() .set_name("sampling_locations") .set_dim({n, nQueries, nHeads, nLevels, nPoints, 2}) .set_stride({nQueries * nHeads * nLevels * nPoints * 2, nHeads * nLevels * nPoints * 2, nLevels * nPoints * 2, nPoints * 2, 2, 1})); auto attentionWeights = std::make_shared( hipdnn_frontend::graph::Tensor_attributes() .set_name("attention_weights") .set_dim({n, nQueries, nHeads, nLevels, nPoints}) .set_stride({nQueries * nHeads * nLevels * nPoints, nHeads * nLevels * nPoints, nLevels * nPoints, nPoints, 1})); auto gradOutput = std::make_shared( hipdnn_frontend::graph::Tensor_attributes() .set_name("grad_output") .set_dim({n, nQueries, nHeads * embedDimsPerHead}) .set_stride({nQueries * nHeads * embedDimsPerHead, nHeads * embedDimsPerHead, 1})); auto deformAttnDgradAttributes = hipdnn_frontend::graph::DeformAttnDgradAttributes().set_name( "deform_attn_backward_node"); auto [gradValue, gradSamplingLoc, gradAttnWeight] = graph->deform_attn_dgrad(value, spatialShapes, levelStartIndex, samplingLocations, attentionWeights, gradOutput, deformAttnDgradAttributes); gradValue->set_output(true); gradSamplingLoc->set_output(true); gradAttnWeight->set_output(true); // build graph HIPDNN_FE_CHECK(graph->build(handle)); return std::make_tuple(graph, value, spatialShapes, levelStartIndex, samplingLocations, attentionWeights, gradOutput, gradValue, gradSamplingLoc, gradAttnWeight); }; auto backend = hipdnn_frontend::detail::hipdnnBackend(); if(!backend) { std::cout << "Creat backend failed. \n"; return 1; } hipdnnHandle_t handle; HIPDNN_CHECK(backend->create(&handle)); auto [graph, value, spatialShapes, levelStartIndex, samplingLocations, attentionWeights, gradOutput, gradValue, gradSamplingLoc, gradAttnWeight] = buildDeformableAttnBwdGraph(handle); // Allocate DCU memory hipdnn_data_sdk::utilities::Tensor valueTensor(value->get_dim(), value->get_stride()); hipdnn_data_sdk::utilities::Tensor spatialShapesTensor( spatialShapes->get_dim(), spatialShapes->get_stride()); hipdnn_data_sdk::utilities::Tensor levelStartIndexTensor( levelStartIndex->get_dim(), levelStartIndex->get_stride()); hipdnn_data_sdk::utilities::Tensor samplingLocationsTensor( samplingLocations->get_dim(), samplingLocations->get_stride()); hipdnn_data_sdk::utilities::Tensor attentionWeightsTensor( attentionWeights->get_dim(), attentionWeights->get_stride()); hipdnn_data_sdk::utilities::Tensor gradOutputTensor(gradOutput->get_dim(), gradOutput->get_stride()); hipdnn_data_sdk::utilities::Tensor gradValueTensor(gradValue->get_dim(), gradValue->get_stride()); hipdnn_data_sdk::utilities::Tensor gradSamplingLocTensor( gradSamplingLoc->get_dim(), gradSamplingLoc->get_stride()); hipdnn_data_sdk::utilities::Tensor gradAttnWeightTensor( gradAttnWeight->get_dim(), gradAttnWeight->get_stride()); // Fill tensors with data spatialShapesTensor.fillWithData(spatialShapesData.data(), spatialShapesData.size() * sizeof(ShapeIndexType)); levelStartIndexTensor.fillWithData(levelStartIndexData.data(), levelStartIndexData.size() * sizeof(ShapeIndexType)); std::unordered_map variantPack; variantPack[value->get_uid()] = valueTensor.memory().deviceData(); variantPack[spatialShapes->get_uid()] = spatialShapesTensor.memory().deviceData(); variantPack[levelStartIndex->get_uid()] = levelStartIndexTensor.memory().deviceData(); variantPack[samplingLocations->get_uid()] = samplingLocationsTensor.memory().deviceData(); variantPack[attentionWeights->get_uid()] = attentionWeightsTensor.memory().deviceData(); variantPack[gradOutput->get_uid()] = gradOutputTensor.memory().deviceData(); variantPack[gradValue->get_uid()] = gradValueTensor.memory().deviceData(); variantPack[gradSamplingLoc->get_uid()] = gradSamplingLocTensor.memory().deviceData(); variantPack[gradAttnWeight->get_uid()] = gradAttnWeightTensor.memory().deviceData(); int64_t workspaceSize = 0; HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize)); const hipdnn_data_sdk::utilities::Workspace workspace(static_cast(workspaceSize)); HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get())); std::cout << "Deformable_attention_backward graph execution complete. \n"; HIPDNN_CHECK(backend->destroy(handle)); return 0; }