Initial

ca34d4d2 · yanjl1 · ca34d4d2 · ca34d4d2 · ca34d4d2 · ca34d4d2
Commit ca34d4d2 authored Jun 02, 2026 by yanjl1
20 changed files
--- a/cpp/conv_fusion/Int8ConvBias.cpp
+++ b/cpp/conv_fusion/Int8ConvBias.cpp
+
+#include <iostream>
+
+#include "hipdnn_frontend/Types.hpp"
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+// Sample: builds and executes a fused int8 "convolution + bias" graph with the
+// hipDNN frontend. The graph is:
+//   conv_fprop -> dequantize (SUB zero point, MUL scale) -> ADD bias
+//              -> quantize (DIV scale, ADD zero point)
+// No input values are set by this sample and the output is not inspected; it
+// only demonstrates graph construction and execution.
+// Returns 0 after the graph has executed, 1 when no backend is available.
+int main()
+{
+    using InputType = int8_t; // element type of input, filter and output
+    using BiasType = float; // element type of the bias tensor
+
+    const int64_t n = 2; // Batch size
+    // Input
+    const int64_t c = 64; // Number of channels
+    const int64_t h = 16; // Height
+    const int64_t w = 8; // Width
+
+    // Filter
+    const int64_t k = 128; // Number of filters
+    const int64_t r = 3; // Height
+    const int64_t s = 3; // Width
+
+    // Conv param
+    const std::vector<int64_t> strides = {1, 1};
+    const std::vector<int64_t> padding = {1, 1};
+    const std::vector<int64_t> dilation = {1, 1};
+    const int64_t vectorCount = 32;
+
+    // Builds the whole graph for the given handle and returns the graph together
+    // with the tensors that must be bound to device memory at execution time.
+    // All problem sizes above are captured by copy.
+    auto buildConvBiasGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        // int8 for I/O and intermediate tensors, float for computation.
+        graph->set_name("int8_conv_bias_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
+
+        // create conv input with NCHWc32
+        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, h * w, w, 1})
+                .set_vector_count(vectorCount));
+
+        // create filter with NCHWc32
+        auto filter = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("filter")
+                .set_dim({k, c, r, s})
+                .set_stride({c * r * s, r * s, s, 1})
+                .set_vector_count(vectorCount));
+        auto convFpropAttributes = hipdnn_frontend::graph::ConvFpropAttributes()
+                                       .set_name("conv_fprop_node")
+                                       .set_padding(padding)
+                                       .set_stride(strides)
+                                       .set_dilation(dilation);
+        auto convOutput = graph->conv_fprop(input, filter, convFpropAttributes);
+
+        // create sub node for dequantize: conv_output - zero_point_dq
+        auto zeroPointDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_dq").set_value(0));
+        auto convDeqSubAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_sub_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::SUB);
+        auto convDeqSubOutput = graph->pointwise(convOutput, zeroPointDq, convDeqSubAttributes);
+
+        // create mul node for dequantize: (...) * scale_dq
+        auto scaleDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_dq").set_value(1.0));
+        auto convDeqMulAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_mul_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::MUL);
+        auto convDeqMulOutput = graph->pointwise(convDeqSubOutput, scaleDq, convDeqMulAttributes);
+
+        // create bias: one BiasType value per output channel, shaped {1, k, 1, 1}
+        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("bias")
+                .set_dim({1, k, 1, 1})
+                .set_stride({k, 1, 1, 1})
+                .set_data_type(hipdnn_frontend::getDataTypeEnumFromType<BiasType>()));
+        auto biasAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                  .set_name("bias_node")
+                                  .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto biasOutput = graph->pointwise(convDeqMulOutput, bias, biasAttributes);
+
+        // create div node for quantize: (...) / scale_q
+        auto scaleQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_q").set_value(1));
+        auto quantizeDivAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_div_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::DIV);
+        auto quantizeDivOutput = graph->pointwise(biasOutput, scaleQ, quantizeDivAttributes);
+
+        // create add node for quantize: (...) + zero_point_q
+        auto zeroPointQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_q").set_value(0));
+        auto quantizeAddAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_add_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto quantizeOutput
+            = graph->pointwise(quantizeDivOutput, zeroPointQ, quantizeAddAttributes);
+        // The quantized result is the only graph output; it uses the same vector count
+        // as the input and filter.
+        quantizeOutput->set_output(true).set_vector_count(vectorCount);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input, filter, bias, quantizeOutput);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    // Value-initialised so the handle never holds an indeterminate value.
+    hipdnnHandle_t handle{};
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, filter, bias, output] = buildConvBiasGraph(handle);
+
+    // Allocate DCU memory
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
+                                                              input->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> wTensor(filter->get_dim(), filter->get_stride());
+    // The graph declares the bias as BiasType (float), so the buffer must use the
+    // same element type; an int8_t-typed tensor would not match the declared data.
+    hipdnn_data_sdk::utilities::Tensor<BiasType> biasTensor(bias->get_dim(), bias->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> outTensor(output->get_dim(),
+                                                            output->get_stride());
+
+    // Bind each graph tensor (by uid) to its device buffer.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[filter->get_uid()] = wTensor.memory().deviceData();
+    variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
+    variantPack[output->get_uid()] = outTensor.memory().deviceData();
+
+    // Query the workspace size the built graph asks for and allocate it.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "int8_convolution_bias graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/conv_fusion/Int8ConvBiasAdd.cpp
+++ b/cpp/conv_fusion/Int8ConvBiasAdd.cpp
+
+#include <iostream>
+
+#include "hipdnn_frontend/Types.hpp"
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+// Sample: builds and executes a fused int8 "convolution + bias + add" graph with
+// the hipDNN frontend. The graph is:
+//   conv_fprop -> dequantize (SUB, MUL) -> ADD bias --------\
+//   add tensor -> dequantize (SUB, MUL) ---------------------+-> ADD
+//              -> quantize (DIV scale, ADD zero point)
+// No input values are set by this sample and the output is not inspected; it
+// only demonstrates graph construction and execution.
+// Returns 0 after the graph has executed, 1 when no backend is available.
+int main()
+{
+    using InputType = int8_t; // element type of input, filter, add and output
+    using BiasType = float; // element type of the bias tensor
+
+    const int64_t n = 2; // Batch size
+    // Input
+    const int64_t c = 64; // Number of channels
+    const int64_t h = 16; // Height
+    const int64_t w = 8; // Width
+
+    // Filter
+    const int64_t k = 128; // Number of filters
+    const int64_t r = 3; // Height
+    const int64_t s = 3; // Width
+
+    // Conv param
+    const std::vector<int64_t> strides = {1, 1};
+    const std::vector<int64_t> padding = {1, 1};
+    const std::vector<int64_t> dilation = {1, 1};
+
+    // Spatial size used for the "add" tensor:
+    // (in + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1
+    const int64_t outH = ((h + 2 * padding[0] - (dilation[0] * (r - 1) + 1)) / strides[0]) + 1;
+    const int64_t outW = ((w + 2 * padding[1] - (dilation[1] * (s - 1) + 1)) / strides[1]) + 1;
+
+    const int64_t vectorCount = 32;
+
+    // Builds the whole graph for the given handle and returns the graph together
+    // with the tensors that must be bound to device memory at execution time.
+    // All problem sizes above are captured by copy.
+    auto buildConvBiasGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        // int8 for I/O and intermediate tensors, float for computation.
+        graph->set_name("int8_conv_bias_add_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
+
+        // create conv input with NCHWc32
+        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, h * w, w, 1})
+                .set_vector_count(vectorCount));
+
+        // create filter with NCHWc32
+        auto filter = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("filter")
+                .set_dim({k, c, r, s})
+                .set_stride({c * r * s, r * s, s, 1})
+                .set_vector_count(vectorCount));
+        auto convFpropAttributes = hipdnn_frontend::graph::ConvFpropAttributes()
+                                       .set_name("conv_fprop_node")
+                                       .set_padding(padding)
+                                       .set_stride(strides)
+                                       .set_dilation(dilation);
+        auto convOutput = graph->conv_fprop(input, filter, convFpropAttributes);
+
+        // create sub node for dequantize: conv_output - zero_point_dq
+        auto zeroPointDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_dq").set_value(0));
+        auto convDeqSubAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_sub_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::SUB);
+        auto convDeqSubOutput = graph->pointwise(convOutput, zeroPointDq, convDeqSubAttributes);
+
+        // create mul node for dequantize: (...) * scale_dq
+        auto scaleDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_dq").set_value(2.0));
+        auto convDeqMulAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_mul_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::MUL);
+        auto convDeqMulOutput = graph->pointwise(convDeqSubOutput, scaleDq, convDeqMulAttributes);
+
+        // create bias: one BiasType value per output channel, shaped {1, k, 1, 1}
+        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("bias")
+                .set_dim({1, k, 1, 1})
+                .set_stride({k, 1, 1, 1})
+                .set_data_type(hipdnn_frontend::getDataTypeEnumFromType<BiasType>()));
+        auto biasAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                  .set_name("bias_node")
+                                  .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto biasOutput = graph->pointwise(convDeqMulOutput, bias, biasAttributes);
+
+        // create the second operand of the add, shaped {n, k, outH, outW}
+        auto add = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("add")
+                .set_dim({n, k, outH, outW})
+                .set_stride({outH * outW * k, outH * outW, outW, 1})
+                .set_vector_count(vectorCount));
+
+        // create sub node for dequantize: add - zero_point_dq_add
+        auto zeroPointDqAdd = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_dq_add").set_value(0));
+        auto addDeqSubAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                       .set_name("add_deq_sub_node")
+                                       .set_mode(hipdnn_frontend::PointwiseMode_t::SUB);
+        auto addDeqSubOutput = graph->pointwise(add, zeroPointDqAdd, addDeqSubAttributes);
+
+        // create mul node for dequantize: (...) * scale_dq_add
+        auto scaleDqAdd = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_dq_add").set_value(1.0));
+        auto addDeqMulAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                       .set_name("add_deq_mul_node")
+                                       .set_mode(hipdnn_frontend::PointwiseMode_t::MUL);
+        auto addDeqMulOutput = graph->pointwise(addDeqSubOutput, scaleDqAdd, addDeqMulAttributes);
+
+        // create add node: biased conv result + dequantized add tensor
+        auto addAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                 .set_name("add_node")
+                                 .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto addOutput = graph->pointwise(biasOutput, addDeqMulOutput, addAttributes);
+
+        // create div node for quantize: (...) / scale_q
+        auto scaleQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_q").set_value(1));
+        auto quantizeDivAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_div_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::DIV);
+        auto quantizeDivOutput = graph->pointwise(addOutput, scaleQ, quantizeDivAttributes);
+
+        // create add node for quantize: (...) + zero_point_q
+        auto zeroPointQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_q").set_value(0));
+        auto quantizeAddAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_add_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto quantizeOutput
+            = graph->pointwise(quantizeDivOutput, zeroPointQ, quantizeAddAttributes);
+        // The quantized result is the only graph output; it uses the same vector count
+        // as the other int8 tensors.
+        quantizeOutput->set_output(true).set_vector_count(vectorCount);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input, filter, bias, add, quantizeOutput);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    // Value-initialised so the handle never holds an indeterminate value.
+    hipdnnHandle_t handle{};
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, filter, bias, add, output] = buildConvBiasGraph(handle);
+
+    // Allocate DCU memory
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
+                                                              input->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> wTensor(filter->get_dim(), filter->get_stride());
+    // The graph declares the bias as BiasType (float), so the buffer must use the
+    // same element type; an int8_t-typed tensor would not match the declared data.
+    hipdnn_data_sdk::utilities::Tensor<BiasType> biasTensor(bias->get_dim(), bias->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> addTensor(add->get_dim(), add->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> outTensor(output->get_dim(),
+                                                            output->get_stride());
+
+    // Bind each graph tensor (by uid) to its device buffer.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[filter->get_uid()] = wTensor.memory().deviceData();
+    variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
+    variantPack[add->get_uid()] = addTensor.memory().deviceData();
+    variantPack[output->get_uid()] = outTensor.memory().deviceData();
+
+    // Query the workspace size the built graph asks for and allocate it.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "int8_convolution_bias_add graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/conv_fusion/Int8ConvBiasAddRelu.cpp
+++ b/cpp/conv_fusion/Int8ConvBiasAddRelu.cpp
+
+#include <iostream>
+
+#include "hipdnn_frontend/Types.hpp"
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+// Sample: builds and executes a fused int8 "convolution + bias + add + ReLU"
+// graph with the hipDNN frontend. The graph is:
+//   conv_fprop -> dequantize (SUB, MUL) -> ADD bias --------\
+//   add tensor -> dequantize (SUB, MUL) ---------------------+-> ADD -> RELU_FWD
+//              -> quantize (DIV scale, ADD zero point)
+// No input values are set by this sample and the output is not inspected; it
+// only demonstrates graph construction and execution.
+// Returns 0 after the graph has executed, 1 when no backend is available.
+int main()
+{
+    using InputType = int8_t; // element type of input, filter, add and output
+    using BiasType = float; // element type of the bias tensor
+
+    const int64_t n = 2; // Batch size
+    // Input
+    const int64_t c = 64; // Number of channels
+    const int64_t h = 16; // Height
+    const int64_t w = 8; // Width
+
+    // Filter
+    const int64_t k = 128; // Number of filters
+    const int64_t r = 3; // Height
+    const int64_t s = 3; // Width
+
+    // Conv param
+    const std::vector<int64_t> strides = {1, 1};
+    const std::vector<int64_t> padding = {1, 1};
+    const std::vector<int64_t> dilation = {1, 1};
+
+    // Spatial size used for the "add" tensor:
+    // (in + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1
+    const int64_t outH = ((h + 2 * padding[0] - (dilation[0] * (r - 1) + 1)) / strides[0]) + 1;
+    const int64_t outW = ((w + 2 * padding[1] - (dilation[1] * (s - 1) + 1)) / strides[1]) + 1;
+
+    const int64_t vectorCount = 32;
+
+    // Builds the whole graph for the given handle and returns the graph together
+    // with the tensors that must be bound to device memory at execution time.
+    // All problem sizes above are captured by copy.
+    auto buildConvBiasGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        // int8 for I/O and intermediate tensors, float for computation.
+        graph->set_name("int8_conv_bias_add_relu_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
+
+        // create conv input with NCHWc32
+        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, h * w, w, 1})
+                .set_vector_count(vectorCount));
+
+        // create filter with NCHWc32
+        auto filter = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("filter")
+                .set_dim({k, c, r, s})
+                .set_stride({c * r * s, r * s, s, 1})
+                .set_vector_count(vectorCount));
+        auto convFpropAttributes = hipdnn_frontend::graph::ConvFpropAttributes()
+                                       .set_name("conv_fprop_node")
+                                       .set_padding(padding)
+                                       .set_stride(strides)
+                                       .set_dilation(dilation);
+        auto convOutput = graph->conv_fprop(input, filter, convFpropAttributes);
+
+        // create sub node for dequantize: conv_output - zero_point_dq
+        auto zeroPointDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_dq").set_value(0));
+        auto convDeqSubAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_sub_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::SUB);
+        auto convDeqSubOutput = graph->pointwise(convOutput, zeroPointDq, convDeqSubAttributes);
+
+        // create mul node for dequantize: (...) * scale_dq
+        auto scaleDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_dq").set_value(1.0));
+        auto convDeqMulAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_mul_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::MUL);
+        auto convDeqMulOutput = graph->pointwise(convDeqSubOutput, scaleDq, convDeqMulAttributes);
+
+        // create bias: one BiasType value per output channel, shaped {1, k, 1, 1}
+        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("bias")
+                .set_dim({1, k, 1, 1})
+                .set_stride({k, 1, 1, 1})
+                .set_data_type(hipdnn_frontend::getDataTypeEnumFromType<BiasType>()));
+        auto biasAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                  .set_name("bias_node")
+                                  .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto biasOutput = graph->pointwise(convDeqMulOutput, bias, biasAttributes);
+
+        // create the second operand of the add, shaped {n, k, outH, outW}
+        auto add = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("add")
+                .set_dim({n, k, outH, outW})
+                .set_stride({outH * outW * k, outH * outW, outW, 1})
+                .set_vector_count(vectorCount));
+
+        // create sub node for dequantize: add - zero_point_dq_add
+        auto zeroPointDqAdd = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_dq_add").set_value(0));
+        auto addDeqSubAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                       .set_name("add_deq_sub_node")
+                                       .set_mode(hipdnn_frontend::PointwiseMode_t::SUB);
+        auto addDeqSubOutput = graph->pointwise(add, zeroPointDqAdd, addDeqSubAttributes);
+
+        // create mul node for dequantize: (...) * scale_dq_add
+        auto scaleDqAdd = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_dq_add").set_value(1.0));
+        auto addDeqMulAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                       .set_name("add_deq_mul_node")
+                                       .set_mode(hipdnn_frontend::PointwiseMode_t::MUL);
+        auto addDeqMulOutput = graph->pointwise(addDeqSubOutput, scaleDqAdd, addDeqMulAttributes);
+
+        // create add node: biased conv result + dequantized add tensor
+        auto addAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                 .set_name("add_node")
+                                 .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto addOutput = graph->pointwise(biasOutput, addDeqMulOutput, addAttributes);
+
+        // create relu node (single-input pointwise) applied before quantization
+        auto reluAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                  .set_name("relu_node")
+                                  .set_mode(hipdnn_frontend::PointwiseMode_t::RELU_FWD);
+        auto reluOutput = graph->pointwise(addOutput, reluAttributes);
+
+        // create div node for quantize: (...) / scale_q
+        auto scaleQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_q").set_value(1));
+        auto quantizeDivAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_div_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::DIV);
+        auto quantizeDivOutput = graph->pointwise(reluOutput, scaleQ, quantizeDivAttributes);
+
+        // create add node for quantize: (...) + zero_point_q
+        auto zeroPointQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_q").set_value(0));
+        auto quantizeAddAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_add_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto quantizeOutput
+            = graph->pointwise(quantizeDivOutput, zeroPointQ, quantizeAddAttributes);
+        // The quantized result is the only graph output; it uses the same vector count
+        // as the other int8 tensors.
+        quantizeOutput->set_output(true).set_vector_count(vectorCount);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input, filter, bias, add, quantizeOutput);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    // Value-initialised so the handle never holds an indeterminate value.
+    hipdnnHandle_t handle{};
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, filter, bias, add, output] = buildConvBiasGraph(handle);
+
+    // Allocate DCU memory
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
+                                                              input->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> wTensor(filter->get_dim(), filter->get_stride());
+    // The graph declares the bias as BiasType (float), so the buffer must use the
+    // same element type; an int8_t-typed tensor would not match the declared data.
+    hipdnn_data_sdk::utilities::Tensor<BiasType> biasTensor(bias->get_dim(), bias->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> addTensor(add->get_dim(), add->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> outTensor(output->get_dim(),
+                                                            output->get_stride());
+
+    // Bind each graph tensor (by uid) to its device buffer.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[filter->get_uid()] = wTensor.memory().deviceData();
+    variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
+    variantPack[add->get_uid()] = addTensor.memory().deviceData();
+    variantPack[output->get_uid()] = outTensor.memory().deviceData();
+
+    // Query the workspace size the built graph asks for and allocate it.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "int8_convolution_bias_add_relu graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/conv_fusion/Int8ConvBiasRelu.cpp
+++ b/cpp/conv_fusion/Int8ConvBiasRelu.cpp
+
+#include <iostream>
+
+#include "hipdnn_frontend/Types.hpp"
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+int main()
+{
+    using InputType = int8_t;
+    using BiasType = float;
+
+    const int64_t n = 2; // Batch size
+    // Input
+    const int64_t c = 64; // Number of channels
+    const int64_t h = 16; // Height
+    const int64_t w = 8; // Width
+
+    // Filter
+    const int64_t k = 128; // Number of filters
+    const int64_t r = 3; // Height
+    const int64_t s = 3; // Width
+
+    // Conv param
+    const std::vector<int64_t> strides = {1, 1};
+    const std::vector<int64_t> padding = {1, 1};
+    const std::vector<int64_t> dilation = {1, 1};
+    const int64_t vectorCount = 32;
+
+    auto buildConvBiasGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        graph->set_name("int8_conv_bias_relu_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT); //
+
+        // create conv with NCHWc32
+        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, h * w, w, 1})
+                .set_vector_count(vectorCount));
+
+        // create filter with NCHWc32
+        auto filter = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("filter")
+                .set_dim({k, c, r, s})
+                .set_stride({c * r * s, r * s, s, 1})
+                .set_vector_count(vectorCount));
+        auto convFpropAttributes = hipdnn_frontend::graph::ConvFpropAttributes()
+                                       .set_name("conv_fprop_node")
+                                       .set_padding(padding)
+                                       .set_stride(strides)
+                                       .set_dilation(dilation);
+        auto convOutput = graph->conv_fprop(input, filter, convFpropAttributes);
+
+        // create sub node for dequantize:zero_point_dq
+        auto zeroPointDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_dq").set_value(0));
+        auto convDeqSubAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_sub_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::SUB);
+        auto convDeqSubOutput = graph->pointwise(convOutput, zeroPointDq, convDeqSubAttributes);
+
+        // create mul node for dequantize:scale_dq
+        auto scaleDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_dq").set_value(1.0));
+        auto convDeqMulAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_mul_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::MUL);
+        auto convDeqMulOutput = graph->pointwise(convDeqSubOutput, scaleDq, convDeqMulAttributes);
+
+        // create bias
+        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("bias")
+                .set_dim({1, k, 1, 1})
+                .set_stride({k, 1, 1, 1})
+                .set_data_type(hipdnn_frontend::getDataTypeEnumFromType<BiasType>()));
+        auto biasAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                  .set_name("bias_node")
+                                  .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto biasOutput = graph->pointwise(convDeqMulOutput, bias, biasAttributes);
+
+        // create relu
+        auto reluAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                  .set_name("relu_node")
+                                  .set_mode(hipdnn_frontend::PointwiseMode_t::RELU_FWD);
+        auto reluOutput = graph->pointwise(biasOutput, reluAttributes);
+
+        // create div node for quantize:scale_q
+        auto scaleQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_q").set_value(1));
+        auto quantizeDivAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_div_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::DIV);
+        auto quantizeDivOutput = graph->pointwise(reluOutput, scaleQ, quantizeDivAttributes);
+
+        // create add node for quantize:zero_point_q.
+        auto zeroPointQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_q").set_value(0));
+        auto quantizeAddAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_add_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto quantizeOutput
+            = graph->pointwise(quantizeDivOutput, zeroPointQ, quantizeAddAttributes);
+        quantizeOutput->set_output(true).set_vector_count(vectorCount);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input, filter, bias, quantizeOutput);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, filter, bias, output] = buildConvBiasGraph(handle);
+
+    // Allocate DCU memory
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
+                                                              input->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> wTensor(filter->get_dim(), filter->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> biasTensor(bias->get_dim(), bias->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> outTensor(output->get_dim(),
+                                                            output->get_stride());
+
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[filter->get_uid()] = wTensor.memory().deviceData();
+    variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
+    variantPack[output->get_uid()] = outTensor.memory().deviceData();
+
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "int8_convolution_bias_relu graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/conv_fusion/Int8ConvBiasReluAdd.cpp
+++ b/cpp/conv_fusion/Int8ConvBiasReluAdd.cpp
+
+#include <iostream>
+
+#include "hipdnn_frontend/Types.hpp"
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+/// Sample: int8 convolution fused with dequantize, bias add, ReLU, a dequantized
+/// residual add and a final re-quantize, expressed as one hipDNN frontend graph.
+///
+/// The program builds the graph, allocates one buffer per graph input/output,
+/// executes the graph once and prints a completion message.
+///
+/// @return 0 on success, 1 when the hipDNN backend could not be obtained.
+int main()
+{
+    using InputType = int8_t; // element type of input, filter, add and output tensors
+    using BiasType = float; // element type of the bias tensor
+
+    const int64_t n = 2; // Batch size
+    // Input
+    const int64_t c = 3; // Number of channels
+    const int64_t h = 16; // Height
+    const int64_t w = 8; // Width
+
+    // Filter
+    const int64_t k = 128; // Number of filters
+    const int64_t r = 3; // Height
+    const int64_t s = 3; // Width
+
+    // Conv param
+    const std::vector<int64_t> strides = {1, 1};
+    const std::vector<int64_t> padding = {0, 0};
+    const std::vector<int64_t> dilation = {1, 1};
+
+    // Builds the whole fused graph and returns the graph together with the
+    // tensors that must be bound to device memory at execution time.
+    auto buildConvBiasGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        graph->set_name("int8_conv_bias_relu_add graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
+
+        // Convolution input: dims are given as {n, c, h, w}; the strides place the
+        // channel dimension innermost (stride 1).
+        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, 1, c * w, c}));
+
+        // Convolution filter: dims {k, c, r, s}, channel dimension innermost as well.
+        auto filter = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("filter")
+                .set_dim({k, c, r, s})
+                .set_stride({c * r * s, 1, c * s, c}));
+        auto convFpropAttributes = hipdnn_frontend::graph::ConvFpropAttributes()
+                                       .set_name("conv_fprop_node")
+                                       .set_padding(padding)
+                                       .set_stride(strides)
+                                       .set_dilation(dilation);
+        auto convOutput = graph->conv_fprop(input, filter, convFpropAttributes);
+
+        // Dequantize the convolution result, step 1: subtract zero_point_dq.
+        auto zeroPointDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_dq").set_value(0));
+        auto convDeqSubAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_sub_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::SUB);
+        auto convDeqSubOutput = graph->pointwise(convOutput, zeroPointDq, convDeqSubAttributes);
+
+        // Dequantize the convolution result, step 2: multiply by scale_dq.
+        auto scaleDq = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_dq").set_value(1.0));
+        auto convDeqMulAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                        .set_name("conv_deq_mul_node")
+                                        .set_mode(hipdnn_frontend::PointwiseMode_t::MUL);
+        auto convDeqMulOutput = graph->pointwise(convDeqSubOutput, scaleDq, convDeqMulAttributes);
+
+        // Per-filter bias; its data type is overridden to BiasType (float), so the
+        // buffer bound to it must hold BiasType elements.
+        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("bias")
+                .set_dim({1, k, 1, 1})
+                .set_stride({k, 1, k, k})
+                .set_data_type(hipdnn_frontend::getDataTypeEnumFromType<BiasType>()));
+        auto biasAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                  .set_name("bias_node")
+                                  .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto biasOutput = graph->pointwise(convDeqMulOutput, bias, biasAttributes);
+
+        // ReLU activation.
+        auto reluAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                  .set_name("relu_node")
+                                  .set_mode(hipdnn_frontend::PointwiseMode_t::RELU_FWD);
+        auto reluOutput = graph->pointwise(biasOutput, reluAttributes);
+
+        // Second operand of the residual add (uses the graph's io data type).
+        auto add = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("add")
+                .set_dim({1, k, 1, 1})
+                .set_stride({k, 1, k, k}));
+
+        // Dequantize the add operand, step 1: subtract zero_point_dq_add.
+        auto zeroPointDqAdd = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_dq_add").set_value(0));
+        auto addDeqSubAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                       .set_name("add_deq_sub_node")
+                                       .set_mode(hipdnn_frontend::PointwiseMode_t::SUB);
+        auto addDeqSubOutput = graph->pointwise(add, zeroPointDqAdd, addDeqSubAttributes);
+
+        // Dequantize the add operand, step 2: multiply by scale_dq_add.
+        auto scaleDqAdd = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_dq_add").set_value(1.0));
+        auto addDeqMulAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                       .set_name("add_deq_mul_node")
+                                       .set_mode(hipdnn_frontend::PointwiseMode_t::MUL);
+        auto addDeqMulOutput = graph->pointwise(addDeqSubOutput, scaleDqAdd, addDeqMulAttributes);
+
+        // Residual add: relu(conv + bias) + dequantized add operand.
+        auto addAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                 .set_name("add_node")
+                                 .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto addOutput = graph->pointwise(reluOutput, addDeqMulOutput, addAttributes);
+
+        // Quantize the result, step 1: divide by scale_q.
+        auto scaleQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale_q").set_value(1));
+        auto quantizeDivAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_div_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::DIV);
+        auto quantizeDivOutput = graph->pointwise(addOutput, scaleQ, quantizeDivAttributes);
+
+        // Quantize the result, step 2: add zero_point_q. This is the graph output.
+        auto zeroPointQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("zero_point_q").set_value(0));
+        auto quantizeAddAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                         .set_name("quantize_add_node")
+                                         .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto quantizeOutput
+            = graph->pointwise(quantizeDivOutput, zeroPointQ, quantizeAddAttributes);
+        quantizeOutput->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input, filter, bias, add, quantizeOutput);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, filter, bias, add, output] = buildConvBiasGraph(handle);
+
+    // Allocate one buffer per bound tensor, sized from the dims/strides the
+    // graph reports for it.
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
+                                                              input->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> wTensor(filter->get_dim(), filter->get_stride());
+    // The bias tensor is declared as BiasType in the graph, so its buffer must be
+    // allocated with BiasType elements; an InputType (int8_t) buffer would be
+    // smaller than what the graph expects to read.
+    hipdnn_data_sdk::utilities::Tensor<BiasType> biasTensor(bias->get_dim(), bias->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> addTensor(add->get_dim(), add->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> outTensor(output->get_dim(),
+                                                            output->get_stride());
+
+    // Map each tensor uid to the device pointer backing it.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[filter->get_uid()] = wTensor.memory().deviceData();
+    variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
+    variantPack[add->get_uid()] = addTensor.memory().deviceData();
+    variantPack[output->get_uid()] = outTensor.memory().deviceData();
+
+    // Query and allocate the scratch workspace the built graph asks for.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "int8_convolution_bias_add graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/convolution/ConvBackward.cpp
+++ b/cpp/convolution/ConvBackward.cpp
+#include <iostream>
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+#include "utils.hpp"
+
+/// Sample: builds a one-node convolution backward-data graph (conv_dgrad),
+/// binds device buffers to its tensors and executes it once.
+///
+/// @return 0 on success, 1 when the hipDNN backend could not be obtained.
+int main()
+{
+    namespace fe = hipdnn_frontend;
+    namespace sdk = hipdnn_data_sdk::utilities;
+
+    using InputType = float;
+    using TensorAttr = fe::graph::TensorAttributes;
+
+    // Batch size
+    const int64_t n = 4;
+
+    // Input: channels, height, width
+    const int64_t c = 32;
+    const int64_t h = 16;
+    const int64_t w = 16;
+
+    // Filter: count, height, width
+    const int64_t k = 64;
+    const int64_t r = 3;
+    const int64_t s = 3;
+
+    // Convolution parameters (height, width)
+    const int64_t strideH = 1;
+    const int64_t strideW = 1;
+    const int64_t padH = 1;
+    const int64_t padW = 1;
+    const int64_t dilH = 1;
+    const int64_t dilW = 1;
+
+    // Creates the graph and hands back the tensors that need device memory.
+    auto buildConvBackwardGraph = [=](hipdnnHandle_t handle) {
+        const auto elemType = fe::getDataTypeEnumFromType<InputType>();
+
+        auto graph = std::make_shared<fe::graph::Graph>();
+        graph->set_name("conv_backward_graph")
+            .set_io_data_type(elemType)
+            .set_intermediate_data_type(elemType)
+            .set_compute_data_type(fe::DataType::FLOAT);
+
+        // Incoming gradient, dims {n, k, h, w} with the k dimension at stride 1.
+        auto loss = std::make_shared<TensorAttr>(fe::graph::Tensor_attributes()
+                                                     .set_name("loss")
+                                                     .set_dim({n, k, h, w})
+                                                     .set_stride({k * h * w, 1, k * w, k}));
+
+        // Filter, dims {k, c, r, s} with the c dimension at stride 1.
+        auto filter = std::make_shared<TensorAttr>(fe::graph::Tensor_attributes()
+                                                       .set_name("filter")
+                                                       .set_dim({k, c, r, s})
+                                                       .set_stride({c * r * s, 1, c * s, c}));
+
+        auto dgradNodeAttrs = fe::graph::ConvDgradAttributes()
+                                  .set_name("conv_backward_node")
+                                  .set_padding({padH, padW})
+                                  .set_stride({strideH, strideW})
+                                  .set_dilation({dilH, dilW});
+
+        // The dgrad result is the only graph output.
+        auto dx = graph->conv_dgrad(loss, filter, dgradNodeAttrs);
+        dx->set_output(true);
+
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, loss, filter, dx);
+    };
+
+    auto backend = fe::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, loss, filter, dx] = buildConvBackwardGraph(handle);
+
+    // One buffer per tensor, shaped by the dims/strides the graph reports.
+    sdk::Tensor<InputType> lossBuffer(loss->get_dim(), loss->get_stride());
+    sdk::Tensor<InputType> filterBuffer(filter->get_dim(), filter->get_stride());
+    sdk::Tensor<InputType> dxBuffer(dx->get_dim(), dx->get_stride());
+
+    // uid -> device pointer bindings consumed by execute().
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack.insert_or_assign(loss->get_uid(), lossBuffer.memory().deviceData());
+    variantPack.insert_or_assign(filter->get_uid(), filterBuffer.memory().deviceData());
+    variantPack.insert_or_assign(dx->get_uid(), dxBuffer.memory().deviceData());
+
+    // Scratch space requested by the built graph.
+    int64_t workspaceBytes = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceBytes));
+    const sdk::Workspace workspace(static_cast<size_t>(workspaceBytes));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Convolution backward graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/convolution/ConvBackwardWeight.cpp
+++ b/cpp/convolution/ConvBackwardWeight.cpp
+#include <iostream>
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+#include "utils.hpp"
+
+/// Sample: builds a one-node convolution backward-weight graph (conv_wgrad),
+/// binds device buffers to its tensors and executes it once.
+///
+/// @return 0 on success, 1 when the hipDNN backend could not be obtained.
+int main()
+{
+    namespace fe = hipdnn_frontend;
+    namespace sdk = hipdnn_data_sdk::utilities;
+
+    using InputType = float;
+    using TensorAttr = fe::graph::TensorAttributes;
+
+    // Batch size
+    const int64_t n = 4;
+
+    // Input: channels, height, width
+    const int64_t c = 64;
+    const int64_t h = 16;
+    const int64_t w = 16;
+
+    // Convolution parameters (height, width)
+    const int64_t strideH = 1;
+    const int64_t strideW = 1;
+    const int64_t padH = 1;
+    const int64_t padW = 1;
+    const int64_t dilH = 1;
+    const int64_t dilW = 1;
+
+    // Creates the graph and hands back the tensors that need device memory.
+    auto buildConvBackwardWeightGraph = [=](hipdnnHandle_t handle) {
+        const auto elemType = fe::getDataTypeEnumFromType<InputType>();
+
+        auto graph = std::make_shared<fe::graph::Graph>();
+        graph->set_name("conv_backward_weigth_graph")
+            .set_io_data_type(elemType)
+            .set_intermediate_data_type(elemType)
+            .set_compute_data_type(fe::DataType::FLOAT);
+
+        // Forward-pass input, dims {n, c, h, w} with the c dimension at stride 1.
+        auto image = std::make_shared<TensorAttr>(fe::graph::Tensor_attributes()
+                                                      .set_name("image")
+                                                      .set_dim({n, c, h, w})
+                                                      .set_stride({c * h * w, 1, c * w, c}));
+
+        // Incoming gradient; this sample gives it the same dims and strides as image.
+        auto loss = std::make_shared<TensorAttr>(fe::graph::Tensor_attributes()
+                                                     .set_name("loss")
+                                                     .set_dim({n, c, h, w})
+                                                     .set_stride({c * h * w, 1, c * w, c}));
+
+        auto wgradNodeAttrs = fe::graph::ConvWgradAttributes()
+                                  .set_name("conv_backward_weight_node")
+                                  .set_padding({padH, padW})
+                                  .set_stride({strideH, strideW})
+                                  .set_dilation({dilH, dilW});
+
+        // The wgrad result is the only graph output.
+        auto dw = graph->conv_wgrad(loss, image, wgradNodeAttrs);
+        dw->set_output(true);
+
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, image, loss, dw);
+    };
+
+    auto backend = fe::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, image, loss, dw] = buildConvBackwardWeightGraph(handle);
+
+    // One buffer per tensor, shaped by the dims/strides the graph reports.
+    sdk::Tensor<InputType> imageBuffer(image->get_dim(), image->get_stride());
+    sdk::Tensor<InputType> lossBuffer(loss->get_dim(), loss->get_stride());
+    sdk::Tensor<InputType> dwBuffer(dw->get_dim(), dw->get_stride());
+
+    // uid -> device pointer bindings consumed by execute().
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack.insert_or_assign(image->get_uid(), imageBuffer.memory().deviceData());
+    variantPack.insert_or_assign(loss->get_uid(), lossBuffer.memory().deviceData());
+    variantPack.insert_or_assign(dw->get_uid(), dwBuffer.memory().deviceData());
+
+    // Scratch space requested by the built graph.
+    int64_t workspaceBytes = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceBytes));
+    const sdk::Workspace workspace(static_cast<size_t>(workspaceBytes));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Convolution backward_weight graph execution complete.\n\n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/convolution/ConvForward.cpp
+++ b/cpp/convolution/ConvForward.cpp
+#include <iostream>
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+#include "utils.hpp"
+
+/// Sample: builds a one-node convolution forward graph (conv_fprop), binds
+/// device buffers to its tensors and executes it once.
+///
+/// @return 0 on success, 1 when the hipDNN backend could not be obtained.
+int main()
+{
+    namespace fe = hipdnn_frontend;
+    namespace sdk = hipdnn_data_sdk::utilities;
+
+    using InputType = float;
+    using TensorAttr = fe::graph::TensorAttributes;
+
+    // Batch size
+    const int64_t n = 16;
+
+    // Input: channels, height, width
+    const int64_t c = 16;
+    const int64_t h = 16;
+    const int64_t w = 16;
+
+    // Filter: count, height, width
+    const int64_t k = 16;
+    const int64_t r = 3;
+    const int64_t s = 3;
+
+    // Convolution parameters (height, width)
+    const int64_t strideH = 1;
+    const int64_t strideW = 1;
+    const int64_t padH = 1;
+    const int64_t padW = 1;
+    const int64_t dilH = 1;
+    const int64_t dilW = 1;
+
+    // Creates the graph and hands back the tensors that need device memory.
+    auto buildConvForwardGraph = [=](hipdnnHandle_t handle) {
+        const auto elemType = fe::getDataTypeEnumFromType<InputType>();
+
+        auto graph = std::make_shared<fe::graph::Graph>();
+        graph->set_name("conv_forward_graph")
+            .set_io_data_type(elemType)
+            .set_intermediate_data_type(elemType)
+            .set_compute_data_type(fe::DataType::FLOAT);
+
+        // Input image, dims {n, c, h, w} with the c dimension at stride 1.
+        auto image = std::make_shared<TensorAttr>(fe::graph::Tensor_attributes()
+                                                      .set_name("image")
+                                                      .set_dim({n, c, h, w})
+                                                      .set_stride({c * h * w, 1, c * w, c}));
+
+        // Filter, dims {k, c, r, s} with the c dimension at stride 1.
+        auto filter = std::make_shared<TensorAttr>(fe::graph::Tensor_attributes()
+                                                       .set_name("filter")
+                                                       .set_dim({k, c, r, s})
+                                                       .set_stride({c * r * s, 1, c * s, c}));
+
+        auto fpropNodeAttrs = fe::graph::ConvFpropAttributes()
+                                  .set_name("conv_fprop_node")
+                                  .set_padding({padH, padW})
+                                  .set_stride({strideH, strideW})
+                                  .set_dilation({dilH, dilW});
+
+        // The convolution result is the only graph output.
+        auto y = graph->conv_fprop(image, filter, fpropNodeAttrs);
+        y->set_output(true);
+
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, image, filter, y);
+    };
+
+    auto backend = fe::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, filter, y] = buildConvForwardGraph(handle);
+
+    // One buffer per tensor, shaped by the dims/strides the graph reports.
+    sdk::Tensor<InputType> inputBuffer(input->get_dim(), input->get_stride());
+    sdk::Tensor<InputType> filterBuffer(filter->get_dim(), filter->get_stride());
+    sdk::Tensor<InputType> yBuffer(y->get_dim(), y->get_stride());
+
+    // uid -> device pointer bindings consumed by execute().
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack.insert_or_assign(input->get_uid(), inputBuffer.memory().deviceData());
+    variantPack.insert_or_assign(filter->get_uid(), filterBuffer.memory().deviceData());
+    variantPack.insert_or_assign(y->get_uid(), yBuffer.memory().deviceData());
+
+    // Scratch space requested by the built graph.
+    int64_t workspaceBytes = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceBytes));
+    const sdk::Workspace workspace(static_cast<size_t>(workspaceBytes));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Convolution forward graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/ctc_loss/CtcLoss.cpp
+++ b/cpp/ctc_loss/CtcLoss.cpp
+// Copyright © Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier:  MIT
+
+#include <iostream>
+
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+/// Sample: builds a one-node CTC loss graph, binds device buffers to the
+/// probability input and to the loss/gradient outputs, and executes it once.
+///
+/// @return 0 on success, 1 when the hipDNN backend could not be obtained.
+int main()
+{
+    using InputType = float;
+
+    const int64_t n0 = 700; // time_step
+    const int64_t n1 = 4; // batch_size
+    const int64_t n2 = 10; // class_number
+
+    // Creates the graph and hands back the tensors that need device memory.
+    auto buildCtcLossGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        graph->set_name("ctc_loss_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
+
+        // Packed {time_step, batch_size, class_number} layout: stepping one batch
+        // entry must skip a full row of n2 classes, so the middle stride is n2
+        // (a stride of n1 would make neighbouring batch rows overlap).
+        auto probs = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("probs")
+                .set_dim({n0, n1, n2})
+                .set_stride({n1 * n2, n2, 1}));
+
+        // labels holds the label sequences of all batch entries back to back;
+        // label_lengths sums to labels.size() (1 + 1 + 2 + 3 == 7). Both length
+        // lists carry one entry per batch element (n1 == 4), and every input
+        // length stays within the n0 available time steps.
+        auto ctcLossAttributes = hipdnn_frontend::graph::CtcLossAttributes()
+                                     .set_blank_label_id(0)
+                                     .set_apply_softmax(true)
+                                     .set_algo(0)
+                                     .set_labels({1, 2, 3, 3, 3, 5, 6})
+                                     .set_label_lengths({1, 1, 2, 3})
+                                     .set_input_lengths({500, 500, 600, 700})
+                                     .set_name("ctc_loss");
+
+        // Both results of the node are exposed as graph outputs.
+        auto [losses, gradients] = graph->ctc_loss(probs, ctcLossAttributes);
+        losses->set_output(true);
+        gradients->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, probs, losses, gradients);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Create backend failed.\n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, probs, losses, gradients] = buildCtcLossGraph(handle);
+
+    // One buffer per tensor, shaped by the dims/strides the graph reports.
+    hipdnn_data_sdk::utilities::Tensor<InputType> probsTensor(probs->get_dim(),
+                                                              probs->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> lossesTensor(losses->get_dim(),
+                                                               losses->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> gradientsTensor(gradients->get_dim(),
+                                                                  gradients->get_stride());
+
+    // Map each tensor uid to the device pointer backing it.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[probs->get_uid()] = probsTensor.memory().deviceData();
+    variantPack[losses->get_uid()] = lossesTensor.memory().deviceData();
+    variantPack[gradients->get_uid()] = gradientsTensor.memory().deviceData();
+
+    // Query and allocate the scratch workspace the built graph asks for.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "CtcLoss graph execution complete.\n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/deformattention/DeformAttnBackward.cpp
+++ b/cpp/deformattention/DeformAttnBackward.cpp
+#include <iostream>
+#include <random>
+#include <tuple>
+#include <unordered_map>
+#include <vector>
+
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+int main()
+{
+    using InputType = float;
+    using ComputeType = float;
+    using ShapeIndexType = int64_t;
+
+    const int64_t n = 2; // Batch size
+    const int64_t nHeads = 2; // Number of attention heads
+    const int64_t embedDimsPerHead = 32; // Embedding dimensions per attention head
+    const int64_t nLevels = 2; // Number of feature levels
+    const int64_t nPoints = 2; // Number of sampling points per attention head
+    const int64_t nQueries = 32; // Number of queries
+
+    // Randomly generate spatial shapes and level start index
+    // nKeys: total number of keys across all feature levels
+    // spatialShapesData: (nLevels, 2) tensor containing height and width of each feature level
+    // levelStartIndexData: (nLevels,) tensor containing starting index of each feature level
+    auto [nKeys, spatialShapesData, levelStartIndexData] = [=]() {
+        int64_t nKeysLocal = 0;
+        std::vector<ShapeIndexType> spatialShapesLocal(static_cast<size_t>(nLevels * 2));
+        std::vector<ShapeIndexType> levelStartIndexLocal(static_cast<size_t>(nLevels));
+
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_int_distribution<int64_t> dist(1, 128);
+        levelStartIndexLocal[0] = 0;
+        for(size_t i = 0; i < spatialShapesLocal.size(); i += 2)
+        {
+            spatialShapesLocal[i] = dist(gen); // height
+            spatialShapesLocal[i + 1] = dist(gen); // width
+
+            if(i > 0)
+            {
+                levelStartIndexLocal[i / 2] = nKeysLocal;
+            }
+            nKeysLocal += spatialShapesLocal[i] * spatialShapesLocal[i + 1];
+        }
+
+        return std::make_tuple(nKeysLocal, spatialShapesLocal, levelStartIndexLocal);
+    }();
+
+    auto buildDeformableAttnBwdGraph = [=, nKeysLocal = nKeys](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        graph->set_name("deformable_attention_backward_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<ComputeType>())
+            .set_compute_data_type(hipdnn_frontend::getDataTypeEnumFromType<ComputeType>());
+
+        auto value = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("value")
+                .set_dim({n, nKeysLocal, nHeads, embedDimsPerHead})
+                .set_stride({nKeysLocal * nHeads * embedDimsPerHead,
+                             nHeads * embedDimsPerHead,
+                             embedDimsPerHead,
+                             1}));
+        auto spatialShapes = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("spatial_shapes")
+                .set_dim({nLevels, 2})
+                .set_stride({2, 1})
+                .set_data_type(hipdnn_frontend::getDataTypeEnumFromType<ShapeIndexType>()));
+        auto levelStartIndex = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("level_start_index")
+                .set_dim({nLevels})
+                .set_stride({1})
+                .set_data_type(hipdnn_frontend::getDataTypeEnumFromType<ShapeIndexType>()));
+        auto samplingLocations = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("sampling_locations")
+                .set_dim({n, nQueries, nHeads, nLevels, nPoints, 2})
+                .set_stride({nQueries * nHeads * nLevels * nPoints * 2,
+                             nHeads * nLevels * nPoints * 2,
+                             nLevels * nPoints * 2,
+                             nPoints * 2,
+                             2,
+                             1}));
+        auto attentionWeights = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("attention_weights")
+                .set_dim({n, nQueries, nHeads, nLevels, nPoints})
+                .set_stride({nQueries * nHeads * nLevels * nPoints,
+                             nHeads * nLevels * nPoints,
+                             nLevels * nPoints,
+                             nPoints,
+                             1}));
+
+        auto gradOutput = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("grad_output")
+                .set_dim({n, nQueries, nHeads * embedDimsPerHead})
+                .set_stride({nQueries * nHeads * embedDimsPerHead, nHeads * embedDimsPerHead, 1}));
+
+        auto deformAttnDgradAttributes
+            = hipdnn_frontend::graph::DeformAttnDgradAttributes().set_name(
+                "deform_attn_backward_node");
+        auto [gradValue, gradSamplingLoc, gradAttnWeight]
+            = graph->deform_attn_dgrad(value,
+                                       spatialShapes,
+                                       levelStartIndex,
+                                       samplingLocations,
+                                       attentionWeights,
+                                       gradOutput,
+                                       deformAttnDgradAttributes);
+        gradValue->set_output(true);
+        gradSamplingLoc->set_output(true);
+        gradAttnWeight->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph,
+                               value,
+                               spatialShapes,
+                               levelStartIndex,
+                               samplingLocations,
+                               attentionWeights,
+                               gradOutput,
+                               gradValue,
+                               gradSamplingLoc,
+                               gradAttnWeight);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph,
+          value,
+          spatialShapes,
+          levelStartIndex,
+          samplingLocations,
+          attentionWeights,
+          gradOutput,
+          gradValue,
+          gradSamplingLoc,
+          gradAttnWeight]
+        = buildDeformableAttnBwdGraph(handle);
+
+    // Allocate DCU memory
+    hipdnn_data_sdk::utilities::Tensor<InputType> valueTensor(value->get_dim(),
+                                                              value->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<ShapeIndexType> spatialShapesTensor(
+        spatialShapes->get_dim(), spatialShapes->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<ShapeIndexType> levelStartIndexTensor(
+        levelStartIndex->get_dim(), levelStartIndex->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> samplingLocationsTensor(
+        samplingLocations->get_dim(), samplingLocations->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> attentionWeightsTensor(
+        attentionWeights->get_dim(), attentionWeights->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> gradOutputTensor(gradOutput->get_dim(),
+                                                                   gradOutput->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> gradValueTensor(gradValue->get_dim(),
+                                                                  gradValue->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> gradSamplingLocTensor(
+        gradSamplingLoc->get_dim(), gradSamplingLoc->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> gradAttnWeightTensor(
+        gradAttnWeight->get_dim(), gradAttnWeight->get_stride());
+
+    // Fill tensors with data
+    spatialShapesTensor.fillWithData(spatialShapesData.data(),
+                                     spatialShapesData.size() * sizeof(ShapeIndexType));
+    levelStartIndexTensor.fillWithData(levelStartIndexData.data(),
+                                       levelStartIndexData.size() * sizeof(ShapeIndexType));
+
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[value->get_uid()] = valueTensor.memory().deviceData();
+    variantPack[spatialShapes->get_uid()] = spatialShapesTensor.memory().deviceData();
+    variantPack[levelStartIndex->get_uid()] = levelStartIndexTensor.memory().deviceData();
+    variantPack[samplingLocations->get_uid()] = samplingLocationsTensor.memory().deviceData();
+    variantPack[attentionWeights->get_uid()] = attentionWeightsTensor.memory().deviceData();
+    variantPack[gradOutput->get_uid()] = gradOutputTensor.memory().deviceData();
+    variantPack[gradValue->get_uid()] = gradValueTensor.memory().deviceData();
+    variantPack[gradSamplingLoc->get_uid()] = gradSamplingLocTensor.memory().deviceData();
+    variantPack[gradAttnWeight->get_uid()] = gradAttnWeightTensor.memory().deviceData();
+
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Deformable_attention_backward graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/deformattention/DeformAttnForward.cpp
+++ b/cpp/deformattention/DeformAttnForward.cpp
+#include <iostream>
+#include <random>
+#include <tuple>
+#include <unordered_map>
+#include <vector>
+
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+int main()
+{
+    // Sample: build a single-node deformable-attention forward graph, bind device
+    // buffers to every graph tensor and execute the graph once.
+    namespace hfe = hipdnn_frontend;
+    namespace hsdk = hipdnn_data_sdk::utilities;
+
+    using InputType = float;
+    using ComputeType = float;
+    using ShapeIndexType = int64_t;
+    using TensorDesc = hfe::graph::TensorAttributes;
+    using DataTensor = hsdk::Tensor<InputType>;
+    using IndexTensor = hsdk::Tensor<ShapeIndexType>;
+
+    const int64_t n = 2; // Batch size
+    const int64_t nHeads = 2; // Attention heads
+    const int64_t embedDimsPerHead = 32; // Embedding dimensions per head
+    const int64_t nLevels = 2; // Feature levels
+    const int64_t nPoints = 2; // Sampling points per head
+    const int64_t nQueries = 32; // Queries
+
+    // Draw a random (height, width) in [1, 128] for every feature level and derive:
+    //   nKeys               - sum of height * width over all levels
+    //   spatialShapesData   - flattened (nLevels, 2) array of {height, width}
+    //   levelStartIndexData - (nLevels,) array; entry L is the key count of levels < L
+    auto [nKeys, spatialShapesData, levelStartIndexData] = [=]() {
+        std::vector<ShapeIndexType> shapes(static_cast<size_t>(nLevels * 2));
+        std::vector<ShapeIndexType> starts(static_cast<size_t>(nLevels));
+
+        std::random_device seedSource;
+        std::mt19937 engine(seedSource());
+        std::uniform_int_distribution<int64_t> extent(1, 128);
+
+        int64_t keyCount = 0;
+        for(size_t level = 0; level * 2 < shapes.size(); ++level)
+        {
+            const size_t heightSlot = level * 2;
+            const size_t widthSlot = heightSlot + 1;
+            shapes[heightSlot] = extent(engine); // height
+            shapes[widthSlot] = extent(engine); // width
+
+            // The running total is 0 for the first level, so no special case is needed.
+            starts[level] = keyCount;
+            keyCount += shapes[heightSlot] * shapes[widthSlot];
+        }
+
+        return std::make_tuple(keyCount, shapes, starts);
+    }();
+
+    // nKeys is a structured binding, which a C++17 lambda cannot capture directly,
+    // so it is copied into the closure through an init-capture.
+    auto buildDeformableAttnFwdGraph = [=, totalKeys = nKeys](hipdnnHandle_t handle) {
+        auto fwdGraph = std::make_shared<hfe::graph::Graph>();
+
+        fwdGraph->set_name("deformable_attention_forward_graph")
+            .set_io_data_type(hfe::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hfe::getDataTypeEnumFromType<ComputeType>())
+            .set_compute_data_type(hfe::getDataTypeEnumFromType<ComputeType>());
+
+        // Packed (row-major) strides, built up from the innermost dimension outwards.
+        const int64_t valuePerKey = nHeads * embedDimsPerHead;
+
+        const int64_t locPerLevel = nPoints * 2;
+        const int64_t locPerHead = nLevels * locPerLevel;
+        const int64_t locPerQuery = nHeads * locPerHead;
+        const int64_t locPerBatch = nQueries * locPerQuery;
+
+        const int64_t weightPerHead = nLevels * nPoints;
+        const int64_t weightPerQuery = nHeads * weightPerHead;
+        const int64_t weightPerBatch = nQueries * weightPerQuery;
+
+        auto valueDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("value")
+                .set_dim({n, totalKeys, nHeads, embedDimsPerHead})
+                .set_stride({totalKeys * valuePerKey, valuePerKey, embedDimsPerHead, 1}));
+
+        auto shapesDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("spatial_shapes")
+                .set_dim({nLevels, 2})
+                .set_stride({2, 1})
+                .set_data_type(hfe::getDataTypeEnumFromType<ShapeIndexType>()));
+
+        auto startsDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("level_start_index")
+                .set_dim({nLevels})
+                .set_stride({1})
+                .set_data_type(hfe::getDataTypeEnumFromType<ShapeIndexType>()));
+
+        auto locationsDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("sampling_locations")
+                .set_dim({n, nQueries, nHeads, nLevels, nPoints, 2})
+                .set_stride({locPerBatch, locPerQuery, locPerHead, locPerLevel, 2, 1}));
+
+        auto weightsDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("attention_weights")
+                .set_dim({n, nQueries, nHeads, nLevels, nPoints})
+                .set_stride({weightPerBatch, weightPerQuery, weightPerHead, nPoints, 1}));
+
+        auto nodeAttributes
+            = hfe::graph::DeformAttnFpropAttributes().set_name("deform_attn_forward_node");
+        auto outDesc = fwdGraph->deform_attn_fprop(
+            valueDesc, shapesDesc, startsDesc, locationsDesc, weightsDesc, nodeAttributes);
+        outDesc->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(fwdGraph->build(handle));
+
+        return std::make_tuple(
+            fwdGraph, valueDesc, shapesDesc, startsDesc, locationsDesc, weightsDesc, outDesc);
+    };
+
+    auto backend = hfe::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, value, spatialShapes, levelStartIndex, samplingLocations, attentionWeights, output]
+        = buildDeformableAttnFwdGraph(handle);
+
+    // Allocate one buffer per graph tensor, shaped from the tensor's dims and strides
+    DataTensor valueTensor(value->get_dim(), value->get_stride());
+    IndexTensor spatialShapesTensor(spatialShapes->get_dim(), spatialShapes->get_stride());
+    IndexTensor levelStartIndexTensor(levelStartIndex->get_dim(), levelStartIndex->get_stride());
+    DataTensor samplingLocationsTensor(samplingLocations->get_dim(),
+                                       samplingLocations->get_stride());
+    DataTensor attentionWeightsTensor(attentionWeights->get_dim(),
+                                      attentionWeights->get_stride());
+    DataTensor outTensor(output->get_dim(), output->get_stride());
+
+    // Only the two index tensors receive explicit contents (byte counts are passed)
+    spatialShapesTensor.fillWithData(spatialShapesData.data(),
+                                     spatialShapesData.size() * sizeof(ShapeIndexType));
+    levelStartIndexTensor.fillWithData(levelStartIndexData.data(),
+                                       levelStartIndexData.size() * sizeof(ShapeIndexType));
+
+    // Map every tensor uid to its device pointer
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[value->get_uid()] = valueTensor.memory().deviceData();
+    variantPack[spatialShapes->get_uid()] = spatialShapesTensor.memory().deviceData();
+    variantPack[levelStartIndex->get_uid()] = levelStartIndexTensor.memory().deviceData();
+    variantPack[samplingLocations->get_uid()] = samplingLocationsTensor.memory().deviceData();
+    variantPack[attentionWeights->get_uid()] = attentionWeightsTensor.memory().deviceData();
+    variantPack[output->get_uid()] = outTensor.memory().deviceData();
+
+    // Size the scratch workspace as reported by the built graph
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hsdk::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Deformable_attention_forward graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/deformconvolution/DeformConvBackward.cpp
+++ b/cpp/deformconvolution/DeformConvBackward.cpp
+#include <iostream>
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+#include "utils.hpp"
+
+int main()
+{
+    // Sample: build a single-node deformable-convolution data-gradient graph
+    // (outputs dx, doffset, dmask), bind device buffers and execute it once.
+    namespace hfe = hipdnn_frontend;
+    namespace hsdk = hipdnn_data_sdk::utilities;
+
+    using InputType = float;
+    using TensorDesc = hfe::graph::TensorAttributes;
+    using DeviceTensor = hsdk::Tensor<InputType>;
+
+    const int64_t n = 4; // Batch size
+
+    // Input image
+    const int64_t c = 64; // Channels
+    const int64_t h = 16; // Height
+    const int64_t w = 16; // Width
+
+    // Filter
+    const int64_t k = 64; // Number of filters
+    const int64_t r = 1; // Kernel height
+    const int64_t s = 1; // Kernel width
+
+    // Convolution parameters
+    const int64_t strideH = 1; // Height stride
+    const int64_t strideW = 1; // Width stride
+    const int64_t padH = 0; // Height padding
+    const int64_t padW = 0; // Width padding
+    const int64_t dilH = 1; // Height dilation
+    const int64_t dilW = 1; // Width dilation
+
+    // Output spatial size: (in + 2 * pad - dilated kernel extent) / stride + 1
+    const int64_t outH = ((h + 2 * padH - (dilH * (r - 1) + 1)) / strideH) + 1;
+    const int64_t outW = ((w + 2 * padW - (dilW * (s - 1) + 1)) / strideW) + 1;
+
+    const int64_t g = 1; // Number of groups
+
+    auto buildDeformConvBackwardGraph = [=](hipdnnHandle_t handle) {
+        auto bwdGraph = std::make_shared<hfe::graph::Graph>();
+        bwdGraph->set_name("deform_conv_backward_graph")
+            .set_io_data_type(hfe::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hfe::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hfe::DataType::FLOAT);
+
+        // Channel counts of the derived tensors
+        const int64_t filterC = c / g;
+        const int64_t offsetC = 2 * g * r * s;
+        const int64_t maskC = g * r * s;
+
+        // All tensors list dims as (N, C, H, W) while the strides make the channel
+        // dimension the fastest varying one (stride 1).
+        auto imageDesc = std::make_shared<TensorDesc>(hfe::graph::Tensor_attributes()
+                                                          .set_name("image")
+                                                          .set_dim({n, c, h, w})
+                                                          .set_stride({c * h * w, 1, c * w, c}));
+
+        auto filterDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("filter")
+                .set_dim({k, filterC, r, s})
+                .set_stride({filterC * r * s, 1, filterC * s, filterC}));
+
+        auto lossDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("loss")
+                .set_dim({n, k, outH, outW})
+                .set_stride({k * outH * outW, 1, k * outW, k}));
+
+        auto offsetDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("offset")
+                .set_dim({n, offsetC, outH, outW})
+                .set_stride({offsetC * outH * outW, 1, offsetC * outW, offsetC}));
+
+        auto maskDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("mask")
+                .set_dim({n, maskC, outH, outW})
+                .set_stride({maskC * outH * outW, 1, maskC * outW, maskC}));
+
+        // The image and mask are handed to the node through its attributes;
+        // loss, filter and offset are passed as direct operands below.
+        auto nodeAttributes = hfe::graph::DeformConvDgradAttributes()
+                                  .set_name("deform_conv_backward_node")
+                                  .set_padding({padH, padW})
+                                  .set_stride({strideH, strideW})
+                                  .set_dilation({dilH, dilW})
+                                  .set_x(imageDesc)
+                                  .set_mask(maskDesc);
+
+        auto [dxDesc, doffsetDesc, dmaskDesc]
+            = bwdGraph->deform_conv_dgrad(lossDesc, filterDesc, offsetDesc, nodeAttributes);
+        dxDesc->set_output(true);
+        doffsetDesc->set_output(true);
+        dmaskDesc->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(bwdGraph->build(handle));
+
+        return std::make_tuple(bwdGraph,
+                               imageDesc,
+                               filterDesc,
+                               lossDesc,
+                               offsetDesc,
+                               maskDesc,
+                               dxDesc,
+                               doffsetDesc,
+                               dmaskDesc);
+    };
+
+    auto backend = hfe::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, image, filter, loss, offset, mask, dx, doffset, dmask]
+        = buildDeformConvBackwardGraph(handle);
+
+    // One buffer per graph tensor, shaped from the tensor's dims and strides
+    DeviceTensor lossTensor(loss->get_dim(), loss->get_stride());
+    DeviceTensor inputTensor(image->get_dim(), image->get_stride());
+    DeviceTensor filterTensor(filter->get_dim(), filter->get_stride());
+    DeviceTensor offsetTensor(offset->get_dim(), offset->get_stride());
+    DeviceTensor maskTensor(mask->get_dim(), mask->get_stride());
+    DeviceTensor dxTensor(dx->get_dim(), dx->get_stride());
+    DeviceTensor doffsetTensor(doffset->get_dim(), doffset->get_stride());
+    DeviceTensor dmaskTensor(dmask->get_dim(), dmask->get_stride());
+
+    // Per-sampling-point pixel offsets of the kernel, drawn with bounds 0 and 1
+    offsetTensor.fillWithRandomValues(static_cast<InputType>(0.0f), static_cast<InputType>(1.0f));
+
+    // Map every tensor uid to its device pointer
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[loss->get_uid()] = lossTensor.memory().deviceData();
+    variantPack[image->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[filter->get_uid()] = filterTensor.memory().deviceData();
+    variantPack[offset->get_uid()] = offsetTensor.memory().deviceData();
+    variantPack[mask->get_uid()] = maskTensor.memory().deviceData();
+    variantPack[dx->get_uid()] = dxTensor.memory().deviceData();
+    variantPack[doffset->get_uid()] = doffsetTensor.memory().deviceData();
+    variantPack[dmask->get_uid()] = dmaskTensor.memory().deviceData();
+
+    // Size the scratch workspace as reported by the built graph
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hsdk::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Deformable convolution backward graph execution complete.\n\n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/deformconvolution/DeformConvBackwardWeight.cpp
+++ b/cpp/deformconvolution/DeformConvBackwardWeight.cpp
+#include <iostream>
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+#include "utils.hpp"
+
+int main()
+{
+    // Sample: build a single-node deformable-convolution weight-gradient graph
+    // (output dw), bind device buffers to its tensors and execute it once.
+    using InputType = float;
+
+    const int64_t n = 4; // Batch size
+
+    // Input
+    const int64_t c = 64; // Number of channels
+    const int64_t h = 16; // Height
+    const int64_t w = 16; // Width
+
+    // Filter
+    const int64_t k = 64; // Number of filters
+    const int64_t r = 3; // Height
+    const int64_t s = 3; // Width
+
+    // Conv param
+    const int64_t strideH = 1; // Height stride
+    const int64_t strideW = 1; // Width stride
+    const int64_t padH = 1; // Height padding
+    const int64_t padW = 1; // Width padding
+    const int64_t dilH = 1; // Height dilation
+    const int64_t dilW = 1; // Width dilation
+
+    // Spatial size of the convolution output, which is the spatial size of the
+    // loss and offset tensors. It is derived from the conv parameters (as the
+    // sibling deformable-convolution samples do) instead of being assumed equal
+    // to h x w; with the values above it evaluates to 16 x 16, but it stays
+    // correct when kernel size, padding, stride or dilation are edited.
+    const int64_t outH = ((h + 2 * padH - (dilH * (r - 1) + 1)) / strideH) + 1;
+    const int64_t outW = ((w + 2 * padW - (dilW * (s - 1) + 1)) / strideW) + 1;
+
+    const int64_t g = 1; // Number of groups
+
+    // Describes the graph tensors, adds the wgrad node and builds the graph.
+    // Returns the graph together with the tensor descriptors needed for binding.
+    auto buildDeformConvBackwardWeightGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+        graph->set_name("deform_conv_backward_weight_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
+
+        // Dims are listed as (N, C, H, W); the strides make the channel dimension
+        // the fastest varying one (stride 1).
+        auto image = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("image")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, 1, c * w, c}));
+
+        // Gradient w.r.t. the convolution output: k channels, output spatial size
+        auto loss = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("loss")
+                .set_dim({n, k, outH, outW})
+                .set_stride({k * outH * outW, 1, k * outW, k}));
+
+        // 2 * g * r * s offset channels per output position
+        auto offset = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("offset")
+                .set_dim({n, 2 * g * r * s, outH, outW})
+                .set_stride(
+                    {2 * g * r * s * outH * outW, 1, 2 * g * r * s * outW, 2 * g * r * s}));
+
+        auto deformConvWrwAttributes = hipdnn_frontend::graph::DeformConvWgradAttributes()
+                                           .set_name("deform_conv_backward_weight_node")
+                                           .set_padding({padH, padW})
+                                           .set_stride({strideH, strideW})
+                                           .set_dilation({dilH, dilW});
+
+        auto dw = graph->deform_conv_wgrad(loss, offset, image, deformConvWrwAttributes);
+        // The filter shape is supplied explicitly here.
+        // NOTE(review): no stride is set on dw, yet dw->get_stride() is used below
+        // to allocate its buffer - presumably filled in during build(); confirm.
+        dw->set_output(true).set_dim({k, c / g, r, s});
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, image, loss, offset, dw);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, image, loss, offset, dw] = buildDeformConvBackwardWeightGraph(handle);
+
+    // One buffer per graph tensor, shaped from the tensor's dims and strides
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(image->get_dim(),
+                                                              image->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> offsetTensor(offset->get_dim(),
+                                                               offset->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> lossTensor(loss->get_dim(), loss->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> dwTensor(dw->get_dim(), dw->get_stride());
+
+    // Map every tensor uid to its device pointer
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[image->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[offset->get_uid()] = offsetTensor.memory().deviceData();
+    variantPack[loss->get_uid()] = lossTensor.memory().deviceData();
+    variantPack[dw->get_uid()] = dwTensor.memory().deviceData();
+
+    // Size the scratch workspace as reported by the built graph
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Deformable convolution backward_weight graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/deformconvolution/DeformConvForward.cpp
+++ b/cpp/deformconvolution/DeformConvForward.cpp
+#include <iostream>
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+#include "utils.hpp"
+
+int main()
+{
+    // Sample: build a single-node deformable-convolution forward graph,
+    // bind device buffers to its tensors and execute it once.
+    namespace hfe = hipdnn_frontend;
+    namespace hsdk = hipdnn_data_sdk::utilities;
+
+    using InputType = float;
+    using TensorDesc = hfe::graph::TensorAttributes;
+    using DeviceTensor = hsdk::Tensor<InputType>;
+
+    const int64_t n = 4; // Batch size
+
+    // Input image
+    const int64_t c = 3; // Channels
+    const int64_t h = 10; // Height
+    const int64_t w = 10; // Width
+
+    // Filter
+    const int64_t k = 5; // Number of filters
+    const int64_t r = 3; // Kernel height
+    const int64_t s = 3; // Kernel width
+
+    // Convolution parameters
+    const int64_t strideH = 1; // Height stride
+    const int64_t strideW = 1; // Width stride
+    const int64_t padH = 0; // Height padding
+    const int64_t padW = 0; // Width padding
+    const int64_t dilH = 1; // Height dilation
+    const int64_t dilW = 1; // Width dilation
+
+    // Spatial size of the offset tensor:
+    // (in + 2 * pad - dilated kernel extent) / stride + 1
+    const int64_t outH = ((h + 2 * padH - (dilH * (r - 1) + 1)) / strideH) + 1;
+    const int64_t outW = ((w + 2 * padW - (dilW * (s - 1) + 1)) / strideW) + 1;
+
+    const int64_t g = 1; // Number of groups
+
+    auto buildDeformConvForwardGraph = [=](hipdnnHandle_t handle) {
+        auto fwdGraph = std::make_shared<hfe::graph::Graph>();
+        fwdGraph->set_name("deform_conv_forward_graph")
+            .set_io_data_type(hfe::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hfe::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hfe::DataType::FLOAT);
+
+        // Channel counts of the derived tensors
+        const int64_t filterC = c / g;
+        const int64_t offsetC = 2 * g * r * s;
+
+        // Dims are listed as (N, C, H, W); the strides make the channel dimension
+        // the fastest varying one (stride 1).
+        auto imageDesc = std::make_shared<TensorDesc>(hfe::graph::Tensor_attributes()
+                                                          .set_name("image")
+                                                          .set_dim({n, c, h, w})
+                                                          .set_stride({c * h * w, 1, c * w, c}));
+
+        auto filterDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("filter")
+                .set_dim({k, filterC, r, s})
+                .set_stride({filterC * r * s, 1, filterC * s, filterC}));
+
+        auto offsetDesc = std::make_shared<TensorDesc>(
+            hfe::graph::Tensor_attributes()
+                .set_name("offset")
+                .set_dim({n, offsetC, outH, outW})
+                .set_stride({offsetC * outH * outW, 1, offsetC * outW, offsetC}));
+
+        auto nodeAttributes = hfe::graph::DeformConvFpropAttributes()
+                                  .set_name("deform_conv_forward_node")
+                                  .set_padding({padH, padW})
+                                  .set_stride({strideH, strideW})
+                                  .set_dilation({dilH, dilW});
+
+        auto yDesc
+            = fwdGraph->deform_conv_fprop(imageDesc, offsetDesc, filterDesc, nodeAttributes);
+        yDesc->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(fwdGraph->build(handle));
+
+        return std::make_tuple(fwdGraph, imageDesc, filterDesc, offsetDesc, yDesc);
+    };
+
+    auto backend = hfe::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Creat backend failed. \n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, image, filter, offset, y] = buildDeformConvForwardGraph(handle);
+
+    // One buffer per graph tensor, shaped from the tensor's dims and strides
+    DeviceTensor inputTensor(image->get_dim(), image->get_stride());
+    DeviceTensor filterTensor(filter->get_dim(), filter->get_stride());
+    DeviceTensor offsetTensor(offset->get_dim(), offset->get_stride());
+    DeviceTensor yTensor(y->get_dim(), y->get_stride());
+
+    // Map every tensor uid to its device pointer
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[image->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[filter->get_uid()] = filterTensor.memory().deviceData();
+    variantPack[offset->get_uid()] = offsetTensor.memory().deviceData();
+    variantPack[y->get_uid()] = yTensor.memory().deviceData();
+
+    // Size the scratch workspace as reported by the built graph
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hsdk::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Deform convolution forward graph execution complete.\n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/fusion/AddLayernorm.cpp
+++ b/cpp/fusion/AddLayernorm.cpp
+#include <iostream>
+
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+// Example program: builds a hipDNN graph that adds two tensors element-wise and
+// feeds the sum into a layernorm node, then executes the graph once.
+// The device buffers are only allocated here; this example never fills them.
+// Returns 1 if no backend object is available, 0 after the graph has executed.
+int main()
+{
+    using InputType = hipdnn_data_sdk::types::half;
+
+    const int64_t n = 16; // Batch size
+    const int64_t c = 16; // Number of channels
+    const int64_t h = 16; // Height
+    const int64_t w = 16; // Width
+
+    // Builds the graph against `handle` and returns it together with every
+    // tensor that has to be bound to device memory at execution time.
+    auto buildAddLayernormGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        graph->set_name("add_layernorm_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT); //
+
+        // create add
+        // Dims are given as {n, c, h, w}; the strides give the channel axis
+        // stride 1, i.e. channels vary fastest in memory.
+        auto input1 = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input1")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, 1, c * w, c}));
+        auto input2 = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input2")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, 1, c * w, c}));
+        auto addAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                 .set_name("add_node")
+                                 .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto addOutput = graph->pointwise(input1, input2, addAttributes);
+        // The sum is also exposed as a graph output, so it needs its own buffer.
+        addOutput->set_output(true);
+
+        // create layernorm
+        auto scale = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale").set_dim({w}).set_stride(
+                {1}));
+        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("bias").set_dim({w}).set_stride(
+                {1}));
+        // epsilon is passed by value, so it is never bound in the variant pack.
+        auto epsilon = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("epsilon").set_value(1e-5));
+        auto layernormAttributes
+            = hipdnn_frontend::graph::LayernormAttributes()
+                  .set_name("layernorm_node")
+                  .set_epsilon(epsilon)
+                  .set_forward_phase(hipdnn_frontend::NormFwdPhase_t::INFERENCE);
+        // mean / inv_variance are not marked as outputs and are not used below.
+        auto [y, mean, inv_variance]
+            = graph->layernorm(addOutput, scale, bias, layernormAttributes);
+        y->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input1, input2, scale, bias, addOutput, y);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Create backend failed.\n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input1, input2, scale, bias, addOutput, y] = buildAddLayernormGraph(handle);
+
+    // Allocate DCU memory
+    hipdnn_data_sdk::utilities::Tensor<InputType> input1Tensor(input1->get_dim(),
+                                                               input1->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> input2Tensor(input2->get_dim(),
+                                                               input2->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> scaleTensor(scale->get_dim(),
+                                                              scale->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> biasTensor(bias->get_dim(), bias->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> yTensor(y->get_dim(), y->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> addOutputTensor(addOutput->get_dim(),
+                                                                  addOutput->get_stride());
+
+    // Map each graph tensor uid to the device pointer backing it.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input1->get_uid()] = input1Tensor.memory().deviceData();
+    variantPack[input2->get_uid()] = input2Tensor.memory().deviceData();
+    variantPack[scale->get_uid()] = scaleTensor.memory().deviceData();
+    variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
+    variantPack[y->get_uid()] = yTensor.memory().deviceData();
+    variantPack[addOutput->get_uid()] = addOutputTensor.memory().deviceData();
+
+    // Size the scratch buffer from what the built graph reports.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "addlayernorm graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/fusion/GroupnormSwish.cpp
+++ b/cpp/fusion/GroupnormSwish.cpp
+#include <iostream>
+
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+// Example program: builds a hipDNN graph that runs groupnorm (training phase)
+// followed by a swish activation, then executes the graph once.
+// The device buffers are only allocated here; this example never fills them.
+// Returns 1 if no backend object is available, 0 after the graph has executed.
+int main()
+{
+    using InputType = float;
+
+    const int64_t n = 1; // Batch size
+    // Input
+    const int64_t c = 16; // Number of channels
+    const int64_t h = 16; // Height
+    const int64_t w = 16; // Width
+
+    // Builds the graph against `handle` and returns it together with every
+    // tensor that has to be bound to device memory at execution time.
+    auto buildGroupnormSwishGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        graph->set_name("group_norm_swish_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT); //
+
+        // create groupnorm
+        // Input is {n, c, h, w} with width as the fastest-varying axis.
+        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, h * w, w, 1}));
+        // scale and bias each hold one value per channel.
+        auto scale = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("scale").set_dim({c}).set_stride(
+                {1}));
+        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes().set_name("bias").set_dim({c}).set_stride(
+                {1}));
+        // epsilon and groups are single-element tensors passed by value below,
+        // so they are never bound in the variant pack.
+        auto epsilon = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("epsilon")
+                .set_dim({1})
+                .set_stride({1})
+                .set_data_type(hipdnn_frontend::DataType::FLOAT));
+        auto groups = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("groups")
+                .set_dim({1})
+                .set_stride({1})
+                .set_data_type(hipdnn_frontend::DataType::INT32));
+        epsilon->set_value(1e-5);
+        groups->set_value(2);
+        auto groupnormFwdAttributes
+            = hipdnn_frontend::graph::GroupnormFwdAttributes()
+                  .set_name("groupnorm_forward_node")
+                  .set_epsilon(epsilon)
+                  .set_groups(groups)
+                  .set_forward_phase(hipdnn_frontend::NormFwdPhase_t::TRAINING);
+        auto [y, mean, inv_variance] = graph->groupnorm(input, scale, bias, groupnormFwdAttributes);
+        // The statistics are exposed as graph outputs; y stays internal and
+        // only feeds the swish node.
+        mean->set_output(true);
+        inv_variance->set_output(true);
+
+        // create swish
+        auto swishAttributes = hipdnn_frontend::graph::PointwiseAttributes().set_mode(
+            hipdnn_frontend::PointwiseMode_t::SWISH_FWD);
+        auto output = graph->pointwise(y, swishAttributes);
+        output->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input, scale, bias, mean, inv_variance, output);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Create backend failed.\n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, scale, bias, mean, inv_variance, output] = buildGroupnormSwishGraph(handle);
+
+    // Allocate DCU memory
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
+                                                              input->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> scaleTensor(scale->get_dim(),
+                                                              scale->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> biasTensor(bias->get_dim(), bias->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> meanTensor(mean->get_dim(), mean->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> invVarianceTensor(inv_variance->get_dim(),
+                                                                    inv_variance->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> outTensor(output->get_dim(),
+                                                            output->get_stride());
+
+    // Map each graph tensor uid to the device pointer backing it.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[scale->get_uid()] = scaleTensor.memory().deviceData();
+    variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
+    variantPack[mean->get_uid()] = meanTensor.memory().deviceData();
+    variantPack[inv_variance->get_uid()] = invVarianceTensor.memory().deviceData();
+    variantPack[output->get_uid()] = outTensor.memory().deviceData();
+
+    // Size the scratch buffer from what the built graph reports.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "groupnorm_swish graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/fusion/PointwiseConvGenstats.cpp
+++ b/cpp/fusion/PointwiseConvGenstats.cpp
+#include <iostream>
+
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+// Example program: builds a hipDNN graph computing
+//   relu(input * scale + bias) -> conv_fprop -> genstats
+// and executes it once. The convolution result and both genstats results are
+// graph outputs.
+// The device buffers are only allocated here; this example never fills them.
+// Returns 1 if no backend object is available, 0 after the graph has executed.
+int main()
+{
+    using InputType = float;
+
+    const int64_t n = 4; // Batch size
+    // Input
+    const int64_t c = 64; // Number of channels
+    const int64_t h = 16; // Height
+    const int64_t w = 16; // Width
+
+    // Filter
+    const int64_t k = 32; // Number of filters
+    const int64_t r = 3; // Height
+    const int64_t s = 3; // Width
+
+    // Builds the graph against `handle` and returns it together with every
+    // tensor that has to be bound to device memory at execution time.
+    auto buildPointwiseConvGenstatsGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+        graph->set_name("pw_conv_genstats_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT); //
+
+        // create input: dims {n, c, h, w}, channel axis has stride 1
+        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, 1, c * w, c}));
+
+        // create mul: per-channel scale of shape {1, c, 1, 1}
+        auto scale = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("scale")
+                .set_dim({1, c, 1, 1})
+                .set_stride({c, 1, c, c}));
+        auto mulAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                 .set_name("mul_node")
+                                 .set_mode(hipdnn_frontend::PointwiseMode_t::MUL);
+        auto mulOutput = graph->pointwise(input, scale, mulAttributes);
+
+        // create add: per-channel bias of shape {1, c, 1, 1}
+        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("bias")
+                .set_dim({1, c, 1, 1})
+                .set_stride({c, 1, c, c}));
+        auto addAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                 .set_name("add_node")
+                                 .set_mode(hipdnn_frontend::PointwiseMode_t::ADD);
+        auto addOutput = graph->pointwise(mulOutput, bias, addAttributes);
+
+        // create relu
+        auto reluAttributes = hipdnn_frontend::graph::PointwiseAttributes()
+                                  .set_name("relu_node")
+                                  .set_mode(hipdnn_frontend::PointwiseMode_t::RELU_FWD);
+        auto reluOutput = graph->pointwise(addOutput, reluAttributes);
+
+        // create conv: the filter is {k, c, r, s}, channel axis has stride 1
+        auto filter = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("filter")
+                .set_dim({k, c, r, s})
+                .set_stride({c * r * s, 1, c * s, c}));
+        auto convFpropAttributes = hipdnn_frontend::graph::ConvFpropAttributes()
+                                       .set_name("conv_fprop_node")
+                                       .set_padding({1, 1})
+                                       .set_stride({1, 1})
+                                       .set_dilation({1, 1});
+        auto convOutput = graph->conv_fprop(reluOutput, filter, convFpropAttributes);
+        convOutput->set_output(true);
+
+        // create genstats on the convolution result
+        auto genstatsAttributes = hipdnn_frontend::graph::GenstatsAttributes();
+        auto [sum, sqSum] = graph->genstats(convOutput, genstatsAttributes);
+
+        sum->set_output(true);
+        sqSum->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input, filter, scale, bias, convOutput, sum, sqSum);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Create backend failed.\n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, filter, scale, bias, convOutput, sum, sqSum]
+        = buildPointwiseConvGenstatsGraph(handle);
+
+    // Allocate DCU memory
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
+                                                              input->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> wTensor(filter->get_dim(), filter->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> biasTensor(bias->get_dim(), bias->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> scaleTensor(scale->get_dim(),
+                                                              scale->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> outTensor(convOutput->get_dim(),
+                                                            convOutput->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> sumTensor(sum->get_dim(), sum->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> sqSumTensor(sqSum->get_dim(),
+                                                              sqSum->get_stride());
+
+    // Map each graph tensor uid to the device pointer backing it.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[filter->get_uid()] = wTensor.memory().deviceData();
+    variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
+    variantPack[scale->get_uid()] = scaleTensor.memory().deviceData();
+    variantPack[convOutput->get_uid()] = outTensor.memory().deviceData();
+    variantPack[sum->get_uid()] = sumTensor.memory().deviceData();
+    variantPack[sqSum->get_uid()] = sqSumTensor.memory().deviceData();
+
+    // Size the scratch buffer from what the built graph reports.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Pointwise_conv_genstats graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/fusion/ReshapeTranspose.cpp
+++ b/cpp/fusion/ReshapeTranspose.cpp
+#include <iostream>
+
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+// Example program: builds a hipDNN graph that reshapes a 4-D tensor into a
+// 5-D one by splitting the channel axis, transposes the result, and executes
+// the graph once.
+// The device buffers are only allocated here; this example never fills them.
+// Returns 1 if no backend object is available, 0 after the graph has executed.
+int main()
+{
+    using InputType = hipdnn_data_sdk::types::half;
+
+    const int64_t n = 2; // Batch size
+    // Input
+    const int64_t c = 64; // Number of channels
+    const int64_t h = 4; // Height
+    const int64_t w = 5; // Width
+
+    // Builds the graph against `handle` and returns it together with the
+    // tensors that have to be bound to device memory at execution time.
+    auto buildReshapeTransposeGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        graph->set_name("reshape_transpose_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT); //
+
+        const int64_t vectorCount = 32;
+        static_assert(c % vectorCount == 0,
+                      "channel count must be a multiple of the vector count");
+
+        // Split the channel axis into (c / vectorCount, vectorCount) so the
+        // target shape always follows n/c/h/w; this is {2, 2, 32, 4, 5} for
+        // the sizes above.
+        auto shape = std::vector<int64_t>{n, c / vectorCount, vectorCount, h, w};
+        // Moves the vectorCount-wide axis (index 2) to the last position.
+        auto permutation = std::vector<int64_t>{0, 1, 3, 4, 2};
+
+        // create reshape
+        // Input dims are {n, c, h, w}; the channel axis has stride 1.
+        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, 1, c * w, c}));
+        auto reshapeAttributes
+            = hipdnn_frontend::graph::ReshapeAttributes().set_name("reshape_node").set_dim(shape);
+        auto reshapeOutput = graph->reshape(input, reshapeAttributes);
+
+        // create transpose
+        auto transposeAttributes = hipdnn_frontend::graph::TransposeAttributes()
+                                       .set_name("transpose_node")
+                                       .set_permutation(permutation);
+        auto output = graph->transpose(reshapeOutput, transposeAttributes);
+        output->set_output(true).set_vector_count_and_dimension(vectorCount, 1);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input, output);
+    };
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Create backend failed.\n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, output] = buildReshapeTransposeGraph(handle);
+
+    // Allocate device memory sized from the dims/strides the graph reports.
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
+                                                              input->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> outputTensor(output->get_dim(),
+                                                               output->get_stride());
+
+    // Map each graph tensor uid to the device pointer backing it.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[output->get_uid()] = outputTensor.memory().deviceData();
+
+    // Size the scratch buffer from what the built graph reports.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Reshape_transpose graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/genstats/Genstats.cpp
+++ b/cpp/genstats/Genstats.cpp
+#include <iostream>
+
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+// Example program: builds a hipDNN graph with a single genstats node, marks
+// both of its results (sum and sqSum) as outputs, and executes the graph once.
+// The device buffers are only allocated here; this example never fills them.
+// Returns 1 if no backend object is available, 0 after the graph has executed.
+int main()
+{
+    using InputType = hipdnn_data_sdk::types::half;
+
+    const int64_t n = 2; // Batch size
+    // Input
+    const int64_t c = 3; // Number of channels
+    const int64_t h = 4; // Height
+    const int64_t w = 5; // Width
+
+    // Builds the graph against `handle` and returns it together with the
+    // tensors that have to be bound to device memory at execution time.
+    auto buildGenstatsGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        graph->set_name("genstats_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT); //
+
+        // create genstats
+        // Input dims are {n, c, h, w}; the channel axis has stride 1.
+        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("input")
+                .set_dim({n, c, h, w})
+                .set_stride({c * h * w, 1, c * w, c}));
+        auto genstatsAttributes
+            = hipdnn_frontend::graph::GenstatsAttributes().set_name("genstats_node");
+        auto [sum, sqSum] = graph->genstats(input, genstatsAttributes);
+        sum->set_output(true);
+        sqSum->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, input, sum, sqSum);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Create backend failed.\n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, input, sum, sqSum] = buildGenstatsGraph(handle);
+
+    // Allocate DCU memory
+    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
+                                                              input->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> sumTensor(sum->get_dim(), sum->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> sqSumTensor(sqSum->get_dim(),
+                                                              sqSum->get_stride());
+    // Map each graph tensor uid to the device pointer backing it.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
+    variantPack[sum->get_uid()] = sumTensor.memory().deviceData();
+    variantPack[sqSum->get_uid()] = sqSumTensor.memory().deviceData();
+
+    // Size the scratch buffer from what the built graph reports.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "Genstats graph execution complete. \n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}
--- a/cpp/getitem_backward/GetitemBackwardIndices.cpp
+++ b/cpp/getitem_backward/GetitemBackwardIndices.cpp
+// Copyright © Advanced Micro Devices, Inc., or its affiliates.
+// SPDX-License-Identifier:  MIT
+
+#include <iostream>
+
+#include "utils.hpp"
+
+#include <hipdnn_data_sdk/utilities/Tensor.hpp>
+#include <hipdnn_data_sdk/utilities/Workspace.hpp>
+#include <hipdnn_frontend.hpp>
+
+// Example program: builds a hipDNN graph with a single getitem_backward node
+// that takes dy plus two index tensors and produces dx and an error tensor,
+// then executes the graph once.
+// The device buffers are only allocated here; this example never fills them.
+// Returns 1 if no backend object is available, 0 after the graph has executed.
+int main()
+{
+    using InputType = hipdnn_data_sdk::types::half;
+    // Element type of the index buffers; must match the DataType::INT32 the
+    // index tensors are declared with in the graph below.
+    using IndexType = int32_t;
+
+    const int64_t dyN0 = 64;
+    const int64_t dyN1 = 32;
+
+    const int64_t dxN0 = 128;
+    const int64_t dxN1 = 64;
+
+    // Builds the graph against `handle` and returns it together with every
+    // tensor that has to be bound to device memory at execution time.
+    auto buildGetitemBackwardGraph = [=](hipdnnHandle_t handle) {
+        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
+
+        graph->set_name("getitem_backward_graph")
+            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
+            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
+
+        auto dy = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("dy")
+                .set_dim({dyN0, dyN1})
+                .set_stride({dyN1, 1}));
+
+        // The index tensors use the same {dyN0, dyN1} extents as dy.
+        // NOTE(review): presumably the op requires index shape == dy shape; confirm.
+        auto xIndices = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("x_indeices")
+                .set_dim({dyN0, dyN1})
+                .set_stride({dyN1, 1})
+                .set_data_type(hipdnn_frontend::DataType::INT32));
+        auto yIndices = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
+            hipdnn_frontend::graph::Tensor_attributes()
+                .set_name("y_indeices")
+                .set_dim({dyN0, dyN1})
+                .set_stride({dyN1, 1})
+                .set_data_type(hipdnn_frontend::DataType::INT32));
+
+        auto getitemBackwardAttributes = hipdnn_frontend::graph::GetitemBackwardAttributes()
+                                             .set_dims({0, 1})
+                                             .set_indices({xIndices, yIndices})
+                                             .set_offset(0)
+                                             .set_name("getitem_backward");
+
+        auto [dx, error] = graph->getitem_backward(dy, getitemBackwardAttributes);
+        // dx's shape is set explicitly here rather than left to the graph.
+        dx->set_output(true).set_dim({dxN0, dxN1}).set_stride({dxN1, 1});
+        error->set_output(true);
+
+        // build graph
+        HIPDNN_FE_CHECK(graph->build(handle));
+
+        return std::make_tuple(graph, dy, xIndices, yIndices, dx, error);
+    };
+
+    auto backend = hipdnn_frontend::detail::hipdnnBackend();
+    if(!backend)
+    {
+        std::cout << "Create backend failed.\n";
+        return 1;
+    }
+
+    hipdnnHandle_t handle;
+    HIPDNN_CHECK(backend->create(&handle));
+
+    auto [graph, dy, xindex, yindex, dx, error] = buildGetitemBackwardGraph(handle);
+
+    hipdnn_data_sdk::utilities::Tensor<InputType> dyTensor(dy->get_dim(), dy->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<InputType> dxTensor(dx->get_dim(), dx->get_stride());
+    // Index buffers hold IndexType elements: allocating them with the 2-byte
+    // InputType would give the graph buffers half the size it was told about.
+    hipdnn_data_sdk::utilities::Tensor<IndexType> xIndexTensor(xindex->get_dim(),
+                                                               xindex->get_stride());
+    hipdnn_data_sdk::utilities::Tensor<IndexType> yIndexTensor(yindex->get_dim(),
+                                                               yindex->get_stride());
+    // NOTE(review): the error tensor's data type is not set in this file; it is
+    // allocated with InputType as before. Confirm against the op's contract.
+    hipdnn_data_sdk::utilities::Tensor<InputType> errorTensor(error->get_dim(),
+                                                              error->get_stride());
+
+    // Map each graph tensor uid to the device pointer backing it.
+    std::unordered_map<int64_t, void*> variantPack;
+    variantPack[dy->get_uid()] = dyTensor.memory().deviceData();
+    variantPack[dx->get_uid()] = dxTensor.memory().deviceData();
+    variantPack[xindex->get_uid()] = xIndexTensor.memory().deviceData();
+    variantPack[yindex->get_uid()] = yIndexTensor.memory().deviceData();
+    variantPack[error->get_uid()] = errorTensor.memory().deviceData();
+
+    // Size the scratch buffer from what the built graph reports.
+    int64_t workspaceSize = 0;
+    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
+    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
+
+    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
+
+    std::cout << "GetitemBackwardIndices graph execution complete.\n";
+
+    HIPDNN_CHECK(backend->destroy(handle));
+    return 0;
+}