#include #include "utils.hpp" #include #include #include int main() { using InputType = hipdnn_data_sdk::types::half; const int64_t n = 16; // Batch size const int64_t c = 16; // Number of channels const int64_t h = 16; // Height const int64_t w = 16; // Width auto buildAddLayernormGraph = [=](hipdnnHandle_t handle) { auto graph = std::make_shared(); graph->set_name("add_layernorm_graph") .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType()) .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType()) .set_compute_data_type(hipdnn_frontend::DataType::FLOAT); // // create add auto input1 = std::make_shared( hipdnn_frontend::graph::Tensor_attributes() .set_name("input1") .set_dim({n, c, h, w}) .set_stride({c * h * w, 1, c * w, c})); auto input2 = std::make_shared( hipdnn_frontend::graph::Tensor_attributes() .set_name("input2") .set_dim({n, c, h, w}) .set_stride({c * h * w, 1, c * w, c})); auto addAttributes = hipdnn_frontend::graph::PointwiseAttributes() .set_name("add_node") .set_mode(hipdnn_frontend::PointwiseMode_t::ADD); auto addOutput = graph->pointwise(input1, input2, addAttributes); addOutput->set_output(true); // create layernorm auto scale = std::make_shared( hipdnn_frontend::graph::Tensor_attributes().set_name("scale").set_dim({w}).set_stride( {1})); auto bias = std::make_shared( hipdnn_frontend::graph::Tensor_attributes().set_name("bias").set_dim({w}).set_stride( {1})); auto epsilon = std::make_shared( hipdnn_frontend::graph::Tensor_attributes().set_name("epsilon").set_value(1e-5)); auto layernormAttributes = hipdnn_frontend::graph::LayernormAttributes() .set_name("layernorm_node") .set_epsilon(epsilon) .set_forward_phase(hipdnn_frontend::NormFwdPhase_t::INFERENCE); auto [y, mean, inv_variance] = graph->layernorm(addOutput, scale, bias, layernormAttributes); y->set_output(true); // build graph HIPDNN_FE_CHECK(graph->build(handle)); return std::make_tuple(graph, input1, input2, scale, bias, addOutput, y); }; auto backend = hipdnn_frontend::detail::hipdnnBackend(); if(!backend) { std::cout << "Creat backend failed. \n"; return 1; } hipdnnHandle_t handle; HIPDNN_CHECK(backend->create(&handle)); auto [graph, input1, input2, scale, bias, addOutput, y] = buildAddLayernormGraph(handle); // Allocate DCU memory hipdnn_data_sdk::utilities::Tensor input1Tensor(input1->get_dim(), input1->get_stride()); hipdnn_data_sdk::utilities::Tensor input2Tensor(input2->get_dim(), input2->get_stride()); hipdnn_data_sdk::utilities::Tensor scaleTensor(scale->get_dim(), scale->get_stride()); hipdnn_data_sdk::utilities::Tensor biasTensor(bias->get_dim(), bias->get_stride()); hipdnn_data_sdk::utilities::Tensor yTensor(y->get_dim(), y->get_stride()); hipdnn_data_sdk::utilities::Tensor addOutputTensor(addOutput->get_dim(), addOutput->get_stride()); std::unordered_map variantPack; variantPack[input1->get_uid()] = input1Tensor.memory().deviceData(); variantPack[input2->get_uid()] = input2Tensor.memory().deviceData(); variantPack[scale->get_uid()] = scaleTensor.memory().deviceData(); variantPack[bias->get_uid()] = biasTensor.memory().deviceData(); variantPack[y->get_uid()] = yTensor.memory().deviceData(); variantPack[addOutput->get_uid()] = addOutputTensor.memory().deviceData(); int64_t workspaceSize = 0; HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize)); const hipdnn_data_sdk::utilities::Workspace workspace(static_cast(workspaceSize)); HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get())); std::cout << "addlayernorm graph execution complete. \n"; HIPDNN_CHECK(backend->destroy(handle)); return 0; }