Commit ca34d4d2 authored by yanjl1's avatar yanjl1
Browse files

Initial

parents
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
// Sample: builds a one-node RMSNorm graph in inference mode with the hipDNN
// frontend, binds device buffers to its tensors and executes it once.
//
// Returns 0 after a successful run and 1 when no backend is available. Failures
// detected by HIPDNN_CHECK / HIPDNN_FE_CHECK (utils.hpp) terminate the process
// instead of returning.
int main()
{
    using InputType = float;

    const int64_t n = 1; // Batch size
    const int64_t c = 2; // Number of channels
    const int64_t h = 3; // Height
    const int64_t w = 4; // Width

    // Describes the graph, builds it against `handle` and returns the graph together
    // with the tensor attributes that need device buffers at execution time.
    auto buildRmsnormInferenceGraph = [=](hipdnnHandle_t handle) {
        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
        graph->set_name("rmsnorm_inference_graph")
            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);

        // Fully packed 4-D input of shape {n, c, h, w}.
        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("input")
                .set_dim({n, c, h, w})
                .set_stride({c * h * w, h * w, w, 1}));

        // Scale has extent w on the last axis only.
        auto scale = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("scale")
                .set_dim({1, 1, 1, w})
                .set_stride({w, w, w, 1}));

        // Epsilon is passed by value, so it gets no entry in the variant pack below.
        auto epsilon = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes().set_name("epsilon").set_value(1e-5));

        auto rmsnormAttributes = hipdnn_frontend::graph::RMSNormAttributes()
                                     .set_name("rmsnorm_inference_node")
                                     .set_epsilon(epsilon)
                                     .set_forward_phase(hipdnn_frontend::NormFwdPhase_t::INFERENCE);

        // Only `output` is marked as a graph output; the second result is not used here.
        auto [output, invVariance] = graph->rmsnorm(input, scale, rmsnormAttributes);
        output->set_output(true);

        // build graph
        HIPDNN_FE_CHECK(graph->build(handle));
        return std::make_tuple(graph, input, scale, output);
    };

    auto backend = hipdnn_frontend::detail::hipdnnBackend();
    if(!backend)
    {
        // Spelling and format aligned with the other samples' failure message.
        std::cout << "Create backend failed.\n";
        return 1;
    }

    // Value-initialized so the handle never holds an indeterminate value.
    hipdnnHandle_t handle{};
    HIPDNN_CHECK(backend->create(&handle));

    auto [graph, input, scale, output] = buildRmsnormInferenceGraph(handle);

    // One buffer per bound tensor, sized from the dims/strides the graph reports.
    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
                                                              input->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> scaleTensor(scale->get_dim(),
                                                              scale->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> outputTensor(output->get_dim(),
                                                               output->get_stride());

    // Maps each tensor UID to the device pointer the graph should use for it.
    std::unordered_map<int64_t, void*> variantPack;
    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
    variantPack[scale->get_uid()] = scaleTensor.memory().deviceData();
    variantPack[output->get_uid()] = outputTensor.memory().deviceData();

    int64_t workspaceSize = 0;
    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));

    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
    std::cout << "Rmsnorm_inference graph execution complete. \n";

    HIPDNN_CHECK(backend->destroy(handle));
    return 0;
}
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
// Sample: builds a one-node rng graph (uniform distribution) with the hipDNN
// frontend, binds device buffers for seed, offset and output, and executes it once.
//
// Returns 0 after a successful run and 1 when no backend is available. Failures
// detected by HIPDNN_CHECK / HIPDNN_FE_CHECK (utils.hpp) terminate the process
// instead of returning.
int main()
{
    using InputType = float;

    // Describes the graph, builds it against `handle` and returns the graph together
    // with the tensor attributes that need device buffers at execution time.
    auto buildRngGraph = [=](hipdnnHandle_t handle) {
        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
        graph->set_name("rng_graph")
            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);

        // Seed and offset are single-element tensors.
        auto seed = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes().set_name("seed").set_dim({1}).set_stride(
                {1}));
        auto offset = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes().set_name("offset").set_dim({1}).set_stride(
                {1}));

        auto rngAttributes = hipdnn_frontend::graph::RngAttributes()
                                 .set_name("rng_node")
                                 .set_seed(seed)
                                 .set_offset(offset)
                                 .set_dim({2, 2})
                                 // Packed row-major strides for a 2x2 tensor. The previous
                                 // {1, 1} made elements (0,1) and (1,0) share one address.
                                 .set_stride({2, 1})
                                 .set_distribution(hipdnn_frontend::RngDistribution_t::UNIFORM);

        auto output = graph->rng(seed, offset, rngAttributes);
        output->set_output(true);

        // build graph
        HIPDNN_FE_CHECK(graph->build(handle));
        return std::make_tuple(graph, seed, offset, output);
    };

    auto backend = hipdnn_frontend::detail::hipdnnBackend();
    if(!backend)
    {
        // Spelling and format aligned with the other samples' failure message.
        std::cout << "Create backend failed.\n";
        return 1;
    }

    // Value-initialized so the handle never holds an indeterminate value.
    hipdnnHandle_t handle{};
    HIPDNN_CHECK(backend->create(&handle));

    auto [graph, seed, offset, output] = buildRngGraph(handle);

    // NOTE(review): seed and offset buffers use InputType (float) elements; confirm
    // this matches the element type the rng node expects for these inputs.
    hipdnn_data_sdk::utilities::Tensor<InputType> seedTensor(seed->get_dim(), seed->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> offsetTensor(offset->get_dim(),
                                                               offset->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> outputTensor(output->get_dim(),
                                                               output->get_stride());

    // Maps each tensor UID to the device pointer the graph should use for it.
    std::unordered_map<int64_t, void*> variantPack;
    variantPack[seed->get_uid()] = seedTensor.memory().deviceData();
    variantPack[offset->get_uid()] = offsetTensor.memory().deviceData();
    variantPack[output->get_uid()] = outputTensor.memory().deviceData();

    int64_t workspaceSize = 0;
    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));

    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
    std::cout << "Rng graph execution complete. \n";

    HIPDNN_CHECK(backend->destroy(handle));
    return 0;
}
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
int main()
{
using InputType = float;
const int64_t n0 = 2;
const int64_t n1 = 4;
const int64_t n2 = 8;
auto buildRopeBackwardGraph = [=](hipdnnHandle_t handle) {
auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
graph->set_name("rope_backward_graph")
.set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
auto dy = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("dy")
.set_dim({n0, n1, n2})
.set_stride({n1 * n2, n2, 1}));
auto cos = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("cos")
.set_dim({n0, n1, n2})
.set_stride({n1 * n2, n2, 1}));
auto sin = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("sin")
.set_dim({n0, n1, n2})
.set_stride({n1 * n2, n2, 1}));
auto ropeBackwardAttributes
= hipdnn_frontend::graph::RopeBackwardAttributes().set_name("rope_backward");
auto dx = graph->rope_backward(dy, cos, sin, ropeBackwardAttributes);
dx->set_output(true).set_dim({n0, n1, n2}).set_stride({n1 * n2, n2, 1});
// build graph
HIPDNN_FE_CHECK(graph->build(handle));
return std::make_tuple(graph, dy, cos, sin, dx);
};
auto backend = hipdnn_frontend::detail::hipdnnBackend();
if(!backend)
{
std::cout << "Create backend failed.\n";
return 1;
}
hipdnnHandle_t handle;
HIPDNN_CHECK(backend->create(&handle));
auto [graph, dy, cos, sin, dx] = buildRopeBackwardGraph(handle);
hipdnn_data_sdk::utilities::Tensor<InputType> dyTensor(dy->get_dim(), dy->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> cosTensor(cos->get_dim(), cos->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> sinTensor(sin->get_dim(), sin->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> dxTensor(dx->get_dim(), dx->get_stride());
std::unordered_map<int64_t, void*> variantPack;
variantPack[dy->get_uid()] = dyTensor.memory().deviceData();
variantPack[cos->get_uid()] = cosTensor.memory().deviceData();
variantPack[sin->get_uid()] = sinTensor.memory().deviceData();
variantPack[dx->get_uid()] = dxTensor.memory().deviceData();
int64_t workspaceSize = 0;
HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
std::cout << "RopeBackward graph execution complete.\n";
HIPDNN_CHECK(backend->destroy(handle));
return 0;
}
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
int main()
{
using InputType = float;
const int64_t n0 = 2;
const int64_t n1 = 4;
const int64_t n2 = 8;
auto buildRopeForwardGraph = [=](hipdnnHandle_t handle) {
auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
graph->set_name("rope_forward_graph")
.set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
auto x = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("x")
.set_dim({n0, n1, n2})
.set_stride({n1 * n2, n2, 1}));
auto cos = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("cos")
.set_dim({n0, n1, n2})
.set_stride({n1 * n2, n2, 1}));
auto sin = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("sin")
.set_dim({n0, n1, n2})
.set_stride({n1 * n2, n2, 1}));
auto ropeForwardAttributes
= hipdnn_frontend::graph::RopeForwardAttributes().set_name("rope_forward");
auto y = graph->rope_forward(x, cos, sin, ropeForwardAttributes);
y->set_output(true).set_dim({n0, n1, n2}).set_stride({n1 * n2, n2, 1});
// build graph
HIPDNN_FE_CHECK(graph->build(handle));
return std::make_tuple(graph, x, cos, sin, y);
};
auto backend = hipdnn_frontend::detail::hipdnnBackend();
if(!backend)
{
std::cout << "Create backend failed.\n";
return 1;
}
hipdnnHandle_t handle;
HIPDNN_CHECK(backend->create(&handle));
auto [graph, x, cos, sin, y] = buildRopeForwardGraph(handle);
hipdnn_data_sdk::utilities::Tensor<InputType> xTensor(x->get_dim(), x->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> cosTensor(cos->get_dim(), cos->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> sinTensor(sin->get_dim(), sin->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> yTensor(y->get_dim(), y->get_stride());
std::unordered_map<int64_t, void*> variantPack;
variantPack[x->get_uid()] = xTensor.memory().deviceData();
variantPack[cos->get_uid()] = cosTensor.memory().deviceData();
variantPack[sin->get_uid()] = sinTensor.memory().deviceData();
variantPack[y->get_uid()] = yTensor.memory().deviceData();
int64_t workspaceSize = 0;
HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
std::cout << "RopeForward graph execution complete.\n";
HIPDNN_CHECK(backend->destroy(handle));
return 0;
}
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
// Sample: builds a one-node SDPA graph (inference, half-precision I/O with float
// intermediate/compute types) with the hipDNN frontend and executes it once.
// Bias, padding-mask sequence lengths and the stats output are only wired into the
// graph and the variant pack when the corresponding flag below is true.
//
// Returns 0 after a successful run and 1 when no backend is available. Failures
// detected by HIPDNN_CHECK / HIPDNN_FE_CHECK (utils.hpp) terminate the process
// instead of returning.
int main()
{
    using InputType = hipdnn_data_sdk::types::half;

    // q, k and v are declared below as {b, headDim*, seqLen*, d*}.
    // NOTE(review): despite their names, headDimQ/K/V are the extents of axis 1, which
    // is presumably the number of attention heads, while dQK/dV are the innermost
    // extents -- confirm against the layout the sdpa node expects.
    const int64_t b = 2; // batch size
    const int64_t headDimQ = 4; // extent of axis 1 of q
    const int64_t headDimK = 4; // extent of axis 1 of k
    const int64_t headDimV = 4; // extent of axis 1 of v
    const int64_t seqLenQ = 64; // q tensor is padded to this seq length
    const int64_t seqLenKV = 64; // k and v tensor is padded to this seq length
    const int64_t dQK = 32; // innermost extent of q and k
    const int64_t dV = 32; // innermost extent of v
    const float attnScale = 1.0f;

    // Feature switches; all optional inputs/outputs are disabled in this sample.
    const bool generateStats = false;
    const bool causalMask = false;
    const bool paddingMask = false;
    const bool alibiMask = false;
    const bool hasAttnBias = false;

    // Describes the graph, builds it against `handle` and returns the graph together
    // with every tensor attribute the caller may need to bind.
    auto buildSdpaInferenceGraph = [=](hipdnnHandle_t handle) {
        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
        graph->set_name("sdpa_inference_graph")
            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_intermediate_data_type(hipdnn_frontend::DataType::FLOAT)
            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);

        auto q = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("q")
                .set_dim({b, headDimQ, seqLenQ, dQK})
                .set_stride({headDimQ * seqLenQ * dQK, seqLenQ * dQK, dQK, 1}));
        auto k = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("k")
                .set_dim({b, headDimK, seqLenKV, dQK})
                .set_stride({headDimK * seqLenKV * dQK, seqLenKV * dQK, dQK, 1}));
        auto v = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("v")
                .set_dim({b, headDimV, seqLenKV, dV})
                .set_stride({headDimV * seqLenKV * dV, seqLenKV * dV, dV, 1}));

        // Optional inputs are always described, but only attached to the node when
        // their flag is set.
        auto bias = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("bias")
                .set_dim({b, 1, seqLenQ, seqLenKV})
                .set_stride({seqLenQ * seqLenKV, seqLenQ * seqLenKV, seqLenKV, 1}));
        auto seqLengthQ = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("seq_length_q")
                .set_data_type(hipdnn_frontend::DataType::INT32)
                .set_dim({b, 1, 1, 1})
                .set_stride({1, 1, 1, 1}));
        auto seqLengthKV = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("seq_length_kv")
                .set_data_type(hipdnn_frontend::DataType::INT32)
                .set_dim({b, 1, 1, 1})
                .set_stride({1, 1, 1, 1}));

        auto sdpaAttributes = hipdnn_frontend::graph::SdpaAttributes()
                                  .set_name("sdpa_inference_node")
                                  .set_generate_stats(generateStats)
                                  .set_alibi_mask(alibiMask)
                                  .set_attn_scale_value(attnScale);
        if(causalMask)
        {
            sdpaAttributes.set_diagonal_alignment(hipdnn_frontend::DiagonalAlignment_t::TOP_LEFT)
                .set_diagonal_band_right_bound(0);
        }
        if(hasAttnBias)
        {
            sdpaAttributes.set_bias(bias);
        }
        if(paddingMask)
        {
            sdpaAttributes.set_padding_mask(paddingMask)
                .set_seq_len_q(seqLengthQ)
                .set_seq_len_kv(seqLengthKV);
        }

        auto [outO, outStats] = graph->sdpa(q, k, v, sdpaAttributes);
        outO->set_output(true);
        if(generateStats)
        {
            outStats->set_output(true).set_data_type(hipdnn_frontend::DataType_t::FLOAT);
        }

        // build graph
        HIPDNN_FE_CHECK(graph->build(handle));
        return std::make_tuple(graph, q, k, v, bias, outStats, seqLengthQ, seqLengthKV, outO);
    };

    auto backend = hipdnn_frontend::detail::hipdnnBackend();
    if(!backend)
    {
        // Spelling and format aligned with the other samples' failure message.
        std::cout << "Create backend failed.\n";
        return 1;
    }

    // Value-initialized so the handle never holds an indeterminate value.
    hipdnnHandle_t handle{};
    HIPDNN_CHECK(backend->create(&handle));

    auto [graph, q, k, v, bias, outStats, seqLengthQ, seqLengthKV, outO]
        = buildSdpaInferenceGraph(handle);

    hipdnn_data_sdk::utilities::Tensor<InputType> qTensor(q->get_dim(), q->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> kTensor(k->get_dim(), k->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> vTensor(v->get_dim(), v->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> oTensor(outO->get_dim(), outO->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> biasTensor(bias->get_dim());
    // outStats is only dereferenced when stats were requested from the node.
    hipdnn_data_sdk::utilities::Tensor<float> outStatsTensor(
        generateStats ? outStats->get_dim() : std::vector<int64_t>{});
    hipdnn_data_sdk::utilities::Tensor<int32_t> seqLengthQTensor(seqLengthQ->get_dim());
    hipdnn_data_sdk::utilities::Tensor<int32_t> seqLengthKVTensor(seqLengthKV->get_dim());

    // Maps each tensor UID to its device pointer; optional tensors are bound only
    // when the matching feature is enabled.
    std::unordered_map<int64_t, void*> variantPack;
    variantPack[q->get_uid()] = qTensor.memory().deviceData();
    variantPack[k->get_uid()] = kTensor.memory().deviceData();
    variantPack[v->get_uid()] = vTensor.memory().deviceData();
    variantPack[outO->get_uid()] = oTensor.memory().deviceData();
    if(hasAttnBias)
    {
        variantPack[bias->get_uid()] = biasTensor.memory().deviceData();
    }
    if(generateStats)
    {
        variantPack[outStats->get_uid()] = outStatsTensor.memory().deviceData();
    }
    if(paddingMask)
    {
        variantPack[seqLengthQ->get_uid()] = seqLengthQTensor.memory().deviceData();
        variantPack[seqLengthKV->get_uid()] = seqLengthKVTensor.memory().deviceData();
    }

    int64_t workspaceSize = 0;
    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));

    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
    std::cout << "Sdpa_inference graph execution complete. \n";

    HIPDNN_CHECK(backend->destroy(handle));
    return 0;
}
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
// Sample: builds a one-node slice graph with the hipDNN frontend, binds device
// buffers for the input and the sliced output, and executes it once.
//
// Returns 0 after a successful run and 1 when no backend is available. Failures
// detected by HIPDNN_CHECK / HIPDNN_FE_CHECK (utils.hpp) terminate the process
// instead of returning.
int main()
{
    using InputType = float;

    const int64_t n = 2; // BATCH SIZE
    const int64_t c = 16; // CHANNELS (FEATURES)
    const int64_t h = 512; // HEIGHT (SPATIAL DIMENSION)
    const int64_t w = 32; // WIDTH (SPATIAL DIMENSION)

    // Describes the graph, builds it against `handle` and returns the graph together
    // with the tensor attributes that need device buffers at execution time.
    auto buildSliceGraph = [=](hipdnnHandle_t handle) {
        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
        graph->set_name("slice_graph")
            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);

        // Fully packed 4-D input of shape {n, c, h, w}.
        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("input")
                .set_dim({n, c, h, w})
                .set_stride({c * h * w, h * w, w, 1}));

        // One {0, 1} pair per input axis.
        auto sliceAttributes = hipdnn_frontend::graph::SliceAttributes()
                                   .set_name("slice_node")
                                   .set_slices({{0, 1}, {0, 1}, {0, 1}, {0, 1}});

        auto output = graph->slice(input, sliceAttributes);
        output->set_output(true);

        // build graph
        HIPDNN_FE_CHECK(graph->build(handle));
        return std::make_tuple(graph, input, output);
    };

    auto backend = hipdnn_frontend::detail::hipdnnBackend();
    if(!backend)
    {
        // Spelling and format aligned with the other samples' failure message.
        std::cout << "Create backend failed.\n";
        return 1;
    }

    // Value-initialized so the handle never holds an indeterminate value.
    hipdnnHandle_t handle{};
    HIPDNN_CHECK(backend->create(&handle));

    auto [graph, input, output] = buildSliceGraph(handle);

    // The output buffer is sized from the dims/strides reported by the output attributes.
    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
                                                              input->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> outputTensor(output->get_dim(),
                                                               output->get_stride());

    // Maps each tensor UID to the device pointer the graph should use for it.
    std::unordered_map<int64_t, void*> variantPack;
    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
    variantPack[output->get_uid()] = outputTensor.memory().deviceData();

    int64_t workspaceSize = 0;
    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));

    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
    std::cout << "Slice graph execution complete. \n";

    HIPDNN_CHECK(backend->destroy(handle));
    return 0;
}
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <hipdnn_frontend/Types.hpp>
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
int main()
{
using InputType = hipdnn_data_sdk::types::half;
const int64_t n = 16; // Batch size
const int64_t c = 10; // Number of classes
auto buildSoftMarginLossBackwardGraph = [=](hipdnnHandle_t handle) {
auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
graph->set_name("soft_margin_loss_backward_graph")
.set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("input")
.set_dim({n, c})
.set_stride({c, 1}));
auto target = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("target")
.set_dim({n, c})
.set_stride({c, 1}));
auto doutput = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("doutput")
.set_dim({n, c})
.set_stride({c, 1}));
auto softMarginLossBackwardAttributes
= hipdnn_frontend::graph::SoftMarginLossBackwardAttributes()
.set_name("soft_margin_loss_backward")
.set_reduction(hipdnn_frontend::ReductionMode::NONE);
auto dinput = graph->soft_margin_loss_backward(
input, target, doutput, softMarginLossBackwardAttributes);
dinput->set_output(true);
// build graph
HIPDNN_FE_CHECK(graph->build(handle));
return std::make_tuple(graph, input, target, doutput, dinput);
};
auto backend = hipdnn_frontend::detail::hipdnnBackend();
if(!backend)
{
std::cout << "Create backend failed.\n";
return 1;
}
hipdnnHandle_t handle;
HIPDNN_CHECK(backend->create(&handle));
auto [graph, input, target, doutput, dinput] = buildSoftMarginLossBackwardGraph(handle);
hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
input->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> targetTensor(target->get_dim(),
target->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> doutputTensor(doutput->get_dim(),
doutput->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> dinputTensor(dinput->get_dim(),
dinput->get_stride());
std::unordered_map<int64_t, void*> variantPack;
variantPack[input->get_uid()] = inputTensor.memory().deviceData();
variantPack[target->get_uid()] = targetTensor.memory().deviceData();
variantPack[doutput->get_uid()] = doutputTensor.memory().deviceData();
variantPack[dinput->get_uid()] = dinputTensor.memory().deviceData();
int64_t workspaceSize = 0;
HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
std::cout << "SoftMarginLossBackward graph execution complete.\n";
HIPDNN_CHECK(backend->destroy(handle));
return 0;
}
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <hipdnn_frontend/Types.hpp>
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
int main()
{
using InputType = hipdnn_data_sdk::types::half;
const int64_t n = 16; // Batch size
const int64_t c = 10; // Number of classes
const int64_t h = 16;
const int64_t w = 32;
auto buildSoftMarginLossGraph = [=](hipdnnHandle_t handle) {
auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
graph->set_name("soft_margin_loss_graph")
.set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("input")
.set_dim({n, c, h, w})
.set_stride({c * h * w, h * w, w, 1}));
auto target = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("target")
.set_dim({n, c, h, w})
.set_stride({c * h * w, h * w, w, 1}));
auto softMarginLossAttributes = hipdnn_frontend::graph::SoftMarginLossAttributes()
.set_reduction(hipdnn_frontend::ReductionMode::ADD)
.set_name("soft_margin_loss");
auto output = graph->soft_margin_loss(input, target, softMarginLossAttributes);
output->set_output(true);
// build graph
HIPDNN_FE_CHECK(graph->build(handle));
return std::make_tuple(graph, input, target, output);
};
auto backend = hipdnn_frontend::detail::hipdnnBackend();
if(!backend)
{
std::cout << "Create backend failed.\n";
return 1;
}
hipdnnHandle_t handle;
HIPDNN_CHECK(backend->create(&handle));
auto [graph, input, target, output] = buildSoftMarginLossGraph(handle);
hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
input->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> targetTensor(target->get_dim(),
target->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> outputTensor(output->get_dim(),
output->get_stride());
std::unordered_map<int64_t, void*> variantPack;
variantPack[input->get_uid()] = inputTensor.memory().deviceData();
variantPack[target->get_uid()] = targetTensor.memory().deviceData();
variantPack[output->get_uid()] = outputTensor.memory().deviceData();
int64_t workspaceSize = 0;
HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
std::cout << "SoftMarginLoss graph execution complete.\n";
HIPDNN_CHECK(backend->destroy(handle));
return 0;
}
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
int main()
{
using InputType = float;
const int64_t n = 2; // Batch size
const int64_t c = 1; // Number of channels
const int64_t h = 3; // Height
const int64_t w = 4; // Width
auto buildSoftmaxGraph = [=](hipdnnHandle_t handle) {
auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
graph->set_name("softmax_graph")
.set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
.set_compute_data_type(hipdnn_frontend::DataType::FLOAT);
auto p = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
hipdnn_frontend::graph::Tensor_attributes()
.set_name("p")
.set_dim({n, c, h, w})
.set_stride({c * h * w, h * w, w, 1}));
auto softmaxAttributes
= hipdnn_frontend::graph::SoftmaxAttributes().set_name("softmax_node").set_axis(3);
auto s = graph->softmax(p, softmaxAttributes);
s->set_output(true);
HIPDNN_FE_CHECK(graph->build(handle));
return std::make_tuple(graph, p, s);
};
auto backend = hipdnn_frontend::detail::hipdnnBackend();
if(!backend)
{
std::cout << "Create backend failed.\n";
return 1;
}
hipdnnHandle_t handle;
HIPDNN_CHECK(backend->create(&handle));
auto [graph, p, s] = buildSoftmaxGraph(handle);
hipdnn_data_sdk::utilities::Tensor<InputType> pTensor(p->get_dim(), p->get_stride());
hipdnn_data_sdk::utilities::Tensor<InputType> sTensor(s->get_dim(), s->get_stride());
std::unordered_map<int64_t, void*> variantPack;
variantPack[p->get_uid()] = pTensor.memory().deviceData();
variantPack[s->get_uid()] = sTensor.memory().deviceData();
int64_t workspaceSize = 0;
HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));
HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
std::cout << "Softmax graph execution complete.\n";
HIPDNN_CHECK(backend->destroy(handle));
return 0;
}
#include <iostream>
#include "utils.hpp"
#include <hipdnn_data_sdk/utilities/Tensor.hpp>
#include <hipdnn_data_sdk/utilities/Workspace.hpp>
#include <hipdnn_frontend.hpp>
// Sample: builds a one-node transpose graph with the hipDNN frontend, binds device
// buffers for the input and the permuted output, and executes it once.
//
// Returns 0 after a successful run and 1 when no backend is available. Failures
// detected by HIPDNN_CHECK / HIPDNN_FE_CHECK (utils.hpp) terminate the process
// instead of returning.
int main()
{
    using InputType = float;

    const int64_t n = 1; // Batch size
    const int64_t c = 2; // Number of channels
    const int64_t h = 3; // Height
    const int64_t w = 4; // Width

    // Permutation options noted by the original author:
    //   NCHW -> NHWC: {0, 2, 3, 1} (used here)
    //   NHWC -> NCHW: {0, 1, 2, 3}, together with NHWC strides on the input below
    const auto permutation = std::vector<int64_t>{0, 2, 3, 1};

    // Describes the graph, builds it against `handle` and returns the graph together
    // with the tensor attributes that need device buffers at execution time.
    auto buildTransposeGraph = [=](hipdnnHandle_t handle) {
        auto graph = std::make_shared<hipdnn_frontend::graph::Graph>();
        graph->set_name("transpose_graph")
            .set_io_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_intermediate_data_type(hipdnn_frontend::getDataTypeEnumFromType<InputType>())
            .set_compute_data_type(hipdnn_frontend::DataType::FLOAT);

        // Packed NCHW strides. For the NHWC -> NCHW case, replace these with the
        // strides of an NHWC layout.
        auto input = std::make_shared<hipdnn_frontend::graph::TensorAttributes>(
            hipdnn_frontend::graph::Tensor_attributes()
                .set_name("input")
                .set_dim({n, c, h, w})
                .set_stride({c * h * w, h * w, w, 1}));

        auto transposeAttributes = hipdnn_frontend::graph::TransposeAttributes()
                                       .set_name("transpose_node")
                                       .set_permutation(permutation);

        auto output = graph->transpose(input, transposeAttributes);
        output->set_output(true);

        // build graph
        HIPDNN_FE_CHECK(graph->build(handle));
        return std::make_tuple(graph, input, output);
    };

    auto backend = hipdnn_frontend::detail::hipdnnBackend();
    if(!backend)
    {
        // Spelling and format aligned with the other samples' failure message.
        std::cout << "Create backend failed.\n";
        return 1;
    }

    // Value-initialized so the handle never holds an indeterminate value.
    hipdnnHandle_t handle{};
    HIPDNN_CHECK(backend->create(&handle));

    auto [graph, input, output] = buildTransposeGraph(handle);

    // The output buffer is sized from the dims/strides reported by the output attributes.
    hipdnn_data_sdk::utilities::Tensor<InputType> inputTensor(input->get_dim(),
                                                              input->get_stride());
    hipdnn_data_sdk::utilities::Tensor<InputType> outputTensor(output->get_dim(),
                                                               output->get_stride());

    // Maps each tensor UID to the device pointer the graph should use for it.
    std::unordered_map<int64_t, void*> variantPack;
    variantPack[input->get_uid()] = inputTensor.memory().deviceData();
    variantPack[output->get_uid()] = outputTensor.memory().deviceData();

    int64_t workspaceSize = 0;
    HIPDNN_FE_CHECK(graph->get_workspace_size(workspaceSize));
    const hipdnn_data_sdk::utilities::Workspace workspace(static_cast<size_t>(workspaceSize));

    HIPDNN_FE_CHECK(graph->execute(handle, variantPack, workspace.get()));
    std::cout << "Transpose graph execution complete. \n";

    HIPDNN_CHECK(backend->destroy(handle));
    return 0;
}
// Copyright © Advanced Micro Devices, Inc., or its affiliates.
// SPDX-License-Identifier: MIT
#pragma once
#include <hip/hip_runtime.h>
#include <hipdnn_frontend.hpp>
// Checks the result of a HIP runtime call. If it is not hipSuccess, prints the
// error string with file and line to std::cerr and exits with EXIT_FAILURE.
// The argument is evaluated exactly once and cached, so a failing call is not
// executed a second time just to produce the error message.
#define HIP_CHECK(status)                                                             \
    do                                                                                \
    {                                                                                 \
        const auto hipCheckResult_ = (status);                                        \
        if(hipCheckResult_ != hipSuccess)                                             \
        {                                                                             \
            std::cerr << "HIP Error: " << hipGetErrorString(hipCheckResult_)          \
                      << " in file " << __FILE__ << " at line " << __LINE__ << "\n";  \
            exit(EXIT_FAILURE);                                                       \
        }                                                                             \
    } while(0)
// Checks the result of a hipDNN backend call. If it is not HIPDNN_STATUS_SUCCESS,
// prints the error string with file and line to std::cerr and exits with
// EXIT_FAILURE. The argument is evaluated exactly once and cached, so a failing
// call is not executed a second time just to produce the error message.
#define HIPDNN_CHECK(status)                                                            \
    do                                                                                  \
    {                                                                                   \
        const auto hipdnnCheckResult_ = (status);                                       \
        if(hipdnnCheckResult_ != HIPDNN_STATUS_SUCCESS)                                 \
        {                                                                               \
            std::cerr << "hipDNN Error: " << hipdnnGetErrorString(hipdnnCheckResult_)   \
                      << " in file " << __FILE__ << " at line " << __LINE__ << "\n";    \
            exit(EXIT_FAILURE);                                                         \
        }                                                                               \
    } while(0)
// Checks a hipDNN frontend status object (anything exposing is_good() and
// get_message()). If it is not good, prints the message with file and line to
// std::cerr and exits with EXIT_FAILURE.
// The argument is parenthesized and bound to a uniquely named reference, so it is
// evaluated once and a caller variable named `status` can be passed safely.
#define HIPDNN_FE_CHECK(statusObj)                                                      \
    do                                                                                  \
    {                                                                                   \
        auto const& hipdnnFeCheckResult_ = (statusObj);                                 \
        if(!hipdnnFeCheckResult_.is_good())                                             \
        {                                                                               \
            std::cerr << "hipDNN Frontend Error: " << hipdnnFeCheckResult_.get_message() \
                      << " in file " << __FILE__ << " at line " << __LINE__ << "\n";    \
            exit(EXIT_FAILURE);                                                         \
        }                                                                               \
    } while(0)
import hipdnn
import torch
def build_adamw_graph(
    hipdnn_handle,
    torch_tensor_params,
    torch_tensor_grads,
    torch_tensor_exp_avgs,
    torch_tensor_exp_avg_sqs,
    torch_tensor_max_exp_avg_sqs,
    hipdnn_data_type,
):
    """Create and build a hipdnn graph holding a single ``adamw`` op.

    Each torch tensor argument is mirrored by a graph tensor obtained from
    ``graph.tensor_like``; those graph tensors are wired into ``graph.adamw``
    and the graph is then built against ``hipdnn_handle``.

    Args:
        hipdnn_handle: handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_params: tensor mirrored as the op's ``params``.
        torch_tensor_grads: tensor mirrored as the op's ``grads``.
        torch_tensor_exp_avgs: tensor mirrored as the op's ``exp_avgs``.
        torch_tensor_exp_avg_sqs: tensor mirrored as the op's ``exp_avg_sqs``.
        torch_tensor_max_exp_avg_sqs: tensor mirrored as the op's
            ``max_exp_avg_sqs``.
        hipdnn_data_type: I/O data type of the graph.

    Returns:
        A 6-tuple ``(graph, params, grads, exp_avgs, exp_avg_sqs,
        max_exp_avg_sqs)`` where everything after ``graph`` is the graph
        tensor created for the corresponding argument.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="adamw",
    )

    # Mirror the torch tensors in argument order.
    torch_inputs = (
        torch_tensor_params,
        torch_tensor_grads,
        torch_tensor_exp_avgs,
        torch_tensor_exp_avg_sqs,
        torch_tensor_max_exp_avg_sqs,
    )
    params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs = (
        graph.tensor_like(tensor) for tensor in torch_inputs
    )

    graph.adamw(
        params=params,
        grads=grads,
        exp_avgs=exp_avgs,
        exp_avg_sqs=exp_avg_sqs,
        max_exp_avg_sqs=max_exp_avg_sqs,
    )
    graph.build(hipdnn_handle)

    return graph, params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs
if __name__ == "__main__":
    # Input dimensions: batch, channels, height, width
    shape = (1, 2, 3, 4)
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # One random tensor per adamw input, created in the order
    # params, grads, exp_avgs, exp_avg_sqs, max_exp_avg_sqs.
    torch_tensors = [
        torch.rand(*shape, dtype=torch_data_type, device="cuda") for _ in range(5)
    ]

    hipdnn_handle = hipdnn.create_handle()

    # The builder returns the graph followed by one graph tensor per torch tensor.
    graph, *hipdnn_tensors = build_adamw_graph(
        hipdnn_handle, *torch_tensors, hipdnn_data_type
    )

    # Pair every graph tensor with the data pointer of its torch counterpart.
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr()
        for graph_tensor, torch_tensor in zip(hipdnn_tensors, torch_tensors)
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Adamw graph execution complete.")
import hipdnn
import torch
def build_transformer_adamw_graph(
    hipdnn_handle,
    torch_tensor_params,
    torch_tensor_grads,
    torch_tensor_exp_avgs,
    torch_tensor_exp_avg_sqs,
    hipdnn_data_type,
):
    """Build a hipDNN graph containing one AdamW node in transformer mode.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_params: Tensor mirrored as the node's ``params`` input.
        torch_tensor_grads: Tensor mirrored as the node's ``grads`` input.
        torch_tensor_exp_avgs: Tensor mirrored as the node's ``exp_avgs`` input.
        torch_tensor_exp_avg_sqs: Tensor mirrored as the node's ``exp_avg_sqs`` input.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        Tuple ``(graph, params, grads, exp_avgs, exp_avg_sqs)`` holding the
        built graph followed by the four hipDNN tensor handles.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="adamw",
    )

    # Mirror the torch tensors as graph tensors, in declaration order.
    torch_inputs = (
        torch_tensor_params,
        torch_tensor_grads,
        torch_tensor_exp_avgs,
        torch_tensor_exp_avg_sqs,
    )
    params, grads, exp_avgs, exp_avg_sqs = [graph.tensor_like(t) for t in torch_inputs]

    # No tensor is flagged as a graph output here; the node's return value is
    # discarded (presumably the update happens in place -- confirm with hipDNN docs).
    graph.adamw(
        params=params,
        grads=grads,
        exp_avgs=exp_avgs,
        exp_avg_sqs=exp_avg_sqs,
        is_transformeradamw=True,
    )
    graph.build(hipdnn_handle)
    return (graph, params, grads, exp_avgs, exp_avg_sqs)
if __name__ == "__main__":
    # Sample driver: build the transformer-AdamW graph and execute it once.
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # All four optimizer tensors share one 4-D shape.
    batch, channels, height, width = 1, 2, 3, 4
    state_shape = (batch, channels, height, width)
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    # Allocation order: params, grads, exp_avgs, exp_avg_sqs.
    torch_tensor_params = torch.rand(state_shape, **tensor_options)
    torch_tensor_grads = torch.rand(state_shape, **tensor_options)
    torch_tensor_exp_avgs = torch.rand(state_shape, **tensor_options)
    torch_tensor_exp_avg_sqs = torch.rand(state_shape, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    build_result = build_transformer_adamw_graph(
        hipdnn_handle,
        torch_tensor_params,
        torch_tensor_grads,
        torch_tensor_exp_avgs,
        torch_tensor_exp_avg_sqs,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_params,
        hipdnn_tensor_grads,
        hipdnn_tensor_exp_avgs,
        hipdnn_tensor_exp_avg_sqs,
    ) = build_result

    # Bind every graph tensor to the address of the torch buffer backing it.
    variant_pack = {
        hipdnn_tensor_params: torch_tensor_params.data_ptr(),
        hipdnn_tensor_grads: torch_tensor_grads.data_ptr(),
        hipdnn_tensor_exp_avgs: torch_tensor_exp_avgs.data_ptr(),
        hipdnn_tensor_exp_avg_sqs: torch_tensor_exp_avg_sqs.data_ptr(),
    }

    # Scratch buffer sized in bytes as reported by the built graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Transformer adamw graph execution complete.")
import hipdnn
import torch
def build_bacthnorm_inference_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_mean,
    torch_tensor_variance,
    torch_tensor_epsilon,
    hipdnn_data_type,
):
    """Build a hipDNN graph with a single ``batchnorm_inference_ext`` node.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored as the node's ``input``.
        torch_tensor_scale: Tensor mirrored as the node's ``scale``.
        torch_tensor_bias: Tensor mirrored as the node's ``bias``.
        torch_tensor_mean: Tensor mirrored as the node's ``mean``.
        torch_tensor_variance: Tensor mirrored as the node's ``variance``.
        torch_tensor_epsilon: Tensor mirrored as the node's ``epsilon``; the
            graph-side tensor additionally gets the value ``1e-5`` assigned.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        Tuple ``(graph, x, scale, bias, mean, variance, epsilon, y)`` where
        ``y`` is the node result, flagged as a graph output.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="bacthNorm_inference",
    )

    # Graph-side mirrors of the torch tensors, created in this exact order.
    x = graph.tensor_like(torch_tensor_x)
    scale = graph.tensor_like(torch_tensor_scale)
    bias = graph.tensor_like(torch_tensor_bias)
    mean = graph.tensor_like(torch_tensor_mean)
    variance = graph.tensor_like(torch_tensor_variance)
    epsilon = graph.tensor_like(torch_tensor_epsilon)
    epsilon.set_value(1e-5)

    # The single normalization node; its result is the only graph output.
    node_inputs = {
        "input": x,
        "mean": mean,
        "variance": variance,
        "scale": scale,
        "bias": bias,
        "epsilon": epsilon,
    }
    y = graph.batchnorm_inference_ext(**node_inputs)
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, scale, bias, mean, variance, epsilon, y)
if __name__ == "__main__":
    # Sample driver: build the batchnorm-inference graph and execute it once.
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    n, c, h, w = 4, 16, 56, 56  # batch size, input channels, height, width
    per_channel_shape = (1, c, 1, 1)
    scalar_shape = (1, 1, 1, 1)

    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda")
    # Per-channel parameters and statistics are always created as float32.
    torch_tensor_scale = torch.rand(per_channel_shape, dtype=torch.float32, device="cuda")
    torch_tensor_bias = torch.rand(per_channel_shape, dtype=torch.float32, device="cuda")
    torch_tensor_mean = torch.rand(per_channel_shape, dtype=torch.float32, device="cuda")
    torch_tensor_variance = torch.rand(per_channel_shape, dtype=torch.float32, device="cuda")
    torch_tensor_epsilon = torch.full(
        scalar_shape, 1e-5, dtype=torch.float32, requires_grad=False, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()
    build_result = build_bacthnorm_inference_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_mean,
        torch_tensor_variance,
        torch_tensor_epsilon,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_mean,
        hipdnn_tensor_variance,
        hipdnn_tensor_epsilon,
        hipdnn_tensor_y,
    ) = build_result

    # The output buffer takes the dimensions reported by the graph's output tensor.
    y_dims = hipdnn_tensor_y.get_dim()
    torch_tensor_y = torch.empty(y_dims, dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the address of the torch buffer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_mean: torch_tensor_mean.data_ptr(),
        hipdnn_tensor_variance: torch_tensor_variance.data_ptr(),
        hipdnn_tensor_scale: torch_tensor_scale.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_epsilon: torch_tensor_epsilon.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer sized in bytes as reported by the built graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("BatchNorm inference graph execution complete.")
import hipdnn
import torch
def build_batchnorm_training_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_prev_running_mean,
    torch_tensor_prev_running_var,
    torch_tensor_momentum,
    torch_tensor_epsilon,
    hipdnn_data_type,
):
    """Build a hipDNN graph with a single ``batchnorm`` (training) node.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored as the node's ``input``.
        torch_tensor_scale: Tensor mirrored as the node's ``scale``.
        torch_tensor_bias: Tensor mirrored as the node's ``bias``.
        torch_tensor_prev_running_mean: Tensor mirrored as ``in_running_mean``.
        torch_tensor_prev_running_var: Tensor mirrored as ``in_running_var``.
        torch_tensor_momentum: Tensor mirrored as ``momentum``; the graph-side
            tensor additionally gets the value ``0.1`` assigned.
        torch_tensor_epsilon: Tensor mirrored as ``epsilon``; the graph-side
            tensor additionally gets the value ``1e-5`` assigned.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        A 13-tuple: the built graph, the seven input tensor handles in the
        order ``x, scale, bias, prev_running_mean, prev_running_var, epsilon,
        momentum`` (note: epsilon precedes momentum, unlike the parameter
        list), then the five node results ``y, saved_mean,
        saved_inv_variance, next_running_mean, next_running_var``, all of
        which are flagged as graph outputs.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="batchnorm_training",
    )

    # Graph-side mirrors of the torch tensors, created in this exact order.
    x = graph.tensor_like(torch_tensor_x)
    scale = graph.tensor_like(torch_tensor_scale)
    bias = graph.tensor_like(torch_tensor_bias)
    prev_running_mean = graph.tensor_like(torch_tensor_prev_running_mean)
    prev_running_var = graph.tensor_like(torch_tensor_prev_running_var)
    momentum = graph.tensor_like(torch_tensor_momentum)
    momentum.set_value(0.1)
    epsilon = graph.tensor_like(torch_tensor_epsilon)
    epsilon.set_value(1e-5)

    # The node yields exactly five tensors; unpacking enforces that count.
    y, saved_mean, saved_inv_variance, next_running_mean, next_running_var = graph.batchnorm(
        input=x,
        scale=scale,
        bias=bias,
        in_running_mean=prev_running_mean,
        in_running_var=prev_running_var,
        epsilon=epsilon,
        momentum=momentum,
    )
    produced = (y, saved_mean, saved_inv_variance, next_running_mean, next_running_var)
    for tensor in produced:
        tensor.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        x,
        scale,
        bias,
        prev_running_mean,
        prev_running_var,
        epsilon,
        momentum,
    ) + produced
if __name__ == "__main__":
    # Sample driver: build the batchnorm-training graph and execute it once.
    # Input dimensions
    n = 4  # Batch size
    c = 16  # Number of input channels
    h = 56  # Height
    w = 56  # Width
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda")
    # Per-channel parameters and running statistics are always float32.
    torch_tensor_scale = torch.rand(1, c, 1, 1, dtype=torch.float32, device="cuda")
    torch_tensor_bias = torch.rand(1, c, 1, 1, dtype=torch.float32, device="cuda")
    torch_tensor_prev_running_mean = torch.rand(1, c, 1, 1, dtype=torch.float32, device="cuda")
    torch_tensor_prev_running_var = torch.rand(1, c, 1, 1, dtype=torch.float32, device="cuda")
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1), 1e-5, dtype=torch.float32, requires_grad=False, device="cuda"
    )
    # Fix: this buffer used to be filled with 1e-5 (copied from epsilon), which
    # disagreed with the 0.1 momentum that build_batchnorm_training_graph sets
    # on the graph tensor. Both sides now carry the same value.
    torch_tensor_momentum = torch.full(
        (1, 1, 1, 1), 0.1, dtype=torch.float32, requires_grad=False, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_prev_running_mean,
        hipdnn_tensor_prev_running_var,
        hipdnn_tensor_epsilon,
        hipdnn_tensor_momentum,
        hipdnn_tensor_y,
        hipdnn_tensor_saved_mean,
        hipdnn_tensor_saved_inv_variance,
        hipdnn_tensor_next_running_mean,
        hipdnn_tensor_next_running_var,
    ) = build_batchnorm_training_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_prev_running_mean,
        torch_tensor_prev_running_var,
        torch_tensor_momentum,
        torch_tensor_epsilon,
        hipdnn_data_type,
    )

    # Output buffers take the dimensions reported by the graph's output tensors.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")
    torch_tensor_saved_mean = torch.empty(
        hipdnn_tensor_saved_mean.get_dim(), dtype=torch.float32, device="cuda"
    )
    torch_tensor_saved_inv_variance = torch.empty(
        hipdnn_tensor_saved_inv_variance.get_dim(), dtype=torch.float32, device="cuda"
    )

    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_prev_running_mean: torch_tensor_prev_running_mean.data_ptr(),
        hipdnn_tensor_prev_running_var: torch_tensor_prev_running_var.data_ptr(),
        hipdnn_tensor_scale: torch_tensor_scale.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_epsilon: torch_tensor_epsilon.data_ptr(),
        hipdnn_tensor_momentum: torch_tensor_momentum.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
        # The updated running statistics are bound to the same buffers as the
        # previous ones, so they are overwritten in place.
        hipdnn_tensor_next_running_mean: torch_tensor_prev_running_mean.data_ptr(),
        hipdnn_tensor_next_running_var: torch_tensor_prev_running_var.data_ptr(),
        hipdnn_tensor_saved_mean: torch_tensor_saved_mean.data_ptr(),
        hipdnn_tensor_saved_inv_variance: torch_tensor_saved_inv_variance.data_ptr(),
    }

    # Scratch buffer sized in bytes as reported by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("BatchNorm training graph execution complete.")
import hipdnn
import torch
def build_bn_finalize_graph(
    hipdnn_handle,
    torch_tensor_sum,
    torch_tensor_sq_sum,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_prev_running_mean,
    torch_tensor_prev_running_variance,
    torch_tensor_momentum,
    torch_tensor_epsilon,
    torch_tensor_accum_count,
    hipdnn_data_type,
):
    """Build a hipDNN graph with a single ``bn_finalize`` node.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_sum: Tensor mirrored as the node's ``sum``.
        torch_tensor_sq_sum: Tensor mirrored as the node's ``sq_sum``.
        torch_tensor_scale: Tensor mirrored as the node's ``scale``.
        torch_tensor_bias: Tensor mirrored as the node's ``bias``.
        torch_tensor_prev_running_mean: Tensor mirrored as ``prev_running_mean``.
        torch_tensor_prev_running_variance: Tensor mirrored as
            ``prev_running_variance``.
        torch_tensor_momentum: Tensor mirrored as ``momentum``; the graph-side
            tensor additionally gets the value ``0.001`` assigned.
        torch_tensor_epsilon: Tensor mirrored as ``epsilon``; the graph-side
            tensor additionally gets the value ``1e-5`` assigned.
        torch_tensor_accum_count: Tensor mirrored as ``accum_count``; its
            ``.item()`` value is assigned to the graph-side tensor.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        A 16-tuple: the built graph, the nine input tensor handles in
        parameter order, then the six node results ``eq_scale, eq_bias, mean,
        inv_variance, next_running_mean, next_running_variance``, all flagged
        as graph outputs.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="bn_finalize",
    )

    # Graph-side mirrors of the torch tensors, created in this exact order.
    stat_sum = graph.tensor_like(torch_tensor_sum)
    stat_sq_sum = graph.tensor_like(torch_tensor_sq_sum)
    scale = graph.tensor_like(torch_tensor_scale)
    bias = graph.tensor_like(torch_tensor_bias)
    prev_running_mean = graph.tensor_like(torch_tensor_prev_running_mean)
    prev_running_variance = graph.tensor_like(torch_tensor_prev_running_variance)
    momentum = graph.tensor_like(torch_tensor_momentum)
    momentum.set_value(0.001)
    epsilon = graph.tensor_like(torch_tensor_epsilon)
    epsilon.set_value(1e-5)
    accum_count = graph.tensor_like(torch_tensor_accum_count)
    accum_count.set_value(torch_tensor_accum_count.item())

    # The node yields exactly six tensors; unpacking enforces that count.
    (
        eq_scale,
        eq_bias,
        mean,
        inv_variance,
        next_running_mean,
        next_running_variance,
    ) = graph.bn_finalize(
        sum=stat_sum,
        sq_sum=stat_sq_sum,
        scale=scale,
        bias=bias,
        epsilon=epsilon,
        accum_count=accum_count,
        prev_running_mean=prev_running_mean,
        prev_running_variance=prev_running_variance,
        momentum=momentum,
        name="bn_finalize_node",
    )
    produced = (
        eq_scale,
        eq_bias,
        mean,
        inv_variance,
        next_running_mean,
        next_running_variance,
    )
    for tensor in produced:
        tensor.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        stat_sum,
        stat_sq_sum,
        scale,
        bias,
        prev_running_mean,
        prev_running_variance,
        momentum,
        epsilon,
        accum_count,
    ) + produced
if __name__ == "__main__":
    # Sample driver: build the bn_finalize graph and execute it once.
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    n, c, h, w = 1, 32, 1, 1
    stat_shape = (n, c, h, w)
    scalar_shape = (1, 1, 1, 1)
    stat_options = {"dtype": torch_data_type, "device": "cuda"}

    # Random inputs, allocated in the order they are passed to the builder.
    torch_tensor_sum = torch.rand(stat_shape, **stat_options)
    torch_tensor_sq_sum = torch.rand(stat_shape, **stat_options)
    torch_tensor_scale = torch.rand(stat_shape, **stat_options)
    torch_tensor_bias = torch.rand(stat_shape, **stat_options)
    torch_tensor_prev_running_mean = torch.rand(stat_shape, **stat_options)
    torch_tensor_prev_running_variance = torch.rand(stat_shape, **stat_options)

    # Scalar inputs; momentum and epsilon match the values the builder assigns.
    torch_tensor_momentum = torch.full(
        scalar_shape, 0.001, dtype=torch.float32, requires_grad=False, device="cuda"
    )
    torch_tensor_epsilon = torch.full(
        scalar_shape, 1e-5, dtype=torch.float32, requires_grad=False, device="cuda"
    )
    torch_tensor_accum_count = torch.full(
        scalar_shape, n * h * w, dtype=torch.int32, requires_grad=False, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()
    build_result = build_bn_finalize_graph(
        hipdnn_handle,
        torch_tensor_sum,
        torch_tensor_sq_sum,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_prev_running_mean,
        torch_tensor_prev_running_variance,
        torch_tensor_momentum,
        torch_tensor_epsilon,
        torch_tensor_accum_count,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_sum,
        hipdnn_tensor_sq_sum,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_prev_running_mean,
        hipdnn_tensor_prev_running_variance,
        hipdnn_tensor_momentum,
        hipdnn_tensor_epsilon,
        hipdnn_tensor_accum_count,
        hipdnn_tensor_eq_scale,
        hipdnn_tensor_eq_bias,
        hipdnn_tensor_mean,
        hipdnn_tensor_inv_variance,
        hipdnn_tensor_next_running_mean,
        hipdnn_tensor_next_running_variance,
    ) = build_result

    # Output buffers take the dimensions reported by the graph's output tensors.
    torch_tensor_eq_scale = torch.empty(hipdnn_tensor_eq_scale.get_dim(), **stat_options)
    torch_tensor_eq_bias = torch.empty(hipdnn_tensor_eq_bias.get_dim(), **stat_options)
    torch_tensor_mean = torch.empty(hipdnn_tensor_mean.get_dim(), **stat_options)
    torch_tensor_inv_variance = torch.empty(hipdnn_tensor_inv_variance.get_dim(), **stat_options)
    torch_tensor_next_running_mean = torch.empty(
        hipdnn_tensor_next_running_mean.get_dim(), **stat_options
    )
    torch_tensor_next_running_variance = torch.empty(
        hipdnn_tensor_next_running_variance.get_dim(), **stat_options
    )

    # Bind every graph tensor to the address of the torch buffer backing it.
    variant_pack = {
        hipdnn_tensor_sum: torch_tensor_sum.data_ptr(),
        hipdnn_tensor_sq_sum: torch_tensor_sq_sum.data_ptr(),
        hipdnn_tensor_scale: torch_tensor_scale.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_prev_running_mean: torch_tensor_prev_running_mean.data_ptr(),
        hipdnn_tensor_prev_running_variance: torch_tensor_prev_running_variance.data_ptr(),
        hipdnn_tensor_momentum: torch_tensor_momentum.data_ptr(),
        hipdnn_tensor_epsilon: torch_tensor_epsilon.data_ptr(),
        hipdnn_tensor_accum_count: torch_tensor_accum_count.data_ptr(),
        hipdnn_tensor_eq_scale: torch_tensor_eq_scale.data_ptr(),
        hipdnn_tensor_eq_bias: torch_tensor_eq_bias.data_ptr(),
        hipdnn_tensor_mean: torch_tensor_mean.data_ptr(),
        hipdnn_tensor_inv_variance: torch_tensor_inv_variance.data_ptr(),
        hipdnn_tensor_next_running_mean: torch_tensor_next_running_mean.data_ptr(),
        hipdnn_tensor_next_running_variance: torch_tensor_next_running_variance.data_ptr(),
    }

    # Scratch buffer sized in bytes as reported by the built graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Batch normalization finalize graph execution complete.")
import hipdnn
import torch
def build_block_scale_dequantize_graph(
    hipdnn_handle, torch_tensor_x, torch_tensor_scale, block_size, hipdnn_data_type
):
    """Build a hipDNN graph with a single ``block_scale_dequantize`` node.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored as the node's ``input``.
        torch_tensor_scale: Tensor mirrored as the node's ``descale``.
        block_size: Forwarded to the node as ``[1, block_size]``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        Tuple ``(graph, x, scale, y)`` where ``y`` is the node result,
        flagged as a graph output.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="block_scale_dequantize",
    )

    # Graph-side mirrors of the two torch tensors.
    x = graph.tensor_like(torch_tensor_x)
    descale = graph.tensor_like(torch_tensor_scale)

    # The node receives the block size as a two-element list.
    block_dims = [1, block_size]
    y = graph.block_scale_dequantize(
        input=x,
        descale=descale,
        block_size=block_dims,
        name="block_scale_dequantize",
    )
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, descale, y)
if __name__ == "__main__":
    # Sample driver. Only the input setup and handle creation run; the graph
    # build and execution steps further down are commented out.
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    batch, channels, height, width, block_size = 1, 32, 32, 32, 32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_x = torch.rand(batch, channels, height, width, **tensor_options)
    # The scale tensor's last dimension is the input width divided by block_size.
    scale_width = width // block_size
    torch_tensor_scale = torch.rand(batch, channels, height, scale_width, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    # graph, hipdnn_tensor_x, hipdnn_tensor_scale, hipdnn_tensor_y = build_block_scale_dequantize_graph(
    # hipdnn_handle, torch_tensor_x,torch_tensor_scale, block_size, hipdnn_data_type)
    # torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")
    # variant_pack = {
    # hipdnn_tensor_x: torch_tensor_x.data_ptr(),
    # hipdnn_tensor_scale: torch_tensor_scale.data_ptr(),
    # hipdnn_tensor_y: torch_tensor_y.data_ptr()
    # }
    # workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    # graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    # print("Block scale dequantize graph execution complete.")
import hipdnn
import torch
def build_block_scale_quantize_graph(hipdnn_handle, torch_tensor_x, block_size, hipdnn_data_type):
    """Build a hipDNN graph with a single ``block_scale_quantize`` node.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored as the node's ``input``.
        block_size: Forwarded unchanged as the node's ``block_size``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        Tuple ``(graph, x, y, scale)`` where ``y`` and ``scale`` are the two
        node results, both flagged as graph outputs.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="block_scale_quantize",
    )

    x = graph.tensor_like(torch_tensor_x)

    # The node is configured with a fixed axis of 2 and no transpose.
    node_options = {
        "input": x,
        "block_size": block_size,
        "axis": 2,
        "transpose": False,
        "name": "block_scale_quantize",
    }
    y, scale = graph.block_scale_quantize(**node_options)
    y.set_output(True)
    scale.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, y, scale)
if __name__ == "__main__":
    # Sample driver. Only the input setup and handle creation run; the graph
    # build and execution steps further down are commented out.
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    batch, channels, height, width, block_size = 1, 32, 32, 32, 32
    input_shape = (batch, channels, height, width)
    torch_tensor_x = torch.rand(input_shape, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    # graph, hipdnn_tensor_x, hipdnn_tensor_y, hipdnn_tensor_scale = build_block_scale_quantize_graph(
    # hipdnn_handle, torch_tensor_x, block_size, hipdnn_data_type)
    # torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")
    # torch_tensor_scale = torch.empty(hipdnn_tensor_scale.get_dim(), dtype=torch_data_type, device="cuda")
    # variant_pack = {
    # hipdnn_tensor_x: torch_tensor_x.data_ptr(),
    # hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    # hipdnn_tensor_scale: torch_tensor_scale.data_ptr()
    # }
    # workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    # graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    # print("Block scale quantize graph execution complete.")
import hipdnn
import torch
def build_concat_conv_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_w,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    concat_axis,
):
    """Build a hipDNN graph that concatenates two tensors and convolves the result.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Tensor mirrored as the first concatenation input.
        torch_tensor_x2: Tensor mirrored as the second concatenation input.
        torch_tensor_w: Tensor mirrored as the convolution ``weight``.
        padding: Forwarded unchanged to ``conv_fprop``.
        stride: Forwarded unchanged to ``conv_fprop``.
        dilation: Forwarded unchanged to ``conv_fprop``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.
        concat_axis: Forwarded as the ``axis`` of the ``concatenate`` node.

    Returns:
        Tuple ``(graph, x1, x2, w, y)`` where ``y`` is the convolution
        result, flagged as a graph output. The concatenation result is an
        internal tensor and is not returned.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="concat_conv",
    )

    # Graph-side mirrors of the torch tensors.
    x1 = graph.tensor_like(torch_tensor_x1)
    x2 = graph.tensor_like(torch_tensor_x2)
    weight = graph.tensor_like(torch_tensor_w)

    # Stage 1: join the two inputs along the requested axis.
    concat_inputs = [x1, x2]
    joined = graph.concatenate(x=concat_inputs, axis=concat_axis, name="concatenate")

    # Stage 2: forward convolution over the joined tensor.
    conv_options = {
        "image": joined,
        "weight": weight,
        "padding": padding,
        "stride": stride,
        "dilation": dilation,
        "name": "conv2d",
    }
    y = graph.conv_fprop(**conv_options)
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x1, x2, weight, y)
if __name__ == "__main__":
    # Sample driver: build the concat + conv graph and execute it once.
    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    concat_axis = 1

    # Input dimensions
    n, c, h, w = 1, 32, 128, 128
    # Filter dimensions
    k, r, s = 32, 2, 2
    # Convolution parameters, each as a (height, width) pair
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    tensor_options = {"dtype": torch_data_type, "device": "cuda"}
    layout = torch.channels_last

    # Both inputs carry c channels; the weight has 2 * c input channels.
    torch_tensor_x1 = torch.rand(n, c, h, w, **tensor_options).to(memory_format=layout)
    torch_tensor_x2 = torch.rand(n, c, h, w, **tensor_options).to(memory_format=layout)
    torch_tensor_w = torch.rand(k, 2 * c, r, s, **tensor_options).to(memory_format=layout)

    hipdnn_handle = hipdnn.create_handle()
    build_result = build_concat_conv_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_w,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        concat_axis,
    )
    graph, hipdnn_tensor_x1, hipdnn_tensor_x2, hipdnn_tensor_w, hipdnn_tensor_y = build_result

    # The output buffer takes the dimensions reported by the graph's output tensor.
    y_dims = hipdnn_tensor_y.get_dim()
    torch_tensor_y = torch.empty(y_dims, dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the address of the torch buffer backing it.
    variant_pack = {
        hipdnn_tensor_x1: torch_tensor_x1.data_ptr(),
        hipdnn_tensor_x2: torch_tensor_x2.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer sized in bytes as reported by the built graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Concat_conv graph execution complete.")
import hipdnn
import torch
def build_concat_conv_bias_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    concat_axis,
):
    """Build a hipDNN graph chaining concatenate, convolution and bias add.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Tensor mirrored as the first concatenation input.
        torch_tensor_x2: Tensor mirrored as the second concatenation input.
        torch_tensor_w: Tensor mirrored as the convolution ``weight``.
        torch_tensor_bias: Tensor mirrored as operand ``b`` of the ``add`` node.
        padding: Forwarded unchanged to ``conv_fprop``.
        stride: Forwarded unchanged to ``conv_fprop``.
        dilation: Forwarded unchanged to ``conv_fprop``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.
        concat_axis: Forwarded as the ``axis`` of the ``concatenate`` node.

    Returns:
        Tuple ``(graph, x1, x2, w, bias, y)`` where ``y`` is the result of
        the ``add`` node, flagged as a graph output. The concatenation and
        convolution results are internal tensors and are not returned.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="concat_conv_bias",
    )

    # Graph-side mirrors of the torch tensors.
    x1 = graph.tensor_like(torch_tensor_x1)
    x2 = graph.tensor_like(torch_tensor_x2)
    weight = graph.tensor_like(torch_tensor_w)
    bias = graph.tensor_like(torch_tensor_bias)

    # Stage 1: join the two inputs along the requested axis.
    concat_inputs = [x1, x2]
    joined = graph.concatenate(x=concat_inputs, axis=concat_axis, name="concatenate")

    # Stage 2: forward convolution over the joined tensor.
    conv_options = {
        "image": joined,
        "weight": weight,
        "padding": padding,
        "stride": stride,
        "dilation": dilation,
        "name": "conv2d",
    }
    convolved = graph.conv_fprop(**conv_options)

    # Stage 3: add the bias to the convolution result; this is the graph output.
    y = graph.add(a=convolved, b=bias, name="bias")
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x1, x2, weight, bias, y)
if __name__ == "__main__":
    # Sample driver: build the concat + conv + bias graph and execute it once.
    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    concat_axis = 1

    # Input dimensions
    n, c, h, w = 1, 32, 128, 128
    # Filter dimensions
    k, r, s = 32, 2, 2
    # Convolution parameters, each as a (height, width) pair
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    tensor_options = {"dtype": torch_data_type, "device": "cuda"}
    layout = torch.channels_last

    # Both inputs carry c channels; the weight has 2 * c input channels and
    # the bias has one entry per output channel k.
    torch_tensor_x1 = torch.rand(n, c, h, w, **tensor_options).to(memory_format=layout)
    torch_tensor_x2 = torch.rand(n, c, h, w, **tensor_options).to(memory_format=layout)
    torch_tensor_w = torch.rand(k, 2 * c, r, s, **tensor_options).to(memory_format=layout)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_options).to(memory_format=layout)

    hipdnn_handle = hipdnn.create_handle()
    build_result = build_concat_conv_bias_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        concat_axis,
    )
    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = build_result

    # The output buffer takes the dimensions reported by the graph's output tensor.
    y_dims = hipdnn_tensor_y.get_dim()
    torch_tensor_y = torch.empty(y_dims, dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the address of the torch buffer backing it.
    variant_pack = {
        hipdnn_tensor_x1: torch_tensor_x1.data_ptr(),
        hipdnn_tensor_x2: torch_tensor_x2.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer sized in bytes as reported by the built graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Concat_conv_bias graph execution complete.")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment