Commit ca34d4d2 authored by yanjl1's avatar yanjl1
Browse files

Initial

parents
import hipdnn
import torch
def build_groupnorm_swish_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_epsilon,
    mode,
    eps,
    groups,
    hipdnn_data_type,
):
    """Build a hipdnn graph that runs groupnorm followed by swish.

    The graph uses ``hipdnn_data_type`` for I/O and FLOAT for intermediate
    and compute types.  The epsilon tensor is given the value ``eps`` via
    ``set_value``.  The groupnorm mean / inverse-variance tensors and the
    swish result are flagged as outputs before the graph is built against
    ``hipdnn_handle``.

    Returns:
        ``(graph, x, scale, bias, y, mean, inv_var)`` where every element
        after ``graph`` is a hipdnn tensor belonging to that graph.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="groupnorm_swish_graph",
    )

    # Mirror the torch tensors inside the graph (x, scale, bias, epsilon order).
    like = graph.tensor_like
    x = like(torch_tensor_x)
    scale = like(torch_tensor_scale)
    bias = like(torch_tensor_bias)
    epsilon = like(torch_tensor_epsilon)
    epsilon.set_value(eps)

    # Groupnorm node; its normalized output feeds the swish node below.
    normalized, mean, inv_var = graph.groupnorm(
        mode,
        x,
        scale,
        bias,
        epsilon,
        groups,
        fp32,
        name="groupnorm",
    )
    for statistic in (mean, inv_var):
        statistic.set_output(True)

    y = graph.swish(input=normalized, name="swish")
    y.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, scale, bias, y, mean, inv_var
if __name__ == "__main__":
    # Example driver: build the groupnorm + swish graph and execute it once.

    # Input dimensions
    batch = 2  # Batch size
    channels = 16  # Number of channels
    height = 32  # height
    width = 32  # width
    mode = hipdnn.norm_forward_phase.TRAINING  # Mode
    eps = 1e-5  # epsilon
    groups = 2  # groups
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_x = torch.rand(
        (batch, channels, height, width), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_scale = torch.rand(channels, dtype=torch_data_type, device="cuda")
    torch_tensor_bias = torch.rand(channels, dtype=torch_data_type, device="cuda")
    # Epsilon lives on the host; the builder assigns its value with set_value(eps).
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1), eps, dtype=torch.float32, requires_grad=False, device="cpu"
    )

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
        hipdnn_tensor_mean,
        hipdnn_tensor_inv_var,
    ) = build_groupnorm_swish_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_epsilon,
        mode,
        eps,
        groups,
        hipdnn_data_type,
    )

    # Output buffers are sized from the dims the built graph reports.  The
    # mean / inv_var buffers are allocated only here; the earlier random
    # placeholders were overwritten before use and have been dropped.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")
    torch_tensor_mean = torch.empty(
        hipdnn_tensor_mean.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_inv_var = torch.empty(
        hipdnn_tensor_inv_var.get_dim(), dtype=torch_data_type, device="cuda"
    )

    # Bind each graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_scale: torch_tensor_scale.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
        hipdnn_tensor_mean: torch_tensor_mean.data_ptr(),
        hipdnn_tensor_inv_var: torch_tensor_inv_var.data_ptr(),
    }
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("groupnorm_swish graph execution complete.")
import hipdnn
import torch
def build_pointwise_convolution_batchnorm_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_scale,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a graph chaining mul -> add -> relu -> conv_fprop -> genstats.

    The input is multiplied by ``scale``, offset by ``bias`` and passed
    through relu before the forward convolution.  The convolution result
    and both genstats results (sum and squared sum) are marked as graph
    outputs, then the graph is built against ``hipdnn_handle``.

    Returns:
        ``(graph, x, w, scale, bias, conv_out, sum_out, sq_sum_out)``.
    """
    graph_options = {
        "handle": hipdnn_handle,
        "io_data_type": hipdnn_data_type,
        "intermediate_data_type": hipdnn.data_type.FLOAT,
        "compute_data_type": hipdnn.data_type.FLOAT,
        "name": "pointwise_convolution_batchnorm",
    }
    graph = hipdnn.pygraph(**graph_options)

    # Graph-side counterparts of the torch tensors.
    x = graph.tensor_like(torch_tensor_x)
    w = graph.tensor_like(torch_tensor_w)
    scale = graph.tensor_like(torch_tensor_scale)
    bias = graph.tensor_like(torch_tensor_bias)

    # Pointwise prologue: relu(x * scale + bias).
    scaled = graph.mul(a=x, b=scale, name="mul")
    shifted = graph.add(a=scaled, b=bias, name="add")
    activated = graph.relu(input=shifted, name="relu")

    conv_out = graph.conv_fprop(
        image=activated,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    conv_out.set_output(True)

    # Statistics computed over the convolution output.
    sum_out, sq_sum_out = graph.genstats(conv_out, name="genstats")
    sum_out.set_output(True)
    sq_sum_out.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, w, scale, bias, conv_out, sum_out, sq_sum_out
if __name__ == "__main__":
    # Example driver: scale/bias/relu + convolution + genstats on random data.

    # Input dimensions: batch, input channels, height, width
    n, c, h, w = 4, 64, 16, 16
    # Filter dimensions: output channels, filter height, filter width
    k, r, s = 32, 3, 3
    # Convolution parameters (height, width)
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float

    def _rand_channels_last(*shape):
        # Random CUDA tensor converted to channels_last memory format.
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    def _empty_channels_last(graph_tensor):
        # Uninitialized channels_last CUDA buffer with the graph tensor's dims.
        return torch.empty(
            graph_tensor.get_dim(),
            dtype=torch_data_type,
            memory_format=torch.channels_last,
            device="cuda",
        )

    torch_tensor_x = _rand_channels_last(n, c, h, w)
    torch_tensor_w = _rand_channels_last(k, c, r, s)
    torch_tensor_scale = _rand_channels_last(1, c, 1, 1)
    torch_tensor_bias = _rand_channels_last(1, c, 1, 1)

    hipdnn_handle = hipdnn.create_handle()
    built = build_pointwise_convolution_batchnorm_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_scale,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_conv_out,
        hipdnn_tensor_sum_out,
        hipdnn_tensor_sq_sum_out,
    ) = built

    torch_tensor_conv_out = _empty_channels_last(hipdnn_tensor_conv_out)
    torch_tensor_sum_out = _empty_channels_last(hipdnn_tensor_sum_out)
    torch_tensor_sq_sum_out = _empty_channels_last(hipdnn_tensor_sq_sum_out)

    # Pair every graph tensor with its torch buffer, then bind by device pointer.
    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_conv_out, torch_tensor_conv_out),
        (hipdnn_tensor_sum_out, torch_tensor_sum_out),
        (hipdnn_tensor_sq_sum_out, torch_tensor_sq_sum_out),
    ]
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("pointwise_convolution_batchnorm graph execution complete.")
import hipdnn
import torch
def build_genstats_graph(hipdnn_handle, torch_tensor_x, hipdnn_data_type):
    """Build a single-node graph that runs ``genstats`` on one input.

    Both results of the genstats node (sum and squared sum) are marked as
    graph outputs before the graph is built against ``hipdnn_handle``.

    Returns:
        ``(graph, x, sum, sq_sum)`` with the hipdnn tensors of the graph.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="genstats_graph",
    )

    x = graph.tensor_like(torch_tensor_x)

    # genstats yields two tensors: the sum and the sum of squares.
    stats = graph.genstats(x, fp32, name="genstats")
    total, squared_total = stats
    total.set_output(True)
    squared_total.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, total, squared_total)
if __name__ == "__main__":
    # Example driver: build the genstats graph and execute it once.
    #
    # NOTE(review): the build/execute steps below were previously commented
    # out, so the script printed "execution complete" without running the
    # graph.  They are re-enabled to match the other examples; confirm that
    # the backend supports a stand-alone genstats graph.

    # Input dimensions
    n = 2  # Batch size
    c = 3  # Number of input channels
    h = 4  # Height
    w = 5  # Width
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    torch_tensor_x = torch.rand((n, c, h, w), dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    graph, hipdnn_tensor_x, hipdnn_tensor_sum, hipdnn_tensor_sq_sum = build_genstats_graph(
        hipdnn_handle, torch_tensor_x, hipdnn_data_type
    )

    # Output buffers sized from the dims reported by the built graph.
    torch_tensor_sum = torch.empty(
        hipdnn_tensor_sum.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_sq_sum = torch.empty(
        hipdnn_tensor_sq_sum.get_dim(), dtype=torch_data_type, device="cuda"
    )

    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_sum: torch_tensor_sum.data_ptr(),
        hipdnn_tensor_sq_sum: torch_tensor_sq_sum.data_ptr(),
    }
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("genstats graph execution complete.")
import hipdnn
import torch
def build_getitem_backward_graph(
    hipdnn_handle, torch_tensor_dy, hipdnn_data_type, dx_dim, index_dims, torch_tensor_indeices
):
    """Build a graph containing one ``getitem_backward`` node.

    One graph tensor is created per entry of ``index_dims``, taken from the
    matching position of ``torch_tensor_indeices``.  The ``dx`` result is
    marked as an output with dims ``dx_dim``; the ``error`` result is marked
    as an output as well.

    Returns:
        ``(graph, dy, dx, error, indices)`` where ``indices`` is the list of
        graph tensors created for the index inputs.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="getitem_backward_inference",
    )

    dy = graph.tensor_like(torch_tensor_dy)
    # Positional lookup keeps the count tied to len(index_dims).
    index_tensors = [
        graph.tensor_like(torch_tensor_indeices[position])
        for position in range(len(index_dims))
    ]

    dx, error = graph.getitem_backward(
        dy=dy,
        indices=index_tensors,
        dims=index_dims,
        offset=0,
        name="getitem_backward",
    )
    dx.set_output(True).set_dim(dx_dim)
    error.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, dy, dx, error, index_tensors)
if __name__ == "__main__":
    # Example driver: build the getitem_backward graph and execute it once.
    dy_batch = 32
    dy_channel = 16
    dx_batch = 64
    dx_channel = 32
    height = 32
    width = 32
    dy_dim = [dy_batch, dy_channel, height, width]
    dx_dim = [dx_batch, dx_channel, height, width]
    # Dimensions of dx that are addressed through index tensors
    index_dims = [1, 2]

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    torch_tensor_dy = torch.rand(dy_dim, dtype=torch_data_type, device="cuda")

    # One int32 index tensor per indexed dimension, shaped like dy, with
    # values drawn from [0, size of that dx dimension).
    torch_tensor_indeices = [
        torch.randint(
            0,
            dx_dim[indexed_dim],
            (dy_batch, dy_channel, height, width),
            dtype=torch.int32,
            device="cuda",
        )
        for indexed_dim in index_dims
    ]

    hipdnn_handle = hipdnn.create_handle()
    graph, hipdnn_tensor_dy, hipdnn_tensor_dx, hipdnn_tensor_error, hipdnn_tensor_indeices = (
        build_getitem_backward_graph(
            hipdnn_handle,
            torch_tensor_dy,
            hipdnn_data_type,
            dx_dim,
            index_dims,
            torch_tensor_indeices,
        )
    )

    torch_tensor_dx = torch.empty(hipdnn_tensor_dx.get_dim(), dtype=torch_data_type, device="cuda")
    # error tensor must be int32
    torch_tensor_error = torch.empty(len(index_dims), dtype=torch.int32, device="cuda")

    variant_pack = {
        hipdnn_tensor_dy: torch_tensor_dy.data_ptr(),
        hipdnn_tensor_dx: torch_tensor_dx.data_ptr(),
    }
    # Bind every index tensor rather than hard-coding entries 0 and 1, so the
    # pack stays correct when index_dims changes length.
    for graph_index, torch_index in zip(hipdnn_tensor_indeices, torch_tensor_indeices):
        variant_pack[graph_index] = torch_index.data_ptr()
    variant_pack[hipdnn_tensor_error] = torch_tensor_error.data_ptr()

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("getitem_backward graph execution complete.")
import hipdnn
import torch
def build_groupnorm_bwd_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_dy,
    torch_tensor_scale,
    torch_tensor_epsilon,
    torch_tensor_mean,
    torch_tensor_inv_var,
    groups,
    hipdnn_data_type,
):
    """Build a graph containing one ``groupnorm_backward`` node.

    The three results of the node (``dx``, ``dbias``, ``dscale``) are marked
    as graph outputs and the graph is built against ``hipdnn_handle``.

    NOTE(review): the epsilon tensor is created with ``tensor_like`` only;
    this function never calls ``set_value`` on it — confirm how the backend
    obtains the epsilon value.

    Returns:
        ``(graph, x, dy, scale, epsilon, mean, inv_var, dx, dbias, dscale)``.
    """
    graph_options = dict(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="groupnorm_bwd",
    )
    graph = hipdnn.pygraph(**graph_options)

    # Graph-side inputs, created in the order x, dy, scale, epsilon, mean, inv_var.
    mirror = graph.tensor_like
    x = mirror(torch_tensor_x)
    dy = mirror(torch_tensor_dy)
    scale = mirror(torch_tensor_scale)
    epsilon = mirror(torch_tensor_epsilon)
    mean = mirror(torch_tensor_mean)
    inv_var = mirror(torch_tensor_inv_var)

    gradients = graph.groupnorm_backward(
        x=x,
        dy=dy,
        scale=scale,
        epsilon=epsilon,
        mean=mean,
        inv_variance=inv_var,
        groups=groups,
        name="groupnorm_backward",
    )
    dx, dbias, dscale = gradients
    for gradient in (dx, dbias, dscale):
        gradient.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, dy, scale, epsilon, mean, inv_var, dx, dbias, dscale
if __name__ == "__main__":
    # Example driver: build the groupnorm backward graph and execute it once.

    # Input dimensions
    batch, channels, height, width = 2, 16, 512, 512
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    activation_shape = (batch, channels, height, width)
    torch_tensor_x = torch.rand(*activation_shape, dtype=torch_data_type, device="cuda")
    torch_tensor_dy = torch.rand(*activation_shape, dtype=torch_data_type, device="cuda")
    torch_tensor_scale = torch.rand(channels, dtype=torch_data_type, device="cuda")
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1), 1e-5, dtype=torch.float32, requires_grad=False, device="cpu"
    )
    groups = 2
    # Saved statistics: groups * batch elements each, filled with random values here.
    stats_len = groups * batch
    torch_tensor_mean = torch.rand(stats_len, dtype=torch.float32, device="cuda")
    torch_tensor_inv_var = torch.rand(stats_len, dtype=torch.float32, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    built = build_groupnorm_bwd_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_dy,
        torch_tensor_scale,
        torch_tensor_epsilon,
        torch_tensor_mean,
        torch_tensor_inv_var,
        groups,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_dy,
        hipdnn_tensor_scale,
        hipdnn_tensor_epsilon,
        hipdnn_tensor_mean,
        hipdnn_tensor_inv_var,
        hipdnn_tensor_dx,
        hipdnn_tensor_dbias,
        hipdnn_tensor_dscale,
    ) = built

    # Gradient buffers sized from the dims reported by the built graph.
    torch_tensor_dx = torch.empty(hipdnn_tensor_dx.get_dim(), dtype=torch_data_type, device="cuda")
    torch_tensor_dscale = torch.empty(
        hipdnn_tensor_dscale.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_dbias = torch.empty(
        hipdnn_tensor_dbias.get_dim(), dtype=torch_data_type, device="cuda"
    )

    # The epsilon graph tensor is intentionally left out, as in the original pack.
    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_mean, torch_tensor_mean),
        (hipdnn_tensor_inv_var, torch_tensor_inv_var),
        (hipdnn_tensor_dx, torch_tensor_dx),
        (hipdnn_tensor_dbias, torch_tensor_dbias),
        (hipdnn_tensor_dscale, torch_tensor_dscale),
    ]
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("groupnorm bwd graph execution complete.")
import hipdnn
import torch
def build_groupnorm_fwd_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_epsilon,
    groups,
    hipdnn_data_type,
):
    """Build a graph containing one ``groupnorm`` node in TRAINING phase.

    The normalized output, mean and inverse variance are all marked as
    graph outputs before the graph is built against ``hipdnn_handle``.

    NOTE(review): the epsilon tensor is created with ``tensor_like`` only;
    this function never calls ``set_value`` on it — confirm how the backend
    obtains the epsilon value.

    Returns:
        ``(graph, x, scale, bias, epsilon, y, mean, inv_var)``.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="groupnorm_fwd",
    )

    # Graph-side inputs, created in the order x, scale, bias, epsilon.
    x = graph.tensor_like(torch_tensor_x)
    scale = graph.tensor_like(torch_tensor_scale)
    bias = graph.tensor_like(torch_tensor_bias)
    epsilon = graph.tensor_like(torch_tensor_epsilon)

    node_results = graph.groupnorm(
        norm_forward_phase=hipdnn.norm_forward_phase.TRAINING,
        input=x,
        scale=scale,
        bias=bias,
        epsilon=epsilon,
        groups=groups,
        name="groupnorm",
    )
    y, mean, inv_var = node_results
    for produced in (y, mean, inv_var):
        produced.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, scale, bias, epsilon, y, mean, inv_var
if __name__ == "__main__":
    # Example driver: build the groupnorm forward graph and execute it once.

    # Input dimensions
    batch, channels, height, width = 2, 16, 512, 512
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_x = torch.rand(
        batch, channels, height, width, dtype=torch_data_type, device="cuda"
    )
    # Per-channel affine parameters.
    torch_tensor_scale = torch.rand(channels, dtype=torch_data_type, device="cuda")
    torch_tensor_bias = torch.rand(channels, dtype=torch_data_type, device="cuda")
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1), 1e-5, dtype=torch.float32, requires_grad=False, device="cpu"
    )
    groups = 2

    hipdnn_handle = hipdnn.create_handle()
    built = build_groupnorm_fwd_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_epsilon,
        groups,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_epsilon,
        hipdnn_tensor_y,
        hipdnn_tensor_mean,
        hipdnn_tensor_inv_var,
    ) = built

    # Output buffers sized from the graph; the statistics are always float32.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")
    torch_tensor_mean = torch.empty(
        hipdnn_tensor_mean.get_dim(), dtype=torch.float32, device="cuda"
    )
    torch_tensor_inv_var = torch.empty(
        hipdnn_tensor_inv_var.get_dim(), dtype=torch.float32, device="cuda"
    )

    # The epsilon graph tensor is intentionally left out, as in the original pack.
    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
        (hipdnn_tensor_mean, torch_tensor_mean),
        (hipdnn_tensor_inv_var, torch_tensor_inv_var),
    ]
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("groupnorm fwd graph execution complete.")
import hipdnn
import torch
def build_instancenorm_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_epsilon,
    mode,
    eps,
    hipdnn_data_type,
):
    """Build a graph containing one forward ``instancenorm`` node.

    The epsilon tensor is given the value ``eps`` via ``set_value``.  Only
    the normalized output is marked as a graph output; the mean and inverse
    variance returned by the node are not exposed.

    Returns:
        ``(graph, x, scale, bias, y)``.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="instancenorm_inference",
    )

    # Graph-side inputs, created in the order x, scale, bias, epsilon.
    mirror = graph.tensor_like
    x = mirror(torch_tensor_x)
    scale = mirror(torch_tensor_scale)
    bias = mirror(torch_tensor_bias)
    epsilon = mirror(torch_tensor_epsilon)
    epsilon.set_value(eps)

    # The statistics are produced by the node but deliberately left unused.
    y, _mean, _inv_var = graph.instancenorm(
        mode,
        x,
        scale,
        bias,
        epsilon,
        fp32,
        name="instancenorm",
    )
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, scale, bias, y)
if __name__ == "__main__":
    # Example driver: build the instancenorm inference graph and execute it once.

    # Input dimensions: batch, input channels, height, width
    n, c, h, w = 16, 32, 64, 64
    mode = hipdnn.norm_forward_phase.INFERENCE  # Mode
    eps = 1e-5
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    per_channel_shape = (1, c, 1, 1)
    torch_tensor_x = torch.rand((n, c, h, w), dtype=torch_data_type, device="cuda")
    torch_tensor_scale = torch.rand(per_channel_shape, dtype=torch_data_type, device="cuda")
    torch_tensor_bias = torch.rand(per_channel_shape, dtype=torch_data_type, device="cuda")
    # Epsilon lives on the host; the builder assigns its value with set_value(eps).
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1), eps, dtype=torch.float32, requires_grad=False, device="cpu"
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_instancenorm_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_epsilon,
        mode,
        eps,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_scale, hipdnn_tensor_bias, hipdnn_tensor_y = built

    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")

    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    ]
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("instancenorm graph execution complete.")
import hipdnn
import torch
def build_instancenorm_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_scale,
    torch_tensor_dy,
    torch_tensor_mean,
    torch_tensor_inv_var,
    hipdnn_data_type,
):
    """Build a graph containing one ``instancenorm_backward`` node.

    The node receives dy, x, scale, mean and inverse variance (in that
    positional order) and its three results — ``dx``, ``dbias``, ``dscale`` —
    are marked as graph outputs before the graph is built.

    Returns:
        ``(graph, x, scale, dy, mean, inv_var, dx, dbias, dscale)``.
    """
    graph_options = dict(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="instancenorm_backward",
    )
    graph = hipdnn.pygraph(**graph_options)

    # Graph-side inputs, created in the order x, scale, dy, mean, inv_var.
    x = graph.tensor_like(torch_tensor_x)
    scale = graph.tensor_like(torch_tensor_scale)
    dy = graph.tensor_like(torch_tensor_dy)
    mean = graph.tensor_like(torch_tensor_mean)
    inv_var = graph.tensor_like(torch_tensor_inv_var)

    gradients = graph.instancenorm_backward(
        dy,
        x,
        scale,
        mean,
        inv_var,
        hipdnn.data_type.FLOAT,
        name="instancenorm_backward",
    )
    dx, dbias, dscale = gradients
    for gradient in (dx, dbias, dscale):
        gradient.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, scale, dy, mean, inv_var, dx, dbias, dscale
if __name__ == "__main__":
    # Example driver: build the instancenorm backward graph and execute it once.

    # Input dimensions: batch, input channels, height, width
    n, c, h, w = 16, 32, 64, 64
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    full_shape = (n, c, h, w)
    stats_shape = (n, c, 1, 1)
    torch_tensor_x = torch.rand(full_shape, dtype=torch_data_type, device="cuda")
    torch_tensor_scale = torch.rand((1, c, 1, 1), dtype=torch_data_type, device="cuda")
    torch_tensor_dy = torch.rand(full_shape, dtype=torch_data_type, device="cuda")
    # Saved statistics, filled with random values for this example.
    torch_tensor_mean = torch.rand(stats_shape, dtype=torch_data_type, device="cuda")
    torch_tensor_inv_var = torch.rand(stats_shape, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    built = build_instancenorm_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_dy,
        torch_tensor_mean,
        torch_tensor_inv_var,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_scale,
        hipdnn_tensor_dy,
        hipdnn_tensor_mean,
        hipdnn_tensor_inv_var,
        hipdnn_tensor_dx,
        hipdnn_tensor_dbias,
        hipdnn_tensor_dscale,
    ) = built

    # Gradient buffers sized from the dims reported by the built graph.
    torch_tensor_dx = torch.empty(hipdnn_tensor_dx.get_dim(), dtype=torch_data_type, device="cuda")
    torch_tensor_dbias = torch.empty(
        hipdnn_tensor_dbias.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_dscale = torch.empty(
        hipdnn_tensor_dscale.get_dim(), dtype=torch_data_type, device="cuda"
    )

    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_mean, torch_tensor_mean),
        (hipdnn_tensor_inv_var, torch_tensor_inv_var),
        (hipdnn_tensor_dx, torch_tensor_dx),
        (hipdnn_tensor_dbias, torch_tensor_dbias),
        (hipdnn_tensor_dscale, torch_tensor_dscale),
    ]
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("instancenorm backward graph execution complete.")
import hipdnn
import torch
def build_kthvalue_graph(
    hipdnn_handle, torch_tensor_input, hipdnn_data_type, k=2, dim=1, keep_dim=False
):
    """Build a graph containing one ``kthvalue`` node.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_input: Tensor whose layout the graph input mirrors; its
            ``shape`` is also used to derive the output dims.
        hipdnn_data_type: I/O data type of the graph.
        k: Which smallest element to select (default 2, the former
            hard-coded value).
        dim: Dimension along which to select (default 1).
        keep_dim: Whether the reduced dimension is kept with size 1
            (default False).

    Returns:
        ``(graph, input, output, indices)``; ``output`` and ``indices`` are
        marked as graph outputs and share the same dims.
    """
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="kthvalue_inference",
    )
    hipdnn_tensor_input = graph.tensor_like(torch_tensor_input)
    output, indices = graph.kthvalue(
        input=hipdnn_tensor_input, k=k, dim=dim, keep_dim=keep_dim, name="kthvalue"
    )

    # Derive the result dims from the input shape instead of hard-coding [4]:
    # the reduced dimension is dropped, or kept as size 1 when keep_dim is set.
    result_dim = list(torch_tensor_input.shape)
    axis = dim % len(result_dim)  # normalizes a negative dim
    if keep_dim:
        result_dim[axis] = 1
    else:
        del result_dim[axis]

    output.set_output(True).set_dim(list(result_dim))
    indices.set_output(True).set_dim(list(result_dim))
    graph.build(hipdnn_handle)
    return (graph, hipdnn_tensor_input, output, indices)
if __name__ == "__main__":
    # Example driver: build the kthvalue graph and execute it once.
    batch, dim = 4, 10
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    torch_tensor_input = torch.rand(batch, dim, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    built = build_kthvalue_graph(hipdnn_handle, torch_tensor_input, hipdnn_data_type)
    graph, hipdnn_tensor_input, hipdnn_tensor_output, hipdnn_tensor_indices = built

    # One selected value and one int64 index per batch row.
    torch_tensor_output = torch.empty(batch, dtype=torch_data_type, device="cuda")
    torch_tensor_indices = torch.empty(batch, dtype=torch.int64, device="cuda")

    bindings = [
        (hipdnn_tensor_input, torch_tensor_input),
        (hipdnn_tensor_output, torch_tensor_output),
        (hipdnn_tensor_indices, torch_tensor_indices),
    ]
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("kthvalue graph execution complete.")
import hipdnn
import torch
def build_layernorm_inference_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_epsilon,
    hipdnn_data_type,
):
    """Build a graph containing one ``layernorm`` node in INFERENCE phase.

    Only the normalized output is marked as a graph output; the mean and
    inverse variance returned by the node are not exposed.

    NOTE(review): the epsilon tensor is created with ``tensor_like`` only;
    this function never calls ``set_value`` on it — confirm how the backend
    obtains the epsilon value.

    Returns:
        ``(graph, x, scale, bias, epsilon, y)``.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="layernorm_inference",
    )

    # Graph-side inputs, created in the order x, scale, bias, epsilon.
    mirror = graph.tensor_like
    x = mirror(torch_tensor_x)
    scale = mirror(torch_tensor_scale)
    bias = mirror(torch_tensor_bias)
    epsilon = mirror(torch_tensor_epsilon)

    # The statistics are produced by the node but deliberately left unused.
    y, _mean, _inv_var = graph.layernorm(
        norm_forward_phase=hipdnn.norm_forward_phase.INFERENCE,
        input=x,
        scale=scale,
        bias=bias,
        epsilon=epsilon,
        name="layernorm",
    )
    y.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, scale, bias, epsilon, y
if __name__ == "__main__":
    # Example driver: build the layernorm inference graph and execute it once.

    # Input dimensions
    batch, seq_len, embedding_dim = 2, 1024, 768
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_x = torch.rand(
        batch, seq_len, embedding_dim, dtype=torch_data_type, device="cuda"
    )
    # Affine parameters, one value per embedding element.
    torch_tensor_scale = torch.rand(embedding_dim, dtype=torch_data_type, device="cuda")
    torch_tensor_bias = torch.rand(embedding_dim, dtype=torch_data_type, device="cuda")
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1), 1e-5, dtype=torch.float32, requires_grad=False, device="cpu"
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_layernorm_inference_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_epsilon,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_epsilon,
        hipdnn_tensor_y,
    ) = built

    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")

    # The epsilon graph tensor is intentionally left out, as in the original pack.
    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    ]
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("layernorm inference graph execution complete.")
import hipdnn
import torch
def build_layernorm_backward_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_dy,
    torch_tensor_scale,
    torch_tensor_mean,
    torch_tensor_inv_var,
    hipdnn_data_type,
):
    """Build a graph containing one ``layernorm_backward`` node.

    The node's three results — ``dx``, ``dbias``, ``dscale`` — are marked as
    graph outputs before the graph is built against ``hipdnn_handle``.

    Returns:
        ``(graph, x, dy, scale, mean, inv_var, dx, dbias, dscale)``.
    """
    graph_options = dict(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="layernorm_backward",
    )
    graph = hipdnn.pygraph(**graph_options)

    # Graph-side inputs, created in the order x, dy, scale, mean, inv_var.
    x = graph.tensor_like(torch_tensor_x)
    dy = graph.tensor_like(torch_tensor_dy)
    scale = graph.tensor_like(torch_tensor_scale)
    mean = graph.tensor_like(torch_tensor_mean)
    inv_var = graph.tensor_like(torch_tensor_inv_var)

    gradients = graph.layernorm_backward(
        grad=dy,
        input=x,
        scale=scale,
        mean=mean,
        inv_variance=inv_var,
        name="layernorm_backward",
    )
    dx, dbias, dscale = gradients
    for gradient in (dx, dbias, dscale):
        gradient.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, dy, scale, mean, inv_var, dx, dbias, dscale
if __name__ == "__main__":
    # Example driver: build the layernorm backward graph and execute it once.

    # Input dimensions
    batch, seq_len, embedding_dim = 2, 1024, 768
    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    activation_shape = (batch, seq_len, embedding_dim)
    torch_tensor_x = torch.rand(*activation_shape, dtype=torch_data_type, device="cuda")
    torch_tensor_dy = torch.rand(*activation_shape, dtype=torch_data_type, device="cuda")
    torch_tensor_scale = torch.rand(embedding_dim, dtype=torch_data_type, device="cuda")
    # Saved statistics: batch * seq_len elements each, random for this example.
    stats_len = batch * seq_len
    torch_tensor_mean = torch.rand(stats_len, dtype=torch_data_type, device="cuda")
    torch_tensor_inv_var = torch.rand(stats_len, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    built = build_layernorm_backward_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_dy,
        torch_tensor_scale,
        torch_tensor_mean,
        torch_tensor_inv_var,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_dy,
        hipdnn_tensor_scale,
        hipdnn_tensor_mean,
        hipdnn_tensor_inv_var,
        hipdnn_tensor_dx,
        hipdnn_tensor_dbias,
        hipdnn_tensor_dscale,
    ) = built

    # Gradient buffers sized from the dims reported by the built graph.
    torch_tensor_dx = torch.empty(hipdnn_tensor_dx.get_dim(), dtype=torch_data_type, device="cuda")
    torch_tensor_dbias = torch.empty(
        hipdnn_tensor_dbias.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_dscale = torch.empty(
        hipdnn_tensor_dscale.get_dim(), dtype=torch_data_type, device="cuda"
    )

    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_mean, torch_tensor_mean),
        (hipdnn_tensor_inv_var, torch_tensor_inv_var),
        (hipdnn_tensor_dx, torch_tensor_dx),
        (hipdnn_tensor_dbias, torch_tensor_dbias),
        (hipdnn_tensor_dscale, torch_tensor_dscale),
    ]
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("layernorm backward graph execution complete.")
import hipdnn
import torch
def build_matmul_graph(
    hipdnn_handle,
    torch_tensor_inputA,
    torch_tensor_inputB,
    hipdnn_data_type,
):
    """Build a graph containing one ``matmul`` node (A @ B).

    The product tensor is marked as the graph output before the graph is
    built against ``hipdnn_handle``.

    Returns:
        ``(graph, a, b, y)`` with the hipdnn tensors of the graph.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="matmul",
    )

    # Graph-side operands mirroring the torch inputs.
    lhs = graph.tensor_like(torch_tensor_inputA)
    rhs = graph.tensor_like(torch_tensor_inputB)

    product = graph.matmul(a=lhs, b=rhs, name="matmul")
    product.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, lhs, rhs, product)
if __name__ == "__main__":
    # Example driver: build the batched matmul graph and execute it once.

    # Input dimensions
    b = 2  # Batch size
    n = 16  # Height
    m = 32  # Width
    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # A is (b, n, m) and B is (b, m, n).
    torch_tensor_inputA = torch.rand(b, n, m, dtype=torch_data_type, device="cuda")
    torch_tensor_inputB = torch.rand(b, m, n, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    built = build_matmul_graph(
        hipdnn_handle,
        torch_tensor_inputA,
        torch_tensor_inputB,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_inputA, hipdnn_tensor_inputB, hipdnn_tensor_y = built

    # Result buffer sized from the dims reported by the built graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    bindings = [
        (hipdnn_tensor_inputA, torch_tensor_inputA),
        (hipdnn_tensor_inputB, torch_tensor_inputB),
        (hipdnn_tensor_y, torch_tensor_y),
    ]
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("matmul graph execution complete.")
import hipdnn
import torch
def build_matmul_bias_graph(
    hipdnn_handle,
    torch_tensor_inputA,
    torch_tensor_inputB,
    torch_tensor_bias,
    hipdnn_data_type,
):
    """Build a hipdnn graph that adds a bias tensor to a matmul result.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_inputA: Torch tensor the ``a`` operand is modelled on.
        torch_tensor_inputB: Torch tensor the ``b`` operand is modelled on.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        hipdnn_data_type: Forwarded as the graph's ``io_data_type``.

    Returns:
        ``(graph, operand_a, operand_b, bias, biased)``: the built graph
        followed by the hipdnn tensors of the three inputs and of the output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="matmul_bias",
    )

    operand_a = graph.tensor_like(torch_tensor_inputA)
    operand_b = graph.tensor_like(torch_tensor_inputB)
    bias = graph.tensor_like(torch_tensor_bias)

    # matmul feeds the add node; only the result of the add is an output.
    product = graph.matmul(a=operand_a, b=operand_b, name="matmul")
    biased = graph.add(a=product, b=bias, name="bias")
    biased.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, operand_a, operand_b, bias, biased)
if __name__ == "__main__":
    # Problem size: A is (b, n, m), B is (b, m, n) and the bias is (1, 1, n).
    b, n, m = 2, 16, 32

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    # All data tensors of this example share dtype and device.
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_inputA = torch.rand(b, n, m, **tensor_options)
    torch_tensor_inputB = torch.rand(b, m, n, **tensor_options)
    torch_tensor_bias = torch.rand(1, 1, n, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_matmul_bias_graph(
        hipdnn_handle,
        torch_tensor_inputA,
        torch_tensor_inputB,
        torch_tensor_bias,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_inputA,
        hipdnn_tensor_inputB,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = built

    # The output buffer takes its shape from the graph's output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_options)

    # Bind every graph tensor to the address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_inputA: torch_tensor_inputA.data_ptr(),
        hipdnn_tensor_inputB: torch_tensor_inputB.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer of the size requested by the graph, allocated as uint8.
    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("matmul_bias graph execution complete.")
import hipdnn
import torch
def build_matmul_bias_graph(
    hipdnn_handle,
    torch_tensor_inputA,
    torch_tensor_inputB,
    torch_tensor_bias,
    hipdnn_data_type,
):
    """Build a hipdnn graph computing relu(matmul(a, b) + bias).

    The function keeps its original name for the caller in this script even
    though the graph also contains a relu node.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_inputA: Torch tensor the ``a`` operand is modelled on.
        torch_tensor_inputB: Torch tensor the ``b`` operand is modelled on.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        hipdnn_data_type: Forwarded as the graph's ``io_data_type``.

    Returns:
        ``(graph, hipdnn_tensor_inputA, hipdnn_tensor_inputB,
        hipdnn_tensor_bias, hipdnn_tensor_y)``: the built graph, the hipdnn
        tensors of the three inputs, and the relu output tensor.
    """
    # Create graph. The name now reflects all three fused nodes; it used to
    # be the copy-pasted "matmul_bias" label of the relu-less example.
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="matmul_bias_relu",
    )

    # Create hipdnn tensors
    hipdnn_tensor_inputA = graph.tensor_like(torch_tensor_inputA)
    hipdnn_tensor_inputB = graph.tensor_like(torch_tensor_inputB)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)

    # Create matmul
    hipdnn_tensor_matmul_output = graph.matmul(
        a=hipdnn_tensor_inputA,
        b=hipdnn_tensor_inputB,
        name="matmul",
    )

    # Create bias
    hipdnn_tensor_bias_out = graph.add(
        a=hipdnn_tensor_matmul_output, b=hipdnn_tensor_bias, name="bias"
    )

    # Create relu; only its result is marked as a graph output.
    hipdnn_tensor_y = graph.relu(input=hipdnn_tensor_bias_out, lower_clip=0.0, name="relu")
    hipdnn_tensor_y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, hipdnn_tensor_inputA, hipdnn_tensor_inputB, hipdnn_tensor_bias, hipdnn_tensor_y)
if __name__ == "__main__":
    # Problem size: A is (b, n, m), B is (b, m, n) and the bias is (1, 1, n).
    b, n, m = 2, 16, 32

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    # All data tensors of this example share dtype and device.
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_inputA = torch.rand(b, n, m, **tensor_options)
    torch_tensor_inputB = torch.rand(b, m, n, **tensor_options)
    torch_tensor_bias = torch.rand(1, 1, n, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_matmul_bias_graph(
        hipdnn_handle,
        torch_tensor_inputA,
        torch_tensor_inputB,
        torch_tensor_bias,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_inputA,
        hipdnn_tensor_inputB,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = built

    # The output buffer takes its shape from the graph's output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_options)

    # Bind every graph tensor to the address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_inputA: torch_tensor_inputA.data_ptr(),
        hipdnn_tensor_inputB: torch_tensor_inputB.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer of the size requested by the graph, allocated as uint8.
    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("matmul_bias_relu graph execution complete.")
import hipdnn
import torch
def build_multi_margin_loss_graph(
    hipdnn_handle, torch_tensor_input, torch_tensor_target, torch_tensor_weight, hipdnn_data_type
):
    """Build a hipdnn graph holding one multi_margin_loss node.

    The node is configured with ``p=1``, ``margin=1.0`` and the ``AVG``
    reduction mode.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_input: Torch tensor the ``input`` operand is modelled on.
        torch_tensor_target: Torch tensor the ``target`` operand is modelled on.
        torch_tensor_weight: Torch tensor the ``weight`` operand is modelled on.
        hipdnn_data_type: Forwarded as the graph's ``io_data_type``.

    Returns:
        ``(graph, scores, target, weight, loss)``: the built graph followed by
        the hipdnn tensors of the three inputs and of the loss output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="multi_margin_loss",
    )

    scores = graph.tensor_like(torch_tensor_input)
    target = graph.tensor_like(torch_tensor_target)
    weight = graph.tensor_like(torch_tensor_weight)

    loss = graph.multi_margin_loss(
        input=scores,
        target=target,
        weight=weight,
        p=1,
        margin=1.0,
        reduction=hipdnn.reduction_mode.AVG,
        name="multi_margin_loss",
    )
    loss.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, scores, target, weight, loss)
if __name__ == "__main__":
    # Input dimensions: `batch` samples scored over `num_classes` classes.
    batch, num_classes = 4, 10

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_input = torch.rand(batch, num_classes, dtype=torch_data_type, device="cuda")
    # Targets are int64 class indices in [0, num_classes).
    torch_tensor_target = torch.randint(0, num_classes, (batch,), dtype=torch.int64, device="cuda")
    # Pass the dtype explicitly so the weights keep following torch_data_type
    # (previously they relied on torch's default dtype).
    torch_tensor_weight = torch.ones(num_classes, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    graph, hipdnn_tensor_input, hipdnn_tensor_target, hipdnn_tensor_weight, hipdnn_tensor_output = (
        build_multi_margin_loss_graph(
            hipdnn_handle,
            torch_tensor_input,
            torch_tensor_target,
            torch_tensor_weight,
            hipdnn_data_type,
        )
    )

    # NOTE(review): the buffer holds `batch` elements although the graph uses
    # the AVG reduction; other examples size outputs via get_dim() -- confirm
    # which shape the loss output really has.
    torch_tensor_output = torch.empty(batch, dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_input: torch_tensor_input.data_ptr(),
        hipdnn_tensor_target: torch_tensor_target.data_ptr(),
        hipdnn_tensor_weight: torch_tensor_weight.data_ptr(),
        hipdnn_tensor_output: torch_tensor_output.data_ptr(),
    }

    # Scratch buffer of the size requested by the graph, allocated as uint8.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("multi_margin_loss graph execution complete.")
import hipdnn
import torch
def build_binary_pointwise_graph(
    hipdnn_handle,
    torch_tensor_in0,
    torch_tensor_in1,
    hipdnn_data_type,
):
    """Build a hipdnn graph with one binary pointwise node (ADD).

    ADD serves as the example here; other binary pointwise ops can be wired
    up in the same way.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_in0: Torch tensor the first operand is modelled on.
        torch_tensor_in1: Torch tensor the second operand is modelled on.
        hipdnn_data_type: Forwarded as the graph's ``io_data_type``.

    Returns:
        ``(graph, lhs, rhs, total)``: the built graph followed by the hipdnn
        tensors of both operands and of the result.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="add_graph",
    )

    lhs = graph.tensor_like(torch_tensor_in0)
    rhs = graph.tensor_like(torch_tensor_in1)

    # Arguments stay positional, exactly as the op is called in this example:
    # both operands, the FLOAT data type, then the node name.
    total = graph.add(lhs, rhs, float_type, "add_node")
    total.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, lhs, rhs, total)
if __name__ == "__main__":
    # Both operands are created with the same (n, c, h, w) shape.
    n, c, h, w = 8, 32, 16, 16

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    # All data tensors of this example share dtype and device.
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_x = torch.rand(n, c, h, w, **tensor_options)
    torch_tensor_b = torch.rand(n, c, h, w, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_binary_pointwise_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_b,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_in0, hipdnn_tensor_in1, hipdnn_tensor_out = built

    # The output buffer takes its shape from the graph's output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_out.get_dim(), **tensor_options)

    # Bind every graph tensor to the address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_in0: torch_tensor_x.data_ptr(),
        hipdnn_tensor_in1: torch_tensor_b.data_ptr(),
        hipdnn_tensor_out: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer of the size requested by the graph, allocated as uint8.
    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Binary pointwise ADD graph execution complete.")
import hipdnn
import torch
def build_prelu_backward_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_dy,
    torch_tensor_weight,
    negative_slope,
    hipdnn_data_type,
):
    """Build a hipdnn graph holding one prelu_backward node.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor the ``input`` operand is modelled on.
        torch_tensor_dy: Torch tensor the ``loss`` operand is modelled on.
        torch_tensor_weight: Torch tensor the ``weight`` operand is modelled on.
        negative_slope: Forwarded unchanged to ``graph.prelu_backward``.
        hipdnn_data_type: Forwarded as the graph's ``io_data_type``.

    Returns:
        ``(graph, hipdnn_tensor_x, hipdnn_tensor_dy, hipdnn_tensor_weight,
        hipdnn_tensor_dx, hipdnn_tensor_dweight)``: the built graph, the
        hipdnn tensors of the three inputs, and the two output tensors.
    """
    # Create graph. It is named after the op it contains; the previous
    # "convolution_forward" label was a copy-paste leftover.
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="prelu_backward",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_dy = graph.tensor_like(torch_tensor_dy)
    hipdnn_tensor_weight = graph.tensor_like(torch_tensor_weight)

    # Create prelu backward op; it yields two tensors, both marked as outputs.
    hipdnn_tensor_dx, hipdnn_tensor_dweight = graph.prelu_backward(
        input=hipdnn_tensor_x,
        weight=hipdnn_tensor_weight,
        loss=hipdnn_tensor_dy,
        negative_slope=negative_slope,
        name="prelu_backward",
    )
    hipdnn_tensor_dx.set_output(True)
    hipdnn_tensor_dweight.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_dy,
        hipdnn_tensor_weight,
        hipdnn_tensor_dx,
        hipdnn_tensor_dweight,
    )
if __name__ == "__main__":
    # x and dy share one 4-D shape; the weight has `channels` elements.
    batch, channels, height, width = 128, 64, 112, 112
    activation_shape = (batch, channels, height, width)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    # All data tensors of this example share dtype and device.
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_x = torch.rand(*activation_shape, **tensor_options)
    torch_tensor_dy = torch.rand(*activation_shape, **tensor_options)
    torch_tensor_weight = torch.rand(channels, **tensor_options)
    negative_slope = 0.1

    hipdnn_handle = hipdnn.create_handle()
    built = build_prelu_backward_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_dy,
        torch_tensor_weight,
        negative_slope,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_dy,
        hipdnn_tensor_weight,
        hipdnn_tensor_dx,
        hipdnn_tensor_dweight,
    ) = built

    # Output buffers take their shapes from the graph's output tensors.
    torch_tensor_dx = torch.empty(hipdnn_tensor_dx.get_dim(), **tensor_options)
    torch_tensor_dweight = torch.empty(hipdnn_tensor_dweight.get_dim(), **tensor_options)

    # Bind every graph tensor to the address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_dy: torch_tensor_dy.data_ptr(),
        hipdnn_tensor_weight: torch_tensor_weight.data_ptr(),
        hipdnn_tensor_dx: torch_tensor_dx.data_ptr(),
        hipdnn_tensor_dweight: torch_tensor_dweight.data_ptr(),
    }

    # Scratch buffer of the size requested by the graph, allocated as uint8.
    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Prelu backward graph execution complete.")
import hipdnn
import torch
def build_reduction_graph(hipdnn_handle, torch_tensor_x, mode, y_dims, hipdnn_data_type):
    """Build a hipdnn graph holding one reduction node.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor the reduction input is modelled on.
        mode: Reduction mode forwarded to ``graph.reduction``.
        y_dims: Dimensions set explicitly on the output tensor.
        hipdnn_data_type: Forwarded as the graph's ``io_data_type``.

    Returns:
        ``(graph, hipdnn_tensor_x, hipdnn_tensor_y)``: the built graph and the
        hipdnn tensors of the input and of the reduced output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="reduction_inference",
    )

    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)

    hipdnn_tensor_y = graph.reduction(
        input=hipdnn_tensor_x,
        mode=mode,
        compute_data_type=float_type,
        name="reduction",
    )
    # The output dims are supplied by the caller; set_output is invoked on
    # whatever set_dim returns, exactly like the chained form.
    with_dims = hipdnn_tensor_y.set_dim(y_dims)
    with_dims.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, hipdnn_tensor_x, hipdnn_tensor_y)
if __name__ == "__main__":
    # The input is (batch, seq_len, embedding_dim); the requested output dims
    # are [batch, seq_len, 1].
    batch, seq_len, embedding_dim = 2, 1024, 768
    mode = hipdnn.reduction_mode.ADD

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    # All data tensors of this example share dtype and device.
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_x = torch.rand(batch, seq_len, embedding_dim, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    y_dims = [batch, seq_len, 1]
    built = build_reduction_graph(hipdnn_handle, torch_tensor_x, mode, y_dims, hipdnn_data_type)
    graph, hipdnn_tensor_x, hipdnn_tensor_y = built

    # The output buffer takes its shape from the graph's output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_options)

    # Bind every graph tensor to the address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer of the size requested by the graph, allocated as uint8.
    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("reduction graph execution complete.")
import hipdnn
import torch
def build_resample_graph(
    hipdnn_handle,
    torch_tensor_x,
    scale,
    resample_mode,
    coordinate_transform_mode,
    generate_index,
    hipdnn_data_type,
):
    """Build a hipdnn graph holding one resample node.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor the resample input is modelled on.
        scale: Forwarded unchanged to ``graph.resample``.
        resample_mode: Forwarded unchanged to ``graph.resample``.
        coordinate_transform_mode: Forwarded unchanged to ``graph.resample``.
        generate_index: Forwarded unchanged to ``graph.resample``.
        hipdnn_data_type: Forwarded as the graph's ``io_data_type``.

    Returns:
        ``(graph, source, resampled)``: the built graph and the hipdnn tensors
        of the input and of the resampled output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="resample",
    )

    source = graph.tensor_like(torch_tensor_x)

    # The op returns a pair; only its first element is used and marked as
    # an output, the second one is discarded.
    resampled, _ = graph.resample(
        x=source,
        scale=scale,
        resample_mode=resample_mode,
        coordinate_transform_mode=coordinate_transform_mode,
        generate_index=generate_index,
        name="resample",
    )
    resampled.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, source, resampled)
if __name__ == "__main__":
    # The input is created as (n, c, h, w) and converted to channels_last.
    n, c, h, w = 4, 16, 8, 8

    scale = [3, 3]
    resample_mode = hipdnn.resample_mode.BILINEAR
    coordinate_transform_mode = hipdnn.coordinate_transform_mode.COORDINATE_ASYMMETRIC
    generate_index = False

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    # All data tensors of this example share dtype and device.
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    contiguous_x = torch.rand(n, c, h, w, **tensor_options)
    torch_tensor_x = contiguous_x.to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()
    built = build_resample_graph(
        hipdnn_handle,
        torch_tensor_x,
        scale,
        resample_mode,
        coordinate_transform_mode,
        generate_index,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_y = built

    # NOTE(review): the input is channels_last while this buffer uses torch's
    # default layout; confirm it matches the strides of the graph's output.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_options)

    # Bind every graph tensor to the address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer of the size requested by the graph, allocated as uint8.
    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Resample graph execution complete.")
import hipdnn
import torch
def build_rmsnorm_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_epsilon,
    norm_forward_phase,
    eps,
    hipdnn_data_type,
):
    """Build a hipdnn graph holding one rmsnorm node.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor the ``input`` operand is modelled on.
        torch_tensor_scale: Torch tensor the ``scale`` operand is modelled on.
        torch_tensor_bias: Torch tensor the ``bias`` operand is modelled on.
        torch_tensor_epsilon: Torch tensor the ``epsilon`` operand is modelled on.
        norm_forward_phase: Forwarded unchanged to ``graph.rmsnorm``.
        eps: Value assigned to the epsilon tensor through ``set_value``.
        hipdnn_data_type: Forwarded as the graph's ``io_data_type``.

    Returns:
        ``(graph, hipdnn_tensor_x, hipdnn_tensor_scale, hipdnn_tensor_bias,
        hipdnn_tensor_y, hipdnn_tensor_inv_var)``. The bias tensor is part of
        the tuple because it is an input of the rmsnorm node and the caller
        has to bind a buffer to it when executing the graph.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="rmsnorm_inference",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_scale = graph.tensor_like(torch_tensor_scale)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_epsilon = graph.tensor_like(torch_tensor_epsilon)
    # Epsilon gets its value here; presumably this is why no buffer is bound
    # to it at execution time -- TODO confirm against the hipdnn docs.
    hipdnn_tensor_epsilon.set_value(eps)

    # Create op
    hipdnn_tensor_y, hipdnn_tensor_inv_var = graph.rmsnorm(
        norm_forward_phase=norm_forward_phase,
        input=hipdnn_tensor_x,
        scale=hipdnn_tensor_scale,
        bias=hipdnn_tensor_bias,
        epsilon=hipdnn_tensor_epsilon,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="rmsnorm",
    )
    hipdnn_tensor_y.set_output(True)
    hipdnn_tensor_inv_var.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
        hipdnn_tensor_inv_var,
    )


if __name__ == "__main__":
    # Input dimensions
    batch = 2  # Batch size
    seq_len = 1024  # Sequence length
    embedding_dim = 768  # Number of features
    norm_forward_phase = hipdnn.norm_forward_phase.TRAINING  # Norm forward phase
    eps = 1e-5

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_x = torch.rand(batch, seq_len, embedding_dim, dtype=torch_data_type, device="cuda")
    torch_tensor_scale = torch.rand(1, 1, embedding_dim, dtype=torch_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, 1, embedding_dim, dtype=torch_data_type, device="cuda")
    # Epsilon lives on the CPU; it only serves as a template for the graph
    # tensor, whose value is set from `eps` inside build_rmsnorm_graph.
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1), eps, dtype=torch.float32, requires_grad=False, device="cpu"
    )

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
        hipdnn_tensor_inv_var,
    ) = build_rmsnorm_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_epsilon,
        norm_forward_phase,
        eps,
        hipdnn_data_type,
    )

    # Output buffers take their shapes from the graph's output tensors.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")
    torch_tensor_inv_var = torch.empty(
        hipdnn_tensor_inv_var.get_dim(), dtype=torch_data_type, device="cuda"
    )

    # Bind every graph tensor to the address of its torch buffer. The bias
    # entry was missing before, leaving an input of the rmsnorm node unbound.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_scale: torch_tensor_scale.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_inv_var: torch_tensor_inv_var.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer of the size requested by the graph, allocated as uint8.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("rmsnorm graph execution complete.")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment