Commit ca34d4d2 authored by yanjl1
Browse files

Initial

parents
import hipdnn
import torch
def build_concat_conv_bias_add_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    concat_axis,
):
    """Build a hipdnn graph for ``conv(concat(x1, x2), w) + bias + add``.

    Each torch tensor is only used as a template for ``graph.tensor_like``.
    The graph uses ``hipdnn_data_type`` for I/O and FLOAT for intermediate
    and compute types.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Template for the first concatenation input.
        torch_tensor_x2: Template for the second concatenation input.
        torch_tensor_w: Template for the convolution weight.
        torch_tensor_bias: Template for the bias operand.
        torch_tensor_add: Template for the operand of the final add.
        padding: Forwarded to ``graph.conv_fprop``.
        stride: Forwarded to ``graph.conv_fprop``.
        dilation: Forwarded to ``graph.conv_fprop``.
        hipdnn_data_type: I/O data type of the graph.
        concat_axis: Axis forwarded to ``graph.concatenate``.

    Returns:
        Tuple ``(graph, x1, x2, w, bias, add, y)``: the built graph followed by
        the hipdnn input tensors and the output tensor ``y``.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="concat_conv_bias_add",
    )

    # Graph-side tensors, created in the same order as the arguments.
    x1, x2, w, bias, residual = (
        graph.tensor_like(template)
        for template in (
            torch_tensor_x1,
            torch_tensor_x2,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_add,
        )
    )

    # concat -> conv -> bias -> add
    concat_out = graph.concatenate(x=[x1, x2], axis=concat_axis, name="concatenate")
    conv_out = graph.conv_fprop(
        image=concat_out,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = graph.add(a=conv_out, b=bias, name="bias")
    y = graph.add(a=biased, b=residual, name="add")
    y.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x1, x2, w, bias, residual, y
if __name__ == "__main__":
    # Input dimensions (batch, channels per input, height, width)
    n, c, h, w = 1, 32, 128, 128
    # Filter dimensions (output channels, kernel height, kernel width)
    k, r, s = 32, 3, 3
    # Convolution parameters, one value per spatial axis (height, width)
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    concat_axis = 1

    def _rand_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` in channels_last format."""
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    torch_tensor_x1 = _rand_channels_last(n, c, h, w)
    torch_tensor_x2 = _rand_channels_last(n, c, h, w)
    # Both inputs are concatenated along axis 1, so the filter takes 2 * c channels.
    torch_tensor_w = _rand_channels_last(k, 2 * c, r, s)
    torch_tensor_bias = _rand_channels_last(1, k, 1, 1)
    torch_tensor_add = _rand_channels_last(n, k, h, w)

    hipdnn_handle = hipdnn.create_handle()
    built = build_concat_conv_bias_add_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        concat_axis,
    )
    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = built

    # NOTE(review): the output buffer uses torch's default (contiguous) layout
    # while every input is channels_last — confirm this matches the strides
    # hipdnn assigns to the output before reading results back.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x1, torch_tensor_x1),
            (hipdnn_tensor_x2, torch_tensor_x2),
            (hipdnn_tensor_w, torch_tensor_w),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_add, torch_tensor_add),
            (hipdnn_tensor_y, torch_tensor_y),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Concat_conv_bias_add graph execution complete.")
import hipdnn
import torch
def build_concat_conv_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    concat_axis,
):
    """Build a hipdnn graph for ``relu(conv(concat(x1, x2), w) + bias)``.

    Each torch tensor is only used as a template for ``graph.tensor_like``.
    The graph uses ``hipdnn_data_type`` for I/O and FLOAT for intermediate
    and compute types.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Template for the first concatenation input.
        torch_tensor_x2: Template for the second concatenation input.
        torch_tensor_w: Template for the convolution weight.
        torch_tensor_bias: Template for the bias operand.
        padding: Forwarded to ``graph.conv_fprop``.
        stride: Forwarded to ``graph.conv_fprop``.
        dilation: Forwarded to ``graph.conv_fprop``.
        hipdnn_data_type: I/O data type of the graph.
        concat_axis: Axis forwarded to ``graph.concatenate``.

    Returns:
        Tuple ``(graph, x1, x2, w, bias, y)``: the built graph followed by the
        hipdnn input tensors and the output tensor ``y``.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="concat_conv_bias_relu",
    )

    # Graph-side tensors, created in the same order as the arguments.
    x1, x2, w, bias = (
        graph.tensor_like(template)
        for template in (
            torch_tensor_x1,
            torch_tensor_x2,
            torch_tensor_w,
            torch_tensor_bias,
        )
    )

    # concat -> conv -> bias -> relu
    concat_out = graph.concatenate(x=[x1, x2], axis=concat_axis, name="concatenate")
    conv_out = graph.conv_fprop(
        image=concat_out,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = graph.add(a=conv_out, b=bias, name="bias")
    y = graph.relu(input=biased, name="relu")
    y.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x1, x2, w, bias, y
if __name__ == "__main__":
    # Input dimensions (batch, channels per input, height, width)
    n, c, h, w = 1, 32, 128, 128
    # Filter dimensions (output channels, kernel height, kernel width)
    k, r, s = 32, 2, 2
    # Convolution parameters, one value per spatial axis (height, width)
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    concat_axis = 1

    def _rand_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` in channels_last format."""
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    torch_tensor_x1 = _rand_channels_last(n, c, h, w)
    torch_tensor_x2 = _rand_channels_last(n, c, h, w)
    # Both inputs are concatenated along axis 1, so the filter takes 2 * c channels.
    torch_tensor_w = _rand_channels_last(k, 2 * c, r, s)
    torch_tensor_bias = _rand_channels_last(1, k, 1, 1)

    hipdnn_handle = hipdnn.create_handle()
    built = build_concat_conv_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        concat_axis,
    )
    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = built

    # NOTE(review): the output buffer uses torch's default (contiguous) layout
    # while every input is channels_last — confirm this matches the strides
    # hipdnn assigns to the output before reading results back.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x1, torch_tensor_x1),
            (hipdnn_tensor_x2, torch_tensor_x2),
            (hipdnn_tensor_w, torch_tensor_w),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_y, torch_tensor_y),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Concat_conv_bias_relu graph execution complete.")
import hipdnn
import torch
def build_concat_conv_bias_relu_add_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    concat_axis,
):
    """Build a hipdnn graph for ``relu(conv(concat(x1, x2), w) + bias) + add``.

    Each torch tensor is only used as a template for ``graph.tensor_like``.
    The graph uses ``hipdnn_data_type`` for I/O and FLOAT for intermediate
    and compute types.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Template for the first concatenation input.
        torch_tensor_x2: Template for the second concatenation input.
        torch_tensor_w: Template for the convolution weight.
        torch_tensor_bias: Template for the bias operand.
        torch_tensor_add: Template for the operand of the final add.
        padding: Forwarded to ``graph.conv_fprop``.
        stride: Forwarded to ``graph.conv_fprop``.
        dilation: Forwarded to ``graph.conv_fprop``.
        hipdnn_data_type: I/O data type of the graph.
        concat_axis: Axis forwarded to ``graph.concatenate``.

    Returns:
        Tuple ``(graph, x1, x2, w, bias, add, y)``: the built graph followed by
        the hipdnn input tensors and the output tensor ``y``.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="concat_conv_bias_relu_add",
    )

    # Graph-side tensors, created in the same order as the arguments.
    x1, x2, w, bias, residual = (
        graph.tensor_like(template)
        for template in (
            torch_tensor_x1,
            torch_tensor_x2,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_add,
        )
    )

    # concat -> conv -> bias -> relu -> add
    concat_out = graph.concatenate(x=[x1, x2], axis=concat_axis, name="concatenate")
    conv_out = graph.conv_fprop(
        image=concat_out,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = graph.add(a=conv_out, b=bias, name="bias")
    activated = graph.relu(input=biased, name="relu")
    y = graph.add(a=activated, b=residual, name="add")
    y.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x1, x2, w, bias, residual, y
if __name__ == "__main__":
    # Input dimensions (batch, channels per input, height, width)
    n, c, h, w = 1, 32, 128, 128
    # Filter dimensions (output channels, kernel height, kernel width)
    k, r, s = 32, 3, 3
    # Convolution parameters, one value per spatial axis (height, width)
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    concat_axis = 1

    def _rand_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` in channels_last format."""
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    torch_tensor_x1 = _rand_channels_last(n, c, h, w)
    torch_tensor_x2 = _rand_channels_last(n, c, h, w)
    # Both inputs are concatenated along axis 1, so the filter takes 2 * c channels.
    torch_tensor_w = _rand_channels_last(k, 2 * c, r, s)
    torch_tensor_bias = _rand_channels_last(1, k, 1, 1)
    torch_tensor_add = _rand_channels_last(n, k, h, w)

    hipdnn_handle = hipdnn.create_handle()
    built = build_concat_conv_bias_relu_add_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        concat_axis,
    )
    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = built

    # NOTE(review): the output buffer uses torch's default (contiguous) layout
    # while every input is channels_last — confirm this matches the strides
    # hipdnn assigns to the output before reading results back.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x1, torch_tensor_x1),
            (hipdnn_tensor_x2, torch_tensor_x2),
            (hipdnn_tensor_w, torch_tensor_w),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_add, torch_tensor_add),
            (hipdnn_tensor_y, torch_tensor_y),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Concat_conv_bias_relu_add graph execution complete.")
import hipdnn
import torch
def build_concatenate_graph(
    hipdnn_handle, torch_tensor_x1, torch_tensor_x2, hipdnn_data_type, concat_axis=0
):
    """Build a hipdnn graph that concatenates two tensors along one axis.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Template for the first input (via ``graph.tensor_like``).
        torch_tensor_x2: Template for the second input (via ``graph.tensor_like``).
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are FLOAT.
        concat_axis: Axis forwarded to ``graph.concatenate``. Defaults to 0,
            which was previously the hard-coded axis, so existing callers
            keep their behavior.

    Returns:
        Tuple ``(graph, x1, x2, y)``: the built graph, the two hipdnn input
        tensors and the concatenated output tensor.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="concatenate",
    )
    # Create hipdnn tensors
    hipdnn_tensor_x1 = graph.tensor_like(torch_tensor_x1)
    hipdnn_tensor_x2 = graph.tensor_like(torch_tensor_x2)
    # Create concatenate op
    hipdnn_tensor_y = graph.concatenate(
        x=[hipdnn_tensor_x1, hipdnn_tensor_x2], axis=concat_axis, name="concatenate"
    )
    hipdnn_tensor_y.set_output(True)
    graph.build(hipdnn_handle)
    return (graph, hipdnn_tensor_x1, hipdnn_tensor_x2, hipdnn_tensor_y)
if __name__ == "__main__":
    # Input dimensions
    batch, seq_len, embedding_dim = 2, 1024, 768
    input_shape = (batch, seq_len, embedding_dim)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Two random inputs of identical shape on the GPU.
    torch_tensor_x1 = torch.rand(*input_shape, dtype=torch_data_type, device="cuda")
    torch_tensor_x2 = torch.rand(*input_shape, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    built = build_concatenate_graph(
        hipdnn_handle, torch_tensor_x1, torch_tensor_x2, hipdnn_data_type
    )
    graph, hipdnn_tensor_x1, hipdnn_tensor_x2, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x1, torch_tensor_x1),
            (hipdnn_tensor_x2, torch_tensor_x2),
            (hipdnn_tensor_y, torch_tensor_y),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Concatenate graph execution complete.")
import hipdnn
import torch
def build_conv_genstats_graph(
    hipdnn_handle, torch_tensor_x, torch_tensor_w, padding, stride, dilation, hipdnn_data_type
):
    """Build a hipdnn graph running a forward convolution followed by genstats.

    The convolution output and both tensors returned by ``graph.genstats`` are
    marked as graph outputs.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template for the convolution input (via ``graph.tensor_like``).
        torch_tensor_w: Template for the convolution weight (via ``graph.tensor_like``).
        padding: Forwarded to ``graph.conv_fprop``.
        stride: Forwarded to ``graph.conv_fprop``.
        dilation: Forwarded to ``graph.conv_fprop``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are FLOAT.

    Returns:
        Tuple ``(graph, x, w, y, sum, sq_sum)``: the built graph, the hipdnn
        input tensors, the convolution output and the two genstats outputs.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="conv_genstats",
    )

    x = graph.tensor_like(torch_tensor_x)
    w = graph.tensor_like(torch_tensor_w)

    y = graph.conv_fprop(
        image=x,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv",
    )
    y.set_output(True)

    # genstats yields two tensors; presumably the sum and the sum of squares
    # (inferred from the names used by callers) — both are exposed as outputs.
    stat_sum, stat_sq_sum = graph.genstats(y, fp32, name="genstats")
    for stat in (stat_sum, stat_sq_sum):
        stat.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, w, y, stat_sum, stat_sq_sum
if __name__ == "__main__":
    # Input dimensions (batch, channels, height, width)
    n, c, h, w = 4, 64, 16, 16
    # Filter dimensions (output channels, kernel height, kernel width)
    k, r, s = 32, 3, 3
    # Convolution parameters, one value per spatial axis (height, width)
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    def _rand_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` in channels_last format."""
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    torch_tensor_x = _rand_channels_last(n, c, h, w)
    torch_tensor_w = _rand_channels_last(k, c, r, s)

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_genstats_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_y,
        hipdnn_tensor_sum,
        hipdnn_tensor_sq_sum,
    ) = built

    # Output buffers sized from the dimensions reported by each graph tensor;
    # only the convolution output is allocated as channels_last.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )
    torch_tensor_sum = torch.empty(
        hipdnn_tensor_sum.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_sq_sum = torch.empty(
        hipdnn_tensor_sq_sum.get_dim(), dtype=torch_data_type, device="cuda"
    )

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x, torch_tensor_x),
            (hipdnn_tensor_w, torch_tensor_w),
            (hipdnn_tensor_y, torch_tensor_y),
            (hipdnn_tensor_sum, torch_tensor_sum),
            (hipdnn_tensor_sq_sum, torch_tensor_sq_sum),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_genstats graph execution complete.")
import hipdnn
import torch
def build_mul_mul_add_add_graph(
    hipdnn_handle,
    torch_tensor_a,
    torch_tensor_x,
    torch_tensor_b,
    torch_tensor_y,
    torch_tensor_bias,
    hipdnn_data_type,
):
    """Build a hipdnn graph for ``z = (x * a + y * b) + bias``.

    Each torch tensor is only used as a template for ``graph.tensor_like``.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_a: Template for the multiplier of ``x``.
        torch_tensor_x: Template for the first multiplicand.
        torch_tensor_b: Template for the multiplier of ``y``.
        torch_tensor_y: Template for the second multiplicand.
        torch_tensor_bias: Template for the operand of the final add.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are FLOAT.

    Returns:
        Tuple ``(graph, a, x, b, y, bias, z)``: the built graph followed by the
        hipdnn input tensors and the output tensor ``z``.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="mul_mul_add_add",
    )

    # Graph-side tensors, created in the same order as the arguments.
    a, x, b, y, bias = (
        graph.tensor_like(template)
        for template in (
            torch_tensor_a,
            torch_tensor_x,
            torch_tensor_b,
            torch_tensor_y,
            torch_tensor_bias,
        )
    )

    scaled_x = graph.mul(a=x, b=a, name="mul0")
    scaled_y = graph.mul(a=y, b=b, name="mul1")
    combined = graph.add(a=scaled_x, b=scaled_y, name="add0")
    z = graph.add(a=combined, b=bias, name="add1")
    z.set_output(True)

    graph.build(hipdnn_handle)
    return graph, a, x, b, y, bias, z
if __name__ == "__main__":
    # Tensor dimensions (batch, channels, height, width)
    n, c, h, w = 1, 4, 32, 32

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    def _rand_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` in channels_last format."""
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    # a, b and bias hold one value per channel; x and y are full-size tensors.
    torch_tensor_a = _rand_channels_last(1, c, 1, 1)
    torch_tensor_x = _rand_channels_last(n, c, h, w)
    torch_tensor_b = _rand_channels_last(1, c, 1, 1)
    torch_tensor_y = _rand_channels_last(n, c, h, w)
    torch_tensor_bias = _rand_channels_last(1, c, 1, 1)

    hipdnn_handle = hipdnn.create_handle()
    built = build_mul_mul_add_add_graph(
        hipdnn_handle,
        torch_tensor_a,
        torch_tensor_x,
        torch_tensor_b,
        torch_tensor_y,
        torch_tensor_bias,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_a,
        hipdnn_tensor_x,
        hipdnn_tensor_b,
        hipdnn_tensor_y,
        hipdnn_tensor_bias,
        hipdnn_tensor_z,
    ) = built

    # Output buffer sized from the output tensor, converted to channels_last.
    torch_tensor_z = torch.empty(
        hipdnn_tensor_z.get_dim(), dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_a, torch_tensor_a),
            (hipdnn_tensor_x, torch_tensor_x),
            (hipdnn_tensor_b, torch_tensor_b),
            (hipdnn_tensor_y, torch_tensor_y),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_z, torch_tensor_z),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("mul_mul_add_add graph execution complete.")
import hipdnn
import torch
def build_scale_bias_graph(
    hipdnn_handle, torch_tensor_x, torch_tensor_scale, torch_tensor_bias, hipdnn_data_type
):
    """Build a hipdnn graph for ``y = x * scale + bias``.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template for the input (via ``graph.tensor_like``).
        torch_tensor_scale: Template for the multiplier (via ``graph.tensor_like``).
        torch_tensor_bias: Template for the addend (via ``graph.tensor_like``).
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are FLOAT.

    Returns:
        Tuple ``(graph, x, scale, bias, y)``: the built graph followed by the
        hipdnn input tensors and the output tensor ``y``.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="scale_bias",
    )

    x = graph.tensor_like(torch_tensor_x)
    scale = graph.tensor_like(torch_tensor_scale)
    bias = graph.tensor_like(torch_tensor_bias)

    scaled = graph.mul(a=x, b=scale, name="scale")
    y = graph.add(a=scaled, b=bias, name="bias")
    y.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, scale, bias, y
if __name__ == "__main__":
    # Tensor dimensions (batch, channels, height, width)
    n, c, h, w = 1, 4, 32, 32

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    def _rand_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` in channels_last format."""
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    # scale and bias hold one value per channel.
    torch_tensor_x = _rand_channels_last(n, c, h, w)
    torch_tensor_scale = _rand_channels_last(1, c, 1, 1)
    torch_tensor_bias = _rand_channels_last(1, c, 1, 1)

    hipdnn_handle = hipdnn.create_handle()
    built = build_scale_bias_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_bias,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_scale, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the output tensor, converted to channels_last.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x, torch_tensor_x),
            (hipdnn_tensor_scale, torch_tensor_scale),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_y, torch_tensor_y),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("scale_bias graph execution complete.")
import hipdnn
import torch
def build_scale_bias_relu_conv_genstats_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_scale,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph for ``conv(relu(x * scale + bias), w)`` plus genstats.

    The convolution output and both tensors returned by ``graph.genstats`` are
    marked as graph outputs.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template for the input (via ``graph.tensor_like``).
        torch_tensor_w: Template for the convolution weight.
        torch_tensor_scale: Template for the multiplier applied to ``x``.
        torch_tensor_bias: Template for the addend applied after scaling.
        padding: Forwarded to ``graph.conv_fprop``.
        stride: Forwarded to ``graph.conv_fprop``.
        dilation: Forwarded to ``graph.conv_fprop``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are FLOAT.

    Returns:
        Tuple ``(graph, x, w, scale, bias, conv_out, sum_out, sq_sum_out)``:
        the built graph, the hipdnn input tensors, the convolution output and
        the two genstats outputs.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="scale_bias_relu_conv_genstats",
    )

    # Graph-side tensors, created in the same order as the arguments.
    x, w, scale, bias = (
        graph.tensor_like(template)
        for template in (
            torch_tensor_x,
            torch_tensor_w,
            torch_tensor_scale,
            torch_tensor_bias,
        )
    )

    # scale -> bias -> relu -> conv
    scaled = graph.mul(a=x, b=scale, name="scale")
    biased = graph.add(a=scaled, b=bias, name="bias")
    activated = graph.relu(input=biased, name="relu")
    conv_out = graph.conv_fprop(
        image=activated,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv",
    )
    conv_out.set_output(True)

    # Both genstats results are exposed as graph outputs.
    sum_out, sq_sum_out = graph.genstats(conv_out, fp32, name="genstats")
    for stat in (sum_out, sq_sum_out):
        stat.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, w, scale, bias, conv_out, sum_out, sq_sum_out
if __name__ == "__main__":
    # Input dimensions (batch, channels, height, width)
    n, c, h, w = 4, 64, 16, 16
    # Filter dimensions (output channels, kernel height, kernel width)
    k, r, s = 32, 3, 3
    # Convolution parameters, one value per spatial axis (height, width)
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    def _rand_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` in channels_last format."""
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    torch_tensor_x = _rand_channels_last(n, c, h, w)
    torch_tensor_w = _rand_channels_last(k, c, r, s)
    # scale and bias hold one value per input channel.
    torch_tensor_scale = _rand_channels_last(1, c, 1, 1)
    torch_tensor_bias = _rand_channels_last(1, c, 1, 1)

    hipdnn_handle = hipdnn.create_handle()
    built = build_scale_bias_relu_conv_genstats_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_scale,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_conv_out,
        hipdnn_tensor_sum_out,
        hipdnn_tensor_sq_sum_out,
    ) = built

    # Output buffers sized from the dimensions reported by each graph tensor;
    # only the convolution output is allocated as channels_last.
    torch_tensor_conv_out = torch.empty(
        hipdnn_tensor_conv_out.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )
    torch_tensor_sum_out = torch.empty(
        hipdnn_tensor_sum_out.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_sq_sum_out = torch.empty(
        hipdnn_tensor_sq_sum_out.get_dim(), dtype=torch_data_type, device="cuda"
    )

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x, torch_tensor_x),
            (hipdnn_tensor_w, torch_tensor_w),
            (hipdnn_tensor_scale, torch_tensor_scale),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_conv_out, torch_tensor_conv_out),
            (hipdnn_tensor_sum_out, torch_tensor_sum_out),
            (hipdnn_tensor_sq_sum_out, torch_tensor_sq_sum_out),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("scale_bias_relu_conv_genstats graph execution complete.")
import hipdnn
import torch
def build_scale_bias_relu_convwrw_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_dy,
    torch_tensor_scale,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph feeding ``relu(x * scale + bias)`` into ``conv_wgrad``.

    The result of ``graph.conv_wgrad`` (called with the activated tensor as
    ``image`` and ``dy`` as ``loss``) is the single graph output.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template for the input (via ``graph.tensor_like``).
        torch_tensor_dy: Template for the ``loss`` operand of ``conv_wgrad``.
        torch_tensor_scale: Template for the multiplier applied to ``x``.
        torch_tensor_bias: Template for the addend applied after scaling.
        padding: Forwarded to ``graph.conv_wgrad``.
        stride: Forwarded to ``graph.conv_wgrad``.
        dilation: Forwarded to ``graph.conv_wgrad``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are FLOAT.

    Returns:
        Tuple ``(graph, x, dy, scale, bias, dw)``: the built graph, the hipdnn
        input tensors and the ``conv_wgrad`` output tensor.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="scale_bias_relu_convwrw",
    )

    # Graph-side tensors, created in the same order as the arguments.
    x, dy, scale, bias = (
        graph.tensor_like(template)
        for template in (
            torch_tensor_x,
            torch_tensor_dy,
            torch_tensor_scale,
            torch_tensor_bias,
        )
    )

    # scale -> bias -> relu -> conv_wgrad
    scaled = graph.mul(a=x, b=scale, name="scale")
    biased = graph.add(a=scaled, b=bias, name="bias")
    activated = graph.relu(input=biased, name="relu")
    dw = graph.conv_wgrad(
        image=activated,
        loss=dy,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="convwrw",
    )
    dw.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x, dy, scale, bias, dw
if __name__ == "__main__":
    # Input dimensions (batch, channels, height, width) and output channels k
    n, c, h, w = 1, 32, 128, 128
    k = 32
    # Convolution parameters, one value per spatial axis (height, width)
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    def _rand_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` in channels_last format."""
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    torch_tensor_x = _rand_channels_last(n, c, h, w)
    torch_tensor_dy = _rand_channels_last(n, k, h, w)
    # scale and bias hold one value per input channel.
    torch_tensor_scale = _rand_channels_last(1, c, 1, 1)
    torch_tensor_bias = _rand_channels_last(1, c, 1, 1)

    hipdnn_handle = hipdnn.create_handle()
    built = build_scale_bias_relu_convwrw_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_dy,
        torch_tensor_scale,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_dy,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_dw,
    ) = built

    # NOTE(review): the dw buffer uses torch's default (contiguous) layout
    # while every input is channels_last — confirm this matches the strides
    # hipdnn assigns to dw before reading results back.
    torch_tensor_dw = torch.empty(hipdnn_tensor_dw.get_dim(), dtype=torch_data_type, device="cuda")

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x, torch_tensor_x),
            (hipdnn_tensor_dy, torch_tensor_dy),
            (hipdnn_tensor_scale, torch_tensor_scale),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_dw, torch_tensor_dw),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("scale_bias_relu_convwrw graph execution complete.")
import hipdnn
import torch
def build_sub_mul_mul_add_convbwd_relubwd_bnwrw_graph(
    hipdnn_handle,
    torch_tensor_x_bn,
    torch_tensor_mean_bn,
    torch_tensor_inv_std_bn,
    torch_tensor_scale_bn,
    torch_tensor_bias_bn,
    torch_tensor_dy,
    torch_tensor_filter,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph chaining pointwise ops, conv_dgrad, relu_backward and dbn_weight.

    The graph contains, in creation order:

    1. ``(x_bn - mean) * inv_std * scale + bias`` via sub / mul / mul / add.
    2. ``conv_dgrad`` of ``dy`` with ``filter``.
    3. ``relu_backward`` taking the conv_dgrad result as ``loss`` and the
       result of step 1 as ``input``.
    4. ``dbn_weight`` taking the relu_backward result as ``dy`` together with
       ``x_bn``, ``mean``, ``inv_std`` and ``scale``.

    The relu_backward result and all five ``dbn_weight`` results are marked as
    graph outputs.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x_bn: Template for the ``x_bn`` tensor.
        torch_tensor_mean_bn: Template for the mean tensor.
        torch_tensor_inv_std_bn: Template for the inverse-std tensor.
        torch_tensor_scale_bn: Template for the scale tensor.
        torch_tensor_bias_bn: Template for the bias tensor.
        torch_tensor_dy: Template for the ``loss`` operand of ``conv_dgrad``.
        torch_tensor_filter: Template for the ``filter`` operand of ``conv_dgrad``.
        padding: Forwarded to ``graph.conv_dgrad``.
        stride: Forwarded to ``graph.conv_dgrad``.
        dilation: Forwarded to ``graph.conv_dgrad``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are FLOAT.

    Returns:
        Tuple ``(graph, x_bn, mean_bn, inv_std_bn, scale_bn, bias_bn, dy,
        filter, drelu, dscale, dbias, eq_scale_dy, eq_scale_x, eq_bias)``:
        the built graph, the seven hipdnn input tensors and the six outputs.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="sub_mul_mul_add_convbwd_relubwd_bnwrw",
    )

    # Graph-side tensors, created in the same order as the arguments.
    x_bn, mean_bn, inv_std_bn, scale_bn, bias_bn, dy, conv_filter = (
        graph.tensor_like(template)
        for template in (
            torch_tensor_x_bn,
            torch_tensor_mean_bn,
            torch_tensor_inv_std_bn,
            torch_tensor_scale_bn,
            torch_tensor_bias_bn,
            torch_tensor_dy,
            torch_tensor_filter,
        )
    )

    # (x_bn - mean) * inv_std * scale + bias
    centered = graph.sub(a=x_bn, b=mean_bn, name="sub")
    normalized = graph.mul(a=centered, b=inv_std_bn, name="mul0")
    scaled = graph.mul(a=normalized, b=scale_bn, name="mul1")
    bn_out = graph.add(a=scaled, b=bias_bn, name="add")

    dx = graph.conv_dgrad(
        loss=dy,
        filter=conv_filter,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_dgrad",
    )

    drelu = graph.relu_backward(loss=dx, input=bn_out, name="relu_backward")
    drelu.set_output(True)

    bn_grads = graph.dbn_weight(
        dy=drelu,
        input=x_bn,
        mean=mean_bn,
        inv_variance=inv_std_bn,
        scale=scale_bn,
        name="bn_backward_weight",
    )
    dscale, dbias, eq_scale_dy, eq_scale_x, eq_bias = bn_grads
    for bn_grad in (dscale, dbias, eq_scale_dy, eq_scale_x, eq_bias):
        bn_grad.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        x_bn,
        mean_bn,
        inv_std_bn,
        scale_bn,
        bias_bn,
        dy,
        conv_filter,
        drelu,
        dscale,
        dbias,
        eq_scale_dy,
        eq_scale_x,
        eq_bias,
    )
if __name__ == "__main__":
    # Input dimensions (batch, channels, height, width)
    n, c, h, w = 4, 64, 16, 16
    # Filter dimensions (output channels, kernel height, kernel width)
    k, r, s = 32, 3, 3
    # Convolution parameters, one value per spatial axis (height, width)
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    def _rand_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` in channels_last format."""
        dense = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    def _empty_channels_last(graph_tensor):
        """Allocate an uninitialized channels_last CUDA buffer for ``graph_tensor``."""
        return torch.empty(
            graph_tensor.get_dim(),
            dtype=torch_data_type,
            memory_format=torch.channels_last,
            device="cuda",
        )

    # The four batch-norm parameter tensors hold one value per channel.
    torch_tensor_x_bn = _rand_channels_last(n, c, h, w)
    torch_tensor_mean_bn = _rand_channels_last(1, c, 1, 1)
    torch_tensor_inv_std_bn = _rand_channels_last(1, c, 1, 1)
    torch_tensor_scale_bn = _rand_channels_last(1, c, 1, 1)
    torch_tensor_bias_bn = _rand_channels_last(1, c, 1, 1)
    torch_tensor_dy = _rand_channels_last(n, k, h, w)
    torch_tensor_filter = _rand_channels_last(k, c, r, s)

    hipdnn_handle = hipdnn.create_handle()
    built = build_sub_mul_mul_add_convbwd_relubwd_bnwrw_graph(
        hipdnn_handle,
        torch_tensor_x_bn,
        torch_tensor_mean_bn,
        torch_tensor_inv_std_bn,
        torch_tensor_scale_bn,
        torch_tensor_bias_bn,
        torch_tensor_dy,
        torch_tensor_filter,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x_bn,
        hipdnn_tensor_mean_bn,
        hipdnn_tensor_inv_std_bn,
        hipdnn_tensor_scale_bn,
        hipdnn_tensor_bias_bn,
        hipdnn_tensor_dy,
        hipdnn_tensor_filter,
        hipdnn_tensor_drelu,
        hipdnn_tensor_dscale,
        hipdnn_tensor_dbias,
        hipdnn_tensor_eq_scale_dy,
        hipdnn_tensor_eq_scale_x,
        hipdnn_tensor_eq_bias,
    ) = built

    # One channels_last output buffer per graph output.
    torch_tensor_drelu = _empty_channels_last(hipdnn_tensor_drelu)
    torch_tensor_dscale = _empty_channels_last(hipdnn_tensor_dscale)
    torch_tensor_dbias = _empty_channels_last(hipdnn_tensor_dbias)
    torch_tensor_eq_scale_dy = _empty_channels_last(hipdnn_tensor_eq_scale_dy)
    torch_tensor_eq_scale_x = _empty_channels_last(hipdnn_tensor_eq_scale_x)
    torch_tensor_eq_bias = _empty_channels_last(hipdnn_tensor_eq_bias)

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x_bn, torch_tensor_x_bn),
            (hipdnn_tensor_mean_bn, torch_tensor_mean_bn),
            (hipdnn_tensor_inv_std_bn, torch_tensor_inv_std_bn),
            (hipdnn_tensor_scale_bn, torch_tensor_scale_bn),
            (hipdnn_tensor_bias_bn, torch_tensor_bias_bn),
            (hipdnn_tensor_dy, torch_tensor_dy),
            (hipdnn_tensor_filter, torch_tensor_filter),
            (hipdnn_tensor_drelu, torch_tensor_drelu),
            (hipdnn_tensor_dscale, torch_tensor_dscale),
            (hipdnn_tensor_dbias, torch_tensor_dbias),
            (hipdnn_tensor_eq_scale_dy, torch_tensor_eq_scale_dy),
            (hipdnn_tensor_eq_scale_x, torch_tensor_eq_scale_x),
            (hipdnn_tensor_eq_bias, torch_tensor_eq_bias),
        )
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("sub_mul_mul_add_convbwd_relubwd_bnwrw graph execution complete.")
import hipdnn
import torch
def build_conv_bias_add_dts_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> bias -> add -> depth-to-space hipDNN graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape applied to
    the result of the second add.

    Args:
        hipdnn_handle: hipDNN handle used to create and build the graph.
        torch_tensor_x: Input tensor; dims 0, 2 and 3 are read as batch,
            height and width.
        torch_tensor_w: Filter tensor; dims 0, 2 and 3 are read as output
            channels (k), filter height and filter width.
        torch_tensor_bias: Tensor added to the convolution output.
        torch_tensor_add: Tensor added to the biased convolution output.
        padding: Two-element (height, width) padding.
        stride: Two-element (height, width) stride.
        dilation: Two-element (height, width) dilation.
        hipdnn_data_type: hipDNN data type used for the graph I/O tensors.
        depth_to_space_mode: ``"CRD"`` views the channels as
            (k / block_size**2, block, block); any other value (e.g. ``"DCR"``)
            views them as (block, block, k / block_size**2).
        block_size: Spatial upscaling factor; ``k`` must be divisible by
            ``block_size ** 2``.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` holding the built graph, the
        hipDNN tensors mirroring the four torch inputs, and the output tensor.

    Raises:
        ValueError: If ``block_size`` is smaller than 1 or ``k`` is not
            divisible by ``block_size ** 2``.
    """
    n = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    k = torch_tensor_w.shape[0]
    r = torch_tensor_w.shape[2]
    s = torch_tensor_w.shape[3]

    # Validate before touching hipDNN: an indivisible channel count would make
    # the reshape below describe a different number of elements than the
    # convolution produces.
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    block_area = block_size * block_size
    if k % block_area != 0:
        raise ValueError(
            f"output channels ({k}) must be divisible by block_size ** 2 ({block_area})"
        )
    dts_channels = k // block_area

    # Convolution output size; integer floor division avoids the float
    # round trip of int(a / b).
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (r - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (s - 1) + 1)) // stride[1] + 1

    # Both modes end up as [n, channels, out_h, block, out_w, block] after the
    # transpose; they only differ in how the k channels are split up first.
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [n, dts_channels, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [n, block_size, block_size, dts_channels, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]

    up_h = block_size * out_h
    up_w = block_size * out_w
    second_reshape_dim = [n, dts_channels, up_h, up_w]
    # Strides with the channel dim innermost (stride 1), then width, height, batch.
    second_reshape_stride = [dts_channels * up_h * up_w, 1, dts_channels * up_w, dts_channels]

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_add_dts",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    # Create add
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_bias_output, b=hipdnn_tensor_add, name="add"
    )

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_add_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        second_reshape_stride
    )
    hipdnn_tensor_second_reshape_output.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_second_reshape_output,
    )
if __name__ == "__main__":
    # Demo: run the conv -> bias -> add -> depth-to-space graph once on
    # random half-precision data.

    # Input is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 128, 270, 480
    k, r, s = 128, 3, 3

    # Convolution parameters, height component first.
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    block_size = 2
    depth_to_space_mode = "CRD"

    # Spatial size of the convolution output; the residual tensor must match it.
    outH = int((h + 2 * pad_h - (dil_h * (r - 1) + 1)) / stride_h) + 1
    outW = int((w + 2 * pad_w - (dil_w * (s - 1) + 1)) / stride_w) + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    tensor_opts = {"dtype": torch_data_type, "device": "cuda"}

    # Random inputs, converted to channels-last memory format.
    torch_tensor_x = torch.rand(n, c, h, w, **tensor_opts).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_opts).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_opts).to(
        memory_format=torch.channels_last
    )
    torch_tensor_add = torch.rand(n, k, outH, outW, **tensor_opts).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_bias_add_dts_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = built

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_add_dts graph execution complete.")
import hipdnn
import torch
def build_conv_bias_dts_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> bias -> depth-to-space hipDNN graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape applied to
    the biased convolution output.

    Args:
        hipdnn_handle: hipDNN handle used to create and build the graph.
        torch_tensor_x: Input tensor; dims 0, 2 and 3 are read as batch,
            height and width.
        torch_tensor_w: Filter tensor; dims 0, 2 and 3 are read as output
            channels (k), filter height and filter width.
        torch_tensor_bias: Tensor added to the convolution output.
        padding: Two-element (height, width) padding.
        stride: Two-element (height, width) stride.
        dilation: Two-element (height, width) dilation.
        hipdnn_data_type: hipDNN data type used for the graph I/O tensors.
        depth_to_space_mode: ``"CRD"`` views the channels as
            (k / block_size**2, block, block); any other value (e.g. ``"DCR"``)
            views them as (block, block, k / block_size**2).
        block_size: Spatial upscaling factor; ``k`` must be divisible by
            ``block_size ** 2``.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` holding the built graph, the hipDNN
        tensors mirroring the three torch inputs, and the output tensor.

    Raises:
        ValueError: If ``block_size`` is smaller than 1 or ``k`` is not
            divisible by ``block_size ** 2``.
    """
    n = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    k = torch_tensor_w.shape[0]
    r = torch_tensor_w.shape[2]
    s = torch_tensor_w.shape[3]

    # Validate before touching hipDNN: an indivisible channel count would make
    # the reshape below describe a different number of elements than the
    # convolution produces.
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    block_area = block_size * block_size
    if k % block_area != 0:
        raise ValueError(
            f"output channels ({k}) must be divisible by block_size ** 2 ({block_area})"
        )
    dts_channels = k // block_area

    # Convolution output size; integer floor division avoids the float
    # round trip of int(a / b).
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (r - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (s - 1) + 1)) // stride[1] + 1

    # Both modes end up as [n, channels, out_h, block, out_w, block] after the
    # transpose; they only differ in how the k channels are split up first.
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [n, dts_channels, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [n, block_size, block_size, dts_channels, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]

    up_h = block_size * out_h
    up_w = block_size * out_w
    second_reshape_dim = [n, dts_channels, up_h, up_w]
    # Strides with the channel dim innermost (stride 1), then width, height, batch.
    second_reshape_stride = [dts_channels * up_h * up_w, 1, dts_channels * up_w, dts_channels]

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_dts",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_bias_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        second_reshape_stride
    ).set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_second_reshape_output,
    )
if __name__ == "__main__":
    # Demo: run the conv -> bias -> depth-to-space graph once on random
    # half-precision data.

    # Input is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 8, 128, 128
    k, r, s = 16, 3, 3

    # Convolution parameters, height component first.
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    block_size = 2
    depth_to_space_mode = "DCR"

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    tensor_opts = {"dtype": torch_data_type, "device": "cuda"}

    # Random inputs, converted to channels-last memory format.
    torch_tensor_x = torch.rand(n, c, h, w, **tensor_opts).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_opts).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_opts).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_bias_dts_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_dts graph execution complete.")
import hipdnn
import torch
def build_conv_bias_dts_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> bias -> depth-to-space -> add hipDNN graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape applied to
    the biased convolution output; the extra add runs on the upscaled result.

    Args:
        hipdnn_handle: hipDNN handle used to create and build the graph.
        torch_tensor_x: Input tensor; dims 0, 2 and 3 are read as batch,
            height and width.
        torch_tensor_w: Filter tensor; dims 0, 2 and 3 are read as output
            channels (k), filter height and filter width.
        torch_tensor_bias: Tensor added to the convolution output.
        torch_tensor_add: Tensor added to the depth-to-space output.
        padding: Two-element (height, width) padding.
        stride: Two-element (height, width) stride.
        dilation: Two-element (height, width) dilation.
        hipdnn_data_type: hipDNN data type used for the graph I/O tensors.
        depth_to_space_mode: ``"CRD"`` views the channels as
            (k / block_size**2, block, block); any other value (e.g. ``"DCR"``)
            views them as (block, block, k / block_size**2).
        block_size: Spatial upscaling factor; ``k`` must be divisible by
            ``block_size ** 2``.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` holding the built graph, the
        hipDNN tensors mirroring the four torch inputs, and the output tensor.

    Raises:
        ValueError: If ``block_size`` is smaller than 1 or ``k`` is not
            divisible by ``block_size ** 2``.
    """
    n = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    k = torch_tensor_w.shape[0]
    r = torch_tensor_w.shape[2]
    s = torch_tensor_w.shape[3]

    # Validate before touching hipDNN: an indivisible channel count would make
    # the reshape below describe a different number of elements than the
    # convolution produces.
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    block_area = block_size * block_size
    if k % block_area != 0:
        raise ValueError(
            f"output channels ({k}) must be divisible by block_size ** 2 ({block_area})"
        )
    dts_channels = k // block_area

    # Convolution output size; integer floor division avoids the float
    # round trip of int(a / b).
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (r - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (s - 1) + 1)) // stride[1] + 1

    # Both modes end up as [n, channels, out_h, block, out_w, block] after the
    # transpose; they only differ in how the k channels are split up first.
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [n, dts_channels, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [n, block_size, block_size, dts_channels, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]

    up_h = block_size * out_h
    up_w = block_size * out_w
    second_reshape_dim = [n, dts_channels, up_h, up_w]
    # Strides with the channel dim innermost (stride 1), then width, height, batch.
    second_reshape_stride = [dts_channels * up_h * up_w, 1, dts_channels * up_w, dts_channels]

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_dts_add",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_bias_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        second_reshape_stride
    )

    # Create add
    hipdnn_tensor_y = graph.add(
        a=hipdnn_tensor_second_reshape_output, b=hipdnn_tensor_add, name="add"
    )
    hipdnn_tensor_y.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    )
if __name__ == "__main__":
    # Demo: run the conv -> bias -> depth-to-space -> add graph once on random
    # half-precision data.

    # Input dimensions
    n = 1  # Batch size
    c = 8  # Number of input channels
    h = 128  # Height
    w = 128  # Width
    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation
    block_size = 2
    depth_to_space_mode = "CRD"

    # Convolution output size. The residual tensor is added after
    # depth-to-space, so it must be sized from the conv output rather than the
    # raw input (the two only coincide for "same" padding with stride 1).
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # Random inputs, converted to channels-last memory format.
    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_add = torch.rand(
        n,
        k // (block_size * block_size),
        out_h * block_size,
        out_w * block_size,
        dtype=torch_data_type,
        device="cuda",
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_dts_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_dts_add graph execution complete.")
import hipdnn
import torch
def build_conv_bias_dts_leakyrelu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> bias -> depth-to-space -> leaky-ReLU hipDNN graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape applied to
    the biased convolution output; the activation runs on the upscaled result.

    Args:
        hipdnn_handle: hipDNN handle used to create and build the graph.
        torch_tensor_x: Input tensor; dims 0, 2 and 3 are read as batch,
            height and width.
        torch_tensor_w: Filter tensor; dims 0, 2 and 3 are read as output
            channels (k), filter height and filter width.
        torch_tensor_bias: Tensor added to the convolution output.
        padding: Two-element (height, width) padding.
        stride: Two-element (height, width) stride.
        dilation: Two-element (height, width) dilation.
        hipdnn_data_type: hipDNN data type used for the graph I/O tensors.
        depth_to_space_mode: ``"CRD"`` views the channels as
            (k / block_size**2, block, block); any other value (e.g. ``"DCR"``)
            views them as (block, block, k / block_size**2).
        block_size: Spatial upscaling factor; ``k`` must be divisible by
            ``block_size ** 2``.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` holding the built graph, the hipDNN
        tensors mirroring the three torch inputs, and the output tensor.

    Raises:
        ValueError: If ``block_size`` is smaller than 1 or ``k`` is not
            divisible by ``block_size ** 2``.
    """
    n = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    k = torch_tensor_w.shape[0]
    r = torch_tensor_w.shape[2]
    s = torch_tensor_w.shape[3]

    # Validate before touching hipDNN: an indivisible channel count would make
    # the reshape below describe a different number of elements than the
    # convolution produces.
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    block_area = block_size * block_size
    if k % block_area != 0:
        raise ValueError(
            f"output channels ({k}) must be divisible by block_size ** 2 ({block_area})"
        )
    dts_channels = k // block_area

    # Convolution output size; integer floor division avoids the float
    # round trip of int(a / b).
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (r - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (s - 1) + 1)) // stride[1] + 1

    # Both modes end up as [n, channels, out_h, block, out_w, block] after the
    # transpose; they only differ in how the k channels are split up first.
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [n, dts_channels, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [n, block_size, block_size, dts_channels, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]

    up_h = block_size * out_h
    up_w = block_size * out_w
    second_reshape_dim = [n, dts_channels, up_h, up_w]
    # Strides with the channel dim innermost (stride 1), then width, height, batch.
    second_reshape_stride = [dts_channels * up_h * up_w, 1, dts_channels * up_w, dts_channels]

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_dts_leakyrelu",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_bias_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        second_reshape_stride
    )

    # Create leakyrelu
    # NOTE(review): a negative slope of -1.0 is unusual for a leaky ReLU
    # (typical values are small and positive) -- confirm this is intended.
    hipdnn_tensor_y = graph.leaky_relu(
        input=hipdnn_tensor_second_reshape_output, negative_slope=-1.0, name="leaky_relu"
    )
    hipdnn_tensor_y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y)
if __name__ == "__main__":
    # Demo: run the conv -> bias -> depth-to-space -> leaky-ReLU graph once on
    # random half-precision data.

    # Input is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 8, 128, 128
    k, r, s = 16, 3, 3

    # Convolution parameters, height component first.
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    block_size = 2
    depth_to_space_mode = "DCR"

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    tensor_opts = {"dtype": torch_data_type, "device": "cuda"}

    # Random inputs, converted to channels-last memory format.
    torch_tensor_x = torch.rand(n, c, h, w, **tensor_opts).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_opts).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_opts).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_bias_dts_leakyrelu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_dts_leakyrelu graph execution complete.")
import hipdnn
import torch
def build_conv_bias_dts_leakyrelu_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a conv -> bias -> depth-to-space -> leaky-ReLU -> add hipDNN graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape applied to
    the biased convolution output; the activation and the extra add run on the
    upscaled result.

    Args:
        hipdnn_handle: hipDNN handle used to create and build the graph.
        torch_tensor_x: Input tensor; dims 0, 2 and 3 are read as batch,
            height and width.
        torch_tensor_w: Filter tensor; dims 0, 2 and 3 are read as output
            channels (k), filter height and filter width.
        torch_tensor_bias: Tensor added to the convolution output.
        torch_tensor_add: Tensor added to the leaky-ReLU output.
        padding: Two-element (height, width) padding.
        stride: Two-element (height, width) stride.
        dilation: Two-element (height, width) dilation.
        hipdnn_data_type: hipDNN data type used for the graph I/O tensors.
        depth_to_space_mode: ``"CRD"`` views the channels as
            (k / block_size**2, block, block); any other value (e.g. ``"DCR"``)
            views them as (block, block, k / block_size**2).
        block_size: Spatial upscaling factor; ``k`` must be divisible by
            ``block_size ** 2``.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` holding the built graph, the
        hipDNN tensors mirroring the four torch inputs, and the output tensor.

    Raises:
        ValueError: If ``block_size`` is smaller than 1 or ``k`` is not
            divisible by ``block_size ** 2``.
    """
    n = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    k = torch_tensor_w.shape[0]
    r = torch_tensor_w.shape[2]
    s = torch_tensor_w.shape[3]

    # Validate before touching hipDNN: an indivisible channel count would make
    # the reshape below describe a different number of elements than the
    # convolution produces.
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    block_area = block_size * block_size
    if k % block_area != 0:
        raise ValueError(
            f"output channels ({k}) must be divisible by block_size ** 2 ({block_area})"
        )
    dts_channels = k // block_area

    # Convolution output size; integer floor division avoids the float
    # round trip of int(a / b).
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (r - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (s - 1) + 1)) // stride[1] + 1

    # Both modes end up as [n, channels, out_h, block, out_w, block] after the
    # transpose; they only differ in how the k channels are split up first.
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [n, dts_channels, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [n, block_size, block_size, dts_channels, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]

    up_h = block_size * out_h
    up_w = block_size * out_w
    second_reshape_dim = [n, dts_channels, up_h, up_w]
    # Strides with the channel dim innermost (stride 1), then width, height, batch.
    second_reshape_stride = [dts_channels * up_h * up_w, 1, dts_channels * up_w, dts_channels]

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_dts_leakyrelu_add",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_bias_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        second_reshape_stride
    )

    # Create leakyRelu
    # NOTE(review): a negative slope of -1.0 is unusual for a leaky ReLU
    # (typical values are small and positive) -- confirm this is intended.
    hipdnn_tensor_leaky_relu_output = graph.leaky_relu(
        input=hipdnn_tensor_second_reshape_output, negative_slope=-1.0, name="leaky_relu"
    )

    # Create add
    hipdnn_tensor_y = graph.add(
        a=hipdnn_tensor_leaky_relu_output, b=hipdnn_tensor_add, name="add"
    )
    hipdnn_tensor_y.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    )
if __name__ == "__main__":
    # Demo: run the conv -> bias -> depth-to-space -> leaky-ReLU -> add graph
    # once on random half-precision data.

    # Input dimensions
    n = 1  # Batch size
    c = 8  # Number of input channels
    h = 128  # Height
    w = 128  # Width
    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation
    block_size = 2
    depth_to_space_mode = "CRD"

    # Convolution output size. The residual tensor is added after
    # depth-to-space, so it must be sized from the conv output rather than the
    # raw input (the two only coincide for "same" padding with stride 1).
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # Random inputs, converted to channels-last memory format.
    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_add = torch.rand(
        n,
        k // (block_size * block_size),
        out_h * block_size,
        out_w * block_size,
        dtype=torch_data_type,
        device="cuda",
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_dts_leakyrelu_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_dts_leakyrelu_add graph execution complete.")
import hipdnn
import torch
def build_conv_dts_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> depth-to-space hipDNN graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape applied to
    the convolution output.

    Args:
        hipdnn_handle: hipDNN handle used to create and build the graph.
        torch_tensor_x: Input tensor; dims 0, 2 and 3 are read as batch,
            height and width.
        torch_tensor_w: Filter tensor; dims 0, 2 and 3 are read as output
            channels (k), filter height and filter width.
        padding: Two-element (height, width) padding.
        stride: Two-element (height, width) stride.
        dilation: Two-element (height, width) dilation.
        hipdnn_data_type: hipDNN data type used for the graph I/O tensors.
        depth_to_space_mode: ``"CRD"`` views the channels as
            (k / block_size**2, block, block); any other value (e.g. ``"DCR"``)
            views them as (block, block, k / block_size**2).
        block_size: Spatial upscaling factor; ``k`` must be divisible by
            ``block_size ** 2``.

    Returns:
        Tuple ``(graph, x, w, y)`` holding the built graph, the hipDNN tensors
        mirroring the two torch inputs, and the output tensor.

    Raises:
        ValueError: If ``block_size`` is smaller than 1 or ``k`` is not
            divisible by ``block_size ** 2``.
    """
    n = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    k = torch_tensor_w.shape[0]
    r = torch_tensor_w.shape[2]
    s = torch_tensor_w.shape[3]

    # Validate before touching hipDNN: an indivisible channel count would make
    # the reshape below describe a different number of elements than the
    # convolution produces.
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    block_area = block_size * block_size
    if k % block_area != 0:
        raise ValueError(
            f"output channels ({k}) must be divisible by block_size ** 2 ({block_area})"
        )
    dts_channels = k // block_area

    # Convolution output size; integer floor division avoids the float
    # round trip of int(a / b).
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (r - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (s - 1) + 1)) // stride[1] + 1

    # Both modes end up as [n, channels, out_h, block, out_w, block] after the
    # transpose; they only differ in how the k channels are split up first.
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [n, dts_channels, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [n, block_size, block_size, dts_channels, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]

    up_h = block_size * out_h
    up_w = block_size * out_w
    second_reshape_dim = [n, dts_channels, up_h, up_w]
    # Strides with the channel dim innermost (stride 1), then width, height, batch.
    second_reshape_stride = [dts_channels * up_h * up_w, 1, dts_channels * up_w, dts_channels]

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_dts",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_conv_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        second_reshape_stride
    ).set_output(True)

    graph.build(hipdnn_handle)
    return (graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_second_reshape_output)
if __name__ == "__main__":
    # Demo: run the conv -> depth-to-space graph once on random
    # half-precision data.

    # Input is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 8, 128, 128
    k, r, s = 16, 3, 3

    # Convolution parameters, height component first.
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    block_size = 2
    depth_to_space_mode = "DCR"

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    tensor_opts = {"dtype": torch_data_type, "device": "cuda"}

    # Random inputs, converted to channels-last memory format.
    torch_tensor_x = torch.rand(n, c, h, w, **tensor_opts).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_opts).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_dts_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_y = built

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_dts graph execution complete.")
import hipdnn
import torch
def build_conv_bias_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a fused conv -> bias hipDNN graph.

    Args:
        hipdnn_handle: hipDNN handle used to create and build the graph.
        torch_tensor_x: Torch tensor the convolution image tensor is modelled on.
        torch_tensor_w: Torch tensor the convolution weight tensor is modelled on.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        padding: Convolution padding, forwarded to ``conv_fprop``.
        stride: Convolution stride, forwarded to ``conv_fprop``.
        dilation: Convolution dilation, forwarded to ``conv_fprop``.
        hipdnn_data_type: hipDNN data type used for the graph I/O tensors.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` holding the built graph, the hipDNN
        tensors mirroring the three torch inputs, and the output tensor.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias",
    )

    # Graph-side tensors mirroring the torch inputs (created in x, w, bias order).
    x, w, bias = [
        graph.tensor_like(t) for t in (torch_tensor_x, torch_tensor_w, torch_tensor_bias)
    ]

    conv_out = graph.conv_fprop(
        image=x, weight=w, padding=padding, stride=stride, dilation=dilation, name="conv2d"
    )

    # The bias add is the final node, so its result is the graph output.
    y = graph.add(a=conv_out, b=bias, name="bias")
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, w, bias, y)
if __name__ == "__main__":
    # Demo: run the conv -> bias graph once on random half-precision data.

    # Input is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 16, 16, 16
    k, r, s = 16, 3, 3

    # Convolution parameters, height component first.
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    tensor_opts = {"dtype": torch_data_type, "device": "cuda"}

    # Random inputs, converted to channels-last memory format.
    torch_tensor_x = torch.rand(n, c, h, w, **tensor_opts).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_opts).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_opts).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_bias_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias graph execution complete.")
import hipdnn
import torch
def build_conv_bias_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a fused conv -> bias -> add hipDNN graph.

    Args:
        hipdnn_handle: hipDNN handle used to create and build the graph.
        torch_tensor_x: Torch tensor the convolution image tensor is modelled on.
        torch_tensor_w: Torch tensor the convolution weight tensor is modelled on.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        torch_tensor_add: Torch tensor the second add operand is modelled on.
        padding: Convolution padding, forwarded to ``conv_fprop``.
        stride: Convolution stride, forwarded to ``conv_fprop``.
        dilation: Convolution dilation, forwarded to ``conv_fprop``.
        hipdnn_data_type: hipDNN data type used for the graph I/O tensors.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` holding the built graph, the
        hipDNN tensors mirroring the four torch inputs, and the output tensor.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_add",
    )

    # Graph-side tensors mirroring the torch inputs (created in x, w, bias, add order).
    x, w, bias, residual = [
        graph.tensor_like(t)
        for t in (torch_tensor_x, torch_tensor_w, torch_tensor_bias, torch_tensor_add)
    ]

    conv_out = graph.conv_fprop(
        image=x, weight=w, padding=padding, stride=stride, dilation=dilation, name="conv2d"
    )
    biased = graph.add(a=conv_out, b=bias, name="bias")

    # The second add is the final node, so its result is the graph output.
    y = graph.add(a=biased, b=residual, name="add")
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, w, bias, residual, y)
if __name__ == "__main__":
    # Demo: run the conv -> bias -> add graph once on random half-precision data.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width
    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Convolution output size. The residual tensor is added to the conv
    # output, so it is shaped (n, k, out_h, out_w) rather than like the input;
    # the two only coincide when c == k and the padding preserves h and w.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # Random inputs, converted to channels-last memory format.
    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_add = torch.rand(
        n, k, out_h, out_w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_add graph execution complete.")
import hipdnn
import torch
def build_conv_bias_add_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph chaining conv_fprop, a bias add, an add and a relu.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor passed to ``graph.tensor_like`` for the
            convolution image.
        torch_tensor_w: Tensor passed to ``graph.tensor_like`` for the
            convolution weight.
        torch_tensor_bias: Tensor passed to ``graph.tensor_like`` for the
            operand added right after the convolution.
        torch_tensor_add: Tensor passed to ``graph.tensor_like`` for the
            operand of the second add.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: Used as the graph's ``io_data_type``; intermediate
            and compute types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        A 6-tuple ``(graph, x, w, bias, add, y)`` holding the built graph,
        the four graph input tensors and the graph output tensor.
    """
    fused_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convolution_bias_add_relu",
    )

    # Graph-side tensors modelled on the torch tensors, created in the
    # order x, w, bias, add.
    x_node, w_node, bias_node, add_node = [
        fused_graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_x,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_add,
        )
    ]

    # conv2d -> bias -> add -> relu
    conv_out = fused_graph.conv_fprop(
        image=x_node,
        weight=w_node,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused_graph.add(a=conv_out, b=bias_node, name="bias")
    summed = fused_graph.add(a=biased, b=add_node, name="add")
    y_node = fused_graph.relu(input=summed, name="relu")

    # Only the relu result is flagged as a graph output.
    y_node.set_output(True)
    fused_graph.build(hipdnn_handle)

    return (fused_graph, x_node, w_node, bias_node, add_node, y_node)
if __name__ == "__main__":
    # Example driver: builds the conv + bias + add + relu graph for one
    # small problem and executes it once on random half-precision data.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width
    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial extent of the convolution output (standard convolution
    # arithmetic). With the values above this equals the input extent.
    h_out = (h + 2 * pad_h - dil_h * (r - 1) - 1) // stride_h + 1
    w_out = (w + 2 * pad_w - dil_w * (s - 1) - 1) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    # The residual operand is added to the convolution output, so it is
    # shaped like that output (k channels, h_out x w_out) rather than like
    # the input; the two shapes only coincide while c == k and the padding
    # preserves the spatial size.
    torch_tensor_add = torch.rand(
        n, k, h_out, w_out, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_add_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph's
    # output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convolution_bias_add_relu graph execution complete.")
import hipdnn
import torch
def build_conv_bias_prelu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    negative_slope,
    hipdnn_data_type,
):
    """Build a hipdnn graph chaining conv_fprop, a bias add and a prelu.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor passed to ``graph.tensor_like`` for the
            convolution image.
        torch_tensor_w: Tensor passed to ``graph.tensor_like`` for the
            convolution weight.
        torch_tensor_bias: Tensor passed to ``graph.tensor_like`` for the
            operand added right after the convolution.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        negative_slope: Forwarded unchanged to ``graph.prelu``.
        hipdnn_data_type: Used as the graph's ``io_data_type``; intermediate
            and compute types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        A 5-tuple ``(graph, x, w, bias, y)`` holding the built graph, the
        three graph input tensors and the graph output tensor.
    """
    fused_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_prelu",
    )

    # Graph-side tensors modelled on the torch tensors, created in the
    # order x, w, bias.
    x_node, w_node, bias_node = [
        fused_graph.tensor_like(torch_tensor)
        for torch_tensor in (torch_tensor_x, torch_tensor_w, torch_tensor_bias)
    ]

    # conv2d -> bias -> prelu
    conv_out = fused_graph.conv_fprop(
        image=x_node,
        weight=w_node,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused_graph.add(a=conv_out, b=bias_node, name="bias")
    y_node = fused_graph.prelu(
        input=biased, negative_slope=negative_slope, name="prelu"
    )

    # Only the prelu result is flagged as a graph output.
    y_node.set_output(True)
    fused_graph.build(hipdnn_handle)

    return (fused_graph, x_node, w_node, bias_node, y_node)
if __name__ == "__main__":
    # Example driver: builds the conv + bias + prelu graph for one small
    # problem and executes it once on random half-precision data.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width
    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation
    # Activation parameters
    negative_slope = 0.01  # Negative slope

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # Random inputs, generated in the order x, w, bias and converted to the
    # channels_last memory format.
    torch_tensor_x = torch.rand(
        (n, c, h, w), dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        (k, c, r, s), dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(
        (1, k, 1, 1), dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_bias_prelu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        negative_slope,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph's
    # output tensor.
    output_dims = hipdnn_tensor_y.get_dim()
    torch_tensor_y = torch.empty(
        output_dims,
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch buffer of the byte size requested by the graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_prelu graph execution complete.")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment