Commit ca34d4d2 authored by yanjl1
Browse files

Initial

parents
import hipdnn
import torch
def build_conv_bias_prelu_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    negative_slope,
    hipdnn_data_type,
):
    """Build a hipdnn graph for ``prelu(conv_fprop(x, w) + bias) + add``.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x: Torch tensor the convolution image tensor is modelled on.
        torch_tensor_w: Torch tensor the convolution weight tensor is modelled on.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        torch_tensor_add: Torch tensor the final add operand is modelled on.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        negative_slope: Forwarded unchanged to ``graph.prelu``.
        hipdnn_data_type: I/O data type of the graph; the intermediate and
            compute data types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        ``(graph, x, w, bias, add, y)``: the built graph followed by the hipdnn
        tensors of the four inputs and of the output.
    """
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_prelu_add",
    )

    # One graph tensor per torch tensor, created in argument order.
    x, w, bias, residual = (
        fused.tensor_like(source)
        for source in (torch_tensor_x, torch_tensor_w, torch_tensor_bias, torch_tensor_add)
    )

    # conv -> +bias -> prelu -> +residual
    conv_out = fused.conv_fprop(
        image=x,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused.add(a=conv_out, b=bias, name="bias")
    activated = fused.prelu(input=biased, negative_slope=negative_slope, name="prelu")
    y = fused.add(a=activated, b=residual, name="add")

    # Only the final tensor is flagged as a graph output.
    y.set_output(True)
    fused.build(hipdnn_handle)
    return fused, x, w, bias, residual, y
if __name__ == "__main__":
    # Example driver: build the conv+bias+prelu+add graph and execute it once
    # on random half-precision data.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width
    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation
    # Activation parameters
    negative_slope = 0.01  # Negative slope
    # Spatial size of the convolution output (standard forward-conv formula).
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    # The residual is added to the activated convolution result, so it is
    # shaped like the convolution output (n, k, out_h, out_w) rather than like
    # the input. With the constants above both shapes coincide.
    torch_tensor_add = torch.rand(
        n, k, out_h, out_w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_prelu_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        negative_slope,
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_prelu_add graph execution complete.")
import hipdnn
import torch
def build_conv_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph for ``relu(conv_fprop(x, w) + bias)``.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x: Torch tensor the convolution image tensor is modelled on.
        torch_tensor_w: Torch tensor the convolution weight tensor is modelled on.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: I/O data type of the graph; the intermediate and
            compute data types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        ``(graph, x, w, bias, y)``: the built graph followed by the hipdnn
        tensors of the three inputs and of the output.
    """
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_relu",
    )

    # Graph-side counterparts of the torch tensors, created in argument order.
    x, w, bias = (
        fused.tensor_like(source)
        for source in (torch_tensor_x, torch_tensor_w, torch_tensor_bias)
    )

    # conv -> +bias -> relu
    conv_out = fused.conv_fprop(
        image=x,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused.add(a=conv_out, b=bias, name="bias")
    y = fused.relu(input=biased, name="relu")

    # The relu result is the only graph output.
    y.set_output(True)
    fused.build(hipdnn_handle)
    return (fused, x, w, bias, y)
if __name__ == "__main__":
    # Example driver: build the conv+bias+relu graph and execute it once on
    # random half-precision data.

    # Image is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 16, 16, 16
    k, r, s = 16, 3, 3

    # Convolution parameters as (height, width) pairs.
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # Random device tensors, converted to channels_last memory format.
    tensor_opts = {"dtype": torch_data_type, "device": "cuda"}
    torch_tensor_x = torch.rand(n, c, h, w, **tensor_opts).to(
        memory_format=torch.channels_last
    )
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_opts).to(
        memory_format=torch.channels_last
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_opts).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(), memory_format=torch.channels_last, **tensor_opts
    )

    # Map every graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {descriptor: buffer.data_ptr() for descriptor, buffer in bindings}

    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_relu graph execution complete.")
import hipdnn
import torch
def build_conv_bias_swish_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph for ``swish(conv_fprop(x, w) + bias)``.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x: Torch tensor the convolution image tensor is modelled on.
        torch_tensor_w: Torch tensor the convolution weight tensor is modelled on.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: I/O data type of the graph; the intermediate and
            compute data types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        ``(graph, x, w, bias, y)``: the built graph followed by the hipdnn
        tensors of the three inputs and of the output.
    """
    float_type = hipdnn.data_type.FLOAT
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_swish",
    )

    # Graph-side counterparts of the torch tensors, created in argument order.
    x = fused.tensor_like(torch_tensor_x)
    w = fused.tensor_like(torch_tensor_w)
    bias = fused.tensor_like(torch_tensor_bias)

    # conv -> +bias -> swish
    conv_settings = {"padding": padding, "stride": stride, "dilation": dilation}
    conv_out = fused.conv_fprop(image=x, weight=w, name="conv2d", **conv_settings)
    biased = fused.add(a=conv_out, b=bias, name="bias")
    y = fused.swish(input=biased, name="swish")

    # The swish result is the only graph output.
    y.set_output(True)
    fused.build(hipdnn_handle)
    return (fused, x, w, bias, y)
if __name__ == "__main__":
    # Example driver: build the conv+bias+swish graph and execute it once on
    # random half-precision data.

    # Image is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 16, 16, 16
    k, r, s = 16, 3, 3

    # Convolution parameters as (height, width) pairs.
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # Random device tensors, converted to channels_last memory format.
    layout = torch.channels_last
    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda")
    torch_tensor_x = torch_tensor_x.to(memory_format=layout)
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda")
    torch_tensor_w = torch_tensor_w.to(memory_format=layout)
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=torch_data_type, device="cuda")
    torch_tensor_bias = torch_tensor_bias.to(memory_format=layout)

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_bias_swish_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=layout,
        device="cuda",
    )

    # Map every graph tensor to the device pointer of its torch buffer.
    variant_pack = {}
    variant_pack[hipdnn_tensor_x] = torch_tensor_x.data_ptr()
    variant_pack[hipdnn_tensor_w] = torch_tensor_w.data_ptr()
    variant_pack[hipdnn_tensor_bias] = torch_tensor_bias.data_ptr()
    variant_pack[hipdnn_tensor_y] = torch_tensor_y.data_ptr()

    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_swish graph execution complete.")
import hipdnn
import torch
def build_conv_bias_swish_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph for ``swish(conv_fprop(x, w) + bias) + add``.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x: Torch tensor the convolution image tensor is modelled on.
        torch_tensor_w: Torch tensor the convolution weight tensor is modelled on.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        torch_tensor_add: Torch tensor the final add operand is modelled on.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: I/O data type of the graph; the intermediate and
            compute data types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        ``(graph, x, w, bias, add, y)``: the built graph followed by the hipdnn
        tensors of the four inputs and of the output.
    """
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_swish_add",
    )

    # One graph tensor per torch tensor, created in argument order.
    x, w, bias, residual = (
        fused.tensor_like(source)
        for source in (torch_tensor_x, torch_tensor_w, torch_tensor_bias, torch_tensor_add)
    )

    # conv -> +bias -> swish -> +residual
    conv_out = fused.conv_fprop(
        image=x,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused.add(a=conv_out, b=bias, name="bias")
    activated = fused.swish(input=biased, name="swish")
    y = fused.add(a=activated, b=residual, name="add")

    # Only the final tensor is flagged as a graph output.
    y.set_output(True)
    fused.build(hipdnn_handle)
    return fused, x, w, bias, residual, y
if __name__ == "__main__":
    # Example driver: build the conv+bias+swish+add graph and execute it once
    # on random half-precision data.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width
    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation
    # Spatial size of the convolution output (standard forward-conv formula).
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=torch_data_type, device="cuda").to(
        memory_format=torch.channels_last
    )
    # The residual is added to the activated convolution result, so it is
    # shaped like the convolution output (n, k, out_h, out_w) rather than like
    # the input. With the constants above both shapes coincide.
    torch_tensor_add = torch.rand(
        n, k, out_h, out_w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_swish_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_swish_add graph execution complete.")
import hipdnn
import torch
def build_convBwd_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    output_padding,
    hipdnn_data_type,
):
    """Build a hipdnn graph for ``relu(conv_dgrad(x, w) + bias)``.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x: Torch tensor the ``loss`` operand of ``conv_dgrad`` is
            modelled on.
        torch_tensor_w: Torch tensor the ``filter`` operand of ``conv_dgrad``
            is modelled on.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        padding: Forwarded unchanged to ``graph.conv_dgrad``.
        stride: Forwarded unchanged to ``graph.conv_dgrad``.
        dilation: Forwarded unchanged to ``graph.conv_dgrad``.
        output_padding: Forwarded unchanged to ``graph.conv_dgrad``.
        hipdnn_data_type: I/O data type of the graph; the intermediate and
            compute data types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        ``(graph, x, w, bias, y)``: the built graph followed by the hipdnn
        tensors of the three inputs and of the output.
    """
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convBwd_bias_relu",
    )

    # Graph-side counterparts of the torch tensors, created in argument order.
    x, w, bias = (
        fused.tensor_like(source)
        for source in (torch_tensor_x, torch_tensor_w, torch_tensor_bias)
    )

    # conv_dgrad -> +bias -> relu
    dgrad_out = fused.conv_dgrad(
        loss=x,
        filter=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        output_padding=output_padding,
        name="conv2d",
    )
    biased = fused.add(a=dgrad_out, b=bias, name="bias")
    y = fused.relu(input=biased, name="relu")

    # The relu result is the only graph output.
    y.set_output(True)
    fused.build(hipdnn_handle)
    return (fused, x, w, bias, y)
if __name__ == "__main__":
    # Example driver: build the conv_dgrad+bias+relu graph and execute it once
    # on random half-precision data.

    # Loss tensor is (n, c, h, w); filter is (k, c, r, s).
    # NOTE(review): c == k here, so this example does not show which filter
    # dimension has to match the loss channels for conv_dgrad -- confirm
    # before changing either value.
    n, c, h, w = 1, 32, 270, 480
    k, r, s = 32, 3, 3

    # Convolution parameters as (height, width) pairs.
    stride_h, stride_w = 2, 2
    pad_h, pad_w = 0, 0
    dil_h, dil_w = 1, 1
    output_padding_h, output_padding_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # Random device tensors, converted to channels_last memory format.
    tensor_opts = {"dtype": torch_data_type, "device": "cuda"}
    torch_tensor_x = torch.rand(n, c, h, w, **tensor_opts).to(
        memory_format=torch.channels_last
    )
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_opts).to(
        memory_format=torch.channels_last
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_opts).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_convBwd_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        [output_padding_h, output_padding_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(), memory_format=torch.channels_last, **tensor_opts
    )

    # Map every graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {descriptor: buffer.data_ptr() for descriptor, buffer in bindings}

    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convBwd_bias_relu graph execution complete.")
import hipdnn
import torch
def build_convint8_bias_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph: conv_fprop -> dequantize -> bias -> quantize.

    The node chain is ``conv_fprop``, ``sub`` (dequantize zero point), ``mul``
    (dequantize scale), ``add`` (bias), ``div`` (quantize scale) and ``add``
    (quantize zero point).

    The four zero-point/scale torch tensors are only handed to
    ``graph.tensor_like``; the resulting graph tensors are then pinned with
    ``set_value`` (0.0 for zero points, 1.0 for scales) and are not returned.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x: Torch tensor the convolution image tensor is modelled on.
        torch_tensor_w: Torch tensor the convolution weight tensor is modelled on.
        torch_tensor_zero_point_dq: Template for the dequantize zero point.
        torch_tensor_scale_dq: Template for the dequantize scale.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: Used as both I/O and intermediate data type; the
            compute data type is fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        ``(graph, x, w, bias, y)``: the built graph followed by the hipdnn
        tensors of the image, weight, bias and output.
    """
    g = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias",
    )

    def _constant(template, value):
        # Graph tensor modelled on `template`, then pinned to a fixed value.
        tensor = g.tensor_like(template)
        tensor.set_value(value)
        return tensor

    # Convolution input and filter tensors with NCHWc32 layout.
    x = g.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    w = g.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)
    conv_out = g.conv_fprop(
        image=x,
        weight=w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # Dequantize: subtract the zero point, then multiply by the scale.
    zero_point_dq = _constant(torch_tensor_zero_point_dq, 0.0)
    shifted = g.sub(a=conv_out, b=zero_point_dq, name="conv_deq_sub_node")
    scale_dq = _constant(torch_tensor_scale_dq, 1.0)
    dequantized = g.mul(a=shifted, b=scale_dq, name="conv_deq_mul_node")

    # Bias
    bias = g.tensor_like(torch_tensor_bias)
    biased = g.add(a=dequantized, b=bias, name="bias_node")

    # Quantize: divide by the scale, then add the zero point.
    scale_q = _constant(torch_tensor_scale_q, 1.0)
    rescaled = g.div(a=biased, b=scale_q, name="quantize_div_node")
    zero_point_q = _constant(torch_tensor_zero_point_q, 0.0)
    y = g.add(a=rescaled, b=zero_point_q, name="quantize_add_node")

    # The output gets the same vector count/dimension setting as the inputs.
    y.set_output(True).set_vector_count_and_dimension(32, 1)
    g.build(hipdnn_handle)
    return g, x, w, bias, y
if __name__ == "__main__":
    # Example driver: build the int8 conv+bias graph and execute it once on
    # random int8 data.

    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width
    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    # torch.randint's upper bound is exclusive, so values span [-128, 127].
    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128, high=128, size=(k, c, r, s), dtype=torch_data_type, device="cuda"
    )
    # Quantization parameters: the dtype is given explicitly so it does not
    # depend on torch's global default dtype. Their device pointers are not
    # placed in the variant pack below.
    torch_tensor_zero_point_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = build_convint8_bias_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Map every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias graph execution complete.")
import hipdnn
import torch
def build_convint8_bias_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_add,
    torch_tensor_zero_point_dq_add,
    torch_tensor_scale_dq_add,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph: conv_fprop -> dequantize -> bias -> add -> quantize.

    The convolution result is dequantized (``sub`` zero point, ``mul`` scale)
    and biased. The extra ``add`` input is dequantized the same way with its
    own zero point and scale, the two branches are summed, and the sum is
    quantized (``div`` scale, ``add`` zero point).

    The zero-point/scale torch tensors are only handed to
    ``graph.tensor_like``; the resulting graph tensors are then pinned with
    ``set_value`` (0.0 for zero points, 1.0 for scales) and are not returned.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x: Torch tensor the convolution image tensor is modelled on.
        torch_tensor_w: Torch tensor the convolution weight tensor is modelled on.
        torch_tensor_zero_point_dq: Template for the conv dequantize zero point.
        torch_tensor_scale_dq: Template for the conv dequantize scale.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        torch_tensor_add: Torch tensor the quantized add input is modelled on.
        torch_tensor_zero_point_dq_add: Template for the add-input dequantize
            zero point.
        torch_tensor_scale_dq_add: Template for the add-input dequantize scale.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: Used as both I/O and intermediate data type; the
            compute data type is fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        ``(graph, x, w, bias, add, y)``: the built graph followed by the
        hipdnn tensors of the image, weight, bias, add input and output.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_add",
    )

    def _constant(template, value):
        # Graph tensor modelled on `template`, then pinned to a fixed value.
        tensor = graph.tensor_like(template)
        tensor.set_value(value)
        return tensor

    # Create hipdnn conv input and filter tensor with NCHWc32 layout
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)
    # Create conv_fprop op
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )
    # Dequantize the conv result: subtract zero_point_dq, multiply by scale_dq
    hipdnn_tensor_zero_point_dq = _constant(torch_tensor_zero_point_dq, 0.0)
    hipdnn_tensor_conv_deq_sub_output = graph.sub(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_zero_point_dq, name="conv_deq_sub_node"
    )
    hipdnn_tensor_scale_dq = _constant(torch_tensor_scale_dq, 1.0)
    hipdnn_tensor_conv_deq_mul_output = graph.mul(
        a=hipdnn_tensor_conv_deq_sub_output, b=hipdnn_tensor_scale_dq, name="conv_deq_mul_node"
    )
    # Create bias node
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_deq_mul_output, b=hipdnn_tensor_bias, name="bias_node"
    )
    # Create the add input (still quantized at this point)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)
    hipdnn_tensor_add.set_vector_count_and_dimension(32, 1)
    # Dequantize the add input: subtract zero_point_dq_add, multiply by scale_dq_add
    hipdnn_tensor_zero_point_dq_add = _constant(torch_tensor_zero_point_dq_add, 0.0)
    hipdnn_tensor_add_deq_sub_output = graph.sub(
        a=hipdnn_tensor_add, b=hipdnn_tensor_zero_point_dq_add, name="add_deq_sub_node"
    )
    hipdnn_tensor_scale_dq_add = _constant(torch_tensor_scale_dq_add, 1.0)
    hipdnn_tensor_add_deq_mul_output = graph.mul(
        a=hipdnn_tensor_add_deq_sub_output, b=hipdnn_tensor_scale_dq_add, name="add_deq_mul_node"
    )
    # Create add op
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_bias_output, b=hipdnn_tensor_add_deq_mul_output, name="add_node"
    )
    # Quantize: divide by scale_q, add zero_point_q
    hipdnn_tensor_scale_q = _constant(torch_tensor_scale_q, 1.0)
    hipdnn_tensor_quantize_div_output = graph.div(
        a=hipdnn_tensor_add_output, b=hipdnn_tensor_scale_q, name="quantize_div_node"
    )
    hipdnn_tensor_zero_point_q = _constant(torch_tensor_zero_point_q, 0.0)
    hipdnn_tensor_output = graph.add(
        a=hipdnn_tensor_quantize_div_output, b=hipdnn_tensor_zero_point_q, name="quantize_add_node"
    )
    # The output gets the same vector count/dimension setting as the inputs.
    hipdnn_tensor_output.set_output(True).set_vector_count_and_dimension(32, 1)
    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_output,
    )
if __name__ == "__main__":
    # Example driver: build the int8 conv+bias+add graph and execute it once
    # on random int8 data.

    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width
    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation
    # Spatial size of the convolution output; integer floor division avoids
    # the float round trip of `int(x / stride + 1)`.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    # torch.randint's upper bound is exclusive, so values span [-128, 127].
    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128, high=128, size=(k, c, r, s), dtype=torch_data_type, device="cuda"
    )
    # Quantization parameters: the dtype is given explicitly so it does not
    # depend on torch's global default dtype. Their device pointers are not
    # placed in the variant pack below.
    torch_tensor_zero_point_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    # The add input is shaped like the convolution output.
    torch_tensor_add = torch.randint(
        low=-128, high=128, size=(n, k, out_h, out_w), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_zero_point_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_zero_point_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_convint8_bias_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_add,
        torch_tensor_zero_point_dq_add,
        torch_tensor_scale_dq_add,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Map every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias_add graph execution complete.")
import hipdnn
import torch
def build_convint8_bias_add_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_add,
    torch_tensor_zero_point_dq_add,
    torch_tensor_scale_dq_add,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph: conv_fprop -> dequantize -> bias -> add -> relu -> quantize.

    The convolution result is dequantized (``sub`` zero point, ``mul`` scale)
    and biased. The extra ``add`` input is dequantized the same way with its
    own zero point and scale, the two branches are summed and passed through
    ``relu``, and the result is quantized (``div`` scale, ``add`` zero point).

    The zero-point/scale torch tensors are only handed to
    ``graph.tensor_like``; the resulting graph tensors are then pinned with
    ``set_value`` (0.0 for zero points, 1.0 for scales) and are not returned.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x: Torch tensor the convolution image tensor is modelled on.
        torch_tensor_w: Torch tensor the convolution weight tensor is modelled on.
        torch_tensor_zero_point_dq: Template for the conv dequantize zero point.
        torch_tensor_scale_dq: Template for the conv dequantize scale.
        torch_tensor_bias: Torch tensor the bias operand is modelled on.
        torch_tensor_add: Torch tensor the quantized add input is modelled on.
        torch_tensor_zero_point_dq_add: Template for the add-input dequantize
            zero point.
        torch_tensor_scale_dq_add: Template for the add-input dequantize scale.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: Used as both I/O and intermediate data type; the
            compute data type is fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        ``(graph, x, w, bias, add, y)``: the built graph followed by the
        hipdnn tensors of the image, weight, bias, add input and output.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_add_relu",
    )

    def _constant(template, value):
        # Graph tensor modelled on `template`, then pinned to a fixed value.
        tensor = graph.tensor_like(template)
        tensor.set_value(value)
        return tensor

    # Create hipdnn conv input and filter tensor with NCHWc32 layout
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)
    # Create conv_fprop op
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )
    # Dequantize the conv result: subtract zero_point_dq, multiply by scale_dq
    hipdnn_tensor_zero_point_dq = _constant(torch_tensor_zero_point_dq, 0.0)
    hipdnn_tensor_conv_deq_sub_output = graph.sub(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_zero_point_dq, name="conv_deq_sub_node"
    )
    hipdnn_tensor_scale_dq = _constant(torch_tensor_scale_dq, 1.0)
    hipdnn_tensor_conv_deq_mul_output = graph.mul(
        a=hipdnn_tensor_conv_deq_sub_output, b=hipdnn_tensor_scale_dq, name="conv_deq_mul_node"
    )
    # Create bias node
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_deq_mul_output, b=hipdnn_tensor_bias, name="bias_node"
    )
    # Create the add input (still quantized at this point)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)
    hipdnn_tensor_add.set_vector_count_and_dimension(32, 1)
    # Dequantize the add input: subtract zero_point_dq_add, multiply by scale_dq_add
    hipdnn_tensor_zero_point_dq_add = _constant(torch_tensor_zero_point_dq_add, 0.0)
    hipdnn_tensor_add_deq_sub_output = graph.sub(
        a=hipdnn_tensor_add, b=hipdnn_tensor_zero_point_dq_add, name="add_deq_sub_node"
    )
    hipdnn_tensor_scale_dq_add = _constant(torch_tensor_scale_dq_add, 1.0)
    hipdnn_tensor_add_deq_mul_output = graph.mul(
        a=hipdnn_tensor_add_deq_sub_output, b=hipdnn_tensor_scale_dq_add, name="add_deq_mul_node"
    )
    # Create add op
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_bias_output, b=hipdnn_tensor_add_deq_mul_output, name="add_node"
    )
    # Create relu node
    hipdnn_tensor_relu_output = graph.relu(input=hipdnn_tensor_add_output, name="relu_node")
    # Quantize: divide by scale_q, add zero_point_q
    hipdnn_tensor_scale_q = _constant(torch_tensor_scale_q, 1.0)
    hipdnn_tensor_quantize_div_output = graph.div(
        a=hipdnn_tensor_relu_output, b=hipdnn_tensor_scale_q, name="quantize_div_node"
    )
    hipdnn_tensor_zero_point_q = _constant(torch_tensor_zero_point_q, 0.0)
    hipdnn_tensor_output = graph.add(
        a=hipdnn_tensor_quantize_div_output, b=hipdnn_tensor_zero_point_q, name="quantize_add_node"
    )
    # The output gets the same vector count/dimension setting as the inputs.
    hipdnn_tensor_output.set_output(True).set_vector_count_and_dimension(32, 1)
    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_output,
    )
if __name__ == "__main__":
    # Example driver: build the int8 conv+bias+add+relu graph and execute it
    # once on random int8 data.

    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width
    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation
    # Spatial size of the convolution output; integer floor division avoids
    # the float round trip of `int(x / stride + 1)`.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    # torch.randint's upper bound is exclusive, so values span [-128, 127].
    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128, high=128, size=(k, c, r, s), dtype=torch_data_type, device="cuda"
    )
    # Quantization parameters: the dtype is given explicitly so it does not
    # depend on torch's global default dtype. Their device pointers are not
    # placed in the variant pack below.
    torch_tensor_zero_point_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    # The add input is shaped like the convolution output.
    torch_tensor_add = torch.randint(
        low=-128, high=128, size=(n, k, out_h, out_w), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_zero_point_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_zero_point_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_convint8_bias_add_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_add,
        torch_tensor_zero_point_dq_add,
        torch_tensor_scale_dq_add,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Map every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias_add_relu graph execution complete.")
import hipdnn
import torch
def build_convint8_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the "convint8_bias_relu" hipdnn graph.

    Node chain, in creation order:
    conv_fprop -> sub (zero_point_dq) -> mul (scale_dq) -> add (bias) -> relu
    -> div (scale_q) -> add (zero_point_q).

    The four zero-point / scale torch tensors are only handed to
    ``graph.tensor_like``; the resulting graph tensors are then given fixed
    values with ``set_value`` (0.0 for zero points, 1.0 for scales).

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution image input.
        torch_tensor_w: Torch tensor describing the convolution filter.
        torch_tensor_zero_point_dq: Template for the dequantize zero point.
        torch_tensor_scale_dq: Template for the dequantize scale.
        torch_tensor_bias: Torch tensor describing the bias operand.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Convolution padding, forwarded to ``conv_fprop``.
        stride: Convolution stride, forwarded to ``conv_fprop``.
        dilation: Convolution dilation, forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as both the io and intermediate data type.

    Returns:
        Tuple ``(graph, x, w, bias, output)`` of the built graph and the graph
        tensors for the image, filter, bias and final output.
    """
    graph = hipdnn.pygraph(
        name="convint8_bias_relu",
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
    )

    def fixed_scalar(template, value):
        # Graph tensor modelled on `template` whose value is set on the graph.
        scalar = graph.tensor_like(template)
        scalar.set_value(value)
        return scalar

    # Image and filter are vectorized (count 32 on dimension 1); the original
    # sample describes this as the NCHWc32 layout.
    x = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    w = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)

    conv_args = {"padding": padding, "stride": stride, "dilation": dilation}
    conv_out = graph.conv_fprop(image=x, weight=w, name="conv_fprop_node", **conv_args)

    # Dequantize: (conv - zero_point_dq) * scale_dq
    zero_point_dq = fixed_scalar(torch_tensor_zero_point_dq, 0.0)
    shifted = graph.sub(a=conv_out, b=zero_point_dq, name="conv_deq_sub_node")
    scale_dq = fixed_scalar(torch_tensor_scale_dq, 1.0)
    dequantized = graph.mul(a=shifted, b=scale_dq, name="conv_deq_mul_node")

    # Bias followed by ReLU
    bias = graph.tensor_like(torch_tensor_bias)
    biased = graph.add(a=dequantized, b=bias, name="bias_node")
    activated = graph.relu(input=biased, name="relu_node")

    # Quantize: relu / scale_q + zero_point_q
    scale_q = fixed_scalar(torch_tensor_scale_q, 1.0)
    rescaled = graph.div(a=activated, b=scale_q, name="quantize_div_node")
    zero_point_q = fixed_scalar(torch_tensor_zero_point_q, 0.0)
    y = graph.add(a=rescaled, b=zero_point_q, name="quantize_add_node")

    # The result is a graph output and uses the same vectorization as the inputs.
    y.set_output(True).set_vector_count_and_dimension(32, 1)

    graph.build(hipdnn_handle)
    return (graph, x, w, bias, y)
if __name__ == "__main__":
    # Sample driver: builds the convint8_bias_relu graph for a fixed problem
    # size and executes it once on random data.

    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width
    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8  # image, filter and output buffers
    bias_data_type = torch.float32  # bias buffer
    quantize_data_type = torch.float32  # zero-point / scale tensors

    # Random int8 image and filter covering the full int8 range [-128, 127].
    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128, high=128, size=(k, c, r, s), dtype=torch_data_type, device="cuda"
    )
    # Quantization parameters: previously these relied on torch's implicit
    # default dtype and left `quantize_data_type` unused; the dtype is now
    # stated explicitly.
    torch_tensor_zero_point_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = build_convint8_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Only the graph tensors returned by the builder are bound to device memory.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias_relu graph execution complete.")
import hipdnn
import torch
def build_convint8_bias_relu_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_add,
    torch_tensor_zero_point_dq_add,
    torch_tensor_scale_dq_add,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the "convint8_bias_relu_add" hipdnn graph.

    Two branches are combined by an add node and then quantized:

    * conv branch: conv_fprop -> sub (zero_point_dq) -> mul (scale_dq)
      -> add (bias) -> relu
    * add branch: add input -> sub (zero_point_dq_add) -> mul (scale_dq_add)
    * tail: add (relu, dequantized add input) -> div (scale_q)
      -> add (zero_point_q)

    All zero-point / scale torch tensors are only handed to
    ``graph.tensor_like``; the resulting graph tensors get fixed values via
    ``set_value`` (0.0 for zero points, 1.0 for scales).

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution image input.
        torch_tensor_w: Torch tensor describing the convolution filter.
        torch_tensor_zero_point_dq: Template for the conv dequantize zero point.
        torch_tensor_scale_dq: Template for the conv dequantize scale.
        torch_tensor_bias: Torch tensor describing the bias operand.
        torch_tensor_add: Torch tensor describing the (quantized) add input.
        torch_tensor_zero_point_dq_add: Template for the add-input zero point.
        torch_tensor_scale_dq_add: Template for the add-input scale.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Convolution padding, forwarded to ``conv_fprop``.
        stride: Convolution stride, forwarded to ``conv_fprop``.
        dilation: Convolution dilation, forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as both the io and intermediate data type.

    Returns:
        Tuple ``(graph, x, w, bias, add, output)`` of the built graph and the
        graph tensors for the image, filter, bias, add input and final output.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_relu_add",
    )
    # Create hipdnn conv input and filter tensor with NCHWc32 layout
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)
    # Create conv_fprop op
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )
    # Create sub node for dequantize:zero_point_dq
    hipdnn_tensor_zero_point_dq = graph.tensor_like(torch_tensor_zero_point_dq)
    hipdnn_tensor_zero_point_dq.set_value(0.0)
    hipdnn_tensor_conv_deq_sub_output = graph.sub(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_zero_point_dq, name="conv_deq_sub_node"
    )
    # Create mul node for dequantize:scale_dq
    hipdnn_tensor_scale_dq = graph.tensor_like(torch_tensor_scale_dq)
    hipdnn_tensor_scale_dq.set_value(1.0)
    hipdnn_tensor_conv_deq_mul_output = graph.mul(
        a=hipdnn_tensor_conv_deq_sub_output, b=hipdnn_tensor_scale_dq, name="conv_deq_mul_node"
    )
    # Create bias node
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_deq_mul_output, b=hipdnn_tensor_bias, name="bias_node"
    )
    # Create relu node
    hipdnn_tensor_relu_output = graph.relu(input=hipdnn_tensor_bias_output, name="relu_node")
    # Create the add input (still quantized), vectorized like the conv input
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)
    hipdnn_tensor_add.set_vector_count_and_dimension(32, 1)
    # Create sub node for dequantize:zero_point_dq_add
    hipdnn_tensor_zero_point_dq_add = graph.tensor_like(torch_tensor_zero_point_dq_add)
    hipdnn_tensor_zero_point_dq_add.set_value(0.0)
    hipdnn_tensor_add_deq_sub_output = graph.sub(
        a=hipdnn_tensor_add, b=hipdnn_tensor_zero_point_dq_add, name="add_deq_sub_node"
    )
    # Create mul node for dequantize:scale_dq_add
    hipdnn_tensor_scale_dq_add = graph.tensor_like(torch_tensor_scale_dq_add)
    hipdnn_tensor_scale_dq_add.set_value(1.0)
    hipdnn_tensor_add_deq_mul_output = graph.mul(
        a=hipdnn_tensor_add_deq_sub_output, b=hipdnn_tensor_scale_dq_add, name="add_deq_mul_node"
    )
    # Create add op joining the relu output with the dequantized add input
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_relu_output, b=hipdnn_tensor_add_deq_mul_output, name="add_node"
    )
    # Create div node for quantize:scale_q
    hipdnn_tensor_scale_q = graph.tensor_like(torch_tensor_scale_q)
    hipdnn_tensor_scale_q.set_value(1.0)
    hipdnn_tensor_quantize_div_output = graph.div(
        a=hipdnn_tensor_add_output, b=hipdnn_tensor_scale_q, name="quantize_div_node"
    )
    # Create add node for quantize:zero_point_q
    hipdnn_tensor_zero_point_q = graph.tensor_like(torch_tensor_zero_point_q)
    hipdnn_tensor_zero_point_q.set_value(0.0)
    hipdnn_tensor_output = graph.add(
        a=hipdnn_tensor_quantize_div_output, b=hipdnn_tensor_zero_point_q, name="quantize_add_node"
    )
    # Mark the result as a graph output with the same vectorization as the inputs
    hipdnn_tensor_output.set_output(True).set_vector_count_and_dimension(32, 1)
    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_output,
    )
if __name__ == "__main__":
    # Sample driver: builds the convint8_bias_relu_add graph for a fixed
    # problem size and executes it once on random data.

    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width
    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width
    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation
    # Convolution output size; integer floor division keeps the arithmetic in
    # ints instead of going through float division and int().
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8  # image, filter, add input and output buffers
    bias_data_type = torch.float32  # bias buffer
    quantize_data_type = torch.float32  # zero-point / scale tensors

    # Random int8 image and filter covering the full int8 range [-128, 127].
    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128, high=128, size=(k, c, r, s), dtype=torch_data_type, device="cuda"
    )
    # Quantization parameters: previously these relied on torch's implicit
    # default dtype and left `quantize_data_type` unused; the dtype is now
    # stated explicitly.
    torch_tensor_zero_point_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    # The add input has the shape of the convolution output.
    torch_tensor_add = torch.randint(
        low=-128, high=128, size=(n, k, out_h, out_w), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_zero_point_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq_add = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_convint8_bias_relu_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_add,
        torch_tensor_zero_point_dq_add,
        torch_tensor_scale_dq_add,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Only the graph tensors returned by the builder are bound to device memory.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }
    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias_relu_add graph execution complete.")
import hipdnn
import torch
def build_conv_backward_graph(
    hipdnn_handle, torch_tensor_dy, torch_tensor_w, padding, stride, dilation, hipdnn_data_type
):
    """Build the "convolution_backward" graph holding a single conv_dgrad node.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_dy: Torch tensor describing the ``loss`` operand.
        torch_tensor_w: Torch tensor describing the ``filter`` operand.
        padding: Convolution padding, forwarded to ``conv_dgrad``.
        stride: Convolution stride, forwarded to ``conv_dgrad``.
        dilation: Convolution dilation, forwarded to ``conv_dgrad``.
        hipdnn_data_type: io data type of the graph (intermediate and compute
            types are fixed to FLOAT).

    Returns:
        Tuple ``(graph, dy, w, dx)`` of the built graph and its graph tensors;
        ``dx`` is marked as the graph output.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        name="convolution_backward",
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
    )

    # Graph-side descriptions of the two inputs
    dy = graph.tensor_like(torch_tensor_dy)
    w = graph.tensor_like(torch_tensor_w)

    # Single data-gradient node; its result is the only graph output
    conv_args = {"padding": padding, "stride": stride, "dilation": dilation}
    dx = graph.conv_dgrad(loss=dy, filter=w, name="conv2d_backward", **conv_args)
    dx.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, dy, w, dx)
if __name__ == "__main__":
    # Sample driver: one convolution data-gradient pass on random float32 data.

    # Input (batch, channels, height, width) and filter (out channels, height, width)
    n, c, h, w = 4, 32, 16, 16
    k, r, s = 64, 3, 3
    # Convolution parameters as (height, width) values
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    # dy is allocated with the input's spatial size (h, w).
    torch_tensor_dy = torch.rand(n, k, h, w, **tensor_options)
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_backward_graph(
        hipdnn_handle,
        torch_tensor_dy,
        torch_tensor_w,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_dy, hipdnn_tensor_w, hipdnn_tensor_dx = built

    # Output buffer sized from the dimensions reported by the graph tensor.
    dx_dims = hipdnn_tensor_dx.get_dim()
    torch_tensor_dx = torch.empty(dx_dims, **tensor_options)

    # Bind every graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_dx, torch_tensor_dx),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Convolution backward graph execution complete.")
import hipdnn
import torch
def build_conv_forward_graph(
    hipdnn_handle, torch_tensor_x, torch_tensor_w, padding, stride, dilation, hipdnn_data_type
):
    """Build the "convolution_forward" graph holding a single conv_fprop node.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the ``image`` operand.
        torch_tensor_w: Torch tensor describing the ``weight`` operand.
        padding: Convolution padding, forwarded to ``conv_fprop``.
        stride: Convolution stride, forwarded to ``conv_fprop``.
        dilation: Convolution dilation, forwarded to ``conv_fprop``.
        hipdnn_data_type: io data type of the graph (intermediate and compute
            types are fixed to FLOAT).

    Returns:
        Tuple ``(graph, x, w, y)`` of the built graph and its graph tensors;
        ``y`` is marked as the graph output.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        name="convolution_forward",
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
    )

    # Graph-side descriptions of the two inputs
    x = graph.tensor_like(torch_tensor_x)
    w = graph.tensor_like(torch_tensor_w)

    # Single forward convolution node; its result is the only graph output
    conv_args = {"padding": padding, "stride": stride, "dilation": dilation}
    y = graph.conv_fprop(image=x, weight=w, name="conv2d_forward", **conv_args)
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, w, y)
if __name__ == "__main__":
    # Sample driver: one forward convolution (1x1 filter) on random float32 data.

    # Input (batch, channels, height, width) and filter (out channels, height, width)
    n, c, h, w = 4, 16, 56, 56
    k, r, s = 4, 1, 1
    # Convolution parameters as (height, width) values
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_x = torch.rand(n, c, h, w, **tensor_options)
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_forward_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph tensor.
    y_dims = hipdnn_tensor_y.get_dim()
    torch_tensor_y = torch.empty(y_dims, **tensor_options)

    # Bind every graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Convolution forward graph execution complete.")
import hipdnn
import torch
def build_conv_wrw_graph(
    hipdnn_handle, torch_tensor_x, torch_tensor_dy, padding, stride, dilation, hipdnn_data_type
):
    """Build the "convolution_wrw" graph holding a single conv_wgrad node.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the ``image`` operand.
        torch_tensor_dy: Torch tensor describing the ``loss`` operand.
        padding: Convolution padding, forwarded to ``conv_wgrad``.
        stride: Convolution stride, forwarded to ``conv_wgrad``.
        dilation: Convolution dilation, forwarded to ``conv_wgrad``.
        hipdnn_data_type: io data type of the graph (intermediate and compute
            types are fixed to FLOAT).

    Returns:
        Tuple ``(graph, x, dy, dw)`` of the built graph and its graph tensors;
        ``dw`` is marked as the graph output.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        name="convolution_wrw",
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
    )

    # Graph-side descriptions of the two inputs
    x = graph.tensor_like(torch_tensor_x)
    dy = graph.tensor_like(torch_tensor_dy)

    # Single weight-gradient node; its result is the only graph output
    conv_args = {"padding": padding, "stride": stride, "dilation": dilation}
    dw = graph.conv_wgrad(image=x, loss=dy, name="conv2d_wrw", **conv_args)
    dw.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, dy, dw)
if __name__ == "__main__":
    # Sample driver: one convolution weight-gradient pass on random float32 data.

    # Input (batch, channels, height, width) and filter (out channels, height, width)
    n, c, h, w = 4, 32, 16, 16
    k, r, s = 64, 3, 3
    # Convolution parameters as (height, width) values
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_x = torch.rand(n, c, h, w, **tensor_options)
    # dy is allocated with the input's spatial size (h, w).
    torch_tensor_dy = torch.rand(n, k, h, w, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_conv_wrw_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_dy,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_dy, hipdnn_tensor_dw = built

    # Output buffer sized from the dimensions reported by the graph tensor.
    dw_dims = hipdnn_tensor_dw.get_dim()
    torch_tensor_dw = torch.empty(dw_dims, **tensor_options)

    # Bind every graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_dw, torch_tensor_dw),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Convolution wrw graph execution complete.")
import hipdnn
import torch
def build_ctc_loss_graph(hipdnn_handle, torch_tensor_probs, hipdnn_data_type):
    """Build the "ctc_loss_inference" graph holding a single ctc_loss node.

    The label data is hard-coded in this sample: seven labels split by
    ``label_lengths`` (1 + 2 + 1 + 3) and four ``input_lengths`` entries
    (presumably one per batch item; confirm against the caller's batch size).

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_probs: Torch tensor describing the ``probs`` operand.
        hipdnn_data_type: io data type of the graph (intermediate and compute
            types are fixed to FLOAT).

    Returns:
        Tuple ``(graph, probs, losses, gradients)``; ``losses`` and
        ``gradients`` are both marked as graph outputs.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        name="ctc_loss_inference",
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
    )

    probs = graph.tensor_like(torch_tensor_probs)

    # Fixed sample configuration for the ctc_loss node
    ctc_config = {
        "blank_label_id": 0,
        "apply_softmax": False,
        "algo": 0,
        "labels": [1, 2, 3, 4, 2, 3, 2],
        "label_lengths": [1, 2, 1, 3],
        "input_lengths": [4, 100, 100, 200],
    }
    losses, gradients = graph.ctc_loss(probs=probs, name="ctc_loss", **ctc_config)

    # Both results of the node are exposed as graph outputs
    for result in (losses, gradients):
        result.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, probs, losses, gradients)
if __name__ == "__main__":
    # Sample driver: runs the CTC loss graph once on random float32 probabilities.
    batch = 4
    max_time = 500
    num_classes = 5

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    # Probabilities are laid out as (max_time, batch, num_classes).
    torch_tensor_probs = torch.rand(max_time, batch, num_classes, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_ctc_loss_graph(hipdnn_handle, torch_tensor_probs, hipdnn_data_type)
    graph, hipdnn_tensor_probs, hipdnn_tensor_losses, hipdnn_tensor_gradients = built

    # Output buffers: one loss value per batch item, plus the gradients.
    torch_tensor_losses = torch.empty(batch, **tensor_options)
    # NOTE(review): this buffer is shaped (batch, max_time, num_classes) while
    # probs is (max_time, batch, num_classes); the element counts match, but
    # confirm which layout the graph actually writes.
    torch_tensor_gradients = torch.empty(batch, max_time, num_classes, **tensor_options)

    # Bind every graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_probs, torch_tensor_probs),
        (hipdnn_tensor_losses, torch_tensor_losses),
        (hipdnn_tensor_gradients, torch_tensor_gradients),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("ctc_loss graph execution complete.")
import hipdnn
import torch
def build_deform_attention_graph(
    hipdnn_handle,
    torch_tensor_value,
    torch_tensor_spatial_shapes,
    torch_tensor_level_start_index,
    torch_tensor_sampling_locations,
    torch_tensor_attention_weights,
    hipdnn_data_type,
):
    """Build the "deform_attention" graph holding a single deform_attn_fprop node.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_value: Torch tensor describing the ``value`` operand.
        torch_tensor_spatial_shapes: Torch tensor describing ``spatial_shapes``.
        torch_tensor_level_start_index: Torch tensor describing
            ``level_start_index``.
        torch_tensor_sampling_locations: Torch tensor describing
            ``sampling_locations``.
        torch_tensor_attention_weights: Torch tensor describing
            ``attention_weights``.
        hipdnn_data_type: io data type of the graph (intermediate and compute
            types are fixed to FLOAT).

    Returns:
        Tuple ``(graph, value, spatial_shapes, level_start_index,
        sampling_locations, attention_weights, y)``; ``y`` is marked as the
        graph output.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        name="deform_attention",
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
    )

    # Graph-side descriptions of the five inputs, in the original creation order
    value = graph.tensor_like(torch_tensor_value)
    spatial_shapes = graph.tensor_like(torch_tensor_spatial_shapes)
    level_start_index = graph.tensor_like(torch_tensor_level_start_index)
    sampling_locations = graph.tensor_like(torch_tensor_sampling_locations)
    attention_weights = graph.tensor_like(torch_tensor_attention_weights)

    # Single forward node; its result is the only graph output
    y = graph.deform_attn_fprop(
        value=value,
        spatial_shapes=spatial_shapes,
        level_start_index=level_start_index,
        sampling_locations=sampling_locations,
        attention_weights=attention_weights,
        name="deform_attn_fprop",
    )
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        value,
        spatial_shapes,
        level_start_index,
        sampling_locations,
        attention_weights,
        y,
    )
if __name__ == "__main__":
    # Sample driver: one deformable-attention forward pass on random data.

    # Problem size
    n = 2  # batch size
    n_heads = 2
    embed_dims_per_head = 32
    embed_dims = n_heads * embed_dims_per_head
    n_levels = 2
    n_points = 2
    n_queries = 32

    # Random (two values per level) spatial shapes, generated on the CPU.
    spatial_shapes_cpu = torch.randint(low=1, high=16, size=(n_levels, 2), dtype=torch.int64)
    # Number of keys per level is the product of that level's two extents;
    # n_keys is their total.
    count_per_level = spatial_shapes_cpu.prod(dim=1)
    n_keys = count_per_level.sum()
    # Start offset of each level: exclusive running sum of the per-level counts.
    level_start_index_cpu = torch.cumsum(count_per_level, dim=0) - count_per_level

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_value = torch.rand(n, n_keys, n_heads, embed_dims_per_head, **tensor_options)
    torch_tensor_spatial_shapes = spatial_shapes_cpu.to("cuda")
    torch_tensor_level_start_index = level_start_index_cpu.to("cuda")
    torch_tensor_sampling_locations = torch.rand(
        n, n_queries, n_heads, n_levels, n_points, 2, **tensor_options
    )
    torch_tensor_attention_weights = torch.rand(
        n, n_queries, n_heads, n_levels, n_points, **tensor_options
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_deform_attention_graph(
        hipdnn_handle,
        torch_tensor_value,
        torch_tensor_spatial_shapes,
        torch_tensor_level_start_index,
        torch_tensor_sampling_locations,
        torch_tensor_attention_weights,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_value,
        hipdnn_tensor_spatial_shapes,
        hipdnn_tensor_level_start_index,
        hipdnn_tensor_sampling_locations,
        hipdnn_tensor_attention_weights,
        hipdnn_tensor_y,
    ) = built

    # Output buffer sized from the dimensions reported by the graph tensor.
    y_dims = hipdnn_tensor_y.get_dim()
    torch_tensor_y = torch.empty(y_dims, **tensor_options)

    # Bind every graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_value, torch_tensor_value),
        (hipdnn_tensor_spatial_shapes, torch_tensor_spatial_shapes),
        (hipdnn_tensor_level_start_index, torch_tensor_level_start_index),
        (hipdnn_tensor_sampling_locations, torch_tensor_sampling_locations),
        (hipdnn_tensor_attention_weights, torch_tensor_attention_weights),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Deform attention graph execution complete.")
import hipdnn
import torch
def build_deform_attention_bwd_graph(
    hipdnn_handle,
    torch_tensor_value,
    torch_tensor_spatial_shapes,
    torch_tensor_level_start_index,
    torch_tensor_sampling_locations,
    torch_tensor_attention_weights,
    torch_tensor_grad_output,
    hipdnn_data_type,
):
    """Build the "deform_attention_bwd" graph holding a single deform_attn_dgrad node.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_value: Torch tensor describing the ``value`` operand.
        torch_tensor_spatial_shapes: Torch tensor describing ``spatial_shapes``.
        torch_tensor_level_start_index: Torch tensor describing
            ``level_start_index``.
        torch_tensor_sampling_locations: Torch tensor describing
            ``sampling_locations``.
        torch_tensor_attention_weights: Torch tensor describing
            ``attention_weights``.
        torch_tensor_grad_output: Torch tensor describing ``grad_output``.
        hipdnn_data_type: io data type of the graph (intermediate and compute
            types are fixed to FLOAT).

    Returns:
        Tuple ``(graph, value, spatial_shapes, level_start_index,
        sampling_locations, attention_weights, grad_output, grad_value,
        grad_sampling_loc, grad_attn_weight)``; the last three are marked as
        graph outputs.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        name="deform_attention_bwd",
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
    )

    # Graph-side descriptions of the six inputs, in the original creation order
    value = graph.tensor_like(torch_tensor_value)
    spatial_shapes = graph.tensor_like(torch_tensor_spatial_shapes)
    level_start_index = graph.tensor_like(torch_tensor_level_start_index)
    sampling_locations = graph.tensor_like(torch_tensor_sampling_locations)
    attention_weights = graph.tensor_like(torch_tensor_attention_weights)
    grad_output = graph.tensor_like(torch_tensor_grad_output)

    # Single backward node producing three gradients
    gradients = graph.deform_attn_dgrad(
        value=value,
        spatial_shapes=spatial_shapes,
        level_start_index=level_start_index,
        sampling_locations=sampling_locations,
        attention_weights=attention_weights,
        grad_output=grad_output,
        name="deform_attn_dgrad",
    )
    grad_value, grad_sampling_loc, grad_attn_weight = gradients

    # All three gradients are exposed as graph outputs
    grad_value.set_output(True)
    grad_sampling_loc.set_output(True)
    grad_attn_weight.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        value,
        spatial_shapes,
        level_start_index,
        sampling_locations,
        attention_weights,
        grad_output,
        grad_value,
        grad_sampling_loc,
        grad_attn_weight,
    )
if __name__ == "__main__":
    # Sample driver: one deformable-attention backward pass on random data.

    # Problem size
    n = 2  # batch size
    n_heads = 2
    embed_dims_per_head = 32
    embed_dims = n_heads * embed_dims_per_head
    n_levels = 2
    n_points = 2
    n_queries = 32

    # Random (two values per level) spatial shapes, generated on the CPU.
    spatial_shapes_cpu = torch.randint(low=1, high=16, size=(n_levels, 2), dtype=torch.int64)
    # Number of keys per level is the product of that level's two extents;
    # n_keys is their total.
    count_per_level = spatial_shapes_cpu.prod(dim=1)
    n_keys = count_per_level.sum()
    # Start offset of each level: exclusive running sum of the per-level counts.
    level_start_index_cpu = torch.cumsum(count_per_level, dim=0) - count_per_level

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_value = torch.rand(n, n_keys, n_heads, embed_dims_per_head, **tensor_options)
    torch_tensor_spatial_shapes = spatial_shapes_cpu.to("cuda")
    torch_tensor_level_start_index = level_start_index_cpu.to("cuda")
    torch_tensor_sampling_locations = torch.rand(
        n, n_queries, n_heads, n_levels, n_points, 2, **tensor_options
    )
    torch_tensor_attention_weights = torch.rand(
        n, n_queries, n_heads, n_levels, n_points, **tensor_options
    )
    # Incoming gradient uses the full embedding width (n_heads * embed_dims_per_head).
    torch_tensor_grad_output = torch.rand(n, n_queries, embed_dims, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_deform_attention_bwd_graph(
        hipdnn_handle,
        torch_tensor_value,
        torch_tensor_spatial_shapes,
        torch_tensor_level_start_index,
        torch_tensor_sampling_locations,
        torch_tensor_attention_weights,
        torch_tensor_grad_output,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_value,
        hipdnn_tensor_spatial_shapes,
        hipdnn_tensor_level_start_index,
        hipdnn_tensor_sampling_locations,
        hipdnn_tensor_attention_weights,
        hipdnn_tensor_grad_output,
        hipdnn_tensor_grad_value,
        hipdnn_tensor_grad_sampling_loc,
        hipdnn_tensor_grad_attn_weight,
    ) = built

    # Output buffers sized from the dimensions reported by the graph tensors.
    torch_tensor_grad_value = torch.empty(hipdnn_tensor_grad_value.get_dim(), **tensor_options)
    torch_tensor_grad_sampling_loc = torch.empty(
        hipdnn_tensor_grad_sampling_loc.get_dim(), **tensor_options
    )
    torch_tensor_grad_attn_weight = torch.empty(
        hipdnn_tensor_grad_attn_weight.get_dim(), **tensor_options
    )

    # Bind every graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_value, torch_tensor_value),
        (hipdnn_tensor_spatial_shapes, torch_tensor_spatial_shapes),
        (hipdnn_tensor_level_start_index, torch_tensor_level_start_index),
        (hipdnn_tensor_sampling_locations, torch_tensor_sampling_locations),
        (hipdnn_tensor_attention_weights, torch_tensor_attention_weights),
        (hipdnn_tensor_grad_output, torch_tensor_grad_output),
        (hipdnn_tensor_grad_value, torch_tensor_grad_value),
        (hipdnn_tensor_grad_sampling_loc, torch_tensor_grad_sampling_loc),
        (hipdnn_tensor_grad_attn_weight, torch_tensor_grad_attn_weight),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Deform attention bwd graph execution complete.")
import hipdnn
import torch
def build_deform_convolution_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_offset,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_mask,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the "deform_convolution" graph holding a single deform_conv_fprop node.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the ``image`` operand.
        torch_tensor_offset: Torch tensor describing the ``offset`` operand.
        torch_tensor_w: Torch tensor describing the ``weight`` operand.
        torch_tensor_bias: Torch tensor describing the ``bias`` operand.
        torch_tensor_mask: Torch tensor describing the ``mask`` operand.
        padding: Convolution padding, forwarded to ``deform_conv_fprop``.
        stride: Convolution stride, forwarded to ``deform_conv_fprop``.
        dilation: Convolution dilation, forwarded to ``deform_conv_fprop``.
        hipdnn_data_type: io data type of the graph (intermediate and compute
            types are fixed to FLOAT).

    Returns:
        Tuple ``(graph, x, offset, w, bias, mask, y)``; ``y`` is marked as the
        graph output.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        name="deform_convolution",
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
    )

    # Graph-side descriptions of the five inputs, in the original creation order
    x = graph.tensor_like(torch_tensor_x)
    offset = graph.tensor_like(torch_tensor_offset)
    w = graph.tensor_like(torch_tensor_w)
    bias = graph.tensor_like(torch_tensor_bias)
    mask = graph.tensor_like(torch_tensor_mask)

    # Single forward node; its result is the only graph output
    conv_args = {"padding": padding, "stride": stride, "dilation": dilation}
    y = graph.deform_conv_fprop(
        image=x,
        offset=offset,
        weight=w,
        bias=bias,
        mask=mask,
        name="deform_conv_fprop",
        **conv_args,
    )
    y.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x, offset, w, bias, mask, y)
if __name__ == "__main__":
    # Sample driver: one deformable-convolution forward pass on random float32
    # data stored in channels_last memory format.

    # Input (batch, channels, height, width) and filter (out channels, height, width)
    n, c, h, w = 1, 16, 16, 16
    k, r, s = 1, 3, 3
    # Convolution parameters as (height, width) values
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 0, 0
    dil_h, dil_w = 1, 1

    def conv_output_size(size, pad, dil, kernel, step):
        # Output extent of a convolution along one axis.
        return int((size + 2 * pad - (dil * (kernel - 1) + 1)) / step + 1)

    h_out = conv_output_size(h, pad_h, dil_h, r, stride_h)
    w_out = conv_output_size(w, pad_w, dil_w, s, stride_w)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}
    nhwc = torch.channels_last

    torch_tensor_x = torch.rand(n, c, h, w, **tensor_options).to(memory_format=nhwc)
    # Offsets carry two values per filter tap, the mask one value per tap.
    torch_tensor_offset = torch.rand(n, 2 * r * s, h_out, w_out, **tensor_options).to(
        memory_format=nhwc
    )
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_options).to(memory_format=nhwc)
    torch_tensor_bias = torch.rand(k, **tensor_options)
    torch_tensor_mask = torch.rand(n, r * s, h_out, w_out, **tensor_options).to(
        memory_format=nhwc
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_deform_convolution_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_offset,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_mask,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_offset,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_mask,
        hipdnn_tensor_y,
    ) = built

    # Output buffer sized from the graph tensor's dimensions, also channels_last.
    y_dims = hipdnn_tensor_y.get_dim()
    torch_tensor_y = torch.empty(y_dims, memory_format=nhwc, **tensor_options)

    # Bind every graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_offset, torch_tensor_offset),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_mask, torch_tensor_mask),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("deform conv fprop graph execution complete.")
import hipdnn
import torch
def build_deform_convolution_graph(
    hipdnn_handle,
    torch_tensor_dy,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_offset,
    torch_tensor_mask,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph containing one ``deform_conv_dgrad`` node.

    Each torch tensor is mirrored into the graph with ``tensor_like``, the
    ``deform_conv_dgrad`` op is added, its three results are flagged as graph
    outputs, and the graph is built against ``hipdnn_handle``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_dy: Source tensor for the op's ``loss`` operand.
        torch_tensor_x: Source tensor for the op's ``image`` operand.
        torch_tensor_w: Source tensor for the op's ``filter`` operand.
        torch_tensor_offset: Source tensor for the op's ``offset`` operand.
        torch_tensor_mask: Source tensor for the op's ``mask`` operand.
        padding: Forwarded unchanged to ``deform_conv_dgrad``.
        stride: Forwarded unchanged to ``deform_conv_dgrad``.
        dilation: Forwarded unchanged to ``deform_conv_dgrad``.
        hipdnn_data_type: Used as the graph's ``io_data_type``; the
            intermediate and compute types are fixed to FLOAT.

    Returns:
        A tuple ``(graph, dy, w, offset, x, mask, dx, doffset, dmask)`` of the
        built graph followed by its graph tensors. Note that the input tensors
        come back in a different order than the parameters (w and offset
        precede x). The last three are the op results, presumably the
        gradients for image, offset and mask -- confirm against hipdnn docs.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="deform_convolution",
    )

    # Mirror the torch tensors into the graph, in the original creation order.
    sources = (
        torch_tensor_dy,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_offset,
        torch_tensor_mask,
    )
    dy, x, w, offset, mask = [graph.tensor_like(source) for source in sources]

    # Single op of the graph.
    dx, doffset, dmask = graph.deform_conv_dgrad(
        loss=dy,
        filter=w,
        offset=offset,
        image=x,
        mask=mask,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="deform_conv_bwd",
    )

    # Every result of the op is exposed as a graph output.
    for result in (dx, doffset, dmask):
        result.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, dy, w, offset, x, mask, dx, doffset, dmask)
if __name__ == "__main__":
    # Demo driver: builds the deform-conv backward graph for a small problem,
    # binds random channels_last CUDA tensors to it and executes it once.

    # Activation extents: batch, input channels, height, width.
    n, c, h, w = 1, 16, 16, 16
    # Filter extents: output channels, filter height, filter width.
    k, r, s = 1, 3, 3

    # Convolution parameters (height / width).
    stride_h = stride_w = 1
    pad_h = pad_w = 0
    dil_h = dil_w = 1

    # Output spatial size from the dilated filter footprint.
    footprint_h = dil_h * (r - 1) + 1
    footprint_w = dil_w * (s - 1) + 1
    h_out = int((h + 2 * pad_h - footprint_h) / stride_h + 1)
    w_out = int((w + 2 * pad_w - footprint_w) / stride_w + 1)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    def _rand_channels_last(*dims):
        # Random CUDA tensor of the given dims, converted to channels_last.
        dense = torch.rand(*dims, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    def _empty_for(graph_tensor):
        # Uninitialised channels_last CUDA buffer sized from a graph tensor.
        return torch.empty(
            graph_tensor.get_dim(),
            dtype=torch_data_type,
            memory_format=torch.channels_last,
            device="cuda",
        )

    # Inputs; offset carries 2*r*s channels and mask r*s channels.
    torch_tensor_dy = _rand_channels_last(n, k, h_out, w_out)
    torch_tensor_x = _rand_channels_last(n, c, h, w)
    torch_tensor_w = _rand_channels_last(k, c, r, s)
    torch_tensor_offset = _rand_channels_last(n, 2 * r * s, h_out, w_out)
    torch_tensor_mask = _rand_channels_last(n, r * s, h_out, w_out)

    hipdnn_handle = hipdnn.create_handle()
    built = build_deform_convolution_graph(
        hipdnn_handle,
        torch_tensor_dy,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_offset,
        torch_tensor_mask,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    # The builder returns w and offset ahead of x; unpack in that order.
    (
        graph,
        hipdnn_tensor_dy,
        hipdnn_tensor_w,
        hipdnn_tensor_offset,
        hipdnn_tensor_x,
        hipdnn_tensor_mask,
        hipdnn_tensor_dx,
        hipdnn_tensor_doffset,
        hipdnn_tensor_dmask,
    ) = built

    # Output buffers take their dims from the built graph tensors.
    torch_tensor_dx = _empty_for(hipdnn_tensor_dx)
    torch_tensor_doffset = _empty_for(hipdnn_tensor_doffset)
    torch_tensor_dmask = _empty_for(hipdnn_tensor_dmask)

    # Map each graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_offset, torch_tensor_offset),
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_mask, torch_tensor_mask),
        (hipdnn_tensor_dx, torch_tensor_dx),
        (hipdnn_tensor_doffset, torch_tensor_doffset),
        (hipdnn_tensor_dmask, torch_tensor_dmask),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Byte workspace sized by the graph itself.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("deform conv bwd graph execution complete.")
import hipdnn
import torch
def build_deform_convolution_wrw_graph(
    hipdnn_handle,
    torch_tensor_dy,
    torch_tensor_x,
    torch_tensor_offset,
    torch_tensor_mask,
    padding,
    stride,
    dilation,
    dw_dims,
    hipdnn_data_type,
):
    """Build a hipdnn graph containing one ``deform_conv_wgrad`` node.

    The torch tensors are mirrored into the graph with ``tensor_like``, the
    ``deform_conv_wgrad`` op is added, its result is given the dims
    ``dw_dims`` and flagged as a graph output, and the graph is built against
    ``hipdnn_handle``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_dy: Source tensor for the op's ``loss`` operand.
        torch_tensor_x: Source tensor for the op's ``image`` operand.
        torch_tensor_offset: Source tensor for the op's ``offset`` operand.
        torch_tensor_mask: Source tensor for the op's ``mask`` operand.
        padding: Forwarded unchanged to ``deform_conv_wgrad``.
        stride: Forwarded unchanged to ``deform_conv_wgrad``.
        dilation: Forwarded unchanged to ``deform_conv_wgrad``.
        dw_dims: Dims set explicitly on the op's result tensor.
        hipdnn_data_type: Used as the graph's ``io_data_type``; the
            intermediate and compute types are fixed to FLOAT.

    Returns:
        A tuple ``(graph, dy, offset, x, mask, dw)`` of the built graph and
        its graph tensors. Note that offset precedes x, unlike the parameter
        order.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="deform_convolution_wrw",
    )

    # Mirror the torch tensors into the graph, in the original creation order.
    sources = (
        torch_tensor_dy,
        torch_tensor_x,
        torch_tensor_offset,
        torch_tensor_mask,
    )
    dy, x, offset, mask = [graph.tensor_like(source) for source in sources]

    # Single op of the graph.
    dw = graph.deform_conv_wgrad(
        image=x,
        offset=offset,
        loss=dy,
        mask=mask,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="deform_conv2d_wrw",
    )

    # The result dims are supplied by the caller; set_output is applied to
    # whatever set_dim returns, as in a chained call.
    dw_with_dims = dw.set_dim(dw_dims)
    dw_with_dims.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, dy, offset, x, mask, dw)
if __name__ == "__main__":
    # Demo driver: builds the deform-conv weight-gradient graph, binds random
    # channels_last CUDA tensors to it and executes it once.

    # Activation extents: batch, input channels, height, width.
    n, c, h, w = 4, 64, 16, 16
    # Filter extents: output channels, filter height, filter width.
    k, r, s = 64, 3, 3

    # Convolution parameters (height / width).
    stride_h = stride_w = 1
    pad_h = pad_w = 0
    dil_h = dil_w = 1

    # Output spatial size from the dilated filter footprint.
    footprint_h = dil_h * (r - 1) + 1
    footprint_w = dil_w * (s - 1) + 1
    h_out = int((h + 2 * pad_h - footprint_h) / stride_h + 1)
    w_out = int((w + 2 * pad_w - footprint_w) / stride_w + 1)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    def _rand_channels_last(*dims):
        # Random CUDA tensor of the given dims, converted to channels_last.
        dense = torch.rand(*dims, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    # Inputs; offset carries 2*r*s channels and mask r*s channels.
    torch_tensor_dy = _rand_channels_last(n, k, h_out, w_out)
    torch_tensor_x = _rand_channels_last(n, c, h, w)
    torch_tensor_offset = _rand_channels_last(n, 2 * r * s, h_out, w_out)
    torch_tensor_mask = _rand_channels_last(n, r * s, h_out, w_out)

    hipdnn_handle = hipdnn.create_handle()
    built = build_deform_convolution_wrw_graph(
        hipdnn_handle,
        torch_tensor_dy,
        torch_tensor_x,
        torch_tensor_offset,
        torch_tensor_mask,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        [k, c, r, s],  # dims requested for the dw result tensor
        hipdnn_data_type,
    )
    # The builder returns offset ahead of x; unpack in that order.
    (
        graph,
        hipdnn_tensor_dy,
        hipdnn_tensor_offset,
        hipdnn_tensor_x,
        hipdnn_tensor_mask,
        hipdnn_tensor_dw,
    ) = built

    # Output buffer takes its dims from the built graph tensor.
    torch_tensor_dw = torch.empty(
        hipdnn_tensor_dw.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map each graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_offset, torch_tensor_offset),
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_mask, torch_tensor_mask),
        (hipdnn_tensor_dw, torch_tensor_dw),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Byte workspace sized by the graph itself.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("deform conv wrw graph execution complete.")
import hipdnn
import torch
def build_layernorm_fusion_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_epsilon,
    mode,
    eps,
    hipdnn_data_type,
):
    """Build a hipdnn graph that adds two tensors and layer-normalises the sum.

    The graph holds an ``add`` node (``x1 + x2``) whose result is both exposed
    as a graph output and fed into a ``layernorm`` node. The epsilon graph
    tensor is created from ``torch_tensor_epsilon`` and then assigned ``eps``
    through ``set_value``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Source tensor for the first ``add`` operand.
        torch_tensor_x2: Source tensor for the second ``add`` operand.
        torch_tensor_scale: Source tensor for the layernorm scale.
        torch_tensor_bias: Source tensor for the layernorm bias.
        torch_tensor_epsilon: Tensor the epsilon graph tensor is modelled on.
        mode: First positional argument of ``graph.layernorm``.
        eps: Value assigned to the epsilon graph tensor.
        hipdnn_data_type: Used as the graph's ``io_data_type``; the
            intermediate and compute types are fixed to FLOAT.

    Returns:
        A tuple ``(graph, x1, x2, scale, bias, add_output, y)`` of the built
        graph and its graph tensors. The epsilon tensor and the second and
        third layernorm results are not returned.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="layernorm_fusion_inference",
    )

    # Mirror the torch tensors into the graph, in the original creation order.
    sources = (
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_epsilon,
    )
    x1, x2, scale, bias, epsilon = [graph.tensor_like(source) for source in sources]
    epsilon.set_value(eps)

    # Elementwise add; the sum is also kept as a graph output.
    summed = graph.add(a=x1, b=x2, name="add")
    summed.set_output(True)

    # Layernorm over the sum; only the first of its three results is used.
    normalised, _mean, _inv_var = graph.layernorm(
        mode,
        summed,
        scale,
        bias,
        epsilon,
        hipdnn.data_type.FLOAT,
        name="layernorm",
    )
    normalised.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x1, x2, scale, bias, summed, normalised)
if __name__ == "__main__":
    # Demo driver: builds the add + layernorm graph, binds random CUDA
    # tensors to it and executes it once.

    batch = 16  # Batch size
    seq_len = 32  # Sequence length
    embedding_dim = 64  # Feature count per position
    mode = hipdnn.norm_forward_phase.INFERENCE
    eps = 1e-5

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Both addends share the (batch, seq_len, embedding_dim) shape.
    activation_shape = (batch, seq_len, embedding_dim)
    torch_tensor_x1 = torch.rand(
        activation_shape, dtype=torch_data_type, device="cuda"
    )
    torch_tensor_x2 = torch.rand(
        activation_shape, dtype=torch_data_type, device="cuda"
    )

    # Scale and bias are 1-D over the feature dimension.
    torch_tensor_scale = torch.rand(
        embedding_dim, dtype=torch_data_type, device="cuda"
    )
    torch_tensor_bias = torch.rand(
        embedding_dim, dtype=torch_data_type, device="cuda"
    )

    # Epsilon lives on the CPU; it is not part of the variant pack below.
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1),
        eps,
        dtype=torch.float32,
        requires_grad=False,
        device="cpu",
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_layernorm_fusion_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_epsilon,
        mode,
        eps,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_add_output,
        hipdnn_tensor_y,
    ) = built

    # Output buffers take their dims from the built graph tensors.
    torch_tensor_addoutput = torch.empty(
        hipdnn_tensor_add_output.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda"
    )

    # Map each graph tensor to the device pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x1, torch_tensor_x1),
        (hipdnn_tensor_x2, torch_tensor_x2),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_add_output, torch_tensor_addoutput),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Byte workspace sized by the graph itself.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("add_layernorm graph execution complete.")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment