# conv_bias_dts.py

import hipdnn
import torch


def _conv_output_extent(input_extent, pad, dilation, kernel_extent, stride):
    """Return the output length of one spatial axis of a convolution.

    Computes ``(input + 2 * pad - (dilation * (kernel - 1) + 1)) // stride + 1``.
    Integer floor division keeps the arithmetic exact and, unlike
    ``int(a / b)``, does not round a negative numerator toward zero.
    """
    effective_kernel = dilation * (kernel_extent - 1) + 1
    return (input_extent + 2 * pad - effective_kernel) // stride + 1


def build_conv_bias_dts_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a hipdnn graph computing conv -> bias add -> depth-to-space.

    The depth-to-space step is expressed as reshape -> transpose -> reshape
    on the biased convolution output.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Input tensor; ``shape[0]``, ``shape[2]`` and
            ``shape[3]`` are read as batch, height and width.
        torch_tensor_w: Filter tensor; ``shape[0]``, ``shape[2]`` and
            ``shape[3]`` are read as filter count, filter height and width.
        torch_tensor_bias: Tensor added to the convolution output.
        padding: Two-element sequence ``[pad_h, pad_w]``.
        stride: Two-element sequence ``[stride_h, stride_w]``.
        dilation: Two-element sequence ``[dil_h, dil_w]``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.
        depth_to_space_mode: ``"CRD"`` selects the layout
            ``[n, c_out, b, b, h, w]`` with permutation ``[0, 1, 4, 2, 5, 3]``.
            Any other value selects ``[n, b, b, c_out, h, w]`` with
            permutation ``[0, 3, 4, 1, 5, 2]`` (the DCR ordering).
        block_size: Spatial upscaling factor ``b``; the filter count must be
            divisible by ``b * b``.

    Returns:
        A tuple ``(graph, x, w, bias, y)`` holding the built graph, the three
        hipdnn input tensors and the hipdnn output tensor, whose dims are
        ``[n, c_out, b * out_h, b * out_w]``.

    Raises:
        ValueError: If ``block_size`` is not positive, if the filter count is
            not divisible by ``block_size ** 2``, or if the convolution would
            produce an empty output.
    """
    # Derive and validate every shape first so that bad arguments fail before
    # any graph object is created.
    n = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]

    k = torch_tensor_w.shape[0]
    r = torch_tensor_w.shape[2]
    s = torch_tensor_w.shape[3]

    if block_size < 1:
        raise ValueError(f"block_size must be a positive integer, got {block_size}")
    block_area = block_size * block_size
    if k % block_area != 0:
        raise ValueError(
            f"number of filters ({k}) must be divisible by "
            f"block_size ** 2 ({block_area})"
        )
    out_channels = k // block_area

    out_h = _conv_output_extent(in_h, padding[0], dilation[0], r, stride[0])
    out_w = _conv_output_extent(in_w, padding[1], dilation[1], s, stride[1])
    if out_h < 1 or out_w < 1:
        raise ValueError(
            f"convolution output would be empty (out_h={out_h}, out_w={out_w}); "
            "the dilated filter does not fit inside the padded input"
        )

    if depth_to_space_mode == "CRD":
        first_reshape_dim = [n, out_channels, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        # Every mode other than "CRD" uses the DCR ordering.
        first_reshape_dim = [n, block_size, block_size, out_channels, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]

    dts_h = block_size * out_h
    dts_w = block_size * out_w
    second_reshape_dim = [n, out_channels, dts_h, dts_w]
    # Strides for second_reshape_dim with the channel axis innermost
    # (stride 1), then width, then height, then batch.
    second_reshape_stride = [
        out_channels * dts_h * dts_w,
        1,
        out_channels * dts_w,
        out_channels,
    ]

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_dts",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    # Create first reshape: split the channel axis into block and channel parts
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_bias_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose: move each block axis next to its spatial axis
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape: merge (h, b) and (w, b) into the enlarged H and W
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        second_reshape_stride
    ).set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_second_reshape_output,
    )


if __name__ == "__main__":
    # Example driver: build the conv + bias + depth-to-space graph for one
    # fixed problem size and execute it once on random half-precision data.

    # Input: batch size, input channels, height, width.
    batch, in_channels, height, width = 1, 8, 128, 128

    # Filters: output channels, filter height, filter width.
    num_filters, filter_h, filter_w = 16, 3, 3

    # Convolution parameters, each given as [height, width].
    conv_padding = [1, 1]
    conv_stride = [1, 1]
    conv_dilation = [1, 1]

    # Depth-to-space parameters.
    dts_block_size = 2
    dts_mode = "DCR"

    hipdnn_dtype = hipdnn.data_type.HALF
    torch_dtype = torch.float16

    def _random_channels_last(*shape):
        # Random tensor on the "cuda" device, converted to channels_last.
        tensor = torch.rand(*shape, dtype=torch_dtype, device="cuda")
        return tensor.to(memory_format=torch.channels_last)

    # Created in the same order as before: input, filters, bias.
    x_torch = _random_channels_last(batch, in_channels, height, width)
    w_torch = _random_channels_last(num_filters, in_channels, filter_h, filter_w)
    bias_torch = _random_channels_last(1, num_filters, 1, 1)

    handle = hipdnn.create_handle()

    built = build_conv_bias_dts_graph(
        handle,
        x_torch,
        w_torch,
        bias_torch,
        conv_padding,
        conv_stride,
        conv_dilation,
        hipdnn_dtype,
        dts_mode,
        dts_block_size,
    )
    graph, x_hipdnn, w_hipdnn, bias_hipdnn, y_hipdnn = built

    # Output buffer sized from the dims reported by the graph's output tensor.
    y_torch = torch.empty(
        y_hipdnn.get_dim(),
        dtype=torch_dtype,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map each hipdnn tensor to the device pointer of its torch buffer.
    variant_pack = {
        x_hipdnn: x_torch.data_ptr(),
        w_hipdnn: w_torch.data_ptr(),
        bias_hipdnn: bias_torch.data_ptr(),
        y_hipdnn: y_torch.data_ptr(),
    }

    # Scratch buffer of the byte size requested by the graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_dts graph execution complete.")