"csrc/vscode:/vscode.git/clone" did not exist on "142846b531b2931256b106a08ccb05848732b416"
Commit ca34d4d2 authored by yanjl1's avatar yanjl1
Browse files

Initial

parents
import hipdnn
import torch
def build_rng_graph(
    hipdnn_handle,
    torch_tensor_seed,
    torch_tensor_offset,
    rng_distribution,
    dim,
    stride,
    hipdnn_data_type,
):
    """Build a hipdnn graph that contains a single ``rng`` op.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_seed: Torch tensor mirrored into the graph with
            ``tensor_like`` and used as the op's ``seed``.
        torch_tensor_offset: Torch tensor mirrored into the graph with
            ``tensor_like`` and used as the op's ``offset``.
        rng_distribution: Forwarded unchanged to ``graph.rng``.
        dim: Forwarded unchanged to ``graph.rng`` as ``dim``.
        stride: Forwarded unchanged to ``graph.rng`` as ``stride``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, seed_tensor, offset_tensor, y_tensor)`` of the built
        graph and its hipdnn tensors.
    """
    # Intermediate and compute types are fixed to FLOAT; only the I/O type
    # follows the caller.
    graph_options = {
        "handle": hipdnn_handle,
        "io_data_type": hipdnn_data_type,
        "intermediate_data_type": hipdnn.data_type.FLOAT,
        "compute_data_type": hipdnn.data_type.FLOAT,
        "name": "rng",
    }
    rng_graph = hipdnn.pygraph(**graph_options)

    # Graph-side mirrors of the two torch inputs.
    seed_t = rng_graph.tensor_like(torch_tensor_seed)
    offset_t = rng_graph.tensor_like(torch_tensor_offset)

    # The rng op itself.
    y_t = rng_graph.rng(
        seed=seed_t,
        offset=offset_t,
        rng_distribution=rng_distribution,
        dim=dim,
        stride=stride,
        name="rng",
    )
    y_t.set_output(True)

    rng_graph.build(hipdnn_handle)
    return rng_graph, seed_t, offset_t, y_t
if __name__ == "__main__":
    # Configuration for the rng example.
    hipdnn_data_type, torch_data_type = hipdnn.data_type.FLOAT, torch.float32
    rng_distribution = hipdnn.rng_distribution.UNIFORM
    dim, stride = [2, 2], [1, 1]

    # One-element CPU tensors: seed first, then offset (same creation order
    # as before, so torch's RNG is consumed identically).
    torch_tensor_seed, torch_tensor_offset = (
        torch.rand(1, dtype=torch_data_type, device="cpu") for _ in range(2)
    )

    hipdnn_handle = hipdnn.create_handle()

    # NOTE(review): graph construction and execution are disabled in this
    # example; the reason is not recorded here. The disabled code is kept
    # below for reference.
    # graph, hipdnn_tensor_seed, hipdnn_tensor_offset, hipdnn_tensor_y = build_rng_graph(
    #     hipdnn_handle, torch_tensor_seed, torch_tensor_offset, rng_distribution, dim, stride, hipdnn_data_type
    # )
    # torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")
    # variant_pack = {
    #     hipdnn_tensor_seed: torch_tensor_seed.data_ptr(),
    #     hipdnn_tensor_offset: torch_tensor_offset.data_ptr(),
    #     hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    # }
    # workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    # graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    # print("Rng graph execution complete.")
import hipdnn
import torch
def build_rope_backward_graph(
    hipdnn_handle, torch_tensor_dy, torch_tensor_cos, torch_tensor_sin, hipdnn_data_type
):
    """Build a hipdnn graph that contains a single ``rope_backward`` op.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_dy: Torch tensor mirrored with ``tensor_like``; the op's ``dy``.
        torch_tensor_cos: Torch tensor mirrored with ``tensor_like``; the op's ``cos``.
        torch_tensor_sin: Torch tensor mirrored with ``tensor_like``; the op's ``sin``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, dy_tensor, cos_tensor, sin_tensor, dx_tensor)``.
    """
    float_type = hipdnn.data_type.FLOAT
    rope_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="rope_backward_inference",
    )

    # Mirror the torch inputs in the order dy, cos, sin.
    dy_t, cos_t, sin_t = (
        rope_graph.tensor_like(source)
        for source in (torch_tensor_dy, torch_tensor_cos, torch_tensor_sin)
    )

    dx_t = rope_graph.rope_backward(dy=dy_t, cos=cos_t, sin=sin_t, name="rope_backward")
    dx_t.set_output(True)

    rope_graph.build(hipdnn_handle)
    return rope_graph, dy_t, cos_t, sin_t, dx_t
if __name__ == "__main__":
    # Problem size and data types for the rope_backward example.
    batch, seq_len, dim = 2, 4, 8
    hipdnn_data_type, torch_data_type = hipdnn.data_type.FLOAT, torch.float32

    # Random inputs, created in the order dy, cos, sin.
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}
    torch_tensor_dy = torch.rand(batch, seq_len, dim, **tensor_options)
    torch_tensor_cos = torch.rand(batch, seq_len, dim, **tensor_options)
    torch_tensor_sin = torch.rand(batch, seq_len, dim, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_rope_backward_graph(
        hipdnn_handle, torch_tensor_dy, torch_tensor_cos, torch_tensor_sin, hipdnn_data_type
    )
    graph, hipdnn_tensor_dy, hipdnn_tensor_cos, hipdnn_tensor_sin, hipdnn_tensor_dx = built

    # Output buffer with the same shape as the inputs.
    torch_tensor_dx = torch.empty(batch, seq_len, dim, **tensor_options)

    # Bind every graph tensor to the raw pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_cos, torch_tensor_cos),
        (hipdnn_tensor_sin, torch_tensor_sin),
        (hipdnn_tensor_dx, torch_tensor_dx),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    # Byte buffer sized by the graph's reported workspace requirement.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("rope_backward graph execution complete.")
import hipdnn
import torch
def build_rope_forward_graph(
    hipdnn_handle, torch_tensor_x, torch_tensor_cos, torch_tensor_sin, hipdnn_data_type
):
    """Build a hipdnn graph that contains a single ``rope_forward`` op.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor mirrored with ``tensor_like``; the op's ``x``.
        torch_tensor_cos: Torch tensor mirrored with ``tensor_like``; the op's ``cos``.
        torch_tensor_sin: Torch tensor mirrored with ``tensor_like``; the op's ``sin``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x_tensor, cos_tensor, sin_tensor, y_tensor)``.
    """
    float_type = hipdnn.data_type.FLOAT
    rope_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="rope_forward_inference",
    )

    # Mirror the torch inputs in the order x, cos, sin.
    x_t, cos_t, sin_t = (
        rope_graph.tensor_like(source)
        for source in (torch_tensor_x, torch_tensor_cos, torch_tensor_sin)
    )

    y_t = rope_graph.rope_forward(x=x_t, cos=cos_t, sin=sin_t, name="rope_forward")
    y_t.set_output(True)

    rope_graph.build(hipdnn_handle)
    return rope_graph, x_t, cos_t, sin_t, y_t
if __name__ == "__main__":
    # Problem size and data types for the rope_forward example.
    batch, seq_len, dim = 2, 4, 8
    hipdnn_data_type, torch_data_type = hipdnn.data_type.FLOAT, torch.float32

    # Random inputs, created in the order x, cos, sin.
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}
    torch_tensor_x = torch.rand(batch, seq_len, dim, **tensor_options)
    torch_tensor_cos = torch.rand(batch, seq_len, dim, **tensor_options)
    torch_tensor_sin = torch.rand(batch, seq_len, dim, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_rope_forward_graph(
        hipdnn_handle, torch_tensor_x, torch_tensor_cos, torch_tensor_sin, hipdnn_data_type
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_cos, hipdnn_tensor_sin, hipdnn_tensor_y = built

    # Output buffer with the same shape as the inputs.
    torch_tensor_y = torch.empty(batch, seq_len, dim, **tensor_options)

    # Bind every graph tensor to the raw pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_cos, torch_tensor_cos),
        (hipdnn_tensor_sin, torch_tensor_sin),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    # Byte buffer sized by the graph's reported workspace requirement.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("rope_forward graph execution complete.")
import hipdnn
import torch
def build_sdpa_graph(
    hipdnn_handle,
    torch_tensor_q,
    torch_tensor_k,
    torch_tensor_v,
    torch_tensor_bias,
    torch_tensor_seq_q,
    torch_tensor_seq_kv,
    has_attn_bias,
    causal_mask,
    padding_mask,
    generate_stats,
    alibi_mask,
    hipdnn_data_type,
    attn_scale=1.0,
):
    """Build a hipdnn graph containing one ``scale_dot_product_attention`` op.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_q: Torch tensor mirrored with ``tensor_like``; the op's ``q``.
        torch_tensor_k: Torch tensor mirrored with ``tensor_like``; the op's ``k``.
        torch_tensor_v: Torch tensor mirrored with ``tensor_like``; the op's ``v``.
        torch_tensor_bias: Torch tensor mirrored with ``tensor_like``. The
            mirror is always created but only handed to the op when
            ``has_attn_bias`` is truthy.
        torch_tensor_seq_q: Torch tensor mirrored with ``tensor_like``; passed
            as ``seq_len_q``.
        torch_tensor_seq_kv: Torch tensor mirrored with ``tensor_like``;
            passed as ``seq_len_kv``.
        has_attn_bias: Selects between the bias tensor and ``None`` for the
            op's ``bias`` argument.
        causal_mask: Forwarded as ``use_causal_mask``.
        padding_mask: Forwarded as ``use_padding_mask``.
        generate_stats: Forwarded as ``generate_stats``; when truthy the stats
            tensor is also marked as a graph output.
        alibi_mask: Forwarded as ``use_alibi_mask``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.
        attn_scale: Forwarded as the op's ``attn_scale``. Defaults to ``1.0``,
            the value this function previously hard-coded.

    Returns:
        Tuple ``(graph, q, k, v, bias, seq_q, seq_kv, o, o_stats)`` of the
        built graph and its hipdnn tensors.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="sdpa",
    )

    # Create hipdnn tensors
    hipdnn_tensor_q = graph.tensor_like(torch_tensor_q)
    hipdnn_tensor_k = graph.tensor_like(torch_tensor_k)
    hipdnn_tensor_v = graph.tensor_like(torch_tensor_v)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_seq_q = graph.tensor_like(torch_tensor_seq_q)
    hipdnn_tensor_seq_kv = graph.tensor_like(torch_tensor_seq_kv)

    # Create sdpa op
    hipdnn_tensor_o, hipdnn_tensor_o_stats = graph.scale_dot_product_attention(
        q=hipdnn_tensor_q,
        k=hipdnn_tensor_k,
        v=hipdnn_tensor_v,
        attn_scale=attn_scale,
        bias=(hipdnn_tensor_bias if has_attn_bias else None),
        seq_len_q=hipdnn_tensor_seq_q,
        seq_len_kv=hipdnn_tensor_seq_kv,
        use_causal_mask=causal_mask,
        use_padding_mask=padding_mask,
        generate_stats=generate_stats,
        use_alibi_mask=alibi_mask,
        name="sdpa",
    )

    hipdnn_tensor_o.set_output(True)
    # The stats tensor is only exposed as an output when it was requested.
    if generate_stats:
        hipdnn_tensor_o_stats.set_output(True)

    graph.build(hipdnn_handle)
    return (
        graph,
        hipdnn_tensor_q,
        hipdnn_tensor_k,
        hipdnn_tensor_v,
        hipdnn_tensor_bias,
        hipdnn_tensor_seq_q,
        hipdnn_tensor_seq_kv,
        hipdnn_tensor_o,
        hipdnn_tensor_o_stats,
    )
if __name__ == "__main__":
    # Input dimensions
    b = 4  # batch size
    h_q = 4  # query number of heads
    h_k = 4
    h_v = 4
    s_q = 64  # maximum sequence length
    s_kv = 64
    d_qk = 32  # embedding dimension per head
    d_v = 32

    generate_stats = False  # Is it training mode(True) or inference mode(False)
    # Declared for reference only: it is not passed to build_sdpa_graph below.
    attn_scale = 1.0
    has_attn_bias = False
    causal_mask = False
    padding_mask = False
    alibi_mask = False

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    shape_q = (b, h_q, s_q, d_qk)
    shape_k = (b, h_k, s_kv, d_qk)
    shape_v = (b, h_v, s_kv, d_v)
    shape_o = (b, h_q, s_q, d_v)
    shape_os = (b, h_q, s_q, 1)
    shape_bias = (b, 1, s_q, s_kv)
    shape_seq = (b, 1, 1, 1)

    stride_q = (s_q * h_q * d_qk, d_qk * s_q, d_qk, 1)
    stride_k = (s_kv * h_k * d_qk, d_qk * s_kv, d_qk, 1)
    stride_v = (s_kv * h_v * d_v, d_v * s_kv, d_v, 1)
    stride_o = (s_q * h_q * d_v, s_q * d_v, d_v, 1)
    stride_os = (h_q * s_q, s_q, 1, 1)
    stride_bias = (s_q * s_kv, s_q * s_kv, s_kv, 1)
    stride_seq = (1, 1, 1, 1)

    # Inputs use torch_data_type (previously a hard-coded torch.float16) so
    # the torch buffers stay in step with hipdnn_data_type when it is changed.
    torch_tensor_q = torch.rand(
        b * h_q * s_q * d_qk, dtype=torch_data_type, device="cuda"
    ).as_strided(shape_q, stride_q)
    torch_tensor_k = torch.rand(
        b * h_k * s_kv * d_qk, dtype=torch_data_type, device="cuda"
    ).as_strided(shape_k, stride_k)
    torch_tensor_v = torch.rand(
        b * h_v * s_kv * d_v, dtype=torch_data_type, device="cuda"
    ).as_strided(shape_v, stride_v)

    # Bias and sequence lengths are only bound when their flags are enabled.
    # They are given defined contents (zero bias, full-length sequences)
    # instead of uninitialised memory so that enabling a flag does not feed
    # garbage into the graph.
    torch_tensor_bias = torch.zeros(
        b * s_q * s_kv, dtype=torch_data_type, device="cuda"
    ).as_strided(shape_bias, stride_bias)
    torch_tensor_seq_q = torch.full((b,), s_q, dtype=torch.int32, device="cuda").as_strided(
        shape_seq, stride_seq
    )
    torch_tensor_seq_kv = torch.full((b,), s_kv, dtype=torch.int32, device="cuda").as_strided(
        shape_seq, stride_seq
    )

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_q,
        hipdnn_tensor_k,
        hipdnn_tensor_v,
        hipdnn_tensor_bias,
        hipdnn_tensor_seq_q,
        hipdnn_tensor_seq_kv,
        hipdnn_tensor_o,
        hipdnn_tensor_o_stats,
    ) = build_sdpa_graph(
        hipdnn_handle,
        torch_tensor_q,
        torch_tensor_k,
        torch_tensor_v,
        torch_tensor_bias,
        torch_tensor_seq_q,
        torch_tensor_seq_kv,
        has_attn_bias,
        causal_mask,
        padding_mask,
        generate_stats,
        alibi_mask,
        hipdnn_data_type,
    )

    # The output buffer takes its shape from the graph's output tensor.
    torch_tensor_o = torch.empty(hipdnn_tensor_o.get_dim(), dtype=torch_data_type, device="cuda")

    # Mandatory bindings; optional tensors are added only when enabled.
    variant_pack = {
        hipdnn_tensor_q: torch_tensor_q.data_ptr(),
        hipdnn_tensor_k: torch_tensor_k.data_ptr(),
        hipdnn_tensor_v: torch_tensor_v.data_ptr(),
        hipdnn_tensor_o: torch_tensor_o.data_ptr(),
    }
    if has_attn_bias:
        variant_pack[hipdnn_tensor_bias] = torch_tensor_bias.data_ptr()
    if generate_stats:
        # NOTE(review): the stats buffer uses torch_data_type, as before;
        # confirm this matches the dtype hipdnn writes for stats.
        torch_tensor_o_stats = torch.empty(
            hipdnn_tensor_o_stats.get_dim(), dtype=torch_data_type, device="cuda"
        )
        variant_pack[hipdnn_tensor_o_stats] = torch_tensor_o_stats.data_ptr()
    if padding_mask:
        variant_pack[hipdnn_tensor_seq_q] = torch_tensor_seq_q.data_ptr()
        variant_pack[hipdnn_tensor_seq_kv] = torch_tensor_seq_kv.data_ptr()

    # Byte buffer sized by the graph's reported workspace requirement.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("sdpa graph execution complete.")
import hipdnn
import torch
def build_slice_graph(hipdnn_handle, torch_tensor_x, hipdnn_data_type):
    """Build a hipdnn graph that contains a single ``slice`` op.

    The slice specification is fixed: ``slice(0, 1)`` on the first axis and
    full slices on the next two.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor mirrored with ``tensor_like``; the op's input.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x_tensor, y_tensor)``.
    """
    float_type = hipdnn.data_type.FLOAT
    slice_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="slice",
    )

    # Graph-side mirror of the input.
    x_t = slice_graph.tensor_like(torch_tensor_x)

    # Create slice op
    slice_spec = [slice(0, 1), slice(None), slice(None)]
    y_t = slice_graph.slice(input=x_t, slices=slice_spec, name="slice")
    y_t.set_output(True)

    slice_graph.build(hipdnn_handle)
    return slice_graph, x_t, y_t
if __name__ == "__main__":
    # Input dimensions and data types for the slice example.
    batch, seq_len, embedding_dim = 2, 1024, 768
    hipdnn_data_type, torch_data_type = hipdnn.data_type.FLOAT, torch.float32

    input_shape = (batch, seq_len, embedding_dim)
    torch_tensor_x = torch.rand(*input_shape, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    # NOTE(review): graph construction and execution are disabled in this
    # example; the reason is not recorded here. The disabled code is kept
    # below for reference.
    # graph, hipdnn_tensor_x, hipdnn_tensor_y = build_slice_graph(
    #     hipdnn_handle, torch_tensor_x, hipdnn_data_type
    # )
    # torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")
    # variant_pack = {
    #     hipdnn_tensor_x: torch_tensor_x.data_ptr(),
    #     hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    # }
    # workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    # graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    # print("slice graph execution complete.")
import hipdnn
import torch
def build_soft_margin_loss_graph(
    hipdnn_handle, torch_tensor_input, torch_tensor_target, hipdnn_data_type
):
    """Build a hipdnn graph that contains a single ``soft_margin_loss`` op.

    The op is always created with ``hipdnn.reduction_mode.AVG``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_input: Torch tensor mirrored with ``tensor_like``; the op's ``input``.
        torch_tensor_target: Torch tensor mirrored with ``tensor_like``; the op's ``target``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, input_tensor, target_tensor, output_tensor)``.
    """
    float_type = hipdnn.data_type.FLOAT
    loss_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="soft_margin_loss_forward",
    )

    # Graph-side mirrors, input first and then target.
    input_t = loss_graph.tensor_like(torch_tensor_input)
    target_t = loss_graph.tensor_like(torch_tensor_target)

    loss_args = {
        "input": input_t,
        "target": target_t,
        "reduction": hipdnn.reduction_mode.AVG,
        "name": "soft_margin_loss",
    }
    output_t = loss_graph.soft_margin_loss(**loss_args)
    output_t.set_output(True)

    loss_graph.build(hipdnn_handle)
    return loss_graph, input_t, target_t, output_t
if __name__ == "__main__":
    # Problem size and data types for the soft_margin_loss forward example.
    batch, num_classes = 16, 10
    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_input = torch.rand(batch, num_classes, dtype=torch_data_type, device="cuda")
    # Soft-margin loss is defined for targets in {-1, +1}; draw random signs
    # instead of uniform values in [0, 1).
    torch_tensor_target = (
        torch.randint(0, 2, (batch, num_classes), device="cuda") * 2 - 1
    ).to(torch_data_type)

    hipdnn_handle = hipdnn.create_handle()
    graph, hipdnn_tensor_input, hipdnn_tensor_target, hipdnn_tensor_output = (
        build_soft_margin_loss_graph(
            hipdnn_handle,
            torch_tensor_input,
            torch_tensor_target,
            hipdnn_data_type,
        )
    )

    # One-element output buffer for the reduced loss.
    torch_tensor_output = torch.empty(1, dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the raw pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_input: torch_tensor_input.data_ptr(),
        hipdnn_tensor_target: torch_tensor_target.data_ptr(),
        hipdnn_tensor_output: torch_tensor_output.data_ptr(),
    }

    # Byte buffer sized by the graph's reported workspace requirement.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("soft_margin_loss graph execution complete.")
import hipdnn
import torch
def build_soft_margin_loss_backward_graph(
    hipdnn_handle,
    torch_tensor_input,
    torch_tensor_target,
    torch_tensor_doutput,
    hipdnn_data_type,
):
    """Build a hipdnn graph with a single ``soft_margin_loss_backward`` op.

    The op is always created with ``hipdnn.reduction_mode.NONE``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_input: Torch tensor mirrored with ``tensor_like``; the op's ``input``.
        torch_tensor_target: Torch tensor mirrored with ``tensor_like``; the op's ``target``.
        torch_tensor_doutput: Torch tensor mirrored with ``tensor_like``; the op's ``doutput``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, input_tensor, target_tensor, doutput_tensor, dinput_tensor)``.
    """
    float_type = hipdnn.data_type.FLOAT
    loss_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="soft_margin_loss_backward",
    )

    # Graph-side mirrors in the order input, target, doutput.
    input_t = loss_graph.tensor_like(torch_tensor_input)
    target_t = loss_graph.tensor_like(torch_tensor_target)
    doutput_t = loss_graph.tensor_like(torch_tensor_doutput)

    backward_args = {
        "input": input_t,
        "target": target_t,
        "doutput": doutput_t,
        "reduction": hipdnn.reduction_mode.NONE,
        "name": "soft_margin_loss_backward",
    }
    dinput_t = loss_graph.soft_margin_loss_backward(**backward_args)
    dinput_t.set_output(True)

    loss_graph.build(hipdnn_handle)
    return loss_graph, input_t, target_t, doutput_t, dinput_t
if __name__ == "__main__":
    # Problem size and data types for the soft_margin_loss backward example.
    batch, num_classes = 16, 10
    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_input = torch.rand(batch, num_classes, dtype=torch_data_type, device="cuda")
    # Soft-margin loss is defined for targets in {-1, +1}; draw random signs
    # instead of uniform values in [0, 1).
    torch_tensor_target = (
        torch.randint(0, 2, (batch, num_classes), device="cuda") * 2 - 1
    ).to(torch_data_type)
    torch_tensor_doutput = torch.rand(batch, num_classes, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()
    (
        graph,
        hipdnn_tensor_input,
        hipdnn_tensor_target,
        hipdnn_tensor_doutput,
        hipdnn_tensor_dinput,
    ) = build_soft_margin_loss_backward_graph(
        hipdnn_handle,
        torch_tensor_input,
        torch_tensor_target,
        torch_tensor_doutput,
        hipdnn_data_type,
    )

    # Gradient buffer with the same shape as the input.
    torch_tensor_dinput = torch.empty(batch, num_classes, dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the raw pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_input: torch_tensor_input.data_ptr(),
        hipdnn_tensor_target: torch_tensor_target.data_ptr(),
        hipdnn_tensor_doutput: torch_tensor_doutput.data_ptr(),
        hipdnn_tensor_dinput: torch_tensor_dinput.data_ptr(),
    }

    # Byte buffer sized by the graph's reported workspace requirement.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("soft_margin_loss_backward graph execution complete.")
import hipdnn
import torch
def build_softmax_graph(hipdnn_handle, torch_tensor_x, axis, hipdnn_data_type):
    """Build a hipdnn graph that contains a single ``softmax`` op.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor mirrored with ``tensor_like``; the op's input.
        axis: Forwarded unchanged to ``graph.softmax`` as ``axis``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x_tensor, y_tensor)``.
    """
    float_type = hipdnn.data_type.FLOAT
    softmax_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="softmax",
    )

    # Graph-side mirror of the input.
    x_t = softmax_graph.tensor_like(torch_tensor_x)

    # The softmax op itself.
    y_t = softmax_graph.softmax(input=x_t, axis=axis, name="softmax")
    y_t.set_output(True)

    softmax_graph.build(hipdnn_handle)
    return softmax_graph, x_t, y_t
if __name__ == "__main__":
    # Input dimensions: batch size, channels, height, width.
    n, c, h, w = 2, 3, 4, 5

    # Softmax parameters
    axis = 3  # Axis to apply softmax

    hipdnn_data_type, torch_data_type = hipdnn.data_type.FLOAT, torch.float32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}
    torch_tensor_x = torch.rand(n, c, h, w, **tensor_options)

    hipdnn_handle = hipdnn.create_handle()
    built = build_softmax_graph(hipdnn_handle, torch_tensor_x, axis, hipdnn_data_type)
    graph, hipdnn_tensor_x, hipdnn_tensor_y = built

    # The output buffer takes its shape from the graph's output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_options)

    # Bind every graph tensor to the raw pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    # Byte buffer sized by the graph's reported workspace requirement.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Softmax graph execution complete.")
import hipdnn
import torch
def example_adamw():
    """Run ten optimisation steps of a small MLP with ``hipdnn.TorchAdamW``.

    Each iteration draws a fresh random batch of inputs and targets on the
    "cuda" device, computes the MSE loss, back-propagates, applies one
    optimizer step and prints the loss.
    """
    layers = [
        torch.nn.Linear(10, 20, device="cuda"),
        torch.nn.ReLU(),
        torch.nn.Linear(20, 1, device="cuda"),
    ]
    model = torch.nn.Sequential(*layers)

    optimizer = hipdnn.TorchAdamW(
        model.parameters(), lr=0.001, betas=(0.9, 0.999), weight_decay=0.01
    )

    mse = torch.nn.functional.mse_loss
    for epoch in range(10):
        batch_inputs = torch.randn(32, 10, device="cuda")
        batch_targets = torch.randn(32, 1, device="cuda")

        # Forward pass
        loss = mse(model(batch_inputs), batch_targets)

        # Backward pass (gradients are cleared first)
        optimizer.zero_grad()
        loss.backward()

        # Optimizer step
        optimizer.step()
        # optimizer.step_batch()

        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")


if __name__ == "__main__":
    example_adamw()
import hipdnn
import torch
from torch.profiler import profile, ProfilerActivity
if __name__ == "__main__":
    # Input dimensions: batch size, input channels, height, width.
    batch, channels, height, width = 128, 64, 112, 112

    model = hipdnn.TorchPReLU()
    input_type = torch.float32
    x = torch.rand(batch, channels, height, width, dtype=input_type, device="cuda")

    # Profile one forward and one backward pass under float16 autocast.
    traced_activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA]
    with profile(activities=traced_activities, record_shapes=True) as prof, \
            torch.cuda.amp.autocast(dtype=torch.float16):
        y = model(x)
        # The input tensor is reused as the upstream gradient.
        y.backward(x)

    # Report per-op averages grouped by input shape.
    averages = prof.key_averages(group_by_input_shape=True)
    print(averages.table(sort_by="self_cuda_time_total"))
    torch.cuda.synchronize()
import hipdnn
import torch
def build_transpose_graph(hipdnn_handle, torch_tensor_x, hipdnn_data_type):
    """Build a hipdnn graph that contains a single ``transpose`` op.

    The permutation is fixed to ``[0, 1, 2, 3]``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor mirrored with ``tensor_like``; the op's input.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x_tensor, y_tensor)``.
    """
    float_type = hipdnn.data_type.FLOAT
    transpose_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="transpose",
    )

    # Graph-side mirror of the input.
    x_t = transpose_graph.tensor_like(torch_tensor_x)

    # Permutation choices noted by the original author:
    # nhwc->nchw[0, 1, 2, 3] or nchw->nhwc[0, 2, 3, 1]
    axis_order = [0, 1, 2, 3]
    y_t = transpose_graph.transpose(input=x_t, permutation=axis_order, name="transpose")
    y_t.set_output(True)

    transpose_graph.build(hipdnn_handle)
    return transpose_graph, x_t, y_t
if __name__ == "__main__":
    # Input dimensions and data types for the transpose example.
    batch, channels, height, width = 2, 3, 4, 5
    hipdnn_data_type, torch_data_type = hipdnn.data_type.FLOAT, torch.float32
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    # Random input converted to the channels_last memory format.
    torch_tensor_x = torch.rand(batch, channels, height, width, **tensor_options).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_transpose_graph(hipdnn_handle, torch_tensor_x, hipdnn_data_type)
    graph, hipdnn_tensor_x, hipdnn_tensor_y = built

    # The output buffer takes its shape from the graph's output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_options)

    # Bind every graph tensor to the raw pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    # Byte buffer sized by the graph's reported workspace requirement.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Transpose graph execution complete.")
import hipdnn
import torch
def build_transpose_graph(
    hipdnn_handle, torch_tensor_x, batch, channels, height, width, hipdnn_data_type
):
    """Build a hipdnn graph: vector-tagged input -> reshape -> transpose.

    The input mirror is configured with
    ``set_vector_count_and_dimension(32, 1)``, reshaped to
    ``[batch, channels, height, width]`` and transposed with the fixed
    permutation ``[0, 2, 3, 1]``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor mirrored with ``tensor_like``.
        batch: First entry of the reshape target dims.
        channels: Second entry of the reshape target dims.
        height: Third entry of the reshape target dims.
        width: Fourth entry of the reshape target dims.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x_tensor, y_tensor)``; ``x_tensor`` is the value
        returned by ``set_vector_count_and_dimension``.
    """
    float_type = hipdnn.data_type.FLOAT
    transpose_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="transpose",
    )

    # Mirror the input, then attach the vector count/dimension setting.
    mirrored = transpose_graph.tensor_like(torch_tensor_x)
    x_t = mirrored.set_vector_count_and_dimension(32, 1)

    # Reshape op; its output dims are set explicitly.
    reshaped_t = transpose_graph.reshape(x_t, name="reshape")
    target_dims = [batch, channels, height, width]
    reshaped_t.set_dim(target_dims)

    # Transpose op.
    y_t = transpose_graph.transpose(
        input=reshaped_t, permutation=[0, 2, 3, 1], name="transpose"
    )
    y_t.set_output(True)

    transpose_graph.build(hipdnn_handle)
    return transpose_graph, x_t, y_t
if __name__ == "__main__":
    # Input dimensions and data types for the vectorised-input transpose example.
    batch, channels, height, width = 2, 64, 4, 5
    hipdnn_data_type, torch_data_type = hipdnn.data_type.HALF, torch.float16
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    # Random input converted to the channels_last memory format.
    torch_tensor_x = torch.rand(batch, channels, height, width, **tensor_options).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_transpose_graph(
        hipdnn_handle, torch_tensor_x, batch, channels, height, width, hipdnn_data_type
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_y = built

    # The output buffer takes its shape from the graph's output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_options)

    # Bind every graph tensor to the raw pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    # Byte buffer sized by the graph's reported workspace requirement.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Transpose graph execution complete.")
import hipdnn
import torch
def build_transpose_graph(
    hipdnn_handle, torch_tensor_x, batch, channels, height, width, hipdnn_data_type
):
    """Build a hipdnn graph: reshape -> transpose -> vector-tagged output.

    The input is reshaped to ``[batch, channels // 32, 32, height, width]``
    and transposed with the fixed permutation ``[0, 1, 3, 4, 2]``. The output
    tensor is marked as graph output, set to ``hipdnn.data_type.HALF`` and
    configured with ``set_vector_count_and_dimension(32, 1)``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor mirrored with ``tensor_like``.
        batch: First entry of the reshape target dims.
        channels: Channel count; split into ``channels // 32`` groups of 32.
        height: Fourth entry of the reshape target dims.
        width: Fifth entry of the reshape target dims.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x_tensor, y_tensor)``.

    Raises:
        ValueError: If ``channels`` is not a positive multiple of 32.
    """
    # Same 32 that is passed to set_vector_count_and_dimension below.
    vector_count = 32

    # The reshape previously hard-coded [batch, 2, 32, ...], which is only
    # right for 64 channels. Derive the group count from `channels` and
    # reject sizes that cannot be split evenly before touching hipdnn.
    if channels <= 0 or channels % vector_count != 0:
        raise ValueError(
            f"channels must be a positive multiple of {vector_count}, got {channels}"
        )
    channel_groups = channels // vector_count

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="transpose",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)

    # Create reshape op
    hipdnn_tensor_x_reshaped = graph.reshape(hipdnn_tensor_x, name="reshape")
    hipdnn_tensor_x_reshaped.set_dim([batch, channel_groups, vector_count, height, width])

    # Create transpose op
    hipdnn_tensor_y = graph.transpose(
        input=hipdnn_tensor_x_reshaped,
        permutation=[0, 1, 3, 4, 2],
        name="transpose",
    )
    # NOTE(review): the output type stays fixed to HALF rather than following
    # hipdnn_data_type, as in the original; confirm that this is intended.
    hipdnn_tensor_y.set_output(True).set_data_type(
        hipdnn.data_type.HALF
    ).set_vector_count_and_dimension(vector_count, 1)

    graph.build(hipdnn_handle)
    return (graph, hipdnn_tensor_x, hipdnn_tensor_y)
if __name__ == "__main__":
    # Input dimensions and data types for the vectorised-output transpose example.
    batch, channels, height, width = 2, 64, 4, 5
    hipdnn_data_type, torch_data_type = hipdnn.data_type.HALF, torch.float16
    tensor_options = {"dtype": torch_data_type, "device": "cuda"}

    # Random input converted to the channels_last memory format.
    torch_tensor_x = torch.rand(batch, channels, height, width, **tensor_options).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()
    built = build_transpose_graph(
        hipdnn_handle, torch_tensor_x, batch, channels, height, width, hipdnn_data_type
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_y = built

    # The output buffer takes its shape from the graph's output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_options)

    # Bind every graph tensor to the raw pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings}

    # Byte buffer sized by the graph's reported workspace requirement.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Transpose graph execution complete.")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment