Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDTK
hipDNN Samples
Commits
ca34d4d2
Commit
ca34d4d2
authored
Jun 02, 2026
by
yanjl1
Browse files
Initial
parents
Changes
173
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2881 additions
and
0 deletions
+2881
-0
python/concat_conv_fusion/concat_conv_bias_add.py
python/concat_conv_fusion/concat_conv_bias_add.py
+151
-0
python/concat_conv_fusion/concat_conv_bias_relu.py
python/concat_conv_fusion/concat_conv_bias_relu.py
+137
-0
python/concat_conv_fusion/concat_conv_bias_relu_add.py
python/concat_conv_fusion/concat_conv_bias_relu_add.py
+154
-0
python/concatenate/concatenate.py
python/concatenate/concatenate.py
+57
-0
python/conv_bn_fusion/conv_genstats.py
python/conv_bn_fusion/conv_genstats.py
+115
-0
python/conv_bn_fusion/mul_mul_add_add.py
python/conv_bn_fusion/mul_mul_add_add.py
+106
-0
python/conv_bn_fusion/scale_bias.py
python/conv_bn_fusion/scale_bias.py
+72
-0
python/conv_bn_fusion/scale_bias_relu_conv_genstats.py
python/conv_bn_fusion/scale_bias_relu_conv_genstats.py
+141
-0
python/conv_bn_fusion/scale_bias_relu_convwrw.py
python/conv_bn_fusion/scale_bias_relu_convwrw.py
+116
-0
python/conv_bn_fusion/sub_mul_mul_add_convbwd_relubwd_bnwrw.py
...n/conv_bn_fusion/sub_mul_mul_add_convbwd_relubwd_bnwrw.py
+223
-0
python/conv_depthtospace_fusion/conv_bias_add_dts.py
python/conv_depthtospace_fusion/conv_bias_add_dts.py
+203
-0
python/conv_depthtospace_fusion/conv_bias_dts.py
python/conv_depthtospace_fusion/conv_bias_dts.py
+176
-0
python/conv_depthtospace_fusion/conv_bias_dts_add.py
python/conv_depthtospace_fusion/conv_bias_dts_add.py
+200
-0
python/conv_depthtospace_fusion/conv_bias_dts_leakyrelu.py
python/conv_depthtospace_fusion/conv_bias_dts_leakyrelu.py
+176
-0
python/conv_depthtospace_fusion/conv_bias_dts_leakyrelu_add.py
...n/conv_depthtospace_fusion/conv_bias_dts_leakyrelu_add.py
+203
-0
python/conv_depthtospace_fusion/conv_dts.py
python/conv_depthtospace_fusion/conv_dts.py
+158
-0
python/conv_fusion/conv_bias.py
python/conv_fusion/conv_bias.py
+109
-0
python/conv_fusion/conv_bias_add.py
python/conv_fusion/conv_bias_add.py
+131
-0
python/conv_fusion/conv_bias_add_relu.py
python/conv_fusion/conv_bias_add_relu.py
+134
-0
python/conv_fusion/conv_bias_prelu.py
python/conv_fusion/conv_bias_prelu.py
+119
-0
No files found.
python/concat_conv_fusion/concat_conv_bias_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_concat_conv_bias_add_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    concat_axis,
):
    """Build the fused graph concatenate -> conv_fprop -> add(bias) -> add.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Template (via ``graph.tensor_like``) for the first
            concatenate input.
        torch_tensor_x2: Template for the second concatenate input.
        torch_tensor_w: Template for the convolution weight.
        torch_tensor_bias: Template for the operand added to the convolution
            output.
        torch_tensor_add: Template for the operand added to the bias output.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.
        concat_axis: Forwarded to ``graph.concatenate`` as ``axis``.

    Returns:
        A tuple ``(graph, x1, x2, w, bias, add, y)``: the built graph, the five
        graph input tensors in argument order, and the graph tensor marked as
        output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="concat_conv_bias_add",
    )

    # Graph-side tensors mirroring the torch tensors, created in argument order.
    x1, x2, weight, bias, residual = [
        graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_x1,
            torch_tensor_x2,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_add,
        )
    ]

    # concatenate -> conv -> bias -> add
    concat_out = graph.concatenate(x=[x1, x2], axis=concat_axis, name="concatenate")
    conv_out = graph.conv_fprop(
        image=concat_out,
        weight=weight,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    bias_out = graph.add(a=conv_out, b=bias, name="bias")
    y = graph.add(a=bias_out, b=residual, name="add")

    # Only the final tensor is marked as a graph output.
    y.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x1, x2, weight, bias, residual, y)
if __name__ == "__main__":
    # Input dimensions: both concatenate inputs are created as (n, c, h, w).
    n, c, h, w = 1, 32, 128, 128

    # Filter dimensions
    k, r, s = 32, 3, 3

    # Convolution parameters, height first and width second
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    concat_axis = 1

    # All sample inputs are random GPU tensors converted to channels_last.
    tensor_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    channels_last = torch.channels_last

    torch_tensor_x1 = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_x2 = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    # The filter has 2 * c input channels: x1 and x2 are concatenated along axis 1.
    torch_tensor_w = torch.rand(k, 2 * c, r, s, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_add = torch.rand(n, k, h, w, **tensor_kwargs).to(memory_format=channels_last)

    hipdnn_handle = hipdnn.create_handle()

    built = build_concat_conv_bias_add_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        concat_axis,
    )
    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = built

    # Output buffer sized from the dimensions reported by the graph output tensor.
    # NOTE(review): allocated with torch's default layout while the inputs are
    # channels_last; only data_ptr() is used here - confirm the layout before
    # reading values back from torch_tensor_y.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_kwargs)

    # Map every graph tensor to the device address of its backing torch tensor.
    bindings = [
        (hipdnn_tensor_x1, torch_tensor_x1),
        (hipdnn_tensor_x2, torch_tensor_x2),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_add, torch_tensor_add),
        (hipdnn_tensor_y, torch_tensor_y),
    ]
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr() for graph_tensor, torch_tensor in bindings
    }

    # Scratch buffer of the byte size requested by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("Concat_conv_bias_add graph execution complete.")
python/concat_conv_fusion/concat_conv_bias_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_concat_conv_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    concat_axis,
):
    """Build the fused graph concatenate -> conv_fprop -> add(bias) -> relu.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Template (via ``graph.tensor_like``) for the first
            concatenate input.
        torch_tensor_x2: Template for the second concatenate input.
        torch_tensor_w: Template for the convolution weight.
        torch_tensor_bias: Template for the operand added to the convolution
            output.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.
        concat_axis: Forwarded to ``graph.concatenate`` as ``axis``.

    Returns:
        A tuple ``(graph, x1, x2, w, bias, y)``: the built graph, the four graph
        input tensors in argument order, and the graph tensor marked as output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="concat_conv_bias_relu",
    )

    # Graph-side tensors mirroring the torch tensors, created in argument order.
    x1, x2, weight, bias = [
        graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_x1,
            torch_tensor_x2,
            torch_tensor_w,
            torch_tensor_bias,
        )
    ]

    # concatenate -> conv -> bias -> relu
    concat_out = graph.concatenate(x=[x1, x2], axis=concat_axis, name="concatenate")
    conv_out = graph.conv_fprop(
        image=concat_out,
        weight=weight,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    bias_out = graph.add(a=conv_out, b=bias, name="bias")
    y = graph.relu(input=bias_out, name="relu")

    # Only the final tensor is marked as a graph output.
    y.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x1, x2, weight, bias, y)
if __name__ == "__main__":
    # Input dimensions: both concatenate inputs are created as (n, c, h, w).
    n, c, h, w = 1, 32, 128, 128

    # Filter dimensions
    k, r, s = 32, 2, 2

    # Convolution parameters, height first and width second
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    concat_axis = 1

    # All sample inputs are random GPU tensors converted to channels_last.
    tensor_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    channels_last = torch.channels_last

    torch_tensor_x1 = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_x2 = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    # The filter has 2 * c input channels: x1 and x2 are concatenated along axis 1.
    torch_tensor_w = torch.rand(k, 2 * c, r, s, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_kwargs).to(memory_format=channels_last)

    hipdnn_handle = hipdnn.create_handle()

    built = build_concat_conv_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        concat_axis,
    )
    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = built

    # Output buffer sized from the dimensions reported by the graph output tensor.
    # NOTE(review): allocated with torch's default layout while the inputs are
    # channels_last; only data_ptr() is used here - confirm the layout before
    # reading values back from torch_tensor_y.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_kwargs)

    # Map every graph tensor to the device address of its backing torch tensor.
    bindings = [
        (hipdnn_tensor_x1, torch_tensor_x1),
        (hipdnn_tensor_x2, torch_tensor_x2),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    ]
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr() for graph_tensor, torch_tensor in bindings
    }

    # Scratch buffer of the byte size requested by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("Concat_conv_bias_relu graph execution complete.")
python/concat_conv_fusion/concat_conv_bias_relu_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_concat_conv_bias_relu_add_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    concat_axis,
):
    """Build the fused graph concatenate -> conv_fprop -> add(bias) -> relu -> add.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Template (via ``graph.tensor_like``) for the first
            concatenate input.
        torch_tensor_x2: Template for the second concatenate input.
        torch_tensor_w: Template for the convolution weight.
        torch_tensor_bias: Template for the operand added to the convolution
            output.
        torch_tensor_add: Template for the operand added to the relu output.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.
        concat_axis: Forwarded to ``graph.concatenate`` as ``axis``.

    Returns:
        A tuple ``(graph, x1, x2, w, bias, add, y)``: the built graph, the five
        graph input tensors in argument order, and the graph tensor marked as
        output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="concat_conv_bias_relu_add",
    )

    # Graph-side tensors mirroring the torch tensors, created in argument order.
    x1, x2, weight, bias, residual = [
        graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_x1,
            torch_tensor_x2,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_add,
        )
    ]

    # concatenate -> conv -> bias -> relu -> add
    concat_out = graph.concatenate(x=[x1, x2], axis=concat_axis, name="concatenate")
    conv_out = graph.conv_fprop(
        image=concat_out,
        weight=weight,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    bias_out = graph.add(a=conv_out, b=bias, name="bias")
    relu_out = graph.relu(input=bias_out, name="relu")
    y = graph.add(a=relu_out, b=residual, name="add")

    # Only the final tensor is marked as a graph output.
    y.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x1, x2, weight, bias, residual, y)
if __name__ == "__main__":
    # Input dimensions: both concatenate inputs are created as (n, c, h, w).
    n, c, h, w = 1, 32, 128, 128

    # Filter dimensions
    k, r, s = 32, 3, 3

    # Convolution parameters, height first and width second
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16
    concat_axis = 1

    # All sample inputs are random GPU tensors converted to channels_last.
    tensor_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    channels_last = torch.channels_last

    torch_tensor_x1 = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_x2 = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    # The filter has 2 * c input channels: x1 and x2 are concatenated along axis 1.
    torch_tensor_w = torch.rand(k, 2 * c, r, s, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_add = torch.rand(n, k, h, w, **tensor_kwargs).to(memory_format=channels_last)

    hipdnn_handle = hipdnn.create_handle()

    built = build_concat_conv_bias_relu_add_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        concat_axis,
    )
    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = built

    # Output buffer sized from the dimensions reported by the graph output tensor.
    # NOTE(review): allocated with torch's default layout while the inputs are
    # channels_last; only data_ptr() is used here - confirm the layout before
    # reading values back from torch_tensor_y.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_kwargs)

    # Map every graph tensor to the device address of its backing torch tensor.
    bindings = [
        (hipdnn_tensor_x1, torch_tensor_x1),
        (hipdnn_tensor_x2, torch_tensor_x2),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_add, torch_tensor_add),
        (hipdnn_tensor_y, torch_tensor_y),
    ]
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr() for graph_tensor, torch_tensor in bindings
    }

    # Scratch buffer of the byte size requested by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("Concat_conv_bias_relu_add graph execution complete.")
python/concatenate/concatenate.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_concatenate_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    hipdnn_data_type,
    concat_axis=0,
):
    """Build a graph containing a single concatenate of two tensors.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x1: Template (via ``graph.tensor_like``) for the first
            concatenate input.
        torch_tensor_x2: Template for the second concatenate input.
        hipdnn_data_type: Used as the graph's ``io_data_type``.
        concat_axis: Forwarded to ``graph.concatenate`` as ``axis``. Defaults
            to ``0``, the value this sample previously hard-coded, so existing
            four-argument callers behave as before.

    Returns:
        A tuple ``(graph, x1, x2, y)``: the built graph, the two graph input
        tensors, and the concatenate result marked as graph output.
    """
    # Create graph; intermediate and compute types are fixed to FLOAT.
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="concatenate",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x1 = graph.tensor_like(torch_tensor_x1)
    hipdnn_tensor_x2 = graph.tensor_like(torch_tensor_x2)

    # Create concatenate op along the caller-selected axis
    hipdnn_tensor_y = graph.concatenate(
        x=[hipdnn_tensor_x1, hipdnn_tensor_x2],
        axis=concat_axis,
        name="concatenate",
    )
    hipdnn_tensor_y.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, hipdnn_tensor_x1, hipdnn_tensor_x2, hipdnn_tensor_y)
if __name__ == "__main__":
    # Input dimensions
    batch, seq_len, embedding_dim = 2, 1024, 768

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Two random GPU inputs of identical shape.
    tensor_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    torch_tensor_x1 = torch.rand(batch, seq_len, embedding_dim, **tensor_kwargs)
    torch_tensor_x2 = torch.rand(batch, seq_len, embedding_dim, **tensor_kwargs)

    hipdnn_handle = hipdnn.create_handle()

    built = build_concatenate_graph(
        hipdnn_handle, torch_tensor_x1, torch_tensor_x2, hipdnn_data_type
    )
    graph, hipdnn_tensor_x1, hipdnn_tensor_x2, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph output tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_kwargs)

    # Map every graph tensor to the device address of its backing torch tensor.
    bindings = [
        (hipdnn_tensor_x1, torch_tensor_x1),
        (hipdnn_tensor_x2, torch_tensor_x2),
        (hipdnn_tensor_y, torch_tensor_y),
    ]
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr() for graph_tensor, torch_tensor in bindings
    }

    # Scratch buffer of the byte size requested by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("Concatenate graph execution complete.")
python/conv_bn_fusion/conv_genstats.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_genstats_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the fused graph conv_fprop -> genstats.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template (via ``graph.tensor_like``) for the
            convolution image.
        torch_tensor_w: Template for the convolution weight.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        A tuple ``(graph, x, w, y, sum, sq_sum)``: the built graph, the two
        graph input tensors, and the three tensors marked as outputs (the
        convolution result and the two ``genstats`` results).
    """
    # Intermediate and compute types are fixed to FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_genstats",
    )

    x = graph.tensor_like(torch_tensor_x)
    weight = graph.tensor_like(torch_tensor_w)

    conv_out = graph.conv_fprop(
        image=x,
        weight=weight,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv",
    )
    # The convolution result is itself an output and also feeds genstats.
    conv_out.set_output(True)

    stats = graph.genstats(conv_out, float_type, name="genstats")
    sum_out, sq_sum_out = stats
    sum_out.set_output(True)
    sq_sum_out.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x, weight, conv_out, sum_out, sq_sum_out)
if __name__ == "__main__":
    # Image is created as (n, c, h, w); filter as (k, c, r, s).
    n, c, h, w = 4, 64, 16, 16
    k, r, s = 32, 3, 3

    # Convolution parameters, height first and width second
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Inputs are random GPU tensors converted to channels_last.
    tensor_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    channels_last = torch.channels_last

    torch_tensor_x = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_kwargs).to(memory_format=channels_last)

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_genstats_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_y,
        hipdnn_tensor_sum,
        hipdnn_tensor_sq_sum,
    ) = built

    # Output buffers sized from the dimensions reported by the graph tensors;
    # the convolution result is allocated directly in channels_last.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=channels_last,
        device="cuda",
    )
    torch_tensor_sum = torch.empty(hipdnn_tensor_sum.get_dim(), **tensor_kwargs)
    torch_tensor_sq_sum = torch.empty(hipdnn_tensor_sq_sum.get_dim(), **tensor_kwargs)

    # Map every graph tensor to the device address of its backing torch tensor.
    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_y, torch_tensor_y),
        (hipdnn_tensor_sum, torch_tensor_sum),
        (hipdnn_tensor_sq_sum, torch_tensor_sq_sum),
    ]
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr() for graph_tensor, torch_tensor in bindings
    }

    # Scratch buffer of the byte size requested by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("conv_genstats graph execution complete.")
python/conv_bn_fusion/mul_mul_add_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_mul_mul_add_add_graph(
    hipdnn_handle,
    torch_tensor_a,
    torch_tensor_x,
    torch_tensor_b,
    torch_tensor_y,
    torch_tensor_bias,
    hipdnn_data_type,
):
    """Build the pointwise graph ``z = (x * a) + (y * b) + bias``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_a: Template (via ``graph.tensor_like``) for the factor
            multiplied with ``x``.
        torch_tensor_x: Template for the first multiplicand.
        torch_tensor_b: Template for the factor multiplied with ``y``.
        torch_tensor_y: Template for the second multiplicand.
        torch_tensor_bias: Template for the operand added last.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        A tuple ``(graph, a, x, b, y, bias, z)``: the built graph, the five
        graph input tensors in argument order, and the graph tensor marked as
        output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="mul_mul_add_add",
    )

    # Graph-side tensors mirroring the torch tensors, created in argument order.
    a, x, b, y, bias = [
        graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_a,
            torch_tensor_x,
            torch_tensor_b,
            torch_tensor_y,
            torch_tensor_bias,
        )
    ]

    # Two independent products, summed, then the bias is added.
    x_times_a = graph.mul(a=x, b=a, name="mul0")
    y_times_b = graph.mul(a=y, b=b, name="mul1")
    products_sum = graph.add(a=x_times_a, b=y_times_b, name="add0")
    z = graph.add(a=products_sum, b=bias, name="add1")

    # Only the final tensor is marked as a graph output.
    z.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, a, x, b, y, bias, z)
if __name__ == "__main__":
    # x and y are created as (n, c, h, w); a, b and bias as (1, c, 1, 1).
    n, c, h, w = 1, 4, 32, 32

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # All sample tensors are random GPU tensors converted to channels_last.
    tensor_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    channels_last = torch.channels_last

    torch_tensor_a = torch.rand(1, c, 1, 1, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_x = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_b = torch.rand(1, c, 1, 1, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_y = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_bias = torch.rand(1, c, 1, 1, **tensor_kwargs).to(memory_format=channels_last)

    hipdnn_handle = hipdnn.create_handle()

    built = build_mul_mul_add_add_graph(
        hipdnn_handle,
        torch_tensor_a,
        torch_tensor_x,
        torch_tensor_b,
        torch_tensor_y,
        torch_tensor_bias,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_a,
        hipdnn_tensor_x,
        hipdnn_tensor_b,
        hipdnn_tensor_y,
        hipdnn_tensor_bias,
        hipdnn_tensor_z,
    ) = built

    # Output buffer sized from the graph output tensor, then made channels_last.
    torch_tensor_z = torch.empty(hipdnn_tensor_z.get_dim(), **tensor_kwargs).to(
        memory_format=channels_last
    )

    # Map every graph tensor to the device address of its backing torch tensor.
    bindings = [
        (hipdnn_tensor_a, torch_tensor_a),
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_b, torch_tensor_b),
        (hipdnn_tensor_y, torch_tensor_y),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_z, torch_tensor_z),
    ]
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr() for graph_tensor, torch_tensor in bindings
    }

    # Scratch buffer of the byte size requested by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("mul_mul_add_add graph execution complete.")
python/conv_bn_fusion/scale_bias.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_scale_bias_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_scale,
    torch_tensor_bias,
    hipdnn_data_type,
):
    """Build the pointwise graph ``y = (x * scale) + bias``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template (via ``graph.tensor_like``) for the input.
        torch_tensor_scale: Template for the multiplier.
        torch_tensor_bias: Template for the addend.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        A tuple ``(graph, x, scale, bias, y)``: the built graph, the three
        graph input tensors in argument order, and the graph tensor marked as
        output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="scale_bias",
    )

    # Graph-side tensors mirroring the torch tensors, created in argument order.
    x, scale, bias = [
        graph.tensor_like(torch_tensor)
        for torch_tensor in (torch_tensor_x, torch_tensor_scale, torch_tensor_bias)
    ]

    scaled = graph.mul(a=x, b=scale, name="scale")
    y = graph.add(a=scaled, b=bias, name="bias")

    # Only the final tensor is marked as a graph output.
    y.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x, scale, bias, y)
if __name__ == "__main__":
    # x is created as (n, c, h, w); scale and bias as (1, c, 1, 1).
    n, c, h, w = 1, 4, 32, 32

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # All sample tensors are random GPU tensors converted to channels_last.
    tensor_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    channels_last = torch.channels_last

    torch_tensor_x = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_scale = torch.rand(1, c, 1, 1, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_bias = torch.rand(1, c, 1, 1, **tensor_kwargs).to(memory_format=channels_last)

    hipdnn_handle = hipdnn.create_handle()

    built = build_scale_bias_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_scale,
        torch_tensor_bias,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_scale, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the graph output tensor, then made channels_last.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **tensor_kwargs).to(
        memory_format=channels_last
    )

    # Map every graph tensor to the device address of its backing torch tensor.
    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    ]
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr() for graph_tensor, torch_tensor in bindings
    }

    # Scratch buffer of the byte size requested by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("scale_bias graph execution complete.")
python/conv_bn_fusion/scale_bias_relu_conv_genstats.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_scale_bias_relu_conv_genstats_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_scale,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the fused graph mul(scale) -> add(bias) -> relu -> conv_fprop -> genstats.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template (via ``graph.tensor_like``) for the input.
        torch_tensor_w: Template for the convolution weight.
        torch_tensor_scale: Template for the multiplier applied to the input.
        torch_tensor_bias: Template for the addend applied after scaling.
        padding: Forwarded unchanged to ``graph.conv_fprop``.
        stride: Forwarded unchanged to ``graph.conv_fprop``.
        dilation: Forwarded unchanged to ``graph.conv_fprop``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        A tuple ``(graph, x, w, scale, bias, conv_out, sum_out, sq_sum_out)``:
        the built graph, the four graph input tensors in argument order, and
        the three tensors marked as outputs (the convolution result and the
        two ``genstats`` results).
    """
    # Intermediate and compute types are fixed to FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="scale_bias_relu_conv_genstats",
    )

    # Graph-side tensors mirroring the torch tensors, created in argument order.
    x, weight, scale, bias = [
        graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_x,
            torch_tensor_w,
            torch_tensor_scale,
            torch_tensor_bias,
        )
    ]

    # Pointwise prologue feeding the convolution.
    scaled = graph.mul(a=x, b=scale, name="scale")
    biased = graph.add(a=scaled, b=bias, name="bias")
    activated = graph.relu(input=biased, name="relu")

    conv_out = graph.conv_fprop(
        image=activated,
        weight=weight,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv",
    )
    # The convolution result is itself an output and also feeds genstats.
    conv_out.set_output(True)

    stats = graph.genstats(conv_out, float_type, name="genstats")
    sum_out, sq_sum_out = stats
    sum_out.set_output(True)
    sq_sum_out.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x, weight, scale, bias, conv_out, sum_out, sq_sum_out)
if __name__ == "__main__":
    # Image is created as (n, c, h, w); filter as (k, c, r, s).
    n, c, h, w = 4, 64, 16, 16
    k, r, s = 32, 3, 3

    # Convolution parameters, height first and width second
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Inputs are random GPU tensors converted to channels_last.
    tensor_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    channels_last = torch.channels_last

    torch_tensor_x = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_w = torch.rand(k, c, r, s, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_scale = torch.rand(1, c, 1, 1, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_bias = torch.rand(1, c, 1, 1, **tensor_kwargs).to(memory_format=channels_last)

    hipdnn_handle = hipdnn.create_handle()

    built = build_scale_bias_relu_conv_genstats_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_scale,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_conv_out,
        hipdnn_tensor_sum_out,
        hipdnn_tensor_sq_sum_out,
    ) = built

    # Output buffers sized from the dimensions reported by the graph tensors;
    # the convolution result is allocated directly in channels_last.
    torch_tensor_conv_out = torch.empty(
        hipdnn_tensor_conv_out.get_dim(),
        dtype=torch_data_type,
        memory_format=channels_last,
        device="cuda",
    )
    torch_tensor_sum_out = torch.empty(hipdnn_tensor_sum_out.get_dim(), **tensor_kwargs)
    torch_tensor_sq_sum_out = torch.empty(hipdnn_tensor_sq_sum_out.get_dim(), **tensor_kwargs)

    # Map every graph tensor to the device address of its backing torch tensor.
    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_conv_out, torch_tensor_conv_out),
        (hipdnn_tensor_sum_out, torch_tensor_sum_out),
        (hipdnn_tensor_sq_sum_out, torch_tensor_sq_sum_out),
    ]
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr() for graph_tensor, torch_tensor in bindings
    }

    # Scratch buffer of the byte size requested by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("scale_bias_relu_conv_genstats graph execution complete.")
python/conv_bn_fusion/scale_bias_relu_convwrw.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_scale_bias_relu_convwrw_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_dy,
    torch_tensor_scale,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the fused graph mul(scale) -> add(bias) -> relu -> conv_wgrad.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template (via ``graph.tensor_like``) for the input.
        torch_tensor_dy: Template for the tensor passed to ``conv_wgrad`` as
            ``loss``.
        torch_tensor_scale: Template for the multiplier applied to the input.
        torch_tensor_bias: Template for the addend applied after scaling.
        padding: Forwarded unchanged to ``graph.conv_wgrad``.
        stride: Forwarded unchanged to ``graph.conv_wgrad``.
        dilation: Forwarded unchanged to ``graph.conv_wgrad``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        A tuple ``(graph, x, dy, scale, bias, dw)``: the built graph, the four
        graph input tensors in argument order, and the ``conv_wgrad`` result
        marked as graph output.
    """
    # Intermediate and compute types are fixed to FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="scale_bias_relu_convwrw",
    )

    # Graph-side tensors mirroring the torch tensors, created in argument order.
    x, dy, scale, bias = [
        graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_x,
            torch_tensor_dy,
            torch_tensor_scale,
            torch_tensor_bias,
        )
    ]

    # Pointwise prologue producing the image operand of the weight gradient.
    scaled = graph.mul(a=x, b=scale, name="scale")
    biased = graph.add(a=scaled, b=bias, name="bias")
    activated = graph.relu(input=biased, name="relu")

    dw = graph.conv_wgrad(
        image=activated,
        loss=dy,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="convwrw",
    )
    # Only the weight gradient is marked as a graph output.
    dw.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x, dy, scale, bias, dw)
if __name__ == "__main__":
    # x is created as (n, c, h, w) and dy as (n, k, h, w).
    n, c, h, w = 1, 32, 128, 128
    k = 32

    # Convolution parameters, height first and width second
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Inputs are random GPU tensors converted to channels_last.
    tensor_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    channels_last = torch.channels_last

    torch_tensor_x = torch.rand(n, c, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_dy = torch.rand(n, k, h, w, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_scale = torch.rand(1, c, 1, 1, **tensor_kwargs).to(memory_format=channels_last)
    torch_tensor_bias = torch.rand(1, c, 1, 1, **tensor_kwargs).to(memory_format=channels_last)

    hipdnn_handle = hipdnn.create_handle()

    built = build_scale_bias_relu_convwrw_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_dy,
        torch_tensor_scale,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_dy,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_dw,
    ) = built

    # Weight-gradient buffer sized from the dimensions reported by the graph tensor.
    # NOTE(review): allocated with torch's default layout while the inputs are
    # channels_last; only data_ptr() is used here - confirm the layout before
    # reading values back from torch_tensor_dw.
    torch_tensor_dw = torch.empty(hipdnn_tensor_dw.get_dim(), **tensor_kwargs)

    # Map every graph tensor to the device address of its backing torch tensor.
    bindings = [
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_dw, torch_tensor_dw),
    ]
    variant_pack = {
        graph_tensor: torch_tensor.data_ptr() for graph_tensor, torch_tensor in bindings
    }

    # Scratch buffer of the byte size requested by the built graph.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("scale_bias_relu_convwrw graph execution complete.")
python/conv_bn_fusion/sub_mul_mul_add_convbwd_relubwd_bnwrw.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_sub_mul_mul_add_convbwd_relubwd_bnwrw_graph(
    hipdnn_handle,
    torch_tensor_x_bn,
    torch_tensor_mean_bn,
    torch_tensor_inv_std_bn,
    torch_tensor_scale_bn,
    torch_tensor_bias_bn,
    torch_tensor_dy,
    torch_tensor_filter,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the sub/mul/mul/add + conv-dgrad + ReLU-backward + BN-weight-grad graph.

    The graph first computes ``(x_bn - mean) * inv_std * scale + bias``, then
    the convolution data gradient of ``dy``, feeds both into ``relu_backward``
    and finally passes the result to ``dbn_weight``.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x_bn: Tensor the sub/mul/mul/add chain starts from.
        torch_tensor_mean_bn: Tensor subtracted from ``x_bn``.
        torch_tensor_inv_std_bn: First multiplier of the chain.
        torch_tensor_scale_bn: Second multiplier of the chain.
        torch_tensor_bias_bn: Tensor added at the end of the chain.
        torch_tensor_dy: Loss tensor passed to ``conv_dgrad``.
        torch_tensor_filter: Filter tensor passed to ``conv_dgrad``.
        padding: Convolution padding, forwarded to ``conv_dgrad``.
        stride: Convolution stride, forwarded to ``conv_dgrad``.
        dilation: Convolution dilation, forwarded to ``conv_dgrad``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to FLOAT.

    Returns:
        A 14-tuple: the built graph, the seven input graph tensors (in
        argument order) and the six output graph tensors ``drelu``,
        ``dscale``, ``dbias``, ``eq_scale_dy``, ``eq_scale_x``, ``eq_bias``.
    """
    fp32 = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=fp32,
        compute_data_type=fp32,
        name="sub_mul_mul_add_convbwd_relubwd_bnwrw",
    )

    # Mirror every torch tensor as a graph tensor, keeping argument order.
    x_bn, mean_bn, inv_std_bn, scale_bn, bias_bn, dy, conv_filter = [
        graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_x_bn,
            torch_tensor_mean_bn,
            torch_tensor_inv_std_bn,
            torch_tensor_scale_bn,
            torch_tensor_bias_bn,
            torch_tensor_dy,
            torch_tensor_filter,
        )
    ]

    # (x - mean) * inv_std * scale + bias
    centered = graph.sub(a=x_bn, b=mean_bn, name="sub")
    normalized = graph.mul(a=centered, b=inv_std_bn, name="mul0")
    scaled = graph.mul(a=normalized, b=scale_bn, name="mul1")
    shifted = graph.add(a=scaled, b=bias_bn, name="add")

    # Gradient w.r.t. the convolution input, then back through the ReLU whose
    # forward input was the chain result above.
    dx = graph.conv_dgrad(
        loss=dy,
        filter=conv_filter,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_dgrad",
    )
    drelu = graph.relu_backward(loss=dx, input=shifted, name="relu_backward")
    drelu.set_output(True)

    dscale, dbias, eq_scale_dy, eq_scale_x, eq_bias = graph.dbn_weight(
        dy=drelu,
        input=x_bn,
        mean=mean_bn,
        inv_variance=inv_std_bn,
        scale=scale_bn,
        name="bn_backward_weight",
    )
    for graph_output in (dscale, dbias, eq_scale_dy, eq_scale_x, eq_bias):
        graph_output.set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        x_bn,
        mean_bn,
        inv_std_bn,
        scale_bn,
        bias_bn,
        dy,
        conv_filter,
        drelu,
        dscale,
        dbias,
        eq_scale_dy,
        eq_scale_x,
        eq_bias,
    )
if __name__ == "__main__":
    # Problem size: activations are (n, c, h, w), the filter is (k, c, r, s).
    n, c, h, w = 4, 64, 16, 16
    k, r, s = 32, 3, 3

    # Convolution parameters, one value per spatial axis.
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    def _random_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` converted to channels-last."""
        return torch.rand(*shape, dtype=torch_data_type, device="cuda").to(
            memory_format=torch.channels_last
        )

    def _empty_for(graph_tensor):
        """Allocate an uninitialised channels-last CUDA buffer sized like ``graph_tensor``."""
        return torch.empty(
            graph_tensor.get_dim(),
            dtype=torch_data_type,
            memory_format=torch.channels_last,
            device="cuda",
        )

    # Inputs (creation order kept so the random stream is consumed identically).
    torch_tensor_x_bn = _random_channels_last(n, c, h, w)
    torch_tensor_mean_bn = _random_channels_last(1, c, 1, 1)
    torch_tensor_inv_std_bn = _random_channels_last(1, c, 1, 1)
    torch_tensor_scale_bn = _random_channels_last(1, c, 1, 1)
    torch_tensor_bias_bn = _random_channels_last(1, c, 1, 1)
    torch_tensor_dy = _random_channels_last(n, k, h, w)
    torch_tensor_filter = _random_channels_last(k, c, r, s)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x_bn,
        hipdnn_tensor_mean_bn,
        hipdnn_tensor_inv_std_bn,
        hipdnn_tensor_scale_bn,
        hipdnn_tensor_bias_bn,
        hipdnn_tensor_dy,
        hipdnn_tensor_filter,
        hipdnn_tensor_drelu,
        hipdnn_tensor_dscale,
        hipdnn_tensor_dbias,
        hipdnn_tensor_eq_scale_dy,
        hipdnn_tensor_eq_scale_x,
        hipdnn_tensor_eq_bias,
    ) = build_sub_mul_mul_add_convbwd_relubwd_bnwrw_graph(
        hipdnn_handle,
        torch_tensor_x_bn,
        torch_tensor_mean_bn,
        torch_tensor_inv_std_bn,
        torch_tensor_scale_bn,
        torch_tensor_bias_bn,
        torch_tensor_dy,
        torch_tensor_filter,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffers, sized from the dims the graph reports.
    torch_tensor_drelu = _empty_for(hipdnn_tensor_drelu)
    torch_tensor_dscale = _empty_for(hipdnn_tensor_dscale)
    torch_tensor_dbias = _empty_for(hipdnn_tensor_dbias)
    torch_tensor_eq_scale_dy = _empty_for(hipdnn_tensor_eq_scale_dy)
    torch_tensor_eq_scale_x = _empty_for(hipdnn_tensor_eq_scale_x)
    torch_tensor_eq_bias = _empty_for(hipdnn_tensor_eq_bias)

    # Map every graph tensor to the device address of its backing buffer.
    bindings = (
        (hipdnn_tensor_x_bn, torch_tensor_x_bn),
        (hipdnn_tensor_mean_bn, torch_tensor_mean_bn),
        (hipdnn_tensor_inv_std_bn, torch_tensor_inv_std_bn),
        (hipdnn_tensor_scale_bn, torch_tensor_scale_bn),
        (hipdnn_tensor_bias_bn, torch_tensor_bias_bn),
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_filter, torch_tensor_filter),
        (hipdnn_tensor_drelu, torch_tensor_drelu),
        (hipdnn_tensor_dscale, torch_tensor_dscale),
        (hipdnn_tensor_dbias, torch_tensor_dbias),
        (hipdnn_tensor_eq_scale_dy, torch_tensor_eq_scale_dy),
        (hipdnn_tensor_eq_scale_x, torch_tensor_eq_scale_x),
        (hipdnn_tensor_eq_bias, torch_tensor_eq_bias),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Scratch memory requested by the graph, allocated as raw bytes.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("sub_mul_mul_add_convbwd_relubwd_bnwrw graph execution complete.")
python/conv_depthtospace_fusion/conv_bias_add_dts.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_add_dts_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> bias add -> add -> depth-to-space graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape; the
    reshape dims and permutations follow the ONNX DepthToSpace formulation.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Input activations; ``shape[0]`` is read as the batch
            size and ``shape[2]``/``shape[3]`` as height/width.
        torch_tensor_w: Convolution filter; ``shape[0]`` is read as the number
            of output channels and ``shape[2]``/``shape[3]`` as kernel
            height/width.
        torch_tensor_bias: Tensor added to the convolution output.
        torch_tensor_add: Tensor added to the biased output, before the
            depth-to-space rearrangement.
        padding: ``[pad_h, pad_w]``.
        stride: ``[stride_h, stride_w]``.
        dilation: ``[dil_h, dil_w]``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to FLOAT.
        depth_to_space_mode: ``"CRD"`` selects the CRD ordering; any other
            value falls back to the DCR ordering.
        block_size: Spatial upscaling factor of the depth-to-space step.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` of the built graph, the four
        input graph tensors and the output graph tensor.

    Raises:
        ValueError: If ``block_size`` is not positive or the number of output
            channels is not a multiple of ``block_size ** 2``.
    """
    out_channels = torch_tensor_w.shape[0]
    block_area = block_size * block_size
    # Fail early: a floored channel count would make the reshape dims
    # disagree with the number of elements the convolution produces.
    if block_size < 1 or out_channels % block_area != 0:
        raise ValueError(
            f"number of output channels ({out_channels}) must be a multiple "
            f"of block_size ** 2 ({block_area}) with block_size >= 1"
        )

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_add_dts",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    # Create add
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_bias_output, b=hipdnn_tensor_add, name="add"
    )

    batch = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    kernel_h = torch_tensor_w.shape[2]
    kernel_w = torch_tensor_w.shape[3]

    # Convolution output size, computed in integer arithmetic.
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) // stride[1] + 1

    depth = out_channels // block_area
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [batch, depth, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [batch, block_size, block_size, depth, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]
    second_reshape_dim = [batch, depth, block_size * out_h, block_size * out_w]

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_add_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    # The strides are the packed channels-last strides of second_reshape_dim.
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        [
            out_channels * out_h * out_w,
            1,
            out_channels // block_size * out_w,
            depth,
        ]
    )
    hipdnn_tensor_second_reshape_output.set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_second_reshape_output,
    )
if __name__ == "__main__":
    # Input dimensions: batch size, input channels, height, width.
    n, c, h, w = 1, 128, 270, 480

    # Filter dimensions: output channels, filter height, filter width.
    k, r, s = 128, 3, 3

    # Convolution parameters (height, width).
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    block_size = 2
    depth_to_space_mode = "CRD"

    # Spatial size of the convolution output; the tensor added before the
    # depth-to-space step has to match it.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # Options shared by every random input tensor.
    rand_options = {"dtype": torch_data_type, "device": "cuda"}

    torch_tensor_x = torch.rand(n, c, h, w, **rand_options).to(
        memory_format=torch.channels_last
    )
    torch_tensor_w = torch.rand(k, c, r, s, **rand_options).to(
        memory_format=torch.channels_last
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, **rand_options).to(
        memory_format=torch.channels_last
    )
    torch_tensor_add = torch.rand(n, k, out_h, out_w, **rand_options).to(
        memory_format=torch.channels_last
    )

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_bias_add_dts_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = built

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its backing buffer.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x, torch_tensor_x),
            (hipdnn_tensor_w, torch_tensor_w),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_add, torch_tensor_add),
            (hipdnn_tensor_y, torch_tensor_y),
        )
    }

    # Scratch memory requested by the graph, allocated as raw bytes.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_add_dts graph execution complete.")
python/conv_depthtospace_fusion/conv_bias_dts.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_dts_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> bias add -> depth-to-space graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape; the
    reshape dims and permutations follow the ONNX DepthToSpace formulation.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Input activations; ``shape[0]`` is read as the batch
            size and ``shape[2]``/``shape[3]`` as height/width.
        torch_tensor_w: Convolution filter; ``shape[0]`` is read as the number
            of output channels and ``shape[2]``/``shape[3]`` as kernel
            height/width.
        torch_tensor_bias: Tensor added to the convolution output.
        padding: ``[pad_h, pad_w]``.
        stride: ``[stride_h, stride_w]``.
        dilation: ``[dil_h, dil_w]``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to FLOAT.
        depth_to_space_mode: ``"CRD"`` selects the CRD ordering; any other
            value falls back to the DCR ordering.
        block_size: Spatial upscaling factor of the depth-to-space step.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` of the built graph, the three input
        graph tensors and the output graph tensor.

    Raises:
        ValueError: If ``block_size`` is not positive or the number of output
            channels is not a multiple of ``block_size ** 2``.
    """
    out_channels = torch_tensor_w.shape[0]
    block_area = block_size * block_size
    # Fail early: a floored channel count would make the reshape dims
    # disagree with the number of elements the convolution produces.
    if block_size < 1 or out_channels % block_area != 0:
        raise ValueError(
            f"number of output channels ({out_channels}) must be a multiple "
            f"of block_size ** 2 ({block_area}) with block_size >= 1"
        )

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_dts",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    batch = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    kernel_h = torch_tensor_w.shape[2]
    kernel_w = torch_tensor_w.shape[3]

    # Convolution output size, computed in integer arithmetic.
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) // stride[1] + 1

    depth = out_channels // block_area
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [batch, depth, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [batch, block_size, block_size, depth, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]
    second_reshape_dim = [batch, depth, block_size * out_h, block_size * out_w]

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_bias_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    # The strides are the packed channels-last strides of second_reshape_dim.
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        [
            out_channels * out_h * out_w,
            1,
            out_channels // block_size * out_w,
            depth,
        ]
    ).set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_second_reshape_output,
    )
if __name__ == "__main__":
    # Input dimensions: batch size, input channels, height, width.
    n, c, h, w = 1, 8, 128, 128

    # Filter dimensions: output channels, filter height, filter width.
    k, r, s = 16, 3, 3

    # Convolution parameters (height, width).
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 1, 1
    dil_h, dil_w = 1, 1

    block_size = 2
    depth_to_space_mode = "DCR"

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # Random inputs: each is created contiguous on the GPU and then converted
    # to channels-last. Creation order is x, w, bias.
    input_shapes = ((n, c, h, w), (k, c, r, s), (1, k, 1, 1))
    torch_tensor_x, torch_tensor_w, torch_tensor_bias = [
        torch.rand(*shape, dtype=torch_data_type, device="cuda").to(
            memory_format=torch.channels_last
        )
        for shape in input_shapes
    ]

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = build_conv_bias_dts_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its backing buffer.
    variant_pack = {}
    variant_pack[hipdnn_tensor_x] = torch_tensor_x.data_ptr()
    variant_pack[hipdnn_tensor_w] = torch_tensor_w.data_ptr()
    variant_pack[hipdnn_tensor_bias] = torch_tensor_bias.data_ptr()
    variant_pack[hipdnn_tensor_y] = torch_tensor_y.data_ptr()

    # Scratch memory requested by the graph, allocated as raw bytes.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_dts graph execution complete.")
python/conv_depthtospace_fusion/conv_bias_dts_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_dts_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> bias add -> depth-to-space -> add graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape; the
    reshape dims and permutations follow the ONNX DepthToSpace formulation.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Input activations; ``shape[0]`` is read as the batch
            size and ``shape[2]``/``shape[3]`` as height/width.
        torch_tensor_w: Convolution filter; ``shape[0]`` is read as the number
            of output channels and ``shape[2]``/``shape[3]`` as kernel
            height/width.
        torch_tensor_bias: Tensor added to the convolution output.
        torch_tensor_add: Tensor added to the depth-to-space result.
        padding: ``[pad_h, pad_w]``.
        stride: ``[stride_h, stride_w]``.
        dilation: ``[dil_h, dil_w]``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to FLOAT.
        depth_to_space_mode: ``"CRD"`` selects the CRD ordering; any other
            value falls back to the DCR ordering.
        block_size: Spatial upscaling factor of the depth-to-space step.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` of the built graph, the four
        input graph tensors and the output graph tensor.

    Raises:
        ValueError: If ``block_size`` is not positive or the number of output
            channels is not a multiple of ``block_size ** 2``.
    """
    out_channels = torch_tensor_w.shape[0]
    block_area = block_size * block_size
    # Fail early: a floored channel count would make the reshape dims
    # disagree with the number of elements the convolution produces.
    if block_size < 1 or out_channels % block_area != 0:
        raise ValueError(
            f"number of output channels ({out_channels}) must be a multiple "
            f"of block_size ** 2 ({block_area}) with block_size >= 1"
        )

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_dts_add",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    batch = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    kernel_h = torch_tensor_w.shape[2]
    kernel_w = torch_tensor_w.shape[3]

    # Convolution output size, computed in integer arithmetic.
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) // stride[1] + 1

    depth = out_channels // block_area
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [batch, depth, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [batch, block_size, block_size, depth, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]
    second_reshape_dim = [batch, depth, block_size * out_h, block_size * out_w]

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_bias_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    # The strides are the packed channels-last strides of second_reshape_dim.
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        [
            out_channels * out_h * out_w,
            1,
            out_channels // block_size * out_w,
            depth,
        ]
    )

    # Create add
    hipdnn_tensor_y = graph.add(
        a=hipdnn_tensor_second_reshape_output, b=hipdnn_tensor_add, name="add"
    )
    hipdnn_tensor_y.set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    )
if __name__ == "__main__":
    # Input dimensions
    n = 1  # Batch size
    c = 8  # Number of input channels
    h = 128  # Height
    w = 128  # Width

    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    block_size = 2
    depth_to_space_mode = "CRD"

    # Spatial size of the convolution output. The tensor added after the
    # depth-to-space step must match the upscaled conv output, so its shape is
    # derived from out_h/out_w instead of assuming the conv keeps h and w.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_add = torch.rand(
        n,
        k // (block_size * block_size),
        out_h * block_size,
        out_w * block_size,
        dtype=torch_data_type,
        device="cuda",
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_dts_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its backing buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch memory requested by the graph, allocated as raw bytes.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_dts_add graph execution complete.")
python/conv_depthtospace_fusion/conv_bias_dts_leakyrelu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_dts_leakyrelu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> bias add -> depth-to-space -> leaky-ReLU graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape; the
    reshape dims and permutations follow the ONNX DepthToSpace formulation.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Input activations; ``shape[0]`` is read as the batch
            size and ``shape[2]``/``shape[3]`` as height/width.
        torch_tensor_w: Convolution filter; ``shape[0]`` is read as the number
            of output channels and ``shape[2]``/``shape[3]`` as kernel
            height/width.
        torch_tensor_bias: Tensor added to the convolution output.
        padding: ``[pad_h, pad_w]``.
        stride: ``[stride_h, stride_w]``.
        dilation: ``[dil_h, dil_w]``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to FLOAT.
        depth_to_space_mode: ``"CRD"`` selects the CRD ordering; any other
            value falls back to the DCR ordering.
        block_size: Spatial upscaling factor of the depth-to-space step.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` of the built graph, the three input
        graph tensors and the output graph tensor.

    Raises:
        ValueError: If ``block_size`` is not positive or the number of output
            channels is not a multiple of ``block_size ** 2``.
    """
    out_channels = torch_tensor_w.shape[0]
    block_area = block_size * block_size
    # Fail early: a floored channel count would make the reshape dims
    # disagree with the number of elements the convolution produces.
    if block_size < 1 or out_channels % block_area != 0:
        raise ValueError(
            f"number of output channels ({out_channels}) must be a multiple "
            f"of block_size ** 2 ({block_area}) with block_size >= 1"
        )

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_dts_leakyrelu",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    batch = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    kernel_h = torch_tensor_w.shape[2]
    kernel_w = torch_tensor_w.shape[3]

    # Convolution output size, computed in integer arithmetic.
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) // stride[1] + 1

    depth = out_channels // block_area
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [batch, depth, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [batch, block_size, block_size, depth, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]
    second_reshape_dim = [batch, depth, block_size * out_h, block_size * out_w]

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_bias_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    # The strides are the packed channels-last strides of second_reshape_dim.
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        [
            out_channels * out_h * out_w,
            1,
            out_channels // block_size * out_w,
            depth,
        ]
    )

    # Create leakyrelu
    # NOTE(review): a negative_slope of -1.0 is unusual (leaky-ReLU slopes are
    # typically small positive values); kept as in the sample - confirm intent.
    hipdnn_tensor_y = graph.leaky_relu(
        input=hipdnn_tensor_second_reshape_output,
        negative_slope=-1.0,
        name="leaky_relu",
    )
    hipdnn_tensor_y.set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    )
if __name__ == "__main__":
    # Input dimensions: batch size, input channels, height, width.
    n, c, h, w = 1, 8, 128, 128

    # Filter dimensions: output channels, filter height, filter width.
    k, r, s = 16, 3, 3

    # Convolution parameters (height, width).
    stride_h = stride_w = 1
    pad_h = pad_w = 1
    dil_h = dil_w = 1

    block_size = 2
    depth_to_space_mode = "DCR"

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    def _random_channels_last(*shape):
        """Return a random CUDA tensor of ``shape`` converted to channels-last."""
        contiguous = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return contiguous.to(memory_format=torch.channels_last)

    torch_tensor_x = _random_channels_last(n, c, h, w)
    torch_tensor_w = _random_channels_last(k, c, r, s)
    torch_tensor_bias = _random_channels_last(1, k, 1, 1)

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_bias_dts_leakyrelu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its backing buffer.
    graph_tensors = (
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    )
    buffers = (torch_tensor_x, torch_tensor_w, torch_tensor_bias, torch_tensor_y)
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in zip(graph_tensors, buffers)
    }

    # Scratch memory requested by the graph, allocated as raw bytes.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_dts_leakyrelu graph execution complete.")
python/conv_depthtospace_fusion/conv_bias_dts_leakyrelu_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_dts_leakyrelu_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> bias -> depth-to-space -> leaky-ReLU -> add graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape; the
    reshape dims and permutations follow the ONNX DepthToSpace formulation.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Input activations; ``shape[0]`` is read as the batch
            size and ``shape[2]``/``shape[3]`` as height/width.
        torch_tensor_w: Convolution filter; ``shape[0]`` is read as the number
            of output channels and ``shape[2]``/``shape[3]`` as kernel
            height/width.
        torch_tensor_bias: Tensor added to the convolution output.
        torch_tensor_add: Tensor added to the leaky-ReLU output.
        padding: ``[pad_h, pad_w]``.
        stride: ``[stride_h, stride_w]``.
        dilation: ``[dil_h, dil_w]``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to FLOAT.
        depth_to_space_mode: ``"CRD"`` selects the CRD ordering; any other
            value falls back to the DCR ordering.
        block_size: Spatial upscaling factor of the depth-to-space step.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` of the built graph, the four
        input graph tensors and the output graph tensor.

    Raises:
        ValueError: If ``block_size`` is not positive or the number of output
            channels is not a multiple of ``block_size ** 2``.
    """
    out_channels = torch_tensor_w.shape[0]
    block_area = block_size * block_size
    # Fail early: a floored channel count would make the reshape dims
    # disagree with the number of elements the convolution produces.
    if block_size < 1 or out_channels % block_area != 0:
        raise ValueError(
            f"number of output channels ({out_channels}) must be a multiple "
            f"of block_size ** 2 ({block_area}) with block_size >= 1"
        )

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_bias_dts_leakyrelu_add",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    # Create bias
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_output, b=hipdnn_tensor_bias, name="bias"
    )

    batch = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    kernel_h = torch_tensor_w.shape[2]
    kernel_w = torch_tensor_w.shape[3]

    # Convolution output size, computed in integer arithmetic.
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) // stride[1] + 1

    depth = out_channels // block_area
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [batch, depth, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [batch, block_size, block_size, depth, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]
    second_reshape_dim = [batch, depth, block_size * out_h, block_size * out_w]

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_bias_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    # The strides are the packed channels-last strides of second_reshape_dim.
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        [
            out_channels * out_h * out_w,
            1,
            out_channels // block_size * out_w,
            depth,
        ]
    )

    # Create leakyRelu
    # NOTE(review): a negative_slope of -1.0 is unusual (leaky-ReLU slopes are
    # typically small positive values); kept as in the sample - confirm intent.
    hipdnn_tensor_leaky_relu_output = graph.leaky_relu(
        input=hipdnn_tensor_second_reshape_output,
        negative_slope=-1.0,
        name="leaky_relu",
    )

    # Create add
    hipdnn_tensor_y = graph.add(
        a=hipdnn_tensor_leaky_relu_output, b=hipdnn_tensor_add, name="add"
    )
    hipdnn_tensor_y.set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    )
if __name__ == "__main__":
    # Input dimensions
    n = 1  # Batch size
    c = 8  # Number of input channels
    h = 128  # Height
    w = 128  # Width

    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    block_size = 2
    depth_to_space_mode = "CRD"

    # Spatial size of the convolution output. The tensor added after the
    # leaky-ReLU must match the upscaled conv output, so its shape is derived
    # from out_h/out_w instead of assuming the conv keeps h and w.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_add = torch.rand(
        n,
        k // (block_size * block_size),
        out_h * block_size,
        out_w * block_size,
        dtype=torch_data_type,
        device="cuda",
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_dts_leakyrelu_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )

    # Output buffer sized from the dims reported by the graph's output tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its backing buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch memory requested by the graph, allocated as raw bytes.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_dts_leakyrelu_add graph execution complete.")
python/conv_depthtospace_fusion/conv_dts.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_dts_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
    depth_to_space_mode,
    block_size,
):
    """Build a fused conv -> depth-to-space graph.

    Depth-to-space is expressed as reshape -> transpose -> reshape; the
    reshape dims and permutations follow the ONNX DepthToSpace formulation.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Input activations; ``shape[0]`` is read as the batch
            size and ``shape[2]``/``shape[3]`` as height/width.
        torch_tensor_w: Convolution filter; ``shape[0]`` is read as the number
            of output channels and ``shape[2]``/``shape[3]`` as kernel
            height/width.
        padding: ``[pad_h, pad_w]``.
        stride: ``[stride_h, stride_w]``.
        dilation: ``[dil_h, dil_w]``.
        hipdnn_data_type: I/O data type of the graph; intermediate and compute
            types are fixed to FLOAT.
        depth_to_space_mode: ``"CRD"`` selects the CRD ordering; any other
            value falls back to the DCR ordering.
        block_size: Spatial upscaling factor of the depth-to-space step.

    Returns:
        Tuple ``(graph, x, w, y)`` of the built graph, the two input graph
        tensors and the output graph tensor.

    Raises:
        ValueError: If ``block_size`` is not positive or the number of output
            channels is not a multiple of ``block_size ** 2``.
    """
    out_channels = torch_tensor_w.shape[0]
    block_area = block_size * block_size
    # Fail early: a floored channel count would make the reshape dims
    # disagree with the number of elements the convolution produces.
    if block_size < 1 or out_channels % block_area != 0:
        raise ValueError(
            f"number of output channels ({out_channels}) must be a multiple "
            f"of block_size ** 2 ({block_area}) with block_size >= 1"
        )

    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="conv_dts",
    )

    # Create hipdnn tensors
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w)

    # Create conv
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    batch = torch_tensor_x.shape[0]
    in_h = torch_tensor_x.shape[2]
    in_w = torch_tensor_x.shape[3]
    kernel_h = torch_tensor_w.shape[2]
    kernel_w = torch_tensor_w.shape[3]

    # Convolution output size, computed in integer arithmetic.
    out_h = (in_h + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) // stride[0] + 1
    out_w = (in_w + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) // stride[1] + 1

    depth = out_channels // block_area
    if depth_to_space_mode == "CRD":
        first_reshape_dim = [batch, depth, block_size, block_size, out_h, out_w]
        permutation = [0, 1, 4, 2, 5, 3]
    else:
        first_reshape_dim = [batch, block_size, block_size, depth, out_h, out_w]
        permutation = [0, 3, 4, 1, 5, 2]
    second_reshape_dim = [batch, depth, block_size * out_h, block_size * out_w]

    # Create first reshape
    hipdnn_tensor_first_reshape_output = graph.reshape(
        input=hipdnn_tensor_conv_output, name="first_reshape"
    )
    hipdnn_tensor_first_reshape_output.set_dim(first_reshape_dim)

    # Create transpose
    hipdnn_tensor_transpose_output = graph.transpose(
        input=hipdnn_tensor_first_reshape_output,
        permutation=permutation,
        name="transpose",
    )

    # Create second reshape
    hipdnn_tensor_second_reshape_output = graph.reshape(
        input=hipdnn_tensor_transpose_output, name="second_reshape"
    )
    # The strides are the packed channels-last strides of second_reshape_dim.
    hipdnn_tensor_second_reshape_output.set_dim(second_reshape_dim).set_stride(
        [
            out_channels * out_h * out_w,
            1,
            out_channels // block_size * out_w,
            depth,
        ]
    ).set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_second_reshape_output,
    )
if __name__ == "__main__":
    # Input: batch size, input channels, height, width.
    n, c, h, w = 1, 8, 128, 128
    # Filter: output channels, filter height, filter width.
    k, r, s = 16, 3, 3
    # Convolution parameters, each as a [height, width] pair.
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]
    # Depth-to-space parameters.
    block_size = 2
    depth_to_space_mode = "DCR"

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    def random_channels_last(*dims):
        """Return a random CUDA tensor of shape *dims* in channels_last format."""
        dense = torch.rand(*dims, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    torch_tensor_x = random_channels_last(n, c, h, w)
    torch_tensor_w = random_channels_last(k, c, r, s)

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_dts_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        padding,
        stride,
        dilation,
        hipdnn_data_type,
        depth_to_space_mode,
        block_size,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph output.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its torch buffer.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x, torch_tensor_x),
            (hipdnn_tensor_w, torch_tensor_w),
            (hipdnn_tensor_y, torch_tensor_y),
        )
    }

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("conv_dts graph execution complete.")
python/conv_fusion/conv_bias.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Assemble and build the "conv_bias" hipdnn graph: conv_fprop, then add.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor from which the image descriptor is made.
        torch_tensor_w: Torch tensor from which the weight descriptor is made.
        torch_tensor_bias: Torch tensor from which the bias descriptor is made.
        padding: Forwarded to ``conv_fprop`` as ``padding``.
        stride: Forwarded to ``conv_fprop`` as ``stride``.
        dilation: Forwarded to ``conv_fprop`` as ``dilation``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` of the built graph and its hipdnn
        tensors; ``y`` is the output of the bias add and is marked as a graph
        output.
    """
    float_type = hipdnn.data_type.FLOAT
    fused_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias",
    )

    # Graph-side tensors created from the torch tensors, in x, w, bias order.
    x_desc, w_desc, bias_desc = (
        fused_graph.tensor_like(tensor)
        for tensor in (torch_tensor_x, torch_tensor_w, torch_tensor_bias)
    )

    conv_out = fused_graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )

    y_desc = fused_graph.add(a=conv_out, b=bias_desc, name="bias")
    y_desc.set_output(True)

    fused_graph.build(hipdnn_handle)
    return fused_graph, x_desc, w_desc, bias_desc, y_desc
if __name__ == "__main__":
    # Input: batch size, input channels, height, width.
    n, c, h, w = 1, 16, 16, 16
    # Filter: output channels, filter height, filter width.
    k, r, s = 16, 3, 3
    # Convolution parameters, each as a [height, width] pair.
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    def random_channels_last(*dims):
        """Return a random CUDA tensor of shape *dims* in channels_last format."""
        dense = torch.rand(*dims, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    torch_tensor_x = random_channels_last(n, c, h, w)
    torch_tensor_w = random_channels_last(k, c, r, s)
    torch_tensor_bias = random_channels_last(1, k, 1, 1)

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_bias_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        padding,
        stride,
        dilation,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph output.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its torch buffer.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x, torch_tensor_x),
            (hipdnn_tensor_w, torch_tensor_w),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_y, torch_tensor_y),
        )
    }

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("conv_bias graph execution complete.")
python/conv_fusion/conv_bias_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Assemble and build the "conv_bias_add" hipdnn graph.

    The graph chains ``conv_fprop`` -> ``add`` (bias) -> ``add`` (extra
    operand).

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor from which the image descriptor is made.
        torch_tensor_w: Torch tensor from which the weight descriptor is made.
        torch_tensor_bias: Torch tensor from which the bias descriptor is made.
        torch_tensor_add: Torch tensor from which the second add operand's
            descriptor is made.
        padding: Forwarded to ``conv_fprop`` as ``padding``.
        stride: Forwarded to ``conv_fprop`` as ``stride``.
        dilation: Forwarded to ``conv_fprop`` as ``dilation``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` of the built graph and its
        hipdnn tensors; ``y`` is the output of the final add and is marked as
        a graph output.
    """
    float_type = hipdnn.data_type.FLOAT
    fused_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_add",
    )

    # Graph-side tensors created from the torch tensors, in x, w, bias, add
    # order.
    x_desc, w_desc, bias_desc, add_desc = (
        fused_graph.tensor_like(tensor)
        for tensor in (
            torch_tensor_x,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_add,
        )
    )

    conv_out = fused_graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused_graph.add(a=conv_out, b=bias_desc, name="bias")

    y_desc = fused_graph.add(a=biased, b=add_desc, name="add")
    y_desc.set_output(True)

    fused_graph.build(hipdnn_handle)
    return fused_graph, x_desc, w_desc, bias_desc, add_desc, y_desc
if __name__ == "__main__":
    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width

    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial size of the convolution output.
    out_h = (h + 2 * pad_h - dil_h * (r - 1) - 1) // stride_h + 1
    out_w = (w + 2 * pad_w - dil_w * (s - 1) - 1) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    # The graph adds this tensor to the (biased) convolution output, so it is
    # allocated with the output shape (n, k, out_h, out_w) rather than the
    # input shape; the two only coincide when c == k and the convolution
    # preserves the spatial size.
    torch_tensor_add = torch.rand(
        n, k, out_h, out_w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph output.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("conv_bias_add graph execution complete.")
python/conv_fusion/conv_bias_add_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_add_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Assemble and build the "convolution_bias_add_relu" hipdnn graph.

    The graph chains ``conv_fprop`` -> ``add`` (bias) -> ``add`` (extra
    operand) -> ``relu``.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor from which the image descriptor is made.
        torch_tensor_w: Torch tensor from which the weight descriptor is made.
        torch_tensor_bias: Torch tensor from which the bias descriptor is made.
        torch_tensor_add: Torch tensor from which the second add operand's
            descriptor is made.
        padding: Forwarded to ``conv_fprop`` as ``padding``.
        stride: Forwarded to ``conv_fprop`` as ``stride``.
        dilation: Forwarded to ``conv_fprop`` as ``dilation``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` of the built graph and its
        hipdnn tensors; ``y`` is the relu output and is marked as a graph
        output.
    """
    float_type = hipdnn.data_type.FLOAT
    fused_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="convolution_bias_add_relu",
    )

    # Graph-side tensors created from the torch tensors, in x, w, bias, add
    # order.
    x_desc, w_desc, bias_desc, add_desc = (
        fused_graph.tensor_like(tensor)
        for tensor in (
            torch_tensor_x,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_add,
        )
    )

    conv_out = fused_graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused_graph.add(a=conv_out, b=bias_desc, name="bias")
    summed = fused_graph.add(a=biased, b=add_desc, name="add")

    y_desc = fused_graph.relu(input=summed, name="relu")
    y_desc.set_output(True)

    fused_graph.build(hipdnn_handle)
    return fused_graph, x_desc, w_desc, bias_desc, add_desc, y_desc
if __name__ == "__main__":
    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width

    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial size of the convolution output.
    out_h = (h + 2 * pad_h - dil_h * (r - 1) - 1) // stride_h + 1
    out_w = (w + 2 * pad_w - dil_w * (s - 1) - 1) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    # The graph adds this tensor to the (biased) convolution output, so it is
    # allocated with the output shape (n, k, out_h, out_w) rather than the
    # input shape; the two only coincide when c == k and the convolution
    # preserves the spatial size.
    torch_tensor_add = torch.rand(
        n, k, out_h, out_w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_add_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph output.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("convolution_bias_add_relu graph execution complete.")
python/conv_fusion/conv_bias_prelu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_prelu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    negative_slope,
    hipdnn_data_type,
):
    """Assemble and build the "conv_bias_prelu" hipdnn graph.

    The graph chains ``conv_fprop`` -> ``add`` (bias) -> ``prelu``.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Torch tensor from which the image descriptor is made.
        torch_tensor_w: Torch tensor from which the weight descriptor is made.
        torch_tensor_bias: Torch tensor from which the bias descriptor is made.
        padding: Forwarded to ``conv_fprop`` as ``padding``.
        stride: Forwarded to ``conv_fprop`` as ``stride``.
        dilation: Forwarded to ``conv_fprop`` as ``dilation``.
        negative_slope: Forwarded to ``prelu`` as ``negative_slope``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` of the built graph and its hipdnn
        tensors; ``y`` is the prelu output and is marked as a graph output.
    """
    float_type = hipdnn.data_type.FLOAT
    fused_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_prelu",
    )

    # Graph-side tensors created from the torch tensors, in x, w, bias order.
    x_desc, w_desc, bias_desc = (
        fused_graph.tensor_like(tensor)
        for tensor in (torch_tensor_x, torch_tensor_w, torch_tensor_bias)
    )

    conv_out = fused_graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused_graph.add(a=conv_out, b=bias_desc, name="bias")

    y_desc = fused_graph.prelu(
        input=biased, negative_slope=negative_slope, name="prelu"
    )
    y_desc.set_output(True)

    fused_graph.build(hipdnn_handle)
    return fused_graph, x_desc, w_desc, bias_desc, y_desc
if __name__ == "__main__":
    # Input: batch size, input channels, height, width.
    n, c, h, w = 1, 16, 16, 16
    # Filter: output channels, filter height, filter width.
    k, r, s = 16, 3, 3
    # Convolution parameters, each as a [height, width] pair.
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]
    # Activation parameter: slope passed to the prelu node.
    negative_slope = 0.01

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    def random_channels_last(*dims):
        """Return a random CUDA tensor of shape *dims* in channels_last format."""
        dense = torch.rand(*dims, dtype=torch_data_type, device="cuda")
        return dense.to(memory_format=torch.channels_last)

    torch_tensor_x = random_channels_last(n, c, h, w)
    torch_tensor_w = random_channels_last(k, c, r, s)
    torch_tensor_bias = random_channels_last(1, k, 1, 1)

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_bias_prelu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        padding,
        stride,
        dilation,
        negative_slope,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph output.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the device address of its torch buffer.
    variant_pack = {
        graph_tensor: buffer.data_ptr()
        for graph_tensor, buffer in (
            (hipdnn_tensor_x, torch_tensor_x),
            (hipdnn_tensor_w, torch_tensor_w),
            (hipdnn_tensor_bias, torch_tensor_bias),
            (hipdnn_tensor_y, torch_tensor_y),
        )
    }

    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("conv_bias_prelu graph execution complete.")
Prev
1
2
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment