Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDTK
hipDNN Samples
Commits
ca34d4d2
Commit
ca34d4d2
authored
Jun 02, 2026
by
yanjl1
Browse files
Initial
parents
Changes
173
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2720 additions
and
0 deletions
+2720
-0
python/conv_fusion/conv_bias_prelu_add.py
python/conv_fusion/conv_bias_prelu_add.py
+139
-0
python/conv_fusion/conv_bias_relu.py
python/conv_fusion/conv_bias_relu.py
+112
-0
python/conv_fusion/conv_bias_swish.py
python/conv_fusion/conv_bias_swish.py
+112
-0
python/conv_fusion/conv_bias_swish_add.py
python/conv_fusion/conv_bias_swish_add.py
+132
-0
python/conv_fusion/convbwd_bias_relu.py
python/conv_fusion/convbwd_bias_relu.py
+117
-0
python/conv_fusion/convint8_bias.py
python/conv_fusion/convint8_bias.py
+172
-0
python/conv_fusion/convint8_bias_add.py
python/conv_fusion/convint8_bias_add.py
+213
-0
python/conv_fusion/convint8_bias_add_relu.py
python/conv_fusion/convint8_bias_add_relu.py
+216
-0
python/conv_fusion/convint8_bias_relu.py
python/conv_fusion/convint8_bias_relu.py
+175
-0
python/conv_fusion/convint8_bias_relu_add.py
python/conv_fusion/convint8_bias_relu_add.py
+216
-0
python/convolution/convolution_bwd.py
python/convolution/convolution_bwd.py
+82
-0
python/convolution/convolution_fwd.py
python/convolution/convolution_fwd.py
+83
-0
python/convolution/convolution_wrw.py
python/convolution/convolution_wrw.py
+82
-0
python/ctc_loss/ctc_loss.py
python/ctc_loss/ctc_loss.py
+52
-0
python/deformattention/deform_attention.py
python/deformattention/deform_attention.py
+118
-0
python/deformattention/deform_attention_bwd.py
python/deformattention/deform_attention_bwd.py
+146
-0
python/deformconvolution/deform_convolution.py
python/deformconvolution/deform_convolution.py
+139
-0
python/deformconvolution/deform_convolution_bwd.py
python/deformconvolution/deform_convolution_bwd.py
+161
-0
python/deformconvolution/deform_convolution_wrw.py
python/deformconvolution/deform_convolution_wrw.py
+133
-0
python/fusion/add_layernorm.py
python/fusion/add_layernorm.py
+120
-0
No files found.
python/conv_fusion/conv_bias_prelu_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_prelu_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    negative_slope,
    hipdnn_data_type,
):
    """Build a hipDNN graph: conv_fprop -> add (bias) -> prelu -> add.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution image.
        torch_tensor_w: Torch tensor describing the convolution weight.
        torch_tensor_bias: Torch tensor added to the convolution output.
        torch_tensor_add: Torch tensor added to the PReLU output.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        negative_slope: Negative slope forwarded to ``prelu``.
        hipdnn_data_type: hipDNN data type used as the graph I/O type.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` holding the built graph, the
        four graph input tensors and the graph output tensor.
    """
    # Intermediate results and computation both use FLOAT.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_prelu_add",
    )

    # Graph tensors created from the torch tensors, in argument order.
    x_desc, w_desc, bias_desc, residual_desc = (
        graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_x,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_add,
        )
    )

    # Chain the operations: convolution, bias add, PReLU, second add.
    conv_out = graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = graph.add(a=conv_out, b=bias_desc, name="bias")
    activated = graph.prelu(
        input=biased, negative_slope=negative_slope, name="prelu"
    )
    y_desc = graph.add(a=activated, b=residual_desc, name="add")

    # Only the final tensor is marked as a graph output.
    y_desc.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x_desc, w_desc, bias_desc, residual_desc, y_desc)
if __name__ == "__main__":
    # Example driver: build the conv + bias + PReLU + add graph and execute
    # it once on random half-precision data.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width

    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Activation parameters
    negative_slope = 0.01  # Negative slope

    # Spatial size of the convolution output (standard convolution
    # arithmetic with floor division).
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    # The graph adds this tensor to the activation output, so it is shaped
    # like the convolution output (n, k, out_h, out_w) rather than like the
    # input. With the default sizes above both shapes are (1, 16, 16, 16).
    torch_tensor_add = torch.rand(
        n, k, out_h, out_w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_prelu_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        negative_slope,
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Bind every graph tensor to the data pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte buffer of the size requested by the graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_prelu_add graph execution complete.")
python/conv_fusion/conv_bias_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph: conv_fprop -> add (bias) -> relu.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution image.
        torch_tensor_w: Torch tensor describing the convolution weight.
        torch_tensor_bias: Torch tensor added to the convolution output.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: hipDNN data type used as the graph I/O type.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` holding the built graph, the three
        graph input tensors and the graph output tensor.
    """
    # Intermediate results and computation both use FLOAT.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_relu",
    )

    # Graph tensors created from the torch tensors, in argument order.
    x_desc, w_desc, bias_desc = (
        graph.tensor_like(torch_tensor)
        for torch_tensor in (torch_tensor_x, torch_tensor_w, torch_tensor_bias)
    )

    # Chain the operations: convolution, bias add, ReLU.
    conv_out = graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = graph.add(a=conv_out, b=bias_desc, name="bias")
    y_desc = graph.relu(input=biased, name="relu")

    # Only the final tensor is marked as a graph output.
    y_desc.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x_desc, w_desc, bias_desc, y_desc
if __name__ == "__main__":
    # Example driver: build the conv + bias + ReLU graph and execute it once
    # on random half-precision data.

    # Input is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 16, 16, 16
    k, r, s = 16, 3, 3

    # Convolution parameters as [height, width] pairs.
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    def random_channels_last(*shape):
        """Return a random tensor of ``shape`` converted to channels_last."""
        tensor = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return tensor.to(memory_format=torch.channels_last)

    torch_tensor_x = random_channels_last(n, c, h, w)
    torch_tensor_w = random_channels_last(k, c, r, s)
    torch_tensor_bias = random_channels_last(1, k, 1, 1)

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        padding,
        stride,
        dilation,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Bind every graph tensor to the data pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Byte buffer of the size requested by the graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_relu graph execution complete.")
python/conv_fusion/conv_bias_swish.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_swish_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph: conv_fprop -> add (bias) -> swish.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution image.
        torch_tensor_w: Torch tensor describing the convolution weight.
        torch_tensor_bias: Torch tensor added to the convolution output.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: hipDNN data type used as the graph I/O type.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` holding the built graph, the three
        graph input tensors and the graph output tensor.
    """
    # Intermediate results and computation both use FLOAT.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_swish",
    )

    # Graph tensors created from the torch tensors, in argument order.
    x_desc, w_desc, bias_desc = (
        graph.tensor_like(torch_tensor)
        for torch_tensor in (torch_tensor_x, torch_tensor_w, torch_tensor_bias)
    )

    # Chain the operations: convolution, bias add, swish.
    conv_out = graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = graph.add(a=conv_out, b=bias_desc, name="bias")
    y_desc = graph.swish(input=biased, name="swish")

    # Only the final tensor is marked as a graph output.
    y_desc.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x_desc, w_desc, bias_desc, y_desc
if __name__ == "__main__":
    # Example driver: build the conv + bias + swish graph and execute it once
    # on random half-precision data.

    # Input is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 16, 16, 16
    k, r, s = 16, 3, 3

    # Convolution parameters as [height, width] pairs.
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    def random_channels_last(*shape):
        """Return a random tensor of ``shape`` converted to channels_last."""
        tensor = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return tensor.to(memory_format=torch.channels_last)

    torch_tensor_x = random_channels_last(n, c, h, w)
    torch_tensor_w = random_channels_last(k, c, r, s)
    torch_tensor_bias = random_channels_last(1, k, 1, 1)

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_bias_swish_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        padding,
        stride,
        dilation,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Bind every graph tensor to the data pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Byte buffer of the size requested by the graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_swish graph execution complete.")
python/conv_fusion/conv_bias_swish_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_swish_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph: conv_fprop -> add (bias) -> swish -> add.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution image.
        torch_tensor_w: Torch tensor describing the convolution weight.
        torch_tensor_bias: Torch tensor added to the convolution output.
        torch_tensor_add: Torch tensor added to the swish output.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: hipDNN data type used as the graph I/O type.

    Returns:
        Tuple ``(graph, x, w, bias, add, y)`` holding the built graph, the
        four graph input tensors and the graph output tensor.
    """
    # Intermediate results and computation both use FLOAT.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_swish_add",
    )

    # Graph tensors created from the torch tensors, in argument order.
    x_desc, w_desc, bias_desc, residual_desc = (
        graph.tensor_like(torch_tensor)
        for torch_tensor in (
            torch_tensor_x,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_add,
        )
    )

    # Chain the operations: convolution, bias add, swish, second add.
    conv_out = graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = graph.add(a=conv_out, b=bias_desc, name="bias")
    activated = graph.swish(input=biased, name="swish")
    y_desc = graph.add(a=activated, b=residual_desc, name="add")

    # Only the final tensor is marked as a graph output.
    y_desc.set_output(True)

    graph.build(hipdnn_handle)
    return (graph, x_desc, w_desc, bias_desc, residual_desc, y_desc)
if __name__ == "__main__":
    # Example driver: build the conv + bias + swish + add graph and execute
    # it once on random half-precision data.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width

    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial size of the convolution output (standard convolution
    # arithmetic with floor division).
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    # The graph adds this tensor to the activation output, so it is shaped
    # like the convolution output (n, k, out_h, out_w) rather than like the
    # input. With the default sizes above both shapes are (1, 16, 16, 16).
    torch_tensor_add = torch.rand(
        n, k, out_h, out_w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_swish_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Bind every graph tensor to the data pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte buffer of the size requested by the graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_swish_add graph execution complete.")
python/conv_fusion/convbwd_bias_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convBwd_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    output_padding,
    hipdnn_data_type,
):
    """Build a hipDNN graph: conv_dgrad -> add (bias) -> relu.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the ``loss`` input of
            ``conv_dgrad``.
        torch_tensor_w: Torch tensor describing the ``filter`` input of
            ``conv_dgrad``.
        torch_tensor_bias: Torch tensor added to the ``conv_dgrad`` output.
        padding: Padding forwarded to ``conv_dgrad``.
        stride: Stride forwarded to ``conv_dgrad``.
        dilation: Dilation forwarded to ``conv_dgrad``.
        output_padding: Output padding forwarded to ``conv_dgrad``.
        hipdnn_data_type: hipDNN data type used as the graph I/O type.

    Returns:
        Tuple ``(graph, x, w, bias, y)`` holding the built graph, the three
        graph input tensors and the graph output tensor.
    """
    # Intermediate results and computation both use FLOAT.
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="convBwd_bias_relu",
    )

    # Graph tensors created from the torch tensors, in argument order.
    x_desc, w_desc, bias_desc = (
        graph.tensor_like(torch_tensor)
        for torch_tensor in (torch_tensor_x, torch_tensor_w, torch_tensor_bias)
    )

    # Chain the operations: data-gradient convolution, bias add, ReLU.
    dgrad_out = graph.conv_dgrad(
        loss=x_desc,
        filter=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        output_padding=output_padding,
        name="conv2d",
    )
    biased = graph.add(a=dgrad_out, b=bias_desc, name="bias")
    y_desc = graph.relu(input=biased, name="relu")

    # Only the final tensor is marked as a graph output.
    y_desc.set_output(True)

    graph.build(hipdnn_handle)
    return graph, x_desc, w_desc, bias_desc, y_desc
if __name__ == "__main__":
    # Example driver: build the conv_dgrad + bias + ReLU graph and execute it
    # once on random half-precision data.

    # First tensor is (n, c, h, w); filter is (k, c, r, s).
    n, c, h, w = 1, 32, 270, 480
    k, r, s = 32, 3, 3

    # Convolution parameters as [height, width] pairs.
    padding = [0, 0]
    stride = [2, 2]
    dilation = [1, 1]
    output_padding = [1, 1]

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    def random_channels_last(*shape):
        """Return a random tensor of ``shape`` converted to channels_last."""
        tensor = torch.rand(*shape, dtype=torch_data_type, device="cuda")
        return tensor.to(memory_format=torch.channels_last)

    torch_tensor_x = random_channels_last(n, c, h, w)
    torch_tensor_w = random_channels_last(k, c, r, s)
    torch_tensor_bias = random_channels_last(1, k, 1, 1)

    hipdnn_handle = hipdnn.create_handle()

    built = build_convBwd_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        padding,
        stride,
        dilation,
        output_padding,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Bind every graph tensor to the data pointer of its torch buffer.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Byte buffer of the size requested by the graph.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convBwd_bias_relu graph execution complete.")
python/conv_fusion/convint8_bias.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph: conv_fprop -> dequantize -> bias -> quantize.

    Dequantize is expressed as ``sub`` (zero point) then ``mul`` (scale);
    quantize as ``div`` (scale) then ``add`` (zero point). The four
    zero-point/scale graph tensors get fixed values through ``set_value``
    (0.0 for zero points, 1.0 for scales) and are not returned.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution image.
        torch_tensor_w: Torch tensor describing the convolution weight.
        torch_tensor_zero_point_dq: Template for the dequantize zero point.
        torch_tensor_scale_dq: Template for the dequantize scale.
        torch_tensor_bias: Torch tensor added after dequantization.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: hipDNN data type used as the graph I/O and
            intermediate type.

    Returns:
        Tuple ``(graph, x, w, bias, output)`` holding the built graph, the
        three graph input tensors and the graph output tensor.
    """
    # Arguments passed to set_vector_count_and_dimension for the image,
    # weight and output tensors (NCHWc32 layout per the original sample).
    vector_count, vector_dim = 32, 1

    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias",
    )

    def fixed_value_tensor(torch_tensor, value):
        """Create a graph tensor like ``torch_tensor`` and set its value."""
        tensor = graph.tensor_like(torch_tensor)
        tensor.set_value(value)
        return tensor

    # Convolution on the vectorized image and weight tensors.
    x_desc = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(
        vector_count, vector_dim
    )
    w_desc = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(
        vector_count, vector_dim
    )
    conv_out = graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # Dequantize: subtract the zero point, then multiply by the scale.
    zero_point_dq = fixed_value_tensor(torch_tensor_zero_point_dq, 0.0)
    shifted = graph.sub(a=conv_out, b=zero_point_dq, name="conv_deq_sub_node")
    scale_dq = fixed_value_tensor(torch_tensor_scale_dq, 1.0)
    dequantized = graph.mul(a=shifted, b=scale_dq, name="conv_deq_mul_node")

    # Bias add.
    bias_desc = graph.tensor_like(torch_tensor_bias)
    biased = graph.add(a=dequantized, b=bias_desc, name="bias_node")

    # Quantize: divide by the scale, then add the zero point.
    scale_q = fixed_value_tensor(torch_tensor_scale_q, 1.0)
    scaled = graph.div(a=biased, b=scale_q, name="quantize_div_node")
    zero_point_q = fixed_value_tensor(torch_tensor_zero_point_q, 0.0)
    out_desc = graph.add(a=scaled, b=zero_point_q, name="quantize_add_node")

    # Mark the final tensor as output and vectorize it like the inputs.
    out_desc.set_output(True).set_vector_count_and_dimension(
        vector_count, vector_dim
    )

    graph.build(hipdnn_handle)
    return (graph, x_desc, w_desc, bias_desc, out_desc)
if __name__ == "__main__":
    # Example driver: build the int8 conv + bias graph and execute it once on
    # random data.

    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128,
        high=128,
        size=(k, c, r, s),
        dtype=torch_data_type,
        device="cuda",
    )

    # Quantization parameter templates. Their dtype is set explicitly from
    # quantize_data_type instead of relying on torch's default dtype. The
    # graph builder assigns the actual values through set_value, so these
    # tensors are not bound in the variant pack below.
    torch_tensor_zero_point_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=bias_data_type, device="cuda"
    )
    torch_tensor_zero_point_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = build_convint8_bias_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Bind every returned graph tensor to the data pointer of its buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte buffer of the size requested by the graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias graph execution complete.")
python/conv_fusion/convint8_bias_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_add,
    torch_tensor_zero_point_dq_add,
    torch_tensor_scale_dq_add,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph: int8 conv -> dequantize -> bias -> add -> quantize.

    The convolution output and the extra ``add`` input are each dequantized
    with ``sub`` (zero point) then ``mul`` (scale). The two results are
    added together after the bias add, and the sum is quantized with ``div``
    (scale) then ``add`` (zero point). All zero-point/scale graph tensors
    get fixed values through ``set_value`` (0.0 for zero points, 1.0 for
    scales) and are not returned.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution image.
        torch_tensor_w: Torch tensor describing the convolution weight.
        torch_tensor_zero_point_dq: Template for the conv dequantize zero
            point.
        torch_tensor_scale_dq: Template for the conv dequantize scale.
        torch_tensor_bias: Torch tensor added after conv dequantization.
        torch_tensor_add: Torch tensor describing the extra quantized input.
        torch_tensor_zero_point_dq_add: Template for the extra input's
            dequantize zero point.
        torch_tensor_scale_dq_add: Template for the extra input's dequantize
            scale.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: hipDNN data type used as the graph I/O and
            intermediate type.

    Returns:
        Tuple ``(graph, x, w, bias, add, output)`` holding the built graph,
        the four graph input tensors and the graph output tensor.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_add",
    )

    # Create hipdnn conv input and filter tensor with NCHWc32 layout
    hipdnn_tensor_x = graph.tensor_like(
        torch_tensor_x
    ).set_vector_count_and_dimension(32, 1)
    hipdnn_tensor_w = graph.tensor_like(
        torch_tensor_w
    ).set_vector_count_and_dimension(32, 1)

    # Create conv_fprop op
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # Create sub node for dequantize:zero_point_dq
    hipdnn_tensor_zero_point_dq = graph.tensor_like(torch_tensor_zero_point_dq)
    hipdnn_tensor_zero_point_dq.set_value(0.0)
    hipdnn_tensor_conv_deq_sub_output = graph.sub(
        hipdnn_tensor_conv_output,
        hipdnn_tensor_zero_point_dq,
        name="conv_deq_sub_node",
    )

    # Create mul node for dequantize:scale_dq
    hipdnn_tensor_scale_dq = graph.tensor_like(torch_tensor_scale_dq)
    hipdnn_tensor_scale_dq.set_value(1.0)
    hipdnn_tensor_conv_deq_mul_output = graph.mul(
        hipdnn_tensor_conv_deq_sub_output,
        hipdnn_tensor_scale_dq,
        name="conv_deq_mul_node",
    )

    # Create bias node
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_bias_output = graph.add(
        hipdnn_tensor_conv_deq_mul_output,
        hipdnn_tensor_bias,
        name="bias_node",
    )

    # Create the extra add input (still quantized at this point)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)
    hipdnn_tensor_add.set_vector_count_and_dimension(32, 1)

    # Create sub node for dequantize:zero_point_dq_add
    hipdnn_tensor_zero_point_dq_add = graph.tensor_like(
        torch_tensor_zero_point_dq_add
    )
    hipdnn_tensor_zero_point_dq_add.set_value(0.0)
    hipdnn_tensor_add_deq_sub_output = graph.sub(
        hipdnn_tensor_add,
        hipdnn_tensor_zero_point_dq_add,
        name="add_deq_sub_node",
    )

    # Create mul node for dequantize:scale_dq_add
    hipdnn_tensor_scale_dq_add = graph.tensor_like(torch_tensor_scale_dq_add)
    hipdnn_tensor_scale_dq_add.set_value(1.0)
    hipdnn_tensor_add_deq_mul_output = graph.mul(
        hipdnn_tensor_add_deq_sub_output,
        hipdnn_tensor_scale_dq_add,
        name="add_deq_mul_node",
    )

    # Create add op
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_bias_output,
        b=hipdnn_tensor_add_deq_mul_output,
        name="add_node",
    )

    # Create div node for quantize:scale_q
    hipdnn_tensor_scale_q = graph.tensor_like(torch_tensor_scale_q)
    hipdnn_tensor_scale_q.set_value(1.0)
    hipdnn_tensor_quantize_div_output = graph.div(
        a=hipdnn_tensor_add_output,
        b=hipdnn_tensor_scale_q,
        name="quantize_div_node",
    )

    # Create add node for quantize:zero_point_q
    hipdnn_tensor_zero_point_q = graph.tensor_like(torch_tensor_zero_point_q)
    hipdnn_tensor_zero_point_q.set_value(0.0)
    hipdnn_tensor_output = graph.add(
        hipdnn_tensor_quantize_div_output,
        hipdnn_tensor_zero_point_q,
        name="quantize_add_node",
    )

    # Mark the final tensor as output and vectorize it like the inputs.
    hipdnn_tensor_output.set_output(True).set_vector_count_and_dimension(32, 1)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_output,
    )
if __name__ == "__main__":
    # Example driver: build the int8 conv + bias + add graph and execute it
    # once on random data.

    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial size of the convolution output. Integer floor division keeps
    # the arithmetic exact instead of going through a float and int().
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128,
        high=128,
        size=(k, c, r, s),
        dtype=torch_data_type,
        device="cuda",
    )

    # Quantization parameter templates. Their dtype is set explicitly from
    # quantize_data_type instead of relying on torch's default dtype. The
    # graph builder assigns the actual values through set_value, so these
    # tensors are not bound in the variant pack below.
    torch_tensor_zero_point_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=bias_data_type, device="cuda"
    )
    # Extra quantized input, shaped like the convolution output.
    torch_tensor_add = torch.randint(
        low=-128,
        high=128,
        size=(n, k, out_h, out_w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_zero_point_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_zero_point_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_convint8_bias_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_add,
        torch_tensor_zero_point_dq_add,
        torch_tensor_scale_dq_add,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Bind every returned graph tensor to the data pointer of its buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte buffer of the size requested by the graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias_add graph execution complete.")
python/conv_fusion/convint8_bias_add_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_add_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_add,
    torch_tensor_zero_point_dq_add,
    torch_tensor_scale_dq_add,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the fused int8 conv -> dequantize -> bias -> add -> relu -> quantize graph.

    Node order: conv_fprop, (sub zero point, mul scale), bias add, residual
    add of a separately dequantized tensor, relu, (div scale, add zero point).

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Convolution input, template for the image tensor.
        torch_tensor_w: Convolution filter, template for the weight tensor.
        torch_tensor_zero_point_dq: Template for the conv dequantize zero point.
        torch_tensor_scale_dq: Template for the conv dequantize scale.
        torch_tensor_bias: Template for the bias tensor.
        torch_tensor_add: Template for the residual tensor that is added.
        torch_tensor_zero_point_dq_add: Template for the residual zero point.
        torch_tensor_scale_dq_add: Template for the residual scale.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as both io and intermediate data type.

    Returns:
        Tuple ``(graph, x, w, bias, add, output)`` of the built graph and the
        graph tensors the caller has to bind in the variant pack.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_add_relu",
    )

    # Create hipdnn conv input and filter tensor with NCHWc32 layout
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)

    # Create conv_fprop op
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # NOTE(review): every zero-point/scale graph tensor below gets a fixed
    # value through set_value (0.0 / 1.0); the torch tensors are only handed
    # to tensor_like. Confirm that ignoring their contents is intended.

    # Create sub node for dequantize: zero_point_dq
    hipdnn_tensor_zero_point_dq = graph.tensor_like(torch_tensor_zero_point_dq)
    hipdnn_tensor_zero_point_dq.set_value(0.0)
    hipdnn_tensor_conv_deq_sub_output = graph.sub(
        hipdnn_tensor_conv_output, hipdnn_tensor_zero_point_dq, name="conv_deq_sub_node"
    )

    # Create mul node for dequantize: scale_dq
    hipdnn_tensor_scale_dq = graph.tensor_like(torch_tensor_scale_dq)
    hipdnn_tensor_scale_dq.set_value(1.0)
    hipdnn_tensor_conv_deq_mul_output = graph.mul(
        hipdnn_tensor_conv_deq_sub_output, hipdnn_tensor_scale_dq, name="conv_deq_mul_node"
    )

    # Create bias node
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_bias_output = graph.add(
        hipdnn_tensor_conv_deq_mul_output, hipdnn_tensor_bias, name="bias_node"
    )

    # Create the tensor to add (original, not yet dequantized input)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)
    hipdnn_tensor_add.set_vector_count_and_dimension(32, 1)

    # Create sub node for dequantize: zero_point_dq_add
    hipdnn_tensor_zero_point_dq_add = graph.tensor_like(torch_tensor_zero_point_dq_add)
    hipdnn_tensor_zero_point_dq_add.set_value(0.0)
    hipdnn_tensor_add_deq_sub_output = graph.sub(
        hipdnn_tensor_add, hipdnn_tensor_zero_point_dq_add, name="add_deq_sub_node"
    )

    # Create mul node for dequantize: scale_dq_add
    hipdnn_tensor_scale_dq_add = graph.tensor_like(torch_tensor_scale_dq_add)
    hipdnn_tensor_scale_dq_add.set_value(1.0)
    hipdnn_tensor_add_deq_mul_output = graph.mul(
        hipdnn_tensor_add_deq_sub_output, hipdnn_tensor_scale_dq_add, name="add_deq_mul_node"
    )

    # Create add op
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_bias_output, b=hipdnn_tensor_add_deq_mul_output, name="add_node"
    )

    # Create relu node
    hipdnn_tensor_relu_output = graph.relu(input=hipdnn_tensor_add_output, name="relu_node")

    # Create div node for quantize: scale_q
    hipdnn_tensor_scale_q = graph.tensor_like(torch_tensor_scale_q)
    hipdnn_tensor_scale_q.set_value(1.0)
    hipdnn_tensor_quantize_div_output = graph.div(
        a=hipdnn_tensor_relu_output, b=hipdnn_tensor_scale_q, name="quantize_div_node"
    )

    # Create add node for quantize: zero_point_q
    hipdnn_tensor_zero_point_q = graph.tensor_like(torch_tensor_zero_point_q)
    hipdnn_tensor_zero_point_q.set_value(0.0)
    hipdnn_tensor_output = graph.add(
        hipdnn_tensor_quantize_div_output, hipdnn_tensor_zero_point_q, name="quantize_add_node"
    )
    hipdnn_tensor_output.set_output(True).set_vector_count_and_dimension(32, 1)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_output,
    )
if __name__ == "__main__":
    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial size of the convolution output, used to size the tensor that is
    # added to it. Integer floor division avoids a float round trip.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    # randint's upper bound is exclusive, so [-128, 128) spans all of int8.
    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128, high=128, size=(k, c, r, s), dtype=torch_data_type, device="cuda"
    )
    # Scale / zero-point tensors carry the quantization dtype explicitly
    # instead of relying on torch's default dtype.
    torch_tensor_zero_point_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    torch_tensor_add = torch.randint(
        low=-128, high=128, size=(n, k, out_h, out_w), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_zero_point_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq_add = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_convint8_bias_add_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_add,
        torch_tensor_zero_point_dq_add,
        torch_tensor_scale_dq_add,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte-typed scratch buffer of the size the graph asks for.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias_add_relu graph execution complete.")
python/conv_fusion/convint8_bias_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the fused int8 conv -> dequantize -> bias -> relu -> quantize graph.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Convolution input, template for the image tensor.
        torch_tensor_w: Convolution filter, template for the weight tensor.
        torch_tensor_zero_point_dq: Template for the dequantize zero point.
        torch_tensor_scale_dq: Template for the dequantize scale.
        torch_tensor_bias: Template for the bias tensor.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as both io and intermediate data type.

    Returns:
        Tuple ``(graph, x, w, bias, output)`` of the built graph and the graph
        tensors the caller has to bind in the variant pack.
    """
    # Vectorization used for the NCHWc32 layout: 32 elements on dimension 1.
    vector_count = 32
    vector_dim = 1

    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_relu",
    )

    # Convolution input and filter.
    x_tensor = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(
        vector_count, vector_dim
    )
    w_tensor = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(
        vector_count, vector_dim
    )

    conv_out = graph.conv_fprop(
        image=x_tensor,
        weight=w_tensor,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # Dequantize, step 1: subtract the zero point (fixed to 0.0 here).
    zero_point_dq = graph.tensor_like(torch_tensor_zero_point_dq)
    zero_point_dq.set_value(0.0)
    deq_sub_out = graph.sub(a=conv_out, b=zero_point_dq, name="conv_deq_sub_node")

    # Dequantize, step 2: multiply by the scale (fixed to 1.0 here).
    scale_dq = graph.tensor_like(torch_tensor_scale_dq)
    scale_dq.set_value(1.0)
    deq_mul_out = graph.mul(a=deq_sub_out, b=scale_dq, name="conv_deq_mul_node")

    # Bias addition.
    bias_tensor = graph.tensor_like(torch_tensor_bias)
    bias_out = graph.add(a=deq_mul_out, b=bias_tensor, name="bias_node")

    # Activation.
    relu_out = graph.relu(input=bias_out, name="relu_node")

    # Quantize, step 1: divide by the scale (fixed to 1.0 here).
    scale_q = graph.tensor_like(torch_tensor_scale_q)
    scale_q.set_value(1.0)
    quant_div_out = graph.div(a=relu_out, b=scale_q, name="quantize_div_node")

    # Quantize, step 2: add the zero point (fixed to 0.0 here).
    zero_point_q = graph.tensor_like(torch_tensor_zero_point_q)
    zero_point_q.set_value(0.0)
    y_tensor = graph.add(a=quant_div_out, b=zero_point_q, name="quantize_add_node")
    y_tensor.set_output(True).set_vector_count_and_dimension(vector_count, vector_dim)

    graph.build(hipdnn_handle)

    return (graph, x_tensor, w_tensor, bias_tensor, y_tensor)
if __name__ == "__main__":
    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    # randint's upper bound is exclusive, so [-128, 128) spans all of int8.
    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128, high=128, size=(k, c, r, s), dtype=torch_data_type, device="cuda"
    )
    # Scale / zero-point tensors carry the quantization dtype explicitly
    # instead of relying on torch's default dtype.
    torch_tensor_zero_point_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = build_convint8_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte-typed scratch buffer of the size the graph asks for.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias_relu graph execution complete.")
python/conv_fusion/convint8_bias_relu_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_relu_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_add,
    torch_tensor_zero_point_dq_add,
    torch_tensor_scale_dq_add,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the fused int8 conv -> dequantize -> bias -> relu -> add -> quantize graph.

    Node order: conv_fprop, (sub zero point, mul scale), bias add, relu,
    residual add of a separately dequantized tensor, (div scale, add zero
    point).

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Convolution input, template for the image tensor.
        torch_tensor_w: Convolution filter, template for the weight tensor.
        torch_tensor_zero_point_dq: Template for the conv dequantize zero point.
        torch_tensor_scale_dq: Template for the conv dequantize scale.
        torch_tensor_bias: Template for the bias tensor.
        torch_tensor_add: Template for the residual tensor that is added.
        torch_tensor_zero_point_dq_add: Template for the residual zero point.
        torch_tensor_scale_dq_add: Template for the residual scale.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as both io and intermediate data type.

    Returns:
        Tuple ``(graph, x, w, bias, add, output)`` of the built graph and the
        graph tensors the caller has to bind in the variant pack.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_relu_add",
    )

    # Create hipdnn conv input and filter tensor with NCHWc32 layout
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)

    # Create conv_fprop op
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # NOTE(review): every zero-point/scale graph tensor below gets a fixed
    # value through set_value (0.0 / 1.0); the torch tensors are only handed
    # to tensor_like. Confirm that ignoring their contents is intended.

    # Create sub node for dequantize: zero_point_dq
    hipdnn_tensor_zero_point_dq = graph.tensor_like(torch_tensor_zero_point_dq)
    hipdnn_tensor_zero_point_dq.set_value(0.0)
    hipdnn_tensor_conv_deq_sub_output = graph.sub(
        hipdnn_tensor_conv_output, hipdnn_tensor_zero_point_dq, name="conv_deq_sub_node"
    )

    # Create mul node for dequantize: scale_dq
    hipdnn_tensor_scale_dq = graph.tensor_like(torch_tensor_scale_dq)
    hipdnn_tensor_scale_dq.set_value(1.0)
    hipdnn_tensor_conv_deq_mul_output = graph.mul(
        hipdnn_tensor_conv_deq_sub_output, hipdnn_tensor_scale_dq, name="conv_deq_mul_node"
    )

    # Create bias node
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_bias_output = graph.add(
        hipdnn_tensor_conv_deq_mul_output, hipdnn_tensor_bias, name="bias_node"
    )

    # Create relu node
    hipdnn_tensor_relu_output = graph.relu(input=hipdnn_tensor_bias_output, name="relu_node")

    # Create the tensor to add (original, not yet dequantized input)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)
    hipdnn_tensor_add.set_vector_count_and_dimension(32, 1)

    # Create sub node for dequantize: zero_point_dq_add
    hipdnn_tensor_zero_point_dq_add = graph.tensor_like(torch_tensor_zero_point_dq_add)
    hipdnn_tensor_zero_point_dq_add.set_value(0.0)
    hipdnn_tensor_add_deq_sub_output = graph.sub(
        hipdnn_tensor_add, hipdnn_tensor_zero_point_dq_add, name="add_deq_sub_node"
    )

    # Create mul node for dequantize: scale_dq_add
    hipdnn_tensor_scale_dq_add = graph.tensor_like(torch_tensor_scale_dq_add)
    hipdnn_tensor_scale_dq_add.set_value(1.0)
    hipdnn_tensor_add_deq_mul_output = graph.mul(
        hipdnn_tensor_add_deq_sub_output, hipdnn_tensor_scale_dq_add, name="add_deq_mul_node"
    )

    # Create add op
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_relu_output, b=hipdnn_tensor_add_deq_mul_output, name="add_node"
    )

    # Create div node for quantize: scale_q
    hipdnn_tensor_scale_q = graph.tensor_like(torch_tensor_scale_q)
    hipdnn_tensor_scale_q.set_value(1.0)
    hipdnn_tensor_quantize_div_output = graph.div(
        a=hipdnn_tensor_add_output, b=hipdnn_tensor_scale_q, name="quantize_div_node"
    )

    # Create add node for quantize: zero_point_q
    hipdnn_tensor_zero_point_q = graph.tensor_like(torch_tensor_zero_point_q)
    hipdnn_tensor_zero_point_q.set_value(0.0)
    hipdnn_tensor_output = graph.add(
        hipdnn_tensor_quantize_div_output, hipdnn_tensor_zero_point_q, name="quantize_add_node"
    )
    hipdnn_tensor_output.set_output(True).set_vector_count_and_dimension(32, 1)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_output,
    )
if __name__ == "__main__":
    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial size of the convolution output, used to size the tensor that is
    # added to it. Integer floor division avoids a float round trip.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    # randint's upper bound is exclusive, so [-128, 128) spans all of int8.
    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128, high=128, size=(k, c, r, s), dtype=torch_data_type, device="cuda"
    )
    # Scale / zero-point tensors carry the quantization dtype explicitly
    # instead of relying on torch's default dtype.
    torch_tensor_zero_point_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    torch_tensor_add = torch.randint(
        low=-128, high=128, size=(n, k, out_h, out_w), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_zero_point_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq_add = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_convint8_bias_relu_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_add,
        torch_tensor_zero_point_dq_add,
        torch_tensor_scale_dq_add,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte-typed scratch buffer of the size the graph asks for.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias_relu_add graph execution complete.")
python/convolution/convolution_bwd.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_backward_graph(
    hipdnn_handle, torch_tensor_dy, torch_tensor_w, padding, stride, dilation, hipdnn_data_type
):
    """Build a single-node graph computing the convolution data gradient.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_dy: Template for the ``loss`` input of ``conv_dgrad``.
        torch_tensor_w: Template for the ``filter`` input of ``conv_dgrad``.
        padding: Padding forwarded to ``conv_dgrad``.
        stride: Stride forwarded to ``conv_dgrad``.
        dilation: Dilation forwarded to ``conv_dgrad``.
        hipdnn_data_type: io data type of the graph.

    Returns:
        Tuple ``(graph, dy, w, dx)`` with the built graph and its tensors.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="convolution_backward",
    )

    # Graph-side descriptions of the two inputs.
    dy_tensor = graph.tensor_like(torch_tensor_dy)
    w_tensor = graph.tensor_like(torch_tensor_w)

    # The data-gradient node; its result is the only graph output.
    dx_tensor = graph.conv_dgrad(
        loss=dy_tensor,
        filter=w_tensor,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d_backward",
    )
    dx_tensor.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, dy_tensor, w_tensor, dx_tensor)
if __name__ == "__main__":
    # Input dimensions: batch, input channels, height, width
    n = 4
    c = 32
    h = 16
    w = 16

    # Filter dimensions: output channels, filter height, filter width
    k = 64
    r = 3
    s = 3

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # With a 3x3 filter, padding 1 and stride 1 the forward output keeps the
    # h x w spatial size, so dy reuses h and w.
    torch_tensor_dy = torch.rand(n, k, h, w, dtype=torch_data_type, device="cuda")
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_backward_graph(
        hipdnn_handle,
        torch_tensor_dy,
        torch_tensor_w,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_dy, hipdnn_tensor_w, hipdnn_tensor_dx = built

    # Result buffer sized from the dimensions reported by the graph tensor.
    dx_dims = hipdnn_tensor_dx.get_dim()
    torch_tensor_dx = torch.empty(dx_dims, dtype=torch_data_type, device="cuda")

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        hipdnn_tensor_dy: torch_tensor_dy.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_dx: torch_tensor_dx.data_ptr(),
    }

    # Byte-typed scratch buffer of the size the graph asks for.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Convolution backward graph execution complete.")
python/convolution/convolution_fwd.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_forward_graph(
    hipdnn_handle, torch_tensor_x, torch_tensor_w, padding, stride, dilation, hipdnn_data_type
):
    """Build a single-node graph computing a forward convolution.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template for the ``image`` input of ``conv_fprop``.
        torch_tensor_w: Template for the ``weight`` input of ``conv_fprop``.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: io data type of the graph.

    Returns:
        Tuple ``(graph, x, w, y)`` with the built graph and its tensors.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="convolution_forward",
    )

    # Graph-side descriptions of the two inputs.
    x_tensor = graph.tensor_like(torch_tensor_x)
    w_tensor = graph.tensor_like(torch_tensor_w)

    # The forward convolution node; its result is the only graph output.
    y_tensor = graph.conv_fprop(
        image=x_tensor,
        weight=w_tensor,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d_forward",
    )
    y_tensor.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x_tensor, w_tensor, y_tensor)
if __name__ == "__main__":
    # Input dimensions: batch, input channels, height, width
    n = 4
    c = 16
    h = 56
    w = 56

    # Filter dimensions: output channels, filter height, filter width
    k = 4
    r = 1
    s = 1

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda")
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_forward_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_y = built

    # Result buffer sized from the dimensions reported by the graph tensor.
    y_dims = hipdnn_tensor_y.get_dim()
    torch_tensor_y = torch.empty(y_dims, dtype=torch_data_type, device="cuda")

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte-typed scratch buffer of the size the graph asks for.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Convolution forward graph execution complete.")
python/convolution/convolution_wrw.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_wrw_graph(
    hipdnn_handle, torch_tensor_x, torch_tensor_dy, padding, stride, dilation, hipdnn_data_type
):
    """Build a single-node graph computing the convolution weight gradient.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Template for the ``image`` input of ``conv_wgrad``.
        torch_tensor_dy: Template for the ``loss`` input of ``conv_wgrad``.
        padding: Padding forwarded to ``conv_wgrad``.
        stride: Stride forwarded to ``conv_wgrad``.
        dilation: Dilation forwarded to ``conv_wgrad``.
        hipdnn_data_type: io data type of the graph.

    Returns:
        Tuple ``(graph, x, dy, dw)`` with the built graph and its tensors.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="convolution_wrw",
    )

    # Graph-side descriptions of the two inputs.
    x_tensor = graph.tensor_like(torch_tensor_x)
    dy_tensor = graph.tensor_like(torch_tensor_dy)

    # The weight-gradient node; its result is the only graph output.
    dw_tensor = graph.conv_wgrad(
        image=x_tensor,
        loss=dy_tensor,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d_wrw",
    )
    dw_tensor.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x_tensor, dy_tensor, dw_tensor)
if __name__ == "__main__":
    # Input dimensions: batch, input channels, height, width
    n = 4
    c = 32
    h = 16
    w = 16

    # Filter dimensions: output channels, filter height, filter width
    k = 64
    r = 3
    s = 3

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda")
    # With a 3x3 filter, padding 1 and stride 1 the forward output keeps the
    # h x w spatial size, so dy reuses h and w.
    torch_tensor_dy = torch.rand(n, k, h, w, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_wrw_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_dy,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_dy, hipdnn_tensor_dw = built

    # Result buffer sized from the dimensions reported by the graph tensor.
    dw_dims = hipdnn_tensor_dw.get_dim()
    torch_tensor_dw = torch.empty(dw_dims, dtype=torch_data_type, device="cuda")

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_dy: torch_tensor_dy.data_ptr(),
        hipdnn_tensor_dw: torch_tensor_dw.data_ptr(),
    }

    # Byte-typed scratch buffer of the size the graph asks for.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Convolution wrw graph execution complete.")
python/ctc_loss/ctc_loss.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_ctc_loss_graph(
    hipdnn_handle,
    torch_tensor_probs,
    hipdnn_data_type,
    labels=(1, 2, 3, 4, 2, 3, 2),
    label_lengths=(1, 2, 1, 3),
    input_lengths=(4, 100, 100, 200),
    blank_label_id=0,
):
    """Build a graph with a single ``ctc_loss`` node.

    The label/length arguments default to the values this sample originally
    hard-coded, so existing three-argument calls behave exactly as before.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_probs: Template for the ``probs`` input of ``ctc_loss``.
        hipdnn_data_type: io data type of the graph.
        labels: Label ids forwarded to ``ctc_loss`` as a list.
            NOTE(review): presumably the per-sample label sequences
            concatenated; the defaults satisfy
            ``sum(label_lengths) == len(labels)``. Confirm against hipDNN.
        label_lengths: Label lengths forwarded to ``ctc_loss`` as a list.
        input_lengths: Input lengths forwarded to ``ctc_loss`` as a list.
        blank_label_id: Blank label id forwarded to ``ctc_loss``.

    Returns:
        Tuple ``(graph, probs, losses, gradients)`` with the built graph, its
        input tensor and the two output tensors.
    """
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="ctc_loss_inference",
    )

    hipdnn_tensor_probs = graph.tensor_like(torch_tensor_probs)

    # Lists are materialized so the node receives the same argument types as
    # the original hard-coded call, whatever sequence type the caller passes.
    losses, gradients = graph.ctc_loss(
        probs=hipdnn_tensor_probs,
        blank_label_id=blank_label_id,
        apply_softmax=False,
        algo=0,
        labels=list(labels),
        label_lengths=list(label_lengths),
        input_lengths=list(input_lengths),
        name="ctc_loss",
    )
    losses.set_output(True)
    gradients.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, hipdnn_tensor_probs, losses, gradients)
if __name__ == "__main__":
    batch = 4
    max_time = 500
    num_classes = 5

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Probabilities are laid out as (max_time, batch, num_classes).
    torch_tensor_probs = torch.rand(
        max_time, batch, num_classes, dtype=torch_data_type, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()

    built = build_ctc_loss_graph(hipdnn_handle, torch_tensor_probs, hipdnn_data_type)
    graph, hipdnn_tensor_probs, hipdnn_tensor_losses, hipdnn_tensor_gradients = built

    # One loss value per batch entry.
    torch_tensor_losses = torch.empty(batch, dtype=torch_data_type, device="cuda")
    # NOTE(review): this buffer is (batch, max_time, num_classes) while probs
    # is (max_time, batch, num_classes). The element count matches, but
    # confirm which layout hipDNN writes the gradients in.
    torch_tensor_gradients = torch.empty(
        batch, max_time, num_classes, dtype=torch_data_type, device="cuda"
    )

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        hipdnn_tensor_probs: torch_tensor_probs.data_ptr(),
        hipdnn_tensor_losses: torch_tensor_losses.data_ptr(),
        hipdnn_tensor_gradients: torch_tensor_gradients.data_ptr(),
    }

    # Byte-typed scratch buffer of the size the graph asks for.
    workspace_bytes = graph.get_workspace_size()
    workspace = torch.empty(workspace_bytes, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("ctc_loss graph execution complete.")
python/deformattention/deform_attention.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_attention_graph(
    hipdnn_handle,
    torch_tensor_value,
    torch_tensor_spatial_shapes,
    torch_tensor_level_start_index,
    torch_tensor_sampling_locations,
    torch_tensor_attention_weights,
    hipdnn_data_type,
):
    """Build a single-node graph running the deformable-attention forward op.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_value: Template for the ``value`` input.
        torch_tensor_spatial_shapes: Template for the ``spatial_shapes`` input.
        torch_tensor_level_start_index: Template for ``level_start_index``.
        torch_tensor_sampling_locations: Template for ``sampling_locations``.
        torch_tensor_attention_weights: Template for ``attention_weights``.
        hipdnn_data_type: io data type of the graph.

    Returns:
        Tuple ``(graph, value, spatial_shapes, level_start_index,
        sampling_locations, attention_weights, y)`` with the built graph, its
        five input tensors and the output tensor.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="deform_attention",
    )

    # Graph-side descriptions of the inputs, created in argument order.
    value = graph.tensor_like(torch_tensor_value)
    spatial_shapes = graph.tensor_like(torch_tensor_spatial_shapes)
    level_start_index = graph.tensor_like(torch_tensor_level_start_index)
    sampling_locations = graph.tensor_like(torch_tensor_sampling_locations)
    attention_weights = graph.tensor_like(torch_tensor_attention_weights)

    # The forward node; its result is the only graph output.
    y_tensor = graph.deform_attn_fprop(
        value=value,
        spatial_shapes=spatial_shapes,
        level_start_index=level_start_index,
        sampling_locations=sampling_locations,
        attention_weights=attention_weights,
        name="deform_attn_fprop",
    )
    y_tensor.set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        value,
        spatial_shapes,
        level_start_index,
        sampling_locations,
        attention_weights,
        y_tensor,
    )
if __name__ == "__main__":
    # Input dimensions
    n = 2  # batch size
    n_heads = 2
    embed_dims_per_head = 32
    n_levels = 2
    n_points = 2
    n_queries = 32

    # One pair of random extents in [1, 16) per level.
    spatial_shapes_cpu = torch.randint(low=1, high=16, size=(n_levels, 2), dtype=torch.int64)

    # Number of keys per level is the product of that level's two extents.
    count_per_level = spatial_shapes_cpu.prod(dim=1)
    # Total key count as a plain Python int so it can be used as a size.
    n_keys = int(count_per_level.sum())
    # Start offset of each level: exclusive prefix sum of the per-level counts.
    level_start_index_cpu = torch.zeros_like(count_per_level)
    level_start_index_cpu[1:] = torch.cumsum(count_per_level[:-1], dim=0)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_value = torch.rand(
        n, n_keys, n_heads, embed_dims_per_head, dtype=torch_data_type, device="cuda"
    )
    torch_tensor_spatial_shapes = spatial_shapes_cpu.to("cuda")
    torch_tensor_level_start_index = level_start_index_cpu.to("cuda")
    torch_tensor_sampling_locations = torch.rand(
        n, n_queries, n_heads, n_levels, n_points, 2, dtype=torch_data_type, device="cuda"
    )
    torch_tensor_attention_weights = torch.rand(
        n, n_queries, n_heads, n_levels, n_points, dtype=torch_data_type, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_value,
        hipdnn_tensor_spatial_shapes,
        hipdnn_tensor_level_start_index,
        hipdnn_tensor_sampling_locations,
        hipdnn_tensor_attention_weights,
        hipdnn_tensor_y,
    ) = build_deform_attention_graph(
        hipdnn_handle,
        torch_tensor_value,
        torch_tensor_spatial_shapes,
        torch_tensor_level_start_index,
        torch_tensor_sampling_locations,
        torch_tensor_attention_weights,
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")

    # Map each graph tensor to the device pointer of its backing torch tensor.
    variant_pack = {
        hipdnn_tensor_value: torch_tensor_value.data_ptr(),
        hipdnn_tensor_spatial_shapes: torch_tensor_spatial_shapes.data_ptr(),
        hipdnn_tensor_level_start_index: torch_tensor_level_start_index.data_ptr(),
        hipdnn_tensor_sampling_locations: torch_tensor_sampling_locations.data_ptr(),
        hipdnn_tensor_attention_weights: torch_tensor_attention_weights.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte-typed scratch buffer of the size the graph asks for.
    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Deform attention graph execution complete.")
python/deformattention/deform_attention_bwd.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_attention_bwd_graph(
    hipdnn_handle,
    torch_tensor_value,
    torch_tensor_spatial_shapes,
    torch_tensor_level_start_index,
    torch_tensor_sampling_locations,
    torch_tensor_attention_weights,
    torch_tensor_grad_output,
    hipdnn_data_type,
):
    """Build a single-node graph running the deformable-attention backward op.

    Args:
        hipdnn_handle: Handle passed to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_value: Template for the ``value`` input.
        torch_tensor_spatial_shapes: Template for the ``spatial_shapes`` input.
        torch_tensor_level_start_index: Template for ``level_start_index``.
        torch_tensor_sampling_locations: Template for ``sampling_locations``.
        torch_tensor_attention_weights: Template for ``attention_weights``.
        torch_tensor_grad_output: Template for the ``grad_output`` input.
        hipdnn_data_type: io data type of the graph.

    Returns:
        Tuple ``(graph, value, spatial_shapes, level_start_index,
        sampling_locations, attention_weights, grad_output, grad_value,
        grad_sampling_loc, grad_attn_weight)``: the built graph, its six
        input tensors and the three gradient outputs of ``deform_attn_dgrad``.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="deform_attention_bwd",
    )

    # Graph-side descriptions of the inputs, created in argument order.
    value = graph.tensor_like(torch_tensor_value)
    spatial_shapes = graph.tensor_like(torch_tensor_spatial_shapes)
    level_start_index = graph.tensor_like(torch_tensor_level_start_index)
    sampling_locations = graph.tensor_like(torch_tensor_sampling_locations)
    attention_weights = graph.tensor_like(torch_tensor_attention_weights)
    grad_output = graph.tensor_like(torch_tensor_grad_output)

    # The backward node yields three gradients; all are graph outputs.
    grad_value, grad_sampling_loc, grad_attn_weight = graph.deform_attn_dgrad(
        value=value,
        spatial_shapes=spatial_shapes,
        level_start_index=level_start_index,
        sampling_locations=sampling_locations,
        attention_weights=attention_weights,
        grad_output=grad_output,
        name="deform_attn_dgrad",
    )
    grad_value.set_output(True)
    grad_sampling_loc.set_output(True)
    grad_attn_weight.set_output(True)

    graph.build(hipdnn_handle)

    return (
        graph,
        value,
        spatial_shapes,
        level_start_index,
        sampling_locations,
        attention_weights,
        grad_output,
        grad_value,
        grad_sampling_loc,
        grad_attn_weight,
    )
if __name__ == "__main__":
    # Sample driver: build the deform-attention backward graph on random data
    # and execute it once.

    # Input dimensions
    n = 2  # batch size
    n_heads = 2
    embed_dims_per_head = 32
    embed_dims = n_heads * embed_dims_per_head
    n_levels = 2
    n_points = 2
    n_queries = 32

    # One random pair of extents per level, each drawn from [1, 16).
    # NOTE(review): presumably (height, width) per feature level -- confirm.
    spatial_shapes_cpu = torch.randint(
        low=1, high=16, size=(n_levels, 2), dtype=torch.int64
    )

    # Number of keys contributed by each level, computed once and reused.
    count_per_level = spatial_shapes_cpu.prod(dim=1)

    # Total key count as a plain Python int, so it can be used as a tensor
    # dimension below without relying on implicit 0-dim tensor conversion.
    n_keys = int(count_per_level.sum())

    # Exclusive prefix sum of the per-level counts: level 0 starts at 0.
    level_start_index_cpu = torch.zeros_like(count_per_level)
    level_start_index_cpu[1:] = torch.cumsum(count_per_level[:-1], dim=0)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_value = torch.rand(
        n, n_keys, n_heads, embed_dims_per_head, dtype=torch_data_type, device="cuda"
    )
    torch_tensor_spatial_shapes = spatial_shapes_cpu.to("cuda")
    torch_tensor_level_start_index = level_start_index_cpu.to("cuda")
    torch_tensor_sampling_locations = torch.rand(
        n,
        n_queries,
        n_heads,
        n_levels,
        n_points,
        2,
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_attention_weights = torch.rand(
        n,
        n_queries,
        n_heads,
        n_levels,
        n_points,
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_grad_output = torch.rand(
        n, n_queries, embed_dims, dtype=torch_data_type, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_value,
        hipdnn_tensor_spatial_shapes,
        hipdnn_tensor_level_start_index,
        hipdnn_tensor_sampling_locations,
        hipdnn_tensor_attention_weights,
        hipdnn_tensor_grad_output,
        hipdnn_tensor_grad_value,
        hipdnn_tensor_grad_sampling_loc,
        hipdnn_tensor_grad_attn_weight,
    ) = build_deform_attention_bwd_graph(
        hipdnn_handle,
        torch_tensor_value,
        torch_tensor_spatial_shapes,
        torch_tensor_level_start_index,
        torch_tensor_sampling_locations,
        torch_tensor_attention_weights,
        torch_tensor_grad_output,
        hipdnn_data_type,
    )

    # Allocate the output buffers using the dimensions reported by the graph.
    torch_tensor_grad_value = torch.empty(
        hipdnn_tensor_grad_value.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_grad_sampling_loc = torch.empty(
        hipdnn_tensor_grad_sampling_loc.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_grad_attn_weight = torch.empty(
        hipdnn_tensor_grad_attn_weight.get_dim(), dtype=torch_data_type, device="cuda"
    )

    # Map every graph tensor to the address of its backing torch buffer.
    variant_pack = {
        hipdnn_tensor_value: torch_tensor_value.data_ptr(),
        hipdnn_tensor_spatial_shapes: torch_tensor_spatial_shapes.data_ptr(),
        hipdnn_tensor_level_start_index: torch_tensor_level_start_index.data_ptr(),
        hipdnn_tensor_sampling_locations: torch_tensor_sampling_locations.data_ptr(),
        hipdnn_tensor_attention_weights: torch_tensor_attention_weights.data_ptr(),
        hipdnn_tensor_grad_output: torch_tensor_grad_output.data_ptr(),
        hipdnn_tensor_grad_value: torch_tensor_grad_value.data_ptr(),
        hipdnn_tensor_grad_sampling_loc: torch_tensor_grad_sampling_loc.data_ptr(),
        hipdnn_tensor_grad_attn_weight: torch_tensor_grad_attn_weight.data_ptr(),
    }

    # Byte buffer of the size requested by the built graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("Deform attention bwd graph execution complete.")
python/deformconvolution/deform_convolution.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_convolution_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_offset,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_mask,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph containing a single ``deform_conv_fprop`` node.

    The torch tensors are only handed to ``graph.tensor_like`` to declare the
    matching graph tensors; no device pointer is captured here.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x: Template for the ``image`` input.
        torch_tensor_offset: Template for the ``offset`` input.
        torch_tensor_w: Template for the ``weight`` input.
        torch_tensor_bias: Template for the ``bias`` input.
        torch_tensor_mask: Template for the ``mask`` input.
        padding: Forwarded unchanged as the op's ``padding`` argument.
        stride: Forwarded unchanged as the op's ``stride`` argument.
        dilation: Forwarded unchanged as the op's ``dilation`` argument.
        hipdnn_data_type: Graph ``io_data_type``; the intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        A 7-tuple: the built graph, the graph tensors for x, offset, w, bias
        and mask, then the output graph tensor y.
    """
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="deform_convolution",
    )

    # Declare the graph inputs one by one, in argument order.
    torch_inputs = (
        torch_tensor_x,
        torch_tensor_offset,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_mask,
    )
    graph_inputs = tuple(graph.tensor_like(tensor) for tensor in torch_inputs)
    x, offset, w, bias, mask = graph_inputs

    y = graph.deform_conv_fprop(
        image=x,
        offset=offset,
        weight=w,
        bias=bias,
        mask=mask,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="deform_conv_fprop",
    )
    y.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, *graph_inputs, y)
if __name__ == "__main__":
    # Sample driver: build the deformable-convolution forward graph on random
    # channels-last data and execute it once.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width

    # Filter dimensions
    k = 1  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Output extent. Floor division keeps the computation in exact integer
    # arithmetic instead of going through a float and truncating.
    h_out = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    w_out = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    # Offset tensor: 2 * r * s channels at every output position.
    torch_tensor_offset = torch.rand(
        n, 2 * r * s, h_out, w_out, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(k, dtype=torch_data_type, device="cuda")
    # Mask tensor: r * s channels at every output position.
    torch_tensor_mask = torch.rand(
        n, r * s, h_out, w_out, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_offset,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_mask,
        hipdnn_tensor_y,
    ) = build_deform_convolution_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_offset,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_mask,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Allocate the output using the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the address of its backing torch buffer.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_offset: torch_tensor_offset.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_mask: torch_tensor_mask.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte buffer of the size requested by the built graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("deform conv fprop graph execution complete.")
python/deformconvolution/deform_convolution_bwd.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_convolution_graph(
    hipdnn_handle,
    torch_tensor_dy,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_offset,
    torch_tensor_mask,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph containing a single ``deform_conv_dgrad`` node.

    The torch tensors are only handed to ``graph.tensor_like`` to declare the
    matching graph tensors; no device pointer is captured here.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_dy: Template for the ``loss`` input.
        torch_tensor_x: Template for the ``image`` input.
        torch_tensor_w: Template for the ``filter`` input.
        torch_tensor_offset: Template for the ``offset`` input.
        torch_tensor_mask: Template for the ``mask`` input.
        padding: Forwarded unchanged as the op's ``padding`` argument.
        stride: Forwarded unchanged as the op's ``stride`` argument.
        dilation: Forwarded unchanged as the op's ``dilation`` argument.
        hipdnn_data_type: Graph ``io_data_type``; the intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        A 9-tuple ``(graph, dy, w, offset, x, mask, dx, doffset, dmask)``.
        Note that the input graph tensors come back in a different order
        than the corresponding parameters of this function.
    """
    # NOTE(review): this label is the same string the forward sample uses for
    # its graph -- confirm whether a backward-specific name was intended.
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="deform_convolution",
    )

    # Declare the graph inputs one by one, in argument order.
    dy, x, w, offset, mask = (
        graph.tensor_like(tensor)
        for tensor in (
            torch_tensor_dy,
            torch_tensor_x,
            torch_tensor_w,
            torch_tensor_offset,
            torch_tensor_mask,
        )
    )

    # The backward op yields exactly three gradient tensors.
    dx, doffset, dmask = graph.deform_conv_dgrad(
        loss=dy,
        filter=w,
        offset=offset,
        image=x,
        mask=mask,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="deform_conv_bwd",
    )

    # Flag every gradient as a graph output.
    for grad in (dx, doffset, dmask):
        grad.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, dy, w, offset, x, mask, dx, doffset, dmask)
if __name__ == "__main__":
    # Sample driver: build the deformable-convolution backward (data) graph
    # on random channels-last data and execute it once.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width

    # Filter dimensions
    k = 1  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Output extent. Floor division keeps the computation in exact integer
    # arithmetic instead of going through a float and truncating.
    h_out = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    w_out = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_dy = torch.rand(
        n, k, h_out, w_out, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    # Offset tensor: 2 * r * s channels at every output position.
    torch_tensor_offset = torch.rand(
        n, 2 * r * s, h_out, w_out, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    # Mask tensor: r * s channels at every output position.
    torch_tensor_mask = torch.rand(
        n, r * s, h_out, w_out, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    # The builder returns its input tensors as (dy, w, offset, x, mask),
    # which is not the order in which the torch tensors are passed in.
    (
        graph,
        hipdnn_tensor_dy,
        hipdnn_tensor_w,
        hipdnn_tensor_offset,
        hipdnn_tensor_x,
        hipdnn_tensor_mask,
        hipdnn_tensor_dx,
        hipdnn_tensor_doffset,
        hipdnn_tensor_dmask,
    ) = build_deform_convolution_graph(
        hipdnn_handle,
        torch_tensor_dy,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_offset,
        torch_tensor_mask,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Allocate the gradient buffers using the dimensions reported by the graph.
    torch_tensor_dx = torch.empty(
        hipdnn_tensor_dx.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )
    torch_tensor_doffset = torch.empty(
        hipdnn_tensor_doffset.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )
    torch_tensor_dmask = torch.empty(
        hipdnn_tensor_dmask.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the address of its backing torch buffer.
    variant_pack = {
        hipdnn_tensor_dy: torch_tensor_dy.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_offset: torch_tensor_offset.data_ptr(),
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_mask: torch_tensor_mask.data_ptr(),
        hipdnn_tensor_dx: torch_tensor_dx.data_ptr(),
        hipdnn_tensor_doffset: torch_tensor_doffset.data_ptr(),
        hipdnn_tensor_dmask: torch_tensor_dmask.data_ptr(),
    }

    # Byte buffer of the size requested by the built graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("deform conv bwd graph execution complete.")
python/deformconvolution/deform_convolution_wrw.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_convolution_wrw_graph(
    hipdnn_handle,
    torch_tensor_dy,
    torch_tensor_x,
    torch_tensor_offset,
    torch_tensor_mask,
    padding,
    stride,
    dilation,
    dw_dims,
    hipdnn_data_type,
):
    """Build a hipDNN graph containing a single ``deform_conv_wgrad`` node.

    The torch tensors are only handed to ``graph.tensor_like`` to declare the
    matching graph tensors; no device pointer is captured here.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_dy: Template for the ``loss`` input.
        torch_tensor_x: Template for the ``image`` input.
        torch_tensor_offset: Template for the ``offset`` input.
        torch_tensor_mask: Template for the ``mask`` input.
        padding: Forwarded unchanged as the op's ``padding`` argument.
        stride: Forwarded unchanged as the op's ``stride`` argument.
        dilation: Forwarded unchanged as the op's ``dilation`` argument.
        dw_dims: Dimensions set explicitly on the weight-gradient tensor.
        hipdnn_data_type: Graph ``io_data_type``; the intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        A 6-tuple ``(graph, dy, offset, x, mask, dw)``. Note that the input
        graph tensors come back in a different order than the corresponding
        parameters of this function.
    """
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="deform_convolution_wrw",
    )

    # Declare the graph inputs one by one, in argument order.
    dy, x, offset, mask = (
        graph.tensor_like(tensor)
        for tensor in (
            torch_tensor_dy,
            torch_tensor_x,
            torch_tensor_offset,
            torch_tensor_mask,
        )
    )

    dw = graph.deform_conv_wgrad(
        image=x,
        offset=offset,
        loss=dy,
        mask=mask,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="deform_conv2d_wrw",
    )

    # The caller supplies the weight-gradient dimensions; they are set on the
    # tensor before it is flagged as a graph output.
    dw.set_dim(dw_dims).set_output(True)

    graph.build(hipdnn_handle)

    return (graph, dy, offset, x, mask, dw)
if __name__ == "__main__":
    # Sample driver: build the deformable-convolution weight-gradient graph
    # on random channels-last data and execute it once.

    # Input dimensions
    n = 4  # Batch size
    c = 64  # Number of input channels
    h = 16  # Height
    w = 16  # Width

    # Filter dimensions
    k = 64  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Output extent. Floor division keeps the computation in exact integer
    # arithmetic instead of going through a float and truncating.
    h_out = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    w_out = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_dy = torch.rand(
        n, k, h_out, w_out, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    # Offset tensor: 2 * r * s channels at every output position.
    torch_tensor_offset = torch.rand(
        n, 2 * r * s, h_out, w_out, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    # Mask tensor: r * s channels at every output position.
    torch_tensor_mask = torch.rand(
        n, r * s, h_out, w_out, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    # The builder returns its input tensors as (dy, offset, x, mask), which
    # is not the order in which the torch tensors are passed in.
    (
        graph,
        hipdnn_tensor_dy,
        hipdnn_tensor_offset,
        hipdnn_tensor_x,
        hipdnn_tensor_mask,
        hipdnn_tensor_dw,
    ) = build_deform_convolution_wrw_graph(
        hipdnn_handle,
        torch_tensor_dy,
        torch_tensor_x,
        torch_tensor_offset,
        torch_tensor_mask,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        [k, c, r, s],  # weight-gradient dimensions
        hipdnn_data_type,
    )

    # Allocate the weight gradient using the dimensions reported by the graph.
    torch_tensor_dw = torch.empty(
        hipdnn_tensor_dw.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map every graph tensor to the address of its backing torch buffer.
    variant_pack = {
        hipdnn_tensor_dy: torch_tensor_dy.data_ptr(),
        hipdnn_tensor_offset: torch_tensor_offset.data_ptr(),
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_mask: torch_tensor_mask.data_ptr(),
        hipdnn_tensor_dw: torch_tensor_dw.data_ptr(),
    }

    # Byte buffer of the size requested by the built graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("deform conv wrw graph execution complete.")
python/fusion/add_layernorm.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_layernorm_fusion_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_epsilon,
    mode,
    eps,
    hipdnn_data_type,
):
    """Build a hipDNN graph that adds two tensors and layer-normalizes the sum.

    The torch tensors are only handed to ``graph.tensor_like`` to declare the
    matching graph tensors; no device pointer is captured here.

    Args:
        hipdnn_handle: Handle used to create and to build the graph.
        torch_tensor_x1: Template for the first addend (``a`` of the add op).
        torch_tensor_x2: Template for the second addend (``b`` of the add op).
        torch_tensor_scale: Template for the layernorm scale tensor.
        torch_tensor_bias: Template for the layernorm bias tensor.
        torch_tensor_epsilon: Template for the epsilon graph tensor.
        mode: Passed as the first positional argument of ``graph.layernorm``.
        eps: Value assigned to the epsilon graph tensor via ``set_value``.
        hipdnn_data_type: Graph ``io_data_type``; the intermediate and compute
            types are fixed to ``hipdnn.data_type.FLOAT``.

    Returns:
        A 7-tuple ``(graph, x1, x2, scale, bias, add_output, y)``. The epsilon
        tensor and the two extra layernorm results are not returned.
    """
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="layernorm_fusion_inference",
    )

    # Declare the graph inputs one by one, in argument order.
    x1, x2, scale, bias, epsilon = (
        graph.tensor_like(tensor)
        for tensor in (
            torch_tensor_x1,
            torch_tensor_x2,
            torch_tensor_scale,
            torch_tensor_bias,
            torch_tensor_epsilon,
        )
    )
    # Epsilon gets its value at graph-construction time.
    epsilon.set_value(eps)

    # Stage 1: element-wise sum, also exposed as a graph output.
    add_output = graph.add(a=x1, b=x2, name="add")
    add_output.set_output(True)

    # Stage 2: layernorm over the sum. The op yields three results; only the
    # first one is used by this sample.
    y, _mean, _inv_var = graph.layernorm(
        mode,
        add_output,
        scale,
        bias,
        epsilon,
        hipdnn.data_type.FLOAT,
        name="layernorm",
    )
    y.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x1, x2, scale, bias, add_output, y)
if __name__ == "__main__":
    # Sample driver: build the add + layernorm graph on random data and
    # execute it once.

    # Input dimensions
    batch = 16  # Batch size
    seq_len = 32  # Number of input seq
    embedding_dim = 64  # Number of feature

    mode = hipdnn.norm_forward_phase.INFERENCE  # Mode
    eps = 1e-5

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Both addends share the (batch, seq_len, embedding_dim) shape.
    torch_tensor_x1 = torch.rand(
        (batch, seq_len, embedding_dim), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_x2 = torch.rand(
        (batch, seq_len, embedding_dim), dtype=torch_data_type, device="cuda"
    )
    # Scale and bias are 1-D tensors of length embedding_dim.
    torch_tensor_scale = torch.rand(
        embedding_dim, dtype=torch_data_type, device="cuda"
    )
    torch_tensor_bias = torch.rand(
        embedding_dim, dtype=torch_data_type, device="cuda"
    )
    # Epsilon is a single-element CPU tensor; it never enters the variant pack.
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1),
        eps,
        dtype=torch.float32,
        requires_grad=False,
        device="cpu",
    )

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_add_output,
        hipdnn_tensor_y,
    ) = build_layernorm_fusion_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_epsilon,
        mode,
        eps,
        hipdnn_data_type,
    )

    # Allocate both outputs using the dimensions reported by the graph.
    torch_tensor_addoutput = torch.empty(
        hipdnn_tensor_add_output.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda"
    )

    # Map every graph tensor to the address of its backing torch buffer.
    variant_pack = {
        hipdnn_tensor_x1: torch_tensor_x1.data_ptr(),
        hipdnn_tensor_x2: torch_tensor_x2.data_ptr(),
        hipdnn_tensor_scale: torch_tensor_scale.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add_output: torch_tensor_addoutput.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Byte buffer of the size requested by the built graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("add_layernorm graph execution complete.")
Prev
1
…
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment