Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDTK
hipDNN Samples
Commits
ca34d4d2
"devtools/ci/gh-actions/start_docker_locally.sh" did not exist on "3aa4bb8c5d1d6da0e4207283fa5ea7f7a70d4005"
Commit
ca34d4d2
authored
Jun 02, 2026
by
yanjl1
Browse files
Initial
parents
Changes
173
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2720 additions
and
0 deletions
+2720
-0
python/conv_fusion/conv_bias_prelu_add.py
python/conv_fusion/conv_bias_prelu_add.py
+139
-0
python/conv_fusion/conv_bias_relu.py
python/conv_fusion/conv_bias_relu.py
+112
-0
python/conv_fusion/conv_bias_swish.py
python/conv_fusion/conv_bias_swish.py
+112
-0
python/conv_fusion/conv_bias_swish_add.py
python/conv_fusion/conv_bias_swish_add.py
+132
-0
python/conv_fusion/convbwd_bias_relu.py
python/conv_fusion/convbwd_bias_relu.py
+117
-0
python/conv_fusion/convint8_bias.py
python/conv_fusion/convint8_bias.py
+172
-0
python/conv_fusion/convint8_bias_add.py
python/conv_fusion/convint8_bias_add.py
+213
-0
python/conv_fusion/convint8_bias_add_relu.py
python/conv_fusion/convint8_bias_add_relu.py
+216
-0
python/conv_fusion/convint8_bias_relu.py
python/conv_fusion/convint8_bias_relu.py
+175
-0
python/conv_fusion/convint8_bias_relu_add.py
python/conv_fusion/convint8_bias_relu_add.py
+216
-0
python/convolution/convolution_bwd.py
python/convolution/convolution_bwd.py
+82
-0
python/convolution/convolution_fwd.py
python/convolution/convolution_fwd.py
+83
-0
python/convolution/convolution_wrw.py
python/convolution/convolution_wrw.py
+82
-0
python/ctc_loss/ctc_loss.py
python/ctc_loss/ctc_loss.py
+52
-0
python/deformattention/deform_attention.py
python/deformattention/deform_attention.py
+118
-0
python/deformattention/deform_attention_bwd.py
python/deformattention/deform_attention_bwd.py
+146
-0
python/deformconvolution/deform_convolution.py
python/deformconvolution/deform_convolution.py
+139
-0
python/deformconvolution/deform_convolution_bwd.py
python/deformconvolution/deform_convolution_bwd.py
+161
-0
python/deformconvolution/deform_convolution_wrw.py
python/deformconvolution/deform_convolution_wrw.py
+133
-0
python/fusion/add_layernorm.py
python/fusion/add_layernorm.py
+120
-0
No files found.
python/conv_fusion/conv_bias_prelu_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_prelu_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    negative_slope,
    hipdnn_data_type,
):
    """Build a hipDNN graph computing ``prelu(conv(x, w) + bias) + add``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored via ``tensor_like``; the conv image.
        torch_tensor_w: Tensor mirrored via ``tensor_like``; the conv weight.
        torch_tensor_bias: Tensor mirrored and added to the conv output.
        torch_tensor_add: Tensor mirrored and added to the PReLU output.
        padding: Forwarded to ``conv_fprop``.
        stride: Forwarded to ``conv_fprop``.
        dilation: Forwarded to ``conv_fprop``.
        negative_slope: Forwarded to ``prelu``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        ``(graph, x, w, bias, add, y)``: the built graph followed by the
        hipDNN tensors for the four inputs and the output ``y``.
    """
    # Intermediates and compute both run in FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_prelu_add",
    )

    # Graph-side tensors mirroring the torch inputs.
    x_node = fused.tensor_like(torch_tensor_x)
    w_node = fused.tensor_like(torch_tensor_w)
    bias_node = fused.tensor_like(torch_tensor_bias)
    residual_node = fused.tensor_like(torch_tensor_add)

    # conv -> + bias -> prelu -> + residual
    conv_out = fused.conv_fprop(
        image=x_node,
        weight=w_node,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused.add(a=conv_out, b=bias_node, name="bias")
    activated = fused.prelu(
        input=biased,
        negative_slope=negative_slope,
        name="prelu",
    )
    y_node = fused.add(a=activated, b=residual_node, name="add")
    y_node.set_output(True)

    fused.build(hipdnn_handle)

    return (fused, x_node, w_node, bias_node, residual_node, y_node)
if __name__ == "__main__":
    # Demo driver: builds the conv + bias + PReLU + add graph on random
    # half-precision data and executes it once on the "cuda" device.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width

    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Activation parameters
    negative_slope = 0.01  # Negative slope

    # Spatial size of the convolution output. The residual ("add") tensor is
    # combined with the activation output, so it is shaped after the output
    # (n, k, out_h, out_w) rather than the input; the two only coincide when
    # c == k and the convolution preserves the spatial size.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_add = torch.rand(
        n, k, out_h, out_w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_prelu_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        negative_slope,
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map each graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch space of the size requested by the built graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_prelu_add graph execution complete.")
python/conv_fusion/conv_bias_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph computing ``relu(conv(x, w) + bias)``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored via ``tensor_like``; the conv image.
        torch_tensor_w: Tensor mirrored via ``tensor_like``; the conv weight.
        torch_tensor_bias: Tensor mirrored and added to the conv output.
        padding: Forwarded to ``conv_fprop``.
        stride: Forwarded to ``conv_fprop``.
        dilation: Forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        ``(graph, x, w, bias, y)``: the built graph followed by the hipDNN
        tensors for the three inputs and the output ``y``.
    """
    # Intermediates and compute both run in FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_relu",
    )

    # Graph-side tensors mirroring the torch inputs.
    x_node = fused.tensor_like(torch_tensor_x)
    w_node = fused.tensor_like(torch_tensor_w)
    bias_node = fused.tensor_like(torch_tensor_bias)

    # conv -> + bias -> relu
    conv_out = fused.conv_fprop(
        image=x_node,
        weight=w_node,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused.add(a=conv_out, b=bias_node, name="bias")
    y_node = fused.relu(input=biased, name="relu")
    y_node.set_output(True)

    fused.build(hipdnn_handle)

    return (fused, x_node, w_node, bias_node, y_node)
if __name__ == "__main__":
    # Demo driver: builds the conv + bias + ReLU graph on random
    # half-precision data and executes it once on the "cuda" device.

    # Problem size: input is (n, c, h, w), filter is (k, c, r, s).
    n, c, h, w = 1, 16, 16, 16
    k, r, s = 16, 3, 3

    # Convolution hyper-parameters as [height, width] pairs.
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # All inputs are random, created on the GPU, then converted to
    # channels_last memory format.
    rand_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    nhwc = torch.channels_last
    torch_tensor_x = torch.rand(n, c, h, w, **rand_kwargs).to(memory_format=nhwc)
    torch_tensor_w = torch.rand(k, c, r, s, **rand_kwargs).to(memory_format=nhwc)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **rand_kwargs).to(memory_format=nhwc)

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        padding,
        stride,
        dilation,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=nhwc,
        device="cuda",
    )

    # Pair every graph tensor with the torch tensor that backs it, then
    # hand the raw device pointers to the graph.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {node: buffer.data_ptr() for node, buffer in bindings}

    # Scratch space of the size requested by the built graph.
    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_relu graph execution complete.")
python/conv_fusion/conv_bias_swish.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_swish_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph computing ``swish(conv(x, w) + bias)``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored via ``tensor_like``; the conv image.
        torch_tensor_w: Tensor mirrored via ``tensor_like``; the conv weight.
        torch_tensor_bias: Tensor mirrored and added to the conv output.
        padding: Forwarded to ``conv_fprop``.
        stride: Forwarded to ``conv_fprop``.
        dilation: Forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        ``(graph, x, w, bias, y)``: the built graph followed by the hipDNN
        tensors for the three inputs and the output ``y``.
    """
    # Intermediates and compute both run in FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_swish",
    )

    # Graph-side tensors mirroring the torch inputs.
    x_node = fused.tensor_like(torch_tensor_x)
    w_node = fused.tensor_like(torch_tensor_w)
    bias_node = fused.tensor_like(torch_tensor_bias)

    # conv -> + bias -> swish
    conv_out = fused.conv_fprop(
        image=x_node,
        weight=w_node,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused.add(a=conv_out, b=bias_node, name="bias")
    y_node = fused.swish(input=biased, name="swish")
    y_node.set_output(True)

    fused.build(hipdnn_handle)

    return (fused, x_node, w_node, bias_node, y_node)
if __name__ == "__main__":
    # Demo driver: builds the conv + bias + swish graph on random
    # half-precision data and executes it once on the "cuda" device.

    # Problem size: input is (n, c, h, w), filter is (k, c, r, s).
    n, c, h, w = 1, 16, 16, 16
    k, r, s = 16, 3, 3

    # Convolution hyper-parameters as [height, width] pairs.
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # All inputs are random, created on the GPU, then converted to
    # channels_last memory format.
    rand_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    nhwc = torch.channels_last
    torch_tensor_x = torch.rand(n, c, h, w, **rand_kwargs).to(memory_format=nhwc)
    torch_tensor_w = torch.rand(k, c, r, s, **rand_kwargs).to(memory_format=nhwc)
    torch_tensor_bias = torch.rand(1, k, 1, 1, **rand_kwargs).to(memory_format=nhwc)

    hipdnn_handle = hipdnn.create_handle()

    built = build_conv_bias_swish_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        padding,
        stride,
        dilation,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=nhwc,
        device="cuda",
    )

    # Pair every graph tensor with the torch tensor that backs it, then
    # hand the raw device pointers to the graph.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {node: buffer.data_ptr() for node, buffer in bindings}

    # Scratch space of the size requested by the built graph.
    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_swish graph execution complete.")
python/conv_fusion/conv_bias_swish_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_bias_swish_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_add,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph computing ``swish(conv(x, w) + bias) + add``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored via ``tensor_like``; the conv image.
        torch_tensor_w: Tensor mirrored via ``tensor_like``; the conv weight.
        torch_tensor_bias: Tensor mirrored and added to the conv output.
        torch_tensor_add: Tensor mirrored and added to the swish output.
        padding: Forwarded to ``conv_fprop``.
        stride: Forwarded to ``conv_fprop``.
        dilation: Forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        ``(graph, x, w, bias, add, y)``: the built graph followed by the
        hipDNN tensors for the four inputs and the output ``y``.
    """
    # Intermediates and compute both run in FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="conv_bias_swish_add",
    )

    # Graph-side tensors mirroring the torch inputs.
    x_node = fused.tensor_like(torch_tensor_x)
    w_node = fused.tensor_like(torch_tensor_w)
    bias_node = fused.tensor_like(torch_tensor_bias)
    residual_node = fused.tensor_like(torch_tensor_add)

    # conv -> + bias -> swish -> + residual
    conv_out = fused.conv_fprop(
        image=x_node,
        weight=w_node,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d",
    )
    biased = fused.add(a=conv_out, b=bias_node, name="bias")
    activated = fused.swish(input=biased, name="swish")
    y_node = fused.add(a=activated, b=residual_node, name="add")
    y_node.set_output(True)

    fused.build(hipdnn_handle)

    return (fused, x_node, w_node, bias_node, residual_node, y_node)
if __name__ == "__main__":
    # Demo driver: builds the conv + bias + swish + add graph on random
    # half-precision data and executes it once on the "cuda" device.

    # Input dimensions
    n = 1  # Batch size
    c = 16  # Number of input channels
    h = 16  # Height
    w = 16  # Width

    # Filter dimensions
    k = 16  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 1  # Height padding
    pad_w = 1  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial size of the convolution output. The residual ("add") tensor is
    # combined with the activation output, so it is shaped after the output
    # (n, k, out_h, out_w) rather than the input; the two only coincide when
    # c == k and the convolution preserves the spatial size.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    torch_tensor_x = torch.rand(
        n, c, h, w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_w = torch.rand(
        k, c, r, s, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_bias = torch.rand(
        1, k, 1, 1, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)
    torch_tensor_add = torch.rand(
        n, k, out_h, out_w, dtype=torch_data_type, device="cuda"
    ).to(memory_format=torch.channels_last)

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_conv_bias_swish_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_add,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Map each graph tensor to the device pointer backing it.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch space of the size requested by the built graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("conv_bias_swish_add graph execution complete.")
python/conv_fusion/convbwd_bias_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convBwd_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_bias,
    padding,
    stride,
    dilation,
    output_padding,
    hipdnn_data_type,
):
    """Build a hipDNN graph computing ``relu(conv_dgrad(x, w) + bias)``.

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored via ``tensor_like``; passed to
            ``conv_dgrad`` as ``loss``.
        torch_tensor_w: Tensor mirrored via ``tensor_like``; passed to
            ``conv_dgrad`` as ``filter``.
        torch_tensor_bias: Tensor mirrored and added to the dgrad output.
        padding: Forwarded to ``conv_dgrad``.
        stride: Forwarded to ``conv_dgrad``.
        dilation: Forwarded to ``conv_dgrad``.
        output_padding: Forwarded to ``conv_dgrad``.
        hipdnn_data_type: Used as the graph's ``io_data_type``.

    Returns:
        ``(graph, x, w, bias, y)``: the built graph followed by the hipDNN
        tensors for the three inputs and the output ``y``.
    """
    # Intermediates and compute both run in FLOAT regardless of the I/O type.
    float_type = hipdnn.data_type.FLOAT
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="convBwd_bias_relu",
    )

    # Graph-side tensors mirroring the torch inputs.
    x_node = fused.tensor_like(torch_tensor_x)
    w_node = fused.tensor_like(torch_tensor_w)
    bias_node = fused.tensor_like(torch_tensor_bias)

    # conv_dgrad -> + bias -> relu
    dgrad_out = fused.conv_dgrad(
        loss=x_node,
        filter=w_node,
        padding=padding,
        stride=stride,
        dilation=dilation,
        output_padding=output_padding,
        name="conv2d",
    )
    biased = fused.add(a=dgrad_out, b=bias_node, name="bias")
    y_node = fused.relu(input=biased, name="relu")
    y_node.set_output(True)

    fused.build(hipdnn_handle)

    return (fused, x_node, w_node, bias_node, y_node)
if __name__ == "__main__":
    # Demo driver: builds the conv_dgrad + bias + ReLU graph on random
    # half-precision data and executes it once on the "cuda" device.

    # Problem size: input is (n, c, h, w), filter is (k, c, r, s).
    n, c, h, w = 1, 32, 270, 480
    k, r, s = 32, 3, 3

    # Convolution hyper-parameters as [height, width] pairs.
    padding = [0, 0]
    stride = [2, 2]
    dilation = [1, 1]
    output_padding = [1, 1]

    hipdnn_data_type = hipdnn.data_type.HALF
    torch_data_type = torch.float16

    # All inputs are random, created on the GPU, then converted to
    # channels_last memory format.
    rand_kwargs = {"dtype": torch_data_type, "device": "cuda"}
    nhwc = torch.channels_last
    torch_tensor_x = torch.rand(n, c, h, w, **rand_kwargs).to(memory_format=nhwc)
    torch_tensor_w = torch.rand(k, c, r, s, **rand_kwargs).to(memory_format=nhwc)
    # NOTE(review): the bias uses k channels; c == k here, so which of the
    # two the dgrad output actually requires is not exercised - confirm.
    torch_tensor_bias = torch.rand(1, k, 1, 1, **rand_kwargs).to(memory_format=nhwc)

    hipdnn_handle = hipdnn.create_handle()

    built = build_convBwd_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_bias,
        padding,
        stride,
        dilation,
        output_padding,
        hipdnn_data_type,
    )
    graph, hipdnn_tensor_x, hipdnn_tensor_w, hipdnn_tensor_bias, hipdnn_tensor_y = built

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=nhwc,
        device="cuda",
    )

    # Pair every graph tensor with the torch tensor that backs it, then
    # hand the raw device pointers to the graph.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {node: buffer.data_ptr() for node, buffer in bindings}

    # Scratch space of the size requested by the built graph.
    workspace_size = graph.get_workspace_size()
    workspace = torch.empty(workspace_size, dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convBwd_bias_relu graph execution complete.")
python/conv_fusion/convint8_bias.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph: conv -> dequantize -> bias add -> quantize.

    The dequantize step is ``(conv - zero_point_dq) * scale_dq`` and the
    quantize step is ``value / scale_q + zero_point_q``. The four
    quantization tensors are mirrored with ``tensor_like`` and then given
    fixed values through ``set_value`` (zero points 0.0, scales 1.0).

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored via ``tensor_like``; the conv image.
        torch_tensor_w: Tensor mirrored via ``tensor_like``; the conv weight.
        torch_tensor_zero_point_dq: Template for the dequantize zero point.
        torch_tensor_scale_dq: Template for the dequantize scale.
        torch_tensor_bias: Tensor mirrored and added after dequantization.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Forwarded to ``conv_fprop``.
        stride: Forwarded to ``conv_fprop``.
        dilation: Forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as both ``io_data_type`` and
            ``intermediate_data_type`` of the graph.

    Returns:
        ``(graph, x, w, bias, output)``: the built graph followed by the
        hipDNN tensors for image, weight, bias and the graph output.
    """
    int8_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias",
    )

    # Conv image and filter: vector count 32 on dimension 1 (described as
    # the NCHWc32 layout by the original sample).
    x_node = int8_graph.tensor_like(torch_tensor_x)
    x_node = x_node.set_vector_count_and_dimension(32, 1)
    w_node = int8_graph.tensor_like(torch_tensor_w)
    w_node = w_node.set_vector_count_and_dimension(32, 1)

    conv_out = int8_graph.conv_fprop(
        image=x_node,
        weight=w_node,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # Dequantize, step 1: subtract the zero point.
    zero_point_dq = int8_graph.tensor_like(torch_tensor_zero_point_dq)
    zero_point_dq.set_value(0.0)
    deq_shifted = int8_graph.sub(
        a=conv_out,
        b=zero_point_dq,
        name="conv_deq_sub_node",
    )

    # Dequantize, step 2: multiply by the scale.
    scale_dq = int8_graph.tensor_like(torch_tensor_scale_dq)
    scale_dq.set_value(1.0)
    deq_scaled = int8_graph.mul(
        a=deq_shifted,
        b=scale_dq,
        name="conv_deq_mul_node",
    )

    # Bias add on the dequantized value.
    bias_node = int8_graph.tensor_like(torch_tensor_bias)
    biased = int8_graph.add(a=deq_scaled, b=bias_node, name="bias_node")

    # Quantize, step 1: divide by the scale.
    scale_q = int8_graph.tensor_like(torch_tensor_scale_q)
    scale_q.set_value(1.0)
    q_scaled = int8_graph.div(a=biased, b=scale_q, name="quantize_div_node")

    # Quantize, step 2: add the zero point.
    zero_point_q = int8_graph.tensor_like(torch_tensor_zero_point_q)
    zero_point_q.set_value(0.0)
    out_node = int8_graph.add(
        a=q_scaled,
        b=zero_point_q,
        name="quantize_add_node",
    )
    out_node.set_output(True).set_vector_count_and_dimension(32, 1)

    int8_graph.build(hipdnn_handle)

    return (int8_graph, x_node, w_node, bias_node, out_node)
if __name__ == "__main__":
    # Demo driver: builds the int8 conv + bias graph on random data and
    # executes it once on the "cuda" device.

    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128,
        high=128,
        size=(k, c, r, s),
        dtype=torch_data_type,
        device="cuda",
    )

    # Quantization parameters: the dtype is stated explicitly instead of
    # relying on torch's process-wide default dtype.
    torch_tensor_zero_point_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = build_convint8_bias_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Only image, weight, bias and output are bound to device memory; the
    # builder does not return the quantization tensors.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch space of the size requested by the built graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias graph execution complete.")
python/conv_fusion/convint8_bias_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_add,
    torch_tensor_zero_point_dq_add,
    torch_tensor_scale_dq_add,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipDNN graph: conv -> dequantize -> bias -> add -> quantize.

    Both the convolution output and the ``add`` input are dequantized as
    ``(value - zero_point) * scale`` before being summed; the sum is then
    quantized as ``value / scale_q + zero_point_q``. The quantization
    tensors are mirrored with ``tensor_like`` and then given fixed values
    through ``set_value`` (zero points 0.0, scales 1.0).

    Args:
        hipdnn_handle: Handle given to ``hipdnn.pygraph`` and ``graph.build``.
        torch_tensor_x: Tensor mirrored via ``tensor_like``; the conv image.
        torch_tensor_w: Tensor mirrored via ``tensor_like``; the conv weight.
        torch_tensor_zero_point_dq: Template for the conv dequantize zero point.
        torch_tensor_scale_dq: Template for the conv dequantize scale.
        torch_tensor_bias: Tensor mirrored and added after dequantization.
        torch_tensor_add: Tensor mirrored as the second operand of the add.
        torch_tensor_zero_point_dq_add: Template for the add-input zero point.
        torch_tensor_scale_dq_add: Template for the add-input scale.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Forwarded to ``conv_fprop``.
        stride: Forwarded to ``conv_fprop``.
        dilation: Forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as both ``io_data_type`` and
            ``intermediate_data_type`` of the graph.

    Returns:
        ``(graph, x, w, bias, add, output)``: the built graph followed by the
        hipDNN tensors for image, weight, bias, add input and graph output.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_add",
    )

    # Create hipdnn conv input and filter tensor with NCHWc32 layout
    hipdnn_tensor_x = graph.tensor_like(
        torch_tensor_x
    ).set_vector_count_and_dimension(32, 1)
    hipdnn_tensor_w = graph.tensor_like(
        torch_tensor_w
    ).set_vector_count_and_dimension(32, 1)

    # Create conv_fprop op
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # Create sub node for dequantize:zero_point_dq
    hipdnn_tensor_zero_point_dq = graph.tensor_like(torch_tensor_zero_point_dq)
    hipdnn_tensor_zero_point_dq.set_value(0.0)
    hipdnn_tensor_conv_deq_sub_output = graph.sub(
        a=hipdnn_tensor_conv_output,
        b=hipdnn_tensor_zero_point_dq,
        name="conv_deq_sub_node",
    )

    # Create mul node for dequantize:scale_dq
    hipdnn_tensor_scale_dq = graph.tensor_like(torch_tensor_scale_dq)
    hipdnn_tensor_scale_dq.set_value(1.0)
    hipdnn_tensor_conv_deq_mul_output = graph.mul(
        a=hipdnn_tensor_conv_deq_sub_output,
        b=hipdnn_tensor_scale_dq,
        name="conv_deq_mul_node",
    )

    # Create bias node
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_deq_mul_output,
        b=hipdnn_tensor_bias,
        name="bias_node",
    )

    # Create the original add input (still quantized at this point)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)
    hipdnn_tensor_add.set_vector_count_and_dimension(32, 1)

    # Create sub node for dequantize:zero_point_dq_add
    hipdnn_tensor_zero_point_dq_add = graph.tensor_like(
        torch_tensor_zero_point_dq_add
    )
    hipdnn_tensor_zero_point_dq_add.set_value(0.0)
    hipdnn_tensor_add_deq_sub_output = graph.sub(
        a=hipdnn_tensor_add,
        b=hipdnn_tensor_zero_point_dq_add,
        name="add_deq_sub_node",
    )

    # Create mul node for dequantize:scale_dq_add
    hipdnn_tensor_scale_dq_add = graph.tensor_like(torch_tensor_scale_dq_add)
    hipdnn_tensor_scale_dq_add.set_value(1.0)
    hipdnn_tensor_add_deq_mul_output = graph.mul(
        a=hipdnn_tensor_add_deq_sub_output,
        b=hipdnn_tensor_scale_dq_add,
        name="add_deq_mul_node",
    )

    # Create add op
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_bias_output,
        b=hipdnn_tensor_add_deq_mul_output,
        name="add_node",
    )

    # Create div node for quantize:scale_q
    hipdnn_tensor_scale_q = graph.tensor_like(torch_tensor_scale_q)
    hipdnn_tensor_scale_q.set_value(1.0)
    hipdnn_tensor_quantize_div_output = graph.div(
        a=hipdnn_tensor_add_output,
        b=hipdnn_tensor_scale_q,
        name="quantize_div_node",
    )

    # Create add node for quantize:zero_point_q
    hipdnn_tensor_zero_point_q = graph.tensor_like(torch_tensor_zero_point_q)
    hipdnn_tensor_zero_point_q.set_value(0.0)
    hipdnn_tensor_output = graph.add(
        a=hipdnn_tensor_quantize_div_output,
        b=hipdnn_tensor_zero_point_q,
        name="quantize_add_node",
    )
    hipdnn_tensor_output.set_output(True).set_vector_count_and_dimension(32, 1)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_output,
    )
if __name__ == "__main__":
    # Demo driver: builds the int8 conv + bias + add graph on random data
    # and executes it once on the "cuda" device.

    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial size of the convolution output, using integer floor division
    # so no float round-trip is involved.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128,
        high=128,
        size=(k, c, r, s),
        dtype=torch_data_type,
        device="cuda",
    )

    # Quantization parameters: the dtype is stated explicitly instead of
    # relying on torch's process-wide default dtype.
    torch_tensor_zero_point_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")

    # The add input is shaped like the convolution output.
    torch_tensor_add = torch.randint(
        low=-128,
        high=128,
        size=(n, k, out_h, out_w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_zero_point_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_dq_add = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_zero_point_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )
    torch_tensor_scale_q = torch.rand(
        1, 1, 1, 1, dtype=quantize_data_type, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_convint8_bias_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_add,
        torch_tensor_zero_point_dq_add,
        torch_tensor_scale_dq_add,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Only image, weight, bias, add input and output are bound to device
    # memory; the builder does not return the quantization tensors.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    # Scratch space of the size requested by the built graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("convint8_bias_add graph execution complete.")
python/conv_fusion/convint8_bias_add_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_add_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_add,
    torch_tensor_zero_point_dq_add,
    torch_tensor_scale_dq_add,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the fused "convint8_bias_add_relu" hipdnn graph.

    Node chain, in creation order:

    1. ``conv_fprop`` on the vectorized input and filter.
    2. Dequantize the convolution result: subtract a zero point, then
       multiply by a scale.
    3. Add the bias.
    4. Dequantize the residual ``add`` tensor the same way (sub, mul).
    5. Add the dequantized residual to the biased convolution result.
    6. ReLU.
    7. Quantize: divide by a scale, then add a zero point.

    The zero-point and scale torch tensors are only handed to
    ``tensor_like``; the graph tensors created from them receive fixed
    values through ``set_value`` (0.0 for zero points, 1.0 for scales).

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution input.
        torch_tensor_w: Torch tensor describing the convolution filter.
        torch_tensor_zero_point_dq: Template for the conv dequantize zero point.
        torch_tensor_scale_dq: Template for the conv dequantize scale.
        torch_tensor_bias: Torch tensor describing the bias.
        torch_tensor_add: Torch tensor describing the residual input.
        torch_tensor_zero_point_dq_add: Template for the residual zero point.
        torch_tensor_scale_dq_add: Template for the residual scale.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as both the io and intermediate data type.

    Returns:
        Tuple ``(graph, x, w, bias, add, output)`` of the built graph and the
        graph tensors the caller has to bind in the variant pack.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_add_relu",
    )

    # Create hipdnn conv input and filter tensor with NCHWc32 layout
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)

    # Create conv_fprop op
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # Create sub node for dequantize: zero_point_dq
    hipdnn_tensor_zero_point_dq = graph.tensor_like(torch_tensor_zero_point_dq)
    hipdnn_tensor_zero_point_dq.set_value(0.0)
    hipdnn_tensor_conv_deq_sub_output = graph.sub(
        a=hipdnn_tensor_conv_output,
        b=hipdnn_tensor_zero_point_dq,
        name="conv_deq_sub_node",
    )

    # Create mul node for dequantize: scale_dq
    hipdnn_tensor_scale_dq = graph.tensor_like(torch_tensor_scale_dq)
    hipdnn_tensor_scale_dq.set_value(1.0)
    hipdnn_tensor_conv_deq_mul_output = graph.mul(
        a=hipdnn_tensor_conv_deq_sub_output,
        b=hipdnn_tensor_scale_dq,
        name="conv_deq_mul_node",
    )

    # Create bias node
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_deq_mul_output,
        b=hipdnn_tensor_bias,
        name="bias_node",
    )

    # Create the residual input tensor (still quantized at this point)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)
    hipdnn_tensor_add.set_vector_count_and_dimension(32, 1)

    # Create sub node for dequantize: zero_point_dq_add
    hipdnn_tensor_zero_point_dq_add = graph.tensor_like(torch_tensor_zero_point_dq_add)
    hipdnn_tensor_zero_point_dq_add.set_value(0.0)
    hipdnn_tensor_add_deq_sub_output = graph.sub(
        a=hipdnn_tensor_add,
        b=hipdnn_tensor_zero_point_dq_add,
        name="add_deq_sub_node",
    )

    # Create mul node for dequantize: scale_dq_add
    hipdnn_tensor_scale_dq_add = graph.tensor_like(torch_tensor_scale_dq_add)
    hipdnn_tensor_scale_dq_add.set_value(1.0)
    hipdnn_tensor_add_deq_mul_output = graph.mul(
        a=hipdnn_tensor_add_deq_sub_output,
        b=hipdnn_tensor_scale_dq_add,
        name="add_deq_mul_node",
    )

    # Create add op
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_bias_output,
        b=hipdnn_tensor_add_deq_mul_output,
        name="add_node",
    )

    # Create relu node
    hipdnn_tensor_relu_output = graph.relu(
        input=hipdnn_tensor_add_output,
        name="relu_node",
    )

    # Create div node for quantize: scale_q
    hipdnn_tensor_scale_q = graph.tensor_like(torch_tensor_scale_q)
    hipdnn_tensor_scale_q.set_value(1.0)
    hipdnn_tensor_quantize_div_output = graph.div(
        a=hipdnn_tensor_relu_output,
        b=hipdnn_tensor_scale_q,
        name="quantize_div_node",
    )

    # Create add node for quantize: zero_point_q
    hipdnn_tensor_zero_point_q = graph.tensor_like(torch_tensor_zero_point_q)
    hipdnn_tensor_zero_point_q.set_value(0.0)
    hipdnn_tensor_output = graph.add(
        a=hipdnn_tensor_quantize_div_output,
        b=hipdnn_tensor_zero_point_q,
        name="quantize_add_node",
    )
    # The final tensor is flagged as a graph output and vectorized like the inputs.
    hipdnn_tensor_output.set_output(True).set_vector_count_and_dimension(32, 1)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_output,
    )
if __name__ == "__main__":
    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial extent of the convolution result; the residual "add" tensor
    # below is allocated with this size. Integer floor division gives the
    # same value as the former float-division-plus-int() form for these
    # settings while staying in integer arithmetic.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128,
        high=128,
        size=(k, c, r, s),
        dtype=torch_data_type,
        device="cuda",
    )
    # Scale / zero-point tensors use the explicitly declared quantization
    # dtype instead of relying on torch's global default dtype.
    torch_tensor_zero_point_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    torch_tensor_add = torch.randint(
        low=-128,
        high=128,
        size=(n, k, out_h, out_w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_zero_point_dq_add = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq_add = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_convint8_bias_add_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_add,
        torch_tensor_zero_point_dq_add,
        torch_tensor_scale_dq_add,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Only x, w, bias, add and y are bound to device pointers; the scale and
    # zero-point tensors are not part of the variant pack.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("convint8_bias_add_relu graph execution complete.")
python/conv_fusion/convint8_bias_relu.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_relu_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Assemble and build the "convint8_bias_relu" hipdnn graph.

    Pipeline: conv_fprop -> (sub zero point, mul scale) -> add bias -> relu
    -> (div scale, add zero point). The zero-point / scale torch tensors act
    only as ``tensor_like`` templates; their graph counterparts are given the
    constants 0.0 and 1.0 via ``set_value``.

    Returns:
        ``(graph, x, w, bias, output)`` - the built graph followed by the
        graph tensors for input, filter, bias and final output.
    """
    fused = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_relu",
    )

    def constant_like(template, value):
        # Graph tensor shaped after `template` whose value is pinned to `value`.
        tensor = fused.tensor_like(template)
        tensor.set_value(value)
        return tensor

    # Input and filter, vectorized as 32 elements on dimension 1 (NCHWc32).
    x_desc = fused.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    w_desc = fused.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)

    conv_out = fused.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # Dequantize step 1: subtract the zero point.
    dq_zero_point = constant_like(torch_tensor_zero_point_dq, 0.0)
    dq_shifted = fused.sub(a=conv_out, b=dq_zero_point, name="conv_deq_sub_node")

    # Dequantize step 2: multiply by the scale.
    dq_scale = constant_like(torch_tensor_scale_dq, 1.0)
    dq_scaled = fused.mul(a=dq_shifted, b=dq_scale, name="conv_deq_mul_node")

    # Bias addition.
    bias_desc = fused.tensor_like(torch_tensor_bias)
    biased = fused.add(a=dq_scaled, b=bias_desc, name="bias_node")

    # Activation.
    activated = fused.relu(input=biased, name="relu_node")

    # Quantize step 1: divide by the scale.
    q_scale = constant_like(torch_tensor_scale_q, 1.0)
    q_scaled = fused.div(a=activated, b=q_scale, name="quantize_div_node")

    # Quantize step 2: add the zero point.
    q_zero_point = constant_like(torch_tensor_zero_point_q, 0.0)
    y_desc = fused.add(a=q_scaled, b=q_zero_point, name="quantize_add_node")
    y_desc.set_output(True).set_vector_count_and_dimension(32, 1)

    fused.build(hipdnn_handle)

    return (fused, x_desc, w_desc, bias_desc, y_desc)
if __name__ == "__main__":
    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128,
        high=128,
        size=(k, c, r, s),
        dtype=torch_data_type,
        device="cuda",
    )
    # Scale / zero-point tensors use the explicitly declared quantization
    # dtype instead of relying on torch's global default dtype.
    torch_tensor_zero_point_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_y,
    ) = build_convint8_bias_relu_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Only x, w, bias and y are bound to device pointers; the scale and
    # zero-point tensors are not part of the variant pack.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("convint8_bias_relu graph execution complete.")
python/conv_fusion/convint8_bias_relu_add.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_convint8_bias_relu_add_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_zero_point_dq,
    torch_tensor_scale_dq,
    torch_tensor_bias,
    torch_tensor_add,
    torch_tensor_zero_point_dq_add,
    torch_tensor_scale_dq_add,
    torch_tensor_zero_point_q,
    torch_tensor_scale_q,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build the fused "convint8_bias_relu_add" hipdnn graph.

    Node chain, in creation order:

    1. ``conv_fprop`` on the vectorized input and filter.
    2. Dequantize the convolution result: subtract a zero point, then
       multiply by a scale.
    3. Add the bias.
    4. ReLU.
    5. Dequantize the residual ``add`` tensor (sub, mul).
    6. Add the dequantized residual to the ReLU result.
    7. Quantize: divide by a scale, then add a zero point.

    The zero-point and scale torch tensors are only handed to
    ``tensor_like``; the graph tensors created from them receive fixed
    values through ``set_value`` (0.0 for zero points, 1.0 for scales).

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_x: Torch tensor describing the convolution input.
        torch_tensor_w: Torch tensor describing the convolution filter.
        torch_tensor_zero_point_dq: Template for the conv dequantize zero point.
        torch_tensor_scale_dq: Template for the conv dequantize scale.
        torch_tensor_bias: Torch tensor describing the bias.
        torch_tensor_add: Torch tensor describing the residual input.
        torch_tensor_zero_point_dq_add: Template for the residual zero point.
        torch_tensor_scale_dq_add: Template for the residual scale.
        torch_tensor_zero_point_q: Template for the quantize zero point.
        torch_tensor_scale_q: Template for the quantize scale.
        padding: Padding forwarded to ``conv_fprop``.
        stride: Stride forwarded to ``conv_fprop``.
        dilation: Dilation forwarded to ``conv_fprop``.
        hipdnn_data_type: Used as both the io and intermediate data type.

    Returns:
        Tuple ``(graph, x, w, bias, add, output)`` of the built graph and the
        graph tensors the caller has to bind in the variant pack.
    """
    # Create graph
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn_data_type,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="convint8_bias_relu_add",
    )

    # Create hipdnn conv input and filter tensor with NCHWc32 layout
    hipdnn_tensor_x = graph.tensor_like(torch_tensor_x).set_vector_count_and_dimension(32, 1)
    hipdnn_tensor_w = graph.tensor_like(torch_tensor_w).set_vector_count_and_dimension(32, 1)

    # Create conv_fprop op
    hipdnn_tensor_conv_output = graph.conv_fprop(
        image=hipdnn_tensor_x,
        weight=hipdnn_tensor_w,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv_fprop_node",
    )

    # Create sub node for dequantize: zero_point_dq
    hipdnn_tensor_zero_point_dq = graph.tensor_like(torch_tensor_zero_point_dq)
    hipdnn_tensor_zero_point_dq.set_value(0.0)
    hipdnn_tensor_conv_deq_sub_output = graph.sub(
        a=hipdnn_tensor_conv_output,
        b=hipdnn_tensor_zero_point_dq,
        name="conv_deq_sub_node",
    )

    # Create mul node for dequantize: scale_dq
    hipdnn_tensor_scale_dq = graph.tensor_like(torch_tensor_scale_dq)
    hipdnn_tensor_scale_dq.set_value(1.0)
    hipdnn_tensor_conv_deq_mul_output = graph.mul(
        a=hipdnn_tensor_conv_deq_sub_output,
        b=hipdnn_tensor_scale_dq,
        name="conv_deq_mul_node",
    )

    # Create bias node
    hipdnn_tensor_bias = graph.tensor_like(torch_tensor_bias)
    hipdnn_tensor_bias_output = graph.add(
        a=hipdnn_tensor_conv_deq_mul_output,
        b=hipdnn_tensor_bias,
        name="bias_node",
    )

    # Create relu node
    hipdnn_tensor_relu_output = graph.relu(
        input=hipdnn_tensor_bias_output,
        name="relu_node",
    )

    # Create the residual input tensor (still quantized at this point)
    hipdnn_tensor_add = graph.tensor_like(torch_tensor_add)
    hipdnn_tensor_add.set_vector_count_and_dimension(32, 1)

    # Create sub node for dequantize: zero_point_dq_add
    hipdnn_tensor_zero_point_dq_add = graph.tensor_like(torch_tensor_zero_point_dq_add)
    hipdnn_tensor_zero_point_dq_add.set_value(0.0)
    hipdnn_tensor_add_deq_sub_output = graph.sub(
        a=hipdnn_tensor_add,
        b=hipdnn_tensor_zero_point_dq_add,
        name="add_deq_sub_node",
    )

    # Create mul node for dequantize: scale_dq_add
    hipdnn_tensor_scale_dq_add = graph.tensor_like(torch_tensor_scale_dq_add)
    hipdnn_tensor_scale_dq_add.set_value(1.0)
    hipdnn_tensor_add_deq_mul_output = graph.mul(
        a=hipdnn_tensor_add_deq_sub_output,
        b=hipdnn_tensor_scale_dq_add,
        name="add_deq_mul_node",
    )

    # Create add op
    hipdnn_tensor_add_output = graph.add(
        a=hipdnn_tensor_relu_output,
        b=hipdnn_tensor_add_deq_mul_output,
        name="add_node",
    )

    # Create div node for quantize: scale_q
    hipdnn_tensor_scale_q = graph.tensor_like(torch_tensor_scale_q)
    hipdnn_tensor_scale_q.set_value(1.0)
    hipdnn_tensor_quantize_div_output = graph.div(
        a=hipdnn_tensor_add_output,
        b=hipdnn_tensor_scale_q,
        name="quantize_div_node",
    )

    # Create add node for quantize: zero_point_q
    hipdnn_tensor_zero_point_q = graph.tensor_like(torch_tensor_zero_point_q)
    hipdnn_tensor_zero_point_q.set_value(0.0)
    hipdnn_tensor_output = graph.add(
        a=hipdnn_tensor_quantize_div_output,
        b=hipdnn_tensor_zero_point_q,
        name="quantize_add_node",
    )
    # The final tensor is flagged as a graph output and vectorized like the inputs.
    hipdnn_tensor_output.set_output(True).set_vector_count_and_dimension(32, 1)

    graph.build(hipdnn_handle)

    return (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_output,
    )
if __name__ == "__main__":
    # Input dimensions
    n = 2  # Batch size
    c = 32  # Number of input channels
    h = 16  # Height
    w = 8  # Width

    # Filter dimensions
    k = 128  # Number of output channels
    r = 3  # Filter height
    s = 3  # Filter width

    # Convolution parameters
    stride_h = 1  # Height stride
    stride_w = 1  # Width stride
    pad_h = 0  # Height padding
    pad_w = 0  # Width padding
    dil_h = 1  # Height dilation
    dil_w = 1  # Width dilation

    # Spatial extent of the convolution result; the residual "add" tensor
    # below is allocated with this size. Integer floor division gives the
    # same value as the former float-division-plus-int() form for these
    # settings while staying in integer arithmetic.
    out_h = (h + 2 * pad_h - (dil_h * (r - 1) + 1)) // stride_h + 1
    out_w = (w + 2 * pad_w - (dil_w * (s - 1) + 1)) // stride_w + 1

    hipdnn_data_type = hipdnn.data_type.INT8
    torch_data_type = torch.int8
    bias_data_type = torch.float32
    quantize_data_type = torch.float32

    torch_tensor_x = torch.randint(
        low=-128,
        high=128,
        size=(n, c, h, w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_w = torch.randint(
        low=-128,
        high=128,
        size=(k, c, r, s),
        dtype=torch_data_type,
        device="cuda",
    )
    # Scale / zero-point tensors use the explicitly declared quantization
    # dtype instead of relying on torch's global default dtype.
    torch_tensor_zero_point_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_bias = torch.rand(1, k, 1, 1, dtype=bias_data_type, device="cuda")
    torch_tensor_add = torch.randint(
        low=-128,
        high=128,
        size=(n, k, out_h, out_w),
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_zero_point_dq_add = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_dq_add = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_zero_point_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")
    torch_tensor_scale_q = torch.rand(1, 1, 1, 1, dtype=quantize_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_add,
        hipdnn_tensor_y,
    ) = build_convint8_bias_relu_add_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_zero_point_dq,
        torch_tensor_scale_dq,
        torch_tensor_bias,
        torch_tensor_add,
        torch_tensor_zero_point_dq_add,
        torch_tensor_scale_dq_add,
        torch_tensor_zero_point_q,
        torch_tensor_scale_q,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph tensor.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        device="cuda",
    )

    # Only x, w, bias, add and y are bound to device pointers; the scale and
    # zero-point tensors are not part of the variant pack.
    variant_pack = {
        hipdnn_tensor_x: torch_tensor_x.data_ptr(),
        hipdnn_tensor_w: torch_tensor_w.data_ptr(),
        hipdnn_tensor_bias: torch_tensor_bias.data_ptr(),
        hipdnn_tensor_add: torch_tensor_add.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("convint8_bias_relu_add graph execution complete.")
python/convolution/convolution_bwd.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_backward_graph(
    hipdnn_handle,
    torch_tensor_dy,
    torch_tensor_w,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph holding a single ``conv_dgrad`` node.

    Graph tensors for the incoming gradient (``dy``) and the filter (``w``)
    are created with ``tensor_like``; the ``conv_dgrad`` result is flagged
    with ``set_output(True)`` before the graph is built on the handle.

    Returns:
        ``(graph, dy, w, dx)`` - the built graph and its three graph tensors.
    """
    float_type = hipdnn.data_type.FLOAT
    dgrad_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="convolution_backward",
    )

    # Graph-side counterparts of the two torch inputs.
    dy_desc = dgrad_graph.tensor_like(torch_tensor_dy)
    w_desc = dgrad_graph.tensor_like(torch_tensor_w)

    # The single data-gradient convolution node.
    dx_desc = dgrad_graph.conv_dgrad(
        loss=dy_desc,
        filter=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d_backward",
    )
    dx_desc.set_output(True)

    dgrad_graph.build(hipdnn_handle)
    return dgrad_graph, dy_desc, w_desc, dx_desc
if __name__ == "__main__":
    # Problem size: dy is (n, k, h, w), the filter is (k, c, r, s).
    n, c, h, w = 4, 32, 16, 16  # batch, input channels, height, width
    k, r, s = 64, 3, 3  # output channels, filter height, filter width

    # Convolution parameters, each given as [height, width].
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Random gradient and filter tensors on the GPU.
    torch_tensor_dy = torch.rand(n, k, h, w, dtype=torch_data_type, device="cuda")
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    graph, dy_desc, w_desc, dx_desc = build_conv_backward_graph(
        hipdnn_handle,
        torch_tensor_dy,
        torch_tensor_w,
        padding,
        stride,
        dilation,
        hipdnn_data_type,
    )

    # The dx buffer takes its shape from the graph's output tensor.
    torch_tensor_dx = torch.empty(dx_desc.get_dim(), dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        dy_desc: torch_tensor_dy.data_ptr(),
        w_desc: torch_tensor_w.data_ptr(),
        dx_desc: torch_tensor_dx.data_ptr(),
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("Convolution backward graph execution complete.")
python/convolution/convolution_fwd.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_forward_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_w,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph holding a single ``conv_fprop`` node.

    Graph tensors for the input (``x``) and the filter (``w``) are created
    with ``tensor_like``; the ``conv_fprop`` result is flagged with
    ``set_output(True)`` before the graph is built on the handle.

    Returns:
        ``(graph, x, w, y)`` - the built graph and its three graph tensors.
    """
    float_type = hipdnn.data_type.FLOAT
    fprop_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="convolution_forward",
    )

    # Graph-side counterparts of the two torch inputs.
    x_desc = fprop_graph.tensor_like(torch_tensor_x)
    w_desc = fprop_graph.tensor_like(torch_tensor_w)

    # The single forward convolution node.
    y_desc = fprop_graph.conv_fprop(
        image=x_desc,
        weight=w_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d_forward",
    )
    y_desc.set_output(True)

    fprop_graph.build(hipdnn_handle)
    return fprop_graph, x_desc, w_desc, y_desc
if __name__ == "__main__":
    # Problem size: the input is (n, c, h, w), the filter is (k, c, r, s).
    n, c, h, w = 4, 16, 56, 56  # batch, input channels, height, width
    k, r, s = 4, 1, 1  # output channels, filter height, filter width

    # Convolution parameters, each given as [height, width].
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Random input and filter tensors on the GPU.
    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda")
    torch_tensor_w = torch.rand(k, c, r, s, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    graph, x_desc, w_desc, y_desc = build_conv_forward_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_w,
        padding,
        stride,
        dilation,
        hipdnn_data_type,
    )

    # The y buffer takes its shape from the graph's output tensor.
    torch_tensor_y = torch.empty(y_desc.get_dim(), dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        x_desc: torch_tensor_x.data_ptr(),
        w_desc: torch_tensor_w.data_ptr(),
        y_desc: torch_tensor_y.data_ptr(),
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("Convolution forward graph execution complete.")
python/convolution/convolution_wrw.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_conv_wrw_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_dy,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Build a hipdnn graph holding a single ``conv_wgrad`` node.

    Graph tensors for the input (``x``) and the incoming gradient (``dy``)
    are created with ``tensor_like``; the ``conv_wgrad`` result is flagged
    with ``set_output(True)`` before the graph is built on the handle.

    Returns:
        ``(graph, x, dy, dw)`` - the built graph and its three graph tensors.
    """
    float_type = hipdnn.data_type.FLOAT
    wgrad_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="convolution_wrw",
    )

    # Graph-side counterparts of the two torch inputs.
    x_desc = wgrad_graph.tensor_like(torch_tensor_x)
    dy_desc = wgrad_graph.tensor_like(torch_tensor_dy)

    # The single weight-gradient convolution node.
    dw_desc = wgrad_graph.conv_wgrad(
        image=x_desc,
        loss=dy_desc,
        padding=padding,
        stride=stride,
        dilation=dilation,
        name="conv2d_wrw",
    )
    dw_desc.set_output(True)

    wgrad_graph.build(hipdnn_handle)
    return wgrad_graph, x_desc, dy_desc, dw_desc
if __name__ == "__main__":
    # Problem size: x is (n, c, h, w), dy is (n, k, h, w).
    n, c, h, w = 4, 32, 16, 16  # batch, input channels, height, width
    # NOTE(review): r and s are declared but never passed to the graph in this
    # script - confirm how hipdnn determines the filter extent of dw.
    k, r, s = 64, 3, 3  # output channels, filter height, filter width

    # Convolution parameters, each given as [height, width].
    padding = [1, 1]
    stride = [1, 1]
    dilation = [1, 1]

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Random input and gradient tensors on the GPU.
    torch_tensor_x = torch.rand(n, c, h, w, dtype=torch_data_type, device="cuda")
    torch_tensor_dy = torch.rand(n, k, h, w, dtype=torch_data_type, device="cuda")

    hipdnn_handle = hipdnn.create_handle()

    graph, x_desc, dy_desc, dw_desc = build_conv_wrw_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_dy,
        padding,
        stride,
        dilation,
        hipdnn_data_type,
    )

    # The dw buffer takes its shape from the graph's output tensor.
    torch_tensor_dw = torch.empty(dw_desc.get_dim(), dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        x_desc: torch_tensor_x.data_ptr(),
        dy_desc: torch_tensor_dy.data_ptr(),
        dw_desc: torch_tensor_dw.data_ptr(),
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("Convolution wrw graph execution complete.")
python/ctc_loss/ctc_loss.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_ctc_loss_graph(
    hipdnn_handle,
    torch_tensor_probs,
    hipdnn_data_type,
    labels=None,
    label_lengths=None,
    input_lengths=None,
):
    """Build a hipdnn graph holding a single ``ctc_loss`` node.

    Args:
        hipdnn_handle: Handle used to create and build the graph.
        torch_tensor_probs: Torch tensor describing the probabilities input.
        hipdnn_data_type: io data type of the graph.
        labels: Label ids forwarded to ``ctc_loss``. Defaults to the
            sample's built-in ``[1, 2, 3, 4, 2, 3, 2]``.
        label_lengths: Label lengths forwarded to ``ctc_loss``. Defaults to
            ``[1, 2, 1, 3]``.
        input_lengths: Input lengths forwarded to ``ctc_loss``. Defaults to
            ``[4, 100, 100, 200]``.

    Returns:
        ``(graph, probs, losses, gradients)`` - the built graph, the graph
        tensor for the probabilities and the two output tensors.

    Raises:
        ValueError: If ``label_lengths`` and ``input_lengths`` do not have the
            same number of entries.
    """
    # The defaults reproduce the values that used to be hard-coded here, so
    # existing three-argument callers get the same graph as before.
    labels = [1, 2, 3, 4, 2, 3, 2] if labels is None else list(labels)
    label_lengths = [1, 2, 1, 3] if label_lengths is None else list(label_lengths)
    input_lengths = [4, 100, 100, 200] if input_lengths is None else list(input_lengths)

    # Assumes both length lists carry one entry per sequence, as the built-in
    # example does (four entries each) - checked before any hipdnn call.
    if len(label_lengths) != len(input_lengths):
        raise ValueError(
            "label_lengths and input_lengths must have the same number of entries, "
            f"got {len(label_lengths)} and {len(input_lengths)}"
        )

    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=hipdnn.data_type.FLOAT,
        compute_data_type=hipdnn.data_type.FLOAT,
        name="ctc_loss_inference",
    )

    hipdnn_tensor_probs = graph.tensor_like(torch_tensor_probs)

    losses, gradients = graph.ctc_loss(
        probs=hipdnn_tensor_probs,
        blank_label_id=0,
        apply_softmax=False,
        algo=0,
        labels=labels,
        label_lengths=label_lengths,
        input_lengths=input_lengths,
        name="ctc_loss",
    )
    # Both results are flagged as graph outputs.
    losses.set_output(True)
    gradients.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, hipdnn_tensor_probs, losses, gradients)
if __name__ == "__main__":
    batch = 4
    max_time = 500
    num_classes = 5

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    # Random probabilities on the GPU, allocated as (max_time, batch, num_classes).
    torch_tensor_probs = torch.rand(
        max_time,
        batch,
        num_classes,
        dtype=torch_data_type,
        device="cuda",
    )

    hipdnn_handle = hipdnn.create_handle()

    built = build_ctc_loss_graph(hipdnn_handle, torch_tensor_probs, hipdnn_data_type)
    graph, probs_desc, losses_desc, gradients_desc = built

    # One loss value per batch entry.
    torch_tensor_losses = torch.empty(batch, dtype=torch_data_type, device="cuda")
    # NOTE(review): probs is allocated as (max_time, batch, num_classes) while
    # this buffer is (batch, max_time, num_classes). The element counts match,
    # but confirm which axis order hipdnn writes the gradients in.
    torch_tensor_gradients = torch.empty(
        batch,
        max_time,
        num_classes,
        dtype=torch_data_type,
        device="cuda",
    )

    # Bind every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        probs_desc: torch_tensor_probs.data_ptr(),
        losses_desc: torch_tensor_losses.data_ptr(),
        gradients_desc: torch_tensor_gradients.data_ptr(),
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")
    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("ctc_loss graph execution complete.")
python/deformattention/deform_attention.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_attention_graph(
    hipdnn_handle,
    torch_tensor_value,
    torch_tensor_spatial_shapes,
    torch_tensor_level_start_index,
    torch_tensor_sampling_locations,
    torch_tensor_attention_weights,
    hipdnn_data_type,
):
    """Build a hipdnn graph holding a single ``deform_attn_fprop`` node.

    Every torch tensor argument is mirrored by a graph tensor created with
    ``tensor_like`` (in argument order); the node's result is flagged with
    ``set_output(True)`` before the graph is built on the handle.

    Returns:
        ``(graph, value, spatial_shapes, level_start_index,
        sampling_locations, attention_weights, y)`` - the built graph, the
        five input graph tensors and the output graph tensor.
    """
    float_type = hipdnn.data_type.FLOAT
    attn_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="deform_attention",
    )

    # Graph tensors are created in the same order as the torch arguments.
    value_desc, shapes_desc, start_desc, locations_desc, weights_desc = (
        attn_graph.tensor_like(template)
        for template in (
            torch_tensor_value,
            torch_tensor_spatial_shapes,
            torch_tensor_level_start_index,
            torch_tensor_sampling_locations,
            torch_tensor_attention_weights,
        )
    )

    # The single deformable-attention forward node.
    y_desc = attn_graph.deform_attn_fprop(
        value=value_desc,
        spatial_shapes=shapes_desc,
        level_start_index=start_desc,
        sampling_locations=locations_desc,
        attention_weights=weights_desc,
        name="deform_attn_fprop",
    )
    y_desc.set_output(True)

    attn_graph.build(hipdnn_handle)

    return (
        attn_graph,
        value_desc,
        shapes_desc,
        start_desc,
        locations_desc,
        weights_desc,
        y_desc,
    )
if __name__ == "__main__":
    # Input dimensions
    n = 2  # batch size
    n_heads = 2
    embed_dims_per_head = 32
    n_levels = 2
    n_points = 2
    n_queries = 32

    # Two random extents per level, each drawn from [1, 16).
    spatial_shapes_cpu = torch.randint(low=1, high=16, size=(n_levels, 2), dtype=torch.int64)

    # Number of positions per level (product of that level's two extents);
    # computed once and reused for both n_keys and level_start_index.
    count_per_level = spatial_shapes_cpu.prod(dim=1)

    # Total key count as a plain Python int so it can be used directly as a
    # tensor size below.
    n_keys = int(count_per_level.sum())

    # Start offset of every level: exclusive prefix sum of the per-level counts.
    level_start_index_cpu = torch.zeros_like(count_per_level)
    level_start_index_cpu[1:] = torch.cumsum(count_per_level[:-1], dim=0)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_value = torch.rand(
        n,
        n_keys,
        n_heads,
        embed_dims_per_head,
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_spatial_shapes = spatial_shapes_cpu.to("cuda")
    torch_tensor_level_start_index = level_start_index_cpu.to("cuda")
    torch_tensor_sampling_locations = torch.rand(
        n,
        n_queries,
        n_heads,
        n_levels,
        n_points,
        2,
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_attention_weights = torch.rand(
        n,
        n_queries,
        n_heads,
        n_levels,
        n_points,
        dtype=torch_data_type,
        device="cuda",
    )

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_value,
        hipdnn_tensor_spatial_shapes,
        hipdnn_tensor_level_start_index,
        hipdnn_tensor_sampling_locations,
        hipdnn_tensor_attention_weights,
        hipdnn_tensor_y,
    ) = build_deform_attention_graph(
        hipdnn_handle,
        torch_tensor_value,
        torch_tensor_spatial_shapes,
        torch_tensor_level_start_index,
        torch_tensor_sampling_locations,
        torch_tensor_attention_weights,
        hipdnn_data_type,
    )

    # Output buffer sized from the dimensions reported by the graph tensor.
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), dtype=torch_data_type, device="cuda")

    # Bind every graph tensor to the device pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_value: torch_tensor_value.data_ptr(),
        hipdnn_tensor_spatial_shapes: torch_tensor_spatial_shapes.data_ptr(),
        hipdnn_tensor_level_start_index: torch_tensor_level_start_index.data_ptr(),
        hipdnn_tensor_sampling_locations: torch_tensor_sampling_locations.data_ptr(),
        hipdnn_tensor_attention_weights: torch_tensor_attention_weights.data_ptr(),
        hipdnn_tensor_y: torch_tensor_y.data_ptr(),
    }

    workspace = torch.empty(graph.get_workspace_size(), dtype=torch.uint8, device="cuda")

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())

    print("Deform attention graph execution complete.")
python/deformattention/deform_attention_bwd.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_attention_bwd_graph(
    hipdnn_handle,
    torch_tensor_value,
    torch_tensor_spatial_shapes,
    torch_tensor_level_start_index,
    torch_tensor_sampling_locations,
    torch_tensor_attention_weights,
    torch_tensor_grad_output,
    hipdnn_data_type,
):
    """Build a hipdnn graph holding a single ``deform_attn_dgrad`` node.

    Every torch tensor argument is mirrored by a graph tensor created with
    ``tensor_like`` (in argument order). The node yields three tensors, each
    flagged with ``set_output(True)`` before the graph is built on the handle.

    Returns:
        ``(graph, value, spatial_shapes, level_start_index,
        sampling_locations, attention_weights, grad_output, grad_value,
        grad_sampling_loc, grad_attn_weight)`` - the built graph, the six
        input graph tensors and the three output graph tensors.
    """
    float_type = hipdnn.data_type.FLOAT
    bwd_graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="deform_attention_bwd",
    )

    # Graph tensors are created in the same order as the torch arguments.
    (
        value_desc,
        shapes_desc,
        start_desc,
        locations_desc,
        weights_desc,
        grad_out_desc,
    ) = (
        bwd_graph.tensor_like(template)
        for template in (
            torch_tensor_value,
            torch_tensor_spatial_shapes,
            torch_tensor_level_start_index,
            torch_tensor_sampling_locations,
            torch_tensor_attention_weights,
            torch_tensor_grad_output,
        )
    )

    # The single deformable-attention backward node.
    grad_value, grad_sampling_loc, grad_attn_weight = bwd_graph.deform_attn_dgrad(
        value=value_desc,
        spatial_shapes=shapes_desc,
        level_start_index=start_desc,
        sampling_locations=locations_desc,
        attention_weights=weights_desc,
        grad_output=grad_out_desc,
        name="deform_attn_dgrad",
    )

    # All three gradients are graph outputs.
    for grad_desc in (grad_value, grad_sampling_loc, grad_attn_weight):
        grad_desc.set_output(True)

    bwd_graph.build(hipdnn_handle)

    return (
        bwd_graph,
        value_desc,
        shapes_desc,
        start_desc,
        locations_desc,
        weights_desc,
        grad_out_desc,
        grad_value,
        grad_sampling_loc,
        grad_attn_weight,
    )
if __name__ == "__main__":
    # Input dimensions
    n = 2  # batch size
    n_heads = 2
    embed_dims_per_head = 32
    embed_dims = n_heads * embed_dims_per_head
    n_levels = 2
    n_points = 2
    n_queries = 32

    # Random (height, width) pair per feature level, generated on the CPU.
    spatial_shapes_cpu = torch.randint(
        low=1, high=16, size=(n_levels, 2), dtype=torch.int64
    )

    # Number of keys contributed by each level; computed once and reused for
    # both the total key count and the per-level start offsets.
    count_per_level = spatial_shapes_cpu.prod(dim=1)

    # Convert to a plain Python int so it can be used as a tensor dimension
    # instead of handing a 0-dim tensor to torch.rand.
    n_keys = int(count_per_level.sum())

    # Exclusive prefix sum: level i starts where levels 0..i-1 end.
    level_start_index_cpu = torch.zeros_like(count_per_level)
    level_start_index_cpu[1:] = torch.cumsum(count_per_level[:-1], dim=0)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32

    torch_tensor_value = torch.rand(
        n, n_keys, n_heads, embed_dims_per_head, dtype=torch_data_type, device="cuda"
    )
    torch_tensor_spatial_shapes = spatial_shapes_cpu.to("cuda")
    torch_tensor_level_start_index = level_start_index_cpu.to("cuda")
    torch_tensor_sampling_locations = torch.rand(
        n,
        n_queries,
        n_heads,
        n_levels,
        n_points,
        2,
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_attention_weights = torch.rand(
        n,
        n_queries,
        n_heads,
        n_levels,
        n_points,
        dtype=torch_data_type,
        device="cuda",
    )
    torch_tensor_grad_output = torch.rand(
        n, n_queries, embed_dims, dtype=torch_data_type, device="cuda"
    )

    hipdnn_handle = hipdnn.create_handle()

    (
        graph,
        hipdnn_tensor_value,
        hipdnn_tensor_spatial_shapes,
        hipdnn_tensor_level_start_index,
        hipdnn_tensor_sampling_locations,
        hipdnn_tensor_attention_weights,
        hipdnn_tensor_grad_output,
        hipdnn_tensor_grad_value,
        hipdnn_tensor_grad_sampling_loc,
        hipdnn_tensor_grad_attn_weight,
    ) = build_deform_attention_bwd_graph(
        hipdnn_handle,
        torch_tensor_value,
        torch_tensor_spatial_shapes,
        torch_tensor_level_start_index,
        torch_tensor_sampling_locations,
        torch_tensor_attention_weights,
        torch_tensor_grad_output,
        hipdnn_data_type,
    )

    # Output buffers are sized from the dimensions reported by the graph.
    torch_tensor_grad_value = torch.empty(
        hipdnn_tensor_grad_value.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_grad_sampling_loc = torch.empty(
        hipdnn_tensor_grad_sampling_loc.get_dim(), dtype=torch_data_type, device="cuda"
    )
    torch_tensor_grad_attn_weight = torch.empty(
        hipdnn_tensor_grad_attn_weight.get_dim(), dtype=torch_data_type, device="cuda"
    )

    # Map each graph tensor to the raw device pointer of its torch buffer.
    variant_pack = {
        hipdnn_tensor_value: torch_tensor_value.data_ptr(),
        hipdnn_tensor_spatial_shapes: torch_tensor_spatial_shapes.data_ptr(),
        hipdnn_tensor_level_start_index: torch_tensor_level_start_index.data_ptr(),
        hipdnn_tensor_sampling_locations: torch_tensor_sampling_locations.data_ptr(),
        hipdnn_tensor_attention_weights: torch_tensor_attention_weights.data_ptr(),
        hipdnn_tensor_grad_output: torch_tensor_grad_output.data_ptr(),
        hipdnn_tensor_grad_value: torch_tensor_grad_value.data_ptr(),
        hipdnn_tensor_grad_sampling_loc: torch_tensor_grad_sampling_loc.data_ptr(),
        hipdnn_tensor_grad_attn_weight: torch_tensor_grad_attn_weight.data_ptr(),
    }

    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("Deform attention bwd graph execution complete.")
python/deformconvolution/deform_convolution.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_convolution_graph(
    hipdnn_handle,
    torch_tensor_x,
    torch_tensor_offset,
    torch_tensor_w,
    torch_tensor_bias,
    torch_tensor_mask,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Assemble and build the hipDNN graph for a deformable-convolution forward pass.

    The five torch tensors are mirrored into the graph through
    ``graph.tensor_like`` and connected to one ``deform_conv_fprop`` node
    whose result is flagged as the graph output.

    Args:
        hipdnn_handle: Handle used both to create and to build the graph.
        torch_tensor_x: Template for the ``image`` input.
        torch_tensor_offset: Template for the ``offset`` input.
        torch_tensor_w: Template for the ``weight`` input.
        torch_tensor_bias: Template for the ``bias`` input.
        torch_tensor_mask: Template for the ``mask`` input.
        padding: Forwarded unchanged as the node's ``padding``.
        stride: Forwarded unchanged as the node's ``stride``.
        dilation: Forwarded unchanged as the node's ``dilation``.
        hipdnn_data_type: hipDNN type used as the graph's I/O data type.

    Returns:
        A 7-tuple: the built graph followed by the graph tensors for x,
        offset, w, bias, mask and the output y.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="deform_convolution",
    )

    # Register the inputs in the same order as the function signature.
    x, offset, w, bias, mask = [
        graph.tensor_like(template)
        for template in (
            torch_tensor_x,
            torch_tensor_offset,
            torch_tensor_w,
            torch_tensor_bias,
            torch_tensor_mask,
        )
    ]

    conv_args = {
        "image": x,
        "offset": offset,
        "weight": w,
        "bias": bias,
        "mask": mask,
        "padding": padding,
        "stride": stride,
        "dilation": dilation,
        "name": "deform_conv_fprop",
    }
    y = graph.deform_conv_fprop(**conv_args)
    y.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x, offset, w, bias, mask, y)
if __name__ == "__main__":
    # Input is (n, c, h, w): batch, input channels, height, width.
    n, c, h, w = 1, 16, 16, 16
    # Filter is (k, c, r, s): output channels, input channels, height, width.
    k, r, s = 1, 3, 3

    # Convolution parameters, one value per spatial axis (height, width).
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 0, 0
    dil_h, dil_w = 1, 1

    # Extent covered by the dilated filter along each axis.
    window_h = dil_h * (r - 1) + 1
    window_w = dil_w * (s - 1) + 1
    h_out = int((h + 2 * pad_h - window_h) / stride_h + 1)
    w_out = int((w + 2 * pad_w - window_w) / stride_w + 1)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    cuda_opts = {"dtype": torch_data_type, "device": "cuda"}
    nhwc = torch.channels_last

    # Random inputs; every 4-D tensor is converted to channels_last.
    torch_tensor_x = torch.rand(n, c, h, w, **cuda_opts).to(memory_format=nhwc)
    torch_tensor_offset = torch.rand(n, 2 * r * s, h_out, w_out, **cuda_opts).to(
        memory_format=nhwc
    )
    torch_tensor_w = torch.rand(k, c, r, s, **cuda_opts).to(memory_format=nhwc)
    torch_tensor_bias = torch.rand(k, **cuda_opts)
    torch_tensor_mask = torch.rand(n, r * s, h_out, w_out, **cuda_opts).to(
        memory_format=nhwc
    )

    hipdnn_handle = hipdnn.create_handle()

    built = build_deform_convolution_graph(
        hipdnn_handle,
        torch_tensor_x,
        torch_tensor_offset,
        torch_tensor_w,
        torch_tensor_bias,
        torch_tensor_mask,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x,
        hipdnn_tensor_offset,
        hipdnn_tensor_w,
        hipdnn_tensor_bias,
        hipdnn_tensor_mask,
        hipdnn_tensor_y,
    ) = built

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_y = torch.empty(
        hipdnn_tensor_y.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Pair each graph tensor with its torch buffer, then bind raw pointers.
    bindings = (
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_offset, torch_tensor_offset),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_mask, torch_tensor_mask),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("deform conv fprop graph execution complete.")
python/deformconvolution/deform_convolution_bwd.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_convolution_graph(
    hipdnn_handle,
    torch_tensor_dy,
    torch_tensor_x,
    torch_tensor_w,
    torch_tensor_offset,
    torch_tensor_mask,
    padding,
    stride,
    dilation,
    hipdnn_data_type,
):
    """Assemble and build the hipDNN graph for a deformable-convolution data-gradient pass.

    The five torch tensors are mirrored into the graph through
    ``graph.tensor_like`` and connected to one ``deform_conv_dgrad`` node
    whose three results are flagged as graph outputs.

    Args:
        hipdnn_handle: Handle used both to create and to build the graph.
        torch_tensor_dy: Template for the ``loss`` input.
        torch_tensor_x: Template for the ``image`` input.
        torch_tensor_w: Template for the ``filter`` input.
        torch_tensor_offset: Template for the ``offset`` input.
        torch_tensor_mask: Template for the ``mask`` input.
        padding: Forwarded unchanged as the node's ``padding``.
        stride: Forwarded unchanged as the node's ``stride``.
        dilation: Forwarded unchanged as the node's ``dilation``.
        hipdnn_data_type: hipDNN type used as the graph's I/O data type.

    Returns:
        A 9-tuple: the built graph, then the graph tensors for dy, w,
        offset, x and mask (note: not the parameter order), then the three
        outputs dx, doffset and dmask.
    """
    float_type = hipdnn.data_type.FLOAT
    # NOTE(review): the graph name equals the one used by the forward
    # sample ("deform_convolution") -- confirm whether that is intended.
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="deform_convolution",
    )

    # Register the inputs in the same order as the function signature.
    dy, x, w, offset, mask = [
        graph.tensor_like(template)
        for template in (
            torch_tensor_dy,
            torch_tensor_x,
            torch_tensor_w,
            torch_tensor_offset,
            torch_tensor_mask,
        )
    ]

    dgrad_args = {
        "loss": dy,
        "filter": w,
        "offset": offset,
        "image": x,
        "mask": mask,
        "padding": padding,
        "stride": stride,
        "dilation": dilation,
        "name": "deform_conv_bwd",
    }
    dx, doffset, dmask = graph.deform_conv_dgrad(**dgrad_args)
    for gradient in (dx, doffset, dmask):
        gradient.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, dy, w, offset, x, mask, dx, doffset, dmask)
if __name__ == "__main__":
    # Input is (n, c, h, w): batch, input channels, height, width.
    n, c, h, w = 1, 16, 16, 16
    # Filter is (k, c, r, s): output channels, input channels, height, width.
    k, r, s = 1, 3, 3

    # Convolution parameters, one value per spatial axis (height, width).
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 0, 0
    dil_h, dil_w = 1, 1

    # Extent covered by the dilated filter along each axis.
    window_h = dil_h * (r - 1) + 1
    window_w = dil_w * (s - 1) + 1
    h_out = int((h + 2 * pad_h - window_h) / stride_h + 1)
    w_out = int((w + 2 * pad_w - window_w) / stride_w + 1)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    cuda_opts = {"dtype": torch_data_type, "device": "cuda"}
    nhwc = torch.channels_last

    # Random inputs, all converted to channels_last.
    torch_tensor_dy = torch.rand(n, k, h_out, w_out, **cuda_opts).to(
        memory_format=nhwc
    )
    torch_tensor_x = torch.rand(n, c, h, w, **cuda_opts).to(memory_format=nhwc)
    torch_tensor_w = torch.rand(k, c, r, s, **cuda_opts).to(memory_format=nhwc)
    torch_tensor_offset = torch.rand(n, 2 * r * s, h_out, w_out, **cuda_opts).to(
        memory_format=nhwc
    )
    torch_tensor_mask = torch.rand(n, r * s, h_out, w_out, **cuda_opts).to(
        memory_format=nhwc
    )

    hipdnn_handle = hipdnn.create_handle()

    built = build_deform_convolution_graph(
        hipdnn_handle,
        torch_tensor_dy,
        torch_tensor_x,
        torch_tensor_w,
        torch_tensor_offset,
        torch_tensor_mask,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_dy,
        hipdnn_tensor_w,
        hipdnn_tensor_offset,
        hipdnn_tensor_x,
        hipdnn_tensor_mask,
        hipdnn_tensor_dx,
        hipdnn_tensor_doffset,
        hipdnn_tensor_dmask,
    ) = built

    # Output buffers sized from the dimensions reported by the graph.
    torch_tensor_dx = torch.empty(
        hipdnn_tensor_dx.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )
    torch_tensor_doffset = torch.empty(
        hipdnn_tensor_doffset.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )
    torch_tensor_dmask = torch.empty(
        hipdnn_tensor_dmask.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Pair each graph tensor with its torch buffer, then bind raw pointers.
    bindings = (
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_w, torch_tensor_w),
        (hipdnn_tensor_offset, torch_tensor_offset),
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_mask, torch_tensor_mask),
        (hipdnn_tensor_dx, torch_tensor_dx),
        (hipdnn_tensor_doffset, torch_tensor_doffset),
        (hipdnn_tensor_dmask, torch_tensor_dmask),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("deform conv bwd graph execution complete.")
python/deformconvolution/deform_convolution_wrw.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_deform_convolution_wrw_graph(
    hipdnn_handle,
    torch_tensor_dy,
    torch_tensor_x,
    torch_tensor_offset,
    torch_tensor_mask,
    padding,
    stride,
    dilation,
    dw_dims,
    hipdnn_data_type,
):
    """Assemble and build the hipDNN graph for a deformable-convolution weight-gradient pass.

    The four torch tensors are mirrored into the graph through
    ``graph.tensor_like`` and connected to one ``deform_conv_wgrad`` node.
    The node's result gets its dimensions set explicitly from ``dw_dims``
    and is flagged as the graph output.

    Args:
        hipdnn_handle: Handle used both to create and to build the graph.
        torch_tensor_dy: Template for the ``loss`` input.
        torch_tensor_x: Template for the ``image`` input.
        torch_tensor_offset: Template for the ``offset`` input.
        torch_tensor_mask: Template for the ``mask`` input.
        padding: Forwarded unchanged as the node's ``padding``.
        stride: Forwarded unchanged as the node's ``stride``.
        dilation: Forwarded unchanged as the node's ``dilation``.
        dw_dims: Dimensions assigned to the weight-gradient tensor.
        hipdnn_data_type: hipDNN type used as the graph's I/O data type.

    Returns:
        A 6-tuple: the built graph, then the graph tensors for dy, offset,
        x and mask (note: not the parameter order), then the output dw.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="deform_convolution_wrw",
    )

    # Register the inputs in the same order as the function signature.
    dy, x, offset, mask = [
        graph.tensor_like(template)
        for template in (
            torch_tensor_dy,
            torch_tensor_x,
            torch_tensor_offset,
            torch_tensor_mask,
        )
    ]

    wgrad_args = {
        "image": x,
        "offset": offset,
        "loss": dy,
        "mask": mask,
        "padding": padding,
        "stride": stride,
        "dilation": dilation,
        "name": "deform_conv2d_wrw",
    }
    dw = graph.deform_conv_wgrad(**wgrad_args)

    # set_output is invoked on whatever set_dim returns, exactly as in a
    # chained call.
    dw_with_dims = dw.set_dim(dw_dims)
    dw_with_dims.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, dy, offset, x, mask, dw)
if __name__ == "__main__":
    # Input is (n, c, h, w): batch, input channels, height, width.
    n, c, h, w = 4, 64, 16, 16
    # Filter is (k, c, r, s): output channels, input channels, height, width.
    k, r, s = 64, 3, 3

    # Convolution parameters, one value per spatial axis (height, width).
    stride_h, stride_w = 1, 1
    pad_h, pad_w = 0, 0
    dil_h, dil_w = 1, 1

    # Extent covered by the dilated filter along each axis.
    window_h = dil_h * (r - 1) + 1
    window_w = dil_w * (s - 1) + 1
    h_out = int((h + 2 * pad_h - window_h) / stride_h + 1)
    w_out = int((w + 2 * pad_w - window_w) / stride_w + 1)

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    cuda_opts = {"dtype": torch_data_type, "device": "cuda"}
    nhwc = torch.channels_last

    # Random inputs, all converted to channels_last.
    torch_tensor_dy = torch.rand(n, k, h_out, w_out, **cuda_opts).to(
        memory_format=nhwc
    )
    torch_tensor_x = torch.rand(n, c, h, w, **cuda_opts).to(memory_format=nhwc)
    torch_tensor_offset = torch.rand(n, 2 * r * s, h_out, w_out, **cuda_opts).to(
        memory_format=nhwc
    )
    torch_tensor_mask = torch.rand(n, r * s, h_out, w_out, **cuda_opts).to(
        memory_format=nhwc
    )

    hipdnn_handle = hipdnn.create_handle()

    built = build_deform_convolution_wrw_graph(
        hipdnn_handle,
        torch_tensor_dy,
        torch_tensor_x,
        torch_tensor_offset,
        torch_tensor_mask,
        [pad_h, pad_w],
        [stride_h, stride_w],
        [dil_h, dil_w],
        [k, c, r, s],
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_dy,
        hipdnn_tensor_offset,
        hipdnn_tensor_x,
        hipdnn_tensor_mask,
        hipdnn_tensor_dw,
    ) = built

    # Output buffer sized from the dimensions reported by the graph.
    torch_tensor_dw = torch.empty(
        hipdnn_tensor_dw.get_dim(),
        dtype=torch_data_type,
        memory_format=torch.channels_last,
        device="cuda",
    )

    # Pair each graph tensor with its torch buffer, then bind raw pointers.
    bindings = (
        (hipdnn_tensor_dy, torch_tensor_dy),
        (hipdnn_tensor_offset, torch_tensor_offset),
        (hipdnn_tensor_x, torch_tensor_x),
        (hipdnn_tensor_mask, torch_tensor_mask),
        (hipdnn_tensor_dw, torch_tensor_dw),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("deform conv wrw graph execution complete.")
python/fusion/add_layernorm.py
0 → 100644
View file @
ca34d4d2
import
hipdnn
import
torch
def build_layernorm_fusion_graph(
    hipdnn_handle,
    torch_tensor_x1,
    torch_tensor_x2,
    torch_tensor_scale,
    torch_tensor_bias,
    torch_tensor_epsilon,
    mode,
    eps,
    hipdnn_data_type,
):
    """Assemble and build a hipDNN graph that adds two tensors and layer-normalizes the sum.

    The graph holds an ``add`` node followed by a ``layernorm`` node. Both
    the sum and the normalized result are flagged as graph outputs; the
    other two values returned by ``layernorm`` are not used.

    Args:
        hipdnn_handle: Handle used both to create and to build the graph.
        torch_tensor_x1: Template for the first addend.
        torch_tensor_x2: Template for the second addend.
        torch_tensor_scale: Template for the layernorm scale input.
        torch_tensor_bias: Template for the layernorm bias input.
        torch_tensor_epsilon: Template for the epsilon graph tensor.
        mode: Forwarded as the first positional argument of ``layernorm``.
        eps: Value stored in the epsilon graph tensor via ``set_value``.
        hipdnn_data_type: hipDNN type used as the graph's I/O data type.

    Returns:
        A 7-tuple: the built graph followed by the graph tensors for x1,
        x2, scale, bias, the add output and the layernorm output y.
    """
    float_type = hipdnn.data_type.FLOAT
    graph = hipdnn.pygraph(
        handle=hipdnn_handle,
        io_data_type=hipdnn_data_type,
        intermediate_data_type=float_type,
        compute_data_type=float_type,
        name="layernorm_fusion_inference",
    )

    # Register the inputs in the same order as the function signature.
    x1, x2, scale, bias, epsilon = [
        graph.tensor_like(template)
        for template in (
            torch_tensor_x1,
            torch_tensor_x2,
            torch_tensor_scale,
            torch_tensor_bias,
            torch_tensor_epsilon,
        )
    ]
    # Epsilon carries its value inside the graph; it is not part of the
    # returned tuple.
    epsilon.set_value(eps)

    # Stage 1: elementwise sum, exposed as an output.
    summed = graph.add(a=x1, b=x2, name="add")
    summed.set_output(True)

    # Stage 2: layernorm over the sum; only y is kept.
    y, _mean, _inv_var = graph.layernorm(
        mode,
        summed,
        scale,
        bias,
        epsilon,
        hipdnn.data_type.FLOAT,
        name="layernorm",
    )
    y.set_output(True)

    graph.build(hipdnn_handle)

    return (graph, x1, x2, scale, bias, summed, y)
if __name__ == "__main__":
    # Both addends are (batch, seq_len, embedding_dim).
    batch, seq_len, embedding_dim = 16, 32, 64
    mode = hipdnn.norm_forward_phase.INFERENCE
    eps = 1e-5

    hipdnn_data_type = hipdnn.data_type.FLOAT
    torch_data_type = torch.float32
    cuda_opts = {"dtype": torch_data_type, "device": "cuda"}

    input_shape = (batch, seq_len, embedding_dim)
    torch_tensor_x1 = torch.rand(input_shape, **cuda_opts)
    torch_tensor_x2 = torch.rand(input_shape, **cuda_opts)
    torch_tensor_scale = torch.rand(embedding_dim, **cuda_opts)
    torch_tensor_bias = torch.rand(embedding_dim, **cuda_opts)
    # Epsilon lives on the CPU and is not bound in the variant pack below.
    torch_tensor_epsilon = torch.full(
        (1, 1, 1, 1),
        eps,
        dtype=torch.float32,
        requires_grad=False,
        device="cpu",
    )

    hipdnn_handle = hipdnn.create_handle()

    built = build_layernorm_fusion_graph(
        hipdnn_handle,
        torch_tensor_x1,
        torch_tensor_x2,
        torch_tensor_scale,
        torch_tensor_bias,
        torch_tensor_epsilon,
        mode,
        eps,
        hipdnn_data_type,
    )
    (
        graph,
        hipdnn_tensor_x1,
        hipdnn_tensor_x2,
        hipdnn_tensor_scale,
        hipdnn_tensor_bias,
        hipdnn_tensor_add_output,
        hipdnn_tensor_y,
    ) = built

    # Output buffers sized from the dimensions reported by the graph.
    torch_tensor_addoutput = torch.empty(
        hipdnn_tensor_add_output.get_dim(), **cuda_opts
    )
    torch_tensor_y = torch.empty(hipdnn_tensor_y.get_dim(), **cuda_opts)

    # Pair each graph tensor with its torch buffer, then bind raw pointers.
    bindings = (
        (hipdnn_tensor_x1, torch_tensor_x1),
        (hipdnn_tensor_x2, torch_tensor_x2),
        (hipdnn_tensor_scale, torch_tensor_scale),
        (hipdnn_tensor_bias, torch_tensor_bias),
        (hipdnn_tensor_add_output, torch_tensor_addoutput),
        (hipdnn_tensor_y, torch_tensor_y),
    )
    variant_pack = {
        graph_tensor: buffer.data_ptr() for graph_tensor, buffer in bindings
    }

    # Scratch buffer of the byte size requested by the graph.
    workspace = torch.empty(
        graph.get_workspace_size(), dtype=torch.uint8, device="cuda"
    )

    graph.exec(variant_pack=variant_pack, workspace=workspace.data_ptr())
    print("add_layernorm graph execution complete.")
Prev
1
…
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment