gaoqiong / MIGraphX · Commit 687c6d17

Authored Nov 23, 2023 by Artur Wojcik
Merge branch 'develop' into uif2-initial
Parents: 6fd76845, d3e5a5c0
Changes: 38 files
Showing 20 changed files with 910 additions and 104 deletions (+910 -104)
.github/workflows/ci.yaml                                    +1    -1
cmake/Embed.cmake                                            +0    -0
src/include/migraphx/op/pooling.hpp                          +40   -11
src/include/migraphx/rewrite_pooling.hpp                     +1    -0
src/onnx/parse_pooling.cpp                                   +15   -3
src/onnx/parse_qlinearunary.cpp                              +151  -0
src/rewrite_pooling.cpp                                      +131  -17
src/simplify_dyn_ops.cpp                                     +171  -22
src/targets/cpu/pooling.cpp                                  +13   -4
src/targets/gpu/compile_hip.cpp                              +27   -23
src/targets/gpu/compile_hip_code_object.cpp                  +1    -1
src/targets/gpu/device/include/migraphx/gpu/device/scan.hpp  +15   -7
src/targets/gpu/driver/precompile_op.cpp                     +84   -0
src/targets/gpu/include/migraphx/gpu/compile_hip.hpp         +3    -3
src/targets/gpu/include/migraphx/gpu/miopen.hpp              +6    -0
test/onnx/averagepool_dilate_test.onnx                       +17   -0
test/onnx/gen_onnx.py                                        +72   -0
test/onnx/maxpool_dilate_test.onnx                           +17   -0
test/onnx/onnx_test.cpp                                      +145  -12
test/onnx/qlinearleakyrelu_test.onnx                         +0    -0
.github/workflows/ci.yaml

@@ -465,7 +465,7 @@ jobs:
       - name: Upload code coverage
         if: "matrix.configuration == 'codecov'"
         env:
-          CODECOV_TOKEN: "8545af1c-f90b-4345-92a5-0d075503ca56"
+          CODECOV_TOKEN: "f5d5a10b-3177-4c76-b25f-9b1c2f165e8b"
         run: |
           sudo apt-get install -y lcov
           cd build
...
cmake/Embed.cmake (100755 → 100644)

File mode changed from 100755 to 100644
src/include/migraphx/op/pooling.hpp
...
...
@@ -70,7 +70,8 @@ struct pooling
     // 2 smaller than the input tensor rank (NCHW layout)
     std::vector<std::size_t> lengths = {1, 1};

-    // Dilations are not supported at this time.
+    // Spacing between the elements of the pooling kernel. Must be the same ndim as lengths.
+    std::vector<std::size_t> dilations = {1, 1};

     // ceiling mode is a flag affecting output size
     // or equivalently, placements of the pooling kernel.
...
...
@@ -99,6 +100,7 @@ struct pooling
f
(
self
.
padding_mode
,
"padding_mode"
),
f
(
self
.
stride
,
"stride"
),
f
(
self
.
lengths
,
"lengths"
),
f
(
self
.
dilations
,
"dilations"
),
f
(
self
.
ceil_mode
,
"ceil_mode"
),
f
(
self
.
lp_order
,
"lp_order"
),
f
(
self
.
dyn_global
,
"dyn_global"
));
...
...
@@ -112,14 +114,17 @@ struct pooling
             return;
         if((padding_mode != default_ and padding.size() != stride.size() and
             (padding.size()) != stride.size() * 2) or
-           stride.size() != lengths.size())
+           stride.size() != lengths.size() or dilations.size() != lengths.size())
         {
             MIGRAPHX_THROW("POOLING: inconsistent attribute sizes");
         }

-        if(std::any_of(lengths.begin(), lengths.end(), [&](auto i) { return (i == 0); }) or
-           std::any_of(stride.begin(), stride.end(), [&](auto i) { return (i == 0); }))
+        const auto is_zero = [](auto el) { return el == 0; };
+        if(std::any_of(lengths.begin(), lengths.end(), is_zero) or
+           std::any_of(stride.begin(), stride.end(), is_zero) or
+           std::any_of(dilations.begin(), dilations.end(), is_zero))
         {
-            MIGRAPHX_THROW("POOLING: size 0 pooling kernel or stride");
+            MIGRAPHX_THROW("POOLING: size 0 pooling kernel or stride or dilations");
         }
         // TODO: update lowering to run the reference
...
...
@@ -142,6 +147,11 @@ struct pooling
value
attributes
()
const
{
return
{{
"normalize_padding"
,
"padding"
}};
}
inline
std
::
size_t
dilate_dim
(
std
::
size_t
dim
,
std
::
size_t
dilation
)
const
{
return
1
+
dilation
*
(
dim
-
1
);
}
std
::
vector
<
std
::
size_t
>
calc_spatial_dim_out
(
const
std
::
vector
<
std
::
size_t
>&
input_lens
,
std
::
size_t
kdims
)
const
{
...
...
@@ -151,8 +161,9 @@ struct pooling
             std::size_t padding_factor = 2 * padding[i];
             if(padding.size() == 2 * kdims)
                 padding_factor = padding[i] + padding[i + kdims];
+            std::size_t dilated_length = dilate_dim(lengths[i], dilations[i]);
             std::size_t dim_size;
-            if(input_lens[i + 2] + padding_factor < lengths[i])
+            if(input_lens[i + 2] + padding_factor < dilated_length)
             {
                 if(padding_mode == default_)
                     MIGRAPHX_THROW("POOLING: not enough padding for the given kernel size");
...
...
@@ -162,7 +173,7 @@ struct pooling
             }
             else
             {
-                dim_size = input_lens[i + 2] + padding_factor - lengths[i];
+                dim_size = input_lens[i + 2] + padding_factor - dilated_length;
             }
             std::size_t len = (ceil_mode)
...
...
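For intuition, dilate_dim gives the effective window length 1 + dilation * (kernel - 1), and the spatial output size then follows the usual pooling formula with that dilated length. Below is a minimal standalone sketch (not MIGraphX code) of the arithmetic, using the numbers from the averagepool_dilate_test added later in this commit:

// Standalone sketch of the output-size arithmetic above, assuming default
// padding mode and floor (non-ceil) rounding.
#include <cstddef>
#include <iostream>

std::size_t dilate_dim(std::size_t dim, std::size_t dilation)
{
    return 1 + dilation * (dim - 1); // kernel 2, dilation 3 -> effective length 4
}

int main()
{
    std::size_t input = 3, kernel = 2, dilation = 3, pad = 1, stride = 1;
    std::size_t dilated_length = dilate_dim(kernel, dilation);     // 4
    std::size_t dim_size       = input + 2 * pad - dilated_length; // 3 + 2 - 4 = 1
    std::cout << dim_size / stride + 1 << "\n";                    // prints 2
}

So a length-3 axis with pads {1, 1}, kernel 2, stride 1, and dilation 3 yields 2 output elements, matching the {1, 4, 3} -> {1, 4, 2} shape pair the ONNX dilate tests expect.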
@@ -331,6 +342,7 @@ struct pooling
                 int start = static_cast<int>(idx_o[dim] * stride[d_2]) -
                             static_cast<int>(padding_vals[d_2]);
                 int end;
+                std::size_t dilated_kernel_dim = dilate_dim(kernel_dims[d_2], dilations[d_2]);
                 // NOLINT
                 if(count_include_pad and ceil_mode and (mode != pooling_mode::max))
                 {
...
...
@@ -340,15 +352,14 @@ struct pooling
                     // padding. Clip out-of-bounds indexes but not padding.
                     // Check if this kernel extends beyond the padding at end of dimension
-                    end = std::min(start + kernel_dims[d_2],
+                    end = std::min(start + dilated_kernel_dim,
                                    in_lens[dim] + static_cast<int>(padding_vals[d_2]));
                 }
                 else
                 {
                     // In non-ceiling mode, when
                     // count_include_pad is false, or for max pooling, clip off padding.
-                    end   = std::min(start + kernel_dims[d_2], in_lens[dim]);
                     start = std::max(start, 0);
+                    end   = std::min(start + dilated_kernel_dim, in_lens[dim]);
                 }
                 win_start.push_back(start);
                 if(end < start)
...
...
@@ -366,6 +377,16 @@ struct pooling
             // for each element in the window...
             shape_for_each(win_shape, [&](const auto& idx_w) {
+                // Skip elements that belong to the dilated area
+                for(size_t axis = 0; axis < idx_w.size(); ++axis)
+                {
+                    if(idx_w[axis] % dilations[axis])
+                    {
+                        pool_size -= 1;
+                        return;
+                    }
+                }
+
                 // the coordinates of this element
                 auto idx = idx_o;
...
...
@@ -390,7 +411,15 @@ struct pooling
                     // this is a padding element. Padding locations
                     // don't contribute to average or max pooling total but can play in
                     // lpnorm pooling.
-                    output_val = op(output_val, 0);
+                    if(mode == pooling_mode::lpnorm)
+                    {
+                        output_val = op(output_val, op.template init<Type>());
+                    }
+                    if(mode == pooling_mode::average)
+                    {
+                        // Ignore padding
+                        pool_size -= 1;
+                    }
                 }
             });
             output[i] = Type(op.final(output_val, pool_size));
...
...
src/include/migraphx/rewrite_pooling.hpp
...
...
@@ -26,6 +26,7 @@
 #include <string>
 #include <migraphx/config.hpp>
+#include <migraphx/instruction_ref.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
...
...
src/onnx/parse_pooling.cpp
...
...
@@ -91,6 +91,14 @@ struct parse_pooling : op_parser<parse_pooling>
             check_attr_sizes(
                 kdims, values["lengths"].size(), "PARSE_POOLING: inconsistent lengths");
         }

+        if(contains(info.attributes, "dilations"))
+        {
+            values["dilations"].clear();
+            copy(info.attributes["dilations"].ints(), std::back_inserter(values["dilations"]));
+            check_attr_sizes(
+                kdims, values["dilations"].size(), "PARSE_POOLING: inconsistent dilations");
+        }
+
         // lp_order attribute
         if(contains(info.attributes, "p"))
         {
...
...
@@ -169,10 +177,15 @@ struct parse_pooling : op_parser<parse_pooling>
             std::fill_n(values["stride"].begin(), kdims, 1);
         }
+        if(values["dilations"].size() != kdims)
+        {
+            values["dilations"].resize(kdims);
+            std::fill_n(values["dilations"].begin(), kdims, 1);
+        }

         // used to calculate the supposed output shape
         std::vector<int64_t> orig_padding = paddings;

-        // TODO: add parsing for dilations
         if(contains(info.attributes, "auto_pad") and
            to_upper(info.attributes["auto_pad"].s()) != "NOTSET")
         {
...
...
@@ -189,11 +202,10 @@ struct parse_pooling : op_parser<parse_pooling>
         else
         {
             // Calculate auto padding
-            // dilations (argument 4) not supported; default to all 1's
             cal_auto_padding_size(info,
                                   values,
                                   values["lengths"].to_vector<std::size_t>(),
-                                  std::vector<size_t>(in_shape.ndim() - 2, 1),
+                                  values["dilations"].to_vector<std::size_t>(),
                                   in_shape.lens(),
                                   paddings);
             values["padding"] = paddings;
...
...
src/onnx/parse_qlinearunary.cpp
0 → 100644
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/common.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/onnx/broadcast_qdq.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
/*
*********************************************************************************
* Reference: see QLinearSigmoid, QLinearLeakyRelu in *
* https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md *
*********************************************************************************
com.microsoft.QLinearSigmoid
QLinearSigmoid takes quantized input data (Tensor), and quantize parameter for output, and produces
one output data (Tensor) where the function f(x) = quantize(Sigmoid(dequantize(x))), is applied to
the data tensor elementwise. Where the function Sigmoid(x) = 1 / (1 + exp(-x))
Version
This version of the operator has been available since version 1 of the 'com.microsoft' operator
set.
*****************************************************************************************************
com.microsoft.QLinearLeakyRelu
QLinearLeakyRelu takes quantized input data (Tensor), an argument alpha, and quantize parameter for
output, and produces one output data (Tensor) where the function f(x) = quantize(alpha *
dequantize(x)) for dequantize(x) < 0, f(x) = quantize(dequantize(x)) for dequantize(x) >= 0, is
applied to the data tensor elementwise.
Version
This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
Attributes
alpha : float
Coefficient of leakage.
******************************************************************************************************
Generic input layout of QLinear unary operators:
Inputs (4 - 5)
X : T
Input tensor
X_scale : tensor(float)
Input X's scale. It's a scalar, which means a per-tensor/layer quantization.
X_zero_point (optional) : T
Input X's zero point. Default value is 0 if it's not specified. It's a scalar, which means a
per-tensor/layer quantization.
Y_scale : tensor(float)
Output Y's scale. It's a scalar, which means a per-tensor/layer quantization.
Y_zero_point (optional) : T
Output Y's zero point. Default value is 0 if it's not specified. It's a scalar, which means a
per-tensor/layer quantization.
Outputs
Y : T
Output tensor
Type Constraints
T : tensor(uint8), tensor(int8)
Constrain input and output types to 8 bit tensors.
*/
struct parse_qlinearunary : op_parser<parse_qlinearunary>
{
    std::vector<op_desc> operators() const
    {
        return {{"QLinearSigmoid", "sigmoid"}, {"QLinearLeakyRelu", "leaky_relu"}};
    }

    void check_inputs(const op_desc& opd, const std::vector<instruction_ref>& args) const
    {
        if(args.size() < 4)
            MIGRAPHX_THROW(opd.op_name + ": missing inputs");

        const auto& in_x = args[0];
        auto sh_x        = in_x->get_shape();
        auto type_x      = sh_x.type();
        if(type_x != migraphx::shape::int8_type and type_x != migraphx::shape::uint8_type)
            MIGRAPHX_THROW(opd.op_name + ": unsupported input type");
    }

    instruction_ref parse(const op_desc& opd,
                          const onnx_parser& parser,
                          const onnx_parser::node_info& info,
                          const std::vector<instruction_ref>& args) const
    {
        check_inputs(opd, args);

        // X
        const auto& in_x         = args[0];
        const auto& in_scale_x   = args[1];
        const auto& in_zero_pt_x = args[2];

        auto dquant_x = bcast_qdq_instr("dequantizelinear", in_x, in_scale_x, in_zero_pt_x, info);

        // Y = op(dequantizelinear(x))
        auto op = parser.load(opd.op_name, info);
        auto y  = info.add_instruction(op, dquant_x);

        const auto& in_scale_y = args[3];
        // zero_pt for Y is supplied as the last optional argument.
        if(args.size() == 5)
            return (bcast_qdq_instr("quantizelinear", y, in_scale_y, args[4], info));

        // if no zero_pt: just broadcast the scale.
        auto bcast_scale_sigm = bcast_scalar_instr(y->get_shape(), in_scale_y, info);
        return (info.add_instruction(migraphx::make_op("quantizelinear"), y, bcast_scale_sigm));
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
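Both operators lower to the same three-step pattern: dequantizelinear, the float elementwise op, then quantizelinear, with scale and zero point broadcast to the data shape. A scalar sketch (illustrative values, not MIGraphX code) of the per-tensor math for QLinearSigmoid:

// y = quantize(sigmoid(dequantize(x))), per-tensor scale/zero-point.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

int8_t qlinear_sigmoid(int8_t x, float x_scale, int8_t x_zp, float y_scale, int8_t y_zp)
{
    float real = (x - x_zp) * x_scale;            // dequantizelinear
    float act  = 1.0f / (1.0f + std::exp(-real)); // sigmoid
    long q     = std::lround(act / y_scale) + y_zp; // quantizelinear
    return static_cast<int8_t>(std::clamp(q, -128L, 127L));
}

int main()
{
    // With the qlinearsigmoid_test parameters below (X_scale 0.05, X_zp 0,
    // Y_scale 0.0035, Y_zp -128): x = 0 -> sigmoid(0) = 0.5 -> 143 - 128 = 15.
    std::cout << int(qlinear_sigmoid(0, 0.05f, 0, 0.0035f, -128)) << "\n";
}

QLinearLeakyRelu differs only in the middle step, applying alpha * v for v < 0.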
src/rewrite_pooling.cpp
...
...
@@ -35,6 +35,110 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+static void replace_with_reduce(module& m, instruction_ref ins)
+{
+    auto&& s  = ins->inputs().front()->get_shape();
+    auto&& op = any_cast<op::pooling>(ins->get_operator());
+    auto lens = s.lens();
+    std::vector<std::int64_t> axes(lens.size() - 2);
+    std::iota(axes.begin(), axes.end(), 2);
+    // average pooling
+    if(op.mode == op::pooling_mode::average)
+    {
+        m.replace_instruction(ins, make_op("reduce_mean", {{"axes", axes}}), ins->inputs());
+    }
+    // max pooling
+    else
+    {
+        m.replace_instruction(ins, make_op("reduce_max", {{"axes", axes}}), ins->inputs());
+    }
+}
+
+static void replace_dilations_with_gather_pooling(module& m, instruction_ref ins)
+{
+    // TODO remove this when MIOpen supports dilated pooling
+    auto&& s  = ins->inputs().front()->get_shape();
+    auto&& op = any_cast<op::pooling>(ins->get_operator());
+
+    // Ignore N, C axes
+    std::vector<size_t> dims = {s.lens().cbegin() + 2, s.lens().cend()};
+
+    bool default_padding =
+        std::all_of(op.padding.cbegin(), op.padding.cend(), [](auto i) { return i == 0; });
+
+    if(not default_padding)
+    {
+        for(size_t idx{0}; idx < op.padding.size(); ++idx)
+        {
+            // We need to pad both ends
+            dims[idx] += op.padding.at(idx) * 2;
+        }
+    }
+
+    std::vector<size_t> kernels   = op.lengths;
+    std::vector<size_t> strides   = op.stride;
+    std::vector<size_t> dilations = op.dilations;
+
+    std::vector<std::vector<int>> axis_indices;
+    axis_indices.resize(dims.size());
+
+    for(auto idx{0}; idx < dims.size(); ++idx)
+    {
+        // Only consider if it fits into the window
+        for(size_t stride{0}; stride < dims.at(idx) - dilations.at(idx) * (kernels.at(idx) - 1);
+            stride += strides.at(idx))
+        {
+            for(size_t step{0}; step < kernels.at(idx); ++step)
+            {
+                axis_indices.at(idx).push_back(stride + dilations.at(idx) * step);
+            }
+        }
+    }
+
+    auto elements = ins->inputs().front();
+    if(not default_padding)
+    {
+        // Pad supports asym, we need to provide both ends
+        std::vector<size_t> padding(2 * s.lens().size(), 0);
+        // Format will be e.g. {N, C, P1, P2, N, C, P1, P2}
+        for(size_t idx{0}; idx < op.padding.size(); ++idx)
+        {
+            // Ignore N, C axes
+            padding.at(2 + idx)                   = op.padding.at(idx);
+            padding.at(2 + idx + s.lens().size()) = op.padding.at(idx);
+        }
+        // Default value needed for Max pooling
+        elements = m.insert_instruction(
+            ins,
+            make_op("pad", {{"pads", padding}, {"value", std::numeric_limits<float>::lowest()}}),
+            elements);
+    }
+
+    for(auto idx{0}; idx < axis_indices.size(); ++idx)
+    {
+        migraphx::shape s_indices{migraphx::shape::int32_type, {axis_indices.at(idx).size()}};
+        auto indices = m.add_literal(migraphx::literal{s_indices, axis_indices.at(idx)});
+        elements     = m.insert_instruction(
+            ins, make_op("gather", {{"axis", idx + 2 /*ignore N,C*/}}), elements, indices);
+    }
+
+    // Ignore padding
+    std::vector<size_t> new_padding(kernels.size(), 0);
+    // The kernel window elements are placed next to each other. E.g. {x1, y1, x2, y2, ...}
+    // We need to skip them to not overlap
+    std::vector<size_t> new_strides(kernels);
+    // Ignore dilations
+    std::vector<size_t> new_dilations(kernels.size(), 1);
+
+    m.replace_instruction(ins,
+                          make_op("pooling",
+                                  {{"mode", op.mode},
+                                   {"padding", new_padding},
+                                   {"stride", new_strides},
+                                   {"lengths", kernels},
+                                   {"dilations", new_dilations}}),
+                          elements);
+}
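To see how the gather indices emulate dilation, take the dilate tests added later in this commit: one spatial axis of length 3, padding 1 per side (so a padded length of 5), kernel 2, stride 1, dilation 3. A minimal sketch (not MIGraphX code) of the index loop above:

// Index generation for one spatial axis: padded length 5, kernel 2, stride 1,
// dilation 3, as in the averagepool/maxpool dilate tests.
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::size_t dim = 5, kernel = 2, stride = 1, dilation = 3;
    std::vector<int> indices;
    for(std::size_t s = 0; s < dim - dilation * (kernel - 1); s += stride)
        for(std::size_t step = 0; step < kernel; ++step)
            indices.push_back(s + dilation * step);
    // Prints 0 3 1 4: window 0 reads {0, 3}, window 1 reads {1, 4}.
    for(int i : indices)
        std::cout << i << " ";
}

Because the gathered axis holds each window's elements contiguously, the follow-up pooling runs with no padding or dilation and with new_strides equal to the kernel lengths, so consecutive windows never overlap.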
 void rewrite_pooling::apply(module& m) const
 {
     for(auto ins : iterator_for(m))
...
...
@@ -43,26 +147,36 @@ void rewrite_pooling::apply(module& m) const
             continue;
         if(ins->inputs().empty())
             continue;
         auto&& s  = ins->inputs().front()->get_shape();
         auto&& op = any_cast<op::pooling>(ins->get_operator());
-        if(not std::all_of(op.padding.begin(), op.padding.end(), [](auto i) { return i == 0; }))
-            continue;
-        if(not std::all_of(op.stride.begin(), op.stride.end(), [](auto i) { return i == 1; }))
-            continue;
-        auto lens = s.lens();
-        if(not std::equal(lens.begin() + 2, lens.end(), op.lengths.begin(), op.lengths.end()))
-            continue;
-        std::vector<std::int64_t> axes(lens.size() - 2);
-        std::iota(axes.begin(), axes.end(), 2);
-        // average pooling
-        if(op.mode == op::pooling_mode::average)
+        bool same_kernel_as_shape = std::equal(
+            s.lens().cbegin() + 2, s.lens().cend(), op.lengths.cbegin(), op.lengths.cend());
+        bool default_strides =
+            std::all_of(op.stride.cbegin(), op.stride.cend(), [](auto i) { return i == 1; });
+        bool default_padding =
+            std::all_of(op.padding.cbegin(), op.padding.cend(), [](auto i) { return i == 0; });
+        bool default_dilations = std::all_of(
+            op.dilations.cbegin(), op.dilations.cend(), [](auto i) { return i == 1; });
+        if(same_kernel_as_shape and default_strides and default_padding and default_dilations)
         {
-            m.replace_instruction(ins, make_op("reduce_mean", {{"axes", axes}}), ins->inputs());
+            replace_with_reduce(m, ins);
         }
-        // max pooling
-        else
+        else if(not default_dilations)
         {
-            m.replace_instruction(ins, make_op("reduce_max", {{"axes", axes}}), ins->inputs());
+            // Dilated AvgPool with padding is not supported
+            if(not default_padding and op.mode == op::pooling_mode::average)
+            {
+                continue;
+            }
+            auto size = std::accumulate(
+                s.lens().cbegin(), s.lens().cend(), 1, std::multiplies<size_t>());
+            // Can't handle too much size because of literal size
+            if(size > 100000)
+            {
+                continue;
+            }
+            replace_dilations_with_gather_pooling(m, ins);
+        }
     }
 }
...
...
src/simplify_dyn_ops.cpp
...
...
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include <migraphx/simplify_dyn_ops.hpp>
+#include <migraphx/op/slice.hpp>
 #include <migraphx/matcher.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/literal.hpp>
...
...
@@ -33,6 +34,10 @@ inline namespace MIGRAPHX_INLINE_NS {
  * Convert 2 input static shape broadcast/multibroadcast into 1 input version.
  * Some compiler passes (ex. simplify_algebra) only support the 1 input versions
  * of the broadcasting operators.
+ * From:
+ *   broadcast_op(argument_with_static_shape, argument_with_static_shape)
+ * To:
+ *   broadcast_op(argument_with_static_shape); broadcast_op.out_lens = constant_output_dims
  */
 struct find_static_2in_broadcasts
 {
...
...
@@ -61,8 +66,65 @@ struct find_static_2in_broadcasts
 };

 /**
- * Simplify slice with variable `starts` and `ends` to the constant version if
- * the `input_starts` and `input_ends` inputs are constant.
+ * Simplify slice with 2 inputs to the 1 input version if inputs[1] is constant.
+ * From:
+ *   slice(data, constant_input); two attributes set
+ * To:
+ *   slice(data); slice.starts, slice.ends, slice.axes set
  */
+struct find_const_2in_slice
+{
+    auto matcher() const
+    {
+        return match::name("slice")(match::nargs(2), match::arg(1)(match::is_constant()));
+    }
+
+    void apply(module& m, const match::matcher_result& mr) const
+    {
+        auto ins       = mr.result;
+        auto inputs    = ins->inputs();
+        auto slice_op  = any_cast<op::slice>(ins->get_operator());
+        auto set_attrs = slice_op.get_set_attributes();
+        std::vector<int64_t> starts_vec;
+        std::vector<int64_t> ends_vec;
+        std::vector<int64_t> axes_vec;
+        if(set_attrs == op::slice::ends_axes)
+        {
+            // slice(data, starts)
+            inputs.at(1)->eval().visit(
+                [&](auto output) { starts_vec.assign(output.begin(), output.end()); });
+            ends_vec = slice_op.ends;
+            axes_vec = slice_op.axes;
+        }
+        else if(set_attrs == op::slice::starts_axes)
+        {
+            // slice(data, ends)
+            inputs.at(1)->eval().visit(
+                [&](auto output) { ends_vec.assign(output.begin(), output.end()); });
+            starts_vec = slice_op.starts;
+            axes_vec   = slice_op.axes;
+        }
+        else
+        {
+            // slice(data, axes)
+            inputs.at(1)->eval().visit(
+                [&](auto output) { axes_vec.assign(output.begin(), output.end()); });
+            starts_vec = slice_op.starts;
+            ends_vec   = slice_op.ends;
+        }
+        m.replace_instruction(
+            ins,
+            make_op("slice", {{"starts", starts_vec}, {"ends", ends_vec}, {"axes", axes_vec}}),
+            inputs.at(0));
+    }
+};
+
+/**
+ * Simplify slice with 3 inputs to the 1 input version if inputs[1:2] are constant.
+ * From:
+ *   slice(data, constant_input1, constant_input2); one attribute set
+ * To:
+ *   slice(data); slice.starts, slice.ends, slice.axes set
+ */
 struct find_const_3in_slice
 {
...
...
@@ -77,27 +139,51 @@ struct find_const_3in_slice
     {
         auto ins    = mr.result;
         auto inputs = ins->inputs();
-        argument starts_arg = inputs.at(1)->eval();
-        argument ends_arg   = inputs.at(2)->eval();
-        if(not starts_arg.empty() and not ends_arg.empty())
+        auto slice_op  = any_cast<op::slice>(ins->get_operator());
+        auto set_attrs = slice_op.get_set_attributes();
+        std::vector<int64_t> starts_vec;
+        std::vector<int64_t> ends_vec;
+        std::vector<int64_t> axes_vec;
+        if(set_attrs == op::slice::axes_only)
         {
-            std::vector<int64_t> starts_vec;
-            std::vector<int64_t> ends_vec;
-            starts_arg.visit([&](auto output) { starts_vec.assign(output.begin(), output.end()); });
-            ends_arg.visit([&](auto output) { ends_vec.assign(output.begin(), output.end()); });
-            auto slice_val = ins->get_operator().to_value();
-            auto axes_vec  = slice_val.at("axes").to_vector<int64_t>();
-            m.replace_instruction(
-                ins,
-                make_op("slice",
-                        {{"starts", starts_vec}, {"ends", ends_vec}, {"axes", axes_vec}}),
-                inputs.at(0));
+            // slice(data, starts, ends)
+            inputs.at(1)->eval().visit(
+                [&](auto output) { starts_vec.assign(output.begin(), output.end()); });
+            inputs.at(2)->eval().visit(
+                [&](auto output) { ends_vec.assign(output.begin(), output.end()); });
+            axes_vec = slice_op.axes;
+        }
+        else if(set_attrs == op::slice::ends_only)
+        {
+            // slice(data, starts, axes)
+            inputs.at(1)->eval().visit(
+                [&](auto output) { starts_vec.assign(output.begin(), output.end()); });
+            inputs.at(2)->eval().visit(
+                [&](auto output) { axes_vec.assign(output.begin(), output.end()); });
+            ends_vec = slice_op.ends;
+        }
+        else
+        {
+            // slice(data, ends, axes)
+            inputs.at(1)->eval().visit(
+                [&](auto output) { ends_vec.assign(output.begin(), output.end()); });
+            inputs.at(2)->eval().visit(
+                [&](auto output) { axes_vec.assign(output.begin(), output.end()); });
+            starts_vec = slice_op.starts;
+        }
+        m.replace_instruction(
+            ins,
+            make_op("slice", {{"starts", starts_vec}, {"ends", ends_vec}, {"axes", axes_vec}}),
+            inputs.at(0));
     }
 };

 /**
- * Simplify slice with variable `starts`, `ends`, and `input_axes` to the constant version if
- * the `input_starts`, `input_ends`, and `input_axes` inputs are constant.
+ * Simplify slice with 4 inputs to the 1 input version if inputs[1:3] are constant.
+ * From:
+ *   slice(data, constant_starts, constant_ends, constant_axes)
+ * To:
+ *   slice(data); slice.starts, slice.ends, slice.axes set
  */
 struct find_const_4in_slice
 {
...
...
@@ -113,9 +199,9 @@ struct find_const_4in_slice
     {
         auto ins    = mr.result;
         auto inputs = ins->inputs();
-        argument starts_arg = inputs.at(1)->eval();
-        argument ends_arg   = inputs.at(2)->eval();
-        argument axes_arg   = inputs.at(3)->eval();
+        argument starts_arg = inputs.at(1)->eval(false);
+        argument ends_arg   = inputs.at(2)->eval(false);
+        argument axes_arg   = inputs.at(3)->eval(false);
         if(not starts_arg.empty() and not ends_arg.empty() and not axes_arg.empty())
         {
             std::vector<int64_t> starts_vec;
...
...
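At the IR level these matchers simply fold a constant runtime input back into slice attributes. A minimal sketch of the 2-input case (assumed MIGraphX C++ API usage, mirroring the literal syntax used in test/onnx/onnx_test.cpp; shapes and values are illustrative):

// A 2-input slice whose runtime `starts` input is constant collapses to the
// 1-input attribute form.
migraphx::program p;
auto* mm    = p.get_main_module();
auto data   = mm->add_parameter("data", {migraphx::shape::float_type, {4, 6}});
auto starts = mm->add_literal(migraphx::literal{migraphx::shape::int64_type, {2}});
// 2-input form: ends/axes already set as attributes, starts arrives as data.
mm->add_instruction(migraphx::make_op("slice", {{"ends", {6}}, {"axes", {1}}}), data, starts);
// After simplify_dyn_ops, find_const_2in_slice rewrites this to:
//   slice[starts={2},ends={6},axes={1}](data)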
@@ -172,13 +258,76 @@ struct find_static_dimensions_of
     }
 };

+/**
+ * Simplify allocate into 2 argument reshape that has constant output dimensions into a static 1
+ * argument reshape. Intended to simplify what ONNX parse_reshape creates for dynamic reshapes.
+ * This matcher can be generalized to matching reshape(data, static_shape_output_tensor).
+ * From:
+ *   x = allocate(constant_output_dims) -> reshape(data, x)
+ * To:
+ *   reshape(data); reshape.dims = constant_output_dims
+ */
+struct find_const_alloc_reshapes
+{
+    auto matcher() const
+    {
+        return match::name("reshape")(
+            match::nargs(2), match::arg(1)(match::name("allocate")(match::is_constant())));
+    }
+
+    void apply(module& m, const match::matcher_result& mr) const
+    {
+        auto reshape_ins         = mr.result;
+        auto reshape_inputs      = reshape_ins->inputs();
+        auto alloc_ins           = reshape_inputs.at(1);
+        argument output_dims_arg = alloc_ins->inputs().at(0)->eval(false);
+        std::vector<int64_t> output_dims_vec;
+        output_dims_arg.visit(
+            [&](auto output) { output_dims_vec.assign(output.begin(), output.end()); });
+        m.replace_instruction(
+            reshape_ins, make_op("reshape", {{"dims", output_dims_vec}}), reshape_inputs.at(0));
+        // have dead_code_elimination remove the previous allocate
+    }
+};
+
+/**
+ * Simplify allocate into fill operator that has constant output dimensions and constant value.
+ * The allocate into fill instructions is what is produced when parsing the ONNX
+ * ConstantOfShape operator. This replacement could be handled with propagate_constant, but
+ * would rather have the simplification happen earlier during compiling.
+ * This matcher can be generalized to matching fill(constant_value, static_shape_output_tensor).
+ * From:
+ *   x = allocate(constant_output_dims) -> fill(constant_value, x)
+ * To:
+ *   literal
+ */
+struct find_const_alloc_fill
+{
+    auto matcher() const
+    {
+        return match::name("fill")(match::arg(0)(match::is_constant()),
+                                   match::arg(1)(match::name("allocate")(match::is_constant())));
+    }
+
+    void apply(module& m, const match::matcher_result& mr) const
+    {
+        auto fill_ins = mr.result;
+        auto fill_arg = fill_ins->eval(false);
+        auto l        = m.add_literal(fill_arg.get_shape(), fill_arg.data());
+        m.replace_instruction(fill_ins, l);
+    }
+};
+
 void simplify_dyn_ops::apply(module& m) const
 {
     match::find_matches(m,
-                        find_static_2in_broadcasts{},
                         find_static_dimensions_of{},
+                        find_const_alloc_reshapes{},
+                        find_static_2in_broadcasts{},
+                        find_const_2in_slice{},
                         find_const_3in_slice{},
-                        find_const_4in_slice{});
+                        find_const_4in_slice{},
+                        find_const_alloc_fill{});
 }

 } // namespace MIGRAPHX_INLINE_NS
...
...
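Concretely, in schematic IR (instruction names as used by the passes above), parsing ONNX ConstantOfShape with shape input {2, 3} and value 0 yields something like:

    dims = @literal{int64_type, {2}; data: {2, 3}}
    buf  = allocate(dims)
    out  = fill(@literal{0.0f}, buf)

find_const_alloc_fill evaluates the fill at compile time and replaces the whole chain with a single 2x3 zero literal; the orphaned allocate is then dropped by dead_code_elimination, as the comment in find_const_alloc_reshapes notes.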
src/targets/cpu/pooling.cpp
...
...
@@ -34,23 +34,32 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace cpu {

-struct dnnl_pooling : dnnl_extend_op<dnnl_pooling, dnnl::pooling_forward, op::pooling>
+struct dnnl_pooling : dnnl_extend_op<dnnl_pooling, dnnl::pooling_v2_forward, op::pooling>
 {
     std::vector<int> arg_map(int) const { return {MIGRAPHX_DNNL_PREFIX(ARG_SRC)}; }

-    dnnl::pooling_forward::desc
+    dnnl::pooling_v2_forward::desc
     get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
     {
         auto algo  = op.mode == op::pooling_mode::max ? dnnl::algorithm::pooling_max
                                                       : dnnl::algorithm::pooling_avg;
         auto kdims = op.kdims();
         std::vector<size_t> padding_l(op.padding.begin(), op.padding.begin() + kdims);
         std::vector<size_t> padding_r(op.padding.begin() + kdims, op.padding.end());
+        // Note: It is not documented, but the default dilation seems to be 0 instead of 1.
+        // We need to offset dilations with -1.
+        std::vector<size_t> dilations;
+        std::transform(op.dilations.cbegin(),
+                       op.dilations.cend(),
+                       std::back_inserter(dilations),
+                       [](size_t d) { return d - 1; });
         return {dnnl::prop_kind::forward_inference,
                 algo,
                 m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
                 m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)),
                 to_dnnl_dims(op.stride),
                 to_dnnl_dims(op.lengths),
+                to_dnnl_dims(dilations),
                 to_dnnl_dims(padding_l),
                 to_dnnl_dims(padding_r)};
     }
...
...
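To make the off-by-one quirk concrete: MIGraphX stores dilations 1-based (1 means no dilation) while the oneDNN pooling_v2 descriptor expects them 0-based. A standalone sketch (not MIGraphX code) of the conversion above:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <vector>

int main()
{
    std::vector<std::size_t> migraphx_dilations{1, 3};
    std::vector<std::size_t> dnnl_dilations;
    std::transform(migraphx_dilations.cbegin(),
                   migraphx_dilations.cend(),
                   std::back_inserter(dnnl_dilations),
                   [](std::size_t d) { return d - 1; });
    for(auto d : dnnl_dilations)
        std::cout << d << " "; // prints "0 2": oneDNN's 0 means no dilation
}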
src/targets/gpu/compile_hip.cpp
...
...
@@ -194,7 +194,7 @@ struct hiprtc_program
 };

 std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_src_file> srcs,
-                                                           std::string params,
+                                                           const std::string& params,
                                                            const std::string& arch)
 {
     hiprtc_program prog(std::move(srcs));
...
...
@@ -238,8 +238,9 @@ bool hip_has_flags(const std::vector<std::string>& flags)
     }
 }

-std::vector<std::vector<char>>
-compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch)
+std::vector<std::vector<char>> compile_hip_src(const std::vector<src_file>& srcs,
+                                               const std::string& params,
+                                               const std::string& arch)
 {
     std::vector<hiprtc_src_file> hsrcs{srcs.begin(), srcs.end()};
     if(enabled(MIGRAPHX_GPU_DUMP_SRC{}))
...
...
@@ -281,13 +282,13 @@ compile_hip_src(const std::vector<src_file>& srcs, ...)
         if(fs::exists(out))
             return {read_buffer(out.string())};
     }
-    return compile_hip_src_with_hiprtc(std::move(hsrcs), std::move(params), arch);
+    return compile_hip_src_with_hiprtc(std::move(hsrcs), params, arch);
 }

 #else // MIGRAPHX_USE_HIPRTC

 std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_src_file>, // NOLINT
-                                                           std::string,                  // NOLINT
+                                                           const std::string&,           // NOLINT
                                                            const std::string&)
 {
     MIGRAPHX_THROW("Not using hiprtc");
...
...
@@ -316,29 +317,15 @@ src_compiler assemble(src_compiler compiler)
     return compiler;
 }

-std::vector<std::vector<char>>
-compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch)
+std::vector<std::vector<char>> compile_hip_src(const std::vector<src_file>& srcs,
+                                               const std::string& params,
+                                               const std::string& arch)
 {
     assert(not srcs.empty());
     if(not is_hip_clang_compiler())
         MIGRAPHX_THROW("Unknown hip compiler: " MIGRAPHX_HIP_COMPILER);

-    if(params.find("-std=") == std::string::npos)
-        params += " --std=c++17";
-    params += " -fno-gpu-rdc";
-    if(enabled(MIGRAPHX_GPU_DEBUG_SYM{}))
-        params += " -g";
-    params += " -c";
-    params += " --offload-arch=" + arch;
-    params += " --cuda-device-only";
-    params += " -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ";
-    if(enabled(MIGRAPHX_GPU_DEBUG{}))
-        params += " -DMIGRAPHX_DEBUG";
-    params += " -Wno-unused-command-line-argument -Wno-cuda-compat ";
-    params += MIGRAPHX_HIP_COMPILER_FLAGS;
-
     src_compiler compiler;
     compiler.flags    = params;
     compiler.compiler = MIGRAPHX_HIP_COMPILER;
...
...
@@ -346,6 +333,23 @@ compile_hip_src(const std::vector<src_file>& srcs, ...)
     if(has_compiler_launcher())
         compiler.launcher = MIGRAPHX_HIP_COMPILER_LAUNCHER;
 #endif
+    if(params.find("-std=") == std::string::npos)
+        compiler.flags += " --std=c++17";
+    compiler.flags += " -fno-gpu-rdc";
+    if(enabled(MIGRAPHX_GPU_DEBUG_SYM{}))
+        compiler.flags += " -g";
+    compiler.flags += " -c";
+    compiler.flags += " --offload-arch=" + arch;
+    compiler.flags += " --cuda-device-only";
+    compiler.flags += " -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ";
+    if(enabled(MIGRAPHX_GPU_DEBUG{}))
+        compiler.flags += " -DMIGRAPHX_DEBUG";
+    compiler.flags += " -Wno-unused-command-line-argument -Wno-cuda-compat ";
+    compiler.flags += MIGRAPHX_HIP_COMPILER_FLAGS;
+
     if(enabled(MIGRAPHX_GPU_DUMP_SRC{}))
     {
         for(const auto& src : srcs)
...
...
src/targets/gpu/compile_hip_code_object.cpp
...
...
@@ -200,7 +200,7 @@ operation compile_hip_code_object(const std::string& content, hip_compile_options options)
     options.params += " " + join_strings(compiler_warnings(), " ");
     options.params += " -ftemplate-backtrace-limit=0";
     options.params += " -Werror";
-    auto cos = compile_hip_src(srcs, std::move(options.params), get_device_name());
+    auto cos = compile_hip_src(srcs, options.params, get_device_name());
     if(cos.size() != 1)
         MIGRAPHX_THROW("No code object");
     return code_object_op{value::binary{cos.front()},
...
...
src/targets/gpu/device/include/migraphx/gpu/device/scan.hpp
...
...
@@ -43,24 +43,32 @@ template <index_int N,
 __device__ void block_scan(index idx, Op op, T init, ForStride fs, Input input, Output output)
 {
     using type = decltype(input(deduce_for_stride(fs)));
-    MIGRAPHX_DEVICE_SHARED type buffer[N];
+    MIGRAPHX_DEVICE_SHARED type buffer[2][N];
     type x = init;
     fs([&](auto i) {
+        index_int iout = 0;
+        index_int iin  = 1;
         if(idx.local == 0)
-            buffer[idx.local] = op(input(i), x);
+            buffer[iout][idx.local] = op(input(i), x);
         else
-            buffer[idx.local] = input(i);
+            buffer[iout][idx.local] = input(i);
         __syncthreads();
         for(index_int s = 1; s < idx.nlocal(); s *= 2)
         {
-            if(idx.local + s < idx.nlocal())
+            iout = 1 - iout;
+            iin  = 1 - iin;
+            if(idx.local >= s)
             {
-                buffer[idx.local + s] = op(buffer[idx.local], buffer[idx.local + s]);
+                buffer[iout][idx.local] = op(buffer[iin][idx.local], buffer[iin][idx.local - s]);
+            }
+            else
+            {
+                buffer[iout][idx.local] = buffer[iin][idx.local];
             }
             __syncthreads();
         }
-        x = buffer[idx.nlocal() - 1];
-        output(i, buffer[idx.local]);
+        x = buffer[iout][idx.nlocal() - 1];
+        output(i, buffer[iout][idx.local]);
     });
 }
...
...
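This rewrite turns block_scan into a double-buffered Hillis-Steele inclusive scan: in each step every lane reads only from buffer[iin] and writes only to buffer[iout], so a value can no longer be overwritten by one thread while another thread still needs it. A host-side reference sketch (not the device code) of the same loop structure:

#include <cstddef>
#include <iostream>
#include <vector>

std::vector<int> block_scan_ref(std::vector<int> in)
{
    std::size_t n = in.size();
    std::vector<std::vector<int>> buffer(2, in); // buffer[0] starts as the input
    std::size_t iout = 0, iin = 1;
    for(std::size_t s = 1; s < n; s *= 2)
    {
        iout = 1 - iout;
        iin  = 1 - iin;
        for(std::size_t t = 0; t < n; ++t) // all "threads" step in lockstep
        {
            if(t >= s)
                buffer[iout][t] = buffer[iin][t] + buffer[iin][t - s];
            else
                buffer[iout][t] = buffer[iin][t];
        }
    }
    return buffer[iout];
}

int main()
{
    for(int v : block_scan_ref({1, 1, 1, 1})) // prints 1 2 3 4
        std::cout << v << " ";
}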
src/targets/gpu/driver/precompile_op.cpp
0 → 100644
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/driver/action.hpp>
#include <migraphx/gpu/time_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/compile_ops.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace driver {

struct precompile_op : action<precompile_op>
{
    static program create_preop_program(const operation& preop, std::vector<shape> inputs)
    {
        program p;
        auto* mm = p.get_main_module();
        std::vector<instruction_ref> args;
        inputs.pop_back();
        transform(inputs, range(inputs.size()), std::back_inserter(args), [&](auto input, auto i) {
            return mm->add_parameter("x" + std::to_string(i), input);
        });
        mm->add_instruction(preop, args);
        return p;
    }

    static operation get_code_object(const program& p)
    {
        MIGRAPHX_TIDY_CONST auto* mm = p.get_main_module();
        auto it = std::find_if(mm->begin(), mm->end(), [](const auto& ins) {
            return (ins.name() == "gpu::code_object");
        });
        if(it == mm->end())
            MIGRAPHX_THROW("Failed to create code object");
        return it->get_operator();
    }

    static void apply(const parser& p, const value& v)
    {
        context ctx;
        auto inputs = p.parse_shapes(v.at("inputs"));
        auto name   = v.at("name").to<std::string>();
        auto preop  = make_op(name);
        if(v.contains("fields"))
            preop.from_value(v.at("fields"));
        bool exhaustive = v.get("exhaustive", false);
        auto prog       = create_preop_program(preop, inputs);
        run_passes(prog, {lowering{}, compile_ops{&ctx, exhaustive}});
        auto op = get_code_object(prog);
        auto t  = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
        std::cout << preop << ": " << t << "ms" << std::endl;
    }
};

} // namespace driver
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
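The action is essentially a timing harness: it builds a one-instruction program from the requested operator and input shapes, runs the lowering and compile_ops passes, extracts the resulting gpu::code_object, and times it. For orientation, a hand-built equivalent of what create_preop_program produces (a sketch with made-up shapes; the pooling attribute spelling follows test/onnx/onnx_test.cpp below):

// The trailing shape in the driver's "inputs" list is the output allocation,
// which create_preop_program pops off before adding parameters.
migraphx::program p;
auto* mm = p.get_main_module();
auto x0  = mm->add_parameter("x0", {migraphx::shape::float_type, {1, 3, 32, 32}});
mm->add_instruction(migraphx::make_op("pooling",
                                      {{"mode", migraphx::op::pooling_mode::max},
                                       {"padding", {0, 0, 0, 0}},
                                       {"stride", {2, 2}},
                                       {"lengths", {2, 2}},
                                       {"dilations", {1, 1}}}),
                    x0);
// run_passes(p, {lowering{}, compile_ops{&ctx, exhaustive}}) followed by
// time_op(ctx, get_code_object(p), inputs, iterations) reports the kernel time.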
src/targets/gpu/include/migraphx/gpu/compile_hip.hpp
...
...
@@ -58,10 +58,10 @@ struct hiprtc_src_file
 MIGRAPHX_GPU_EXPORT bool hip_has_flags(const std::vector<std::string>& flags);

 MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> compile_hip_src_with_hiprtc(
-    std::vector<hiprtc_src_file> srcs, std::string params, const std::string& arch);
+    std::vector<hiprtc_src_file> srcs, const std::string& params, const std::string& arch);

-MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> compile_hip_src(
-    const std::vector<src_file>& srcs, std::string params, const std::string& arch);
+MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> compile_hip_src(
+    const std::vector<src_file>& srcs, const std::string& params, const std::string& arch);

 MIGRAPHX_GPU_EXPORT std::string enum_params(std::size_t count, std::string param);
...
...
src/targets/gpu/include/migraphx/gpu/miopen.hpp
...
...
@@ -211,6 +211,12 @@ inline pooling_descriptor make_pooling(const migraphx::op::pooling& op)
         ss << op.mode;
         MIGRAPHX_THROW(ss.str());
     }
+    if(not std::all_of(op.dilations.cbegin(), op.dilations.cend(), [](std::size_t d) {
+           return d == 1;
+       }))
+    {
+        MIGRAPHX_THROW("Unsupported dilations for pooling: [" + to_string_range(op.dilations) +
+                       "]");
+    }
     auto p    = make_obj<pooling_descriptor>(&miopenCreatePoolingDescriptor);
     int kdims = op.kdims();
...
...
test/onnx/averagepool_dilate_test.onnx
0 → 100644
(Binary ONNX protobuf: graph averagepool_dilate_test containing an AveragePool node with input x, output y, and attributes dilations, kernel_shape, pads, and strides. No newline at end of file.)
test/onnx/gen_onnx.py
...
...
@@ -276,6 +276,22 @@ def averagepool_1d_test():
     return ([node], [x], [out])


+@onnx_test()
+def averagepool_dilate_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 4, 3])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4, 2])
+    node = onnx.helper.make_node('AveragePool',
+                                 inputs=['x'],
+                                 outputs=['y'],
+                                 kernel_shape=[2],
+                                 strides=[1],
+                                 pads=[1, 1],
+                                 dilations=[3])
+    return ([node], [x], [y])
+
+
 @onnx_test()
 def averagepool_3d_test():
     x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [1, 3, 5, 5, 5])
...
...
@@ -4882,6 +4898,22 @@ def maxpool_notset_test():
     return ([node], [x], [y])


+@onnx_test()
+def maxpool_dilate_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 4, 3])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4, 2])
+    node = onnx.helper.make_node('MaxPool',
+                                 inputs=['x'],
+                                 outputs=['y'],
+                                 kernel_shape=[2],
+                                 strides=[1],
+                                 pads=[1, 1],
+                                 dilations=[3])
+    return ([node], [x], [y])
+
+
 @onnx_test()
 def maxpool_same_upper_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 1, 5, 5])
...
...
@@ -6094,6 +6126,26 @@ def qlinearglobalavgpool_test():
     return ([n], [x], [y], [sc_x, z_pt_x, sc_y, z_pt_y])


+@onnx_test()
+def qlinearleakyrelu_test():
+    x = helper.make_tensor_value_info('X', TensorProto.INT8, [64])
+    sc_x = helper.make_tensor('X_scale', TensorProto.FLOAT, [], [0.05])
+    zero_pt_x = helper.make_tensor('X_zero_point', TensorProto.INT8, [], [0])
+    sc_y = helper.make_tensor('Y_scale', TensorProto.FLOAT, [], [0.05])
+    zero_pt_y = helper.make_tensor('Y_zero_point', TensorProto.INT8, [], [10])
+    y = helper.make_tensor_value_info('Y', TensorProto.INT8, [64])
+
+    node = onnx.helper.make_node(
+        'QLinearLeakyRelu',
+        inputs=['X', 'X_scale', 'X_zero_point', 'Y_scale', 'Y_zero_point'],
+        outputs=['Y'],
+        alpha=1.1,
+    )
+
+    return ([node], [x], [y], [sc_x, zero_pt_x, sc_y, zero_pt_y])
+
+
 def qlinearmatmul_1D_test():
     a = helper.make_tensor_value_info('A', TensorProto.UINT8, [8])
     sc_a = helper.make_tensor('A_scale', TensorProto.FLOAT, [], [0.05])
...
...
@@ -6234,6 +6286,26 @@ def qlinearmul_bcast_test():
     [sc_a, zero_pt_a, sc_b, zero_pt_b, sc_c, zero_pt_c])


+@onnx_test()
+def qlinearsigmoid_test():
+    x = helper.make_tensor_value_info('X', TensorProto.INT8, [64])
+    sc_x = helper.make_tensor('X_scale', TensorProto.FLOAT, [], [0.05])
+    zero_pt_x = helper.make_tensor('X_zero_point', TensorProto.INT8, [], [0])
+    sc_y = helper.make_tensor('Y_scale', TensorProto.FLOAT, [], [0.0035])
+    zero_pt_y = helper.make_tensor('Y_zero_point', TensorProto.INT8, [], [-128])
+    y = helper.make_tensor_value_info('Y', TensorProto.INT8, [64])
+
+    node = onnx.helper.make_node(
+        'QLinearSigmoid',
+        inputs=['X', 'X_scale', 'X_zero_point', 'Y_scale', 'Y_zero_point'],
+        outputs=['Y'],
+    )
+
+    return ([node], [x], [y], [sc_x, zero_pt_x, sc_y, zero_pt_y])
+
+
 @onnx_test()
 def quantizelinear_test():
     arg0 = helper.make_tensor_value_info('0', TensorProto.FLOAT, [5])
...
...
test/onnx/maxpool_dilate_test.onnx
0 → 100644
(Binary ONNX protobuf: graph maxpool_dilate_test containing a MaxPool node with input x, output y, and attributes dilations, kernel_shape, pads, and strides. No newline at end of file.)
test/onnx/onnx_test.cpp
...
...
@@ -296,13 +296,32 @@ TEST_CASE(averagepool_1d_test)
{{"mode", migraphx::op::pooling_mode::average},
{"padding", {0, 0}},
{"stride", {1}},
{"lengths", {3}}}),
{"lengths", {3}},
{"dilations", {1}}}),
l0);
auto prog = optimize_onnx("averagepool_1d_test.onnx");
EXPECT(p == prog);
}
TEST_CASE(averagepool_dilate_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
auto input = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {1, 4, 3}});
mm->add_instruction(migraphx::make_op("pooling",
{{"mode", migraphx::op::pooling_mode::average},
{"padding", {1, 1}},
{"stride", {1}},
{"lengths", {2}},
{"dilations", {3}}}),
input);
auto prog = optimize_onnx("averagepool_dilate_test.onnx");
EXPECT(p == prog);
}
TEST_CASE(averagepool_3d_test)
{
migraphx::program p;
...
...
@@ -312,7 +331,8 @@ TEST_CASE(averagepool_3d_test)
{{"mode", migraphx::op::pooling_mode::average},
{"padding", {0, 0, 0, 0, 0, 0}},
{"stride", {1, 1, 1}},
{"lengths", {3, 3, 3}}}),
{"lengths", {3, 3, 3}},
{"dilations", {1, 1, 1}}}),
l0);
auto prog = optimize_onnx("averagepool_3d_test.onnx");
...
...
@@ -332,6 +352,7 @@ TEST_CASE(averagepool_dyn_test)
{"mode", migraphx::op::pooling_mode::average},
{"stride", {2, 2, 2}},
{"lengths", {3, 3, 3}},
{"dilations", {1, 1, 1}},
{"padding", {1, 1, 1, 1, 1, 1}},
{"padding_mode", 0},
}),
...
...
@@ -357,6 +378,7 @@ TEST_CASE(averagepool_dyn_autopad_test)
{"mode", migraphx::op::pooling_mode::average},
{"stride", {2, 2, 2}},
{"lengths", {3, 3, 3}},
{"dilations", {1, 1, 1}},
{"padding", {0, 0, 0, 0, 0, 0}},
{"padding_mode", migraphx::op::padding_mode_t::same_upper},
}),
...
...
@@ -394,7 +416,8 @@ TEST_CASE(averagepool_notset_test)
{{"mode", migraphx::op::pooling_mode::average},
{"padding", {2, 2, 2, 2}},
{"stride", {2, 2}},
{"lengths", {6, 6}}}),
{"lengths", {6, 6}},
{"dilations", {1, 1}}}),
input);
auto ret = mm->add_instruction(
migraphx::make_op("slice", {{"axes", {2, 3}}, {"starts", {1, 1}}, {"ends", {2, 2}}}), ins);
...
...
@@ -415,7 +438,8 @@ TEST_CASE(averagepool_nt_cip_test)
{{"mode", migraphx::op::pooling_mode::average},
{"padding", {0, 0, 0, 0}},
{"stride", {2, 2}},
{"lengths", {6, 6}}}),
{"lengths", {6, 6}},
{"dilations", {1, 1}}}),
ins_pad);
mm->add_return({ret});
...
...
@@ -437,6 +461,7 @@ TEST_CASE(averagepool_same_lower_test)
{"padding", {1, 1, 1, 1}},
{"stride", {1, 1}},
{"lengths", {2, 2}},
{"dilations", {1, 1}},
{"padding_mode", migraphx::op::padding_mode_t::default_},
}),
input);
...
...
@@ -459,7 +484,8 @@ TEST_CASE(averagepool_sl_cip_test)
{{"mode", migraphx::op::pooling_mode::average},
{"padding", {0, 0, 0, 0}},
{"stride", {1, 1}},
{"lengths", {2, 2}}}),
{"lengths", {2, 2}},
{"dilations", {1, 1}}}),
ins_pad);
mm->add_return({ret});
auto prog = migraphx::parse_onnx("averagepool_sl_cip_test.onnx");
...
...
@@ -476,7 +502,8 @@ TEST_CASE(averagepool_same_upper_test)
{{"mode", migraphx::op::pooling_mode::average},
{"padding", {1, 1, 1, 1}},
{"stride", {1, 1}},
{"lengths", {2, 2}}}),
{"lengths", {2, 2}},
{"dilations", {1, 1}}}),
input);
auto ret = mm->add_instruction(
migraphx::make_op("slice", {{"axes", {2, 3}}, {"starts", {1, 1}}, {"ends", {6, 6}}}), ins);
...
...
@@ -1307,7 +1334,8 @@ TEST_CASE(conv_bn_relu_maxpool_test)
{{"mode", migraphx::op::pooling_mode::max},
{"padding", {0, 0, 0, 0}},
{"stride", {2, 2}},
{"lengths", {2, 2}}}),
{"lengths", {2, 2}},
{"dilations", {1, 1}}}),
l7);
auto prog = optimize_onnx("conv_bn_relu_maxpool_test.onnx");
...
...
@@ -1505,7 +1533,8 @@ TEST_CASE(conv_relu_maxpool_test)
{{"mode", migraphx::op::pooling_mode::max},
{"padding", {0, 0, 0, 0}},
{"stride", {2, 2}},
{"lengths", {2, 2}}}),
{"lengths", {2, 2}},
{"dilations", {1, 1}}}),
l6);
auto prog = optimize_onnx("conv_relu_maxpool_test.onnx");
...
...
@@ -1530,7 +1559,8 @@ TEST_CASE(conv_relu_maxpool_x2_test)
{{"mode", migraphx::op::pooling_mode::max},
{"padding", {0, 0, 0, 0}},
{"stride", {2, 2}},
{"lengths", {2, 2}}}),
{"lengths", {2, 2}},
{"dilations", {1, 1}}}),
l6);
auto l8 = mm->add_parameter("3", {migraphx::shape::float_type, {1, 5, 5, 5}});
...
...
@@ -1546,7 +1576,8 @@ TEST_CASE(conv_relu_maxpool_x2_test)
{{"mode", migraphx::op::pooling_mode::max},
{"padding", {0, 0, 0, 0}},
{"stride", {2, 2}},
{"lengths", {2, 2}}}),
{"lengths", {2, 2}},
{"dilations", {1, 1}}}),
l13);
auto prog = optimize_onnx("conv_relu_maxpool_x2_test.onnx");
...
...
@@ -4245,6 +4276,7 @@ TEST_CASE(lppool_l1_test)
{"padding", {0, 0}},
{"stride", {1}},
{"lengths", {3}},
{"dilations", {1}},
{"lp_order", 1}}),
l0);
auto prog = optimize_onnx("lppool_l1_test.onnx");
...
...
@@ -4261,6 +4293,7 @@ TEST_CASE(lppool_l2_test)
{"padding", {0, 0}},
{"stride", {1}},
{"lengths", {3}},
{"dilations", {1}},
{"lp_order", 2}}),
l0);
auto prog = optimize_onnx("lppool_l2_test.onnx");
...
...
@@ -4513,7 +4546,8 @@ TEST_CASE(maxpool_notset_test)
{{"mode", migraphx::op::pooling_mode::max},
{"padding", {0, 0, 1, 1}},
{"stride", {2, 2}},
{"lengths", {6, 6}}}),
{"lengths", {6, 6}},
{"dilations", {1, 1}}}),
input);
auto prog = optimize_onnx("maxpool_notset_test.onnx");
...
...
@@ -4521,6 +4555,24 @@ TEST_CASE(maxpool_notset_test)
EXPECT(p == prog);
}
TEST_CASE(maxpool_dilate_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
auto input = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {1, 4, 3}});
mm->add_instruction(migraphx::make_op("pooling",
{{"mode", migraphx::op::pooling_mode::max},
{"padding", {1, 1}},
{"stride", {1}},
{"lengths", {2}},
{"dilations", {3}}}),
input);
auto prog = optimize_onnx("maxpool_dilate_test.onnx");
EXPECT(p == prog);
}
TEST_CASE(maxpool_same_upper_test)
{
migraphx::program p;
...
...
@@ -4530,7 +4582,8 @@ TEST_CASE(maxpool_same_upper_test)
{{"mode", migraphx::op::pooling_mode::max},
{"padding", {0, 0, 1, 1}},
{"stride", {1, 1}},
{"lengths", {2, 2}}}),
{"lengths", {2, 2}},
{"dilations", {1, 1}}}),
input);
auto prog = optimize_onnx("maxpool_same_upper_test.onnx");
...
...
@@ -5642,6 +5695,46 @@ TEST_CASE(qlinearglobalavgpool_test)
EXPECT(p.sort() == prog.sort());
}
TEST_CASE(qlinearleakyrelu_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
auto x = mm->add_parameter("X", {migraphx::shape::int8_type, {64}});
auto sc_x = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
auto z_pt_x = mm->add_literal(migraphx::literal{migraphx::shape::int8_type, {0}});
auto sc_y = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
auto z_pt_y = mm->add_literal(migraphx::literal{migraphx::shape::int8_type, {10}});
auto scale_x_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), sc_x);
auto z_pt_x_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), z_pt_x);
auto fp_x =
mm->add_instruction(migraphx::make_op("dequantizelinear"), x, scale_x_bcast, z_pt_x_bcast);
auto fp_y = mm->add_instruction(migraphx::make_op("leaky_relu", {{"alpha", 1.1}}), fp_x);
auto scale_y_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), sc_y);
auto z_pt_y_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), z_pt_y);
auto y =
mm->add_instruction(migraphx::make_op("quantizelinear"), fp_y, scale_y_bcast, z_pt_y_bcast);
mm->add_return({y});
auto prog = migraphx::parse_onnx("qlinearleakyrelu_test.onnx");
EXPECT(p.sort() == prog.sort());
}
TEST_CASE(qlinearmatmul_1D_test)
{
migraphx::program p;
...
...
@@ -5807,6 +5900,46 @@ TEST_CASE(qlinearmul_test)
EXPECT(p.sort() == prog.sort());
}
TEST_CASE(qlinearsigmoid_test)
{
migraphx::program p;
auto* mm = p.get_main_module();
auto x = mm->add_parameter("X", {migraphx::shape::int8_type, {64}});
auto sc_x = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.05}});
auto z_pt_x = mm->add_literal(migraphx::literal{migraphx::shape::int8_type, {0}});
auto sc_y = mm->add_literal(migraphx::literal{migraphx::shape::float_type, {0.0035}});
auto z_pt_y = mm->add_literal(migraphx::literal{migraphx::shape::int8_type, {-128}});
auto scale_x_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), sc_x);
auto z_pt_x_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), z_pt_x);
auto fp_x =
mm->add_instruction(migraphx::make_op("dequantizelinear"), x, scale_x_bcast, z_pt_x_bcast);
auto fp_y = mm->add_instruction(migraphx::make_op("sigmoid"), fp_x);
auto scale_y_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), sc_y);
auto z_pt_y_bcast =
mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", {64}}}), z_pt_y);
auto y =
mm->add_instruction(migraphx::make_op("quantizelinear"), fp_y, scale_y_bcast, z_pt_y_bcast);
mm->add_return({y});
auto prog = migraphx::parse_onnx("qlinearsigmoid_test.onnx");
EXPECT(p.sort() == prog.sort());
}
migraphx::instruction_ref insert_quantizelinear_clip(migraphx::module& m,
const migraphx::instruction_ref ins,
const migraphx::instruction_ref round,
...
...
test/onnx/qlinearleakyrelu_test.onnx
0 → 100644
File added