Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
ac04f3cc
Commit
ac04f3cc
authored
Nov 10, 2023
by
Khalique Ahmed
Browse files
manual_merge
parents
d39c3343
d8011adf
Changes
539
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
691 additions
and
154 deletions
+691
-154
src/onnx/parse_split.cpp
src/onnx/parse_split.cpp
+26
-5
src/onnx/parse_trilu.cpp
src/onnx/parse_trilu.cpp
+4
-4
src/optimize_module.cpp
src/optimize_module.cpp
+7
-3
src/pad_calc.cpp
src/pad_calc.cpp
+40
-1
src/process.cpp
src/process.cpp
+167
-1
src/program.cpp
src/program.cpp
+3
-3
src/propagate_constant.cpp
src/propagate_constant.cpp
+4
-4
src/py/CMakeLists.txt
src/py/CMakeLists.txt
+14
-17
src/py/migraphx_py.cpp
src/py/migraphx_py.cpp
+5
-2
src/quantization.cpp
src/quantization.cpp
+5
-4
src/rewrite_pooling.cpp
src/rewrite_pooling.cpp
+5
-16
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+19
-4
src/shape.cpp
src/shape.cpp
+23
-16
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+118
-34
src/simplify_dyn_ops.cpp
src/simplify_dyn_ops.cpp
+185
-0
src/simplify_reshapes.cpp
src/simplify_reshapes.cpp
+48
-2
src/split_single_dyn_dim.cpp
src/split_single_dyn_dim.cpp
+0
-32
src/targets/cpu/include/migraphx/cpu/dnnl.hpp
src/targets/cpu/include/migraphx/cpu/dnnl.hpp
+15
-2
src/targets/cpu/include/migraphx/cpu/fuse_ops.hpp
src/targets/cpu/include/migraphx/cpu/fuse_ops.hpp
+2
-4
src/targets/cpu/include/migraphx/cpu/pointwise.hpp
src/targets/cpu/include/migraphx/cpu/pointwise.hpp
+1
-0
No files found.
src/onnx/parse_split.cpp
View file @
ac04f3cc
...
...
@@ -68,13 +68,34 @@ struct parse_split : op_parser<parse_split>
// no split attribute, input is equally divided
else
{
if
((
lens
[
tuned_axis
]
%
info
.
num_outputs
)
!=
0
)
std
::
size_t
num_outputs
=
info
.
num_outputs
;
// the num_outputs attribute seems to be redundant since we already have
// node_info::num_outputs, but we can still perform an error check
if
(
contains
(
info
.
attributes
,
"num_outputs"
))
{
MIGRAPHX_THROW
(
"PARSE_SPLIT: input cannot be equally divided into "
+
std
::
to_string
(
info
.
num_outputs
)
+
" splits!"
);
num_outputs
=
parser
.
parse_value
(
info
.
attributes
.
at
(
"num_outputs"
)).
at
<
std
::
size_t
>
();
if
(
num_outputs
!=
info
.
num_outputs
)
{
MIGRAPHX_THROW
(
"PARSE_SPLIT: num_outputs attribute "
+
std
::
to_string
(
num_outputs
)
+
" doesn't match actual number of outputs "
+
std
::
to_string
(
info
.
num_outputs
)
+
"!"
);
}
}
if
(
lens
[
tuned_axis
]
%
num_outputs
==
0
)
{
std
::
size_t
chunk_size
=
lens
[
tuned_axis
]
/
num_outputs
;
vec_splits
.
resize
(
num_outputs
,
chunk_size
);
}
else
{
std
::
size_t
chunk_size
=
lens
[
tuned_axis
]
/
num_outputs
+
1
;
std
::
size_t
last_chunk_size
=
lens
[
tuned_axis
]
-
chunk_size
*
(
num_outputs
-
1
);
vec_splits
.
resize
(
num_outputs
-
1
,
chunk_size
);
vec_splits
.
push_back
(
last_chunk_size
);
}
auto
dl
=
lens
[
tuned_axis
]
/
info
.
num_outputs
;
vec_splits
.
resize
(
info
.
num_outputs
,
dl
);
}
if
(
std
::
accumulate
(
vec_splits
.
begin
(),
vec_splits
.
end
(),
int64_t
(
0
))
!=
...
...
src/onnx/parse_trilu.cpp
View file @
ac04f3cc
...
...
@@ -56,9 +56,6 @@ struct parse_trilu : op_parser<parse_trilu>
k
=
arg_k
.
at
<
int
>
();
}
if
(
k
<
0
)
MIGRAPHX_THROW
(
"PARSE_TRILU: negative k values not supported"
);
if
(
contains
(
info
.
attributes
,
"upper"
))
{
upper
=
static_cast
<
bool
>
(
info
.
attributes
.
at
(
"upper"
).
i
());
...
...
@@ -69,9 +66,12 @@ struct parse_trilu : op_parser<parse_trilu>
// when creating the mask, if upper == 1,
// the inner triangle will have values set to 0
std
::
vector
<
bool
>
mask_mat
(
num_rows
*
num_cols
,
upper
);
// if upper == 0, kth diagonal must also be masked
if
(
not
upper
)
k
++
;
for
(
size_t
i
=
0
;
i
<
num_rows
;
i
++
)
{
for
(
size_
t
j
=
0
;
j
<
std
::
min
(
k
,
static_cast
<
int
>
(
num_cols
));
j
++
)
for
(
in
t
j
=
0
;
j
<
std
::
min
(
k
,
static_cast
<
int
>
(
num_cols
));
j
++
)
{
mask_mat
[
i
*
num_cols
+
j
]
=
not
upper
;
}
...
...
src/optimize_module.cpp
View file @
ac04f3cc
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
@@ -35,9 +35,13 @@ inline namespace MIGRAPHX_INLINE_NS {
void
optimize_module
::
apply
(
module_pass_manager
&
mpm
)
const
{
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
// loop to further optimize after initial transformations
for
(
int
j
=
0
;
j
<
2
;
j
++
)
{
mpm
.
run_pass
(
simplify_reshapes
{});
mpm
.
run_pass
(
simplify_algebra
{});
}
mpm
.
run_pass
(
eliminate_common_subexpression
{});
mpm
.
run_pass
(
dead_code_elimination
{});
mpm
.
run_pass
(
propagate_constant
{});
...
...
src/pad_calc.cpp
View file @
ac04f3cc
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
@@ -52,6 +52,11 @@ void calculate_padding(int64_t idx,
}
}
/**
* Given the input array dimensions; kernel (wei_lens); strides; and dilations,
* calculate the padding value in each dimension.
*
*/
std
::
vector
<
std
::
size_t
>
calc_dyn_auto_pad
(
const
std
::
vector
<
std
::
size_t
>&
input_lens
,
const
std
::
vector
<
std
::
size_t
>&
wei_lens
,
const
std
::
vector
<
std
::
size_t
>&
strides
,
...
...
@@ -60,6 +65,7 @@ std::vector<std::size_t> calc_dyn_auto_pad(const std::vector<std::size_t>& input
{
std
::
vector
<
std
::
size_t
>
padding
;
assert
(
input_lens
.
size
()
>=
3
);
assert
(
input_lens
.
size
()
==
wei_lens
.
size
());
std
::
size_t
num_spatial_dims
=
input_lens
.
size
()
-
2
;
padding
.
resize
(
2
*
num_spatial_dims
);
for
(
std
::
size_t
i
=
0
;
i
<
num_spatial_dims
;
i
++
)
...
...
@@ -88,6 +94,11 @@ std::vector<std::size_t> calc_dyn_auto_pad(const std::vector<std::size_t>& input
return
padding
;
}
/**
* Calculate the correct output shape for a convolution with
* a given input size and other parameters.
*
*/
shape
compute_padded_shape
(
const
shape
&
input
,
const
shape
&
weights
,
const
std
::
vector
<
std
::
size_t
>&
padding
,
...
...
@@ -111,5 +122,33 @@ shape compute_padded_shape(const shape& input,
return
input
.
with_lens
(
output_lens
);
}
/**
* Calculate the correct output shape for a pooling with
* a given input size and other parameters. This uses
* the same formula for pooling that compute_padded_shape() uses
* for convolutions, but takes slightly different inputs.
*
*/
shape
compute_padded_pool_shape
(
const
shape
&
input
,
const
shape
&
kernel
,
const
std
::
vector
<
std
::
size_t
>&
padding
,
const
std
::
vector
<
std
::
size_t
>&
stride
,
const
std
::
vector
<
std
::
size_t
>&
dilation
)
{
const
size_t
num_spatial_dims
=
input
.
lens
().
size
()
-
2
;
std
::
vector
<
size_t
>
output_lens
{
input
.
lens
()[
0
],
input
.
lens
()[
1
]};
// calculate the output shape of the pooling: ((W - K + 2P) / S) + 1
for
(
size_t
i
=
0
;
i
<
num_spatial_dims
;
++
i
)
{
auto
padding_factor
=
padding
[
i
]
+
padding
[
i
+
num_spatial_dims
];
output_lens
.
push_back
(
std
::
size_t
(
std
::
max
<
std
::
ptrdiff_t
>
(
1
,
(
input
.
lens
()[
i
+
2
]
-
(
1
+
dilation
[
i
]
*
(
kernel
.
lens
()[
i
]
-
1
))
+
padding_factor
)
/
stride
[
i
]
+
1
)));
}
return
input
.
with_lens
(
output_lens
);
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/process.cpp
View file @
ac04f3cc
...
...
@@ -26,13 +26,23 @@
#include <migraphx/env.hpp>
#include <functional>
#include <iostream>
#include <optional>
#ifdef _WIN32
// cppcheck-suppress definePrefix
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#else
#include <unistd.h>
#endif
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_TRACE_CMD_EXECUTE
)
#ifndef _WIN32
std
::
function
<
void
(
const
char
*
)
>
redirect_to
(
std
::
ostream
&
os
)
{
return
[
&
](
const
char
*
x
)
{
os
<<
x
;
};
...
...
@@ -74,6 +84,155 @@ int exec(const std::string& cmd, std::function<void(process::writer)> std_in)
});
}
#else
constexpr
std
::
size_t
MIGRAPHX_PROCESS_BUFSIZE
=
4096
;
class
pipe
{
public:
explicit
pipe
(
bool
inherit_handle
=
true
)
{
SECURITY_ATTRIBUTES
attrs
;
attrs
.
nLength
=
sizeof
(
SECURITY_ATTRIBUTES
);
attrs
.
bInheritHandle
=
inherit_handle
?
TRUE
:
FALSE
;
attrs
.
lpSecurityDescriptor
=
nullptr
;
if
(
CreatePipe
(
&
m_read
,
&
m_write
,
&
attrs
,
0
)
==
FALSE
)
throw
GetLastError
();
if
(
SetHandleInformation
(
&
m_read
,
HANDLE_FLAG_INHERIT
,
0
)
==
FALSE
)
throw
GetLastError
();
}
pipe
(
const
pipe
&
)
=
delete
;
pipe
&
operator
=
(
const
pipe
&
)
=
delete
;
pipe
(
pipe
&&
)
=
default
;
~
pipe
()
{
CloseHandle
(
m_read
);
m_read
=
nullptr
;
CloseHandle
(
m_write
);
m_write
=
nullptr
;
}
std
::
optional
<
std
::
pair
<
bool
,
DWORD
>>
read
(
LPVOID
buffer
,
DWORD
length
)
const
{
DWORD
bytes_read
;
if
(
ReadFile
(
m_read
,
buffer
,
length
,
&
bytes_read
,
nullptr
)
==
FALSE
)
{
DWORD
error
{
GetLastError
()};
if
(
error
!=
ERROR_MORE_DATA
)
{
return
std
::
nullopt
;
}
return
{{
true
,
bytes_read
}};
}
return
{{
false
,
bytes_read
}};
}
HANDLE
get_read_handle
()
const
{
return
m_read
;
}
bool
write
(
LPCVOID
buffer
,
DWORD
length
)
const
{
DWORD
bytes_written
;
return
WriteFile
(
m_write
,
buffer
,
length
,
&
bytes_written
,
nullptr
)
==
TRUE
;
}
HANDLE
get_write_handle
()
const
{
return
m_write
;
}
private:
HANDLE
m_write
=
nullptr
,
m_read
=
nullptr
;
};
template
<
typename
F
>
int
exec
(
const
std
::
string
&
cmd
,
F
f
)
{
try
{
if
(
enabled
(
MIGRAPHX_TRACE_CMD_EXECUTE
{}))
std
::
cout
<<
cmd
<<
std
::
endl
;
STARTUPINFO
info
;
PROCESS_INFORMATION
process_info
;
pipe
in
{},
out
{};
ZeroMemory
(
&
info
,
sizeof
(
STARTUPINFO
));
info
.
cb
=
sizeof
(
STARTUPINFO
);
info
.
hStdError
=
out
.
get_write_handle
();
info
.
hStdOutput
=
out
.
get_write_handle
();
info
.
hStdInput
=
in
.
get_read_handle
();
info
.
dwFlags
|=
STARTF_USESTDHANDLES
;
ZeroMemory
(
&
process_info
,
sizeof
(
process_info
));
if
(
CreateProcess
(
nullptr
,
const_cast
<
LPSTR
>
(
cmd
.
c_str
()),
nullptr
,
nullptr
,
TRUE
,
0
,
nullptr
,
nullptr
,
&
info
,
&
process_info
)
==
FALSE
)
{
return
GetLastError
();
}
f
(
in
,
out
);
WaitForSingleObject
(
process_info
.
hProcess
,
INFINITE
);
DWORD
status
{};
GetExitCodeProcess
(
process_info
.
hProcess
,
&
status
);
CloseHandle
(
process_info
.
hProcess
);
CloseHandle
(
process_info
.
hThread
);
return
static_cast
<
int
>
(
status
);
}
// cppcheck-suppress catchExceptionByValue
catch
(
DWORD
last_error
)
{
return
last_error
;
}
}
int
exec
(
const
std
::
string
&
cmd
)
{
TCHAR
buffer
[
MIGRAPHX_PROCESS_BUFSIZE
];
HANDLE
std_out
{
GetStdHandle
(
STD_OUTPUT_HANDLE
)};
return
(
std_out
==
nullptr
or
std_out
==
INVALID_HANDLE_VALUE
)
?
GetLastError
()
:
exec
(
cmd
,
[
&
](
const
pipe
&
,
const
pipe
&
out
)
{
for
(;;)
{
if
(
auto
result
=
out
.
read
(
buffer
,
MIGRAPHX_PROCESS_BUFSIZE
))
{
auto
[
more_data
,
bytes_read
]
=
*
result
;
if
(
not
more_data
or
bytes_read
==
0
)
break
;
DWORD
written
;
if
(
WriteFile
(
std_out
,
buffer
,
bytes_read
,
&
written
,
nullptr
)
==
FALSE
)
break
;
}
}
});
}
int
exec
(
const
std
::
string
&
cmd
,
std
::
function
<
void
(
process
::
writer
)
>
std_in
)
{
return
exec
(
cmd
,
[
&
](
const
pipe
&
in
,
const
pipe
&
)
{
std_in
([
&
](
const
char
*
buffer
,
std
::
size_t
n
)
{
in
.
write
(
buffer
,
n
);
});
});
}
#endif
struct
process_impl
{
std
::
string
command
{};
...
...
@@ -119,7 +278,14 @@ process& process::cwd(const fs::path& p)
return
*
this
;
}
void
process
::
exec
()
{
impl
->
check_exec
(
impl
->
get_command
(),
redirect_to
(
std
::
cout
));
}
void
process
::
exec
()
{
#ifndef _WIN32
impl
->
check_exec
(
impl
->
get_command
(),
redirect_to
(
std
::
cout
));
#else
impl
->
check_exec
(
impl
->
get_command
());
#endif
}
void
process
::
write
(
std
::
function
<
void
(
process
::
writer
)
>
pipe_in
)
{
...
...
src/program.cpp
View file @
ac04f3cc
...
...
@@ -347,7 +347,7 @@ void program::finalize()
template
<
class
T
>
std
::
string
classify
(
T
x
)
{
switch
(
std
::
fpclassify
(
x
))
switch
(
std
::
fpclassify
(
static_cast
<
double
>
(
x
)
))
{
case
FP_INFINITE
:
return
"inf"
;
case
FP_NAN
:
return
"nan"
;
...
...
@@ -624,7 +624,7 @@ std::string get_migraphx_version()
program file version is for the data structure or format of the MXR file. Version should be bumped
if any changes occur to the format of the MXR file.
*/
const
int
program_file_version
=
6
;
const
int
program_file_version
=
7
;
value
program
::
to_value
()
const
{
...
...
@@ -936,7 +936,7 @@ void program::perf_report(std::ostream& os,
os
<<
std
::
endl
;
os
<<
"Batch size: "
<<
batch
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"/sec"
<<
std
::
endl
;
os
<<
"Rate: "
<<
rate
*
batch
<<
"
inferences
/sec"
<<
std
::
endl
;
os
<<
"Total time: "
<<
total_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Total instructions time: "
<<
total_instruction_time
<<
"ms"
<<
std
::
endl
;
os
<<
"Overhead time: "
<<
overhead_time
<<
"ms"
...
...
src/propagate_constant.cpp
View file @
ac04f3cc
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
@@ -35,10 +35,10 @@ inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_TRACE_PROPAGATE_CONSTANT
)
bool
skip_prop
o
gate
(
instruction_ref
ins
)
bool
skip_prop
a
gate
(
instruction_ref
ins
)
{
if
(
ins
->
name
()
==
"contiguous"
)
return
skip_prop
o
gate
(
ins
->
inputs
().
front
());
return
skip_prop
a
gate
(
ins
->
inputs
().
front
());
auto
&&
s
=
ins
->
get_shape
();
if
(
s
.
broadcasted
()
and
not
s
.
scalar
())
return
true
;
...
...
@@ -47,7 +47,7 @@ bool skip_propogate(instruction_ref ins)
return
false
;
}
bool
is_const_ins
(
instruction_ref
ins
)
{
return
ins
->
can_eval
()
and
not
skip_prop
o
gate
(
ins
);
}
bool
is_const_ins
(
instruction_ref
ins
)
{
return
ins
->
can_eval
()
and
not
skip_prop
a
gate
(
ins
);
}
void
propagate_constant
::
apply
(
module
&
m
)
const
{
...
...
src/py/CMakeLists.txt
View file @
ac04f3cc
...
...
@@ -22,17 +22,15 @@
# THE SOFTWARE.
#####################################################################################
option
(
MIGRAPHX_ENABLE_PYTHON
"Enable python bindings"
ON
)
add_library
(
migraphx_py py_loader.cpp
)
migraphx_generate_export_header
(
migraphx_py
)
target_include_directories
(
migraphx_py PRIVATE include
)
target_link_libraries
(
migraphx_py PUBLIC migraphx
)
rocm_install_targets
(
TARGETS migraphx_py INCLUDE include
)
if
(
MIGRAPHX_ENABLE_PYTHON
)
include
(
PythonModules
)
include
(
PythonModules
)
foreach
(
PYTHON_VERSION
${
PYTHON_VERSIONS
}
)
foreach
(
PYTHON_VERSION
${
PYTHON_VERSIONS
}
)
py_add_module
(
migraphx_pybind_
${
PYTHON_VERSION
}
migraphx_py.cpp PYTHON_VERSION
${
PYTHON_VERSION
}
PYTHON_MODULE migraphx
)
target_link_libraries
(
migraphx_pybind_
${
PYTHON_VERSION
}
PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_all_targets
)
rocm_install_targets
(
TARGETS migraphx_pybind_
${
PYTHON_VERSION
}
)
...
...
@@ -44,5 +42,4 @@ if(MIGRAPHX_ENABLE_PYTHON)
target_link_libraries
(
migraphx_py_
${
PYTHON_VERSION
}
PRIVATE pybind11::pybind11 python
${
PYTHON_VERSION
}
::runtime
)
rocm_install_targets
(
TARGETS migraphx_py_
${
PYTHON_VERSION
}
)
add_dependencies
(
migraphx_py migraphx_py_
${
PYTHON_VERSION
}
)
endforeach
()
endif
()
endforeach
()
src/py/migraphx_py.cpp
View file @
ac04f3cc
...
...
@@ -472,7 +472,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
map_dyn_input_dims
,
bool
skip_unknown_operators
,
bool
print_program_on_error
,
int64_t
max_loop_iterations
)
{
int64_t
max_loop_iterations
,
int64_t
limit_max_iterations
)
{
migraphx
::
onnx_options
options
;
options
.
default_dim_value
=
default_dim_value
;
options
.
default_dyn_dim_value
=
default_dyn_dim_value
;
...
...
@@ -481,6 +482,7 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
options
.
skip_unknown_operators
=
skip_unknown_operators
;
options
.
print_program_on_error
=
print_program_on_error
;
options
.
max_loop_iterations
=
max_loop_iterations
;
options
.
limit_max_iterations
=
limit_max_iterations
;
return
migraphx
::
parse_onnx
(
filename
,
options
);
},
"Parse onnx file"
,
...
...
@@ -492,7 +494,8 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
migraphx
::
shape
::
dynamic_dimension
>>
(),
py
::
arg
(
"skip_unknown_operators"
)
=
false
,
py
::
arg
(
"print_program_on_error"
)
=
false
,
py
::
arg
(
"max_loop_iterations"
)
=
10
);
py
::
arg
(
"max_loop_iterations"
)
=
10
,
py
::
arg
(
"limit_max_iterations"
)
=
std
::
numeric_limits
<
uint16_t
>::
max
());
m
.
def
(
"parse_onnx_buffer"
,
...
...
src/quantization.cpp
View file @
ac04f3cc
...
...
@@ -70,6 +70,10 @@ void quantize_int8(program& prog,
MIGRAPHX_THROW
(
"QUANTIZE_INT8: only support DOT and CONVOLUTION operation"
);
}
// Run optimize_module() before converting to int8 to const eval and fold in FP32 to
// avoid loss of precision.
run_passes
(
prog
,
{
optimize_module
{}});
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
int8_quant_params
=
std
::
make_shared
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
();
std
::
shared_ptr
<
std
::
vector
<
float
>>
max_abs_vals
=
std
::
make_shared
<
std
::
vector
<
float
>>
();
...
...
@@ -143,11 +147,8 @@ void quantize_int8(program& prog,
run_passes
(
prog
,
{
quantize_int8_pass
{
ins_names
,
*
int8_quant_params
},
eliminate_common_subexpression
{},
dead_code_elimination
{},
simplify_reshapes
{},
dead_code_elimination
{},
simplify_qdq
{},
optimize_module
{},
dead_code_elimination
{}});
}
...
...
src/rewrite_pooling.cpp
View file @
ac04f3cc
...
...
@@ -44,8 +44,6 @@ void rewrite_pooling::apply(module& m) const
if
(
ins
->
inputs
().
empty
())
continue
;
auto
&&
s
=
ins
->
inputs
().
front
()
->
get_shape
();
if
(
not
s
.
standard
())
continue
;
auto
&&
op
=
any_cast
<
op
::
pooling
>
(
ins
->
get_operator
());
if
(
not
std
::
all_of
(
op
.
padding
.
begin
(),
op
.
padding
.
end
(),
[](
auto
i
)
{
return
i
==
0
;
}))
continue
;
...
...
@@ -54,27 +52,18 @@ void rewrite_pooling::apply(module& m) const
auto
lens
=
s
.
lens
();
if
(
not
std
::
equal
(
lens
.
begin
()
+
2
,
lens
.
end
(),
op
.
lengths
.
begin
(),
op
.
lengths
.
end
()))
continue
;
std
::
int64_t
n
=
s
.
lens
()[
0
];
std
::
int64_t
c
=
s
.
lens
()[
1
];
auto
reshape
=
m
.
insert_instruction
(
ins
,
make_op
(
"reshape"
,
{{
"dims"
,
{
n
*
c
,
-
1
}}}),
ins
->
inputs
().
front
());
instruction_ref
pooling
{};
std
::
vector
<
std
::
int64_t
>
axes
(
lens
.
size
()
-
2
);
std
::
iota
(
axes
.
begin
(),
axes
.
end
(),
2
);
// average pooling
if
(
op
.
mode
==
op
::
pooling_mode
::
average
)
{
pooling
=
m
.
insert
_instruction
(
ins
,
make_op
(
"reduce_mean"
,
{{
"axes"
,
{
1
}
}}),
reshape
);
m
.
replace
_instruction
(
ins
,
make_op
(
"reduce_mean"
,
{{
"axes"
,
axes
}}),
ins
->
inputs
()
);
}
// max pooling
else
{
pooling
=
m
.
insert
_instruction
(
ins
,
make_op
(
"reduce_max"
,
{{
"axes"
,
{
1
}
}}),
reshape
);
m
.
replace
_instruction
(
ins
,
make_op
(
"reduce_max"
,
{{
"axes"
,
axes
}}),
ins
->
inputs
()
);
}
std
::
vector
<
int64_t
>
rsp_lens
(
lens
.
size
(),
1
);
rsp_lens
[
0
]
=
n
;
rsp_lens
[
1
]
=
c
;
m
.
replace_instruction
(
ins
,
make_op
(
"reshape"
,
{{
"dims"
,
rsp_lens
}}),
pooling
);
}
}
...
...
src/rewrite_quantization.cpp
View file @
ac04f3cc
...
...
@@ -33,6 +33,8 @@
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_CK_WORKAROUNDS
);
void
apply_quantizelinear
(
module
&
m
,
instruction_ref
ins
)
{
assert
(
ins
->
name
()
==
"quantizelinear"
);
...
...
@@ -45,7 +47,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
y_scale
->
get_shape
().
type
()}}),
x
);
}
auto
div
=
m
.
insert_instruction
(
ins
,
make_op
(
"div"
),
x
,
y_scale
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"
round
"
),
div
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"
nearbyint
"
),
div
);
if
(
ins
->
inputs
().
size
()
==
3
)
{
...
...
@@ -63,8 +65,21 @@ void apply_quantizelinear(module& m, instruction_ref ins)
min_quant
=
qt
.
min
();
});
auto
s
=
add_zero_point
->
get_shape
();
auto
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
auto
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
instruction_ref
min_arg
;
instruction_ref
max_arg
;
if
(
enabled
(
MIGRAPHX_ENABLE_CK_WORKAROUNDS
{}))
{
std
::
vector
<
int
>
min_data
(
s
.
elements
(),
min_quant
);
std
::
vector
<
int
>
max_data
(
s
.
elements
(),
max_quant
);
min_arg
=
m
.
add_literal
(
literal
(
s
,
min_data
));
max_arg
=
m
.
add_literal
(
literal
(
s
,
max_data
));
}
else
{
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
}
auto
saturate
=
insert_common_op
(
m
,
ins
,
make_op
(
"clip"
),
{
add_zero_point
,
min_arg
,
max_arg
});
m
.
replace_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
...
...
src/shape.cpp
View file @
ac04f3cc
...
...
@@ -50,13 +50,14 @@ struct shape_impl
{
assert
(
t
!=
shape
::
tuple_type
);
}
shape_impl
(
shape
::
type_t
t
,
std
::
vector
<
std
::
size_t
>
l
)
:
m_type
(
t
),
m_lens
(
std
::
move
(
l
)),
m_standard
(
true
)
{
assert
(
t
!=
shape
::
tuple_type
);
this
->
calculate_strides
();
assert
(
m_lens
.
size
()
==
m_strides
.
size
());
}
shape_impl
(
shape
::
type_t
t
,
std
::
vector
<
std
::
size_t
>
l
,
std
::
vector
<
std
::
size_t
>
s
)
:
m_type
(
t
),
m_lens
(
std
::
move
(
l
)),
m_strides
(
std
::
move
(
s
))
{
...
...
@@ -151,6 +152,22 @@ struct shape_impl
m_lens
.
begin
(),
m_lens
.
end
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
}
std
::
size_t
get_index
(
size_t
i
)
const
{
std
::
size_t
result
=
0
;
std
::
size_t
s
=
1
;
for
(
auto
k
:
migraphx
::
reverse
(
migraphx
::
range
(
m_lens
.
size
())))
{
std
::
size_t
stride
=
m_strides
[
k
];
std
::
size_t
len
=
m_lens
[
k
];
std
::
size_t
idx
=
(
i
%
(
s
*
len
))
/
s
;
result
+=
stride
*
idx
;
s
*=
len
;
}
return
result
;
}
std
::
vector
<
std
::
size_t
>
min_lens
()
const
{
std
::
vector
<
std
::
size_t
>
ret
(
m_dyn_dims
.
size
());
...
...
@@ -213,6 +230,7 @@ std::string shape::name(shape::type_t t)
}
MIGRAPHX_THROW
(
"Invalid type"
);
}
std
::
string
shape
::
cpp_type
(
shape
::
type_t
t
)
{
switch
(
t
)
...
...
@@ -229,10 +247,12 @@ std::string shape::cpp_type(shape::type_t t)
shape
::
shape
()
:
impl
(
shape_impl
::
default_shape
())
{}
shape
::
shape
(
type_t
t
)
:
impl
(
std
::
make_shared
<
shape_impl
>
(
t
))
{}
shape
::
shape
(
type_t
t
,
std
::
vector
<
std
::
size_t
>
l
)
:
impl
(
std
::
make_shared
<
shape_impl
>
(
t
,
std
::
move
(
l
)))
{
}
shape
::
shape
(
type_t
t
,
std
::
vector
<
std
::
size_t
>
l
,
std
::
vector
<
std
::
size_t
>
s
)
:
impl
(
std
::
make_shared
<
shape_impl
>
(
t
,
std
::
move
(
l
),
std
::
move
(
s
)))
{
...
...
@@ -358,21 +378,8 @@ std::size_t shape::index(std::size_t i) const
assert
(
this
->
lens
().
size
()
==
this
->
strides
().
size
());
if
(
this
->
standard
())
return
i
;
else
{
std
::
size_t
s
=
1
;
std
::
size_t
result
=
0
;
for
(
std
::
size_t
j
=
0
;
j
<
this
->
lens
().
size
();
j
++
)
{
const
std
::
size_t
k
=
this
->
lens
().
size
()
-
j
-
1
;
const
std
::
size_t
stride
=
this
->
strides
()[
k
];
const
std
::
size_t
len
=
this
->
lens
()[
k
];
const
std
::
size_t
idx
=
(
i
%
(
s
*
len
))
/
s
;
result
+=
stride
*
idx
;
s
*=
len
;
}
return
result
;
}
return
impl
->
get_index
(
i
);
}
std
::
vector
<
std
::
size_t
>
shape
::
multi
(
std
::
size_t
idx
)
const
...
...
src/simplify_algebra.cpp
View file @
ac04f3cc
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
@@ -521,6 +521,27 @@ struct find_inner_broadcast
})
<
(
lens
.
size
()
-
1
);
}))
return
;
if
(
broadcasts
.
size
()
>
1
)
{
auto
bcast_strides
=
broadcasts
.
front
()
->
get_shape
().
strides
().
size
();
std
::
vector
<
size_t
>
common_axis
(
bcast_strides
,
0
);
// go through the strides of each broadcast,
// keep track of values that are equal to 0 in a dimension
for
(
auto
i
=
0
;
i
<
bcast_strides
;
i
++
)
{
for
(
const
auto
&
broadcast
:
broadcasts
)
{
if
(
broadcast
->
get_shape
().
strides
()[
i
]
==
0
)
common_axis
[
i
]
++
;
}
}
// if no common broadcast axis, transformation is not useful
if
(
std
::
find_if
(
common_axis
.
begin
(),
common_axis
.
end
(),
[](
auto
num_common
)
{
return
num_common
>
1
;
})
==
common_axis
.
end
())
return
;
}
std
::
vector
<
instruction_ref
>
inputs
;
std
::
transform
(
broadcasts
.
begin
(),
broadcasts
.
end
(),
...
...
@@ -1327,46 +1348,57 @@ struct find_split_reshape
{
auto
slc
=
r
.
instructions
[
"slice"
];
auto
rsp
=
r
.
instructions
[
"reshape"
];
auto
input
=
slc
->
inputs
().
front
();
// Only apply simplification when slices are on a single axis
auto
axes
=
any_cast
<
op
::
slice
>
(
slc
->
get_operator
()).
axes
;
if
(
axes
.
size
()
>
1
)
{
return
;
}
auto
split_outputs
=
get_splits
(
input
);
if
(
split_outputs
.
empty
())
{
return
;
}
// Only want to apply this optimization if each split output is followed by
// a contiguous op and a reshape
if
(
std
::
any_of
(
split_outputs
.
begin
(),
split_outputs
.
end
(),
[](
auto
i
)
{
if
(
i
->
outputs
().
size
()
==
1
)
// Find all the reshapes (similar to rsp) that can be simplified
std
::
vector
<
instruction_ref
>
conts
;
std
::
vector
<
instruction_ref
>
vec_rsp
;
// Iterate through slice and contiguous outputs to allow simplifications when
// slice is followed by multiple reshapes
for
(
auto
&
i
:
split_outputs
)
{
auto
cont
=
i
->
outputs
().
front
();
return
cont
->
outputs
().
size
()
!=
1
;
std
::
copy_if
(
i
->
outputs
().
begin
(),
i
->
outputs
().
end
(),
std
::
back_inserter
(
conts
),
[](
auto
j
)
{
return
j
->
name
()
==
"contiguous"
;
});
}
return
false
;
})
)
for
(
auto
&
i
:
conts
)
{
return
;
std
::
copy_if
(
i
->
outputs
().
begin
(),
i
->
outputs
().
end
(),
std
::
back_inserter
(
vec_rsp
),
[
&
](
auto
j
)
{
return
j
->
get_operator
()
==
rsp
->
get_operator
();
});
}
std
::
vector
<
instruction_ref
>
vec_rsp
(
split_outputs
.
size
());
std
::
transform
(
split_outputs
.
begin
(),
split_outputs
.
end
(),
vec_rsp
.
begin
(),
[](
auto
i
)
{
auto
cont
=
i
->
outputs
().
front
();
return
cont
->
outputs
().
front
();
});
// all outputs are reshape and of the same shape
auto
dims
=
any_cast
<
op
::
reshape
>
(
rsp
->
get_operator
()).
dims
;
if
(
not
same_ops
(
vec_rsp
))
// No simplification needed if there is only one slice -> cont -> reshape
if
(
vec_rsp
.
size
()
<=
1
)
{
return
;
}
// ensure reshape happens after the axis dimension
auto
axis
=
any_cast
<
op
::
slice
>
(
slc
->
get_operator
()).
axes
[
0
];
auto
axis
=
axes
[
0
];
auto
slc_lens
=
slc
->
get_shape
().
lens
();
auto
slc_dim_size
=
std
::
accumulate
(
slc_lens
.
begin
()
+
axis
,
slc_lens
.
end
(),
1
,
std
::
multiplies
<
std
::
size_t
>
());
auto
input_lens
=
input
->
get_shape
().
lens
();
auto
input_size
=
input
->
get_shape
().
elements
();
auto
slc_axis_len
=
input_lens
[
axis
];
// search the reshape output (standard shape) to decide which axis are
// in its output corresponding to the slc_dim_size
...
...
@@ -1393,16 +1425,67 @@ struct find_split_reshape
{
rsp_axis
=
std
::
distance
(
rsp_strides
.
begin
(),
ait
);
}
// calculate reshape output shape
std
::
vector
<
int64_t
>
vec_dims
(
vec_rsp
.
size
());
std
::
transform
(
vec_rsp
.
begin
(),
vec_rsp
.
end
(),
vec_dims
.
begin
(),
[
&
](
auto
is
)
{
return
is
->
get_shape
().
lens
()[
rsp_axis
];
});
// Calculate reshape output shape
// Need to find a reshape such that data represented by instructions in vec_rsp can be
// written as slices of this new reshape. This is done by holding all the dims constant in
// rsp_lens to compute the required dim for rsp_axis (axis that will be sliced)
// ex 1: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 2, 2, 4}, {2, 2, 2, 4}, {2, 2, 2, 4}
// rsp_axis = 1, rsp_out_lens (initial) = {2, 1, 2, 4}, rsp_fixed_size = 2*1*2*4 = 16
// rsp_axis_len = 2*12*4 / 16 = 6
// rsp_out_lens (final) = {2, 6, 2, 4}
// ex 2: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 16}, {2, 16}, {2, 16}
// rsp_axis = 1, rsp_out_lens (initial) = {2, 1}, rsp_fixed_size = 2*1 = 2
// rsp_axis_len = 2*12*4 / 2 = 48
// rsp_out_lens (final) = {2, 48}
std
::
vector
<
int64_t
>
rsp_out_lens
(
rsp_lens
.
begin
(),
rsp_lens
.
end
());
rsp_out_lens
[
rsp_axis
]
=
1
;
auto
rsp_fixed_size
=
std
::
accumulate
(
rsp_out_lens
.
begin
(),
rsp_out_lens
.
end
(),
1
,
std
::
multiplies
<
std
::
size_t
>
());
rsp_out_lens
[
rsp_axis
]
=
std
::
accumulate
(
vec_dims
.
begin
(),
vec_dims
.
end
(),
std
::
int64_t
{
0
});
// cannot create a valid reshape for simplification
if
(
input_size
%
rsp_fixed_size
!=
0
)
{
return
;
}
auto
rsp_axis_len
=
input_size
/
rsp_fixed_size
;
rsp_out_lens
[
rsp_axis
]
=
rsp_axis_len
;
// Calculate new slice start and end indices. Indices are scaled using the new reshape axis
// and the original slice axis. See examples:
// ex 1: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 2, 2, 4}, {2, 2, 2, 4}, {2, 2, 2, 4}
// slc_axis_len = 12, rsp_axis_len = 6
// New Starts: {0*6/12, 4*6/12, 8*6/12} = {0, 2, 4}
// New Ends: {4*6/12, 8*6/12, 12*6/12} = {2, 4, 6}
// ex 2: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 16}, {2, 16}, {2, 16}
// slc_axis_len = 12, rsp_axis_len = 48
// New Starts: {0*48/12, 4*48/12, 8*48/12} = { 0, 16, 32}
// New Ends: {4*48/12, 8*48/12, 12*48/12} = {16, 32, 48}
std
::
vector
<
int64_t
>
new_starts
(
vec_rsp
.
size
());
std
::
transform
(
vec_rsp
.
begin
(),
vec_rsp
.
end
(),
new_starts
.
begin
(),
[
&
](
auto
is
)
{
auto
cont
=
is
->
inputs
().
front
();
auto
og_slc
=
cont
->
inputs
().
front
();
return
any_cast
<
op
::
slice
>
(
og_slc
->
get_operator
()).
starts
[
0
]
*
rsp_axis_len
/
slc_axis_len
;
});
std
::
vector
<
int64_t
>
new_ends
(
vec_rsp
.
size
());
std
::
transform
(
vec_rsp
.
begin
(),
vec_rsp
.
end
(),
new_ends
.
begin
(),
[
&
](
auto
is
)
{
auto
cont
=
is
->
inputs
().
front
();
auto
og_slc
=
cont
->
inputs
().
front
();
return
any_cast
<
op
::
slice
>
(
og_slc
->
get_operator
()).
ends
[
0
]
*
rsp_axis_len
/
slc_axis_len
;
});
// insert the reshape instruction and add contiguous if needed
if
(
not
input
->
get_shape
().
standard
())
...
...
@@ -1413,16 +1496,14 @@ struct find_split_reshape
std
::
next
(
input
),
make_op
(
"reshape"
,
{{
"dims"
,
rsp_out_lens
}}),
input
);
// replace the original reshape with slice
int64_t
start
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
vec_rsp
.
size
();
++
i
)
{
m
.
replace_instruction
(
vec_rsp
[
i
],
make_op
(
"slice"
,
{{
"axes"
,
{
rsp_axis
}},
{
"starts"
,
{
start
}},
{
"ends"
,
{
start
+
vec_dim
s
[
i
]}}}),
{{
"axes"
,
{
rsp_axis
}},
{
"starts"
,
{
new_
start
s
[
i
]
}},
{
"ends"
,
{
new_end
s
[
i
]}}}),
rsp_ins
);
start
+=
vec_dims
[
i
];
}
}
};
...
...
@@ -1446,10 +1527,13 @@ struct find_split_transpose
{
return
;
}
if
(
std
::
any_of
(
split_outputs
.
begin
(),
split_outputs
.
end
(),
[](
auto
i
)
{
return
i
->
outputs
().
size
()
!=
1
;
}))
return
;
std
::
vector
<
instruction_ref
>
vec_trans
(
split_outputs
.
size
());
std
::
transform
(
split_outputs
.
begin
(),
split_outputs
.
end
(),
vec_trans
.
begin
(),
[](
auto
i
)
{
assert
(
i
->
outputs
().
size
()
==
1
);
return
i
->
outputs
().
front
();
});
...
...
src/simplify_dyn_ops.cpp
0 → 100644
View file @
ac04f3cc
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/simplify_dyn_ops.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/literal.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
/**
* Convert 2 input static shape broadcast/multibroadcast into 1 input version.
* Some compiler passes (ex. simplify_algebra) only support the 1 input versions
* of the broadcasting operators.
*/
struct
find_static_2in_broadcasts
{
auto
matcher
()
const
{
return
match
::
broadcast
(
match
::
nargs
(
2
),
match
::
arg
(
0
)(
match
::
static_shape
()),
match
::
arg
(
1
)(
match
::
static_shape
()));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
out_lens
=
ins
->
get_shape
().
lens
();
auto
broadcast_op
=
ins
->
get_operator
();
if
(
broadcast_op
.
name
()
==
"broadcast"
)
{
broadcast_op
.
from_value
({{
"out_lens"
,
out_lens
}});
}
else
{
broadcast_op
.
from_value
({{
"out_lens"
,
out_lens
},
{
"out_dyn_dims"
,
{}}});
}
m
.
replace_instruction
(
ins
,
broadcast_op
,
ins
->
inputs
().
at
(
0
));
}
};
/**
* Simplify slice with variable `starts` and `ends` to the constant version if
* the `input_starts` and `input_ends` inputs are constant.
*/
struct
find_const_3in_slice
{
auto
matcher
()
const
{
return
match
::
name
(
"slice"
)(
match
::
nargs
(
3
),
match
::
arg
(
1
)(
match
::
is_constant
()),
match
::
arg
(
2
)(
match
::
is_constant
()));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
inputs
=
ins
->
inputs
();
argument
starts_arg
=
inputs
.
at
(
1
)
->
eval
();
argument
ends_arg
=
inputs
.
at
(
2
)
->
eval
();
if
(
not
starts_arg
.
empty
()
and
not
ends_arg
.
empty
())
{
std
::
vector
<
int64_t
>
starts_vec
;
std
::
vector
<
int64_t
>
ends_vec
;
starts_arg
.
visit
([
&
](
auto
output
)
{
starts_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
ends_arg
.
visit
([
&
](
auto
output
)
{
ends_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
auto
slice_val
=
ins
->
get_operator
().
to_value
();
auto
axes_vec
=
slice_val
.
at
(
"axes"
).
to_vector
<
int64_t
>
();
m
.
replace_instruction
(
ins
,
make_op
(
"slice"
,
{{
"starts"
,
starts_vec
},
{
"ends"
,
ends_vec
},
{
"axes"
,
axes_vec
}}),
inputs
.
at
(
0
));
}
}
};
/**
* Simplify slice with variable `starts`, `ends`, and `input_axes` to the constant version if
* the `input_starts`, `input_ends`, and `input_axes` inputs are constant.
*/
struct
find_const_4in_slice
{
auto
matcher
()
const
{
return
match
::
name
(
"slice"
)(
match
::
nargs
(
4
),
match
::
arg
(
1
)(
match
::
is_constant
()),
match
::
arg
(
2
)(
match
::
is_constant
()),
match
::
arg
(
3
)(
match
::
is_constant
()));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
inputs
=
ins
->
inputs
();
argument
starts_arg
=
inputs
.
at
(
1
)
->
eval
();
argument
ends_arg
=
inputs
.
at
(
2
)
->
eval
();
argument
axes_arg
=
inputs
.
at
(
3
)
->
eval
();
if
(
not
starts_arg
.
empty
()
and
not
ends_arg
.
empty
()
and
not
axes_arg
.
empty
())
{
std
::
vector
<
int64_t
>
starts_vec
;
std
::
vector
<
int64_t
>
ends_vec
;
std
::
vector
<
int64_t
>
axes_vec
;
starts_arg
.
visit
([
&
](
auto
output
)
{
starts_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
ends_arg
.
visit
([
&
](
auto
output
)
{
ends_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
axes_arg
.
visit
([
&
](
auto
output
)
{
axes_vec
.
assign
(
output
.
begin
(),
output
.
end
());
});
m
.
replace_instruction
(
ins
,
make_op
(
"slice"
,
{{
"starts"
,
starts_vec
},
{
"ends"
,
ends_vec
},
{
"axes"
,
axes_vec
}}),
inputs
.
at
(
0
));
}
}
};
/**
* Simplify dimensions_of to a literal when the input arugment has a static shape
* or the dynamic dimensions from `start` to `end` are fixed.
*/
struct
find_static_dimensions_of
{
auto
matcher
()
const
{
return
match
::
name
(
"dimensions_of"
)();
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
input
=
ins
->
inputs
().
at
(
0
);
auto
dimensions_of_value
=
ins
->
get_operator
().
to_value
();
auto
start
=
dimensions_of_value
.
at
(
"start"
).
to
<
std
::
size_t
>
();
auto
end
=
dimensions_of_value
.
at
(
"end"
).
to
<
std
::
size_t
>
();
if
(
input
->
get_shape
().
dynamic
())
{
// check if dynamic dimensions from start to end are fixed
auto
dds
=
input
->
get_shape
().
dyn_dims
();
if
(
std
::
any_of
(
dds
.
begin
()
+
start
,
dds
.
begin
()
+
end
,
[](
auto
dd
)
{
return
not
dd
.
is_fixed
();
}))
{
return
;
}
}
std
::
size_t
output_ndim
=
end
-
start
;
std
::
vector
<
int64_t
>
vec_shape
(
output_ndim
);
migraphx
::
shape
s
(
migraphx
::
shape
::
int64_type
,
{
output_ndim
});
std
::
vector
<
std
::
size_t
>
input_lens
=
input
->
get_shape
().
to_static
(
1
).
lens
();
std
::
transform
(
input_lens
.
begin
()
+
start
,
input_lens
.
begin
()
+
end
,
vec_shape
.
begin
(),
[](
auto
i
)
{
return
int64_t
(
i
);
});
migraphx
::
shape
output_shape
{
migraphx
::
shape
::
int64_type
,
{
end
-
start
}};
auto
lit_ins
=
m
.
add_literal
(
migraphx
::
literal
{
output_shape
,
vec_shape
});
m
.
replace_instruction
(
ins
,
lit_ins
);
}
};
void
simplify_dyn_ops
::
apply
(
module
&
m
)
const
{
match
::
find_matches
(
m
,
find_static_2in_broadcasts
{},
find_static_dimensions_of
{},
find_const_3in_slice
{},
find_const_4in_slice
{});
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/simplify_reshapes.cpp
View file @
ac04f3cc
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-202
2
Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-202
3
Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
...
...
@@ -122,6 +122,11 @@ struct find_nop_reshapes
reshapes
.
insert
(
"pad"
);
reshapes
.
insert
(
"slice"
);
reshapes
.
insert
(
"transpose"
);
reshapes
.
insert
(
"reduce_mean"
);
reshapes
.
insert
(
"reduce_max"
);
reshapes
.
insert
(
"reduce_min"
);
reshapes
.
insert
(
"reduce_sum"
);
reshapes
.
insert
(
"reduce_prod"
);
return
match
::
name
(
reshapes
)(
match
::
same_shape
(
match
::
arg
(
0
)));
}
...
...
@@ -627,6 +632,46 @@ struct find_transpose_contiguous_reshaper_unary
}
};
// simplifies broadcast->transpose to transpose->broadcast
// in the case of a scalar, simply rewrite to broadcast
// this can allow for further optimizations with find_inner_broadcast() in simplify_algebra.cpp
struct
find_broadcast_transpose
{
auto
matcher
()
const
{
return
match
::
name
(
"transpose"
)(
match
::
arg
(
0
)(
match
::
name
(
"multibroadcast"
).
bind
(
"bcast_ins"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
transpose
=
r
.
result
;
auto
transpose_lens
=
transpose
->
get_shape
().
lens
();
auto
bcast_ins
=
r
.
instructions
[
"bcast_ins"
];
auto
input
=
bcast_ins
->
inputs
().
front
();
// scalar transformation does not need extra transpose
if
(
not
input
->
get_shape
().
scalar
())
{
// find common shape
auto
in_lens
=
input
->
get_shape
().
lens
();
int
lens_diff
=
transpose_lens
.
size
()
-
in_lens
.
size
();
// insert unsqueeze if input lens < transpose lens
if
(
lens_diff
>
0
)
{
std
::
vector
<
size_t
>
unsqueeze_axes
(
lens_diff
);
std
::
iota
(
unsqueeze_axes
.
begin
(),
unsqueeze_axes
.
end
(),
0
);
input
=
m
.
insert_instruction
(
bcast_ins
,
make_op
(
"unsqueeze"
,
{{
"axes"
,
unsqueeze_axes
}}),
input
);
}
// apply transpose before the multibroadcast
input
=
m
.
insert_instruction
(
bcast_ins
,
transpose
->
get_operator
(),
input
);
}
auto
new_mbcast
=
m
.
insert_instruction
(
bcast_ins
,
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
transpose_lens
}}),
input
);
m
.
replace_instruction
(
transpose
,
new_mbcast
);
}
};
struct
find_slice_transpose
{
auto
matcher
()
const
...
...
@@ -784,7 +829,7 @@ struct find_transpose_slice
void
simplify_reshapes
::
apply
(
module
&
m
)
const
{
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
depth
;
i
++
)
{
match
::
find_matches
(
m
,
find_where_op
{},
...
...
@@ -799,6 +844,7 @@ void simplify_reshapes::apply(module& m) const
find_nested_slice
{},
find_nested_concat
{},
find_transpose_slice
{},
find_broadcast_transpose
{},
find_slice_transpose
{},
find_transpose_contiguous_reshaper_unary
{});
dead_code_elimination
{}.
apply
(
m
);
...
...
src/split_single_dyn_dim.cpp
View file @
ac04f3cc
...
...
@@ -68,37 +68,6 @@ has_one_dyn_dim(const std::unordered_map<std::string, shape>& param_shapes)
dds_it
->
max
};
}
namespace
{
struct
find_static_2in_broadcasts
{
// Convert 2 input static shape broadcast/multibroadcast into 1 input version.
// Some compiler passes (ex. simplify_algebra) only support the 1 input versions
// of the broadcasting operators.
auto
matcher
()
const
{
return
match
::
broadcast
(
match
::
nargs
(
2
),
match
::
arg
(
0
)(
match
::
static_shape
()),
match
::
arg
(
1
)(
match
::
static_shape
()));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
out_lens
=
ins
->
get_shape
().
lens
();
auto
broadcast_op
=
ins
->
get_operator
();
if
(
broadcast_op
.
name
()
==
"broadcast"
)
{
broadcast_op
.
from_value
({{
"out_lens"
,
out_lens
}});
}
else
{
broadcast_op
.
from_value
({{
"out_lens"
,
out_lens
},
{
"out_dyn_dims"
,
{}}});
}
m
.
replace_instruction
(
ins
,
broadcast_op
,
ins
->
inputs
().
at
(
0
));
}
};
}
// namespace
/**
* Makes all the shapes in the dynamic_dimension range. Probably won't work for `if`
* and `loop` instructions, depending on how the submodules for those
...
...
@@ -135,7 +104,6 @@ void split_single_dyn_dim::apply(module_pass_manager& mpm) const
dd_check
->
dyn_param_str
,
migraphx
::
shape
{
dyn_param_shape
.
type
(),
static_lens
});
auto
outputs
=
submod
->
add_instructions
(
mm
,
map_ins
);
submod
->
add_return
({
outputs
});
match
::
find_matches
(
*
submod
,
find_static_2in_broadcasts
{});
submodules
.
push_back
(
submod
);
}
// redirect to select_module operator and return
...
...
src/targets/cpu/include/migraphx/cpu/dnnl.hpp
View file @
ac04f3cc
...
...
@@ -91,6 +91,19 @@ struct post_op : reflect_equality<post_op>, reflect_stream<post_op>
}
};
template
<
class
F
>
struct
execute_wrapper
{
F
f
;
argument
operator
()(
context
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
return
f
(
args
);
}
};
template
<
class
F
>
execute_wrapper
<
F
>
make_execute_wrapper
(
F
f
)
{
return
{
std
::
move
(
f
)};
}
template
<
class
Derived
,
class
Primitive
>
struct
dnnl_op
:
auto_register_op
<
Derived
>
{
...
...
@@ -308,7 +321,7 @@ struct dnnl_op : auto_register_op<Derived>
#ifndef NDEBUG
auto
prim_attr
=
get_primitive_attr
(
md
);
#endif
execute
=
[
=
](
context
&
,
const
std
::
vector
<
argument
>&
args
)
{
execute
=
make_execute_wrapper
([
=
](
const
std
::
vector
<
argument
>&
args
)
{
#ifndef NDEBUG
// Check that the memory descriptors have not changed
auto
debug_args
=
args
;
...
...
@@ -379,7 +392,7 @@ struct dnnl_op : auto_register_op<Derived>
m
[
arg_lookup
[
i
]]
=
to_dnnl_memory
(
md
.
at
(
arg_lookup
[
i
]),
args
[
i
]);
prim
.
execute
(
get_dnnl_context
().
stream
,
m
);
return
args
.
back
();
};
}
)
;
}
std
::
vector
<
shape
>
trim_post_op_inputs
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
...
...
src/targets/cpu/include/migraphx/cpu/fuse_ops.hpp
View file @
ac04f3cc
...
...
@@ -24,7 +24,7 @@
#ifndef MIGRAPHX_GUARD_CPU_FUSE_OPS_HPP
#define MIGRAPHX_GUARD_CPU_FUSE_OPS_HPP
#include <migraphx/c
onfig
.hpp>
#include <migraphx/c
pu/context
.hpp>
#include <string>
namespace
migraphx
{
...
...
@@ -34,9 +34,7 @@ struct module;
namespace
cpu
{
struct
context
;
struct
fuse_ops
struct
MIGRAPHX_CPU_EXPORT
fuse_ops
{
context
*
ctx
=
nullptr
;
std
::
string
name
()
const
{
return
"cpu::fuse_ops"
;
}
...
...
src/targets/cpu/include/migraphx/cpu/pointwise.hpp
View file @
ac04f3cc
...
...
@@ -24,6 +24,7 @@
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_POINTWISE_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_POINTWISE_HPP
#include <array>
#include <migraphx/config.hpp>
#include <migraphx/context.hpp>
#include <migraphx/check_shapes.hpp>
...
...
Prev
1
…
4
5
6
7
8
9
10
11
12
…
27
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment