Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
5a14c0bf
Commit
5a14c0bf
authored
Oct 19, 2022
by
umangyadav
Browse files
Merge branch 'develop' into workspace_size
parents
cb01e280
5fa42993
Changes
319
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
367 additions
and
577 deletions
+367
-577
src/opt/memory_coloring_impl.cpp
src/opt/memory_coloring_impl.cpp
+7
-7
src/pad_calc.cpp
src/pad_calc.cpp
+33
-8
src/program.cpp
src/program.cpp
+53
-40
src/py/migraphx_py.cpp
src/py/migraphx_py.cpp
+23
-5
src/replace_allocate.cpp
src/replace_allocate.cpp
+1
-1
src/rewrite_batchnorm.cpp
src/rewrite_batchnorm.cpp
+0
-83
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+138
-14
src/simplify_reshapes.cpp
src/simplify_reshapes.cpp
+39
-0
src/targets/cpu/CMakeLists.txt
src/targets/cpu/CMakeLists.txt
+2
-0
src/targets/cpu/fmod.cpp
src/targets/cpu/fmod.cpp
+6
-12
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+2
-1
src/targets/cpu/mod.cpp
src/targets/cpu/mod.cpp
+6
-12
src/targets/cpu/target.cpp
src/targets/cpu/target.cpp
+0
-3
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+20
-157
src/targets/gpu/batch_norm_inference.cpp
src/targets/gpu/batch_norm_inference.cpp
+0
-85
src/targets/gpu/clip.cpp
src/targets/gpu/clip.cpp
+0
-46
src/targets/gpu/compile_gen.cpp
src/targets/gpu/compile_gen.cpp
+32
-3
src/targets/gpu/compile_hip_code_object.cpp
src/targets/gpu/compile_hip_code_object.cpp
+5
-5
src/targets/gpu/concat.cpp
src/targets/gpu/concat.cpp
+0
-48
src/targets/gpu/convert.cpp
src/targets/gpu/convert.cpp
+0
-47
No files found.
src/opt/memory_coloring_impl.cpp
View file @
5a14c0bf
...
...
@@ -72,7 +72,7 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
if
(
conflict_table
.
find
(
vn
)
!=
conflict_table
.
end
())
{
std
::
set
<
int
>&
vn_set
=
conflict_table
[
vn
];
const
std
::
set
<
int
>&
vn_set
=
conflict_table
[
vn
];
for
(
const
auto
&
iter
:
vn_set
)
{
live_range
*
range
=
live_ranges
[
iter
];
...
...
@@ -267,8 +267,8 @@ void memory_coloring_impl::verify()
{
for
(
int
i
=
0
;
i
<
num_of_lives
;
++
i
)
{
live_interval
&
interval
=
live_intervals
[
i
];
live_range
&
segment
=
interval
.
segment
;
const
live_interval
&
interval
=
live_intervals
[
i
];
const
live_range
&
segment
=
interval
.
segment
;
if
(
segment
.
begin
==
invalid_offset
)
{
...
...
@@ -284,7 +284,7 @@ void memory_coloring_impl::verify()
int
vn
=
segment
.
vn
;
if
(
conflict_table
.
find
(
vn
)
!=
conflict_table
.
end
())
{
std
::
set
<
int
>&
vn_set
=
conflict_table
[
vn
];
const
std
::
set
<
int
>&
vn_set
=
conflict_table
[
vn
];
for
(
const
auto
&
iter
:
vn_set
)
{
live_range
*
range
=
live_ranges
[
iter
];
...
...
@@ -319,8 +319,8 @@ void memory_coloring_impl::dump_intervals()
{
std
::
cout
<<
" segment:"
<<
i
;
std
::
cout
<<
" =>"
;
std
::
set
<
int
>&
table
=
conflict_table
[
i
];
for
(
auto
&
iter
:
table
)
const
std
::
set
<
int
>&
table
=
conflict_table
[
i
];
for
(
const
auto
&
iter
:
table
)
{
std
::
cout
<<
(
iter
)
<<
","
;
}
...
...
@@ -357,7 +357,7 @@ void live_interval::dump()
std
::
cout
<<
"id:"
<<
id
;
segment
.
dump
();
std
::
cout
<<
" uses:"
;
for
(
auto
&
iter
:
use_points
)
for
(
const
auto
&
iter
:
use_points
)
{
std
::
cout
<<
" "
<<
get_ins_enum
(
iter
)
<<
","
;
}
...
...
src/pad_calc.cpp
View file @
5a14c0bf
...
...
@@ -52,19 +52,21 @@ void calculate_padding(int64_t idx,
}
}
std
::
vector
<
std
::
size_t
>
calc_dyn_auto_pad
(
std
::
vector
<
std
::
size_t
>
tensor
_lens
,
std
::
vector
<
std
::
size_t
>
k
_lens
,
std
::
vector
<
std
::
size_t
>
strides
,
std
::
vector
<
std
::
size_t
>
dilations
,
std
::
vector
<
std
::
size_t
>
calc_dyn_auto_pad
(
const
std
::
vector
<
std
::
size_t
>
&
input
_lens
,
const
std
::
vector
<
std
::
size_t
>
&
wei
_lens
,
const
std
::
vector
<
std
::
size_t
>
&
strides
,
const
std
::
vector
<
std
::
size_t
>
&
dilations
,
bool
use_upper
)
{
std
::
vector
<
std
::
size_t
>
padding
;
padding
.
resize
(
2
*
k_lens
.
size
());
for
(
std
::
size_t
i
=
0
;
i
<
padding
.
size
()
/
2
;
i
++
)
assert
(
input_lens
.
size
()
>=
3
);
std
::
size_t
num_spatial_dims
=
input_lens
.
size
()
-
2
;
padding
.
resize
(
2
*
num_spatial_dims
);
for
(
std
::
size_t
i
=
0
;
i
<
num_spatial_dims
;
i
++
)
{
std
::
ptrdiff_t
input_dim
=
tensor
_lens
[
i
];
std
::
ptrdiff_t
input_dim
=
input
_lens
[
i
+
2
];
std
::
ptrdiff_t
stride
=
strides
[
i
];
std
::
ptrdiff_t
weight_dim
=
k
_lens
[
i
];
std
::
ptrdiff_t
weight_dim
=
wei
_lens
[
i
+
2
];
std
::
ptrdiff_t
dilation
=
dilations
[
i
];
std
::
ptrdiff_t
output_dim
=
(
input_dim
+
stride
-
1
)
/
stride
;
// round up result
std
::
ptrdiff_t
new_weight_dim
=
weight_dim
+
(
weight_dim
-
1
)
*
(
dilation
-
1
);
...
...
@@ -86,5 +88,28 @@ std::vector<std::size_t> calc_dyn_auto_pad(std::vector<std::size_t> tensor_lens,
return
padding
;
}
shape
compute_padded_shape
(
const
shape
&
input
,
const
shape
&
weights
,
const
std
::
vector
<
std
::
size_t
>&
padding
,
const
std
::
vector
<
std
::
size_t
>&
stride
,
const
std
::
vector
<
std
::
size_t
>&
dilation
)
{
const
size_t
num_spatial_dims
=
input
.
lens
().
size
()
-
2
;
std
::
vector
<
size_t
>
output_lens
{
input
.
lens
()[
0
],
weights
.
lens
()[
0
]};
// calculate the output shape of the convolution: ((W - K + 2P) / S) + 1
for
(
size_t
i
=
0
;
i
<
num_spatial_dims
;
++
i
)
{
auto
padding_factor
=
padding
[
i
]
+
padding
[
i
+
num_spatial_dims
];
output_lens
.
push_back
(
std
::
size_t
(
std
::
max
<
std
::
ptrdiff_t
>
(
1
,
(
input
.
lens
()[
i
+
2
]
-
(
1
+
dilation
[
i
]
*
(
weights
.
lens
()[
i
+
2
]
-
1
))
+
padding_factor
)
/
stride
[
i
]
+
1
)));
}
return
input
.
with_lens
(
output_lens
);
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/program.cpp
View file @
5a14c0bf
...
...
@@ -398,7 +398,7 @@ std::vector<argument> generic_eval(const program& p,
return
generic_eval
(
mm
,
ctx
,
params
,
{},
make_trace
);
}
std
::
vector
<
argument
>
program
::
eval
(
parameter_map
params
)
const
std
::
vector
<
argument
>
program
::
eval
(
parameter_map
params
,
execution_environment
exec_env
)
const
{
auto
&
ctx
=
this
->
impl
->
ctx
;
#ifndef NDEBUG
...
...
@@ -423,6 +423,12 @@ std::vector<argument> program::eval(parameter_map params) const
#endif
auto
trace_level
=
value_of
(
MIGRAPHX_TRACE_EVAL
{});
std
::
vector
<
argument
>
ret
;
if
(
exec_env
.
async
)
{
ctx
.
wait_for
(
exec_env
.
queue
);
}
if
(
trace_level
>
0
)
{
...
...
@@ -434,49 +440,56 @@ std::vector<argument> program::eval(parameter_map params) const
ins_out
[
x
]
=
ss
.
str
();
});
ret
urn
generic_eval
(
*
this
,
ctx
,
std
::
move
(
params
),
with_check_context
([
&
](
auto
&
ins
,
auto
f
,
auto
&&
check_context
)
{
ctx
.
finish
();
std
::
cout
<<
"Run instruction: "
<<
ins_out
.
at
(
ins
)
<<
std
::
endl
;
timer
t
{};
auto
result
=
check_context
(
f
);
double
t1
=
t
.
record
<
milliseconds
>
();
ctx
.
finish
();
double
t2
=
t
.
record
<
milliseconds
>
();
std
::
cout
<<
"Time: "
<<
t1
<<
"ms, "
<<
t2
<<
"ms"
<<
std
::
endl
;
if
(
trace_level
>
1
and
ins
->
name
().
front
()
!=
'@'
and
ins
->
name
()
!=
"load"
and
not
result
.
empty
())
{
target
tgt
=
make_target
(
this
->
impl
->
target_name
);
auto
buffer
=
tgt
.
copy_from
(
result
);
if
(
trace_level
==
2
)
{
std
::
cout
<<
"Output has "
<<
to_string_range
(
classify_argument
(
buffer
))
<<
std
::
endl
;
std
::
cout
<<
"Output: "
;
preview_argument
(
std
::
cout
,
buffer
);
std
::
cout
<<
std
::
endl
;
}
else
{
std
::
cout
<<
"Output: "
<<
buffer
<<
std
::
endl
;
}
}
return
result
;
}));
ret
=
generic_eval
(
*
this
,
ctx
,
std
::
move
(
params
),
with_check_context
([
&
](
auto
&
ins
,
auto
f
,
auto
&&
check_context
)
{
ctx
.
finish
();
std
::
cout
<<
"Run instruction: "
<<
ins_out
.
at
(
ins
)
<<
std
::
endl
;
timer
t
{};
auto
result
=
check_context
(
f
);
double
t1
=
t
.
record
<
milliseconds
>
();
ctx
.
finish
();
double
t2
=
t
.
record
<
milliseconds
>
();
std
::
cout
<<
"Time: "
<<
t1
<<
"ms, "
<<
t2
<<
"ms"
<<
std
::
endl
;
if
(
trace_level
>
1
and
ins
->
name
().
front
()
!=
'@'
and
ins
->
name
()
!=
"load"
and
not
result
.
empty
())
{
target
tgt
=
make_target
(
this
->
impl
->
target_name
);
auto
buffer
=
tgt
.
copy_from
(
result
);
if
(
trace_level
==
2
)
{
std
::
cout
<<
"Output has "
<<
to_string_range
(
classify_argument
(
buffer
))
<<
std
::
endl
;
std
::
cout
<<
"Output: "
;
preview_argument
(
std
::
cout
,
buffer
);
std
::
cout
<<
std
::
endl
;
}
else
{
std
::
cout
<<
"Output: "
<<
buffer
<<
std
::
endl
;
}
}
return
result
;
}));
}
else
{
ret
urn
generic_eval
(
*
this
,
ctx
,
std
::
move
(
params
),
with_check_context
([
&
](
auto
&
,
auto
f
,
auto
&&
check_context
)
{
return
check_context
(
f
);
}));
ret
=
generic_eval
(
*
this
,
ctx
,
std
::
move
(
params
),
with_check_context
([
&
](
auto
&
,
auto
f
,
auto
&&
check_context
)
{
return
check_context
(
f
);
}));
}
if
(
exec_env
.
async
)
{
ctx
.
finish_on
(
exec_env
.
queue
);
}
return
ret
;
}
const
int
program_file_version
=
5
;
...
...
src/py/migraphx_py.cpp
View file @
5a14c0bf
...
...
@@ -264,12 +264,13 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
py
::
class_
<
migraphx
::
argument
>
(
m
,
"argument"
,
py
::
buffer_protocol
())
.
def_buffer
([](
migraphx
::
argument
&
x
)
->
py
::
buffer_info
{
return
to_buffer_info
(
x
);
})
.
def
(
"__init__"
,
[](
migraphx
::
argument
&
x
,
py
::
buffer
b
)
{
py
::
buffer_info
info
=
b
.
request
();
new
(
&
x
)
migraphx
::
argument
(
to_shape
(
info
),
info
.
ptr
);
})
.
def
(
py
::
init
([](
py
::
buffer
b
)
{
py
::
buffer_info
info
=
b
.
request
();
return
migraphx
::
argument
(
to_shape
(
info
),
info
.
ptr
);
}))
.
def
(
"get_shape"
,
&
migraphx
::
argument
::
get_shape
)
.
def
(
"data_ptr"
,
[](
migraphx
::
argument
&
x
)
{
return
reinterpret_cast
<
std
::
uintptr_t
>
(
x
.
data
());
})
.
def
(
"tolist"
,
[](
migraphx
::
argument
&
x
)
{
py
::
list
l
{
x
.
get_shape
().
elements
()};
...
...
@@ -354,6 +355,23 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
}
return
p
.
eval
(
pm
);
})
.
def
(
"run_async"
,
[](
migraphx
::
program
&
p
,
py
::
dict
params
,
std
::
uintptr_t
stream
,
std
::
string
stream_name
)
{
migraphx
::
parameter_map
pm
;
for
(
auto
x
:
params
)
{
std
::
string
key
=
x
.
first
.
cast
<
std
::
string
>
();
py
::
buffer
b
=
x
.
second
.
cast
<
py
::
buffer
>
();
py
::
buffer_info
info
=
b
.
request
();
pm
[
key
]
=
migraphx
::
argument
(
to_shape
(
info
),
info
.
ptr
);
}
migraphx
::
execution_environment
exec_env
{
migraphx
::
any_ptr
(
reinterpret_cast
<
void
*>
(
stream
),
stream_name
),
true
};
return
p
.
eval
(
pm
,
exec_env
);
})
.
def
(
"sort"
,
&
migraphx
::
program
::
sort
)
.
def
(
"print"
,
[](
const
migraphx
::
program
&
p
)
{
std
::
cout
<<
p
<<
std
::
endl
;
})
.
def
(
"__eq__"
,
std
::
equal_to
<
migraphx
::
program
>
{})
...
...
src/replace_allocate.cpp
View file @
5a14c0bf
...
...
@@ -73,7 +73,7 @@ void insert_submod_allocations(instruction_ref ins, module& mod, const allocatio
name_shapes
.
insert
(
ps
.
begin
(),
ps
.
end
());
}
for
(
auto
&
pn
:
name_shapes
)
for
(
const
auto
&
pn
:
name_shapes
)
{
const
auto
&
s
=
pn
.
second
;
instruction_ref
output
{};
...
...
src/rewrite_batchnorm.cpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/broadcast.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/dfor.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
void
rewrite_batchnorm
::
apply
(
module
&
m
)
const
{
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
!=
"batch_norm_inference"
)
continue
;
// Get scale, bias, mean, variance from inputs
auto
gamma
=
ins
->
inputs
()[
1
]
->
eval
();
auto
bias
=
ins
->
inputs
()[
2
]
->
eval
();
auto
mean
=
ins
->
inputs
()[
3
]
->
eval
();
auto
variance
=
ins
->
inputs
()[
4
]
->
eval
();
if
(
any_of
({
gamma
,
bias
,
mean
,
variance
},
[](
auto
arg
)
{
return
arg
.
empty
();
}))
continue
;
std
::
vector
<
std
::
size_t
>
lens
=
ins
->
inputs
()[
1
]
->
get_shape
().
lens
();
shape
s
{
ins
->
get_shape
().
type
(),
lens
};
// Get epsilon
auto
bn_op
=
any_cast
<
op
::
batch_norm_inference
>
(
ins
->
get_operator
());
auto
epsilon
=
bn_op
.
epsilon
;
argument
a
{
s
};
argument
b
{
s
};
visit_all
(
gamma
,
bias
,
mean
,
variance
,
a
,
b
)(
[
&
](
auto
gamma2
,
auto
bias2
,
auto
mean2
,
auto
variance2
,
auto
a2
,
auto
b2
)
{
dfor
(
a
.
get_shape
().
elements
())(
[
&
](
std
::
size_t
c
)
{
a2
[
c
]
=
gamma2
[
c
]
/
std
::
sqrt
(
variance2
[
c
]
+
epsilon
);
});
dfor
(
b
.
get_shape
().
elements
())([
&
](
std
::
size_t
c
)
{
b2
[
c
]
=
bias2
[
c
]
-
(
gamma2
[
c
]
*
mean2
[
c
]
/
std
::
sqrt
(
variance2
[
c
]
+
epsilon
));
});
});
auto
broadcast
=
op
::
broadcast
{
1
,
ins
->
get_shape
().
lens
()};
auto
a_ins
=
m
.
add_literal
({
a
.
get_shape
(),
a
.
data
()});
auto
a_broadcast
=
m
.
insert_instruction
(
ins
,
broadcast
,
a_ins
);
auto
mul
=
m
.
insert_instruction
(
ins
,
make_op
(
"mul"
),
ins
->
inputs
().
front
(),
a_broadcast
);
auto
b_ins
=
m
.
add_literal
({
b
.
get_shape
(),
b
.
data
()});
auto
b_broadcast
=
m
.
insert_instruction
(
ins
,
broadcast
,
b_ins
);
auto
add
=
m
.
insert_instruction
(
ins
,
make_op
(
"add"
),
mul
,
b_broadcast
);
m
.
replace_instruction
(
ins
,
add
);
}
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/simplify_algebra.cpp
View file @
5a14c0bf
...
...
@@ -57,12 +57,14 @@ auto conv_const_weights()
auto
reduction
()
{
return
match
::
name_contains
(
"reduce"
);
}
// conv(x, w) * a => conv(x, a * w)
struct
find_mul_conv
{
auto
matcher
()
const
{
return
match
::
name
(
"mul"
)(
match
::
either_arg
(
0
,
1
)(
conv_const_weights
().
bind
(
"conv"
),
match
::
name
(
"broadcast"
).
bind
(
"a"
)));
return
match
::
name
(
"mul"
)(
match
::
either_arg
(
0
,
1
)(
conv_const_weights
().
bind
(
"conv"
),
match
::
name
(
"broadcast"
,
"multibroadcast"
).
bind
(
"a"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
...
...
@@ -72,14 +74,35 @@ struct find_mul_conv
auto
a_ins
=
r
.
instructions
[
"a"
];
auto
w_ins
=
r
.
instructions
[
"w"
];
auto
broadcast_op
=
any_cast
<
op
::
broadcast
>
(
a_ins
->
get_operator
());
if
(
broadcast_op
.
axis
!=
1
)
const
auto
&
a_input_lens
=
a_ins
->
inputs
().
front
()
->
get_shape
().
lens
();
std
::
size_t
num_not_one_dims
=
std
::
count_if
(
a_input_lens
.
cbegin
(),
a_input_lens
.
cend
(),
[](
auto
dim
)
{
return
dim
!=
1
;
});
if
(
num_not_one_dims
>
1
)
return
;
// check broadcasted along channels
const
auto
&
a_lens
=
a_ins
->
get_shape
().
lens
();
const
auto
&
a_strides
=
a_ins
->
get_shape
().
strides
();
auto
is_broadcasted_axis
=
[](
auto
len
,
auto
stride
)
{
return
len
==
1
or
stride
==
0
;
};
if
(
a_strides
.
at
(
1
)
!=
1
)
return
;
if
(
not
is_broadcasted_axis
(
a_lens
.
front
(),
a_strides
.
front
()))
return
;
if
(
not
std
::
equal
(
a_lens
.
begin
()
+
2
,
a_lens
.
end
(),
a_strides
.
begin
()
+
2
,
a_strides
.
end
(),
is_broadcasted_axis
))
return
;
auto
sq
=
m
.
insert_instruction
(
ins
,
make_op
(
"squeeze"
),
a_ins
->
inputs
().
front
());
auto
new_a
=
m
.
insert_instruction
(
ins
,
make_op
(
"broadcast"
,
{{
"axis"
,
0
},
{
"out_lens"
,
w_ins
->
get_shape
().
lens
()}}),
a_ins
->
inputs
().
front
());
ins
,
make_op
(
"broadcast"
,
{{
"axis"
,
0
},
{
"out_lens"
,
w_ins
->
get_shape
().
lens
()}}),
sq
);
auto
new_mul
=
m
.
insert_instruction
(
ins
,
make_op
(
"mul"
),
new_a
,
w_ins
);
auto
new_conv
=
m
.
insert_instruction
(
ins
,
conv_ins
->
get_operator
(),
conv_ins
->
inputs
().
front
(),
new_mul
);
...
...
@@ -412,6 +435,24 @@ struct find_concat_op
}
};
void
move_instructions_back
(
module
&
m
,
instruction_ref
pos
,
std
::
vector
<
instruction_ref
>
inss
)
{
auto
start
=
range
(
m
.
begin
(),
pos
);
for
(
auto
ins
:
iterator_for
(
start
))
{
auto
it
=
std
::
find
(
inss
.
begin
(),
inss
.
end
(),
ins
);
if
(
it
!=
inss
.
end
())
inss
.
erase
(
it
);
}
for
(
auto
ins
:
inss
)
{
if
(
not
m
.
has_instruction
(
ins
))
continue
;
move_instructions_back
(
m
,
pos
,
ins
->
inputs
());
m
.
move_instruction
(
ins
,
pos
);
}
}
std
::
vector
<
instruction_ref
>
get_splits
(
instruction_ref
ins
)
{
std
::
vector
<
instruction_ref
>
result
;
...
...
@@ -587,8 +628,7 @@ struct find_splits
}))
return
;
for
(
auto
data
:
data_args
)
m
.
move_instructions
(
data
,
ins
);
move_instructions_back
(
m
,
ins
,
data_args
);
auto
slice_op
=
any_cast
<
op
::
slice
>
(
splits
.
front
()
->
get_operator
());
assert
(
not
slice_op
.
axes
.
empty
());
...
...
@@ -841,8 +881,7 @@ struct find_conv_dot_horiz_fusion
concat_axis
=
axis
;
}
for
(
auto
arg
:
args
)
m
.
move_instructions
(
arg
,
input
);
move_instructions_back
(
m
,
input
,
args
);
// TODO: Check if axes match
auto
concat
=
m
.
insert_instruction
(
input
,
make_op
(
"concat"
,
{{
"axis"
,
concat_axis
}}),
args
);
...
...
@@ -894,6 +933,73 @@ struct find_div_const
}
};
struct
find_unit_ops
{
auto
matcher
()
const
{
auto
mul_1
=
match
::
name
(
"mul"
)(
match
::
either_arg
(
0
,
1
)(
match
::
has_value
(
1.0
f
),
match
::
any
().
bind
(
"x"
)));
auto
div_1
=
match
::
name
(
"div"
)(
match
::
args
(
match
::
any
().
bind
(
"x"
),
match
::
has_value
(
1.0
f
)));
auto
add_0
=
match
::
name
(
"add"
)(
match
::
either_arg
(
0
,
1
)(
match
::
has_value
(
0.0
f
,
1e-12
),
match
::
any
().
bind
(
"x"
)));
auto
sub_0
=
match
::
name
(
"sub"
)(
match
::
args
(
match
::
any
().
bind
(
"x"
),
match
::
has_value
(
0.0
f
)));
return
match
::
any_of
(
mul_1
,
div_1
,
add_0
,
sub_0
);
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
c_in
=
r
.
instructions
[
"x"
];
m
.
replace_instruction
(
ins
,
c_in
);
}
};
struct
find_neg_unit_ops
{
auto
matcher
()
const
{
auto
mul_neg_1
=
match
::
name
(
"mul"
)(
match
::
either_arg
(
0
,
1
)(
match
::
has_value
(
-
1.0
f
),
match
::
any
().
bind
(
"x"
)));
auto
div_neg_1
=
match
::
name
(
"div"
)(
match
::
args
(
match
::
any
().
bind
(
"x"
),
match
::
has_value
(
-
1.0
f
)));
auto
sub_0
=
match
::
name
(
"sub"
)(
match
::
args
(
match
::
has_value
(
0.0
f
),
match
::
any
().
bind
(
"x"
)));
return
match
::
any_of
(
mul_neg_1
,
div_neg_1
,
sub_0
);
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
c_in
=
r
.
instructions
[
"x"
];
auto
neg
=
m
.
add_instruction
(
make_op
(
"neg"
),
c_in
);
m
.
replace_instruction
(
ins
,
neg
);
}
};
struct
find_zero_ops
{
auto
matcher
()
const
{
auto
mul_zero
=
match
::
name
(
"mul"
)(
match
::
either_arg
(
0
,
1
)(
match
::
has_value
(
0.0
f
).
bind
(
"x"
),
match
::
any
()));
auto
div_zero
=
match
::
name
(
"div"
)(
match
::
args
(
match
::
has_value
(
0.0
f
).
bind
(
"x"
),
match
::
any
()));
return
match
::
any_of
(
mul_zero
,
div_zero
);
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
zero_ins
=
r
.
instructions
[
"x"
];
m
.
replace_instruction
(
ins
,
zero_ins
);
}
};
struct
find_sub_const
{
auto
matcher
()
const
...
...
@@ -985,20 +1091,35 @@ struct find_split_reshape
auto
rsp_lens
=
rsp
->
get_shape
().
lens
();
auto
rsp_strides
=
rsp
->
get_shape
().
strides
();
rsp_strides
.
insert
(
rsp_strides
.
begin
(),
rsp_strides
[
0
]
*
rsp_lens
[
0
]);
auto
ait
=
std
::
find
(
rsp_strides
.
begin
(),
rsp_strides
.
end
(),
slc_dim_size
);
auto
ait
=
std
::
find
(
rsp_strides
.
begin
(),
rsp_strides
.
end
(),
slc_dim_size
);
int
rsp_axis
=
-
1
;
if
(
ait
==
rsp_strides
.
end
())
{
return
;
}
int
rsp_axis
=
std
::
distance
(
rsp_strides
.
begin
(),
ait
);
else
if
(
ait
==
rsp_strides
.
end
()
-
1
)
{
// edge case
// slice_dim == 1, in that case it could match with last stride of 1.
// it should accumulate lengths from last dim in that case. discount 1 to avoid going
// out of bounds.
assert
(
slc_dim_size
==
1
);
rsp_axis
=
std
::
distance
(
rsp_strides
.
begin
(),
ait
)
-
1
;
}
else
{
rsp_axis
=
std
::
distance
(
rsp_strides
.
begin
(),
ait
);
}
// calculate reshape output shape
std
::
vector
<
int64_t
>
vec_dims
(
vec_rsp
.
size
());
std
::
transform
(
vec_rsp
.
begin
(),
vec_rsp
.
end
(),
vec_dims
.
begin
(),
[
&
](
auto
is
)
{
return
is
->
get_shape
().
lens
()[
rsp_axis
];
});
std
::
vector
<
int64_t
>
rsp_out_lens
(
rsp_lens
.
begin
(),
rsp_lens
.
end
());
rsp_out_lens
[
rsp_axis
]
=
std
::
accumulate
(
vec_dims
.
begin
(),
vec_dims
.
end
(),
std
::
int64_t
{
0
});
// insert the reshape instruction and add contiguous if needed
...
...
@@ -1095,6 +1216,9 @@ void simplify_algebra::apply(module& m) const
find_mul_conv
{},
find_mul_slice_conv
{},
find_mul_add
{},
find_unit_ops
{},
find_neg_unit_ops
{},
find_zero_ops
{},
find_dot_add
{},
find_div_const
{},
find_sub_const
{},
...
...
src/simplify_reshapes.cpp
View file @
5a14c0bf
...
...
@@ -271,6 +271,44 @@ struct find_nested_slice
}
};
struct
find_concat_multibroadcasts
{
auto
matcher
()
const
{
return
match
::
name
(
"concat"
)(
match
::
all_of
[
match
::
inputs
()](
match
::
name
(
"multibroadcast"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
mr
)
const
{
auto
ins
=
mr
.
result
;
auto
op
=
any_cast
<
op
::
concat
>
(
ins
->
get_operator
());
auto
out_lens
=
ins
->
get_shape
().
lens
();
auto
inputs
=
ins
->
inputs
();
auto
in_strides
=
inputs
.
front
()
->
get_shape
().
strides
();
// Only apply when concat axis is not a broadcasted dimension
if
(
std
::
any_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
&
](
auto
i
)
{
return
i
->
get_shape
().
strides
()[
op
.
axis
]
==
0
;
}))
{
return
;
}
// Use inputs of multibroadcast ops as inputs to new concat op
std
::
transform
(
inputs
.
begin
(),
inputs
.
end
(),
inputs
.
begin
(),
[](
auto
i
)
{
return
i
->
inputs
().
front
();
});
// Reduce axis by number of leading broadcasted dimensions
if
(
inputs
.
front
()
->
get_shape
().
lens
().
size
()
<
out_lens
.
size
())
op
.
axis
-=
std
::
count
(
in_strides
.
begin
(),
in_strides
.
begin
()
+
op
.
axis
,
0
);
auto
concat
=
m
.
insert_instruction
(
ins
,
op
,
inputs
);
m
.
replace_instruction
(
ins
,
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
out_lens
}}),
concat
);
}
};
struct
find_concat_transpose
{
auto
matcher
()
const
...
...
@@ -764,6 +802,7 @@ void simplify_reshapes::apply(module& m) const
find_reshaper
{},
find_transpose
{},
find_concat_transpose
{},
find_concat_multibroadcasts
{},
find_nested_convert
{},
find_nested_slice
{},
find_nested_concat
{},
...
...
src/targets/cpu/CMakeLists.txt
View file @
5a14c0bf
...
...
@@ -35,6 +35,7 @@ add_library(migraphx_cpu
dnnl.cpp
eltwise.cpp
erf.cpp
fmod.cpp
fuse_ops.cpp
gather.cpp
gemm.cpp
...
...
@@ -42,6 +43,7 @@ add_library(migraphx_cpu
logsoftmax.cpp
lowering.cpp
lrn.cpp
mod.cpp
preallocate.cpp
pooling.cpp
reduction.cpp
...
...
src/targets/
gpu/include/migraphx/gpu/cos.h
pp
→
src/targets/
cpu/fmod.c
pp
View file @
5a14c0bf
...
...
@@ -21,22 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_COS_HPP
#define MIGRAPHX_GUARD_RTGLIB_COS_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/cos.hpp>
#include <migraphx/config.hpp>
#include <migraphx/cpu/pointwise.hpp>
#include <migraphx/op/fmod.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
g
pu
{
namespace
c
pu
{
struct
hip_cos
:
unary_device
<
hip_cos
,
device
::
cos
>
{
};
template
struct
cpu_binary
<
op
::
fmod
>;
}
// namespace
g
pu
}
// namespace
c
pu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/cpu/lowering.cpp
View file @
5a14c0bf
...
...
@@ -26,7 +26,6 @@
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
...
...
@@ -43,6 +42,8 @@
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/mod.hpp>
#include <migraphx/op/fmod.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/par_dfor.hpp>
...
...
src/targets/
g
pu/
include/migraphx/gpu/exp.h
pp
→
src/targets/
c
pu/
mod.c
pp
View file @
5a14c0bf
...
...
@@ -21,22 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_EXP_HPP
#define MIGRAPHX_GUARD_RTGLIB_EXP_HPP
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/exp.hpp>
#include <migraphx/config.hpp>
#include <migraphx/cpu/pointwise.hpp>
#include <migraphx/op/mod.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
g
pu
{
namespace
c
pu
{
struct
hip_exp
:
unary_device
<
hip_exp
,
device
::
exp
>
{
};
template
struct
cpu_binary
<
op
::
mod
>;
}
// namespace
g
pu
}
// namespace
c
pu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/cpu/target.cpp
View file @
5a14c0bf
...
...
@@ -37,7 +37,6 @@
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_rnn.hpp>
...
...
@@ -78,8 +77,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
eliminate_identity
{},
eliminate_pad
{},
dead_code_elimination
{},
rewrite_batchnorm
{},
dead_code_elimination
{},
rewrite_rnn
{},
dead_code_elimination
{},
eliminate_common_subexpression
{},
...
...
src/targets/gpu/CMakeLists.txt
100755 → 100644
View file @
5a14c0bf
...
...
@@ -39,81 +39,9 @@ file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
message
(
STATUS
"KERNEL_FILES:
${
KERNEL_FILES
}
"
)
add_embed_library
(
migraphx_kernels
${
KERNEL_FILES
}
)
add_library
(
migraphx_device
device/acos.cpp
device/acosh.cpp
device/add.cpp
device/add_clip.cpp
device/add_relu.cpp
device/add_sigmoid.cpp
device/add_tanh.cpp
device/argmax.cpp
device/argmin.cpp
device/asin.cpp
device/asinh.cpp
device/atan.cpp
device/atanh.cpp
device/ceil.cpp
device/clip.cpp
device/concat.cpp
device/contiguous.cpp
device/convert.cpp
device/cos.cpp
device/cosh.cpp
device/div.cpp
device/equal.cpp
device/erf.cpp
device/exp.cpp
device/fill.cpp
device/floor.cpp
device/gather.cpp
device/gelu.cpp
device/greater.cpp
device/int8_gemm_pack.cpp
device/layernorm.cpp
device/less.cpp
device/log.cpp
device/logical_and.cpp
device/logical_or.cpp
device/logical_xor.cpp
device/logsoftmax.cpp
device/max.cpp
device/min.cpp
device/mul.cpp
device/mul_add.cpp
device/mul_add_relu.cpp
device/multinomial.cpp
device/nonzero.cpp
device/pad.cpp
device/pow.cpp
device/prelu.cpp
device/prefix_scan_sum.cpp
device/recip.cpp
device/reduce_max.cpp
device/reduce_mean.cpp
device/reduce_min.cpp
device/reduce_sum.cpp
device/reduce_prod.cpp
device/relu.cpp
device/reverse.cpp
device/rnn_variable_seq_lens.cpp
device/round.cpp
device/rsqrt.cpp
device/scatter.cpp
device/sigmoid.cpp
device/sign.cpp
device/sin.cpp
device/sinh.cpp
device/softmax.cpp
device/sqdiff.cpp
device/sqrt.cpp
device/sub.cpp
device/tan.cpp
device/tanh.cpp
device/topk.cpp
device/unary_not.cpp
device/where.cpp
)
file
(
GLOB DEVICE_GPU_SRCS
${
CONFIGURE_DEPENDS
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/*.cpp
)
add_library
(
migraphx_device
${
DEVICE_GPU_SRCS
}
)
add_library
(
compile_for_gpu INTERFACE
)
target_compile_options
(
compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns
)
target_link_libraries
(
compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored
)
...
...
@@ -150,18 +78,12 @@ add_library(migraphx_gpu
allocation_model.cpp
argmax.cpp
argmin.cpp
batch_norm_inference.cpp
clip.cpp
code_object_op.cpp
compile_ops.cpp
compile_gen.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compiler.cpp
concat.cpp
convert.cpp
convolution.cpp
deconvolution.cpp
device_name.cpp
elu.cpp
fuse_mlir.cpp
...
...
@@ -186,13 +108,11 @@ add_library(migraphx_gpu
pad.cpp
perfdb.cpp
pooling.cpp
quant_convolution.cpp
reverse.cpp
rnn_variable_seq_lens.cpp
rocblas.cpp
scatter.cpp
schedule_model.cpp
softmax.cpp
sync_device.cpp
target.cpp
topk.cpp
...
...
@@ -207,81 +127,27 @@ function(register_migraphx_gpu_ops PREFIX)
endforeach
()
endfunction
()
register_migraphx_gpu_ops
(
hip_
acosh
acos
add
argmax
argmin
asinh
asin
atanh
atan
ceil
clip
concat
convert
cosh
cos
div
equal
erf
exp
floor
gather
greater
less
log
logsoftmax
logical_and
logical_or
logical_xor
loop
max
min
mul
multinomial
nonzero
pad
pow
prelu
prefix_scan_sum
recip
reduce_max
reduce_mean
reduce_min
reduce_prod
reduce_sum
relu
reverse
round
rsqrt
scatter
sigmoid
sign
sinh
sin
softmax
sqdiff
sqrt
sub
tanh
tan
topk
unary_not
where
)
register_migraphx_gpu_ops
(
miopen_
abs
batch_norm_inference
contiguous
convolution
deconvolution
elu
int8_conv_pack
leaky_relu
lrn
pooling
quant_convolution
)
register_op
(
migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
...
...
@@ -295,6 +161,9 @@ register_op(migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu HEADER migraphx/gpu/convolution.hpp
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution>
INCLUDES migraphx/gpu/context.hpp
)
rocm_set_soversion
(
migraphx_gpu
${
MIGRAPHX_SO_VERSION
}
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
@@ -322,26 +191,11 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
set
(
MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL
""
)
if
(
MIGRAPHX_ENABLE_MLIR
)
find_library
(
MLIRAPI_LIBRARY MLIRMIOpen
PATH_SUFFIXES
# Workaournd broken mlir install
lib/ lib/lib
)
# REQUIRED is not supported before cmake 3.18
if
(
NOT MLIRAPI_LIBRARY
)
message
(
FATAL_ERROR
"libMLIRMIOpen not found"
)
else
()
message
(
STATUS
"Build with libMLIRMIOpen: "
${
MLIRAPI_LIBRARY
}
)
endif
()
find_path
(
MLIRAPI_HEADERS NAMES mlir-c/Dialect/MIGraphX.h
)
# Workaround MLIR broken installation
find_path
(
MLIRAPI_HEADERS2 NAMES mlir-c/Registration.h
PATH_SUFFIXES
include/external/include external/include
)
# Find package rocMLIR
find_package
(
rocMLIR 1.0.0 CONFIG REQUIRED
)
message
(
STATUS
"Build with rocMLIR::rockCompiler
${
rocMLIR_VERSION
}
"
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"-DMIGRAPHX_MLIR"
)
target_include_directories
(
migraphx_gpu SYSTEM PRIVATE
${
MLIRAPI_HEADERS
}
${
MLIRAPI_HEADERS2
}
)
target_link_libraries
(
migraphx_gpu PUBLIC
${
MLIRAPI_LIBRARY
}
)
target_link_libraries
(
migraphx_gpu PUBLIC rocMLIR::rockCompiler
)
endif
()
set
(
MIGRAPHX_USE_HIPRTC OFF CACHE BOOL
""
)
...
...
@@ -380,9 +234,18 @@ endif()
include
(
CheckLibraryExists
)
get_target_property
(
MIOPEN_LOCATION MIOpen LOCATION
)
check_library_exists
(
MIOpen
"miopenHiddenSetConvolutionFindMode"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_MODE_API
)
check_library_exists
(
MIOpen
"miopenFindSolutions"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_2_API
)
if
(
HAS_FIND_2_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API
)
message
(
STATUS
"MIGraphx is using Find-2.0 API of MIOpen"
)
else
()
message
(
STATUS
"MIOpen does not have Find-2.0 API"
)
endif
()
if
(
HAS_FIND_MODE_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API
)
message
(
STATUS
"MI
Open has f
ind
m
ode
api
"
)
message
(
STATUS
"MI
Graphx is using F
ind
M
ode
API of MIOpen
"
)
else
()
message
(
STATUS
"MIOpen does not have find mode api"
)
endif
()
...
...
src/targets/gpu/batch_norm_inference.cpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/batch_norm_inference.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Validates the instruction's input shapes and computes the batch-norm
// inference output shape by delegating to the wrapped reference op.
//
// Expects 6 shapes: the 5 batch-norm inputs followed by the trailing
// pre-allocated output buffer (which is stripped before delegation).
// NOTE(review): assumes inputs are ordered x, scale, bias, mean, variance —
// confirm against the lowering that builds this instruction.
shape miopen_batch_norm_inference::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(6);
    // Only the first shape (the data input) is rank-checked: at most 5 dims.
    check_shapes{inputs.data(), inputs.data() + 1, *this}.same_ndims().max_ndims(5);
    // Forward only the 5 real inputs, dropping the output-buffer shape.
    return op.compute_shape({inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)});
}
// Pad a shape with trailing 1s so it has at least 4 dimensions — presumably to
// satisfy MIOpen tensor-descriptor rank expectations (TODO confirm). Shapes of
// rank >= 4 are returned unchanged. Despite the name, this pads up to 4-D
// rather than flattening to 2-D.
inline shape reshape_to_2d(const shape& input)
{
    const auto& lens = input.lens();
    if(lens.size() >= 4)
        return input;
    std::vector<std::size_t> padded(lens.begin(), lens.end());
    padded.resize(4, 1); // append (4 - rank) trailing dimensions of extent 1
    return {input.type(), padded};
}
// Runs MIOpen's batch-norm forward-inference kernel.
//
// args layout (matching miopenBatchNormalizationForwardInference's documented
// parameter order): [0] x, [1] scale, [2] bias, [3] estimated mean,
// [4] estimated variance, [5] pre-allocated output buffer (also returned).
argument miopen_batch_norm_inference::compute(context& ctx,
                                              const shape& output_shape,
                                              const std::vector<argument>& args) const
{
    shape x_shape  = args[0].get_shape();
    shape y_shape  = output_shape;
    shape bn_shape = args[3].get_shape();

    // MIOpen descriptors need rank >= 4; pad lower-rank shapes with 1s.
    auto x_desc  = make_tensor(reshape_to_2d(x_shape));
    auto y_desc  = make_tensor(reshape_to_2d(y_shape));
    auto bn_desc = make_tensor(reshape_to_2d(bn_shape));

    // y = alpha * bn(x) + beta * y, so alpha=1/beta=0 just writes the result.
    float alpha = 1.0;
    float beta  = 0.0f;

    miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
                                             miopenBatchNormMode_t(op.bn_mode),
                                             &alpha,
                                             &beta,
                                             x_desc.get(),
                                             args[0].implicit(),
                                             y_desc.get(),
                                             args[5].implicit(),
                                             bn_desc.get(),
                                             args[1].implicit(),
                                             args[2].implicit(),
                                             args[3].implicit(),
                                             args[4].implicit(),
                                             op.epsilon);
    // The output buffer was written in place; hand it back to the caller.
    return args[5];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/clip.cpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Compute the output shape for clip. The trailing shape is the pre-allocated
// output buffer (presumably appended by the GPU lowering); strip it before
// delegating to the wrapped reference op.
shape hip_clip::compute_shape(std::vector<shape> inputs) const
{
    inputs.erase(inputs.end() - 1);
    return op.compute_shape(std::move(inputs));
}
// Launch the device clip kernel: clamp args[0] between args.at(1) (min) and
// args.at(2) (max), writing into the trailing output buffer, which is also the
// instruction's result.
argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    const argument& output = args.back();
    device::clip(ctx.get_stream().get(), output, args.front(), args.at(1), args.at(2));
    return output;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/compile_gen.cpp
View file @
5a14c0bf
...
...
@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/stringutils.hpp>
...
...
@@ -48,12 +49,13 @@ static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
return
{
4
,
2
};
}
vectorize
vectorize
::
elements
(
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
)
vectorize
vectorize
::
elements
(
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
,
const
std
::
vector
<
std
::
size_t
>&
sizes
)
{
if
(
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
&
](
const
auto
&
s
)
{
return
s
.
lens
()[
axis
]
==
1
;
}))
return
{
1
,
axis
};
auto
sizes
=
vector_sizes
(
inputs
);
std
::
vector
<
std
::
size_t
>
max_vec_size
;
std
::
transform
(
inputs
.
begin
(),
inputs
.
end
(),
...
...
@@ -81,6 +83,33 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs
return
{
*
std
::
min_element
(
max_vec_size
.
begin
(),
max_vec_size
.
end
()),
axis
};
}
// Choose candidate vectorization widths for a pointwise kernel based on how
// much the problem oversubscribes the device, then pick the final width via
// the sized overload.
//
//   n          - element count of the largest input shape
//   max_global - CU count * max workitems per CU (workitems in one full launch)
//   over       - how many times n oversubscribes one full launch
//
// 8-wide is only attempted when heavily oversubscribed AND some input is
// broadcasted; 4-wide when moderately oversubscribed; 2-wide is always a
// candidate.
vectorize vectorize::elements(context& ctx, std::size_t axis, const std::vector<shape>& inputs)
{
    if(inputs.empty())
        return {1, axis}; // nothing to vectorize
    std::size_t n = std::max_element(inputs.begin(), inputs.end(), by(std::less<>{}, [](const auto& s) {
                        return s.elements();
                    }))->elements();
    std::size_t max_global = ctx.get_current_device().get_cu_count() *
                             ctx.get_current_device().get_max_workitems_per_cu();
    std::size_t over       = n / max_global;
    bool broadcasted =
        std::any_of(inputs.begin(), inputs.end(), [](const auto& s) { return s.broadcasted(); });
    std::vector<std::size_t> sizes;
    if(broadcasted and over > 8)
        sizes.push_back(8);
    if(over > 4)
        sizes.push_back(4);
    sizes.push_back(2);
    return elements(axis, inputs, sizes);
}
// Context-free overload: use the default candidate vector sizes instead of
// deriving them from device occupancy.
vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
{
    return elements(axis, inputs, vector_sizes(inputs));
}
std
::
string
vectorize
::
str
()
const
{
return
"vectorize<"
+
to_string
(
size
)
+
", "
+
to_string
(
axis
)
+
">()"
;
...
...
@@ -102,7 +131,7 @@ preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
std
::
size_t
bytes
=
0
;
for
(
auto
i
:
preloaded
)
{
auto
input
=
inputs
[
i
];
const
auto
&
input
=
inputs
[
i
];
bytes
+=
input
.
bytes
();
if
(
bytes
>
max_lds_bytes
)
break
;
...
...
src/targets/gpu/compile_hip_code_object.cpp
View file @
5a14c0bf
...
...
@@ -138,16 +138,16 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std
::
size_t
groups
=
(
n
+
local
-
1
)
/
local
;
std
::
size_t
max_blocks
=
max_global
/
local
;
std
::
size_t
nglobal
=
std
::
min
(
max_blocks
*
over
,
groups
)
*
local
;
return
nglobal
;
return
std
::
min
(
nglobal
,
n
)
;
};
}
/// Compute a kernel block (workgroup) size for n work items.
///
/// The result is n rounded up to a multiple of base_block_size (32, one
/// wavefront's granularity), clamped to [min_block_size, max_block_size].
///
/// NOTE(review): this span interleaved the pre- and post-commit bodies; the
/// post-commit round-up/clamp implementation is kept, with an added guard so
/// n == 0 cannot underflow (n - 1) and silently return max_block_size.
std::size_t compute_block_size(std::size_t n, std::size_t max_block_size)
{
    const std::size_t min_block_size  = 64;
    const std::size_t base_block_size = 32;
    if(n == 0)
        return std::min(min_block_size, max_block_size);
    auto block_size = ((n - 1) / base_block_size + 1) * base_block_size;
    return std::min(std::max(min_block_size, block_size), max_block_size);
}
operation
compile_hip_code_object
(
const
std
::
string
&
content
,
hip_compile_options
options
)
...
...
src/targets/gpu/concat.cpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/concat.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/concat.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Output shape for concat: strip the trailing output-buffer shape, then let
// the reference op normalize and compute from the real inputs.
shape hip_concat::compute_shape(std::vector<shape> inputs) const
{
    inputs.erase(inputs.end() - 1);
    return op.normalize_compute_shape(std::move(inputs));
}
// Launch the device concat kernel, copying each input into the output buffer
// at its offset.
argument hip_concat::compute(context& ctx,
                             const shape& output_shape,
                             const std::vector<argument>& args) const
{
    // Per-input offsets into the output, as computed by the reference op.
    const auto offsets = op.compute_offsets(output_shape, args);
    return device::concat(ctx.get_stream().get(), output_shape, args, offsets);
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/convert.cpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Output shape for convert: drop the trailing output-buffer shape, validate
// that the remaining inputs are packed (the device kernel requires it), and
// delegate to the wrapped reference op.
shape hip_convert::compute_shape(std::vector<shape> inputs) const
{
    inputs.erase(inputs.end() - 1);
    check_shapes{inputs, *this}.packed();
    return op.compute_shape(std::move(inputs));
}
// Launch the device convert (dtype cast) kernel: reads args[0], writes the
// pre-allocated buffer args[1], which is also the instruction's result.
argument hip_convert::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    const argument& input  = args[0];
    const argument& output = args[1];
    device::convert(ctx.get_stream().get(), output, input);
    return output;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
Prev
1
2
3
4
5
6
7
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment