Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
5a14c0bf
Commit
5a14c0bf
authored
Oct 19, 2022
by
umangyadav
Browse files
Merge branch 'develop' into workspace_size
parents
cb01e280
5fa42993
Changes
319
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
367 additions
and
577 deletions
+367
-577
src/opt/memory_coloring_impl.cpp
src/opt/memory_coloring_impl.cpp
+7
-7
src/pad_calc.cpp
src/pad_calc.cpp
+33
-8
src/program.cpp
src/program.cpp
+53
-40
src/py/migraphx_py.cpp
src/py/migraphx_py.cpp
+23
-5
src/replace_allocate.cpp
src/replace_allocate.cpp
+1
-1
src/rewrite_batchnorm.cpp
src/rewrite_batchnorm.cpp
+0
-83
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+138
-14
src/simplify_reshapes.cpp
src/simplify_reshapes.cpp
+39
-0
src/targets/cpu/CMakeLists.txt
src/targets/cpu/CMakeLists.txt
+2
-0
src/targets/cpu/fmod.cpp
src/targets/cpu/fmod.cpp
+6
-12
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+2
-1
src/targets/cpu/mod.cpp
src/targets/cpu/mod.cpp
+6
-12
src/targets/cpu/target.cpp
src/targets/cpu/target.cpp
+0
-3
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+20
-157
src/targets/gpu/batch_norm_inference.cpp
src/targets/gpu/batch_norm_inference.cpp
+0
-85
src/targets/gpu/clip.cpp
src/targets/gpu/clip.cpp
+0
-46
src/targets/gpu/compile_gen.cpp
src/targets/gpu/compile_gen.cpp
+32
-3
src/targets/gpu/compile_hip_code_object.cpp
src/targets/gpu/compile_hip_code_object.cpp
+5
-5
src/targets/gpu/concat.cpp
src/targets/gpu/concat.cpp
+0
-48
src/targets/gpu/convert.cpp
src/targets/gpu/convert.cpp
+0
-47
No files found.
src/opt/memory_coloring_impl.cpp
View file @
5a14c0bf
...
@@ -72,7 +72,7 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
...
@@ -72,7 +72,7 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
if
(
conflict_table
.
find
(
vn
)
!=
conflict_table
.
end
())
if
(
conflict_table
.
find
(
vn
)
!=
conflict_table
.
end
())
{
{
std
::
set
<
int
>&
vn_set
=
conflict_table
[
vn
];
const
std
::
set
<
int
>&
vn_set
=
conflict_table
[
vn
];
for
(
const
auto
&
iter
:
vn_set
)
for
(
const
auto
&
iter
:
vn_set
)
{
{
live_range
*
range
=
live_ranges
[
iter
];
live_range
*
range
=
live_ranges
[
iter
];
...
@@ -267,8 +267,8 @@ void memory_coloring_impl::verify()
...
@@ -267,8 +267,8 @@ void memory_coloring_impl::verify()
{
{
for
(
int
i
=
0
;
i
<
num_of_lives
;
++
i
)
for
(
int
i
=
0
;
i
<
num_of_lives
;
++
i
)
{
{
live_interval
&
interval
=
live_intervals
[
i
];
const
live_interval
&
interval
=
live_intervals
[
i
];
live_range
&
segment
=
interval
.
segment
;
const
live_range
&
segment
=
interval
.
segment
;
if
(
segment
.
begin
==
invalid_offset
)
if
(
segment
.
begin
==
invalid_offset
)
{
{
...
@@ -284,7 +284,7 @@ void memory_coloring_impl::verify()
...
@@ -284,7 +284,7 @@ void memory_coloring_impl::verify()
int
vn
=
segment
.
vn
;
int
vn
=
segment
.
vn
;
if
(
conflict_table
.
find
(
vn
)
!=
conflict_table
.
end
())
if
(
conflict_table
.
find
(
vn
)
!=
conflict_table
.
end
())
{
{
std
::
set
<
int
>&
vn_set
=
conflict_table
[
vn
];
const
std
::
set
<
int
>&
vn_set
=
conflict_table
[
vn
];
for
(
const
auto
&
iter
:
vn_set
)
for
(
const
auto
&
iter
:
vn_set
)
{
{
live_range
*
range
=
live_ranges
[
iter
];
live_range
*
range
=
live_ranges
[
iter
];
...
@@ -319,8 +319,8 @@ void memory_coloring_impl::dump_intervals()
...
@@ -319,8 +319,8 @@ void memory_coloring_impl::dump_intervals()
{
{
std
::
cout
<<
" segment:"
<<
i
;
std
::
cout
<<
" segment:"
<<
i
;
std
::
cout
<<
" =>"
;
std
::
cout
<<
" =>"
;
std
::
set
<
int
>&
table
=
conflict_table
[
i
];
const
std
::
set
<
int
>&
table
=
conflict_table
[
i
];
for
(
auto
&
iter
:
table
)
for
(
const
auto
&
iter
:
table
)
{
{
std
::
cout
<<
(
iter
)
<<
","
;
std
::
cout
<<
(
iter
)
<<
","
;
}
}
...
@@ -357,7 +357,7 @@ void live_interval::dump()
...
@@ -357,7 +357,7 @@ void live_interval::dump()
std
::
cout
<<
"id:"
<<
id
;
std
::
cout
<<
"id:"
<<
id
;
segment
.
dump
();
segment
.
dump
();
std
::
cout
<<
" uses:"
;
std
::
cout
<<
" uses:"
;
for
(
auto
&
iter
:
use_points
)
for
(
const
auto
&
iter
:
use_points
)
{
{
std
::
cout
<<
" "
<<
get_ins_enum
(
iter
)
<<
","
;
std
::
cout
<<
" "
<<
get_ins_enum
(
iter
)
<<
","
;
}
}
...
...
src/pad_calc.cpp
View file @
5a14c0bf
...
@@ -52,19 +52,21 @@ void calculate_padding(int64_t idx,
...
@@ -52,19 +52,21 @@ void calculate_padding(int64_t idx,
}
}
}
}
std
::
vector
<
std
::
size_t
>
calc_dyn_auto_pad
(
std
::
vector
<
std
::
size_t
>
tensor
_lens
,
std
::
vector
<
std
::
size_t
>
calc_dyn_auto_pad
(
const
std
::
vector
<
std
::
size_t
>
&
input
_lens
,
std
::
vector
<
std
::
size_t
>
k
_lens
,
const
std
::
vector
<
std
::
size_t
>
&
wei
_lens
,
std
::
vector
<
std
::
size_t
>
strides
,
const
std
::
vector
<
std
::
size_t
>
&
strides
,
std
::
vector
<
std
::
size_t
>
dilations
,
const
std
::
vector
<
std
::
size_t
>
&
dilations
,
bool
use_upper
)
bool
use_upper
)
{
{
std
::
vector
<
std
::
size_t
>
padding
;
std
::
vector
<
std
::
size_t
>
padding
;
padding
.
resize
(
2
*
k_lens
.
size
());
assert
(
input_lens
.
size
()
>=
3
);
for
(
std
::
size_t
i
=
0
;
i
<
padding
.
size
()
/
2
;
i
++
)
std
::
size_t
num_spatial_dims
=
input_lens
.
size
()
-
2
;
padding
.
resize
(
2
*
num_spatial_dims
);
for
(
std
::
size_t
i
=
0
;
i
<
num_spatial_dims
;
i
++
)
{
{
std
::
ptrdiff_t
input_dim
=
tensor
_lens
[
i
];
std
::
ptrdiff_t
input_dim
=
input
_lens
[
i
+
2
];
std
::
ptrdiff_t
stride
=
strides
[
i
];
std
::
ptrdiff_t
stride
=
strides
[
i
];
std
::
ptrdiff_t
weight_dim
=
k
_lens
[
i
];
std
::
ptrdiff_t
weight_dim
=
wei
_lens
[
i
+
2
];
std
::
ptrdiff_t
dilation
=
dilations
[
i
];
std
::
ptrdiff_t
dilation
=
dilations
[
i
];
std
::
ptrdiff_t
output_dim
=
(
input_dim
+
stride
-
1
)
/
stride
;
// round up result
std
::
ptrdiff_t
output_dim
=
(
input_dim
+
stride
-
1
)
/
stride
;
// round up result
std
::
ptrdiff_t
new_weight_dim
=
weight_dim
+
(
weight_dim
-
1
)
*
(
dilation
-
1
);
std
::
ptrdiff_t
new_weight_dim
=
weight_dim
+
(
weight_dim
-
1
)
*
(
dilation
-
1
);
...
@@ -86,5 +88,28 @@ std::vector<std::size_t> calc_dyn_auto_pad(std::vector<std::size_t> tensor_lens,
...
@@ -86,5 +88,28 @@ std::vector<std::size_t> calc_dyn_auto_pad(std::vector<std::size_t> tensor_lens,
return
padding
;
return
padding
;
}
}
/// Computes the output shape of a convolution of `input` with `weights`.
///
/// The first two output dimensions are `input.lens()[0]` and
/// `weights.lens()[0]`. Every remaining (spatial) dimension `i` is
///
///     max(1, (in - (1 + dilation * (k - 1)) + pad_begin + pad_end) / stride + 1)
///
/// where `in` is `input.lens()[i + 2]`, `k` is `weights.lens()[i + 2]`,
/// `pad_begin` is `padding[i]` and `pad_end` is `padding[i + num_spatial_dims]`.
///
/// @param input    shape of the data tensor; must have at least 2 dimensions
/// @param weights  shape of the filter tensor; indexed with the same rank as `input`
/// @param padding  2 * num_spatial_dims entries: all begin pads, then all end pads
/// @param stride   one entry per spatial dimension
/// @param dilation one entry per spatial dimension
/// @return `input.with_lens(...)` applied to the computed output dimensions
shape compute_padded_shape(const shape& input,
                           const shape& weights,
                           const std::vector<std::size_t>& padding,
                           const std::vector<std::size_t>& stride,
                           const std::vector<std::size_t>& dilation)
{
    const size_t num_spatial_dims = input.lens().size() - 2;
    std::vector<size_t> output_lens{input.lens()[0], weights.lens()[0]};

    // calculate the output shape of the convolution: ((W - K + 2P) / S) + 1
    for(size_t i = 0; i < num_spatial_dims; ++i)
    {
        // Work in signed arithmetic. With unsigned operands a dilated kernel
        // larger than the padded input wraps around, and for stride > 1 the
        // wrapped value is divided before the clamp below, which yields a huge
        // positive extent instead of 1.
        const auto in_dim    = static_cast<std::ptrdiff_t>(input.lens()[i + 2]);
        const auto kernel    = static_cast<std::ptrdiff_t>(weights.lens()[i + 2]);
        const auto dil       = static_cast<std::ptrdiff_t>(dilation[i]);
        const auto step      = static_cast<std::ptrdiff_t>(stride[i]);
        const auto pad_total = static_cast<std::ptrdiff_t>(padding[i]) +
                               static_cast<std::ptrdiff_t>(padding[i + num_spatial_dims]);

        const std::ptrdiff_t window   = 1 + dil * (kernel - 1);
        const std::ptrdiff_t out_dim  = (in_dim - window + pad_total) / step + 1;

        // an output dimension is never smaller than 1
        output_lens.push_back(std::size_t(std::max<std::ptrdiff_t>(1, out_dim)));
    }
    return input.with_lens(output_lens);
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/program.cpp
View file @
5a14c0bf
...
@@ -398,7 +398,7 @@ std::vector<argument> generic_eval(const program& p,
...
@@ -398,7 +398,7 @@ std::vector<argument> generic_eval(const program& p,
return
generic_eval
(
mm
,
ctx
,
params
,
{},
make_trace
);
return
generic_eval
(
mm
,
ctx
,
params
,
{},
make_trace
);
}
}
std
::
vector
<
argument
>
program
::
eval
(
parameter_map
params
)
const
std
::
vector
<
argument
>
program
::
eval
(
parameter_map
params
,
execution_environment
exec_env
)
const
{
{
auto
&
ctx
=
this
->
impl
->
ctx
;
auto
&
ctx
=
this
->
impl
->
ctx
;
#ifndef NDEBUG
#ifndef NDEBUG
...
@@ -423,6 +423,12 @@ std::vector<argument> program::eval(parameter_map params) const
...
@@ -423,6 +423,12 @@ std::vector<argument> program::eval(parameter_map params) const
#endif
#endif
auto
trace_level
=
value_of
(
MIGRAPHX_TRACE_EVAL
{});
auto
trace_level
=
value_of
(
MIGRAPHX_TRACE_EVAL
{});
std
::
vector
<
argument
>
ret
;
if
(
exec_env
.
async
)
{
ctx
.
wait_for
(
exec_env
.
queue
);
}
if
(
trace_level
>
0
)
if
(
trace_level
>
0
)
{
{
...
@@ -434,7 +440,7 @@ std::vector<argument> program::eval(parameter_map params) const
...
@@ -434,7 +440,7 @@ std::vector<argument> program::eval(parameter_map params) const
ins_out
[
x
]
=
ss
.
str
();
ins_out
[
x
]
=
ss
.
str
();
});
});
ret
urn
generic_eval
(
*
this
,
ret
=
generic_eval
(
*
this
,
ctx
,
ctx
,
std
::
move
(
params
),
std
::
move
(
params
),
with_check_context
([
&
](
auto
&
ins
,
auto
f
,
auto
&&
check_context
)
{
with_check_context
([
&
](
auto
&
ins
,
auto
f
,
auto
&&
check_context
)
{
...
@@ -470,13 +476,20 @@ std::vector<argument> program::eval(parameter_map params) const
...
@@ -470,13 +476,20 @@ std::vector<argument> program::eval(parameter_map params) const
}
}
else
else
{
{
ret
urn
generic_eval
(
*
this
,
ret
=
generic_eval
(
*
this
,
ctx
,
ctx
,
std
::
move
(
params
),
std
::
move
(
params
),
with_check_context
([
&
](
auto
&
,
auto
f
,
auto
&&
check_context
)
{
with_check_context
([
&
](
auto
&
,
auto
f
,
auto
&&
check_context
)
{
return
check_context
(
f
);
return
check_context
(
f
);
}));
}));
}
}
if
(
exec_env
.
async
)
{
ctx
.
finish_on
(
exec_env
.
queue
);
}
return
ret
;
}
}
const
int
program_file_version
=
5
;
const
int
program_file_version
=
5
;
...
...
src/py/migraphx_py.cpp
View file @
5a14c0bf
...
@@ -264,12 +264,13 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
...
@@ -264,12 +264,13 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
py
::
class_
<
migraphx
::
argument
>
(
m
,
"argument"
,
py
::
buffer_protocol
())
py
::
class_
<
migraphx
::
argument
>
(
m
,
"argument"
,
py
::
buffer_protocol
())
.
def_buffer
([](
migraphx
::
argument
&
x
)
->
py
::
buffer_info
{
return
to_buffer_info
(
x
);
})
.
def_buffer
([](
migraphx
::
argument
&
x
)
->
py
::
buffer_info
{
return
to_buffer_info
(
x
);
})
.
def
(
"__init__"
,
.
def
(
py
::
init
([](
py
::
buffer
b
)
{
[](
migraphx
::
argument
&
x
,
py
::
buffer
b
)
{
py
::
buffer_info
info
=
b
.
request
();
py
::
buffer_info
info
=
b
.
request
();
new
(
&
x
)
migraphx
::
argument
(
to_shape
(
info
),
info
.
ptr
);
return
migraphx
::
argument
(
to_shape
(
info
),
info
.
ptr
);
})
})
)
.
def
(
"get_shape"
,
&
migraphx
::
argument
::
get_shape
)
.
def
(
"get_shape"
,
&
migraphx
::
argument
::
get_shape
)
.
def
(
"data_ptr"
,
[](
migraphx
::
argument
&
x
)
{
return
reinterpret_cast
<
std
::
uintptr_t
>
(
x
.
data
());
})
.
def
(
"tolist"
,
.
def
(
"tolist"
,
[](
migraphx
::
argument
&
x
)
{
[](
migraphx
::
argument
&
x
)
{
py
::
list
l
{
x
.
get_shape
().
elements
()};
py
::
list
l
{
x
.
get_shape
().
elements
()};
...
@@ -354,6 +355,23 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
...
@@ -354,6 +355,23 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
}
}
return
p
.
eval
(
pm
);
return
p
.
eval
(
pm
);
})
})
.
def
(
"run_async"
,
[](
migraphx
::
program
&
p
,
py
::
dict
params
,
std
::
uintptr_t
stream
,
std
::
string
stream_name
)
{
migraphx
::
parameter_map
pm
;
for
(
auto
x
:
params
)
{
std
::
string
key
=
x
.
first
.
cast
<
std
::
string
>
();
py
::
buffer
b
=
x
.
second
.
cast
<
py
::
buffer
>
();
py
::
buffer_info
info
=
b
.
request
();
pm
[
key
]
=
migraphx
::
argument
(
to_shape
(
info
),
info
.
ptr
);
}
migraphx
::
execution_environment
exec_env
{
migraphx
::
any_ptr
(
reinterpret_cast
<
void
*>
(
stream
),
stream_name
),
true
};
return
p
.
eval
(
pm
,
exec_env
);
})
.
def
(
"sort"
,
&
migraphx
::
program
::
sort
)
.
def
(
"sort"
,
&
migraphx
::
program
::
sort
)
.
def
(
"print"
,
[](
const
migraphx
::
program
&
p
)
{
std
::
cout
<<
p
<<
std
::
endl
;
})
.
def
(
"print"
,
[](
const
migraphx
::
program
&
p
)
{
std
::
cout
<<
p
<<
std
::
endl
;
})
.
def
(
"__eq__"
,
std
::
equal_to
<
migraphx
::
program
>
{})
.
def
(
"__eq__"
,
std
::
equal_to
<
migraphx
::
program
>
{})
...
...
src/replace_allocate.cpp
View file @
5a14c0bf
...
@@ -73,7 +73,7 @@ void insert_submod_allocations(instruction_ref ins, module& mod, const allocatio
...
@@ -73,7 +73,7 @@ void insert_submod_allocations(instruction_ref ins, module& mod, const allocatio
name_shapes
.
insert
(
ps
.
begin
(),
ps
.
end
());
name_shapes
.
insert
(
ps
.
begin
(),
ps
.
end
());
}
}
for
(
auto
&
pn
:
name_shapes
)
for
(
const
auto
&
pn
:
name_shapes
)
{
{
const
auto
&
s
=
pn
.
second
;
const
auto
&
s
=
pn
.
second
;
instruction_ref
output
{};
instruction_ref
output
{};
...
...
src/rewrite_batchnorm.cpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/broadcast.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/dfor.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
// Rewrites each "batch_norm_inference" instruction whose scale, bias, mean and
// variance inputs can all be evaluated into `x * a + b`, where `a` and `b` are
// literals computed here and broadcast along axis 1 to the output lengths.
void rewrite_batchnorm::apply(module& m) const
{
    for(auto ins : iterator_for(m))
    {
        if(ins->name() != "batch_norm_inference")
            continue;

        // Inputs 1..4 are scale, bias, mean and variance respectively.
        auto scale_arg = ins->inputs()[1]->eval();
        auto bias_arg  = ins->inputs()[2]->eval();
        auto mean_arg  = ins->inputs()[3]->eval();
        auto var_arg   = ins->inputs()[4]->eval();

        // Leave the instruction alone if any statistic could not be evaluated.
        if(scale_arg.empty() or bias_arg.empty() or mean_arg.empty() or var_arg.empty())
            continue;

        // The folded coefficients take the lengths of the scale input and the
        // element type of the batch-norm output.
        shape coeff_shape{ins->get_shape().type(), ins->inputs()[1]->get_shape().lens()};

        auto epsilon = any_cast<op::batch_norm_inference>(ins->get_operator()).epsilon;

        argument a{coeff_shape};
        argument b{coeff_shape};
        visit_all(scale_arg, bias_arg, mean_arg, var_arg, a, b)(
            [&](auto scale_v, auto bias_v, auto mean_v, auto var_v, auto a_v, auto b_v) {
                // a and b share coeff_shape, so one pass fills both buffers.
                dfor(a.get_shape().elements())([&](std::size_t c) {
                    a_v[c] = scale_v[c] / std::sqrt(var_v[c] + epsilon);
                    b_v[c] = bias_v[c] - (scale_v[c] * mean_v[c] / std::sqrt(var_v[c] + epsilon));
                });
            });

        auto along_channels = op::broadcast{1, ins->get_shape().lens()};

        // x * a
        auto a_lit   = m.add_literal({a.get_shape(), a.data()});
        auto a_bcast = m.insert_instruction(ins, along_channels, a_lit);
        auto scaled  = m.insert_instruction(ins, make_op("mul"), ins->inputs().front(), a_bcast);

        // (x * a) + b
        auto b_lit   = m.add_literal({b.get_shape(), b.data()});
        auto b_bcast = m.insert_instruction(ins, along_channels, b_lit);
        auto shifted = m.insert_instruction(ins, make_op("add"), scaled, b_bcast);

        m.replace_instruction(ins, shifted);
    }
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/simplify_algebra.cpp
View file @
5a14c0bf
...
@@ -57,12 +57,14 @@ auto conv_const_weights()
...
@@ -57,12 +57,14 @@ auto conv_const_weights()
auto
reduction
()
{
return
match
::
name_contains
(
"reduce"
);
}
auto
reduction
()
{
return
match
::
name_contains
(
"reduce"
);
}
// conv(x, w) * a => conv(x, a * w)
struct
find_mul_conv
struct
find_mul_conv
{
{
auto
matcher
()
const
auto
matcher
()
const
{
{
return
match
::
name
(
"mul"
)(
match
::
either_arg
(
0
,
1
)(
conv_const_weights
().
bind
(
"conv"
),
return
match
::
name
(
"mul"
)(
match
::
name
(
"broadcast"
).
bind
(
"a"
)));
match
::
either_arg
(
0
,
1
)(
conv_const_weights
().
bind
(
"conv"
),
match
::
name
(
"broadcast"
,
"multibroadcast"
).
bind
(
"a"
)));
}
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
...
@@ -72,14 +74,35 @@ struct find_mul_conv
...
@@ -72,14 +74,35 @@ struct find_mul_conv
auto
a_ins
=
r
.
instructions
[
"a"
];
auto
a_ins
=
r
.
instructions
[
"a"
];
auto
w_ins
=
r
.
instructions
[
"w"
];
auto
w_ins
=
r
.
instructions
[
"w"
];
auto
broadcast_op
=
any_cast
<
op
::
broadcast
>
(
a_ins
->
get_operator
());
const
auto
&
a_input_lens
=
a_ins
->
inputs
().
front
()
->
get_shape
().
lens
();
if
(
broadcast_op
.
axis
!=
1
)
std
::
size_t
num_not_one_dims
=
std
::
count_if
(
a_input_lens
.
cbegin
(),
a_input_lens
.
cend
(),
[](
auto
dim
)
{
return
dim
!=
1
;
});
if
(
num_not_one_dims
>
1
)
return
;
// check broadcasted along channels
const
auto
&
a_lens
=
a_ins
->
get_shape
().
lens
();
const
auto
&
a_strides
=
a_ins
->
get_shape
().
strides
();
auto
is_broadcasted_axis
=
[](
auto
len
,
auto
stride
)
{
return
len
==
1
or
stride
==
0
;
};
if
(
a_strides
.
at
(
1
)
!=
1
)
return
;
if
(
not
is_broadcasted_axis
(
a_lens
.
front
(),
a_strides
.
front
()))
return
;
return
;
if
(
not
std
::
equal
(
a_lens
.
begin
()
+
2
,
a_lens
.
end
(),
a_strides
.
begin
()
+
2
,
a_strides
.
end
(),
is_broadcasted_axis
))
return
;
auto
sq
=
m
.
insert_instruction
(
ins
,
make_op
(
"squeeze"
),
a_ins
->
inputs
().
front
());
auto
new_a
=
m
.
insert_instruction
(
auto
new_a
=
m
.
insert_instruction
(
ins
,
ins
,
make_op
(
"broadcast"
,
{{
"axis"
,
0
},
{
"out_lens"
,
w_ins
->
get_shape
().
lens
()}}),
sq
);
make_op
(
"broadcast"
,
{{
"axis"
,
0
},
{
"out_lens"
,
w_ins
->
get_shape
().
lens
()}}),
a_ins
->
inputs
().
front
());
auto
new_mul
=
m
.
insert_instruction
(
ins
,
make_op
(
"mul"
),
new_a
,
w_ins
);
auto
new_mul
=
m
.
insert_instruction
(
ins
,
make_op
(
"mul"
),
new_a
,
w_ins
);
auto
new_conv
=
m
.
insert_instruction
(
auto
new_conv
=
m
.
insert_instruction
(
ins
,
conv_ins
->
get_operator
(),
conv_ins
->
inputs
().
front
(),
new_mul
);
ins
,
conv_ins
->
get_operator
(),
conv_ins
->
inputs
().
front
(),
new_mul
);
...
@@ -412,6 +435,24 @@ struct find_concat_op
...
@@ -412,6 +435,24 @@ struct find_concat_op
}
}
};
};
// Moves the instructions in `inss` to `pos`, recursing into each one's inputs
// before moving it. Entries of `inss` found in [m.begin(), pos) are dropped
// first and are not moved.
// `inss` is taken by value because the filtering below mutates it.
void move_instructions_back(module& m, instruction_ref pos, std::vector<instruction_ref> inss)
{
    // iterator_for is handed a named range here, exactly as before.
    auto preceding = range(m.begin(), pos);
    for(auto earlier : iterator_for(preceding))
    {
        auto found = std::find(inss.begin(), inss.end(), earlier);
        if(found == inss.end())
            continue;
        inss.erase(found);
    }

    for(auto pending : inss)
    {
        // Only instructions that belong to this module are moved.
        if(m.has_instruction(pending))
        {
            // Handle the inputs first, then the instruction itself.
            move_instructions_back(m, pos, pending->inputs());
            m.move_instruction(pending, pos);
        }
    }
}
std
::
vector
<
instruction_ref
>
get_splits
(
instruction_ref
ins
)
std
::
vector
<
instruction_ref
>
get_splits
(
instruction_ref
ins
)
{
{
std
::
vector
<
instruction_ref
>
result
;
std
::
vector
<
instruction_ref
>
result
;
...
@@ -587,8 +628,7 @@ struct find_splits
...
@@ -587,8 +628,7 @@ struct find_splits
}))
}))
return
;
return
;
for
(
auto
data
:
data_args
)
move_instructions_back
(
m
,
ins
,
data_args
);
m
.
move_instructions
(
data
,
ins
);
auto
slice_op
=
any_cast
<
op
::
slice
>
(
splits
.
front
()
->
get_operator
());
auto
slice_op
=
any_cast
<
op
::
slice
>
(
splits
.
front
()
->
get_operator
());
assert
(
not
slice_op
.
axes
.
empty
());
assert
(
not
slice_op
.
axes
.
empty
());
...
@@ -841,8 +881,7 @@ struct find_conv_dot_horiz_fusion
...
@@ -841,8 +881,7 @@ struct find_conv_dot_horiz_fusion
concat_axis
=
axis
;
concat_axis
=
axis
;
}
}
for
(
auto
arg
:
args
)
move_instructions_back
(
m
,
input
,
args
);
m
.
move_instructions
(
arg
,
input
);
// TODO: Check if axes match
// TODO: Check if axes match
auto
concat
=
auto
concat
=
m
.
insert_instruction
(
input
,
make_op
(
"concat"
,
{{
"axis"
,
concat_axis
}}),
args
);
m
.
insert_instruction
(
input
,
make_op
(
"concat"
,
{{
"axis"
,
concat_axis
}}),
args
);
...
@@ -894,6 +933,73 @@ struct find_div_const
...
@@ -894,6 +933,73 @@ struct find_div_const
}
}
};
};
// Removes arithmetic with an identity operand:
//   x * 1, 1 * x, x / 1, x + 0, 0 + x, x - 0   =>   x
struct find_unit_ops
{
    auto matcher() const
    {
        // mul and add accept the constant on either side; div and sub only on the right.
        auto times_one = match::name("mul")(
            match::either_arg(0, 1)(match::has_value(1.0f), match::any().bind("x")));
        auto over_one =
            match::name("div")(match::args(match::any().bind("x"), match::has_value(1.0f)));
        auto plus_zero = match::name("add")(
            match::either_arg(0, 1)(match::has_value(0.0f, 1e-12), match::any().bind("x")));
        auto minus_zero =
            match::name("sub")(match::args(match::any().bind("x"), match::has_value(0.0f)));

        return match::any_of(times_one, over_one, plus_zero, minus_zero);
    }

    // Replaces the matched instruction with the operand bound to "x".
    void apply(module& m, const match::matcher_result& r) const
    {
        auto matched  = r.result;
        auto operand  = r.instructions["x"];

        m.replace_instruction(matched, operand);
    }
};
// Rewrites arithmetic that only flips the sign of its operand into a "neg":
//   x * -1, -1 * x, x / -1, 0 - x   =>   neg(x)
struct find_neg_unit_ops
{
    auto matcher() const
    {
        // mul accepts -1 on either side; div needs it as the divisor and sub
        // needs the zero as the left operand.
        auto mul_neg_1 = match::name("mul")(
            match::either_arg(0, 1)(match::has_value(-1.0f), match::any().bind("x")));
        auto div_neg_1 =
            match::name("div")(match::args(match::any().bind("x"), match::has_value(-1.0f)));
        auto sub_0 =
            match::name("sub")(match::args(match::has_value(0.0f), match::any().bind("x")));
        return match::any_of(mul_neg_1, div_neg_1, sub_0);
    }

    // Replaces the matched instruction with neg(x), where x is the operand
    // bound to "x".
    void apply(module& m, const match::matcher_result& r) const
    {
        auto ins  = r.result;
        auto c_in = r.instructions["x"];

        // Insert the neg at the matched instruction, like the other rewrites in
        // this file, rather than using add_instruction, which takes no position.
        auto neg = m.insert_instruction(ins, make_op("neg"), c_in);
        m.replace_instruction(ins, neg);
    }
};
// Collapses arithmetic whose result is the zero operand itself:
//   0 * x, x * 0, 0 / x   =>   the zero operand
struct find_zero_ops
{
    auto matcher() const
    {
        // Here "x" is bound to the zero-valued operand, not to the other one.
        auto times_zero = match::name("mul")(
            match::either_arg(0, 1)(match::has_value(0.0f).bind("x"), match::any()));
        auto zero_over =
            match::name("div")(match::args(match::has_value(0.0f).bind("x"), match::any()));

        return match::any_of(times_zero, zero_over);
    }

    // Replaces the matched instruction with the zero operand bound to "x".
    void apply(module& m, const match::matcher_result& r) const
    {
        auto matched = r.result;
        auto zero    = r.instructions["x"];

        m.replace_instruction(matched, zero);
    }
};
struct
find_sub_const
struct
find_sub_const
{
{
auto
matcher
()
const
auto
matcher
()
const
...
@@ -985,20 +1091,35 @@ struct find_split_reshape
...
@@ -985,20 +1091,35 @@ struct find_split_reshape
auto
rsp_lens
=
rsp
->
get_shape
().
lens
();
auto
rsp_lens
=
rsp
->
get_shape
().
lens
();
auto
rsp_strides
=
rsp
->
get_shape
().
strides
();
auto
rsp_strides
=
rsp
->
get_shape
().
strides
();
rsp_strides
.
insert
(
rsp_strides
.
begin
(),
rsp_strides
[
0
]
*
rsp_lens
[
0
]);
rsp_strides
.
insert
(
rsp_strides
.
begin
(),
rsp_strides
[
0
]
*
rsp_lens
[
0
]);
auto
ait
=
std
::
find
(
rsp_strides
.
begin
(),
rsp_strides
.
end
(),
slc_dim_size
);
auto
ait
=
std
::
find
(
rsp_strides
.
begin
(),
rsp_strides
.
end
(),
slc_dim_size
);
int
rsp_axis
=
-
1
;
if
(
ait
==
rsp_strides
.
end
())
if
(
ait
==
rsp_strides
.
end
())
{
{
return
;
return
;
}
}
int
rsp_axis
=
std
::
distance
(
rsp_strides
.
begin
(),
ait
);
else
if
(
ait
==
rsp_strides
.
end
()
-
1
)
{
// edge case
// slice_dim == 1, in that case it could match with last stride of 1.
// it should accumulate lengths from last dim in that case. discount 1 to avoid going
// out of bounds.
assert
(
slc_dim_size
==
1
);
rsp_axis
=
std
::
distance
(
rsp_strides
.
begin
(),
ait
)
-
1
;
}
else
{
rsp_axis
=
std
::
distance
(
rsp_strides
.
begin
(),
ait
);
}
// calculate reshape output shape
// calculate reshape output shape
std
::
vector
<
int64_t
>
vec_dims
(
vec_rsp
.
size
());
std
::
vector
<
int64_t
>
vec_dims
(
vec_rsp
.
size
());
std
::
transform
(
vec_rsp
.
begin
(),
vec_rsp
.
end
(),
vec_dims
.
begin
(),
[
&
](
auto
is
)
{
std
::
transform
(
vec_rsp
.
begin
(),
vec_rsp
.
end
(),
vec_dims
.
begin
(),
[
&
](
auto
is
)
{
return
is
->
get_shape
().
lens
()[
rsp_axis
];
return
is
->
get_shape
().
lens
()[
rsp_axis
];
});
});
std
::
vector
<
int64_t
>
rsp_out_lens
(
rsp_lens
.
begin
(),
rsp_lens
.
end
());
std
::
vector
<
int64_t
>
rsp_out_lens
(
rsp_lens
.
begin
(),
rsp_lens
.
end
());
rsp_out_lens
[
rsp_axis
]
=
std
::
accumulate
(
vec_dims
.
begin
(),
vec_dims
.
end
(),
std
::
int64_t
{
0
});
rsp_out_lens
[
rsp_axis
]
=
std
::
accumulate
(
vec_dims
.
begin
(),
vec_dims
.
end
(),
std
::
int64_t
{
0
});
// insert the reshape instruction and add contiguous if needed
// insert the reshape instruction and add contiguous if needed
...
@@ -1095,6 +1216,9 @@ void simplify_algebra::apply(module& m) const
...
@@ -1095,6 +1216,9 @@ void simplify_algebra::apply(module& m) const
find_mul_conv
{},
find_mul_conv
{},
find_mul_slice_conv
{},
find_mul_slice_conv
{},
find_mul_add
{},
find_mul_add
{},
find_unit_ops
{},
find_neg_unit_ops
{},
find_zero_ops
{},
find_dot_add
{},
find_dot_add
{},
find_div_const
{},
find_div_const
{},
find_sub_const
{},
find_sub_const
{},
...
...
src/simplify_reshapes.cpp
View file @
5a14c0bf
...
@@ -271,6 +271,44 @@ struct find_nested_slice
...
@@ -271,6 +271,44 @@ struct find_nested_slice
}
}
};
};
// Rewrites concat(multibroadcast(a), multibroadcast(b), ...) into
// multibroadcast(concat(a, b, ...)), so the concat runs on the
// un-broadcast inputs.
struct find_concat_multibroadcasts
{
    auto matcher() const
    {
        // Matches a concat whose inputs are all multibroadcast instructions.
        return match::name("concat")(
            match::all_of[match::inputs()](match::name("multibroadcast")));
    }

    void apply(module& m, const match::matcher_result& mr) const
    {
        auto concat_ins    = mr.result;
        auto concat_op     = any_cast<op::concat>(concat_ins->get_operator());
        auto out_lens      = concat_ins->get_shape().lens();
        auto operands      = concat_ins->inputs();
        // Strides of the first multibroadcast, captured before `operands` is rewritten.
        auto front_strides = operands.front()->get_shape().strides();

        // Only apply when concat axis is not a broadcasted dimension
        const bool axis_is_broadcasted =
            std::any_of(operands.begin(), operands.end(), [&](auto operand) {
                return operand->get_shape().strides()[concat_op.axis] == 0;
            });
        if(axis_is_broadcasted)
            return;

        // Use inputs of multibroadcast ops as inputs to new concat op
        for(auto& operand : operands)
            operand = operand->inputs().front();

        // Reduce axis by number of leading broadcasted dimensions
        // NOTE(review): this counts every zero stride before the axis in the
        // first multibroadcast, not only dimensions that were prepended --
        // confirm that is intended when a non-leading dimension is broadcast.
        if(operands.front()->get_shape().lens().size() < out_lens.size())
        {
            concat_op.axis -=
                std::count(front_strides.begin(), front_strides.begin() + concat_op.axis, 0);
        }

        auto narrow_concat = m.insert_instruction(concat_ins, concat_op, operands);
        m.replace_instruction(
            concat_ins, migraphx::make_op("multibroadcast", {{"out_lens", out_lens}}), narrow_concat);
    }
};
struct
find_concat_transpose
struct
find_concat_transpose
{
{
auto
matcher
()
const
auto
matcher
()
const
...
@@ -764,6 +802,7 @@ void simplify_reshapes::apply(module& m) const
...
@@ -764,6 +802,7 @@ void simplify_reshapes::apply(module& m) const
find_reshaper
{},
find_reshaper
{},
find_transpose
{},
find_transpose
{},
find_concat_transpose
{},
find_concat_transpose
{},
find_concat_multibroadcasts
{},
find_nested_convert
{},
find_nested_convert
{},
find_nested_slice
{},
find_nested_slice
{},
find_nested_concat
{},
find_nested_concat
{},
...
...
src/targets/cpu/CMakeLists.txt
View file @
5a14c0bf
...
@@ -35,6 +35,7 @@ add_library(migraphx_cpu
...
@@ -35,6 +35,7 @@ add_library(migraphx_cpu
dnnl.cpp
dnnl.cpp
eltwise.cpp
eltwise.cpp
erf.cpp
erf.cpp
fmod.cpp
fuse_ops.cpp
fuse_ops.cpp
gather.cpp
gather.cpp
gemm.cpp
gemm.cpp
...
@@ -42,6 +43,7 @@ add_library(migraphx_cpu
...
@@ -42,6 +43,7 @@ add_library(migraphx_cpu
logsoftmax.cpp
logsoftmax.cpp
lowering.cpp
lowering.cpp
lrn.cpp
lrn.cpp
mod.cpp
preallocate.cpp
preallocate.cpp
pooling.cpp
pooling.cpp
reduction.cpp
reduction.cpp
...
...
src/targets/
gpu/include/migraphx/gpu/cos.h
pp
→
src/targets/
cpu/fmod.c
pp
View file @
5a14c0bf
...
@@ -21,22 +21,16 @@
...
@@ -21,22 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_COS_HPP
#include <migraphx/config.hpp>
#define MIGRAPHX_GUARD_RTGLIB_COS_HPP
#include <migraphx/cpu/pointwise.hpp>
#include <migraphx/op/fmod.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/cos.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
g
pu
{
namespace
c
pu
{
struct
hip_cos
:
unary_device
<
hip_cos
,
device
::
cos
>
template
struct
cpu_binary
<
op
::
fmod
>;
{
};
}
// namespace
g
pu
}
// namespace
c
pu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
#endif
src/targets/cpu/lowering.cpp
View file @
5a14c0bf
...
@@ -26,7 +26,6 @@
...
@@ -26,7 +26,6 @@
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
...
@@ -43,6 +42,8 @@
...
@@ -43,6 +42,8 @@
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmax.hpp>
#include <migraphx/op/argmin.hpp>
#include <migraphx/op/argmin.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/rnn_var_sl_last_output.hpp>
#include <migraphx/op/mod.hpp>
#include <migraphx/op/fmod.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/par_dfor.hpp>
#include <migraphx/par_dfor.hpp>
...
...
src/targets/
g
pu/
include/migraphx/gpu/exp.h
pp
→
src/targets/
c
pu/
mod.c
pp
View file @
5a14c0bf
...
@@ -21,22 +21,16 @@
...
@@ -21,22 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_EXP_HPP
#include <migraphx/config.hpp>
#define MIGRAPHX_GUARD_RTGLIB_EXP_HPP
#include <migraphx/cpu/pointwise.hpp>
#include <migraphx/op/mod.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/exp.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
g
pu
{
namespace
c
pu
{
struct
hip_exp
:
unary_device
<
hip_exp
,
device
::
exp
>
template
struct
cpu_binary
<
op
::
mod
>;
{
};
}
// namespace
g
pu
}
// namespace
c
pu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
#endif
src/targets/cpu/target.cpp
View file @
5a14c0bf
...
@@ -37,7 +37,6 @@
...
@@ -37,7 +37,6 @@
#include <migraphx/propagate_constant.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/rewrite_rnn.hpp>
...
@@ -78,8 +77,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -78,8 +77,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
eliminate_identity
{},
eliminate_identity
{},
eliminate_pad
{},
eliminate_pad
{},
dead_code_elimination
{},
dead_code_elimination
{},
rewrite_batchnorm
{},
dead_code_elimination
{},
rewrite_rnn
{},
rewrite_rnn
{},
dead_code_elimination
{},
dead_code_elimination
{},
eliminate_common_subexpression
{},
eliminate_common_subexpression
{},
...
...
src/targets/gpu/CMakeLists.txt
100755 → 100644
View file @
5a14c0bf
...
@@ -39,81 +39,9 @@ file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
...
@@ -39,81 +39,9 @@ file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
message
(
STATUS
"KERNEL_FILES:
${
KERNEL_FILES
}
"
)
message
(
STATUS
"KERNEL_FILES:
${
KERNEL_FILES
}
"
)
add_embed_library
(
migraphx_kernels
${
KERNEL_FILES
}
)
add_embed_library
(
migraphx_kernels
${
KERNEL_FILES
}
)
add_library
(
migraphx_device
file
(
GLOB DEVICE_GPU_SRCS
${
CONFIGURE_DEPENDS
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/*.cpp
)
device/acos.cpp
add_library
(
migraphx_device
${
DEVICE_GPU_SRCS
}
)
device/acosh.cpp
device/add.cpp
device/add_clip.cpp
device/add_relu.cpp
device/add_sigmoid.cpp
device/add_tanh.cpp
device/argmax.cpp
device/argmin.cpp
device/asin.cpp
device/asinh.cpp
device/atan.cpp
device/atanh.cpp
device/ceil.cpp
device/clip.cpp
device/concat.cpp
device/contiguous.cpp
device/convert.cpp
device/cos.cpp
device/cosh.cpp
device/div.cpp
device/equal.cpp
device/erf.cpp
device/exp.cpp
device/fill.cpp
device/floor.cpp
device/gather.cpp
device/gelu.cpp
device/greater.cpp
device/int8_gemm_pack.cpp
device/layernorm.cpp
device/less.cpp
device/log.cpp
device/logical_and.cpp
device/logical_or.cpp
device/logical_xor.cpp
device/logsoftmax.cpp
device/max.cpp
device/min.cpp
device/mul.cpp
device/mul_add.cpp
device/mul_add_relu.cpp
device/multinomial.cpp
device/nonzero.cpp
device/pad.cpp
device/pow.cpp
device/prelu.cpp
device/prefix_scan_sum.cpp
device/recip.cpp
device/reduce_max.cpp
device/reduce_mean.cpp
device/reduce_min.cpp
device/reduce_sum.cpp
device/reduce_prod.cpp
device/relu.cpp
device/reverse.cpp
device/rnn_variable_seq_lens.cpp
device/round.cpp
device/rsqrt.cpp
device/scatter.cpp
device/sigmoid.cpp
device/sign.cpp
device/sin.cpp
device/sinh.cpp
device/softmax.cpp
device/sqdiff.cpp
device/sqrt.cpp
device/sub.cpp
device/tan.cpp
device/tanh.cpp
device/topk.cpp
device/unary_not.cpp
device/where.cpp
)
add_library
(
compile_for_gpu INTERFACE
)
add_library
(
compile_for_gpu INTERFACE
)
target_compile_options
(
compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns
)
target_compile_options
(
compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns
)
target_link_libraries
(
compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored
)
target_link_libraries
(
compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored
)
...
@@ -150,18 +78,12 @@ add_library(migraphx_gpu
...
@@ -150,18 +78,12 @@ add_library(migraphx_gpu
allocation_model.cpp
allocation_model.cpp
argmax.cpp
argmax.cpp
argmin.cpp
argmin.cpp
batch_norm_inference.cpp
clip.cpp
code_object_op.cpp
code_object_op.cpp
compile_ops.cpp
compile_ops.cpp
compile_gen.cpp
compile_gen.cpp
compile_hip.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compile_hip_code_object.cpp
compiler.cpp
compiler.cpp
concat.cpp
convert.cpp
convolution.cpp
deconvolution.cpp
device_name.cpp
device_name.cpp
elu.cpp
elu.cpp
fuse_mlir.cpp
fuse_mlir.cpp
...
@@ -186,13 +108,11 @@ add_library(migraphx_gpu
...
@@ -186,13 +108,11 @@ add_library(migraphx_gpu
pad.cpp
pad.cpp
perfdb.cpp
perfdb.cpp
pooling.cpp
pooling.cpp
quant_convolution.cpp
reverse.cpp
reverse.cpp
rnn_variable_seq_lens.cpp
rnn_variable_seq_lens.cpp
rocblas.cpp
rocblas.cpp
scatter.cpp
scatter.cpp
schedule_model.cpp
schedule_model.cpp
softmax.cpp
sync_device.cpp
sync_device.cpp
target.cpp
target.cpp
topk.cpp
topk.cpp
...
@@ -207,81 +127,27 @@ function(register_migraphx_gpu_ops PREFIX)
...
@@ -207,81 +127,27 @@ function(register_migraphx_gpu_ops PREFIX)
endforeach
()
endforeach
()
endfunction
()
endfunction
()
register_migraphx_gpu_ops
(
hip_
register_migraphx_gpu_ops
(
hip_
acosh
acos
add
argmax
argmax
argmin
argmin
asinh
asin
atanh
atan
ceil
clip
concat
convert
cosh
cos
div
equal
erf
exp
floor
gather
gather
greater
less
log
logsoftmax
logsoftmax
logical_and
logical_or
logical_xor
loop
loop
max
min
mul
multinomial
multinomial
nonzero
nonzero
pad
pad
pow
prelu
prefix_scan_sum
prefix_scan_sum
recip
reduce_max
reduce_mean
reduce_min
reduce_prod
reduce_sum
relu
reverse
reverse
round
rsqrt
scatter
scatter
sigmoid
sign
sinh
sin
softmax
sqdiff
sqrt
sub
tanh
tan
topk
topk
unary_not
where
)
)
register_migraphx_gpu_ops
(
miopen_
register_migraphx_gpu_ops
(
miopen_
abs
abs
batch_norm_inference
contiguous
contiguous
convolution
deconvolution
elu
elu
int8_conv_pack
int8_conv_pack
leaky_relu
leaky_relu
lrn
lrn
pooling
pooling
quant_convolution
)
)
register_op
(
migraphx_gpu
register_op
(
migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
...
@@ -295,6 +161,9 @@ register_op(migraphx_gpu
...
@@ -295,6 +161,9 @@ register_op(migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
INCLUDES migraphx/gpu/context.hpp
)
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu HEADER migraphx/gpu/convolution.hpp
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution>
INCLUDES migraphx/gpu/context.hpp
)
rocm_set_soversion
(
migraphx_gpu
${
MIGRAPHX_SO_VERSION
}
)
rocm_set_soversion
(
migraphx_gpu
${
MIGRAPHX_SO_VERSION
}
)
rocm_clang_tidy_check
(
migraphx_gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
@@ -322,26 +191,11 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
...
@@ -322,26 +191,11 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
set
(
MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL
""
)
set
(
MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL
""
)
if
(
MIGRAPHX_ENABLE_MLIR
)
if
(
MIGRAPHX_ENABLE_MLIR
)
find_library
(
MLIRAPI_LIBRARY MLIRMIOpen
# Find package rocMLIR
PATH_SUFFIXES
find_package
(
rocMLIR 1.0.0 CONFIG REQUIRED
)
# Workaround broken mlir install
message
(
STATUS
"Build with rocMLIR::rockCompiler
${
rocMLIR_VERSION
}
"
)
lib/ lib/lib
)
# REQUIRED is not supported before cmake 3.18
if
(
NOT MLIRAPI_LIBRARY
)
message
(
FATAL_ERROR
"libMLIRMIOpen not found"
)
else
()
message
(
STATUS
"Build with libMLIRMIOpen: "
${
MLIRAPI_LIBRARY
}
)
endif
()
find_path
(
MLIRAPI_HEADERS NAMES mlir-c/Dialect/MIGraphX.h
)
# Workaround MLIR broken installation
find_path
(
MLIRAPI_HEADERS2 NAMES mlir-c/Registration.h
PATH_SUFFIXES
include/external/include external/include
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"-DMIGRAPHX_MLIR"
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"-DMIGRAPHX_MLIR"
)
target_include_directories
(
migraphx_gpu SYSTEM PRIVATE
${
MLIRAPI_HEADERS
}
${
MLIRAPI_HEADERS2
}
)
target_link_libraries
(
migraphx_gpu PUBLIC rocMLIR::rockCompiler
)
target_link_libraries
(
migraphx_gpu PUBLIC
${
MLIRAPI_LIBRARY
}
)
endif
()
endif
()
set
(
MIGRAPHX_USE_HIPRTC OFF CACHE BOOL
""
)
set
(
MIGRAPHX_USE_HIPRTC OFF CACHE BOOL
""
)
...
@@ -380,9 +234,18 @@ endif()
...
@@ -380,9 +234,18 @@ endif()
include
(
CheckLibraryExists
)
include
(
CheckLibraryExists
)
get_target_property
(
MIOPEN_LOCATION MIOpen LOCATION
)
get_target_property
(
MIOPEN_LOCATION MIOpen LOCATION
)
check_library_exists
(
MIOpen
"miopenHiddenSetConvolutionFindMode"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_MODE_API
)
check_library_exists
(
MIOpen
"miopenHiddenSetConvolutionFindMode"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_MODE_API
)
check_library_exists
(
MIOpen
"miopenFindSolutions"
"
${
MIOPEN_LOCATION
}
"
HAS_FIND_2_API
)
if
(
HAS_FIND_2_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API
)
message
(
STATUS
"MIGraphx is using Find-2.0 API of MIOpen"
)
else
()
message
(
STATUS
"MIOpen does not have Find-2.0 API"
)
endif
()
if
(
HAS_FIND_MODE_API
)
if
(
HAS_FIND_MODE_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API
)
message
(
STATUS
"MI
Open has f
ind
m
ode
api
"
)
message
(
STATUS
"MI
Graphx is using F
ind
M
ode
API of MIOpen
"
)
else
()
else
()
message
(
STATUS
"MIOpen does not have find mode api"
)
message
(
STATUS
"MIOpen does not have find mode api"
)
endif
()
endif
()
...
...
src/targets/gpu/batch_norm_inference.cpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/batch_norm_inference.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Shape inference for the MIOpen-backed batch-norm inference operator.
//
// `inputs` is validated with check_shapes::has(6); only the first five entries
// are forwarded to the wrapped operator's compute_shape. The sixth entry is not
// inspected here beyond the count check (presumably it is the preallocated
// output buffer -- confirm against the lowering code).
shape miopen_batch_norm_inference::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(6);

    // The pointer range [first, first + 1) covers only the first input, so the
    // same_ndims / max_ndims(5) checks apply to that single shape.
    const shape* first = inputs.data();
    check_shapes{first, first + 1, *this}.same_ndims().max_ndims(5);

    // Forward inputs 0..4 to the reference operator; input 5 is left out.
    std::vector<shape> op_inputs{
        inputs.at(0), inputs.at(1), inputs.at(2), inputs.at(3), inputs.at(4)};
    return op.compute_shape(op_inputs);
}
// Returns a shape whose dimension list has at least four entries.
//
// If `input` already has four or more dimensions it is returned unchanged.
// Otherwise a new shape is built from input.type() and the original lens
// followed by enough trailing 1s to reach exactly four dimensions.
// NOTE(review): despite the name, the padded result is 4-dimensional; the new
// shape is constructed from (type, lens) only, so the input's strides are not
// carried over.
inline shape reshape_to_2d(const shape& input)
{
    const auto& lens = input.lens();
    if(lens.size() >= 4)
        return input;

    // Copy the existing dimensions, then grow to four by appending 1s.
    std::vector<size_t> padded(lens.begin(), lens.end());
    padded.resize(4, 1);
    return {input.type(), padded};
}
// Runs batch-norm inference through MIOpen and returns the output argument.
//
// Argument layout as used below:
//   args[0]    - input tensor (x)
//   args[1..4] - passed to MIOpen in the positions that its signature names
//                bnScale, bnBias, estimatedMean and estimatedVariance
//   args[5]    - output tensor (y); this same argument is returned
// The tensor descriptor shared by args[1..4] is built from args[3]'s shape.
argument miopen_batch_norm_inference::compute(context& ctx,
                                              const shape& output_shape,
                                              const std::vector<argument>& args) const
{
    // Each shape goes through reshape_to_2d before a descriptor is created.
    auto in_desc    = make_tensor(reshape_to_2d(args[0].get_shape()));
    auto out_desc   = make_tensor(reshape_to_2d(output_shape));
    auto param_desc = make_tensor(reshape_to_2d(args[3].get_shape()));

    // Blend factors handed to MIOpen by address.
    float alpha = 1.0f;
    float beta  = 0.0f;

    const argument& result = args[5];

    // NOTE(review): the value returned by the MIOpen call is not checked.
    miopenBatchNormalizationForwardInference(ctx.get_stream().get_miopen(),
                                             static_cast<miopenBatchNormMode_t>(op.bn_mode),
                                             &alpha,
                                             &beta,
                                             in_desc.get(),
                                             args[0].implicit(),
                                             out_desc.get(),
                                             result.implicit(),
                                             param_desc.get(),
                                             args[1].implicit(),
                                             args[2].implicit(),
                                             args[3].implicit(),
                                             args[4].implicit(),
                                             op.epsilon);

    return result;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/clip.cpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Shape inference for the GPU clip operator.
//
// The last entry of `inputs` is removed before delegating to the wrapped
// operator's compute_shape (presumably that entry is the output allocation --
// confirm against the lowering code). `inputs` is taken by value, so the
// caller's vector is not modified. The vector must be non-empty.
shape hip_clip::compute_shape(std::vector<shape> inputs) const
{
    inputs.erase(inputs.end() - 1);
    return op.compute_shape(inputs);
}
// Launches the device clip routine on the context's stream.
//
// The last argument is passed to device::clip first and is also the value
// returned; the first argument and the arguments at index 1 and 2 follow it
// (presumably the data tensor and the min/max bounds -- confirm against
// device::clip's declaration). The output-shape parameter is unused.
argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    const argument& result = args.back();
    const argument& input  = args.front();
    const argument& arg1   = args.at(1);
    const argument& arg2   = args.at(2);

    device::clip(ctx.get_stream().get(), result, input, arg1, arg2);
    return result;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/compile_gen.cpp
View file @
5a14c0bf
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
...
@@ -48,12 +49,13 @@ static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
...
@@ -48,12 +49,13 @@ static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
return
{
4
,
2
};
return
{
4
,
2
};
}
}
vectorize
vectorize
::
elements
(
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
)
vectorize
vectorize
::
elements
(
std
::
size_t
axis
,
const
std
::
vector
<
shape
>&
inputs
,
const
std
::
vector
<
std
::
size_t
>&
sizes
)
{
{
if
(
std
::
all_of
(
if
(
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
&
](
const
auto
&
s
)
{
return
s
.
lens
()[
axis
]
==
1
;
}))
inputs
.
begin
(),
inputs
.
end
(),
[
&
](
const
auto
&
s
)
{
return
s
.
lens
()[
axis
]
==
1
;
}))
return
{
1
,
axis
};
return
{
1
,
axis
};
auto
sizes
=
vector_sizes
(
inputs
);
std
::
vector
<
std
::
size_t
>
max_vec_size
;
std
::
vector
<
std
::
size_t
>
max_vec_size
;
std
::
transform
(
inputs
.
begin
(),
std
::
transform
(
inputs
.
begin
(),
inputs
.
end
(),
inputs
.
end
(),
...
@@ -81,6 +83,33 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs
...
@@ -81,6 +83,33 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs
return
{
*
std
::
min_element
(
max_vec_size
.
begin
(),
max_vec_size
.
end
()),
axis
};
return
{
*
std
::
min_element
(
max_vec_size
.
begin
(),
max_vec_size
.
end
()),
axis
};
}
}
// Chooses a vectorization for `axis`, picking the candidate vector sizes from
// how large the biggest input is relative to the current device.
//
// - Empty `inputs` yields {1, axis}.
// - `over` is the largest input's element count divided (integer division) by
//   get_cu_count() * get_max_workitems_per_cu() of the current device.
// - Candidate sizes, in order: 8 (only when some input is broadcasted and
//   over > 8), 4 (when over > 4), and always 2.
// The final choice is delegated to the overload that takes an explicit list of
// sizes.
// NOTE(review): a device reporting zero for either quantity would make the
// division below divide by zero -- confirm that cannot happen.
vectorize vectorize::elements(context& ctx, std::size_t axis, const std::vector<shape>& inputs)
{
    if(inputs.empty())
        return {1, axis};

    // Element count of the largest input.
    std::size_t largest = 0;
    for(const auto& s : inputs)
        largest = std::max<std::size_t>(largest, s.elements());

    auto&& dev                   = ctx.get_current_device();
    const std::size_t max_global = dev.get_cu_count() * dev.get_max_workitems_per_cu();
    const std::size_t over       = largest / max_global;

    const bool has_broadcast = std::any_of(
        inputs.begin(), inputs.end(), [](const auto& s) { return s.broadcasted(); });

    std::vector<std::size_t> candidates;
    if(over > 8 and has_broadcast)
        candidates.push_back(8);
    if(over > 4)
        candidates.push_back(4);
    candidates.push_back(2);

    return elements(axis, inputs, candidates);
}
// Convenience overload: obtains the candidate vector sizes from
// vector_sizes(inputs) and forwards to the overload taking an explicit list.
vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
{
    const auto sizes = vector_sizes(inputs);
    return elements(axis, inputs, sizes);
}
std
::
string
vectorize
::
str
()
const
std
::
string
vectorize
::
str
()
const
{
{
return
"vectorize<"
+
to_string
(
size
)
+
", "
+
to_string
(
axis
)
+
">()"
;
return
"vectorize<"
+
to_string
(
size
)
+
", "
+
to_string
(
axis
)
+
">()"
;
...
@@ -102,7 +131,7 @@ preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
...
@@ -102,7 +131,7 @@ preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
std
::
size_t
bytes
=
0
;
std
::
size_t
bytes
=
0
;
for
(
auto
i
:
preloaded
)
for
(
auto
i
:
preloaded
)
{
{
auto
input
=
inputs
[
i
];
const
auto
&
input
=
inputs
[
i
];
bytes
+=
input
.
bytes
();
bytes
+=
input
.
bytes
();
if
(
bytes
>
max_lds_bytes
)
if
(
bytes
>
max_lds_bytes
)
break
;
break
;
...
...
src/targets/gpu/compile_hip_code_object.cpp
View file @
5a14c0bf
...
@@ -138,16 +138,16 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
...
@@ -138,16 +138,16 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std
::
size_t
groups
=
(
n
+
local
-
1
)
/
local
;
std
::
size_t
groups
=
(
n
+
local
-
1
)
/
local
;
std
::
size_t
max_blocks
=
max_global
/
local
;
std
::
size_t
max_blocks
=
max_global
/
local
;
std
::
size_t
nglobal
=
std
::
min
(
max_blocks
*
over
,
groups
)
*
local
;
std
::
size_t
nglobal
=
std
::
min
(
max_blocks
*
over
,
groups
)
*
local
;
return
nglobal
;
return
std
::
min
(
nglobal
,
n
)
;
};
};
}
}
std
::
size_t
compute_block_size
(
std
::
size_t
n
,
std
::
size_t
max_block_size
)
std
::
size_t
compute_block_size
(
std
::
size_t
n
,
std
::
size_t
max_block_size
)
{
{
size_t
block_size
=
128
;
const
std
::
size_t
min_
block_size
=
64
;
while
(
block_size
<=
max_block_size
and
block_size
<
=
n
)
const
std
::
size_t
base_
block_size
=
32
;
block_size
*=
2
;
auto
block_size
=
(((
n
-
1
)
/
base_block_size
+
1
))
*
base_block_size
;
return
block_size
/
2
;
return
std
::
min
(
std
::
max
(
min_block_size
,
block_size
),
max_block_size
)
;
}
}
operation
compile_hip_code_object
(
const
std
::
string
&
content
,
hip_compile_options
options
)
operation
compile_hip_code_object
(
const
std
::
string
&
content
,
hip_compile_options
options
)
...
...
src/targets/gpu/concat.cpp
deleted
100644 → 0
View file @
cb01e280
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/concat.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/concat.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Shape inference for the GPU concat operator.
//
// The last entry of `inputs` is removed before delegating to the wrapped
// operator's normalize_compute_shape (presumably that entry is the output
// allocation -- confirm against the lowering code). `inputs` is taken by
// value, so the caller's vector is not modified. The vector must be non-empty.
shape hip_concat::compute_shape(std::vector<shape> inputs) const
{
    inputs.erase(inputs.end() - 1);
    return op.normalize_compute_shape(inputs);
}
// Launches the device concat routine on the context's stream.
//
// Offsets are obtained from the wrapped operator's compute_offsets for the
// given output shape and arguments, then passed to device::concat together
// with the full argument list. The value returned by device::concat is
// returned to the caller.
argument hip_concat::compute(context& ctx,
                             const shape& output_shape,
                             const std::vector<argument>& args) const
{
    // Computed before the stream is fetched, matching the original ordering.
    const std::vector<std::size_t> offsets = op.compute_offsets(output_shape, args);

    return device::concat(ctx.get_stream().get(), output_shape, args, offsets);
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/convert.cpp
deleted
100644 → 0
View file @
cb01e280
This diff is collapsed.
Click to expand it.
Prev
1
2
3
4
5
6
7
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment