gaoqiong / MIGraphX · Commits

Commit 99604c1c
Merge remote-tracking branch 'origin/develop' into ck-poc3
Authored Oct 07, 2022 by Alan Turner
Parents: 78a300ff, f7d987ba
Changes: 250 files in the full merge; this page (1 of 13) shows 20 changed files with 344 additions and 606 deletions (+344 −606).
Changed files:

src/onnx/parse_batchnorm.cpp           +49   −14
src/opt/memory_coloring_impl.cpp        +7    −7
src/program.cpp                        +53   −40
src/py/migraphx_py.cpp                 +17    −0
src/replace_allocate.cpp                +1    −1
src/simplify_algebra.cpp               +20    −4
src/targets/cpu/CMakeLists.txt          +2    −0
src/targets/cpu/fmod.cpp                +6   −12
src/targets/cpu/lowering.cpp            +2    −0
src/targets/cpu/mod.cpp                 +6   −12
src/targets/gpu/CMakeLists.txt         +13  −130
src/targets/gpu/clip.cpp                +0   −46
src/targets/gpu/compile_gen.cpp        +32    −3
src/targets/gpu/concat.cpp              +0   −48
src/targets/gpu/convert.cpp             +0   −47
src/targets/gpu/convolution.cpp       +136   −49
src/targets/gpu/device/acos.cpp         +0   −41
src/targets/gpu/device/acosh.cpp        +0   −41
src/targets/gpu/device/add.cpp          +0   −50
src/targets/gpu/device/add_clip.cpp     +0   −61
src/onnx/parse_batchnorm.cpp (+49 −14)

@@ -24,7 +24,7 @@
 #include <migraphx/onnx/op_parser.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/make_op.hpp>
-#include <migraphx/op/batch_norm_inference.hpp>
+#include <migraphx/instruction.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -36,28 +36,63 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
     instruction_ref parse(const op_desc& /*opd*/,
                           const onnx_parser& parser,
-                          onnx_parser::node_info info,
-                          const std::vector<instruction_ref>& args) const
+                          const onnx_parser::node_info& info,
+                          std::vector<instruction_ref> args) const
     {
         float epsilon = 1e-5f;
-        float momentum = 0.9f;
-        op::batch_norm_inference::bn_infer_mode_t bn_mode = op::batch_norm_inference::spatial;
         if(contains(info.attributes, "epsilon"))
         {
             epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
         }
-        if(contains(info.attributes, "momentum"))
-        {
-            momentum = parser.parse_value(info.attributes.at("momentum")).at<float>();
-        }
-        if(contains(info.attributes, "spatial"))
-        {
-            bn_mode = (parser.parse_value(info.attributes.at("spatial")).at<uint64_t>() > 0)
-                          ? op::batch_norm_inference::spatial
-                          : op::batch_norm_inference::per_activation;
-        }
-        op::batch_norm_inference op{epsilon, momentum, bn_mode};
-        return info.add_instruction(op, args);
+        auto x_lens = args[0]->get_shape().lens();
+        auto x_type = args[0]->get_shape().type();
+        if(std::any_of(args.cbegin() + 1, args.cend(), [](auto a) {
+               return a->get_shape().lens().size() != 1;
+           }))
+        {
+            MIGRAPHX_THROW("PARSE_BATCHNORM: argument scale, bias, mean, or var rank != 1");
+        }
+        if(x_lens.size() == 1)
+        {
+            auto rt  = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
+            auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
+            auto n0   = info.add_broadcastable_binary_op("sub", args[0], args[3]);
+            auto d0   = info.add_broadcastable_binary_op("add", args[4], eps);
+            auto d1   = info.add_broadcastable_binary_op("pow", d0, rt);
+            auto div0 = info.add_broadcastable_binary_op("div", n0, d1);
+            auto r0   = info.add_broadcastable_binary_op("mul", div0, args[1]);
+            return info.add_broadcastable_binary_op("add", r0, args[2]);
+        }
+        else if(x_lens.size() > 2)
+        {
+            // unsqueeze tensors of shape (C) to broadcast correctly
+            std::vector<int64_t> unsqueeze_axes(x_lens.size() - 2);
+            std::iota(unsqueeze_axes.begin(), unsqueeze_axes.end(), 1);
+            auto rt  = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
+            auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
+            auto scale_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[1]);
+            auto bias_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[2]);
+            auto mean_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[3]);
+            auto var_unsqueeze = info.add_instruction(
+                migraphx::make_op("unsqueeze", {{"axes", unsqueeze_axes}}), args[4]);
+            auto numer   = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
+            auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
+            auto denom   = info.add_broadcastable_binary_op("pow", var_eps, rt);
+            auto div0    = info.add_broadcastable_binary_op("div", numer, denom);
+            auto r0      = info.add_broadcastable_binary_op("mul", div0, scale_unsqueeze);
+            return info.add_broadcastable_binary_op("add", r0, bias_unsqueeze);
+        }
+        else
+        {
+            // num dims either 0 or 2
+            MIGRAPHX_THROW("PARSE_BATCHNORM: rank " + std::to_string(x_lens.size()) +
+                           " input tensor, unhandled data format");
+        }
     }
 };
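For reference, the decomposition above replaces the dedicated batch_norm_inference operator with the standard ONNX BatchNormalization inference formula built from elementwise ops, where args[0..4] are x, scale, bias, mean, and var, a pow(·, 0.5) literal stands in for the square root, and the rank-1 per-channel tensors are unsqueezed so they broadcast over the trailing spatial dimensions:

    $y = \mathrm{scale} \cdot \dfrac{x - \mathrm{mean}}{\sqrt{\mathrm{var} + \epsilon}} + \mathrm{bias}$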
src/opt/memory_coloring_impl.cpp (+7 −7)

@@ -72,7 +72,7 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
     if(conflict_table.find(vn) != conflict_table.end())
     {
-        std::set<int>& vn_set = conflict_table[vn];
+        const std::set<int>& vn_set = conflict_table[vn];
         for(const auto& iter : vn_set)
         {
             live_range* range = live_ranges[iter];

@@ -267,8 +267,8 @@ void memory_coloring_impl::verify()
 {
     for(int i = 0; i < num_of_lives; ++i)
     {
-        live_interval& interval = live_intervals[i];
-        live_range& segment     = interval.segment;
+        const live_interval& interval = live_intervals[i];
+        const live_range& segment     = interval.segment;
         if(segment.begin == invalid_offset)
         {

@@ -284,7 +284,7 @@ void memory_coloring_impl::verify()
         int vn = segment.vn;
         if(conflict_table.find(vn) != conflict_table.end())
         {
-            std::set<int>& vn_set = conflict_table[vn];
+            const std::set<int>& vn_set = conflict_table[vn];
             for(const auto& iter : vn_set)
             {
                 live_range* range = live_ranges[iter];

@@ -319,8 +319,8 @@ void memory_coloring_impl::dump_intervals()
     {
         std::cout << " segment:" << i;
         std::cout << " =>";
-        std::set<int>& table = conflict_table[i];
-        for(auto& iter : table)
+        const std::set<int>& table = conflict_table[i];
+        for(const auto& iter : table)
         {
             std::cout << (iter) << ",";
         }

@@ -357,7 +357,7 @@ void live_interval::dump()
     std::cout << "id:" << id;
     segment.dump();
     std::cout << " uses:";
-    for(auto& iter : use_points)
+    for(const auto& iter : use_points)
     {
         std::cout << " " << get_ins_enum(iter) << ",";
     }
src/program.cpp (+53 −40)

@@ -398,7 +398,7 @@ std::vector<argument> generic_eval(const program& p,
     return generic_eval(mm, ctx, params, {}, make_trace);
 }

-std::vector<argument> program::eval(parameter_map params) const
+std::vector<argument> program::eval(parameter_map params, execution_environment exec_env) const
 {
     auto& ctx = this->impl->ctx;
 #ifndef NDEBUG

@@ -423,6 +423,12 @@ std::vector<argument> program::eval(parameter_map params) const
 #endif

     auto trace_level = value_of(MIGRAPHX_TRACE_EVAL{});
+    std::vector<argument> ret;
+    if(exec_env.async)
+    {
+        ctx.wait_for(exec_env.queue);
+    }
     if(trace_level > 0)
     {

@@ -434,49 +440,56 @@ std::vector<argument> program::eval(parameter_map params) const
             ins_out[x] = ss.str();
         });

-        return generic_eval(*this,
-                            ctx,
-                            std::move(params),
-                            with_check_context([&](auto& ins, auto f, auto&& check_context) {
+        ret = generic_eval(*this,
+                           ctx,
+                           std::move(params),
+                           with_check_context([&](auto& ins, auto f, auto&& check_context) {
                                 ctx.finish();
                                 std::cout << "Run instruction: " << ins_out.at(ins) << std::endl;
                                 timer t{};
                                 auto result = check_context(f);
                                 double t1 = t.record<milliseconds>();
                                 ctx.finish();
                                 double t2 = t.record<milliseconds>();
                                 std::cout << "Time: " << t1 << "ms, " << t2 << "ms" << std::endl;
                                 if(trace_level > 1 and ins->name().front() != '@' and
                                    ins->name() != "load" and not result.empty())
                                 {
                                     target tgt  = make_target(this->impl->target_name);
                                     auto buffer = tgt.copy_from(result);
                                     if(trace_level == 2)
                                     {
                                         std::cout << "Output has "
                                                   << to_string_range(classify_argument(buffer))
                                                   << std::endl;
                                         std::cout << "Output: ";
                                         preview_argument(std::cout, buffer);
                                         std::cout << std::endl;
                                     }
                                     else
                                     {
                                         std::cout << "Output: " << buffer << std::endl;
                                     }
                                 }
                                 return result;
                             }));
     }
     else
     {
-        return generic_eval(*this,
-                            ctx,
-                            std::move(params),
-                            with_check_context([&](auto&, auto f, auto&& check_context) {
+        ret = generic_eval(*this,
+                           ctx,
+                           std::move(params),
+                           with_check_context([&](auto&, auto f, auto&& check_context) {
                                 return check_context(f);
                             }));
     }
+
+    if(exec_env.async)
+    {
+        ctx.finish_on(exec_env.queue);
+    }
+    return ret;
 }

 const int program_file_version = 5;
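The new eval overload is what enables stream-based asynchronous execution: when exec_env.async is set, the context waits on the caller's queue before running and signals it afterwards via finish_on. A minimal caller sketch follows; it is a hypothetical example (the execution_environment/any_ptr header names and the "ihipStream_t" queue name are assumptions, mirroring how the Python run_async binding below wraps a caller-supplied stream), not code from this commit.

    #include <hip/hip_runtime.h>
    #include <migraphx/program.hpp>
    #include <migraphx/execution_environment.hpp> // assumed header for execution_environment
    #include <migraphx/any_ptr.hpp>               // assumed header for any_ptr

    // Sketch only: enqueue a compiled program on an existing HIP stream.
    void run_on_stream(migraphx::program& prog, migraphx::parameter_map params, hipStream_t stream)
    {
        // Wrap the raw stream handle; the name tells the target how to interpret it
        // ("ihipStream_t" is assumed here to match the HIP stream type).
        migraphx::execution_environment exec_env{
            migraphx::any_ptr(static_cast<void*>(stream), "ihipStream_t"), /*async=*/true};

        // eval() waits on `stream`, enqueues the program, and signals `stream` when done.
        auto results = prog.eval(std::move(params), exec_env);
        (void)results;
    }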
src/py/migraphx_py.cpp (+17 −0)

@@ -355,6 +355,23 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
             }
             return p.eval(pm);
         })
+        .def("run_async",
+             [](migraphx::program& p,
+                py::dict params,
+                std::uintptr_t stream,
+                std::string stream_name) {
+                 migraphx::parameter_map pm;
+                 for(auto x : params)
+                 {
+                     std::string key      = x.first.cast<std::string>();
+                     py::buffer b         = x.second.cast<py::buffer>();
+                     py::buffer_info info = b.request();
+                     pm[key]              = migraphx::argument(to_shape(info), info.ptr);
+                 }
+                 migraphx::execution_environment exec_env{
+                     migraphx::any_ptr(reinterpret_cast<void*>(stream), stream_name), true};
+                 return p.eval(pm, exec_env);
+             })
         .def("sort", &migraphx::program::sort)
         .def("print", [](const migraphx::program& p) { std::cout << p << std::endl; })
         .def("__eq__", std::equal_to<migraphx::program>{})
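Reading the diff, run_async mirrors the existing run binding: it converts the Python dict of buffers into a parameter_map, but instead of a plain eval(pm) it wraps the caller-supplied raw stream address and its type name in an any_ptr and passes execution_environment{..., true}, so the program is enqueued on that stream rather than executed synchronously. From Python this would presumably be called with an integer stream address obtained from a HIP or framework stream object; how that address is obtained is outside this diff.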
src/replace_allocate.cpp (+1 −1)

@@ -73,7 +73,7 @@ void insert_submod_allocations(instruction_ref ins, module& mod, const allocatio
         name_shapes.insert(ps.begin(), ps.end());
     }
-    for(auto& pn : name_shapes)
+    for(const auto& pn : name_shapes)
     {
         const auto& s = pn.second;
         instruction_ref output{};
src/simplify_algebra.cpp (+20 −4)

@@ -435,6 +435,24 @@ struct find_concat_op
     }
 };

+void move_instructions_back(module& m, instruction_ref pos, std::vector<instruction_ref> inss)
+{
+    auto start = range(m.begin(), pos);
+    for(auto ins : iterator_for(start))
+    {
+        auto it = std::find(inss.begin(), inss.end(), ins);
+        if(it != inss.end())
+            inss.erase(it);
+    }
+    for(auto ins : inss)
+    {
+        if(not m.has_instruction(ins))
+            continue;
+        move_instructions_back(m, pos, ins->inputs());
+        m.move_instruction(ins, pos);
+    }
+}
+
 std::vector<instruction_ref> get_splits(instruction_ref ins)
 {
     std::vector<instruction_ref> result;

@@ -610,8 +628,7 @@ struct find_splits
         }))
             return;

-        for(auto data : data_args)
-            m.move_instructions(data, ins);
+        move_instructions_back(m, ins, data_args);

         auto slice_op = any_cast<op::slice>(splits.front()->get_operator());
         assert(not slice_op.axes.empty());

@@ -864,8 +881,7 @@ struct find_conv_dot_horiz_fusion
             concat_axis = axis;
         }

-        for(auto arg : args)
-            m.move_instructions(arg, input);
+        move_instructions_back(m, input, args);

         // TODO: Check if axes match
         auto concat =
             m.insert_instruction(input, make_op("concat", {{"axis", concat_axis}}), args);
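A brief note on the new helper, as I read the diff rather than an authoritative description: move_instructions_back moves each instruction in inss to just before pos, first dropping from the list any instruction that already sits earlier than pos in the module, and recursing on the inputs of the ones it does move so their operands are relocated first. That keeps def-before-use ordering intact when the matchers above fuse arguments into a single concat inserted ahead of the original instructions.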
src/targets/cpu/CMakeLists.txt (+2 −0)

@@ -35,6 +35,7 @@ add_library(migraphx_cpu
     dnnl.cpp
     eltwise.cpp
     erf.cpp
+    fmod.cpp
     fuse_ops.cpp
     gather.cpp
     gemm.cpp
@@ -42,6 +43,7 @@ add_library(migraphx_cpu
     logsoftmax.cpp
     lowering.cpp
     lrn.cpp
+    mod.cpp
     preallocate.cpp
     pooling.cpp
     reduction.cpp
src/targets/gpu/include/migraphx/gpu/cos.hpp → src/targets/cpu/fmod.cpp (+6 −12)

@@ -21,22 +21,16 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_COS_HPP
-#define MIGRAPHX_GUARD_RTGLIB_COS_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/cos.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/cpu/pointwise.hpp>
+#include <migraphx/op/fmod.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
+namespace cpu {

-struct hip_cos : unary_device<hip_cos, device::cos>
-{
-};
+template struct cpu_binary<op::fmod>;

-} // namespace gpu
+} // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-
-#endif
src/targets/cpu/lowering.cpp (+2 −0)

@@ -43,6 +43,8 @@
 #include <migraphx/op/argmax.hpp>
 #include <migraphx/op/argmin.hpp>
 #include <migraphx/op/rnn_var_sl_last_output.hpp>
+#include <migraphx/op/mod.hpp>
+#include <migraphx/op/fmod.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/par_dfor.hpp>
src/targets/gpu/include/migraphx/gpu/exp.hpp → src/targets/cpu/mod.cpp (+6 −12)

@@ -21,22 +21,16 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_EXP_HPP
-#define MIGRAPHX_GUARD_RTGLIB_EXP_HPP
-
-#include <migraphx/gpu/oper.hpp>
-#include <migraphx/gpu/device/exp.hpp>
+#include <migraphx/config.hpp>
+#include <migraphx/cpu/pointwise.hpp>
+#include <migraphx/op/mod.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
+namespace cpu {

-struct hip_exp : unary_device<hip_exp, device::exp>
-{
-};
+template struct cpu_binary<op::mod>;

-} // namespace gpu
+} // namespace cpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
-
-#endif
src/targets/gpu/CMakeLists.txt (+13 −130)

@@ -41,81 +41,9 @@ file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
 message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
 add_embed_library(migraphx_kernels ${KERNEL_FILES})

-add_library(migraphx_device
-    device/acos.cpp device/acosh.cpp device/add.cpp device/add_clip.cpp device/add_relu.cpp
-    device/add_sigmoid.cpp device/add_tanh.cpp device/argmax.cpp device/argmin.cpp device/asin.cpp
-    device/asinh.cpp device/atan.cpp device/atanh.cpp device/ceil.cpp device/clip.cpp
-    device/concat.cpp device/contiguous.cpp device/convert.cpp device/cos.cpp device/cosh.cpp
-    device/div.cpp device/equal.cpp device/erf.cpp device/exp.cpp device/fill.cpp
-    device/floor.cpp device/gather.cpp device/gelu.cpp device/greater.cpp device/int8_gemm_pack.cpp
-    device/layernorm.cpp device/less.cpp device/log.cpp device/logical_and.cpp device/logical_or.cpp
-    device/logical_xor.cpp device/logsoftmax.cpp device/max.cpp device/min.cpp device/mul.cpp
-    device/mul_add.cpp device/mul_add_relu.cpp device/multinomial.cpp device/nonzero.cpp device/pad.cpp
-    device/pow.cpp device/prelu.cpp device/prefix_scan_sum.cpp device/recip.cpp device/reduce_max.cpp
-    device/reduce_mean.cpp device/reduce_min.cpp device/reduce_sum.cpp device/reduce_prod.cpp device/relu.cpp
-    device/reverse.cpp device/rnn_variable_seq_lens.cpp device/round.cpp device/rsqrt.cpp device/scatter.cpp
-    device/sigmoid.cpp device/sign.cpp device/sin.cpp device/sinh.cpp device/softmax.cpp
-    device/sqdiff.cpp device/sqrt.cpp device/sub.cpp device/tan.cpp device/tanh.cpp
-    device/topk.cpp device/unary_not.cpp device/where.cpp
-)
+file(GLOB DEVICE_GPU_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
+add_library(migraphx_device ${DEVICE_GPU_SRCS})

 add_library(compile_for_gpu INTERFACE)
 target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
 target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored)

@@ -153,15 +81,12 @@ add_library(migraphx_gpu
     argmax.cpp
     argmin.cpp
     batch_norm_inference.cpp
-    clip.cpp
     code_object_op.cpp
     compile_ops.cpp
     compile_gen.cpp
     compile_hip.cpp
     compile_hip_code_object.cpp
     compiler.cpp
-    concat.cpp
-    convert.cpp
     convolution.cpp
     deconvolution.cpp
     device_name.cpp

@@ -194,7 +119,6 @@ add_library(migraphx_gpu
     rocblas.cpp
     scatter.cpp
     schedule_model.cpp
-    softmax.cpp
     sync_device.cpp
     target.cpp
     topk.cpp

@@ -209,68 +133,18 @@ function(register_migraphx_gpu_ops PREFIX)
     endforeach()
 endfunction()
 register_migraphx_gpu_ops(hip_
-    acosh acos add
     argmax
     argmin
-    asinh asin atanh atan ceil clip concat convert cosh cos div equal erf exp floor
     gather
-    greater less log
     logsoftmax
-    logical_and logical_or logical_xor
     loop
-    max min mul
     multinomial
     nonzero
     pad
-    pow prelu
     prefix_scan_sum
-    recip reduce_max reduce_mean reduce_min reduce_prod reduce_sum relu
     reverse
-    round rsqrt
     scatter
-    sigmoid sign sinh sin softmax sqdiff sqrt sub tanh tan
     topk
-    unary_not where
 )
 register_migraphx_gpu_ops(miopen_
     abs

@@ -372,9 +246,18 @@ endif()
 include(CheckLibraryExists)
 get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
 check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
+check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
+
+if(HAS_FIND_2_API)
+    target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
+    message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
+else()
+    message(STATUS "MIOpen does not have Find-2.0 API")
+endif()
+
 if(HAS_FIND_MODE_API)
     target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API)
-    message(STATUS "MIOpen has find mode api")
+    message(STATUS "MIGraphx is using Find Mode API of MIOpen")
 else()
     message(STATUS "MIOpen does not have find mode api")
 endif()
src/targets/gpu/clip.cpp deleted (100644 → 0, −46)

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <migraphx/gpu/clip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/clip.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape hip_clip::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    return op.compute_shape(inputs);
}

argument hip_clip::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    device::clip(ctx.get_stream().get(), args.back(), args.front(), args.at(1), args.at(2));
    return args.back();
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/compile_gen.cpp (+32 −3)

@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include <migraphx/gpu/compile_gen.hpp>
+#include <migraphx/gpu/context.hpp>
 #include <migraphx/shape.hpp>
 #include <migraphx/permutation.hpp>
 #include <migraphx/stringutils.hpp>

@@ -48,12 +49,13 @@ static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
     return {4, 2};
 }

-vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
+vectorize vectorize::elements(std::size_t axis,
+                              const std::vector<shape>& inputs,
+                              const std::vector<std::size_t>& sizes)
 {
     if(std::all_of(
            inputs.begin(), inputs.end(), [&](const auto& s) { return s.lens()[axis] == 1; }))
         return {1, axis};
-    auto sizes = vector_sizes(inputs);
     std::vector<std::size_t> max_vec_size;
     std::transform(inputs.begin(),
                    inputs.end(),

@@ -81,6 +83,33 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs
     return {*std::min_element(max_vec_size.begin(), max_vec_size.end()), axis};
 }

+vectorize vectorize::elements(context& ctx, std::size_t axis, const std::vector<shape>& inputs)
+{
+    if(inputs.empty())
+        return {1, axis};
+    std::size_t n = std::max_element(inputs.begin(),
+                                     inputs.end(),
+                                     by(std::less<>{}, [](const auto& s) { return s.elements(); }))
+                        ->elements();
+    std::size_t max_global = ctx.get_current_device().get_cu_count() *
+                             ctx.get_current_device().get_max_workitems_per_cu();
+    std::size_t over = n / max_global;
+    bool broadcasted =
+        std::any_of(inputs.begin(), inputs.end(), [](const auto& s) { return s.broadcasted(); });
+    std::vector<std::size_t> sizes;
+    if(broadcasted and over > 8)
+        sizes.push_back(8);
+    if(over > 4)
+        sizes.push_back(4);
+    sizes.push_back(2);
+    return elements(axis, inputs, sizes);
+}
+
+vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
+{
+    return elements(axis, inputs, vector_sizes(inputs));
+}
+
 std::string vectorize::str() const
 {
     return "vectorize<" + to_string(size) + ", " + to_string(axis) + ">()";

@@ -102,7 +131,7 @@ preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
     std::size_t bytes = 0;
     for(auto i : preloaded)
     {
-        auto input = inputs[i];
+        const auto& input = inputs[i];
         bytes += input.bytes();
         if(bytes > max_lds_bytes)
             break;
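To make the new context-aware overload of vectorize::elements concrete, here is a worked example with made-up device numbers (the CU count, workitems per CU, and element count below are hypothetical, not taken from this diff):

    max_global = 120 CUs × 1024 workitems/CU = 122,880
    over       = 4,194,304 elements / 122,880 = 34   (integer division)
    candidates = {8, 4, 2} if any input is broadcast (over > 8), otherwise {4, 2} (over > 4)

elements(axis, inputs, sizes) then settles on, roughly, the largest candidate size that is compatible with every input along the vectorized axis, so heavily oversubscribed launches with broadcast inputs can vectorize wider than the previous fixed {4, 2} choice.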
src/targets/gpu/concat.cpp deleted (100644 → 0, −48)

/* MIT License header identical to src/targets/gpu/clip.cpp above */
#include <migraphx/gpu/concat.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/concat.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape hip_concat::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    return op.normalize_compute_shape(inputs);
}

argument hip_concat::compute(context& ctx,
                             const shape& output_shape,
                             const std::vector<argument>& args) const
{
    std::vector<std::size_t> offsets = op.compute_offsets(output_shape, args);
    return device::concat(ctx.get_stream().get(), output_shape, args, offsets);
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/convert.cpp deleted (100644 → 0, −47)

/* MIT License header identical to src/targets/gpu/clip.cpp above */
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/convert.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape hip_convert::compute_shape(std::vector<shape> inputs) const
{
    inputs.pop_back();
    check_shapes{inputs, *this}.packed();
    return op.compute_shape(inputs);
}

argument hip_convert::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    device::convert(ctx.get_stream().get(), args[1], args[0]);
    return args[1];
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/convolution.cpp (+136 −49)

@@ -24,6 +24,7 @@
 #include <migraphx/gpu/convolution.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/generate.hpp>
+#include <miopen/miopen.h>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -55,14 +56,40 @@ argument miopen_convolution::compute(context& ctx,
                                      const shape& output_shape,
                                      const std::vector<argument>& args) const
 {
     auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()));
     auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
     auto y_desc = make_tensor(reshape_if_1d(output_shape));
+
+    auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+    auto workspace_size        = args[2].get_shape().bytes();
+
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    {
+        const miopenTensorArgument_t tensor_args[3] = {
+            {miopenTensorConvolutionX, nullptr, args[0].implicit()},
+            {miopenTensorConvolutionW, nullptr, args[1].implicit()},
+            {miopenTensorConvolutionY, nullptr, args[3].implicit()},
+        };
+
+        if(solution_ptr.get() == nullptr)
+            MIGRAPHX_THROW("MIOpen Convolution : Load MIOpen Solution before running it");
+
+        auto status = miopenRunSolution(miopen_stream_handle,
+                                        solution_ptr.get(),
+                                        3,
+                                        tensor_args,
+                                        args[2].implicit(),
+                                        workspace_size);
+
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution: running convolution using find_2.0 failed");
+        return args[3];
+    }
+#else
+    // else use immediate mode
     if(solution_id == 0)
         MIGRAPHX_THROW("MIOpen Convolution: invalid solution ID");

-    auto status = miopenConvolutionForwardImmediate(ctx.get_stream().get_miopen(),
+    auto status = miopenConvolutionForwardImmediate(miopen_stream_handle,
                                                     w_desc.get(),
                                                     args[1].implicit(),
                                                     x_desc.get(),

@@ -71,29 +98,66 @@ argument miopen_convolution::compute(context& ctx,
                                                     y_desc.get(),
                                                     args[3].implicit(),
                                                     args[2].implicit(),
-                                                    args[2].get_shape().bytes(),
+                                                    workspace_size,
                                                     solution_id);
     if(status != miopenStatusSuccess)
         MIGRAPHX_THROW("MIOpen Convolution: running convolution failed");

     return args[3];
+#endif
 }

 shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
 {
     shape workspace_shape{};

     auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
     auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
     auto y_desc = make_tensor(reshape_if_1d(output_shape));
-
-    std::size_t workspace_size = 0;
-    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
-                                             w_desc.get(),
-                                             x_desc.get(),
-                                             cd.get(),
-                                             y_desc.get(),
-                                             &workspace_size);
+    std::size_t workspace_size = 0;
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    {
+        auto conv_problem = make_obj<miopen_problem>(
+            &miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);
+
+        set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
+        set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
+        set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
+
+        auto* miopen_stream_handle = ctx.get_stream().get_miopen();
+
+        solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());
+
+        auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution : failed to get solution's workspace size");
+
+        std::size_t solution_size;
+        status = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution: Failed to fetch solution size");
+
+        auto solution_binary = std::vector<char>{};
+        solution_binary.resize(solution_size);
+
+        status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen Convolution: Saving solution failed");
+        solution_object = value::binary{solution_binary.data(), solution_size};
+        return shape{shape::int8_type, {workspace_size}};
+    }
+#else
+    // else use immediate find mode
+    auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
+                                                           w_desc.get(),
+                                                           x_desc.get(),
+                                                           cd.get(),
+                                                           y_desc.get(),
+                                                           &workspace_size);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen Convolution: Failed to get forward workspace size");

     workspace_shape = shape{shape::int8_type, {workspace_size}};

     auto x = to_gpu(generate_argument(inputs[0]));

@@ -103,20 +167,20 @@ shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vec
     int algo_count = 1;
     miopenConvAlgoPerf_t perf;
-    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
+    status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                         x_desc.get(),
                                                         x.implicit(),
                                                         w_desc.get(),
                                                         w.implicit(),
                                                         cd.get(),
                                                         y_desc.get(),
                                                         y.implicit(),
                                                         1,
                                                         &algo_count,
                                                         &perf,
                                                         workspace.implicit(),
                                                         workspace_size,
                                                         false);
     if(status != miopenStatusSuccess)
         MIGRAPHX_THROW("MIOpen Convolution: find convolution failed");
     algo = perf.fwd_algo;

@@ -148,35 +212,58 @@ shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vec
     solution_id = solutions.front().solution_id;

     return shape{shape::int8_type, {perf.memory}};
+#endif
 }

 void miopen_convolution::finalize(context& ctx,
                                   const shape& output_shape,
-                                  std::vector<shape> inputs)
+                                  const std::vector<shape>& inputs)
 {
-    if(cd == nullptr)
-        cd = make_conv(op);
-    if(solution_id == 0)
-    {
-        // Check that workspace hasn't changed
-        auto size = inputs.at(2).bytes();
-        auto ws   = find(ctx, output_shape, inputs);
-        if(ws.bytes() > size)
-            MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
-    }
+#ifdef MIGRAPHX_HAS_FIND_2_API
+    {
+        (void)(ctx); // avoid warnings
+        (void)(output_shape);
+        (void)(inputs);
+        // load solution
+        if(solution_ptr == nullptr)
+        {
+            miopenSolution_t ptr;
+            auto status = miopenLoadSolution(
+                &ptr, reinterpret_cast<const char*>(solution_object.data()), solution_object.size());
+            solution_ptr = miopen_solution{ptr};
+            if(status != miopenStatusSuccess)
+                MIGRAPHX_THROW("MIOpen Convolution: loading convolution solution failed");
+        }
+    }
+#else
+    // Use immediate mode API
+    {
+        if(cd == nullptr)
+            cd = make_conv(op);
+        if(solution_id == 0)
+        {
+            // Check that workspace hasn't changed
+            auto size = inputs.at(2).bytes();
+            auto ws   = find(ctx, output_shape, inputs);
+            if(ws.bytes() > size)
+                MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
+        }

         auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
         auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
         auto y_desc = make_tensor(reshape_if_1d(output_shape));

         auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                               w_desc.get(),
                                                               x_desc.get(),
                                                               cd.get(),
                                                               y_desc.get(),
                                                               solution_id);
         if(status != miopenStatusSuccess)
             MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
+    }
+#endif
 }

 } // namespace gpu
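As I read this diff: when MIGRAPHX_HAS_FIND_2_API is defined, find() builds a miopenProblem for the forward convolution, finds a solution on the stream handle, records its workspace size, and serializes the solution into solution_object (a value::binary) so it can travel with a compiled program; finalize() reloads that binary via miopenLoadSolution if solution_ptr is not already set, and compute() executes it through miopenRunSolution with the X/W/Y tensor arguments plus the preallocated workspace. Without the Find-2.0 API, the previous immediate-mode path (solution_id, miopenConvolutionForwardImmediate, and the workspace re-check in finalize) is kept unchanged.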
src/targets/gpu/device/acos.cpp deleted (100644 → 0, −41)

/* MIT License header identical to src/targets/gpu/clip.cpp above */
#include <migraphx/gpu/device/acos.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void acos(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) __device__ { return ::acos(to_hip_type(x)); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/acosh.cpp deleted (100644 → 0, −41)

/* MIT License header identical to src/targets/gpu/clip.cpp above */
#include <migraphx/gpu/device/acosh.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void acosh(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) { return ::acosh(to_hip_type(x)); });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/add.cpp deleted (100644 → 0, −50)

/* MIT License header identical to src/targets/gpu/clip.cpp above */
#include <migraphx/gpu/device/add.hpp>
#include <migraphx/gpu/device/nary.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void add(hipStream_t stream,
         const argument& result,
         const argument& arg1,
         const argument& arg2)
{
    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ { return x + y; });
}

void add(hipStream_t stream,
         const argument& result,
         const argument& arg1,
         const argument& arg2,
         const argument& arg3)
{
    nary(stream, result, arg1, arg2, arg3)(
        [](auto x, auto y, auto z) __device__ { return x + y + z; });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/add_clip.cpp deleted (100644 → 0, −61)

/* MIT License header identical to src/targets/gpu/clip.cpp above */
#include <migraphx/gpu/device/add_clip.hpp>
#include <migraphx/gpu/device/nary.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

void add_clip(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2,
              const argument& min_arg,
              const argument& max_arg)
{
    nary(stream, result, arg1, arg2, min_arg, max_arg)(
        [](auto x, auto y, auto min, auto max) __device__ {
            return ::min<decltype(x + y)>(::max<decltype(x)>(min, x + y), max);
        });
}

void add_clip(hipStream_t stream,
              const argument& result,
              const argument& arg1,
              const argument& arg2,
              const argument& arg3,
              const argument& min_arg,
              const argument& max_arg)
{
    nary(stream, result, arg1, arg2, arg3, min_arg, max_arg)(
        [](auto x, auto y, auto z, auto min, auto max) __device__ {
            return ::min<decltype(x + y + z)>(::max<decltype(x)>(min, x + y + z), max);
        });
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx